# Source code for gpflux.sampling.kernel_with_feature_decomposition

#
# Copyright (c) 2021 The GPflux Contributors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
r"""
The classes in this module encapsulate kernels :math:`k(\cdot, \cdot)` with
their features :math:`\phi_i(\cdot)` and coefficients :math:`\lambda_i` so
that:

.. math::

    k(x, x') = \sum_{i=0}^\infty \lambda_i \phi_i(x) \phi_i(x').

The kernels are used for efficient sampling. See the tutorial notebooks
`Efficient sampling <../../../../notebooks/efficient_sampling.ipynb>`_
and `Weight Space Approximation with Random Fourier Features
<../../../../notebooks/weight_space_approximation.ipynb>`_
for an in-depth overview.
"""
from typing import Optional, Union

import tensorflow as tf

import gpflow
from gpflow.base import TensorType
from gpflow.keras import tf_keras

NoneType = type(None)


class _ApproximateKernel(gpflow.kernels.Kernel):
    r"""
    This class approximates a kernel by the finite feature decomposition:

    .. math::

        k(x, x') = \sum_{i=0}^{L-1} \lambda_i \phi_i(x) \phi_i(x'),

    where :math:`\lambda_i` and :math:`\phi_i(\cdot)` are the coefficients
    and features, respectively, and ``L`` is the number of features.
    """

    def __init__(
        self,
        feature_functions: tf_keras.layers.Layer,
        feature_coefficients: TensorType,
    ):
        r"""
        :param feature_functions: A Keras layer for which the call evaluates the
            ``L`` features of the kernel :math:`\phi_i(\cdot)`. For ``X`` with the
            shape ``[N, D]``, ``feature_functions(X)`` returns a tensor with the
            shape ``[N, L]``.
        :param feature_coefficients: A tensor with the shape ``[L, 1]`` with
            coefficients associated with the features, :math:`\lambda_i`.
        """
        # Initialise the base kernel/module state before assigning attributes,
        # mirroring what KernelWithFeatureDecomposition does. An explicit name is
        # passed instead of relying on one derived from the underscore-prefixed
        # class name.
        super().__init__(name="approximate_kernel")
        self._feature_functions = feature_functions
        self._feature_coefficients = feature_coefficients  # [L, 1]

    def K(self, X: TensorType, X2: Optional[TensorType] = None) -> tf.Tensor:
        """
        Approximate the true kernel by an inner product between feature functions.

        :param X: Inputs passed to the feature layer; the features evaluate to
            a tensor with the shape ``[N, L]``.
        :param X2: Optional second set of inputs. When ``None``, the features
            of ``X`` are reused for both arguments of the kernel.
        :return: The approximate covariance matrix with the shape ``[N, N2]``.
        """
        phi = self._feature_functions(X)  # [N, L]
        if X2 is None:
            # Symmetric case: no need to evaluate the feature layer twice.
            phi2 = phi
        else:
            phi2 = self._feature_functions(X2)  # [N2, L]

        # Scale each feature column of phi2 by its coefficient ([1, L] broadcasts
        # against [N2, L]), then contract over the feature axis.
        r = tf.matmul(
            phi, tf.transpose(self._feature_coefficients) * phi2, transpose_b=True
        )  # [N, N2]

        N1, N2 = tf.shape(phi)[0], tf.shape(phi2)[0]
        tf.debugging.assert_equal(tf.shape(r), [N1, N2])
        return r

    def K_diag(self, X: TensorType) -> tf.Tensor:
        """
        Approximate the diagonal of the true kernel using the feature functions.

        :param X: Inputs passed to the feature layer; the features evaluate to
            a tensor with the shape ``[N, L]``.
        :return: The approximate kernel diagonal with the shape ``[N]``.
        """
        phi_squared = self._feature_functions(X) ** 2  # [N, L]
        # Weight the squared features by the coefficients ([1, L]) and sum over
        # the feature axis.
        r = tf.reduce_sum(phi_squared * tf.transpose(self._feature_coefficients), axis=1)  # [N,]
        N = tf.shape(X)[0]

        tf.debugging.assert_equal(tf.shape(r), [N])  # noqa: E231
        return r
class KernelWithFeatureDecomposition(gpflow.kernels.Kernel):
    r"""
    A kernel bundled with its finite feature decomposition:

    .. math::

        k(x, x') = \sum_{i=0}^{L-1} \lambda_i \phi_i(x) \phi_i(x'),

    where :math:`\lambda_i` and :math:`\phi_i(\cdot)` are the coefficients
    and features, respectively, and ``L`` is the number of features.

    The decomposition can be derived from Mercer or Bochner's theorem. For
    example, feature-coefficient pairs could be eigenfunction-eigenvalue pairs
    (Mercer) or Fourier features with constant coefficients (Bochner).

    In some cases (e.g., [1]_ and [2]_) the left-hand side (that is, the
    covariance function :math:`k(\cdot, \cdot)`) is unknown and the kernel
    can only be approximated using its feature decomposition.
    In other cases (e.g., [3]_ and [4]_), both the covariance function and feature
    decomposition are available in closed form.

    .. [1] Solin, Arno, and Simo Särkkä. "Hilbert space methods for
        reduced-rank Gaussian process regression." Statistics and Computing
        (2020).
    .. [2] Borovitskiy, Viacheslav, et al. "Matérn Gaussian processes on
        Riemannian manifolds." In Advances in Neural Information Processing
        Systems (2020).
    .. [3] Ali Rahimi and Benjamin Recht. Random features for large-scale kernel
        machines. In Advances in Neural Information Processing Systems (2007).
    .. [4] Dutordoir, Vincent, Nicolas Durrande, and James Hensman. "Sparse
        Gaussian processes with spherical harmonic features." In International
        Conference on Machine Learning (2020).
    """

    def __init__(
        self,
        kernel: Union[gpflow.kernels.Kernel, NoneType],
        feature_functions: tf_keras.layers.Layer,
        feature_coefficients: TensorType,
    ):
        r"""
        :param kernel: The kernel corresponding to the feature decomposition.
            If ``None``, there is no analytical expression associated with the infinite
            sum and we approximate the kernel based on the feature decomposition.

            .. note::

                In certain cases, the analytical expression for the kernel is
                not available. In this case, passing `None` is allowed, and
                :meth:`K` and :meth:`K_diag` will be computed using the
                approximation provided by the feature decomposition.

        :param feature_functions: A Keras layer for which the call evaluates the
            ``L`` features of the kernel :math:`\phi_i(\cdot)`. For ``X`` with the
            shape ``[N, D]``, ``feature_functions(X)`` returns a tensor with the
            shape ``[N, L]``.
        :param feature_coefficients: A tensor with the shape ``[L, 1]`` with
            coefficients associated with the features, :math:`\lambda_i`.
        """
        super().__init__()

        # Without a closed-form kernel, fall back to the approximation built
        # from the supplied features and coefficients.
        self._kernel = (
            _ApproximateKernel(feature_functions, feature_coefficients)
            if kernel is None
            else kernel
        )

        self._feature_functions = feature_functions
        self._feature_coefficients = feature_coefficients  # [L, 1]
        # Shape check only: the coefficients must form a single column.
        tf.ensure_shape(self._feature_coefficients, tf.TensorShape([None, 1]))

    @property
    def feature_functions(self) -> tf_keras.layers.Layer:
        r"""Return the kernel's features :math:`\phi_i(\cdot)`."""
        return self._feature_functions

    @property
    def feature_coefficients(self) -> tf.Tensor:
        r"""Return the kernel's coefficients :math:`\lambda_i`."""
        return self._feature_coefficients

    def K(self, X: TensorType, X2: Optional[TensorType] = None) -> tf.Tensor:
        """Evaluate the covariance by delegating to the wrapped kernel's ``K``."""
        return self._kernel.K(X, X2)

    def K_diag(self, X: TensorType) -> tf.Tensor:
        """Evaluate the covariance diagonal by delegating to the wrapped kernel's ``K_diag``."""
        return self._kernel.K_diag(X)