#
# Copyright (c) 2021 The GPflux Contributors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
r"""
The classes in this module encapsulate kernels :math:`k(\cdot, \cdot)` with
their features :math:`\phi_i(\cdot)` and coefficients :math:`\lambda_i` so
that:

.. math::

    k(x, x') = \sum_{i=0}^\infty \lambda_i \phi_i(x) \phi_i(x').

The kernels are used for efficient sampling. See the tutorial notebooks
`Efficient sampling <../../../../notebooks/efficient_sampling.ipynb>`_
and `Weight Space Approximation with Random Fourier Features
<../../../../notebooks/weight_space_approximation.ipynb>`_
for an in-depth overview.
"""
from typing import Optional

import tensorflow as tf

import gpflow
from gpflow.base import TensorType
from gpflow.keras import tf_keras


class _ApproximateKernel(gpflow.kernels.Kernel):
r"""
    This class approximates a kernel by the finite feature decomposition:

    .. math::

        k(x, x') = \sum_{i=0}^{L-1} \lambda_i \phi_i(x) \phi_i(x'),

where :math:`\lambda_i` and :math:`\phi_i(\cdot)` are the coefficients
and features, respectively.
"""

    def __init__(
self,
feature_functions: tf_keras.layers.Layer,
feature_coefficients: TensorType,
):
r"""
:param feature_functions: A Keras layer for which the call evaluates the
``L`` features of the kernel :math:`\phi_i(\cdot)`. For ``X`` with the shape ``[N, D]``,
``feature_functions(X)`` returns a tensor with the shape ``[N, L]``.
:param feature_coefficients: A tensor with the shape ``[L, 1]`` with coefficients
associated with the features, :math:`\lambda_i`.
"""
        super().__init__()
        self._feature_functions = feature_functions
self._feature_coefficients = feature_coefficients # [L, 1]

    def K(self, X: TensorType, X2: Optional[TensorType] = None) -> tf.Tensor:
"""Approximate the true kernel by an inner product between feature functions."""
phi = self._feature_functions(X) # [N, L]
if X2 is None:
phi2 = phi
else:
phi2 = self._feature_functions(X2) # [N2, L]
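        # Weighted inner product Φ(X) diag(λ) Φ(X2)^T: the transposed [1, L]
        # coefficients broadcast across the rows of phi2 before the matmul.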
r = tf.matmul(
phi, tf.transpose(self._feature_coefficients) * phi2, transpose_b=True
) # [N, N2]
N1, N2 = tf.shape(phi)[0], tf.shape(phi2)[0]
tf.debugging.assert_equal(tf.shape(r), [N1, N2])
return r

    def K_diag(self, X: TensorType) -> tf.Tensor:
"""Approximate the true kernel by an inner product between feature functions."""
phi_squared = self._feature_functions(X) ** 2 # [N, L]
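        # Diagonal of Φ(X) diag(λ) Φ(X)^T: sum_i lambda_i * phi_i(x)^2 per row.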
r = tf.reduce_sum(phi_squared * tf.transpose(self._feature_coefficients), axis=1) # [N,]
N = tf.shape(X)[0]
tf.debugging.assert_equal(tf.shape(r), [N]) # noqa: E231
return r


class KernelWithFeatureDecomposition(gpflow.kernels.Kernel):
r"""
    This class represents a kernel together with its finite feature decomposition:

    .. math::

        k(x, x') = \sum_{i=0}^{L-1} \lambda_i \phi_i(x) \phi_i(x'),

    where :math:`\lambda_i` and :math:`\phi_i(\cdot)` are the coefficients and
    features, respectively.

    The decomposition can be derived from Mercer's or Bochner's theorem. For example,
    feature-coefficient pairs could be eigenfunction-eigenvalue pairs (Mercer) or
    Fourier features with constant coefficients (Bochner).
In some cases (e.g., [1]_ and [2]_) the left-hand side (that is, the
covariance function :math:`k(\cdot, \cdot)`) is unknown and the kernel
can only be approximated using its feature decomposition.
In other cases (e.g., [3]_ and [4]_), both the covariance function and feature
decomposition are available in closed form.
.. [1]
Solin, Arno, and Simo Särkkä. "Hilbert space methods for
reduced-rank Gaussian process regression." Statistics and Computing
(2020).
.. [2]
Borovitskiy, Viacheslav, et al. "Matérn Gaussian processes on
Riemannian manifolds." In Advances in Neural Information Processing
Systems (2020).
    .. [3]
        Rahimi, Ali, and Benjamin Recht. "Random features for large-scale
        kernel machines." In Advances in Neural Information Processing
        Systems (2007).
.. [4]
Dutordoir, Vincent, Nicolas Durrande, and James Hensman. "Sparse
Gaussian processes with spherical harmonic features." In International
Conference on Machine Learning (2020).
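
    For example, to pair a Matérn kernel with a random Fourier feature
    decomposition (an illustrative sketch: ``RandomFourierFeatures`` stands in
    for any Keras layer mapping ``[N, D]`` inputs to ``[N, L]`` features)::

        kernel = gpflow.kernels.Matern52()
        feature_functions = RandomFourierFeatures(kernel, n_components=1000)
        feature_coefficients = np.ones((1000, 1))  # constant coefficients (Bochner)
        kernel_with_features = KernelWithFeatureDecomposition(
            kernel, feature_functions, feature_coefficients
        )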
"""

    def __init__(
self,
        kernel: Optional[gpflow.kernels.Kernel],
feature_functions: tf_keras.layers.Layer,
feature_coefficients: TensorType,
):
r"""
        :param kernel: The kernel corresponding to the feature decomposition.
            If ``None``, there is no analytical expression associated with the infinite
            sum and we approximate the kernel based on the feature decomposition.

            .. note::

                In certain cases, the analytical expression for the kernel is
                not available. In this case, passing ``None`` is allowed, and
                :meth:`K` and :meth:`K_diag` will be computed using the
                approximation provided by the feature decomposition.

:param feature_functions: A Keras layer for which the call evaluates the
``L`` features of the kernel :math:`\phi_i(\cdot)`. For ``X`` with the shape ``[N, D]``,
``feature_functions(X)`` returns a tensor with the shape ``[N, L]``.
:param feature_coefficients: A tensor with the shape ``[L, 1]`` with coefficients
associated with the features, :math:`\lambda_i`.
"""
super().__init__()
if kernel is None:
self._kernel = _ApproximateKernel(feature_functions, feature_coefficients)
else:
self._kernel = kernel
self._feature_functions = feature_functions
self._feature_coefficients = feature_coefficients # [L, 1]
tf.ensure_shape(self._feature_coefficients, tf.TensorShape([None, 1]))

    @property
    def feature_functions(self) -> tf_keras.layers.Layer:
        r"""Return the kernel's features :math:`\phi_i(\cdot)`."""
        return self._feature_functions

    @property
    def feature_coefficients(self) -> tf.Tensor:
        r"""Return the kernel's coefficients :math:`\lambda_i`."""
        return self._feature_coefficients

    def K(self, X: TensorType, X2: Optional[TensorType] = None) -> tf.Tensor:
        """Evaluate the kernel, delegating to the exact kernel when available,
        otherwise to its feature-based approximation."""
        return self._kernel.K(X, X2)

    def K_diag(self, X: TensorType) -> tf.Tensor:
        """Evaluate the kernel diagonal, delegating in the same way as :meth:`K`."""
        return self._kernel.K_diag(X)
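

if __name__ == "__main__":
    # Minimal smoke test (an illustrative sketch, not part of the public API):
    # approximate a unit-lengthscale squared-exponential kernel with random
    # Fourier features and constant coefficients lambda_i = 1 (the Bochner
    # setting). The layer below is a hypothetical stand-in for any Keras
    # feature layer mapping [N, D] inputs to [N, L] features.
    import numpy as np

    class _DemoFourierFeatures(tf_keras.layers.Layer):
        """Map [N, D] inputs to [N, L] random cosine features."""

        def __init__(self, n_inputs: int, n_features: int):
            super().__init__(dtype="float64")
            rng = np.random.default_rng(0)
            # Frequencies drawn from the spectral density of the kernel.
            self._w = tf.constant(rng.standard_normal((n_inputs, n_features)))
            self._b = tf.constant(rng.uniform(0.0, 2.0 * np.pi, size=(n_features,)))
            self._scale = np.sqrt(2.0 / n_features)

        def call(self, X: TensorType) -> tf.Tensor:
            return self._scale * tf.cos(tf.matmul(X, self._w) + self._b)

    num_features = 2000
    feature_functions = _DemoFourierFeatures(n_inputs=1, n_features=num_features)
    feature_coefficients = tf.ones((num_features, 1), dtype=tf.float64)  # [L, 1]
    # Passing kernel=None exercises the _ApproximateKernel code path.
    kernel = KernelWithFeatureDecomposition(None, feature_functions, feature_coefficients)

    X = np.linspace(0.0, 1.0, 5).reshape(-1, 1)
    print(kernel.K(X))       # approx. exp(-0.5 * (x - x') ** 2), shape [5, 5]
    print(kernel.K_diag(X))  # approx. 1.0 everywhere, shape [5]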