Source code for trieste.acquisition.function.utils

# Copyright 2021 The Trieste Contributors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This module contains utility functions for acquisition functions.
"""
from typing import Callable, Tuple

import tensorflow as tf
from tensorflow_probability import distributions as tfd

from ...types import TensorType

# =============================================================================
# Multivariate Normal CDF
# =============================================================================


[docs]class MultivariateNormalCDF:
    def __init__(
        self,
        sample_size: int,
        dim: int,
        dtype: tf.DType,
        num_sobol_skip: int = 0,
    ) -> None:
        """Builds the cumulative density function of the multivariate Gaussian
        using the Genz approximation detailed in :cite:`genz2016numerical`.

        This is a Monte Carlo approximation which is more accurate than a naive
        Monte Carlo estimate of the expected improvent. In order to use
        reparametrised samples, the helper accepts a tensor of samples, and the
        callable uses these fixed samples whenever it is called.

        :param samples_size: int, number of samples to use.
        :param dim: int, dimension of the multivariate Gaussian.
        :param dtype: tf.DType, data type to use for calculations.
        :param num_sobol_skip: int, number of sobol samples to skip.
        """
        tf.debugging.assert_positive(sample_size)
        tf.debugging.assert_positive(dim)

        self._S = sample_size
        self._Q = dim
        self._dtype = dtype

        self._num_sobol_skip = num_sobol_skip

[docs]    def _standard_normal_cdf_and_inverse_cdf(
        self,
        dtype: tf.DType,
    ) -> Tuple[Callable[[TensorType], TensorType], Callable[[TensorType], TensorType]]:
        """Returns two callables *Phi* and *iPhi*, which compute the cumulative
        density function and inverse cumulative density function of a standard
        univariate Gaussian.

        :param dtype: The data type to use, either tf.float32 or tf.float64.
        :returns Phi, iPhi: Cumulative and inverse cumulative density functions.
        """

        normal = tfd.Normal(
            loc=tf.zeros(shape=(), dtype=dtype),
            scale=tf.ones(shape=(), dtype=dtype),
        )
        Phi: Callable[[TensorType], TensorType] = normal.cdf
        iPhi: Callable[[TensorType], TensorType] = normal.quantile

        return Phi, iPhi

[docs]    def _get_update_indices(self, B: int, S: int, Q: int, q: int) -> TensorType:
        """Returns indices for updating a tensor using tf.tensor_scatter_nd_add,
        for use within the _mvn_cdf function, for computing the cumulative density
        function of a multivariate Gaussian. The indices *idx* returned are such
        that the following operation

            idx = get_update_indices(B, S, Q, q)
            tensor = tf.tensor_scatter_nd_add(tensor, idx, update)

        is equivalent to the numpy operation

            tensor = tensor[:, :, q] + update

        where *tensor* is a tensor of shape (B, S, Q).

        :param B: First dim. of tensor for which the indices are generated.
        :param S: Second dim. of tensor for which the indices are generated.
        :param Q: Third dim. of tensor for which the indices are generated.
        :param q: Index of tensor along fourth dim. to which the update is applied.
        """

        idxB = tf.tile(tf.range(B, dtype=tf.int32)[:, None, None], (1, S, 1))
        idxS = tf.tile(tf.range(S, dtype=tf.int32)[None, :, None], (B, 1, 1))
        idxQ = tf.tile(tf.convert_to_tensor(q)[None, None, None], (B, S, 1))

        idx = tf.concat([idxB, idxS, idxQ], axis=-1)

        return idx

[docs]    def __call__(
        self,
        x: TensorType,
        mean: TensorType,
        cov: TensorType,
        jitter: float = 1e-6,
    ) -> TensorType:
        """Computes the cumulative density function of the multivariate
        Gaussian using the Genz approximation.

        :param x: Tensor of shape (B, Q), batch of points to evaluate CDF at.
        :param mean: Tensor of shape (B, Q), batch of means.
        :param covariance: Tensor of shape (B, Q, Q), batch of covariances.
        :param jitter: float, jitter to use in the Cholesky factorisation.
        :returns mvn_cdf: Tensor of shape (B,), CDF values.
        """

        # Unpack batch size
        B = x.shape[0]
        tf.debugging.assert_positive(B)

        # Check shapes of input tensors
        tf.debugging.assert_shapes(
            [
                (x, (B, self._Q)),
                (mean, (B, self._Q)),
                (cov, (B, self._Q, self._Q)),
            ]
        )

        # Identify data type to use for all calculations
        dtype = mean.dtype

        # Compute Cholesky factors
        jitter = jitter * tf.eye(self._Q, dtype=dtype)[None, :, :]
        C = tf.linalg.cholesky(cov + jitter)  # (B, Q, Q)

        # Rename samples and limits for brevity
        w = tf.math.sobol_sample(
            dim=self._Q,
            num_results=self._S,
            dtype=self._dtype,
            skip=self._num_sobol_skip,
        )  # (S, Q)
        b = x - mean  # (B, Q)

        # Initialise transformation variables
        e = tf.zeros(shape=(B, self._S, self._Q), dtype=dtype)
        f = tf.zeros(shape=(B, self._S, self._Q), dtype=dtype)
        y = tf.zeros(shape=(B, self._S, self._Q), dtype=dtype)

        # Initialise standard normal for computing CDFs
        Phi, iPhi = self._standard_normal_cdf_and_inverse_cdf(dtype=dtype)

        # Get update indices for convenience later
        idx = self._get_update_indices(B=B, S=self._S, Q=self._Q, q=0)

        # Slice out common tensors
        b0 = b[:, None, 0]
        C0 = C[:, None, 0, 0] + 1e-12

        # Compute transformation variables at the first step
        e_update = tf.tile(Phi(b0 / C0), (1, self._S))  # (B, S)
        e = tf.tensor_scatter_nd_add(e, idx, e_update)
        f = tf.tensor_scatter_nd_add(f, idx, e_update)

        for i in tf.range(1, self._Q):
            # Update y tensor
            y_update = iPhi(1e-6 + (1 - 2e-6) * w[None, :, i - 1] * e[:, :, i - 1])
            y = tf.tensor_scatter_nd_add(y, idx, y_update)

            # Slice out common tensors
            bi = b[:, None, i]
            Ci_ = C[:, None, i, :i]
            Cii = C[:, None, i, i] + 1e-12
            yi = y[:, :, :i]

            # Compute indices to update d, e and f tensors
            idx = self._get_update_indices(B=B, S=self._S, Q=self._Q, q=i)

            # Update e tensor
            e_update = Phi((bi - tf.reduce_sum(Ci_ * yi, axis=-1)) / Cii)
            e = tf.tensor_scatter_nd_add(e, idx, e_update)

            # Update f tensor
            f_update = e[:, :, i] * f[:, :, i - 1]
            f = tf.tensor_scatter_nd_add(f, idx, f_update)

        mvn_cdf = tf.reduce_mean(f[:, :, -1], axis=-1)

        return mvn_cdf