# Builtin
import warnings
from copy import deepcopy
from operator import itemgetter
from typing import Mapping
from numbers import Number
# External
import numpy as np
from scipy.linalg import cholesky, solve_triangular, cho_solve
from scipy.linalg.blas import dtrmm as tri_mul
import scipy.optimize
import pandas as pd
from sklearn.gaussian_process import GaussianProcessRegressor \
as sk_GaussianProcessRegressor
from sklearn.base import clone, BaseEstimator as BE
from sklearn.utils.validation import check_array
# Local
from gpry.kernels import RBF, Matern, ConstantKernel as C
from gpry.svm import SVM
from gpry.preprocessing import Normalize_bounds, DummyPreprocessor
from gpry.tools import check_random_state, get_Xnumber, delta_logp_of_1d_nstd, \
generic_params_names, shrink_bounds, is_in_bounds
[docs]
class GaussianProcessRegressor(sk_GaussianProcessRegressor, BE):
r"""
GaussianProcessRegressor (GPR) that allows dynamic expansion.
The implementation is based on Algorithm 2.1 of Gaussian Processes
for Machine Learning (GPML) by Rasmussen and Williams.
In addition to standard scikit-learn estimator API,
GaussianProcessRegressor:
* allows prediction without prior fitting (based on the GP prior).
* implements a pipeline for pretransforming data before fitting.
* provides the method :meth:`append_to_data` which allows to append
additional data points to an already existing GPR. This is done either
by refitting the hyperparameters (theta) or alternatively by using the
Matrix inversion Lemma to keep the hyperparameters fixed.
* overwrites the (hidden) native deepcopy function. This enables copying
the GPR as well as the sampled points it contains.
Parameters
----------
kernel : kernel object, string, dict, optional (default: "RBF")
The kernel specifying the covariance function of the GP. If
"RBF"/"Matern" is passed, the asymmetric kernel::
ConstantKernel() * RBF/Matern()
is used as default where ``n_dim`` is the number of dimensions of the
training space. In this case you will have to provide the prior bounds
as the ``bounds`` parameter to the GP. For the Matern kernel
:math:`\nu=3/2`. To pass different arguments to the kernel, e.g. ``nu=5/2``
for Matern, pass a single-key dict as ``{"Matern": {"nu": 2.5}}"``. Note
that the kernel's hyperparameters are optimized during fitting.
output_scale_prior : tuple as (min, max), optional (default: [1e-2, 1e3])
Prior for the (non-squared) scale parameter, in normalised logp units.
length_scale_prior : tuple as (min, max), optional (default: [1e-3, 1e1])
Prior for the length parameters, as a fraction of the parameter priors sizes.
noise_level : float or array-like, optional (default: 1e-2)
Square-root of the value added to the diagonal of the kernel matrix
during fitting. Larger values correspond to increased noise level in the
observations and reduce potential numerical issue during fitting.
If an array is passed, it must have the same number of entries as the
data used for fitting and is used as datapoint-dependent noise level.
Note that this is equivalent to adding a WhiteKernel with c=noise_level.
clip_factor : float, optional (default: 1.1)
Factor for upper clipping of the GPR predictions, to avoid overshoots.
optimizer : str or callable, optional (default: "fmin_l_bfgs_b")
Can either be one of the internally supported optimizers for optimizing
the kernel's parameters, specified by a string, or an externally
defined optimizer passed as a callable. If a callable is passed, it
must have the signature::
def optimizer(obj_func, initial_theta, bounds):
# * 'obj_func' is the objective function to be maximized, which
# takes the hyperparameters theta as parameter and an
# optional flag eval_gradient, which determines if the
# gradient is returned additionally to the function value
# * 'initial_theta': the initial value for theta, which can be
# used by local optimizers
# * 'bounds': the bounds on the values of theta
....
# Returned are the best found hyperparameters theta and
# the corresponding value of the target function.
return theta_opt, func_min
Per default, the 'fmin_l_bfgs_b' algorithm from scipy.optimize
is used. If None is passed, the kernel's parameters are kept fixed.
Available internal optimizers are::
'fmin_l_bfgs_b'
n_restarts_optimizer : int, optional (default: 0)
The number of restarts of the optimizer for finding the kernel's
parameters which maximize the log-marginal likelihood. The first run
of the optimizer is performed from the kernel's initial parameters,
the remaining ones (if any) from thetas sampled log-uniform randomly
from the space of allowed theta-values. If greater than 0, all bounds
must be finite. Note that n_restarts_optimizer == 0 implies that one
run is performed.
preprocessing_X : X-preprocessor, Pipeline_X, optional (default: None)
Single preprocessor or pipeline of preprocessors for X. If None is
passed the data is not preprocessed. The `fit` method of the
preprocessor is only called when the GP's hyperparameters are refit.
preprocessing_y : y-preprocessor or Pipeline_y, optional (default: None)
Single preprocessor or pipeline of preprocessors for y. If None is
passed the data is not preprocessed. The `fit` method of the preprocessor
is only called when the GP's hyperparameters are refit.
account_for_inf : SVM, None or "SVM" (default: "SVM")
Uses a SVM (Support Vector Machine) to classify the data into finite and
infinite values. This allows the GP to express values of -inf in the
data (unphysical values). If all values are finite the SVM will just
pass the data through itself and do nothing.
inf_threshold : None, float or str
Threshold for the infinities classifier to consider a value finite, understood as
a positive difference with respect to the current maximum y. It can be given as a
string formed of a number and ending in "s", meaning the distance to the mode
which shall be considered finite in :math:`\sigma` using a :math:`\chi^2`
distribution. Used only if account_for_inf is not None.
keep_min_finite : int, optional (default: None)
Minimum number of points to be considered finite, and this part of the GPR
training set. Useful e.g. if a point with a much larger y-value than the rest is
suddenly found. If no infinities classifier selected, it has no effect. Otherwise,
if ``None``, is set to the dimensionality of the problem.
bounds : array-like, shape=(n_dims,2), optional
Array of bounds of the prior [lower, upper] along each dimension. Has
to be provided when the kernel shall be built automatically by the GP.
trust_region_factor : float, optional
If defined as a positive float, it defines a trust region as a hypercube
containing the GPR (finite) training set, enlarged by the given factor.
The bounds of this trust region can be read from the ``trust_bounds`` attribute,
and points outside these bounds will cause the ``predict`` method to return a
negative infinity. Useful when the posterior can be expected to be much
smaller than the prior (but in that case it would be preferable to simply reduce
the prior bounds), noticeable e.g. by the acquisition module is taking too long
to propose points.
trust_region_nstd : float, optional
If defined as a positive float, the definition of the trust region only takes into
account training points corresponding to a significance (assuming a Gaussian
posterior) equivalent to this value in 1d standard deviations.
random_state : int or numpy.random.Generator, optional
The generator used to perform random operations of the GPR. If an integer is
given, it is used as a seed for the default global numpy random number generator.
verbose : 1, 2, 3, optional (default: 1)
Level of verbosity of the GP. 3 prints Infos, Warnings and Errors, 2
Warnings and Errors, and 1 only Errors. Should be set to 2 or 3 if
problems arise.
Attributes
----------
n : int
Number of points/features in the training set of the GPR.
d : int
Dimensionality of the training data.
bounds : array
The bounds with which the GPR was defined.
trust_bounds : array or None
The bounds of the trust region if ``trust_region_factor`` was defined, or ``None``
otherwise.
X_train : array-like, shape = (n_samples, n_features)
Original (untransformed) feature values in training data of the GPR. Intended to
be used when one wants to access the training data for any purpose.
y_train : array-like, shape = (n_samples, [n_output_dims])
Original (untransformed) target values in training data of the GPR. Intended to be
used when one wants to access the training data for any purpose.
X_train_ : array-like, shape = (n_samples, n_features)
(Possibly transformed) feature values in training data of the GPR (also required
for prediction). Mostly intended for internal use.
y_train_ : array-like, shape = (n_samples, [n_output_dims])
(Possibly transformed) target values in training data of the GPR (also required
for prediction). Mostly intended for internal use.
n_total : int
Number of points/features in the training set of the model, including points with
target values classified as infinite.
X_train_all : array-like, shape = (n_samples, n_features)
Original (untransformed) feature values in training data of the model, including
points with target values classified as infinite, and thus not part of the
training set of the GPR.
y_train_all : array-like, shape = (n_samples, [n_output_dims])
Original (untransformed) target values in training data of the model, including
values classified as infinite, and thus not part of the training set of the GPR.
X_train_all_ : array-like, shape = (n_samples, n_features)
(Possibly transformed) feature values in training data of the model, including
points with target values classified as infinite.
y_train_all_ : array-like, shape = (n_samples, [n_output_dims])
(Possibly transformed) target values in training data of the GPR model, including
points with target values classified as infinite.
noise_level : array-like, shape = (n_samples, [n_output_dims]) or scalar
The noise level (square-root of the variance) of the uncorrelated
training data. This is un-transformed.
noise_level_ : array-like, shape = (n_samples, [n_output_dims]) or scalar
The transformed noise level (if y is preprocessed)
alpha : array-like, shape = (n_samples, [n_output_dims]) or scalar
The value which is added to the diagonal of the kernel. This is the
square of `noise_level`.
kernel_ : :mod:`kernels` object
The kernel used for prediction. The structure of the kernel is the
same as the one passed as parameter but with optimized hyperparameters.
alpha_ : array-like, shape = (n_samples, n_samples)
**Not to be confused with alpha!** The inverse Kernel matrix of the
training points multiplied with ``y_train_`` (Dual coefficients of
training data points in kernel space). Needed at prediction.
V_ : array-like, shape = (n_samples, n_samples)
Lower-triangular Cholesky decomposition of the inverse kernel in ``X_train_``
.. warning::
``L_`` is not recomputed when using the append_to_data method
without refitting the hyperparameters. As only ``K_inv_`` and
``alpha_`` are used at prediction this is not neccessary.
log_marginal_likelihood_value_ : float
The log-marginal-likelihood of ``self.kernel_.theta``
**Methods:**
.. autosummary::
:toctree: stubs
append_to_data
fit
_update_model
predict
"""
def __init__(self, kernel="RBF", output_scale_prior=[1e-2, 1e3],
length_scale_prior=[1e-3, 1e1], noise_level=1e-2, clip_factor=1.1,
optimizer="fmin_l_bfgs_b", n_restarts_optimizer=0,
preprocessing_X=None, preprocessing_y=None,
account_for_inf="SVM", inf_threshold="20s", keep_min_finite=None,
trust_region_factor=None, trust_region_nstd=None,
bounds=None, random_state=None, verbose=1):
self.n_last_appended = 0
self.n_last_appended_finite = 0
self.newly_appended_for_inv = 0
self.preprocessing_X = \
DummyPreprocessor if preprocessing_X is None else preprocessing_X
self.preprocessing_y = \
DummyPreprocessor if preprocessing_y is None else preprocessing_y
self.noise_level = noise_level
if clip_factor < 1:
raise ValueError("'clip_factor' must be >= 1, or None for no clippling.")
self.clip_factor = clip_factor
self.n_eval = 0
self.n_eval_loglike = 0
self.verbose = verbose
self.inf_value = np.inf
self.minus_inf_value = -np.inf
self._fitted = False
self.bounds = bounds
self.trust_bounds = None
self.trust_region_factor = trust_region_factor
self.trust_region_nstd = trust_region_nstd
# Initialize SVM if necessary
self.inf_threshold = inf_threshold
self.keep_min_finite = (
keep_min_finite if keep_min_finite is not None else max(2, self.d)
)
if isinstance(account_for_inf, str) and account_for_inf.lower() == "svm":
self.infinities_classifier = SVM(random_state=random_state)
elif account_for_inf is False:
self.infinities_classifier = None
else:
self.infinities_classifier = account_for_inf
if self.infinities_classifier is not None:
y_preprocessor_guaranteed_linear = (
self.preprocessing_y is None or
getattr(self.preprocessing_y, "is_linear", False)
)
if not y_preprocessor_guaranteed_linear:
warnings.warn(
"If using a standard classifier for infinities, the y-preprocessor "
"needs to be linear (declare an attr ``is_linear=True``). This may "
"lead to errors further in the pipeline."
)
if self.inf_threshold is None:
raise ValueError(
"Specify 'inf_threshold' if using infinities classifier."
)
value, is_sigma_units, sigma_power = get_Xnumber(
self.inf_threshold, "s", None, dtype=float, varname="inf_threshold"
)
if sigma_power is not None:
raise ValueError("Power for sigma not supported.")
if is_sigma_units:
self._diff_threshold = self.compute_threshold_given_sigma(value, self.d)
else:
self._diff_threshold = value
# Auto-construct inbuilt kernels
if isinstance(kernel, str):
kernel = {kernel: {}}
if isinstance(kernel, Mapping):
if len(kernel) != 1:
raise ValueError("'kernel' must be a single-key dict.")
kernel_name = list(kernel)[0]
kernel_args = kernel[kernel_name] or {}
if self.bounds is None:
raise ValueError("You selected used the automatically "
f"constructed '{kernel_name}' kernel without "
"specifying prior bounds.")
# Transform prior bounds if neccessary
self.bounds_ = self.preprocessing_X.transform_bounds(self.bounds)
# Check if it's a supported kernel
try:
length_corr_kernel = {"rbf": RBF, "matern": Matern}[kernel_name.lower()]
except KeyError as excpt:
raise ValueError(
"Currently only 'RBF' and 'Matern' are "
f"supported as standard kernels. Got '{kernel_name}'."
) from excpt
# Build kernel
output_scale_init = np.sqrt(output_scale_prior[0] * output_scale_prior[1])
length_scale_init = np.sqrt(length_scale_prior[0] * length_scale_prior[1])
kernel = (
C(
output_scale_init**2,
[output_scale_prior[0]**2, output_scale_prior[1]**2],
) * length_corr_kernel(
[length_scale_init] * self.d,
length_scale_prior,
prior_bounds=self.bounds_,
**kernel_args,
)
)
sk_GaussianProcessRegressor.__init__(
self, kernel=kernel, alpha=noise_level**2., optimizer=optimizer,
n_restarts_optimizer=n_restarts_optimizer,
normalize_y=False, copy_X_train=True,
random_state=random_state
)
self.X_train, self.y_train = np.empty((0, self.d)), np.empty((0,))
self.X_train_, self.y_train_ = None, None
self.X_train_all, self.y_train_all = np.empty((0, self.d)), np.empty((0,))
self.X_train_all_, self.y_train_all_ = None, None
self.noise_level_ = None
self.kernel_ = None
if self.verbose >= 3:
print("Initializing GP with the following options:")
print("===========================================")
print("* Kernel:")
print(" ", self.kernel)
print(" with hyperparameters:")
for h in self.kernel.hyperparameters:
print(" -", h)
print(f"* Noise level: {noise_level}")
print(f"* Optimizer: {optimizer}")
print(f"* Optimizer restarts: {n_restarts_optimizer}")
print(f"* X-preprocessor: {preprocessing_X is not None}")
print(f"* y-preprocessor: {preprocessing_y is not None}")
print(f"* SVM to account for infinities: {bool(account_for_inf)}")
@property
def d(self):
"""Dimension of the feature space."""
if self.bounds is None:
return self.X_train.shape[1]
else:
return self.bounds.shape[0]
@property
def y_max(self):
"""The max. posterior value in the training set."""
return np.max(getattr(self, "y_train", [self.minus_inf_value]))
@property
def n(self):
"""
Number of points in the training set.
This excludes infinite points if the GPR was initialized to account for them.
To get the total number of points added to the model, both finite and infinite,
use the property ``GaussianProcessRegressor.n_total``.
"""
return len(getattr(self, "y_train", []))
@property
def n_finite(self):
"""
Number of points in the training set. Alias of ``GaussianProcessRegressor.n``.
"""
return self.n
@property
def n_total(self):
"""
Returns the total number of points added to the model, both finite and infinite.
Infinite points, if accounted for, are not part of the training set of the GPR.
"""
if self.infinities_classifier:
# The SVM usually contains all points, but maybe it hasn't been trained yet.
# In that case, return the GPR's
return self.infinities_classifier.n or self.n
else:
return self.n
@property
def X_train_infinite(self):
"""
X of points in the training set which have been classified as infinite.
"""
if self.infinities_classifier is None:
return np.empty(shape=(0, self.d))
return self.X_train_all[~self.infinities_classifier.y_finite]
@property
def y_train_infinite(self):
"""
X of points in the training set which have been classified as infinite.
"""
if self.infinities_classifier is None:
return np.empty(shape=(0,))
return self.y_train_all[~self.infinities_classifier.y_finite]
@property
def fitted(self):
"""Whether the GPR hyperparameters have been fitted at least once."""
return self._fitted
@property
def last_appended(self):
"""
Returns a copy of the last appended training points (finite/accepted or not),
as (X, y).
"""
if self.infinities_classifier is None:
return self.last_appended_finite
return (np.copy(self.X_train_all[-self.n_last_appended:]),
np.copy(self.y_train_all[-self.n_last_appended:]))
@property
def last_appended_finite(self):
"""Returns a copy of the last appended GPR (finite) training points, as (X, y)."""
return (np.copy(self.X_train[-self.n_last_appended_finite:]),
np.copy(self.y_train[-self.n_last_appended_finite:]))
@property
def scales(self):
"""
Kernel scales as ``(output_scale, (length_scale_1, ...))`` in non-transformed
coordinates.
"""
return (
self.preprocessing_y.inverse_transform_scale(
np.sqrt(self.kernel_.k1.constant_value)),
tuple(self.preprocessing_X.inverse_transform_scale(
self.kernel_.k2.length_scale))
)
[docs]
def training_set_as_df(self):
"""
Returns the training set as a pandas DataFrame (created on-the-fly and not saved).
"""
data = dict(zip(generic_params_names(self.d), self.X_train_all.copy().T))
data["y"] = self.y_train_all.copy()
data["is_finite"] = self.is_finite(data["y"])
return pd.DataFrame(data)
@property
def abs_finite_threshold(self):
"""
Absolute threshold for ``y`` values to be considered finite.
"""
threshold = self.infinities_classifier.abs_threshold
if self.preprocessing_y is None:
return threshold
return self.preprocessing_y.inverse_transform_scale(threshold)
[docs]
def is_finite(self, y):
"""
Returns the classification of y (target) values as finite (True) or not, by
comparing them with the current threshold.
Notes
-----
Use this method instead of the equivalent one of the 'infinities_classifier'
attribute, since the arguments of that one may need to be transformed first.
If calling with an argument which is not either the training set or a subset of it
results may be inconsistent, since new values may modify the threshold.
"""
if self.infinities_classifier is None:
return np.full(shape=len(y), fill_value=True)
return self.infinities_classifier.is_finite(self.preprocessing_y.transform(y))
[docs]
def predict_is_finite(self, X, validate=True):
"""
Returns a prediction for the classification of the target value at some given
parameters.
Notes
-----
Use this method instead of the equivalent one of the 'infinities_classifier'
attribute, since the arguments of that one may need to be transformed first.
"""
if self.infinities_classifier is None:
return np.full(shape=(len(self.y_train_all), ), fill_value=True)
return self.infinities_classifier.predict(
np.ascontiguousarray(self.preprocessing_X.transform(X)), validate=validate
)
[docs]
def set_random_state(self, random_state):
"""
(Re)sets the random state, including the SVM, if present.
"""
self.random_state = random_state
if self.infinities_classifier:
# In the SVM case, since we have not wrapper the calls to the RNG,
# (as we have for the GPR), we need to repackage the new numpy Generator
# as a RandomState, which is achieved by gpry.tools.check_random_state
self.infinities_classifier.random_state = check_random_state(
random_state, convert_to_random_state=True)
[docs]
def update_trust_region(self):
"""
Adjusts the boundaries of the trust region.
Keeps at least ``d`` points, temporarily lowering the threshold if needed.
"""
if self.trust_region_factor is None:
return
if self.trust_region_nstd is None:
use_X = self.X_train
else:
trust_region_nstd_ = self.trust_region_nstd
use_X = np.empty(shape=(0, self.X_train.shape[1]))
while len(use_X) < min(self.d, self.n):
use_X = self.X_train[
np.where(max(self.y_train) - self.y_train <
delta_logp_of_1d_nstd(trust_region_nstd_, self.d))
]
trust_region_nstd_ = trust_region_nstd_ + 0.1
self.trust_bounds = shrink_bounds(
self.bounds, use_X, factor=self.trust_region_factor
)
[docs]
def append_to_data(
self, X, y, noise_level=None, fit_gpr=True, fit_classifier=True,
):
r"""
Append newly acquired data to the GP Model and updates it.
Here updating refers to the re-calculation of the the GPR inverse matrix
:math:`(K(X,X)+\sigma_n^2 I)^{-1}` which is needed for predictions.
The highest cost incurred by this method is the refitting of the GPR kernel
hyperparameters :math:`\theta`. It can be useful to disable it (``fit_gpr=False``)
in cases where it is worth saving the computational expense in exchange for a loss
of information, such as when performing parallelized active sampling (NB: this is
only possible when the GPR hyperparameters have been fit at least once).
An intermediate option is to perform a single GPR hyperparameter optimization run
(instead of the default number of restarts) from the current hyperparameter
values, using ``fit_gpr='simple'``.
For an additional speed boost, the refitting of the infinities classifier (if
present) can be disabled with ``fit_classifier=False`` (.if a GPR refit is
requested this value is overridden).
If called with ``X=None, y=None``, it re-fits the model without adding new points.
The following calls should then be equivalent:
.. code-block:: python
fit_gpr_kwargs = {"n_restarts": 10}
# A
gpr.append_to_data(new_X, new_y, fit_gpr=fit_gpr_kwargs)
# B
gpr.append_to_data(new_X, new_y, fit_gpr=False)
gpr.fit_gpr_hyperparameters(**fit_gpr_kwargs)
# C
gpr.append_to_data(new_X, new_y, fit_gpr=False, fit_classifier=False)
gpr.append_to_data(None, None, fit_gpr=fit_gpr_kwargs)
Parameters
----------
X : array-like, shape = (n_samples, n_features), or None
Training data to append to the model.
y : array-like, shape = (n_samples, [n_output_dims]), or None
Target values to append to the data
noise_level : array-like, shape = (n_samples, [n_output_dims])
Uncorrelated standard deviations to add to the diagonal part of the covariance
matrix. Needs to have the same number of entries as y. If None, the
noise_level set in the instance is used. If you pass a single number the noise
level will be overwritten. In this case it is advisable to refit the
hyperparameters of the kernel.
fit_gpr : Bool or 'simple', dict, optional (default: True)
Whether the GPR :math:`\theta`-parameters are optimised (``'simple'`` for a
single run from last optimum; ``True`` for a more thorough search with
multiple restarts), or a simple kernel matrix inversion is performed
(``False``) with constant hyperparameters. Can also be passed a dict with
arguments to be passed to the `fit_gpr_hyperparameters` method.
fit_classifier: Bool, optional (default: True)
Whether the infinities classifier is refit. Overridden to ``True`` if
``fit_gpr`` is not ``False``.
Returns
-------
self
Returns an instance of self.
"""
# Ensure fit_gpr --> fit_classifier --> fit_preprocessors
fit_preprocessors = False
fit_gpr_kwargs = None
if fit_gpr is True:
fit_classifier = True
fit_gpr_kwargs = {}
elif str(fit_gpr) == "simple":
fit_classifier = True
fit_gpr_kwargs = {"simple": True}
fit_gpr = True
elif isinstance(fit_gpr, Mapping):
fit_classifier = True
fit_gpr_kwargs = deepcopy(fit_gpr)
fit_gpr = True
elif fit_gpr is not False:
raise ValueError(
"`fit_gpr` needs to be bool, 'simple', or a dict of args for the "
f"`fit_gpr_hyperparameters` method. Got {fit_gpr}."
)
if fit_classifier:
fit_preprocessors = True
force_fit_gpr = False # to avoid skipping fit if no points added if X,y = None
if X is None and y is None:
X, y = np.empty((0, self.d)), np.empty((0,))
force_fit_gpr = fit_gpr
if noise_level is not None:
raise ValueError("Cannot give a noise level if X and y are not given.")
elif X is None or y is None: # (None, None) already excluded
raise ValueError("If passing X, y needs to be passed too, and viceversa.")
noise_level_valid = self._validate_noise_level(noise_level, len(y))
# NB: if called with X,y = None, None, we could also have adopted the convention
# that the "last"-named variables refer to the last call with non-null X, y,
# but for now they are reset at every call, turning into 0 if no points given.
self.n_last_appended = len(y)
self.X_train_all = np.append(self.X_train_all, X, axis=0)
self.y_train_all = np.append(self.y_train_all, y)
self._update_noise_level(noise_level_valid)
# 1. Fit preprocessors with finite points and select finite points in the process,
# and create transformed training set and noises.
# NB: which points are finite does not change after SVM refit (as long as
# y-preprocessor is liner), so we can select them now.
if self.infinities_classifier is None:
is_finite_all = np.full(fill_value=True, shape=(len(self.y_train_all), ))
X_finite = np.copy(self.X_train_all)
y_finite = np.copy(self.y_train_all)
else:
# Use the manual method for non-preprocessed input.
# Make sure that the threshold is such that there is a min of finite ones.
diff_threshold_keep_n = self._diff_threshold_if_keep_n_finite(
self.y_train_all, self.keep_min_finite, self._diff_threshold
)
# pylint: disable=protected-access
is_finite_all = self.infinities_classifier._is_finite_raw(
self.y_train_all, diff_threshold_keep_n
)
X_finite = np.copy(self.X_train_all[is_finite_all])
y_finite = np.copy(self.y_train_all[is_finite_all])
if fit_preprocessors:
self.preprocessing_X.fit(X_finite, y_finite)
self.preprocessing_y.fit(X_finite, y_finite)
self.X_train_all_ = self.preprocessing_X.transform(self.X_train_all)
self.y_train_all_ = self.preprocessing_y.transform(self.y_train_all)
# The transformed noise level is always an array.
noise_level_array = (
np.full(fill_value=self.noise_level, shape=(len(self.y_train_all_),))
if isinstance(self.noise_level, Number) else self.noise_level
)
self.noise_level_ = self.preprocessing_y.transform_scale(noise_level_array)
# 2. Fit the SVM in the transformed space.
if self.infinities_classifier is None:
is_finite_last_appended = np.full(
fill_value=True, shape=(self.n_last_appended, )
)
else:
if fit_classifier:
# Again, make sure that we keep a min number of finite points
diff_threshold_keep_n_ = self.preprocessing_y.transform_scale(
diff_threshold_keep_n
)
# The SVM lives in the preprocessed space, and the preprocessor may have
# changed, so we need to pass all points every time
is_finite_predict = self.infinities_classifier.fit(
self.X_train_all_, self.y_train_all_, diff_threshold_keep_n_
)
assert np.array_equal(is_finite_all, is_finite_predict), \
"Infinities classifier miss-classified at least 1 point."
# Even if assert test fails, use the real classification
is_finite_last_appended = is_finite_all[-self.n_last_appended:]
# The number of newly added points. Used for the _update_model method
self.n_last_appended_finite = sum(is_finite_last_appended)
# If all added values are infinite there's no need to refit the GPR,
# unless an explicit call for that with X, y = None was made
if not self.n_last_appended_finite and not force_fit_gpr:
return self
# 3. Re-fit the GPR in the transformed space, and maybe hyperparameters
self.X_train = X_finite
self.y_train = y_finite
self.X_train_ = self.preprocessing_X.transform(self.X_train)
self.y_train_ = self.preprocessing_y.transform(self.y_train)
self.alpha = self.noise_level_[is_finite_all]**2 # NB: different from self.alpha_
self.newly_appended_for_inv = self.n_last_appended_finite
if fit_gpr:
self.fit_gpr_hyperparameters(**fit_gpr_kwargs)
else: # just update the regressor, keeping the kernel constant
self._update_model()
self.update_trust_region()
[docs]
def _validate_noise_level(self, noise_level, n_train):
"""
Checks for type and inconsistencies for the given noise level. Separated from the
update method to be performed before updating class attributes.
Returns a validated value that can be used directly.
"""
if n_train == 0 and noise_level is not None:
raise ValueError("noise_level must be None if not fitting to new points.")
if np.iterable(noise_level):
noise_level = np.atleast_1d(noise_level)
if noise_level.shape[0] != n_train:
raise ValueError(
"noise_level must be an array with same number of entries as y, but "
f"len(n)={noise_level.shape[0]} != len(y)={n_train})"
)
elif isinstance(noise_level, Number):
if np.iterable(self.noise_level):
noise_level = np.full(fill_value=noise_level, shape=(n_train,))
elif noise_level is None:
if np.iterable(self.noise_level):
raise ValueError(
"Need to pass non-null noise_level (scalar or array) because concrete"
" values were given earlier for the training points."
)
else:
raise ValueError(
"noise_level needs to be an iterable, number or None. "
f"Got type(noise_level)={type(noise_level)}"
)
return noise_level
[docs]
def _update_noise_level(self, noise_level):
"""
Updates the noise level of the training set with the new values (or the lack
thereof).
Assumes possible inconsistencies dealt with by ``_validate_noise_level``.
"""
if np.iterable(noise_level):
if not np.iterable(self.noise_level):
if self.verbose > 1:
warnings.warn(
"A new noise level has been assigned to the updated training set "
"while the old training set has a single scalar noise level: "
f"{self.noise_level}. Converting to individual levels!"
)
self.noise_level = np.full(
fill_value=self.noise_level, shape=(len(self.y_train_all),)
)
self.noise_level = np.append(self.noise_level, noise_level, axis=0)
elif isinstance(noise_level, Number):
# NB at validation new=scalar has been converted to array if old=array
assert not np.iterable(self.noise_level)
if not np.isclose(noise_level, self.noise_level):
if self.verbose > 1:
warnings.warn(
"Overwriting the noise level with a scalar. Make sure that "
"kernel's hyperparamters are refitted."
)
self.noise_level = noise_level
elif noise_level is None:
pass # keep old level for new points.
[docs]
def remove_from_data(self, position, fit=True):
r"""
*WARNING* This function is currently outdated and raises NotImplementedError.
Removes data points from the GP model. Works very similarly to the
:meth:`append_to_data` method with the only difference being that the
position(s) of the training points to delete are given instead of
values.
Parameters
----------
position : int or array-like, shape = (n_samples,)
The position (index) at which to delete from the training data.
If an array is given the data is deleted at multiple points.
fit : Bool, optional (default: True)
Whether the model is refit to new :math:`\theta`-parameters
or just updated.
Returns
-------
self
Returns an instance of self.
"""
raise NotImplementedError("This function is outdated and needs review.")
# Legacy code below, for re-implementation
if not (hasattr(self, "X_train_") and hasattr(self, "y_train_")):
raise ValueError("GP model contains no points. Cannot remove "
"points which do not exist.")
if np.iterable(position):
if np.max(position) >= len(self.y_train_):
raise ValueError("Position index is higher than length of "
"training points")
else:
if position >= len(self.y_train_):
raise ValueError("Position index is higher than length of "
"training points")
self.X_train_ = np.delete(self.X_train_, position, axis=0)
self.y_train_ = np.delete(self.y_train_, position)
self.X_train = np.delete(self.X_train, position, axis=0)
self.y_train = np.delete(self.y_train, position)
if np.iterable(self.noise_level):
self.noise_level = np.delete(self.noise_level, position)
self.noise_level_ = np.delete(self.noise_level_, position)
self.alpha = np.delete(self.alpha, position)
# TODO: add hyperparameter bounds
if fit:
self.fit(self.X_train, self.y_train)
else:
# Precompute quantities required for predictions which are
# independent of actual query points
K = self.kernel_(self.X_train_)
K[np.diag_indices_from(K)] += self.alpha
self._kernel_inverse(K)
return self
# Wrapper around log_marginal_likelihood to count the number of evaluations
[docs]
def log_marginal_likelihood(self, *args, **kwargs):
"""
Log-marginal likelihood of the kernel hyperparameters given the training data.
"""
self.n_eval_loglike += 1
return super().log_marginal_likelihood(*args, **kwargs)
[docs]
def fit_gpr_hyperparameters(
self, simple=False, start_from_current=True, n_restarts=None,
hyperparameter_bounds=None
):
r"""Optimizes the hyperparameters :math:`\theta` for the current training data.
The algorithm used to perform the optimization is very similar to the one provided
by Scikit-learn. The only major difference is, that gradient information is used
in addition to the values of the marginalized log-likelihood.
Parameters
----------
n_restarts : int, default None
Number of restarts of the optimizer. If not defined, uses the one set at
instantiation. ``1`` means a single optimizer run.
start_from_current : bool, default: True
Starts the first optimization run from the current hyperparameters (ignored if
not previously fitted).
simple : bool, default: False
If True, runs the optimiser only from the last optimum of the hyperparameters,
without restarts. Shorthand for ``start_from_current=True, n_restarts=1``. (it
overrides them if True).
hyperparameter_bounds : array-like, default: None
Bounds for the hyperparameters, if different from those declared at init.
Returns
-------
self
"""
if simple:
start_from_current = True
n_restarts = 1
if not self._fitted:
start_from_current = False
if n_restarts is None:
n_restarts = self.n_restarts_optimizer
no_optimizer = self.optimizer is None
no_hyperparams = self.kernel.n_dims == 0
no_restarts = n_restarts <= 0
if no_optimizer or no_hyperparams or no_restarts:
msg_reasons = []
if no_optimizer:
msg_reasons += ["no optimizer has been specified"]
if no_hyperparams:
msg_reasons += ["the kernel has no hyperparamenters"]
if no_restarts:
msg_reasons += ["the number of optimizer restarts requested is 0."]
warnings.warn(
f"Hyper-parameters not (re)fit. Reason(s): {'; '.join(msg_reasons)}."
)
self.log_marginal_likelihood_value_ = \
self.log_marginal_likelihood(self.kernel_.theta, clone_kernel=False)
self._update_model()
return self
# Choose hyperparameters based on maximizing the log-marginal
# likelihood (potentially starting from several initial values)
# We don't need to clone the kernel here, even if overwritten during optimization,
# because it will be recomputed in the final `log_marginal_likelihood` call.
def obj_func(theta, eval_gradient=True):
if eval_gradient:
lml, grad = self.log_marginal_likelihood(
theta, eval_gradient=True, clone_kernel=False)
return -lml, -grad
else:
return -self.log_marginal_likelihood(theta, clone_kernel=False)
if self.kernel_ is None:
self.kernel_ = clone(self.kernel)
if hyperparameter_bounds is None:
hyperparameter_bounds = self.kernel_.bounds
else:
# TODO: validate dimensions!
pass
# If at least one run will be sampled from the prior, is has to be finite
if n_restarts - int(start_from_current):
if not np.isfinite(hyperparameter_bounds).all():
raise ValueError(
"There is at least one optimizer run the requires sampling from the "
"hyperparameters' prior, but it has not finite density, because not "
"all bounds are finite. You can pass some finite bounds manually "
"using ``hyperparameter_bounds``."
)
optima = []
self._rng = check_random_state(self.random_state)
for iteration in range(n_restarts):
if iteration == 0 and start_from_current:
# self.kernel_ guaranteed to exist because self.fitted checked above
theta_initial = self.kernel_.theta
else:
# Additional runs are performed from log-uniform chosen initial theta
theta_initial = self._rng.uniform(
hyperparameter_bounds[:, 0], hyperparameter_bounds[:, 1]
)
# Run the optimizer!
optima.append(
self._constrained_optimization(
obj_func, theta_initial, hyperparameter_bounds
)
)
# Select result from run with minimal (negative) log-marginal likelihood,
# and ensure recomputation of the kernel with the new hyperparamenters.
lml_values = list(map(itemgetter(1), optima))
self.log_marginal_likelihood_value_ = -np.min(lml_values)
self.kernel_.theta = optima[np.argmin(lml_values)][0]
# Precompute quantities required for predictions which are independent
# of actual query points
self._update_model()
self._fitted = True
return self
[docs]
def _update_model(self):
r"""Updates a preexisting model using a single matrix inversion.
This method is used when a refitting of the :math:`\theta`-parameters
is not needed. In this case only the Inverse of the Covariance matrix
is updated. This method does not take X or y as inputs and should only
be called from the append_to_data method.
The X and y values used for training are taken internally from the
instance.
Returns
-------
self
"""
# Check if there are new points with which to update:
if self.newly_appended_for_inv < 1:
warnings.warn("No new points have been appended to the model.")
return self
K = self.kernel_(self.X_train_)
K[np.diag_indices_from(K)] += self.alpha
self._kernel_inverse(K)
# Reset newly_appended_for_inv to 0
self.newly_appended_for_inv = 0
return self
[docs]
def predict(self, X, return_std=False, return_cov=False,
return_mean_grad=False, return_std_grad=False, validate=True,
ignore_trust_region=False
):
"""
Predict output for X.
In addition to the mean of the predictive distribution, also its
standard deviation (return_std=True) or covariance (return_cov=True),
the gradient of the mean and the standard-deviation with respect to X
can be optionally provided.
Parameters
----------
X : array-like, shape = (n_samples, n_features)
Query points where the GP is evaluated.
return_std : bool, default: False
If True, the standard-deviation of the predictive distribution at
the query points is returned along with the mean.
return_mean_grad : bool, default: False
Whether or not to return the gradient of the mean.
Only valid when X is a single point.
return_std_grad : bool, default: False
Whether or not to return the gradient of the std.
Only valid when X is a single point.
validate : bool, default: True
If False, ``X`` is assumed to be correctly formatted (2-d float array, with
points as rows and dimensions/features as columns, C-contiguous), and no
checks are performed on it. Reduces overhead. Use only for repeated calls when
the input is programmatically generated to be correct at each stage.
ignore_trust_region : bool (default: False)
If ``True`` and trust-region definition was used (``trust_region_factor``
defined at initialisation), it ignores the trust region and does not return
a negative infinity outside the trust region.
.. note::
Note that in contrast to the sklearn GP Regressor our implementation
cannot return the full covariance matrix. This is to save on some
complexity and since the full covariance cannot be calculated if
either values are infinite or a y-preprocessor is used.
Returns
-------
y_mean : array, shape = (n_samples, [n_output_dims])
Mean of predictive distribution a query points
y_std : array, shape = (n_samples,), optional
Standard deviation of predictive distribution at query points.
Only returned when return_std is True.
y_cov : array, shape = (n_samples, n_samples), optional
Covariance of joint predictive distribution a query points.
Only returned when return_cov is True.
y_mean_grad : shape = (n_samples, n_features), optional
The gradient of the predicted mean.
y_std_grad : shape = (n_samples, n_features), optional
The gradient of the predicted std.
"""
self.n_eval += len(X)
if return_std_grad and not (return_std and return_mean_grad):
raise ValueError(
"Not returning std_gradient without returning "
"the std and the mean grad.")
if X.shape[0] != 1 and (return_mean_grad or return_std_grad):
raise ValueError("Mean grad and std grad not implemented \
for n_samples > 1")
if validate and (self.kernel is None or self.kernel.requires_vector_input):
X = check_array(X, ensure_2d=True, dtype="numeric")
elif validate:
X = check_array(X, ensure_2d=False, dtype=None)
impose_trust_region = self.trust_bounds is not None and not ignore_trust_region
i_outside_trust = None
if impose_trust_region:
i_outside_trust = np.logical_not(
is_in_bounds(X, self.trust_bounds, check_shape=False)
)
if not hasattr(self, "X_train_"): # Not fit; predict based on GP prior
# we assume that since the GP has not been fit to data the SVM can
# be ignored
y_mean = np.zeros(X.shape[0])
if impose_trust_region:
y_mean[i_outside_trust] = self.minus_inf_value
if return_std:
y_var = self.kernel.diag(X)
y_std = np.sqrt(y_var)
if not return_mean_grad and not return_std_grad:
return y_mean, y_std
if return_mean_grad:
mean_grad = np.zeros_like(X)
if return_std:
if return_std_grad:
std_grad = np.zeros_like(X)
return y_mean, y_std, mean_grad, std_grad
else:
return y_mean, y_std, mean_grad
else:
return y_mean, mean_grad
else:
return y_mean
# First check if the SVM says that the value should be -inf
if self.infinities_classifier is not None:
# Every variable that ends in _full is the full (including infinite)
# values
X = np.copy(X) # copy since we might change it
n_samples = X.shape[0]
n_dims = X.shape[1]
# Initialize the full arrays for filling them later with infinite
# and non-infinite values
y_mean_full = np.ones(n_samples)
y_std_full = np.zeros(n_samples) # std is zero when mu is -inf
grad_mean_full = np.ones((n_samples, n_dims))
grad_std_full = np.zeros((n_samples, n_dims))
X_ = X if self.preprocessing_X is None else self.preprocessing_X.transform(X)
finite = self.infinities_classifier.predict(
np.ascontiguousarray(X_), validate=validate
)
# If all values are infinite there's no point in running the
# prediction through the GP
if np.all(~finite):
y_mean = y_mean_full * self.minus_inf_value
if return_std:
y_std = np.zeros(n_samples)
if not return_mean_grad and not return_std_grad:
return y_mean, y_std
if return_mean_grad:
grad_mean = np.ones((n_samples, n_dims)) * self.inf_value
if return_std:
if return_std_grad:
grad_std = np.zeros((n_samples, n_dims))
return y_mean, y_std, grad_mean, grad_std
else:
return y_mean, y_std, grad_mean
else:
return y_mean, grad_mean
return y_mean
y_mean_full[~finite] = self.minus_inf_value # Set infinite values
grad_mean_full[~finite] = self.inf_value # the grad of inf values is +inf
X = X[finite] # only predict the finite samples
X_ = X if self.preprocessing_X is None else self.preprocessing_X.transform(X)
# Predict based on GP posterior
K_trans = self.kernel_(X_, self.X_train_)
y_mean_ = K_trans.dot(self.alpha_) # Line 4 (y_mean = f_star)
# Undo normalization
if self.preprocessing_y is None:
y_mean = y_mean_
else:
y_mean = self.preprocessing_y.inverse_transform(y_mean_)
# Upper clipping to avoid overshoots
if self.clip_factor is not None:
y_mean = np.clip(
y_mean,
None,
(
self.clip_factor * max(self.y_train) -
(self.clip_factor - 1) * min(self.y_train)
)
)
# Put together with SVM predictions and trust region
if self.infinities_classifier is not None:
y_mean_full[finite] = y_mean
y_mean = y_mean_full
if impose_trust_region:
y_mean[i_outside_trust] = self.minus_inf_value
if return_std:
M = tri_mul(1., self.V_, K_trans.T, lower=True)
# Compute variance of predictive distribution
y_var = self.kernel_.diag(X_)
y_var -= np.einsum("ji,ji->i", M, M, optimize=True)
# np.einsum("ij,ij->i", np.dot(K_trans, K_inv), K_trans)
# np.einsum("ki,kj,ij->k", K_trans, K_trans, K_inv)
# Check if any of the variances is negative because of
# numerical issues. If yes: set the variance to 0.
y_var_negative = y_var < 0
if np.any(y_var_negative):
if self.verbose > 4:
warnings.warn("Predicted variances smaller than 0. "
"Setting those variances to 0.")
y_var[y_var_negative] = 0.0
y_std = np.sqrt(y_var)
y_std_untransformed = np.copy(y_std)
# Undo normalization
if self.preprocessing_y is not None:
y_std = self.preprocessing_y.\
inverse_transform_scale(y_std)
# Add infinite values
if self.infinities_classifier is not None:
y_std_full[finite] = y_std
y_std = y_std_full
if not return_mean_grad and not return_std_grad:
return y_mean, y_std
if return_mean_grad:
grad = self.kernel_.gradient_x(X_[0], self.X_train_)
grad_mean = np.dot(grad.T, self.alpha_)
# Undo normalization
if self.preprocessing_y is not None:
grad_mean = self.preprocessing_y.\
inverse_transform_scale(grad_mean)
# Include infinite values
if self.infinities_classifier is not None:
grad_mean_full[finite] = grad_mean
grad_mean = grad_mean_full
if return_std_grad:
grad_std = np.zeros(X_.shape[1])
if not np.allclose(y_std, grad_std):
# TODO: This can be made much more efficient,
# but I don't think it's used currently
grad_std = -np.dot(K_trans,
np.dot(self.V_.T.dot(self.V_), grad))[0] \
/ y_std_untransformed
# Undo normalization
if self.preprocessing_y is not None:
# Apply inverse transformation twice
grad_std = self.preprocessing_y.\
inverse_transform_scale(grad_std)
grad_std = self.preprocessing_y.\
inverse_transform_scale(grad_std)
# Include infinite values
if self.infinities_classifier is not None:
grad_std_full[finite] = grad_std
grad_std = grad_std_full
return y_mean, y_std, grad_mean, grad_std
if return_std:
return y_mean, y_std, grad_mean
else:
return y_mean, grad_mean
return y_mean
[docs]
def predict_std(self, X, validate=True):
"""
Predict output standart deviation for X.
Parameters
----------
X : array-like, shape = (n_samples, n_features)
Query points where the GP is evaluated.
validate : bool, default: True
If False, ``X`` is assumed to be correctly formatted (2-d float array, with
points as rows and dimensions/features as columns, C-contiguous), and no
checks are performed on it. Reduces overhead. Use only for repeated calls when
the input is programmatically generated to be correct at each stage.
Returns
-------
y_std : array, shape = (n_samples,), optional
Standard deviation of predictive distribution at query points.
Only returned when return_std is True.
"""
self.n_eval += len(X)
if validate and (self.kernel is None or self.kernel.requires_vector_input):
X = check_array(X, ensure_2d=True, dtype="numeric")
elif validate:
X = check_array(X, ensure_2d=False, dtype=None)
if not hasattr(self, "X_train_"): # Not fit; predict based on GP prior
# we assume that since the GP has not been fit to data the SVM can be ignored
return np.sqrt(self.kernel.diag(X))
# First check if the SVM says that the value should be -inf
if self.infinities_classifier is not None:
# Every variable that ends in _full is the full (including infinite) values
X = np.copy(X) # copy since we might change it
n_samples = X.shape[0]
# Initialize the full arrays for filling them later with infinite
# and non-infinite values
y_std_full = np.zeros(n_samples) # std is zero when mu is -inf
X_ = X if self.preprocessing_X is None else self.preprocessing_X.transform(X)
finite = self.infinities_classifier.predict(
np.ascontiguousarray(X_), validate=validate
)
# If all values are infinite there's no point in running the
# prediction through the GP
if np.all(~finite):
return np.zeros(n_samples)
X = X[finite] # only predict the finite samples
X_ = X if self.preprocessing_X is None else self.preprocessing_X.transform(X)
# Predict based on GP posterior
K_trans = self.kernel_(X_, self.X_train_)
M = tri_mul(1., self.V_, K_trans.T, lower=True)
# Compute variance of predictive distribution
y_var = self.kernel_.diag(X_)
y_var -= np.einsum("ji,ji->i", M, M, optimize=True)
# np.einsum("ij,ij->i", np.dot(K_trans, K_inv), K_trans)
# np.einsum("ki,kj,ij->k", K_trans, K_trans, K_inv)
# Check if any of the variances is negative because of
# numerical issues. If yes: set the variance to 0.
y_var_negative = y_var < 0
if np.any(y_var_negative):
if self.verbose > 4:
warnings.warn("Predicted variances smaller than 0. "
"Setting those variances to 0.")
y_var[y_var_negative] = 0.0
y_std = np.sqrt(y_var)
# Undo normalization
if self.preprocessing_y is not None:
y_std = self.preprocessing_y.\
inverse_transform_scale(y_std)
# Add infinite values
if self.infinities_classifier is not None:
y_std_full[finite] = y_std
y_std = y_std_full
return y_std
def __deepcopy__(self, memo):
"""
Overwrites the internal deepcopy method of the class in order to
also copy instance variables which are not defined in the init.
"""
# Initialize the stuff specified in init
c = GaussianProcessRegressor(
kernel=self.kernel,
noise_level=self.noise_level,
optimizer=self.optimizer,
n_restarts_optimizer=self.n_restarts_optimizer,
preprocessing_X=self.preprocessing_X,
preprocessing_y=self.preprocessing_y,
bounds=self.bounds,
random_state=self.random_state)
# Remember number of evaluations
if hasattr(self, "n_eval"):
c.n_eval = self.n_eval
if hasattr(self, "n_eval_loglike"):
c.n_eval_loglike = self.n_eval_loglike
# Initialize the X_train and y_train part
if hasattr(self, "X_train"):
c.X_train = np.copy(self.X_train)
if hasattr(self, "y_train"):
c.y_train = np.copy(self.y_train)
if hasattr(self, "X_train_"):
c.X_train_ = np.copy(self.X_train_)
if hasattr(self, "y_train_"):
c.y_train_ = np.copy(self.y_train_)
if hasattr(self, "X_train_all"):
c.X_train_all = np.copy(self.X_train_all)
if hasattr(self, "y_train_all"):
c.y_train_all = np.copy(self.y_train_all)
if hasattr(self, "X_train_all_"):
c.X_train_all_ = np.copy(self.X_train_all_)
if hasattr(self, "y_train_all_"):
c.y_train_all_ = np.copy(self.y_train_all_)
# Initialize noise levels
if hasattr(self, "noise_level"):
c.noise_level = self.noise_level
if hasattr(self, "noise_level_"):
c.noise_level_ = self.noise_level_
if hasattr(self, "alpha"):
c.alpha = self.alpha
# Initialize kernel and inverse kernel
if hasattr(self, "V_"):
c.V_ = np.copy(self.V_)
if hasattr(self, "L_"):
c.L_ = np.copy(self.L_)
if hasattr(self, "alpha_"):
c.alpha_ = np.copy(self.alpha_)
if hasattr(self, "kernel_"):
c.kernel_ = deepcopy(self.kernel_)
# Copy the right SVM
if hasattr(self, "infinities_classifier"):
c.infinities_classifier = deepcopy(self.infinities_classifier)
if hasattr(self, "_diff_threshold"):
c._diff_threshold = deepcopy(self._diff_threshold)
if hasattr(self, "keep_min_finite"):
c.keep_min_finite = deepcopy(self.keep_min_finite)
if hasattr(self, "trust_region_factor"):
c.trust_region_factor = deepcopy(self.trust_region_factor)
if hasattr(self, "trust_region_nstd"):
c.trust_region_nstd = deepcopy(self.trust_region_nstd)
if hasattr(self, "inf_value"):
c.inf_value = deepcopy(self.inf_value)
if hasattr(self, "minus_inf_value"):
c.minus_inf_value = deepcopy(self.minus_inf_value)
# Remember number of last appended points
if hasattr(self, "n_last_appended"):
c.n_last_appended = self.n_last_appended
if hasattr(self, "n_last_appended_finite"):
c.n_last_appended_finite = self.n_last_appended_finite
if hasattr(self, "newly_appended_for_inv"):
c.newly_appended_for_inv = self.newly_appended_for_inv
# Remember if it has been fit to data.
if hasattr(self, "_fitted"):
c._fitted = self._fitted
return c
def _constrained_optimization(self, obj_func, initial_theta, bounds):
with warnings.catch_warnings():
warnings.simplefilter("ignore")
if self.optimizer == "fmin_l_bfgs_b":
opt_res = scipy.optimize.minimize(
obj_func, initial_theta, method="L-BFGS-B", jac=True,
bounds=bounds)
# Temporarily disabled bc incompatibility between current sklearn+scipy
# if self.verbose > 1:
# _check_optimize_result("lbfgs", opt_res)
theta_opt, func_min = opt_res.x, opt_res.fun
elif callable(self.optimizer):
theta_opt, func_min = \
self.optimizer(obj_func, initial_theta, bounds=bounds)
else:
raise ValueError("Unknown optimizer %s." % self.optimizer)
return theta_opt, func_min
[docs]
def _kernel_inverse(self, kernel):
""" Compute inverse of the kernel and store relevant quantities"""
try:
self.L_ = cholesky(kernel, lower=True)
self.V_ = solve_triangular(self.L_, np.eye(self.L_.shape[0]), lower=True)
except np.linalg.LinAlgError as exc:
exc.args = ("The kernel, %s, is not returning a "
"positive definite matrix. Try gradually "
"increasing the 'noise_level' parameter of your "
"GaussianProcessRegressor estimator."
% self.kernel_) + exc.args
raise
self.alpha_ = cho_solve((self.L_, True), self.y_train_)
[docs]
@staticmethod
def compute_threshold_given_sigma(n_sigma, n_dimensions):
r"""
Computes threshold value given a number of :math:`\sigma` away from the maximum,
assuming a :math:`\chi^2` distribution.
"""
return delta_logp_of_1d_nstd(n_sigma, n_dimensions)
[docs]
@staticmethod
def _diff_threshold_if_keep_n_finite(y, n, reference_diff_threshold, epsilon=1e-6):
"""
Recalculation of the relative threshold when imposing that at least ``n`` points
must be kept finite (i.e. in the training set). The "at least" imposes that the
minimum value returned is the given threshold.
It requires sorting the ``y`` values, which can be costly in extreme cases.
"""
if n is None or n <= 1:
return reference_diff_threshold
y_sorted = np.sort(y)
difference_to_nth_point = y_sorted[-1] - y_sorted[-min(n, len(y_sorted))]
return max(reference_diff_threshold, difference_to_nth_point + epsilon)