import numpy as np
from sklearn.base import MultiOutputMixin, RegressorMixin
from sklearn.linear_model import LinearRegression, Ridge, RidgeCV
from sklearn.utils import check_array
from sklearn.utils.validation import check_is_fitted, validate_data
from skmatter.decomposition import _BasePCov
from skmatter.utils import check_lr_fit
class PCovR(RegressorMixin, MultiOutputMixin, _BasePCov):
r"""Principal Covariates Regression (PCovR).
As described in [deJong1992]_, PCovR determines a latent-space projection
:math:`\mathbf{T}` which minimizes a combined loss in supervised and
unsupervised tasks.
This projection is determined by the eigendecomposition of a modified Gram
matrix :math:`\mathbf{\tilde{K}}`
.. math::
\mathbf{\tilde{K}} = \alpha \mathbf{X} \mathbf{X}^T +
(1 - \alpha) \mathbf{\hat{Y}}\mathbf{\hat{Y}}^T
where :math:`\alpha` is a mixing parameter and
:math:`\mathbf{X}` and :math:`\mathbf{\hat{Y}}` are matrices of shapes
:math:`(n_{samples}, n_{features})` and :math:`(n_{samples}, n_{properties})`,
respectively, which contain the input and approximate targets. For
:math:`(n_{features} < n_{samples})`, this can be more efficiently computed
using the eigendecomposition of a modified covariance matrix
:math:`\mathbf{\tilde{C}}`
.. math::
\mathbf{\tilde{C}} = \alpha \mathbf{X}^T \mathbf{X} +
(1 - \alpha) \left(\left(\mathbf{X}^T
\mathbf{X}\right)^{-\frac{1}{2}} \mathbf{X}^T
\mathbf{\hat{Y}}\mathbf{\hat{Y}}^T \mathbf{X} \left(\mathbf{X}^T
\mathbf{X}\right)^{-\frac{1}{2}}\right)
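As a schematic illustration (not the estimator's internal code path), the
modified Gram matrix can be assembled directly from a centered
:math:`\mathbf{X}` and regressed targets :math:`\mathbf{\hat{Y}}`; the data
below are placeholders:
>>> import numpy as np
>>> rng = np.random.RandomState(0)
>>> Xc = rng.standard_normal((5, 3))  # centered features (placeholder)
>>> Yhat = rng.standard_normal((5, 2))  # regressed targets (placeholder)
>>> mixing = 0.5
>>> K_tilde = mixing * Xc @ Xc.T + (1 - mixing) * Yhat @ Yhat.T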
For all PCovR methods, it is strongly suggested that :math:`\mathbf{X}` and
:math:`\mathbf{Y}` are centered and scaled to unit variance, otherwise the
results will change drastically near :math:`\alpha \to 0` and :math:`\alpha \to 1`.
This can be done with the companion preprocessing classes, where
>>> from skmatter.preprocessing import StandardFlexibleScaler as SFS
>>> import numpy as np
>>>
>>> # Set column_wise to True when the columns are relative to one another,
>>> # False otherwise.
>>> scaler = SFS(column_wise=True)
>>>
>>> A = np.array([[1, 2], [2, 1]]) # replace with your matrix
>>> scaler.fit(A)
StandardFlexibleScaler(column_wise=True)
>>> A = scaler.transform(A)
Parameters
----------
mixing: float, default=0.5
mixing parameter, denoted :math:`{\alpha}` in the PCovR literature; named
`mixing` here to avoid confusion with the regularization parameter `alpha`
n_components : int, float or str, default=None
Number of components to keep.
if n_components is not set all components are kept::
n_components == min(n_samples, n_features)
svd_solver : {'auto', 'full', 'arpack', 'randomized'}, default='auto'
If auto :
The solver is selected by a default policy based on `X.shape` and
`n_components`: if the input data is larger than 500x500 and the number of
components to extract is lower than 80% of the smallest dimension of the
data, then the more efficient 'randomized' method is enabled. Otherwise the
exact full SVD is computed and optionally truncated afterwards.
If full :
run exact full SVD calling the standard LAPACK solver via `scipy.linalg.svd`
and select the components by postprocessing
If arpack :
run SVD truncated to n_components calling ARPACK solver via
`scipy.sparse.linalg.svds`. It requires strictly 0 < n_components <
min(X.shape)
If randomized :
run randomized SVD by the method of Halko et al.
tol : float, default=1e-12
Tolerance for singular values computed by svd_solver == 'arpack'. Must be of
range [0.0, infinity).
space: {'feature', 'sample', 'auto'}, default='auto'
Whether to compute the PCovR in `sample` or `feature` space. The default
(`auto`) uses `sample` when :math:`{n_{samples} < n_{features}}` and
`feature` when :math:`{n_{features} < n_{samples}}`.
regressor: {`Ridge`, `RidgeCV`, `LinearRegression`, `precomputed`}, default=None
regressor for computing the approximated :math:`{\mathbf{\hat{Y}}}`. The
regressor should be one of `sklearn.linear_model.Ridge`,
`sklearn.linear_model.RidgeCV`, or `sklearn.linear_model.LinearRegression`.
If a pre-fitted regressor is provided, it is used to compute
:math:`{\mathbf{\hat{Y}}}`. Note that any pre-fitting of the regressor will
be lost if `PCovR` is within a composite estimator that enforces cloning,
e.g., `sklearn.compose.TransformedTargetRegressor` or
`sklearn.pipeline.Pipeline` with model caching. In such cases, the regressor
will be re-fitted on the same training data as the composite estimator. If
`precomputed`, we assume that the `y` passed to the `fit` function is the
regressed form of the targets :math:`{\mathbf{\hat{Y}}}`. If None,
``sklearn.linear_model.Ridge(alpha=1e-6, fit_intercept=False, tol=1e-12)``
is used as the regressor.
iterated_power : int or 'auto', default='auto'
Number of iterations for the power method computed by svd_solver ==
'randomized'. Must be of range [0, infinity).
random_state : int, :class:`numpy.random.RandomState` instance or None, default=None
Used when the 'arpack' or 'randomized' solvers are used. Pass an int for
reproducible results across multiple function calls.
whiten : boolean, deprecated
Attributes
----------
mixing: float, default=0.5
mixing parameter, denoted :math:`{\alpha}` in the PCovR literature
tol: float, default=1e-12
Tolerance for singular values computed by svd_solver == 'arpack'.
Must be of range [0.0, infinity).
space: {'feature', 'sample', 'auto'}, default='auto'
Whether to compute the PCovR in `sample` or `feature` space. The default
(`auto`) uses `sample` when :math:`{n_{samples} < n_{features}}` and
`feature` when :math:`{n_{features} < n_{samples}}`.
n_components_ : int
The estimated number of components, which equals the parameter n_components, or
the lesser value of n_features and n_samples if n_components is None.
pxt_ : numpy.ndarray of size :math:`({n_{features}, n_{components}})`
the projector, or weights, from the input space :math:`\mathbf{X}` to the
latent-space projection :math:`\mathbf{T}`
pxy_ : numpy.ndarray of size :math:`({n_{features}, n_{properties}})`
the projector, or weights, from the input space :math:`\mathbf{X}` to the
properties :math:`\mathbf{Y}`
pty_ : numpy.ndarray of size :math:`({n_{components}, n_{properties}})`
the projector, or weights, from the latent-space projection :math:`\mathbf{T}`
to the properties :math:`\mathbf{Y}`
explained_variance_ : numpy.ndarray of shape (n_components,)
The amount of variance explained by each of the selected components.
Equal to n_components largest eigenvalues
of the PCovR-modified covariance matrix of :math:`\mathbf{X}`.
singular_values_ : numpy.ndarray of shape (n_components,)
The singular values corresponding to each of the selected components.
Examples
--------
>>> import numpy as np
>>> from skmatter.decomposition import PCovR
>>> X = np.array([[-1, 1, -3, 1], [1, -2, 1, 2], [-2, 0, -2, -2], [1, 0, 2, -1]])
>>> Y = np.array([[0, -5], [-1, 1], [1, -5], [-3, 2]])
>>> pcovr = PCovR(mixing=0.1, n_components=2)
>>> pcovr.fit(X, Y)
PCovR(mixing=0.1, n_components=2)
>>> pcovr.transform(X)
array([[ 3.2630561 , 0.06663787],
[-2.69395511, -0.41582771],
[ 3.48683147, -0.83164387],
[-4.05593245, 1.18083371]])
>>> pcovr.predict(X)
array([[ 0.01371776, -5.00945512],
[-1.02805338, 1.06736871],
[ 0.98166504, -4.98307078],
[-2.9963189 , 1.98238856]])
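Prediction from the latent projection is consistent with prediction directly
from :math:`\mathbf{X}` (a quick sanity check):
>>> T = pcovr.transform(X)
>>> np.allclose(pcovr.predict(T=T), pcovr.predict(X))
True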
""" # NoQa: E501
def __init__(
self,
mixing=0.5,
n_components=None,
svd_solver="auto",
tol=1e-12,
space="auto",
regressor=None,
iterated_power="auto",
random_state=None,
whiten=False,
):
super().__init__(
mixing=mixing,
n_components=n_components,
svd_solver=svd_solver,
tol=tol,
space=space,
iterated_power=iterated_power,
random_state=random_state,
whiten=whiten,
)
self.regressor = regressor
def fit(self, X, Y, W=None):
r"""Fit the model with X and Y. Depending on the dimensions of X, calls either
`_fit_feature_space` or `_fit_sample_space`.
Parameters
----------
X : numpy.ndarray, shape (n_samples, n_features)
Training data, where n_samples is the number of samples and n_features is
the number of features.
It is suggested that :math:`\mathbf{X}` be centered by its column means
and scaled. If features are related, the matrix should be scaled to have
unit variance; otherwise, :math:`\mathbf{X}` should be scaled so that each
feature has a variance of 1 / n_features.
Y : numpy.ndarray, shape (n_samples, n_properties)
Training data, where n_samples is the number of samples and n_properties is
the number of properties
It is suggested that :math:`\mathbf{Y}` be centered by its column means and
scaled. If properties are related, the matrix should be scaled to have unit
variance; otherwise, :math:`\mathbf{Y}` should be scaled so that each
property has a variance of 1 / n_properties.
If ``regressor='precomputed'``, it is assumed that Y is the regressed form
of the properties, :math:`{\mathbf{\hat{Y}}}`.
W : numpy.ndarray, shape (n_features, n_properties)
Regression weights, optional when ``regressor='precomputed'``. If not
passed, it is assumed that ``W = np.linalg.lstsq(X, Y, self.tol)[0]``.
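Examples
--------
A minimal sketch of the `precomputed` path, in which the `y` passed to
`fit` is already the regressed target :math:`{\mathbf{\hat{Y}}}` (data here
is illustrative only):
>>> import numpy as np
>>> from sklearn.linear_model import Ridge
>>> from skmatter.decomposition import PCovR
>>> X = np.array([[-1.0, 1.0], [1.0, -2.0], [-2.0, 0.0], [1.0, 0.5]])
>>> Y = np.array([[0.0], [-1.0], [1.0], [-3.0]])
>>> reg = Ridge(alpha=1e-6, fit_intercept=False, tol=1e-12).fit(X, Y)
>>> pcovr = PCovR(mixing=0.5, n_components=2, regressor="precomputed")
>>> _ = pcovr.fit(X, reg.predict(X), W=reg.coef_.T)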
"""
X, Y = validate_data(self, X, Y, y_numeric=True, multi_output=True)
super().fit(X)
compatible_regressors = (LinearRegression, Ridge, RidgeCV)
if self.regressor not in ["precomputed", None] and not isinstance(
self.regressor, compatible_regressors
):
raise ValueError(
"Regressor must be an instance of `"
f"{'`, `'.join(r.__name__ for r in compatible_regressors)}`"
", or `precomputed`"
)
# Assign the default regressor
if self.regressor != "precomputed":
if self.regressor is None:
regressor = Ridge(
alpha=1e-6,
fit_intercept=False,
tol=1e-12,
)
else:
regressor = self.regressor
self.regressor_ = check_lr_fit(regressor, X, Y)
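# The fitted regressor supplies both the weights W (features -> properties)
# and the in-sample predictions Yhat used in the modified (co)variance.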
W = self.regressor_.coef_.T.reshape(X.shape[1], -1)
Yhat = self.regressor_.predict(X).reshape(X.shape[0], -1)
else:
Yhat = Y.copy()
if W is None:
W = np.linalg.lstsq(X, Yhat, self.tol)[0]
if self.space_ == "feature":
self._fit_feature_space(X, Y.reshape(Yhat.shape), Yhat)
else:
self._fit_sample_space(X, Y.reshape(Yhat.shape), Yhat, W)
self.pxy_ = self.pxt_ @ self.pty_
if len(Y.shape) == 1:
self.pxy_ = self.pxy_.reshape(
X.shape[1],
)
self.pty_ = self.pty_.reshape(
self.n_components_,
)
self.components_ = self.pxt_.T # for sklearn compatibility
return self
def _fit_feature_space(self, X, Y, Yhat):
r"""In feature-space PCovR, the projectors are determined by:
.. math::
\mathbf{\tilde{C}} = \alpha \mathbf{X}^T \mathbf{X} +
(1 - \alpha) \left(\left(\mathbf{X}^T
\mathbf{X}\right)^{-\frac{1}{2}} \mathbf{X}^T
\mathbf{\hat{Y}}\mathbf{\hat{Y}}^T \mathbf{X} \left(\mathbf{X}^T
\mathbf{X}\right)^{-\frac{1}{2}}\right)
where
.. math::
\mathbf{P}_{XT} = (\mathbf{X}^T \mathbf{X})^{-\frac{1}{2}}
\mathbf{U}_\mathbf{\tilde{C}}
\mathbf{\Lambda}_\mathbf{\tilde{C}}^{\frac{1}{2}}
.. math::
\mathbf{P}_{TX} = \mathbf{\Lambda}_\mathbf{\tilde{C}}^{-\frac{1}{2}}
\mathbf{U}_\mathbf{\tilde{C}}^T
(\mathbf{X}^T \mathbf{X})^{\frac{1}{2}}
.. math::
\mathbf{P}_{TY} = \mathbf{\Lambda}_\mathbf{\tilde{C}}^{-\frac{1}{2}}
\mathbf{U}_\mathbf{\tilde{C}}^T (\mathbf{X}^T
\mathbf{X})^{-\frac{1}{2}} \mathbf{X}^T
\mathbf{Y}
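A schematic NumPy sketch of these equations on placeholder data (a sketch
only; the actual implementation additionally truncates to `n_components`):
>>> import numpy as np
>>> rng = np.random.RandomState(0)
>>> X = rng.standard_normal((6, 3))
>>> Yhat = rng.standard_normal((6, 2))
>>> mixing = 0.5
>>> C = X.T @ X
>>> w, V = np.linalg.eigh(C)
>>> iCsqrt = V @ np.diag(w**-0.5) @ V.T  # (X^T X)^(-1/2)
>>> Ct = mixing * C + (1 - mixing) * iCsqrt @ X.T @ Yhat @ Yhat.T @ X @ iCsqrt
>>> v, U = np.linalg.eigh(Ct)
>>> v, U = v[::-1], U[:, ::-1]  # eigenpairs in descending order
>>> Pxt = iCsqrt @ U @ np.diag(v**0.5)  # projects X into the latent space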
"""
return super()._fit_feature_space(X, Y, Yhat)
def _fit_sample_space(self, X, Y, Yhat, W):
r"""In sample-space PCovR, the projectors are determined by:
.. math::
\mathbf{\tilde{K}} = \alpha \mathbf{X} \mathbf{X}^T +
(1 - \alpha) \mathbf{\hat{Y}}\mathbf{\hat{Y}}^T
where
.. math::
\mathbf{P}_{XT} = \left(\alpha \mathbf{X}^T + (1 - \alpha)
\mathbf{W} \mathbf{\hat{Y}}^T\right)
\mathbf{U}_\mathbf{\tilde{K}}
\mathbf{\Lambda}_\mathbf{\tilde{K}}^{-\frac{1}{2}}
.. math::
\mathbf{P}_{TX} = \mathbf{\Lambda}_\mathbf{\tilde{K}}^{-\frac{1}{2}}
\mathbf{U}_\mathbf{\tilde{K}}^T \mathbf{X}
.. math::
\mathbf{P}_{TY} = \mathbf{\Lambda}_\mathbf{\tilde{K}}^{-\frac{1}{2}}
\mathbf{U}_\mathbf{\tilde{K}}^T \mathbf{Y}
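A schematic NumPy sketch on placeholder data (truncation to `n_components`
omitted for brevity):
>>> import numpy as np
>>> rng = np.random.RandomState(0)
>>> X = rng.standard_normal((4, 6))  # n_samples < n_features
>>> Yhat = rng.standard_normal((4, 2))
>>> W = np.linalg.lstsq(X, Yhat, rcond=None)[0]
>>> mixing = 0.5
>>> Kt = mixing * X @ X.T + (1 - mixing) * Yhat @ Yhat.T
>>> v, U = np.linalg.eigh(Kt)
>>> v, U = v[::-1], U[:, ::-1]  # eigenpairs in descending order
>>> Pxt = (mixing * X.T + (1 - mixing) * W @ Yhat.T) @ U @ np.diag(v**-0.5)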
"""
return super()._fit_sample_space(X, Y, Yhat, W)
def predict(self, X=None, T=None):
"""Predicts the property values using regression on X or T."""
check_is_fitted(self, ["pxy_", "pty_"])
if X is None and T is None:
raise ValueError("Either X or T must be supplied.")
if X is not None:
X = validate_data(self, X, reset=False)
return X @ self.pxy_
else:
T = check_array(T)
return T @ self.pty_
def score(self, X, y, T=None):
r"""Return the (negative) total reconstruction error for X and Y,
defined as:
.. math::
\ell_{X} = \frac{\lVert \mathbf{X} - \mathbf{T}\mathbf{P}_{TX} \rVert ^ 2}
{\lVert \mathbf{X}\rVert ^ 2}
and
.. math::
\ell_{Y} = \frac{\lVert \mathbf{Y} - \mathbf{T}\mathbf{P}_{TY} \rVert ^ 2}
{\lVert \mathbf{Y}\rVert ^ 2}
The negative loss :math:`-\ell = -(\ell_{X} + \ell_{Y})` is returned for
easier use in sklearn pipelines, e.g., a grid search, where methods named
'score' are meant to be maximized.
Parameters
----------
X : numpy.ndarray of shape (n_samples, n_features)
The data.
y : numpy.ndarray of shape (n_samples, n_properties)
The target.
T : numpy.ndarray of shape (n_samples, n_components), optional
Precomputed latent-space projection. If not supplied, it is computed by
applying `transform` to X.
Returns
-------
loss : float
Negative sum of the loss in reconstructing X from the latent-space
projection T and the loss in predicting y from the latent-space projection T
"""
X, y = validate_data(self, X, y, multi_output=True, reset=False)
if T is None:
T = self.transform(X)
Xrec = self.inverse_transform(T)
ypred = self.predict(T=T)
return -(
np.linalg.norm(X - Xrec) ** 2.0 / np.linalg.norm(X) ** 2.0
+ np.linalg.norm(y - ypred) ** 2.0 / np.linalg.norm(y) ** 2.0
)
def __sklearn_tags__(self):
tags = super().__sklearn_tags__()
tags.regressor_tags.poor_score = True
return tags