Source code for skmatter.decomposition._pcovr

import numpy as np
from sklearn.base import MultiOutputMixin, RegressorMixin
from sklearn.linear_model import LinearRegression, Ridge, RidgeCV
from sklearn.utils import check_array
from sklearn.utils.validation import check_is_fitted, validate_data

from skmatter.decomposition import _BasePCov
from skmatter.utils import check_lr_fit


class PCovR(RegressorMixin, MultiOutputMixin, _BasePCov):
    r"""Principal Covariates Regression (PCovR).

    As described in [deJong1992]_, PCovR determines a latent-space projection
    :math:`\mathbf{T}` which minimizes a combined loss in supervised and
    unsupervised tasks.

    This projection is determined by the eigendecomposition of a modified Gram
    matrix :math:`\mathbf{\tilde{K}}`

    .. math::
        \mathbf{\tilde{K}} = \alpha \mathbf{X} \mathbf{X}^T +
        (1 - \alpha) \mathbf{\hat{Y}}\mathbf{\hat{Y}}^T

    where :math:`\alpha` is a mixing parameter and :math:`\mathbf{X}` and
    :math:`\mathbf{\hat{Y}}` are matrices of shapes
    :math:`(n_{samples}, n_{features})` and :math:`(n_{samples}, n_{properties})`,
    respectively, which contain the input and approximate targets. For
    :math:`(n_{samples} < n_{features})`, this can be more efficiently computed
    using the eigendecomposition of a modified covariance matrix
    :math:`\mathbf{\tilde{C}}`

    .. math::
        \mathbf{\tilde{C}} = \alpha \mathbf{X}^T \mathbf{X} +
        (1 - \alpha) \left(\left(\mathbf{X}^T \mathbf{X}\right)^{-\frac{1}{2}}
        \mathbf{X}^T \mathbf{\hat{Y}}\mathbf{\hat{Y}}^T \mathbf{X}
        \left(\mathbf{X}^T \mathbf{X}\right)^{-\frac{1}{2}}\right)

    For all PCovR methods, it is strongly suggested that :math:`\mathbf{X}` and
    :math:`\mathbf{Y}` are centered and scaled to unit variance, otherwise the
    results will change drastically near :math:`\alpha \to 0` and
    :math:`\alpha \to 1`. This can be done with the companion preprocessing
    classes, where

    >>> from skmatter.preprocessing import StandardFlexibleScaler as SFS
    >>> import numpy as np
    >>>
    >>> # Set column_wise to True when the columns are relative to one another,
    >>> # False otherwise.
    >>> scaler = SFS(column_wise=True)
    >>>
    >>> A = np.array([[1, 2], [2, 1]])  # replace with your matrix
    >>> scaler.fit(A)
    StandardFlexibleScaler(column_wise=True)
    >>> A = scaler.transform(A)

    Parameters
    ----------
    mixing : float, default=0.5
        mixing parameter, as described in PCovR as :math:`{\alpha}`, here named
        to avoid confusion with the regularization parameter `alpha`

    n_components : int, float or str, default=None
        Number of components to keep.
        if n_components is not set all components are kept::

            n_components == min(n_samples, n_features)

    svd_solver : {'auto', 'full', 'arpack', 'randomized'}, default='auto'
        If auto :
            The solver is selected by a default policy based on `X.shape` and
            `n_components`: if the input data is larger than 500x500 and the
            number of components to extract is lower than 80% of the smallest
            dimension of the data, then the more efficient 'randomized' method
            is enabled. Otherwise the exact full SVD is computed and optionally
            truncated afterwards.
        If full :
            run exact full SVD calling the standard LAPACK solver via
            `scipy.linalg.svd` and select the components by postprocessing
        If arpack :
            run SVD truncated to n_components calling ARPACK solver via
            `scipy.sparse.linalg.svds`. It requires strictly
            0 < n_components < min(X.shape)
        If randomized :
            run randomized SVD by the method of Halko et al.

    tol : float, default=1e-12
        Tolerance for singular values computed by svd_solver == 'arpack'.
        Must be of range [0.0, infinity).

    space : {'feature', 'sample', 'auto'}, default='auto'
        whether to compute the PCovR in `sample` or `feature` space;
        default=`sample` when :math:`{n_{samples} < n_{features}}` and
        `feature` when :math:`{n_{features} < n_{samples}}`

    regressor : {`Ridge`, `RidgeCV`, `LinearRegression`, `precomputed`}, default=None
        regressor for computing approximated :math:`{\mathbf{\hat{Y}}}`.
        The regressor should be one of `sklearn.linear_model.Ridge`,
        `sklearn.linear_model.RidgeCV`, or
        `sklearn.linear_model.LinearRegression`. If a pre-fitted regressor
        is provided, it is used to compute :math:`{\mathbf{\hat{Y}}}`.
        Note that any pre-fitting of the regressor will be lost if `PCovR` is
        within a composite estimator that enforces cloning, e.g.,
        `sklearn.compose.TransformedTargetRegressor` or
        `sklearn.pipeline.Pipeline` with model caching.
        In such cases, the regressor will be re-fitted on the same
        training data as the composite estimator.
        If `precomputed`, we assume that the `y` passed to the `fit` function
        is the regressed form of the targets :math:`{\mathbf{\hat{Y}}}`.
        If None, ``sklearn.linear_model.Ridge(alpha=1e-6, fit_intercept=False,
        tol=1e-12)`` is used as the regressor.

    iterated_power : int or 'auto', default='auto'
        Number of iterations for the power method computed by
        svd_solver == 'randomized'.
        Must be of range [0, infinity).

    random_state : int, :class:`numpy.random.RandomState` instance or None, default=None
        Used when the 'arpack' or 'randomized' solvers are used. Pass an int
        for reproducible results across multiple function calls.

    whiten : boolean, deprecated

    Attributes
    ----------
    mixing : float, default=0.5
        mixing parameter, as described in PCovR as :math:`{\alpha}`

    tol : float, default=1e-12
        Tolerance for singular values computed by svd_solver == 'arpack'.
        Must be of range [0.0, infinity).

    space : {'feature', 'sample', 'auto'}, default='auto'
        whether to compute the PCovR in `sample` or `feature` space;
        default=`sample` when :math:`{n_{samples} < n_{features}}` and
        `feature` when :math:`{n_{features} < n_{samples}}`

    n_components_ : int
        The estimated number of components, which equals the parameter
        n_components, or the lesser value of n_features and n_samples
        if n_components is None.

    pxt_ : numpy.ndarray of size :math:`({n_{features}, n_{components}})`
        the projector, or weights, from the input space :math:`\mathbf{X}`
        to the latent-space projection :math:`\mathbf{T}`

    pxy_ : numpy.ndarray of size :math:`({n_{features}, n_{properties}})`
        the projector, or weights, from the input space :math:`\mathbf{X}`
        to the properties :math:`\mathbf{Y}`

    pty_ : numpy.ndarray of size :math:`({n_{components}, n_{properties}})`
        the projector, or weights, from the latent-space projection
        :math:`\mathbf{T}` to the properties :math:`\mathbf{Y}`

    explained_variance_ : numpy.ndarray of shape (n_components,)
        The amount of variance explained by each of the selected components.
        Equal to n_components largest eigenvalues
        of the PCovR-modified covariance matrix of :math:`\mathbf{X}`.

    singular_values_ : numpy.ndarray of shape (n_components,)
        The singular values corresponding to each of the selected components.
    Examples
    --------
    >>> import numpy as np
    >>> from skmatter.decomposition import PCovR
    >>> X = np.array([[-1, 1, -3, 1], [1, -2, 1, 2], [-2, 0, -2, -2], [1, 0, 2, -1]])
    >>> Y = np.array([[0, -5], [-1, 1], [1, -5], [-3, 2]])
    >>> pcovr = PCovR(mixing=0.1, n_components=2)
    >>> pcovr.fit(X, Y)
    PCovR(mixing=0.1, n_components=2)
    >>> pcovr.transform(X)
    array([[ 3.2630561 ,  0.06663787],
           [-2.69395511, -0.41582771],
           [ 3.48683147, -0.83164387],
           [-4.05593245,  1.18083371]])
    >>> pcovr.predict(X)
    array([[ 0.01371776, -5.00945512],
           [-1.02805338,  1.06736871],
           [ 0.98166504, -4.98307078],
           [-2.9963189 ,  1.98238856]])
    """  # NoQa: E501

    def __init__(
        self,
        mixing=0.5,
        n_components=None,
        svd_solver="auto",
        tol=1e-12,
        space="auto",
        regressor=None,
        iterated_power="auto",
        random_state=None,
        whiten=False,
    ):
        super().__init__(
            mixing=mixing,
            n_components=n_components,
            svd_solver=svd_solver,
            tol=tol,
            space=space,
            iterated_power=iterated_power,
            random_state=random_state,
            whiten=whiten,
        )
        self.regressor = regressor
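    # Editor's sketch (illustrative, not part of the skmatter API): the
    # modified Gram matrix from the class docstring can be formed directly
    # with NumPy. The data, `alpha`, and all variable names here are
    # assumptions; `X` and `Y` are centered, as the docstring recommends.
    #
    #     import numpy as np
    #
    #     rng = np.random.default_rng(0)
    #     X = rng.standard_normal((8, 3))
    #     X -= X.mean(axis=0)
    #     Y = rng.standard_normal((8, 2))
    #     Y -= Y.mean(axis=0)
    #     W = np.linalg.lstsq(X, Y, rcond=None)[0]   # (3, 2) regression weights
    #     Yhat = X @ W                               # approximate targets
    #     alpha = 0.5
    #     K = alpha * X @ X.T + (1 - alpha) * Yhat @ Yhat.T
    #     evals, U = np.linalg.eigh(K)               # eigendecomposition of K-tilde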
    def fit(self, X, Y, W=None):
        r"""Fit the model with X and Y.

        Depending on the dimensions of X, calls either `_fit_feature_space`
        or `_fit_sample_space`.

        Parameters
        ----------
        X : numpy.ndarray, shape (n_samples, n_features)
            Training data, where n_samples is the number of samples and
            n_features is the number of features.

            It is suggested that :math:`\mathbf{X}` be centered by its column-
            means and scaled. If features are related, the matrix should be
            scaled to have unit variance, otherwise :math:`\mathbf{X}` should
            be scaled so that each feature has a variance of 1 / n_features.

        Y : numpy.ndarray, shape (n_samples, n_properties)
            Training data, where n_samples is the number of samples and
            n_properties is the number of properties.

            It is suggested that :math:`\mathbf{Y}` be centered by its column-
            means and scaled. If features are related, the matrix should be
            scaled to have unit variance, otherwise :math:`\mathbf{Y}` should
            be scaled so that each feature has a variance of 1 / n_features.

            If the passed regressor = `precomputed`, it is assumed that Y is
            the regressed form of the properties, :math:`{\mathbf{\hat{Y}}}`.

        W : numpy.ndarray, shape (n_features, n_properties)
            Regression weights, optional when regressor = `precomputed`. If
            not passed, it is assumed that
            `W = np.linalg.lstsq(X, Y, self.tol)[0]`
        """
        X, Y = validate_data(self, X, Y, y_numeric=True, multi_output=True)

        super().fit(X)

        compatible_regressors = (LinearRegression, Ridge, RidgeCV)

        if self.regressor not in ["precomputed", None] and not isinstance(
            self.regressor, compatible_regressors
        ):
            raise ValueError(
                "Regressor must be an instance of `"
                f"{'`, `'.join(r.__name__ for r in compatible_regressors)}`"
                ", or `precomputed`"
            )

        # Assign the default regressor
        if self.regressor != "precomputed":
            if self.regressor is None:
                regressor = Ridge(
                    alpha=1e-6,
                    fit_intercept=False,
                    tol=1e-12,
                )
            else:
                regressor = self.regressor

            self.regressor_ = check_lr_fit(regressor, X, Y)

            W = self.regressor_.coef_.T.reshape(X.shape[1], -1)
            Yhat = self.regressor_.predict(X).reshape(X.shape[0], -1)
        else:
            Yhat = Y.copy()
            if W is None:
                W = np.linalg.lstsq(X, Yhat, self.tol)[0]

        if self.space_ == "feature":
            self._fit_feature_space(X, Y.reshape(Yhat.shape), Yhat)
        else:
            self._fit_sample_space(X, Y.reshape(Yhat.shape), Yhat, W)

        self.pxy_ = self.pxt_ @ self.pty_
        if len(Y.shape) == 1:
            self.pxy_ = self.pxy_.reshape(X.shape[1])
            self.pty_ = self.pty_.reshape(self.n_components_)

        self.components_ = self.pxt_.T  # for sklearn compatibility
        return self
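    # Usage sketch for ``fit`` (editor's illustration; the data and the chosen
    # Ridge parameters are arbitrary assumptions, not defaults of this class):
    #
    #     import numpy as np
    #     from sklearn.linear_model import Ridge
    #     from skmatter.decomposition import PCovR
    #
    #     X = np.array([[-1.0, 1.0, -3.0, 1.0], [1.0, -2.0, 1.0, 2.0],
    #                   [-2.0, 0.0, -2.0, -2.0], [1.0, 0.0, 2.0, -1.0]])
    #     Y = np.array([[0.0, -5.0], [-1.0, 1.0], [1.0, -5.0], [-3.0, 2.0]])
    #
    #     # an explicit regressor instead of the default Ridge(alpha=1e-6, ...)
    #     pcovr = PCovR(mixing=0.5, n_components=2,
    #                   regressor=Ridge(alpha=1e-2, fit_intercept=False))
    #     pcovr.fit(X, Y)
    #     pcovr.pxt_.shape    # (n_features, n_components) == (4, 2)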
    def _fit_feature_space(self, X, Y, Yhat):
        r"""In feature-space PCovR, the projectors are determined by:

        .. math::
            \mathbf{\tilde{C}} = \alpha \mathbf{X}^T \mathbf{X} +
            (1 - \alpha) \left(\left(\mathbf{X}^T \mathbf{X}\right)^{-\frac{1}{2}}
            \mathbf{X}^T \mathbf{\hat{Y}}\mathbf{\hat{Y}}^T \mathbf{X}
            \left(\mathbf{X}^T \mathbf{X}\right)^{-\frac{1}{2}}\right)

        where

        .. math::
            \mathbf{P}_{XT} = (\mathbf{X}^T \mathbf{X})^{-\frac{1}{2}}
            \mathbf{U}_\mathbf{\tilde{C}}^T
            \mathbf{\Lambda}_\mathbf{\tilde{C}}^{\frac{1}{2}}

        .. math::
            \mathbf{P}_{TX} = \mathbf{\Lambda}_\mathbf{\tilde{C}}^{-\frac{1}{2}}
            \mathbf{U}_\mathbf{\tilde{C}}^T
            (\mathbf{X}^T \mathbf{X})^{\frac{1}{2}}

        .. math::
            \mathbf{P}_{TY} = \mathbf{\Lambda}_\mathbf{\tilde{C}}^{-\frac{1}{2}}
            \mathbf{U}_\mathbf{\tilde{C}}^T
            (\mathbf{X}^T \mathbf{X})^{-\frac{1}{2}} \mathbf{X}^T \mathbf{Y}
        """
        return super()._fit_feature_space(X, Y, Yhat)
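    # Editor's sketch of the feature-space construction above, reusing `X`,
    # `Yhat`, and `alpha` from the sketch after ``__init__``. Assumes X has
    # full column rank so (X^T X)^(-1/2) exists; illustrative only.
    #
    #     evals_c, V = np.linalg.eigh(X.T @ X)
    #     Cinv_sqrt = V @ np.diag(evals_c ** -0.5) @ V.T    # (X^T X)^(-1/2)
    #     Ct = alpha * X.T @ X + (1 - alpha) * (
    #         Cinv_sqrt @ X.T @ Yhat @ Yhat.T @ X @ Cinv_sqrt
    #     )
    #     evals, U = np.linalg.eigh(Ct)    # eigenpairs of C-tilde (ascending)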
    def _fit_sample_space(self, X, Y, Yhat, W):
        r"""In sample-space PCovR, the projectors are determined by:

        .. math::
            \mathbf{\tilde{K}} = \alpha \mathbf{X} \mathbf{X}^T +
            (1 - \alpha) \mathbf{\hat{Y}}\mathbf{\hat{Y}}^T

        where

        .. math::
            \mathbf{P}_{XT} = \left(\alpha \mathbf{X}^T + (1 - \alpha)
            \mathbf{W} \mathbf{\hat{Y}}^T\right)
            \mathbf{U}_\mathbf{\tilde{K}}
            \mathbf{\Lambda}_\mathbf{\tilde{K}}^{-\frac{1}{2}}

        .. math::
            \mathbf{P}_{TX} = \mathbf{\Lambda}_\mathbf{\tilde{K}}^{-\frac{1}{2}}
            \mathbf{U}_\mathbf{\tilde{K}}^T \mathbf{X}

        .. math::
            \mathbf{P}_{TY} = \mathbf{\Lambda}_\mathbf{\tilde{K}}^{-\frac{1}{2}}
            \mathbf{U}_\mathbf{\tilde{K}}^T \mathbf{Y}
        """
        return super()._fit_sample_space(X, Y, Yhat, W)
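    # Editor's sketch of the sample-space projector P_XT above (reusing `X`,
    # `Yhat`, `W`, and `alpha` from the earlier sketches; illustrative only):
    #
    #     K = alpha * X @ X.T + (1 - alpha) * Yhat @ Yhat.T
    #     evals, U = np.linalg.eigh(K)
    #     U, evals = U[:, ::-1][:, :2], evals[::-1][:2]   # keep top 2 components
    #     Pxt = (alpha * X.T + (1 - alpha) * W @ Yhat.T) @ U / np.sqrt(evals)
    #     T = X @ Pxt                                     # latent-space projection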
    def inverse_transform(self, T):
        r"""Transform data back to its original space.

        .. math::
            \mathbf{\hat{X}} = \mathbf{T} \mathbf{P}_{TX}
                             = \mathbf{X} \mathbf{P}_{XT} \mathbf{P}_{TX}

        Parameters
        ----------
        T : ndarray, shape (n_samples, n_components)
            Projected data, where n_samples is the number of samples and
            n_components is the number of components.

        Returns
        -------
        X_original : numpy.ndarray, shape (n_samples, n_features)
        """
        return super().inverse_transform(T)
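    # Round-trip sketch (editor's illustration, reusing `pcovr` and `X` from
    # the ``fit`` sketch): reconstructing X per the equation above.
    #
    #     T = pcovr.transform(X)                # T = X @ pxt_
    #     X_rec = pcovr.inverse_transform(T)    # X-hat = T @ P_TX
    #     rel_err = np.linalg.norm(X - X_rec) / np.linalg.norm(X)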
    def predict(self, X=None, T=None):
        """Predicts the property values using regression on X or T."""
        check_is_fitted(self, ["pxy_", "pty_"])

        if X is None and T is None:
            raise ValueError("Either X or T must be supplied.")

        if X is not None:
            X = validate_data(self, X, reset=False)
            return X @ self.pxy_
        else:
            T = check_array(T)
            return T @ self.pty_
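    # Editor's sketch: the two prediction routes above agree for a fitted
    # model, since pxy_ = pxt_ @ pty_ (reusing `pcovr` and `X` from the
    # ``fit`` sketch; illustrative only).
    #
    #     y_from_X = pcovr.predict(X=X)                     # X @ pxy_
    #     y_from_T = pcovr.predict(T=pcovr.transform(X))    # T @ pty_
    #     np.allclose(y_from_X, y_from_T)                   # True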
    def transform(self, X=None):
        """Apply dimensionality reduction to X.

        ``X`` is projected on the first principal components as determined by
        the modified PCovR distances.

        Parameters
        ----------
        X : numpy.ndarray, shape (n_samples, n_features)
            New data, where n_samples is the number of samples and n_features
            is the number of features.
        """
        return super().transform(X)
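    # Shape sketch (editor's illustration, reusing `pcovr` and `X` above):
    #
    #     T = pcovr.transform(X)
    #     T.shape    # (n_samples, n_components) == (4, 2)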
    def score(self, X, y, T=None):
        r"""Return the (negative) total reconstruction error for X and Y,
        defined as:

        .. math::
            \ell_{X} = \frac{\lVert \mathbf{X} - \mathbf{T}\mathbf{P}_{TX}
            \rVert ^ 2}{\lVert \mathbf{X}\rVert ^ 2}

        and

        .. math::
            \ell_{Y} = \frac{\lVert \mathbf{Y} - \mathbf{T}\mathbf{P}_{TY}
            \rVert ^ 2}{\lVert \mathbf{Y}\rVert ^ 2}

        The negative loss :math:`-\ell = -(\ell_{X} + \ell_{Y})` is returned
        for easier use in sklearn pipelines, e.g., a grid search, where methods
        named 'score' are meant to be maximized.

        Parameters
        ----------
        X : numpy.ndarray of shape (n_samples, n_features)
            The data.

        y : numpy.ndarray of shape (n_samples, n_properties)
            The target.

        T : numpy.ndarray of shape (n_samples, n_components), optional
            Latent-space projection of X. If not provided, it is computed
            from X via `self.transform(X)`.

        Returns
        -------
        loss : float
            Negative sum of the loss in reconstructing X from the latent-space
            projection T and the loss in predicting Y from the latent-space
            projection T
        """
        X, y = validate_data(self, X, y, reset=False)

        if T is None:
            T = self.transform(X)

        Xrec = self.inverse_transform(T)
        ypred = self.predict(T=T)

        return -(
            np.linalg.norm(X - Xrec) ** 2.0 / np.linalg.norm(X) ** 2.0
            + np.linalg.norm(y - ypred) ** 2.0 / np.linalg.norm(y) ** 2.0
        )
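    # Editor's sketch of the combined loss above (reusing `pcovr`, `X`, and
    # `Y` from the ``fit`` sketch; illustrative only):
    #
    #     T = pcovr.transform(X)
    #     lx = (np.linalg.norm(X - pcovr.inverse_transform(T)) ** 2
    #           / np.linalg.norm(X) ** 2)
    #     ly = (np.linalg.norm(Y - pcovr.predict(T=T)) ** 2
    #           / np.linalg.norm(Y) ** 2)
    #     np.isclose(pcovr.score(X, Y), -(lx + ly))    # True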
    def __sklearn_tags__(self):
        tags = super().__sklearn_tags__()
        tags.regressor_tags.poor_score = True
        return tags