import numpy as np
from sklearn import clone
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.linear_model import (
LogisticRegression,
LogisticRegressionCV,
Perceptron,
RidgeClassifier,
RidgeClassifierCV,
SGDClassifier,
)
from sklearn.linear_model._base import LinearClassifierMixin
from sklearn.svm import LinearSVC
from sklearn.utils import check_array
from sklearn.utils.multiclass import check_classification_targets, type_of_target
from sklearn.utils.validation import check_is_fitted, validate_data
from skmatter.decomposition import _BasePCov
from skmatter.utils import check_cl_fit
class PCovC(LinearClassifierMixin, _BasePCov):
r"""Principal Covariates Classification (PCovC).
As described in [Jorgensen2025]_, PCovC determines a latent-space projection
:math:`\mathbf{T}` which minimizes a combined loss in supervised and
unsupervised tasks.
This projection is determined by the eigendecomposition of a modified gram
matrix :math:`\mathbf{\tilde{K}}`
.. math::
\mathbf{\tilde{K}} = \alpha \mathbf{X} \mathbf{X}^T +
(1 - \alpha) \mathbf{Z}\mathbf{Z}^T
where :math:`\alpha` is a mixing parameter, :math:`\mathbf{X}` is an input matrix of shape
:math:`(n_{samples}, n_{features})`, and :math:`\mathbf{Z}` is a matrix of class confidence scores
of shape :math:`(n_{samples}, n_{classes})`. For :math:`(n_{features} < n_{samples})`,
this can be more efficiently computed using the eigendecomposition of a
modified covariance matrix :math:`\mathbf{\tilde{C}}`

.. math::
\mathbf{\tilde{C}} = \alpha \mathbf{X}^T \mathbf{X} +
(1 - \alpha) \left(\left(\mathbf{X}^T
\mathbf{X}\right)^{-\frac{1}{2}} \mathbf{X}^T
\mathbf{Z}\mathbf{Z}^T \mathbf{X} \left(\mathbf{X}^T
\mathbf{X}\right)^{-\frac{1}{2}}\right)
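
As an illustrative sketch (toy data only, not part of the API), the
modified Gram matrix :math:`\mathbf{\tilde{K}}` defined above can be
assembled directly:

>>> import numpy as np
>>> X = np.array([[1.0, 2.0], [3.0, 0.0]])
>>> Z = np.array([[0.5], [-0.5]])  # class confidence scores
>>> alpha = 0.5  # the mixing parameter
>>> K_tilde = alpha * X @ X.T + (1 - alpha) * Z @ Z.T
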
For all PCovC methods, it is strongly suggested that :math:`\mathbf{X}` be
centered and scaled to unit variance, since otherwise the results will change
drastically as :math:`\alpha \to 0` and :math:`\alpha \to 1`.
This can be done with the companion preprocessing classes, where

>>> from skmatter.preprocessing import StandardFlexibleScaler as SFS
>>> import numpy as np
>>>
>>> # Set column_wise to True when the columns are relative to one another,
>>> # False otherwise.
>>> scaler = SFS(column_wise=True)
>>>
>>> A = np.array([[1, 2], [2, 1]]) # replace with your matrix
>>> scaler.fit(A)
StandardFlexibleScaler(column_wise=True)
>>> A = scaler.transform(A)

Parameters
----------
mixing : float, default=0.5
mixing parameter, as described in PCovC as :math:`{\alpha}`, here named
``mixing`` to avoid confusion with the regularization parameter `alpha`
n_components : int, float or str, default=None
Number of components to keep.
if n_components is not set all components are kept::
n_components == min(n_samples, n_features)
svd_solver : {'auto', 'full', 'arpack', 'randomized'}, default='auto'
If auto :
The solver is selected by a default policy based on `X.shape` and
`n_components`: if the input data is larger than 500x500 and the
number of components to extract is lower than 80% of the smallest
dimension of the data, then the more efficient 'randomized'
method is enabled. Otherwise the exact full SVD is computed and
optionally truncated afterwards.
If full :
run exact full SVD calling the standard LAPACK solver via
`scipy.linalg.svd` and select the components by postprocessing
If arpack :
run SVD truncated to n_components calling ARPACK solver via
`scipy.sparse.linalg.svds`. It requires strictly
0 < n_components < min(X.shape)
If randomized :
run randomized SVD by the method of Halko et al.
tol : float, default=1e-12
Tolerance for singular values computed by svd_solver == 'arpack'.
Must be of range [0.0, infinity).
space: {'feature', 'sample', 'auto'}, default='auto'
whether to compute the PCovC in `sample` or `feature` space
default=`sample` when :math:`{n_{samples} < n_{features}}` and
`feature` when :math:`{n_{features} < n_{samples}}`
classifier: `estimator object` or `precomputed`, default=None
classifier for computing :math:`{\mathbf{Z}}`. The classifier should be
one of the following:
- ``sklearn.linear_model.LogisticRegression()``
- ``sklearn.linear_model.LogisticRegressionCV()``
- ``sklearn.svm.LinearSVC()``
- ``sklearn.discriminant_analysis.LinearDiscriminantAnalysis()``
- ``sklearn.linear_model.RidgeClassifier()``
- ``sklearn.linear_model.RidgeClassifierCV()``
- ``sklearn.linear_model.SGDClassifier()``
- ``sklearn.linear_model.Perceptron()``
If a pre-fitted classifier
is provided, it is used to compute :math:`{\mathbf{Z}}`.
Note that any pre-fitting of the classifier will be lost if `PCovC` is
within a composite estimator that enforces cloning, e.g.,
`sklearn.pipeline.Pipeline` with model caching.
In such cases, the classifier will be re-fitted on the same
training data as the composite estimator.
If None, ``sklearn.linear_model.LogisticRegression()``
is used as the classifier.
iterated_power : int or 'auto', default='auto'
Number of iterations for the power method computed by
svd_solver == 'randomized'.
Must be of range [0, infinity).
random_state : int, RandomState instance or None, default=None
Used when the 'arpack' or 'randomized' solvers are used. Pass an int
for reproducible results across multiple function calls.
whiten : boolean, deprecated

Attributes
----------
mixing: float, default=0.5
mixing parameter, as described in PCovC as :math:`{\alpha}`
tol: float, default=1e-12
Tolerance for singular values computed by svd_solver == 'arpack'.
Must be of range [0.0, infinity).
space: {'feature', 'sample', 'auto'}, default='auto'
whether to compute the PCovC in `sample` or `feature` space
default=`sample` when :math:`{n_{samples} < n_{features}}` and
`feature` when :math:`{n_{features} < n_{samples}}`
n_components_ : int
The estimated number of components, which equals the parameter
n_components, or the lesser value of n_features and n_samples
if n_components is None.
classifier : estimator object
The linear classifier passed for fitting.
z_classifier_ : estimator object
The linear classifier fit between :math:`\mathbf{X}` and :math:`\mathbf{Y}`.
classifier_ : estimator object
The linear classifier fit between :math:`\mathbf{T}` and :math:`\mathbf{Y}`.
pxt_ : ndarray of size :math:`({n_{features}, n_{components}})`
the projector, or weights, from the input space :math:`\mathbf{X}`
to the latent-space projection :math:`\mathbf{T}`
pxz_ : ndarray of size :math:`({n_{features}, })` or :math:`({n_{features}, n_{classes}})`
the projector, or weights, from the input space :math:`\mathbf{X}`
to the class confidence scores :math:`\mathbf{Z}`
ptz_ : ndarray of size :math:`({n_{components}, })` or :math:`({n_{components}, n_{classes}})`
the projector, or weights, from the latent-space projection
:math:`\mathbf{T}` to the class confidence scores :math:`\mathbf{Z}`
explained_variance_ : numpy.ndarray of shape (n_components,)
The amount of variance explained by each of the selected components.
Equal to n_components largest eigenvalues
of the PCovC-modified covariance matrix of :math:`\mathbf{X}`.
singular_values_ : numpy.ndarray of shape (n_components,)
The singular values corresponding to each of the selected components.

Examples
--------
>>> import numpy as np
>>> from skmatter.decomposition import PCovC
>>> from sklearn.preprocessing import StandardScaler
>>> X = np.array([[-1, 0, -2, 3], [3, -2, 0, 1], [-3, 0, -1, -1], [1, 3, 0, -2]])
>>> X = StandardScaler().fit_transform(X)
>>> Y = np.array([0, 1, 2, 0])
>>> pcovc = PCovC(mixing=0.1, n_components=2)
>>> pcovc.fit(X, Y)
PCovC(mixing=0.1, n_components=2)
>>> pcovc.transform(X)
array([[-0.4794854 , -0.46228114],
[ 1.9416966 , 0.2532831 ],
[-1.08744947, 0.89117784],
[-0.37476173, -0.6821798 ]])
>>> pcovc.predict(X)
array([0, 1, 2, 0])
""" # NoQa: E501
def __init__(
self,
mixing=0.5,
n_components=None,
svd_solver="auto",
tol=1e-12,
space="auto",
classifier=None,
iterated_power="auto",
random_state=None,
whiten=False,
):
super().__init__(
mixing=mixing,
n_components=n_components,
svd_solver=svd_solver,
tol=tol,
space=space,
iterated_power=iterated_power,
random_state=random_state,
whiten=whiten,
)
self.classifier = classifier
def fit(self, X, Y, W=None):
r"""Fit the model with X and Y.
Note that W is taken from the coefficients of a linear classifier fit
between X and Y to compute Z:
.. math::
\mathbf{Z} = \mathbf{X} \mathbf{W}
We then call either `_fit_feature_space` or `_fit_sample_space`,
using Z as our approximation of Y. Finally, we refit a classifier on
T and Y to obtain :math:`\mathbf{P}_{TZ}`.
Parameters
----------
X : numpy.ndarray, shape (n_samples, n_features)
Training data, where n_samples is the number of samples and
n_features is the number of features.
It is suggested that :math:`\mathbf{X}` be centered by its column-
means and scaled. If features are related, the matrix should be
scaled to have unit variance, otherwise :math:`\mathbf{X}` should
be scaled so that each feature has a variance of 1 / n_features.
Y : numpy.ndarray, shape (n_samples,)
Training data, where n_samples is the number of samples.
W : numpy.ndarray, shape (n_features, n_classes)
Classification weights, optional when classifier = `precomputed`. If
not passed, it is assumed that the weights will be taken from a
linear classifier fit between :math:`\mathbf{X}` and :math:`\mathbf{Y}`
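
Examples
--------
A minimal sketch of the ``precomputed`` path (toy data, with the scaling
suggested above omitted for brevity), where the weights are taken from a
separately fitted classifier:

>>> import numpy as np
>>> from sklearn.linear_model import LogisticRegression
>>> from skmatter.decomposition import PCovC
>>> X = np.array([[-1.0, 0.0], [1.0, 1.0], [0.0, -2.0], [2.0, 1.0]])
>>> Y = np.array([0, 1, 0, 1])
>>> W = LogisticRegression().fit(X, Y).coef_.T  # shape (n_features, 1)
>>> pcovc = PCovC(mixing=0.5, n_components=1, classifier="precomputed")
>>> _ = pcovc.fit(X, Y, W)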
"""
X, Y = validate_data(self, X, Y, y_numeric=False)
check_classification_targets(Y)
self.classes_ = np.unique(Y)
super().fit(X)
compatible_classifiers = (
LogisticRegression,
LogisticRegressionCV,
LinearSVC,
LinearDiscriminantAnalysis,
RidgeClassifier,
RidgeClassifierCV,
SGDClassifier,
Perceptron,
)
if self.classifier not in ["precomputed", None] and not isinstance(
self.classifier, compatible_classifiers
):
raise ValueError(
"Classifier must be an instance of `"
f"{'`, `'.join(c.__name__ for c in compatible_classifiers)}`"
", or `precomputed`"
)
if self.classifier != "precomputed":
if self.classifier is None:
classifier = LogisticRegression()
else:
classifier = self.classifier
self.z_classifier_ = check_cl_fit(classifier, X, Y)
W = self.z_classifier_.coef_.T.reshape(X.shape[1], -1)
else:
# If precomputed, use default classifier to predict Y from T
classifier = LogisticRegression()
if W is None:
W = LogisticRegression().fit(X, Y).coef_.T
W = W.reshape(X.shape[1], -1)
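# Class confidence scores Z = X W serve as the approximation of Y
# in the decomposition below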
Z = X @ W
if self.space_ == "feature":
self._fit_feature_space(X, Y, Z)
else:
self._fit_sample_space(X, Y, Z, W)
# instead of using linear regression solution, refit with the
# classifier and steal weights to get pxz and ptz
self.classifier_ = clone(classifier).fit(X @ self.pxt_, Y)
self.ptz_ = self.classifier_.coef_.T
self.pxz_ = self.pxt_ @ self.ptz_
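# For binary classification, flatten the projectors to the 1D shapes
# documented in the class docstring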
if len(Y.shape) == 1 and type_of_target(Y) == "binary":
self.pxz_ = self.pxz_.reshape(
X.shape[1],
)
self.ptz_ = self.ptz_.reshape(
self.n_components_,
)
self.components_ = self.pxt_.T # for sklearn compatibility
return self
def _fit_feature_space(self, X, Y, Z):
r"""In feature-space PCovC, the projectors are determined by:
.. math::
\mathbf{\tilde{C}} = \alpha \mathbf{X}^T \mathbf{X} +
(1 - \alpha) \left(\left(\mathbf{X}^T
\mathbf{X}\right)^{-\frac{1}{2}} \mathbf{X}^T
\mathbf{Z}\mathbf{Z}^T \mathbf{X} \left(\mathbf{X}^T
\mathbf{X}\right)^{-\frac{1}{2}}\right)

where

.. math::
\mathbf{P}_{XT} = (\mathbf{X}^T \mathbf{X})^{-\frac{1}{2}}
\mathbf{U}_\mathbf{\tilde{C}}^T
\mathbf{\Lambda}_\mathbf{\tilde{C}}^{\frac{1}{2}}

.. math::
\mathbf{P}_{TX} = \mathbf{\Lambda}_\mathbf{\tilde{C}}^{-\frac{1}{2}}
\mathbf{U}_\mathbf{\tilde{C}}^T
(\mathbf{X}^T \mathbf{X})^{\frac{1}{2}}
"""
return super()._fit_feature_space(X, Y, Z, compute_pty_=False)
def _fit_sample_space(self, X, Y, Z, W):
r"""In sample-space PCovC, the projectors are determined by:
.. math::
\mathbf{\tilde{K}} = \alpha \mathbf{X} \mathbf{X}^T +
(1 - \alpha) \mathbf{Z}\mathbf{Z}^T

where

.. math::
\mathbf{P}_{XT} = \left(\alpha \mathbf{X}^T + (1 - \alpha)
\mathbf{W} \mathbf{Z}^T\right)
\mathbf{U}_\mathbf{\tilde{K}}
\mathbf{\Lambda}_\mathbf{\tilde{K}}^{-\frac{1}{2}}

.. math::
\mathbf{P}_{TX} = \mathbf{\Lambda}_\mathbf{\tilde{K}}^{-\frac{1}{2}}
\mathbf{U}_\mathbf{\tilde{K}}^T \mathbf{X}
"""
return super()._fit_sample_space(X, Y, Z, W, compute_pty_=False)
def decision_function(self, X=None, T=None):
r"""Predicts confidence scores from X or T.
.. math::
\mathbf{Z} = \mathbf{T} \mathbf{P}_{TZ}
= \mathbf{X} \mathbf{P}_{XT} \mathbf{P}_{TZ}
= \mathbf{X} \mathbf{P}_{XZ}
Parameters
----------
X : ndarray, shape(n_samples, n_features)
Original data for which we want to get confidence scores,
where n_samples is the number of samples and n_features is the
number of features.
T : ndarray, shape (n_samples, n_components)
Projected data for which we want to get confidence scores,
where n_samples is the number of samples and n_components is the
number of components.

Returns
-------
Z : numpy.ndarray, shape (n_samples,) or (n_samples, n_classes)
Confidence scores. For binary classification, has shape `(n_samples,)`,
for multiclass classification, has shape `(n_samples, n_classes)`
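
Examples
--------
A minimal sketch (toy data, scaling omitted for brevity) checking that
the scores computed from X and from T = transform(X) agree:

>>> import numpy as np
>>> from skmatter.decomposition import PCovC
>>> X = np.array([[-1.0, 0.0, -2.0], [3.0, -2.0, 0.0], [-3.0, 0.0, -1.0], [1.0, 3.0, 0.0]])
>>> Y = np.array([0, 1, 0, 1])
>>> pcovc = PCovC(mixing=0.5, n_components=2).fit(X, Y)
>>> T = pcovc.transform(X)
>>> np.allclose(pcovc.decision_function(X), pcovc.decision_function(T=T))
True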
"""
check_is_fitted(self, attributes=["pxz_", "ptz_"])
if X is None and T is None:
raise ValueError("Either X or T must be supplied.")
if X is not None:
X = validate_data(self, X, reset=False)
# Or self.classifier_.decision_function(X @ self.pxt_)
return X @ self.pxz_ + self.classifier_.intercept_
else:
T = check_array(T)
return T @ self.ptz_ + self.classifier_.intercept_
def predict(self, X=None, T=None):
"""Predicts the property labels using classification on T."""
check_is_fitted(self, attributes=["pxz_", "ptz_"])
if X is None and T is None:
raise ValueError("Either X or T must be supplied.")
if X is not None:
X = validate_data(self, X, reset=False)
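# Project X into the latent space, then classify the projections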
return self.classifier_.predict(X @ self.pxt_)
else:
T = check_array(T)
return self.classifier_.predict(T)