Source code for skmatter.utils._orthogonalizers
# -*- coding: utf-8 -*-
"""Necessary orthogonalizers for the CUR decomposition subselection method.
Authors: Rose K. Cersonsky
Michele Ceriotti
"""
import warnings
import numpy as np
[docs]
def X_orthogonalizer(x1, c=None, x2=None, tol=1e-12, copy=False):
    """Orthogonalizes a feature matrix by the given columns.

    Can be used to orthogonalize by samples by calling `X = X_orthogonalizer(X.T,
    row_index).T`. After orthogonalization, each column of X will contain only what is
    orthogonal to X[:, c] or x2.

    Parameters
    ----------
    x1: numpy.ndarray of shape (n x m)
        feature matrix to orthogonalize
    c: int, less than m, default=None
        index of the column to orthogonalize by; only used when ``x2`` is None
    x2: numpy.ndarray of shape (n x a), default=x1[:, c]
        a separate set of columns to orthogonalize with respect to
        Note: the orthogonalizer will work column-by-column in column-index order
    tol: float, default=1e-12
        orthogonalizing columns whose norm is below this value are skipped and a
        warning is emitted instead
    copy: bool, default=False
        whether to work on a copy of ``x1`` or edit it in-place

    Returns
    -------
    numpy.ndarray of shape (n x m)
        the orthogonalized matrix; this is ``x1`` itself when ``copy`` is False

    Raises
    ------
    ValueError
        if neither ``c`` nor ``x2`` is given, or if ``x2`` does not have the same
        number of rows as ``x1``
    """
    if x2 is None:
        if c is None:
            # Without this guard the shape check below would fail with an opaque
            # AttributeError on None.
            raise ValueError(
                "Either the column index `c` or the orthogonalizing matrix `x2` "
                "must be provided."
            )
        cols = x1[:, [c]]
    elif x2.shape[0] == x1.shape[0]:
        cols = np.reshape(x2, (x1.shape[0], -1))
    else:
        raise ValueError(
            "You can only orthogonalize a matrix using a vector with the same number "
            f"of rows. Matrix X has {x1.shape[0]} rows, whereas the orthogonalizing "
            f"matrix has {x2.shape[0]} rows."
        )

    xnew = x1.copy() if copy else x1

    for i in range(cols.shape[-1]):
        # Fancy indexing keeps the column two-dimensional (n x 1) and detaches it
        # from `cols`, so the in-place update of `xnew` below cannot alter it.
        col = cols[:, [i]]

        if np.linalg.norm(col) < tol:
            warnings.warn("Column vector contains only zeros.", stacklevel=1)
        else:
            col = np.divide(col, np.linalg.norm(col, axis=0))

        # Remove the projection of every column of xnew onto `col`.
        xnew -= (col @ (col.T @ xnew)).astype(xnew.dtype)

    return xnew
[docs]
def Y_feature_orthogonalizer(y, X, tol=1e-12, copy=True):
    r"""Orthogonalizes a property matrix given the selected features in
    :math:`\mathbf{X}`.

    .. math::
        \mathbf{Y} \leftarrow \mathbf{Y} -
        \mathbf{X} \left(\mathbf{X}^T\mathbf{X}\right)^{-1}\mathbf{X}^T \mathbf{Y}

    Parameters
    ----------
    y : numpy.ndarray of shape (n_samples x n_properties)
        property matrix
    X : numpy.ndarray of shape (n_samples x n_features)
        feature matrix
    tol: float
        cutoff for small eigenvalues, passed as ``rcond`` to np.linalg.pinv
    copy: bool
        whether to return a copy of y or edit in-place, default=True

    Returns
    -------
    numpy.ndarray
        the orthogonalized property matrix; this is ``y`` itself when ``copy`` is
        False
    """
    gram_inv = np.linalg.pinv(np.matmul(X.T, X), rcond=tol)

    # Evaluate X (X^T X)^+ X^T y from right to left so that the
    # (n_samples x n_samples) projection matrix is never formed explicitly.
    projection = np.matmul(X, np.matmul(gram_inv, np.matmul(X.T, y)))

    if copy:
        # The subtraction already allocates a new array; y is left untouched.
        return y - projection

    y -= projection
    return y
[docs]
def Y_sample_orthogonalizer(y, X, y_ref, X_ref, tol=1e-12, copy=True):
    r"""Orthogonalizes a matrix of targets :math:`{\mathbf{Y}}` given a reference
    feature matrix :math:`{\mathbf{X}_r}` and reference target matrix
    :math:`{\mathbf{Y}_r}`:

    .. math::
        \mathbf{Y} \leftarrow \mathbf{Y} -
        \mathbf{X} \left(\mathbf{X}_{\mathbf{r}}^T
        \mathbf{X}_{\mathbf{r}}\right)^{-1}\mathbf{X}_{\mathbf{r}}^T
        \mathbf{Y}_{\mathbf{r}}

    Parameters
    ----------
    y : numpy.ndarray of shape (n_samples x n_properties)
        property matrix
    X : numpy.ndarray of shape (n_samples x n_features)
        feature matrix
    y_ref : numpy.ndarray of shape (n_ref x n_properties)
        reference property matrix
    X_ref : numpy.ndarray of shape (n_ref x n_features)
        reference feature matrix
    tol: float
        cutoff passed as ``rcond`` to np.linalg.lstsq when fitting the reference
        data
    copy: bool
        whether to return a copy of y or edit in-place, default=True

    Returns
    -------
    numpy.ndarray
        ``y`` with the reference-model prediction removed; this is ``y`` itself
        when ``copy`` is False
    """
    # Least-squares weights mapping the reference features onto the reference
    # targets.
    weights = np.linalg.lstsq(X_ref, y_ref, rcond=tol)[0]

    # Prediction of those weights on X, shaped like y so the subtraction lines up.
    predicted = np.reshape(X @ weights, y.shape)

    if not copy:
        y -= predicted
        return y

    return y.copy() - predicted