Source code for pysindy.feature_library.polynomial_library

from itertools import chain
from math import comb
from typing import Iterator
from typing import Tuple

import numpy as np
from numpy.typing import NDArray
from scipy import sparse
from sklearn.preprocessing import PolynomialFeatures
from sklearn.utils.validation import check_is_fitted

from ..utils import AxesArray
from ..utils import comprehend_axes
from ..utils import wrap_axes
from ..utils._axis_conventions import AX_COORD
from .base import BaseFeatureLibrary
from .base import x_sequence_or_item



[docs]
class PolynomialLibrary(BaseFeatureLibrary, PolynomialFeatures):
    """Generate polynomial and interaction features.

    This is the same as :code:`sklearn.preprocessing.PolynomialFeatures`,
    but also adds the option to omit interaction features from the library.

    Parameters
    ----------
    degree : integer, optional (default 2)
        The degree of the polynomial features.

    include_interaction : boolean, optional (default True)
        Determines whether interaction features are produced.
        If false, features are all of the form ``x[i] ** k``.

    interaction_only : boolean, optional (default False)
        If true, only interaction features are produced: features that are
        products of at most ``degree`` *distinct* input features (so not
        ``x[1] ** 2``, ``x[0] * x[2] ** 3``, etc.).

    include_bias : boolean, optional (default True)
        If True (default), then include a bias column, the feature in which
        all polynomial powers are zero (i.e. a column of ones - acts as an
        intercept term in a linear model).

    order : str in {'C', 'F'}, optional (default 'C')
        Order of output array in the dense case. 'F' order is faster to
        compute, but may slow down subsequent estimators.

    Attributes
    ----------
    powers_ : array, shape (n_output_features, n_input_features)
        powers_[i, j] is the exponent of the jth input in the ith output.

    n_features_in_ : int
        The total number of input features.

    n_output_features_ : int
        The total number of output features. This number is computed by
        iterating over all appropriately sized combinations of input features.
    """

    def __init__(
        self,
        degree=2,
        include_interaction=True,
        interaction_only=False,
        include_bias=True,
        order="C",
    ):
        super().__init__(
            degree=degree,
            interaction_only=interaction_only,
            include_bias=include_bias,
            order=order,
        )
        self.include_interaction = include_interaction

    @staticmethod
    def _combinations(
        n_features: int,
        degree: int,
        include_interaction: bool,
        interaction_only: bool,
        include_bias: bool,
    ) -> Iterator[Tuple[int, ...]]:
        """
        Create selection tuples of input indexes for each polynomail term

        Selection tuple iterates the input indexes present in a single term.
        For example, (x+y+1)^2 would be in iterator of the tuples:
        (), (0,), (1,), (0, 0), (0, 1), (1, 1)
        1    x     y      x^2     x*y     y^2

        Order of terms is preserved regardless of include_interation.
        """
        if not include_interaction:
            return chain(
                [()] if include_bias else [],
                (
                    exponent * (feat_idx,)
                    for exponent in range(1, degree + 1)
                    for feat_idx in range(n_features)
                ),
            )
        return PolynomialFeatures._combinations(
            n_features=n_features,
            min_degree=int(not include_bias),
            max_degree=degree,
            interaction_only=interaction_only,
            include_bias=include_bias,
        )

    @property
    def powers_(self) -> NDArray[np.int_]:
        """
        The exponents of the polynomial as an array of shape
        (n_features_out, n_features_in), where each item is the exponent of the
        jth input variable in the ith polynomial term.
        """
        check_is_fitted(self)
        combinations = self._combinations(
            n_features=self.n_features_in_,
            degree=self.degree,
            include_interaction=self.include_interaction,
            interaction_only=self.interaction_only,
            include_bias=self.include_bias,
        )
        return np.vstack(
            [np.bincount(c, minlength=self.n_features_in_) for c in combinations]
        )


[docs]
    def get_feature_names(self, input_features=None):
        """Return feature names for output features.

        Parameters
        ----------
        input_features : list of string, length n_features, optional
            String names for input features if available. By default,
            "x0", "x1", ... "xn_features" is used.

        Returns
        -------
        output_feature_names : list of string, length n_output_features
        """
        powers = self.powers_
        if input_features is None:
            input_features = ["x%d" % i for i in range(powers.shape[1])]
        feature_names = []
        for row in powers:
            inds = np.where(row)[0]
            if len(inds):
                name = " ".join(
                    (
                        "%s^%d" % (input_features[ind], exp)
                        if exp != 1
                        else input_features[ind]
                    )
                    for ind, exp in zip(inds, row[inds])
                )
            else:
                name = "1"
            feature_names.append(name)
        return feature_names



[docs]
    @x_sequence_or_item
    def fit(self, x_full: list[AxesArray], y=None):
        """
        Compute number of output features.

        Parameters
        ----------
        x : array-like, shape (n_samples, n_features)
            The data.

        Returns
        -------
        self : instance
        """
        if self.degree < 0 or not isinstance(self.degree, int):
            raise ValueError("degree must be a nonnegative integer")
        if (not self.include_interaction) and self.interaction_only:
            raise ValueError(
                "Can't have include_interaction be False and interaction_only"
                " be True"
            )
        n_features = x_full[0].shape[AX_COORD]
        combinations = self._combinations(
            n_features,
            self.degree,
            self.include_interaction,
            self.interaction_only,
            self.include_bias,
        )
        self.n_features_in_ = n_features
        self.n_output_features_ = sum(1 for _ in combinations)
        return self



[docs]
    @x_sequence_or_item
    def transform(self, x_full):
        """Transform data to polynomial features.

        Parameters
        ----------
        x_full : {array-like, sparse matrix} of shape (n_samples, n_features)
            The data to transform, row by row.

        Returns
        -------
        xp : np.ndarray or CSR/CSC sparse matrix,
                shape (n_samples, n_output_features)
            The matrix of features, where n_output_features is the number
            of polynomial features generated from the combination of inputs.
        """
        check_is_fitted(self)

        xp_full = []
        for x in x_full:
            if sparse.issparse(x) and x.format not in ["csr", "csc"]:
                # create new with correct sparse
                axes = comprehend_axes(x)
                x = x.asformat("csc")
                wrap_axes(axes, x)
            n_features = x.shape[AX_COORD]
            if n_features != self.n_features_in_:
                raise ValueError("x shape does not match training shape")

            combinations = self._combinations(
                n_features,
                self.degree,
                self.include_interaction,
                self.interaction_only,
                self.include_bias,
            )
            if sparse.isspmatrix(x):
                columns = []
                for combo in combinations:
                    if combo:
                        out_col = 1
                        for col_idx in combo:
                            out_col = x[..., col_idx].multiply(out_col)
                        columns.append(out_col)
                    else:
                        bias = sparse.csc_matrix(np.ones((x.shape[0], 1)))
                        columns.append(bias)
                xp = sparse.hstack(columns, dtype=x.dtype).tocsc()
            else:
                xp = AxesArray(
                    np.empty(
                        (*x.shape[:-1], self.n_output_features_),
                        dtype=x.dtype,
                        order=self.order,
                    ),
                    x.axes,
                )
                for i, combo in enumerate(combinations):
                    xp[..., i] = x[..., combo].prod(-1)
            xp_full = xp_full + [xp]
        return xp_full




def n_poly_features(
    n_in_feat: int,
    degree: int,
    include_bias: bool = False,
    include_interation: bool = True,
    interaction_only: bool = False,
) -> int:
    """Calculate number of polynomial features

    Args:
        n_in_feat: number of input features, e.g. 3 for x, y, z
        degree: polynomial degree, e.g. 2 for quadratic
        include_bias: whether to include a constant term
        include_interaction: whether to include terms mixing multiple inputs
        interaction_only: whether to omit terms of x_m * x_n^p for p > 1
    """
    if not include_interation and interaction_only:
        raise ValueError("Cannot set interaction only if include_interaction is False")
    n_feat = include_bias
    if not include_interation:
        return n_feat + n_in_feat * degree
    for deg in range(1, degree + 1):
        if interaction_only:
            n_feat += comb(n_in_feat, deg)
        else:
            n_feat += comb(n_in_feat + deg - 1, deg)
    return n_feat



[docs]
def IdentityLibrary():
    """
    Generate an identity library which maps all input features to
    themselves. An alias for creating a degree-1 polynomial library
    with no constant term.
    """
    return PolynomialLibrary(degree=1, include_bias=False)