from itertools import chain
from math import comb
from typing import Iterator
from typing import Tuple
import numpy as np
from numpy.typing import NDArray
from scipy import sparse
from sklearn.preprocessing import PolynomialFeatures
from sklearn.utils.validation import check_is_fitted
from ..utils import AxesArray
from ..utils import comprehend_axes
from ..utils import wrap_axes
from ..utils._axis_conventions import AX_COORD
from .base import BaseFeatureLibrary
from .base import x_sequence_or_item
[docs]
class PolynomialLibrary(BaseFeatureLibrary, PolynomialFeatures):
    """Generate polynomial and interaction features.

    This is the same as :code:`sklearn.preprocessing.PolynomialFeatures`,
    but also adds the option to omit interaction features from the library.

    Parameters
    ----------
    degree : integer, optional (default 2)
        The degree of the polynomial features.

    include_interaction : boolean, optional (default True)
        Determines whether interaction features are produced.
        If false, features are all of the form ``x[i] ** k``.

    interaction_only : boolean, optional (default False)
        If true, only interaction features are produced: features that are
        products of at most ``degree`` *distinct* input features (so not
        ``x[1] ** 2``, ``x[0] * x[2] ** 3``, etc.).

    include_bias : boolean, optional (default True)
        If True (default), then include a bias column, the feature in which
        all polynomial powers are zero (i.e. a column of ones - acts as an
        intercept term in a linear model).

    order : str in {'C', 'F'}, optional (default 'C')
        Order of output array in the dense case. 'F' order is faster to
        compute, but may slow down subsequent estimators.

    Attributes
    ----------
    powers_ : array, shape (n_output_features, n_input_features)
        powers_[i, j] is the exponent of the jth input in the ith output.

    n_features_in_ : int
        The total number of input features.

    n_output_features_ : int
        The total number of output features. This number is computed by
        iterating over all appropriately sized combinations of input features.
    """

    def __init__(
        self,
        degree=2,
        include_interaction=True,
        interaction_only=False,
        include_bias=True,
        order="C",
    ):
        super().__init__(
            degree=degree,
            interaction_only=interaction_only,
            include_bias=include_bias,
            order=order,
        )
        # Not a PolynomialFeatures parameter, so it is stored here directly.
        self.include_interaction = include_interaction

    @staticmethod
    def _combinations(
        n_features: int,
        degree: int,
        include_interaction: bool,
        interaction_only: bool,
        include_bias: bool,
    ) -> Iterator[Tuple[int, ...]]:
        """
        Create selection tuples of input indexes for each polynomial term.

        A selection tuple lists the input indexes present in a single term,
        repeated once per power.  For example, (x+y+1)^2 would be an iterator
        of the tuples:

            (), (0,), (1,), (0, 0), (0, 1), (1, 1)
            1     x     y     x^2     x*y     y^2

        Order of terms is preserved regardless of include_interaction.
        """
        if not include_interaction:
            # Pure powers only: every feature at exponent 1, then every
            # feature at exponent 2, ...; ``k * (i,)`` repeats index i k times.
            return chain(
                [()] if include_bias else [],
                (
                    exponent * (feat_idx,)
                    for exponent in range(1, degree + 1)
                    for feat_idx in range(n_features)
                ),
            )
        return PolynomialFeatures._combinations(
            n_features=n_features,
            min_degree=int(not include_bias),
            max_degree=degree,
            interaction_only=interaction_only,
            include_bias=include_bias,
        )

    @property
    def powers_(self) -> NDArray[np.int_]:
        """
        The exponents of the polynomial as an array of shape
        (n_features_out, n_features_in), where each item is the exponent of the
        jth input variable in the ith polynomial term.
        """
        check_is_fitted(self)
        n_in = self.n_features_in_
        combinations = self._combinations(
            n_features=n_in,
            degree=self.degree,
            include_interaction=self.include_interaction,
            interaction_only=self.interaction_only,
            include_bias=self.include_bias,
        )
        # Counting how often each index occurs in a selection tuple gives the
        # exponent of that input in the term.
        rows = [np.bincount(c, minlength=n_in) for c in combinations]
        if not rows:
            # degree=0 without a bias term produces no terms at all, and
            # np.vstack refuses an empty list of arrays.
            return np.empty((0, n_in), dtype=np.intp)
        return np.vstack(rows)

    def get_feature_names(self, input_features=None):
        """Return feature names for output features.

        Parameters
        ----------
        input_features : list of string, length n_features, optional
            String names for input features if available. By default,
            "x0", "x1", ..., "x{n_features - 1}" is used.

        Returns
        -------
        output_feature_names : list of string, length n_output_features
        """
        powers = self.powers_
        if input_features is None:
            input_features = [f"x{i}" for i in range(powers.shape[1])]
        feature_names = []
        for row in powers:
            inds = np.where(row)[0]
            if len(inds):
                # Exponent 1 is written as the bare name; factors are
                # separated by a single space.
                name = " ".join(
                    (
                        f"{input_features[ind]}^{exp}"
                        if exp != 1
                        else input_features[ind]
                    )
                    for ind, exp in zip(inds, row[inds])
                )
            else:
                # All-zero exponents: the bias (constant) term.
                name = "1"
            feature_names.append(name)
        return feature_names

    @x_sequence_or_item
    def fit(self, x_full: list[AxesArray], y=None):
        """
        Compute number of output features.

        Parameters
        ----------
        x_full : list of AxesArray
            The data.  Only the first entry is inspected; the length of its
            coordinate axis (``AX_COORD``) is taken as the number of input
            features.

        y : ignored
            Not used by this method.

        Returns
        -------
        self : instance

        Raises
        ------
        ValueError
            If ``degree`` is not a nonnegative integer, or if
            ``include_interaction`` is False while ``interaction_only`` is
            True.
        """
        # Type check first: comparing a non-numeric degree with 0 would raise
        # TypeError before the intended ValueError could be produced.
        if not isinstance(self.degree, int) or self.degree < 0:
            raise ValueError("degree must be a nonnegative integer")
        if (not self.include_interaction) and self.interaction_only:
            raise ValueError(
                "Can't have include_interaction be False and interaction_only"
                " be True"
            )
        n_features = x_full[0].shape[AX_COORD]
        combinations = self._combinations(
            n_features,
            self.degree,
            self.include_interaction,
            self.interaction_only,
            self.include_bias,
        )
        self.n_features_in_ = n_features
        # The iterator is lazy, so count the terms by exhausting it.
        self.n_output_features_ = sum(1 for _ in combinations)
        return self
def n_poly_features(
    n_in_feat: int,
    degree: int,
    include_bias: bool = False,
    include_interation: bool = True,
    interaction_only: bool = False,
) -> int:
    """Calculate number of polynomial features

    Args:
        n_in_feat: number of input features, e.g. 3 for x, y, z
        degree: polynomial degree, e.g. 2 for quadratic
        include_bias: whether to include a constant term
        include_interation: whether to include terms mixing multiple inputs
            (the keyword is spelled this way in the signature)
        interaction_only: whether to omit terms of x_m * x_n^p for p > 1

    Returns:
        The number of terms, always as a plain ``int``.

    Raises:
        ValueError: if ``interaction_only`` is set while
            ``include_interation`` is False.
    """
    if not include_interation and interaction_only:
        raise ValueError("Cannot set interaction only if include_interaction is False")
    # Coerce so that a bool never leaks out as the result (e.g. degree == 0).
    n_bias = int(include_bias)
    if not include_interation:
        # One pure power x_i ** k per input and per degree 1..degree.
        return n_bias + n_in_feat * degree
    degrees = range(1, degree + 1)
    if interaction_only:
        # Products of ``deg`` distinct inputs: combinations without repetition.
        return n_bias + sum(comb(n_in_feat, deg) for deg in degrees)
    # All monomials of total degree ``deg``: combinations with repetition.
    return n_bias + sum(comb(n_in_feat + deg - 1, deg) for deg in degrees)
[docs]
def IdentityLibrary():
    """
    Build a library whose output features are exactly the input features.

    This is shorthand for a polynomial library of degree 1 that has no
    constant (bias) term.
    """
    identity = PolynomialLibrary(include_bias=False, degree=1)
    return identity