Source code for pysindy.feature_library.generalized_library

from itertools import repeat
from typing import Optional
from typing import Sequence
from warnings import warn

import numpy as np
from sklearn.utils.validation import check_is_fitted

from ..utils import AxesArray
from .base import _unique
from .base import BaseFeatureLibrary
from .base import x_sequence_or_item
from .weak_pde_library import WeakPDELibrary


class GeneralizedLibrary(BaseFeatureLibrary):
    """Put multiple libraries into one library.

    All settings provided to individual libraries will be applied. Note that
    this class allows one to specifically choose which input variables are
    used for each library, and take tensor products of any pair of libraries.
    Tensored libraries inherit the same input variables specified for the
    individual libraries.

    Parameters
    ----------
    libraries : list of libraries
        Library instances to be applied to the input matrix.

    tensor_array : 2D list of booleans, optional, (default None)
        Default is to not tensor any of the libraries together. Shape
        equal to the # of tensor libraries and the # feature libraries.
        Indicates which pairs of libraries to tensor product together and
        add to the overall library. For instance if you have 5 libraries,
        and want to do two tensor products, you could use the list
        [[1, 0, 0, 1, 0], [0, 1, 0, 1, 1]] to indicate that you want two
        tensored libraries from tensoring libraries 0 and 3 and libraries
        1, 3, and 4.

    inputs_per_library : Sequence of Sequences of ints (default None)
        List that specifies which input indexes should be passed as inputs
        for each of the individual feature libraries. Length must equal the
        number of feature libraries. Default is that all inputs are used
        for every library.

    exclude_libraries : list of ints, optional (default None)
        Indices into the fitted library list (the individual libraries in
        the order given, followed by the tensored libraries in the order of
        ``tensor_array``) whose features are left out of the output.
        ``None`` excludes nothing.

    Attributes
    ----------
    libraries_full_ : list[BaseFeatureLibrary]
        The fitted libraries: the individual libraries followed by any
        tensored libraries.

    n_features_in_ : int
        The total number of input features.

    n_output_features_ : int
        The total number of output features. The number of output features
        is the sum of the numbers of output features for each of the
        concatenated libraries that is not excluded.

    Examples
    --------
    >>> import numpy as np
    >>> from pysindy.feature_library import FourierLibrary, CustomLibrary
    >>> from pysindy.feature_library import GeneralizedLibrary
    >>> x = np.array([[0.,-1],[1.,0.],[2.,-1.]])
    >>> functions = [lambda x : np.exp(x), lambda x,y : np.sin(x+y)]
    >>> lib_custom = CustomLibrary(library_functions=functions)
    >>> lib_fourier = FourierLibrary()
    >>> lib_generalized = GeneralizedLibrary([lib_custom, lib_fourier])
    >>> lib_generalized.fit(x)
    >>> lib_generalized.transform(x)
    """

    def __init__(
        self,
        libraries: list,
        tensor_array=None,
        inputs_per_library: Optional[Sequence[Sequence[int]]] = None,
        exclude_libraries=None,
    ):
        if len(libraries) > 0:
            self.libraries = libraries
            # Inspect the supplied libraries one by one. Asking the helpers
            # about ``self`` does not work because they descend through a
            # ``libraries_`` attribute, which this class does not define.
            any_weak = any(has_weak(lib) for lib in libraries)
            any_nonweak = any(has_nonweak(lib) for lib in libraries)
            if any_weak and any_nonweak:
                raise ValueError(
                    "At least one of the libraries is a weak form library, "
                    "and at least one of the libraries is not, which will "
                    "result in a nonsensical optimization problem. Please use "
                    "all weak form libraries or no weak form libraries."
                )
        else:
            raise ValueError(
                "Empty or nonsensical library list passed to this library."
            )

        if inputs_per_library is not None:
            if len(inputs_per_library) != len(libraries):
                raise ValueError(
                    "If specifying different inputs for each library, then "
                    "first dimension of inputs_per_library must be equal to "
                    "the number of libraries being used."
                )
            if isinstance(inputs_per_library, np.ndarray):
                warn(
                    "inputs_per_library should no longer be passed as a numpy array",
                    UserWarning,
                )
                inputs_per_library = [list(row) for row in inputs_per_library]
            if any(x_ind < 0 for inputs in inputs_per_library for x_ind in inputs):
                raise ValueError(
                    "The inputs_per_library parameter must be a numpy array "
                    "of integers with values between 0 and "
                    "len(input_variables) - 1."
                )

        if tensor_array is not None:
            tensor_shape = np.asarray(tensor_array)
            if tensor_shape.ndim != 2:
                raise ValueError("Tensor product array should be 2D list.")
            if tensor_shape.shape[-1] != len(libraries):
                raise ValueError(
                    "If specifying tensor products between libraries, then "
                    "last dimension of tensor_array must be equal to the "
                    "number of libraries being used."
                )
            flat_entries = np.ravel(tensor_array)
            if np.any(flat_entries > 1) or np.any(flat_entries < 0):
                raise ValueError(
                    "The tensor_array parameter must be a numpy array "
                    "of booleans, so values must be either 0 or 1."
                )
            for row in tensor_array:
                if np.sum(row) < 2:
                    raise ValueError(
                        "If specifying libraries to tensor together, must "
                        "specify at least two libraries (there should be at "
                        "least two entries with value 1 in the tensor_array)."
                    )

        self.tensor_array = tensor_array
        self.inputs_per_library = inputs_per_library
        # A fresh list per instance; a literal ``[]`` default would be one
        # object shared by every instance constructed without the argument.
        self.exclude_libraries = (
            [] if exclude_libraries is None else exclude_libraries
        )

    @x_sequence_or_item
    def fit(self, x_full, y=None):
        """
        Compute number of output features.

        Parameters
        ----------
        x_full : sequence of array-like, each with n_features along the
            coordinate axis
            The data. The first element determines ``n_features_in_``.

        y : optional
            Passed through unchanged to each library's ``fit``.

        Returns
        -------
        self : instance

        Raises
        ------
        ValueError
            If ``inputs_per_library`` refers to an input index that does not
            exist in the data.
        """
        n_features = x_full[0].shape[x_full[0].ax_coord]
        self.n_features_in_ = n_features

        if self.inputs_per_library is None:
            # If parameter is not set, use all the inputs for every library.
            # NOTE: this overwrites the constructor parameter, so the
            # resolved index lists persist across later calls to fit.
            self.inputs_per_library = list(
                repeat(list(range(n_features)), len(self.libraries))
            )
        elif any(
            input_ind >= n_features
            for input_list in self.inputs_per_library
            for input_ind in input_list
        ):
            # Check that the numbers in inputs_per_library are sensible
            raise ValueError(
                "Each row in inputs_per_library must consist of integers "
                "between 0 and the number of total input features - 1. "
            )

        # First fit all libraries separately, each with its subset of inputs
        fitted_libs = [
            lib.fit([x[..., _unique(self.inputs_per_library[i])] for x in x_full], y)
            for i, lib in enumerate(self.libraries)
        ]

        # Next, tensor some libraries and append them to the list
        if self.tensor_array is not None:
            num_tensor_prods = np.shape(self.tensor_array)[0]
            for i in range(num_tensor_prods):
                lib_inds = np.ravel(np.where(self.tensor_array[i]))
                library_subset = np.asarray(fitted_libs)[lib_inds]
                # The product of library objects builds the tensored library
                library_full = np.prod(library_subset)
                library_full._set_inputs_per_library(
                    [self.inputs_per_library[lib_ind] for lib_ind in lib_inds]
                )
                # Tensored libraries take the full input and subsample it
                # internally
                library_full.fit(x_full, y)
                fitted_libs.append(library_full)

        # exclude_libraries holds positions in fitted_libs, so the filter
        # must test the index rather than the library object.
        self.n_output_features_ = sum(
            lib.n_output_features_
            for i, lib in enumerate(fitted_libs)
            if i not in self.exclude_libraries
        )

        # Save fitted libs
        self.libraries_full_ = fitted_libs
        return self

    @x_sequence_or_item
    def transform(self, x_full):
        """Transform data with the fitted libraries.

        Parameters
        ----------
        x_full : sequence of array-like, each with n_features along the
            coordinate axis
            The data to transform.

        Returns
        -------
        xp_full : list of AxesArray
            One array per input, holding the features of every non-excluded
            library concatenated along the coordinate axis.

        Raises
        ------
        ValueError
            If an input does not have the number of features seen in ``fit``.
        """
        check_is_fitted(self, attributes=["n_features_in_"])

        n_individual = len(self.inputs_per_library)
        xp_full = []
        for x in x_full:
            if x.shape[x.ax_coord] != self.n_features_in_:
                raise ValueError("x shape does not match training shape")

            xps = []
            for i, lib in enumerate(self.libraries_full_):
                # Same rule as fit and get_feature_names, so columns, names
                # and n_output_features_ always agree.
                if i in self.exclude_libraries:
                    continue
                if i < n_individual:
                    lib_input = x[..., _unique(self.inputs_per_library[i])]
                else:
                    # Tensored libraries subsample the inputs themselves
                    lib_input = x
                xps.append(lib.transform([lib_input])[0])

            xp_full.append(
                AxesArray(np.concatenate(xps, axis=xps[0].ax_coord), xps[0].axes)
            )
        return xp_full

    def get_feature_names(self, input_features=None):
        """Return feature names for output features.

        Parameters
        ----------
        input_features : list of string, length n_features, optional
            String names for input features if available. By default,
            "x0", "x1", ... "xn_features" is used.

        Returns
        -------
        output_feature_names : list of string, length n_output_features
        """
        check_is_fitted(self)

        feature_names = []
        for i, lib in enumerate(self.libraries_full_):
            if i in self.exclude_libraries:
                continue
            if i < len(self.libraries):
                input_inds = _unique(self.inputs_per_library[i])
                if input_features is None:
                    input_features_i = ["x%d" % k for k in input_inds]
                else:
                    input_features_i = np.asarray(input_features)[
                        input_inds
                    ].tolist()
            elif input_features is None:
                # Tensor libraries need all the inputs and then internally
                # handle the subsampling of the input variables
                input_features_i = ["x%d" % k for k in range(self.n_features_in_)]
            else:
                input_features_i = input_features
            feature_names += lib.get_feature_names(input_features_i)
        return feature_names

    def calc_trajectory(self, diff_method, x, t):
        """Delegate the trajectory calculation to the first library."""
        return self.libraries[0].calc_trajectory(diff_method, x, t)

    def get_spatial_grid(self):
        """Return the first non-None spatial grid among the libraries.

        Returns ``None`` when no library reports a spatial grid.
        """
        for lib_k in self.libraries:
            spatial_grid = lib_k.get_spatial_grid()
            if spatial_grid is not None:
                return spatial_grid
        return None
def _sub_libraries(lib):
    """Return the child libraries of a composite library, or None for a leaf.

    Composite libraries are recognised by a ``libraries_`` attribute or by a
    ``libraries`` attribute (the name ``GeneralizedLibrary`` uses).
    """
    for attr_name in ("libraries_", "libraries"):
        children = getattr(lib, attr_name, None)
        if children is not None:
            return children
    return None


def has_weak(lib):
    """Return True if ``lib`` is, or contains, a ``WeakPDELibrary``.

    Parameters
    ----------
    lib : feature library
        A single library or a composite library holding child libraries.

    Returns
    -------
    bool
    """
    if isinstance(lib, WeakPDELibrary):
        return True
    children = _sub_libraries(lib)
    if children is None:
        return False
    return any(has_weak(lib_k) for lib_k in children)


def has_nonweak(lib):
    """Return True if ``lib`` is, or contains, a library that is not weak.

    A composite library is judged only by its children; a library without
    children is non-weak unless it is a ``WeakPDELibrary``.

    Parameters
    ----------
    lib : feature library
        A single library or a composite library holding child libraries.

    Returns
    -------
    bool
    """
    children = _sub_libraries(lib)
    if children is not None:
        return any(has_nonweak(lib_k) for lib_k in children)
    return not isinstance(lib, WeakPDELibrary)