Source code for pysindy.optimizers.trapping_sr3

import warnings

import cvxpy as cp
import numpy as np
from scipy.linalg import cho_factor
from scipy.linalg import cho_solve
from sklearn.exceptions import ConvergenceWarning

from ..utils import reorder_constraints
from .sr3 import SR3


[docs]class TrappingSR3(SR3):
    """
    Trapping variant of sparse relaxed regularized regression.
    This optimizer can be used to identify systems with globally
    stable (bounded) solutions.

    Attempts to minimize one of two related objective functions

    .. math::

        0.5\\|y-Xw\\|^2_2 + \\lambda R(w)
        + 0.5\\|Pw-A\\|^2_2/\\eta + \\delta_0(Cw-d)
        + \\delta_{\\Lambda}(A)

    or

    .. math::

        0.5\\|y-Xw\\|^2_2 + \\lambda R(w)
        + \\delta_0(Cw-d)
        + 0.5 * maximumeigenvalue(A)/\\eta

    where :math:`R(w)` is a regularization function, which must be convex,
    :math:`\\delta_0` is an indicator function that provides a hard constraint
    of CW = d, and :math:\\delta_{\\Lambda} is a term to project the :math:`A`
    matrix onto the space of negative definite matrices.
    See the following references for more details:

        Kaptanoglu, Alan A., et al. "Promoting global stability in
        data-driven models of quadratic nonlinear dynamics."
        arXiv preprint arXiv:2105.01843 (2021).

        Zheng, Peng, et al. "A unified framework for sparse relaxed
        regularized regression: Sr3." IEEE Access 7 (2018): 1404-1423.

        Champion, Kathleen, et al. "A unified sparse optimization framework
        to learn parsimonious physics-informed models from data."
        IEEE Access 8 (2020): 169259-169271.

    Parameters
    ----------
    evolve_w : bool, optional (default True)
        If false, don't update w and just minimize over (m, A)

    threshold : float, optional (default 0.1)
        Determines the strength of the regularization. When the
        regularization function R is the L0 norm, the regularization
        is equivalent to performing hard thresholding, and lambda
        is chosen to threshold at the value given by this parameter.
        This is equivalent to choosing lambda = threshold^2 / (2 * nu).

    eta : float, optional (default 1.0e20)
        Determines the strength of the stability term ||Pw-A||^2 in the
        optimization. The default value is very large so that the
        algorithm default is to ignore the stability term. In this limit,
        this should be approximately equivalent to the ConstrainedSR3 method.

    alpha_m : float, optional (default eta * 0.1)
        Determines the step size in the prox-gradient descent over m.
        For convergence, need alpha_m <= eta / ||w^T * PQ^T * PQ * w||.
        Typically 0.01 * eta <= alpha_m <= 0.1 * eta.

    alpha_A : float, optional (default eta)
        Determines the step size in the prox-gradient descent over A.
        For convergence, need alpha_A <= eta, so typically
        alpha_A = eta is used.

    gamma : float, optional (default 0.1)
        Determines the negative interval that matrix A is projected onto.
        For most applications gamma = 0.1 - 1.0 works pretty well.

    tol : float, optional (default 1e-5)
        Tolerance used for determining convergence of the optimization
        algorithm over w.

    tol_m : float, optional (default 1e-5)
        Tolerance used for determining convergence of the optimization
        algorithm over m.

    thresholder : string, optional (default 'L1')
        Regularization function to use. For current trapping SINDy,
        only the L1 and L2 norms are implemented. Note that other convex norms
        could be straightforwardly implemented, but L0 requires
        reformulation because of nonconvexity.

    thresholds : np.ndarray, shape (n_targets, n_features), optional \
            (default None)
        Array of thresholds for each library function coefficient.
        Each row corresponds to a measurement variable and each column
        to a function from the feature library.
        Recall that SINDy seeks a matrix :math:`\\Xi` such that
        :math:`\\dot{X} \\approx \\Theta(X)\\Xi`.
        ``thresholds[i, j]`` should specify the threshold to be used for the
        (j + 1, i + 1) entry of :math:`\\Xi`. That is to say it should give the
        threshold to be used for the (j + 1)st library function in the equation
        for the (i + 1)st measurement variable.

    eps_solver : float, optional (default 1.0e-7)
        If threshold != 0, this specifies the error tolerance in the
        CVXPY (OSQP) solve. Default is 1.0e-3 in OSQP.

    relax_optim : bool, optional (default True)
        If relax_optim = True, use the relax-and-split method. If False,
        try a direct minimization on the largest eigenvalue.

    inequality_constraints : bool, optional (default False)
        If True, relax_optim must be false or relax_optim = True
        AND threshold != 0, so that the CVXPY methods are used.

    max_iter : int, optional (default 30)
        Maximum iterations of the optimization algorithm.

    accel : bool, optional (default False)
        Whether or not to use accelerated prox-gradient descent for (m, A).

    m0 : np.ndarray, shape (n_targets), optional (default None)
        Initial guess for vector m in the optimization. Otherwise
        each component of m is randomly initialized in [-1, 1].

    A0 : np.ndarray, shape (n_targets, n_targets), optional (default None)
        Initial guess for vector A in the optimization. Otherwise
        A is initialized as A = diag(gamma).

    fit_intercept : boolean, optional (default False)
        Whether to calculate the intercept for this model. If set to false, no
        intercept will be used in calculations.

    copy_X : boolean, optional (default True)
        If True, X will be copied; else, it may be overwritten.

    normalize_columns : boolean, optional (default False)
        Normalize the columns of x (the SINDy library terms) before regression
        by dividing by the L2-norm. Note that the 'normalize' option in sklearn
        is deprecated in sklearn versions >= 1.0 and will be removed.

    verbose : bool, optional (default False)
        If True, prints out the different error terms every
        max_iter / 10 iterations.

    verbose_cvxpy : bool, optional (default False)
        Boolean flag which is passed to CVXPY solve function to indicate if
        output should be verbose or not. Only relevant for optimizers that
        use the CVXPY package in some capabity.

    Attributes
    ----------
    coef_ : array, shape (n_features,) or (n_targets, n_features)
        Regularized weight vector(s). This is the v in the objective
        function.

    history_ : list
        History of sparse coefficients. ``history_[k]`` contains the
        sparse coefficients (v in the optimization objective function)
        at iteration k.

    objective_history_ : list
        History of the value of the objective at each step. Note that
        the trapping SINDy problem is nonconvex, meaning that this value
        may increase and decrease as the algorithm works.

    A_history_ : list
        History of the auxiliary variable A that approximates diag(PW).

    m_history_ : list
        History of the shift vector m that determines the origin of the
        trapping region.

    PW_history_ : list
        History of PW = A^S, the quantity we are attempting to make
        negative definite.

    PWeigs_history_ : list
        History of diag(PW), a list of the eigenvalues of A^S at
        each iteration. Tracking this allows us to ascertain if
        A^S is indeed being pulled towards the space of negative
        definite matrices.

    PL_unsym_ : np.ndarray, shape (n_targets, n_targets, n_targets, n_features)
        Unsymmetrized linear coefficient part of the P matrix in ||Pw - A||^2

    PL_ : np.ndarray, shape (n_targets, n_targets, n_targets, n_features)
        Linear coefficient part of the P matrix in ||Pw - A||^2

    PQ_ : np.ndarray, shape (n_targets, n_targets,
                            n_targets, n_targets, n_features)
        Quadratic coefficient part of the P matrix in ||Pw - A||^2

    Examples
    --------
    >>> import numpy as np
    >>> from scipy.integrate import odeint
    >>> from pysindy import SINDy
    >>> from pysindy.optimizers import TrappingSR3
    >>> lorenz = lambda z,t : [10*(z[1] - z[0]),
    >>>                        z[0]*(28 - z[2]) - z[1],
    >>>                        z[0]*z[1] - 8/3*z[2]]
    >>> t = np.arange(0,2,.002)
    >>> x = odeint(lorenz, [-8,8,27], t)
    >>> opt = TrappingSR3(threshold=0.1)
    >>> model = SINDy(optimizer=opt)
    >>> model.fit(x, t=t[1]-t[0])
    >>> model.print()
    x0' = -10.004 1 + 10.004 x0
    x1' = 27.994 1 + -0.993 x0 + -1.000 1 x1
    x2' = -2.662 x1 + 1.000 1 x0
    """

    def __init__(
        self,
        evolve_w=True,
        threshold=0.1,
        eps_solver=1e-7,
        relax_optim=True,
        inequality_constraints=False,
        eta=None,
        alpha_A=None,
        alpha_m=None,
        gamma=-0.1,
        tol=1e-5,
        tol_m=1e-5,
        thresholder="l1",
        thresholds=None,
        max_iter=30,
        accel=False,
        normalize_columns=False,
        fit_intercept=False,
        copy_X=True,
        m0=None,
        A0=None,
        objective_history=None,
        constraint_lhs=None,
        constraint_rhs=None,
        constraint_order="target",
        verbose=False,
        verbose_cvxpy=False,
    ):
        super(TrappingSR3, self).__init__(
            threshold=threshold,
            max_iter=max_iter,
            normalize_columns=normalize_columns,
            fit_intercept=fit_intercept,
            copy_X=copy_X,
            thresholder=thresholder,
            thresholds=thresholds,
            verbose=verbose,
        )
        if thresholder.lower() not in ("l1", "l2", "weighted_l1", "weighted_l2"):
            raise ValueError("Regularizer must be (weighted) L1 or L2")
        if eta is None:
            warnings.warn(
                "eta was not set, so defaulting to eta = 1e20 "
                "with alpha_m = 1e-2 * eta, alpha_A = eta. Here eta is so "
                "large that the stability term in the optimization "
                "will be ignored."
            )
            eta = 1e20
            alpha_m = 1e18
            alpha_A = 1e20
        else:
            if alpha_m is None:
                alpha_m = eta * 1e-2
            if alpha_A is None:
                alpha_A = eta
        if eta <= 0:
            raise ValueError("eta must be positive")
        if alpha_m < 0 or alpha_m > eta:
            raise ValueError("0 <= alpha_m <= eta")
        if alpha_A < 0 or alpha_A > eta:
            raise ValueError("0 <= alpha_A <= eta")
        if gamma >= 0:
            raise ValueError("gamma must be negative")
        if tol <= 0 or tol_m <= 0 or eps_solver <= 0:
            raise ValueError("tol and tol_m must be positive")
        if not evolve_w and not relax_optim:
            raise ValueError("If doing direct solve, must evolve w")
        if inequality_constraints and relax_optim and threshold == 0.0:
            raise ValueError(
                "Ineq. constr. -> threshold!=0 + relax_optim=True or relax_optim=False."
            )
        if inequality_constraints and not evolve_w:
            raise ValueError(
                "Use of inequality constraints requires solving for xi (evolve_w=True)."
            )

        self.evolve_w = evolve_w
        self.eps_solver = eps_solver
        self.relax_optim = relax_optim
        self.inequality_constraints = inequality_constraints
        self.m0 = m0
        self.A0 = A0
        self.alpha_A = alpha_A
        self.alpha_m = alpha_m
        self.eta = eta
        self.gamma = gamma
        self.tol = tol
        self.tol_m = tol_m
        self.accel = accel
        self.verbose_cvxpy = verbose_cvxpy
        self.A_history_ = []
        self.m_history_ = []
        self.PW_history_ = []
        self.PWeigs_history_ = []
        self.history_ = []
        self.objective_history = objective_history
        self.unbias = False
        self.use_constraints = (constraint_lhs is not None) and (
            constraint_rhs is not None
        )

        if self.use_constraints:
            if constraint_order not in ("feature", "target"):
                raise ValueError(
                    "constraint_order must be either 'feature' or 'target'"
                )

            self.constraint_lhs = constraint_lhs
            self.constraint_rhs = constraint_rhs
            self.unbias = False
            self.constraint_order = constraint_order

    def _set_Ptensors(self, r):
        """Make the projection tensors used for the algorithm."""
        N = int((r**2 + 3 * r) / 2.0)

        # delta_{il}delta_{jk}
        PL_tensor = np.zeros((r, r, r, N))
        PL_tensor_unsym = np.zeros((r, r, r, N))
        for i in range(r):
            for j in range(r):
                for k in range(r):
                    for kk in range(N):
                        if i == k and j == kk:
                            PL_tensor_unsym[i, j, k, kk] = 1.0

        # Now symmetrize PL
        for i in range(r):
            for j in range(N):
                PL_tensor[:, :, i, j] = 0.5 * (
                    PL_tensor_unsym[:, :, i, j] + PL_tensor_unsym[:, :, i, j].T
                )

        # if j == k, delta_{il}delta_{N-r+j,n}
        # if j != k, delta_{il}delta_{r+j+k-1,n}
        PQ_tensor = np.zeros((r, r, r, r, N))
        for i in range(r):
            for j in range(r):
                for k in range(r):
                    for kk in range(r):
                        for n in range(N):
                            if (j == k) and (n == N - r + j) and (i == kk):
                                PQ_tensor[i, j, k, kk, n] = 1.0
                            if (j != k) and (n == r + j + k - 1) and (i == kk):
                                PQ_tensor[i, j, k, kk, n] = 1 / 2

        return PL_tensor_unsym, PL_tensor, PQ_tensor

    def _bad_PL(self, PL):
        """Check if PL tensor is properly defined"""
        tol = 1e-10
        return np.any((np.transpose(PL, [1, 0, 2, 3]) - PL) > tol)

    def _bad_PQ(self, PQ):
        """Check if PQ tensor is properly defined"""
        tol = 1e-10
        return np.any((np.transpose(PQ, [0, 2, 1, 3, 4]) - PQ) > tol)

    def _check_P_matrix(self, r, n_features, N):
        """Check if P tensor is properly defined"""
        # If these tensors are not passed, or incorrect shape, assume zeros
        if self.PQ_ is None:
            self.PQ_ = np.zeros((r, r, r, r, n_features))
            warnings.warn(
                "The PQ tensor (a requirement for the stability promotion) was"
                " not set, so setting this tensor to all zeros. "
            )
        elif (self.PQ_).shape != (r, r, r, r, n_features) and (self.PQ_).shape != (
            r,
            r,
            r,
            r,
            N,
        ):
            self.PQ_ = np.zeros((r, r, r, r, n_features))
            warnings.warn(
                "The PQ tensor (a requirement for the stability promotion) was"
                " initialized with incorrect dimensions, "
                "so setting this tensor to all zeros "
                "(with the correct dimensions). "
            )
        if self.PL_ is None:
            self.PL_ = np.zeros((r, r, r, n_features))
            warnings.warn(
                "The PL tensor (a requirement for the stability promotion) was"
                " not set, so setting this tensor to all zeros. "
            )
        elif (self.PL_).shape != (r, r, r, n_features) and (self.PL_).shape != (
            r,
            r,
            r,
            N,
        ):
            self.PL_ = np.zeros((r, r, r, n_features))
            warnings.warn(
                "The PL tensor (a requirement for the stability promotion) was"
                " initialized with incorrect dimensions, "
                "so setting this tensor to all zeros "
                "(with the correct dimensions). "
            )

        # Check if the tensor symmetries are properly defined
        if self._bad_PL(self.PL_):
            raise ValueError("PL tensor was passed but the symmetries are not correct")
        if self._bad_PQ(self.PQ_):
            raise ValueError("PQ tensor was passed but the symmetries are not correct")

        # If PL/PQ finite and correct, so trapping theorem is being used,
        # then make sure library is quadratic and correct shape
        if (np.any(self.PL_ != 0.0) or np.any(self.PQ_ != 0.0)) and n_features != N:
            print(
                "The feature library is the wrong shape or not quadratic, "
                "so please correct this if you are attempting to use the "
                "trapping algorithm with the stability term included. Setting "
                "PL and PQ tensors to zeros for now."
            )
            self.PL_ = np.zeros((r, r, r, n_features))
            self.PQ_ = np.zeros((r, r, r, r, n_features))

    def _update_coef_constraints(self, H, x_transpose_y, P_transpose_A, coef_sparse):
        """Solves the coefficient update analytically if threshold = 0"""
        g = x_transpose_y + P_transpose_A / self.eta
        inv1 = np.linalg.pinv(H, rcond=1e-15)
        inv2 = np.linalg.pinv(
            self.constraint_lhs.dot(inv1).dot(self.constraint_lhs.T), rcond=1e-15
        )

        rhs = g.flatten() + self.constraint_lhs.T.dot(inv2).dot(
            self.constraint_rhs - self.constraint_lhs.dot(inv1).dot(g.flatten())
        )
        rhs = rhs.reshape(g.shape)
        return inv1.dot(rhs)

    def _update_A(self, A_old, PW):
        """Update the symmetrized A matrix"""
        eigvals, eigvecs = np.linalg.eigh(A_old)
        eigPW, eigvecsPW = np.linalg.eigh(PW)
        r = A_old.shape[0]
        A = np.diag(eigvals)
        for i in range(r):
            if eigvals[i] > self.gamma:
                A[i, i] = self.gamma
        return eigvecsPW @ A @ np.linalg.inv(eigvecsPW)

    def _convergence_criterion(self):
        """Calculate the convergence criterion for the optimization over w"""
        this_coef = self.history_[-1]
        if len(self.history_) > 1:
            last_coef = self.history_[-2]
        else:
            last_coef = np.zeros_like(this_coef)
        err_coef = np.sqrt(np.sum((this_coef - last_coef) ** 2))
        return err_coef

    def _m_convergence_criterion(self):
        """Calculate the convergence criterion for the optimization over m"""
        return np.sum(np.abs(self.m_history_[-2] - self.m_history_[-1]))

    def _objective(self, x, y, coef_sparse, A, PW, q):
        """Objective function"""
        # Compute the errors
        R2 = (y - np.dot(x, coef_sparse)) ** 2
        A2 = (A - PW) ** 2
        L1 = self.threshold * np.sum(np.abs(coef_sparse.flatten()))

        # convoluted way to print every max_iter / 10 iterations
        if self.verbose and q % max(1, self.max_iter // 10) == 0:
            row = [
                q,
                0.5 * np.sum(R2),
                0.5 * np.sum(A2) / self.eta,
                L1,
                0.5 * np.sum(R2) + 0.5 * np.sum(A2) / self.eta + L1,
            ]
            print(
                "{0:10d} ... {1:10.4e} ... {2:10.4e} ... {3:10.4e}"
                " ... {4:10.4e}".format(*row)
            )
        return 0.5 * np.sum(R2) + 0.5 * np.sum(A2) / self.eta + L1

    def _solve_sparse_relax_and_split(self, r, N, x_expanded, y, Pmatrix, A, coef_prev):
        """Solve coefficient update with CVXPY if threshold != 0"""
        xi = cp.Variable(N * r)
        cost = cp.sum_squares(x_expanded @ xi - y.flatten())
        if self.thresholder.lower() == "l1":
            cost = cost + self.threshold * cp.norm1(xi)
        elif self.thresholder.lower() == "weighted_l1":
            cost = cost + cp.norm1(np.ravel(self.thresholds) @ xi)
        elif self.thresholder.lower() == "l2":
            cost = cost + self.threshold * cp.norm2(xi) ** 2
        elif self.thresholder.lower() == "weighted_l2":
            cost = cost + cp.norm2(np.ravel(self.thresholds) @ xi) ** 2
        cost = cost + cp.sum_squares(Pmatrix @ xi - A.flatten()) / self.eta
        if self.use_constraints:
            if self.inequality_constraints:
                prob = cp.Problem(
                    cp.Minimize(cost),
                    [self.constraint_lhs @ xi <= self.constraint_rhs],
                )
            else:
                prob = cp.Problem(
                    cp.Minimize(cost),
                    [self.constraint_lhs @ xi == self.constraint_rhs],
                )
        else:
            prob = cp.Problem(cp.Minimize(cost))

        # default solver is OSQP here but switches to ECOS for L2
        try:
            prob.solve(
                eps_abs=self.eps_solver,
                eps_rel=self.eps_solver,
                verbose=self.verbose_cvxpy,
            )
        # Annoying error coming from L2 norm switching to use the ECOS
        # solver, which uses "max_iters" instead of "max_iter", and
        # similar semantic changes for the other variables.
        except TypeError:
            try:
                prob.solve(
                    abstol=self.eps_solver,
                    reltol=self.eps_solver,
                    verbose=self.verbose_cvxpy,
                )
            except cp.error.SolverError:
                print("Solver failed, setting coefs to zeros")
                xi.value = np.zeros(N * r)
        except cp.error.SolverError:
            print("Solver failed, setting coefs to zeros")
            xi.value = np.zeros(N * r)

        if xi.value is None:
            warnings.warn(
                "Infeasible solve, increase/decrease eta",
                ConvergenceWarning,
            )
            return None
        coef_sparse = (xi.value).reshape(coef_prev.shape)
        return coef_sparse

    def _solve_m_relax_and_split(self, r, N, m_prev, m, A, coef_sparse, tk_previous):
        """
        If using the relaxation formulation of trapping SINDy, solves the
        (m, A) algorithm update.
        """
        # prox-grad for (A, m)
        # Accelerated prox gradient descent
        if self.accel:
            tk = (1 + np.sqrt(1 + 4 * tk_previous**2)) / 2.0
            m_partial = m + (tk_previous - 1.0) / tk * (m - m_prev)
            tk_previous = tk
            mPQ = np.tensordot(m_partial, self.PQ_, axes=([0], [0]))
        else:
            mPQ = np.tensordot(m, self.PQ_, axes=([0], [0]))
        p = self.PL_ - mPQ
        PW = np.tensordot(p, coef_sparse, axes=([3, 2], [0, 1]))
        PQW = np.tensordot(self.PQ_, coef_sparse, axes=([4, 3], [0, 1]))
        A_b = (A - PW) / self.eta
        PQWT_PW = np.tensordot(PQW, A_b, axes=([2, 1], [0, 1]))
        if self.accel:
            m_new = m_partial - self.alpha_m * PQWT_PW
        else:
            m_new = m_prev - self.alpha_m * PQWT_PW
        m_current = m_new

        # Update A
        A_new = self._update_A(A - self.alpha_A * A_b, PW)
        return m_current, m_new, A_new, tk_previous

    def _solve_nonsparse_relax_and_split(self, H, xTy, P_transpose_A, coef_prev):
        """Update for the coefficients if threshold = 0."""
        if self.use_constraints:
            coef_sparse = self._update_coef_constraints(
                H, xTy, P_transpose_A, coef_prev
            ).reshape(coef_prev.shape)
        else:
            cho = cho_factor(H)
            coef_sparse = cho_solve(cho, xTy + P_transpose_A / self.eta).reshape(
                coef_prev.shape
            )
        return coef_sparse

    def _solve_direct_cvxpy(self, r, N, x_expanded, y, Pmatrix, coef_prev):
        """
        If using the direct formulation of trapping SINDy, solves the
        entire problem in CVXPY regardless of the threshold value.
        Note that this is a convex-composite (i.e. technically nonconvex)
        problem, solved in CVXPY, so convergence/quality guarantees are
        not available here!
        """
        xi = cp.Variable(N * r)
        cost = cp.sum_squares(x_expanded @ xi - y.flatten())
        if self.thresholder.lower() == "l1":
            cost = cost + self.threshold * cp.norm1(xi)
        elif self.thresholder.lower() == "weighted_l1":
            cost = cost + cp.norm1(np.ravel(self.thresholds) @ xi)
        elif self.thresholder.lower() == "l2":
            cost = cost + self.threshold * cp.norm2(xi) ** 2
        elif self.thresholder.lower() == "weighted_l2":
            cost = cost + cp.norm2(np.ravel(self.thresholds) @ xi) ** 2
        cost = cost + cp.lambda_max(cp.reshape(Pmatrix @ xi, (r, r))) / self.eta
        if self.use_constraints:
            if self.inequality_constraints:
                prob = cp.Problem(
                    cp.Minimize(cost),
                    [self.constraint_lhs @ xi <= self.constraint_rhs],
                )
            else:
                prob = cp.Problem(
                    cp.Minimize(cost),
                    [self.constraint_lhs @ xi == self.constraint_rhs],
                )
        else:
            prob = cp.Problem(cp.Minimize(cost))

        # default solver is SCS here
        try:
            prob.solve(eps=self.eps_solver, verbose=self.verbose_cvxpy)
        # Annoying error coming from L2 norm switching to use the ECOS
        # solver, which uses "max_iters" instead of "max_iter", and
        # similar semantic changes for the other variables.
        except TypeError:
            prob.solve(
                abstol=self.eps_solver,
                reltol=self.eps_solver,
                verbose=self.verbose_cvxpy,
            )
        except cp.error.SolverError:
            print("Solver failed, setting coefs to zeros")
            xi.value = np.zeros(N * r)

        if xi.value is None:
            print("Infeasible solve, increase/decrease eta")
            return None, None
        coef_sparse = (xi.value).reshape(coef_prev.shape)

        if np.all(self.PL_ == 0) and np.all(self.PQ_ == 0):
            return np.zeros(r), coef_sparse  # no optimization over m
        else:
            m_cp = cp.Variable(r)
            L = np.tensordot(self.PL_, coef_sparse, axes=([3, 2], [0, 1]))
            Q = np.reshape(
                np.tensordot(self.PQ_, coef_sparse, axes=([4, 3], [0, 1])), (r, r * r)
            )
            Ls = 0.5 * (L + L.T).flatten()
            cost_m = cp.lambda_max(cp.reshape(Ls - m_cp @ Q, (r, r)))
            prob_m = cp.Problem(cp.Minimize(cost_m))

            # default solver is SCS here
            prob_m.solve(eps=self.eps_solver, verbose=self.verbose_cvxpy)

            m = m_cp.value
            if m is None:
                print("Infeasible solve over m, increase/decrease eta")
                return None, coef_sparse
            return m, coef_sparse

    def _reduce(self, x, y):
        """
        Perform at most ``self.max_iter`` iterations of the
        TrappingSR3 algorithm.
        Assumes initial guess for coefficients is stored in ``self.coef_``.
        """

        n_samples, n_features = x.shape
        r = y.shape[1]
        N = int((r**2 + 3 * r) / 2.0)

        # Define PL and PQ tensors, only relevant if the stability term in
        # trapping SINDy is turned on.
        self.PL_unsym_, self.PL_, self.PQ_ = self._set_Ptensors(r)
        # make sure dimensions/symmetries are correct
        self._check_P_matrix(r, n_features, N)

        # Set initial coefficients
        if self.use_constraints and self.constraint_order.lower() == "target":
            self.constraint_lhs = reorder_constraints(
                self.constraint_lhs, n_features, output_order="target"
            )
        coef_sparse = self.coef_.T

        # Print initial values for each term in the optimization
        if self.verbose:
            row = [
                "Iteration",
                "Data Error",
                "Stability Error",
                "L1 Error",
                "Total Error",
            ]
            print(
                "{: >10} ... {: >10} ... {: >10} ... {: >10}"
                " ... {: >10}".format(*row)
            )

        # initial A
        if self.A0 is not None:
            A = self.A0
        elif np.any(self.PQ_ != 0.0):
            A = np.diag(self.gamma * np.ones(r))
        else:
            A = np.diag(np.zeros(r))
        self.A_history_.append(A)

        # initial guess for m
        if self.m0 is not None:
            m = self.m0
        else:
            np.random.seed(1)
            m = (np.random.rand(r) - np.ones(r)) * 2
        self.m_history_.append(m)

        # Precompute some objects for optimization
        x_expanded = np.zeros((n_samples, r, n_features, r))
        for i in range(r):
            x_expanded[:, i, :, i] = x
        x_expanded = np.reshape(x_expanded, (n_samples * r, r * n_features))
        xTx = np.dot(x_expanded.T, x_expanded)
        xTy = np.dot(x_expanded.T, y.flatten())

        # if using acceleration
        tk_prev = 1
        m_prev = m

        # Begin optimization loop
        objective_history = []
        for k in range(self.max_iter):

            # update P tensor from the newest m
            mPQ = np.tensordot(m, self.PQ_, axes=([0], [0]))
            p = self.PL_ - mPQ
            Pmatrix = p.reshape(r * r, r * n_features)

            # update w
            coef_prev = coef_sparse
            if self.evolve_w:
                if self.relax_optim:
                    if self.threshold > 0.0:
                        coef_sparse = self._solve_sparse_relax_and_split(
                            r, n_features, x_expanded, y, Pmatrix, A, coef_prev
                        )
                    else:
                        pTp = np.dot(Pmatrix.T, Pmatrix)
                        H = xTx + pTp / self.eta
                        P_transpose_A = np.dot(Pmatrix.T, A.flatten())
                        coef_sparse = self._solve_nonsparse_relax_and_split(
                            H, xTy, P_transpose_A, coef_prev
                        )
                else:
                    m, coef_sparse = self._solve_direct_cvxpy(
                        r, n_features, x_expanded, y, Pmatrix, coef_prev
                    )

            # If problem over xi becomes infeasible, break out of the loop
            if coef_sparse is None:
                coef_sparse = coef_prev
                break

            if self.relax_optim:
                m_prev, m, A, tk_prev = self._solve_m_relax_and_split(
                    r, n_features, m_prev, m, A, coef_sparse, tk_prev
                )

            # If problem over m becomes infeasible, break out of the loop
            if m is None:
                m = m_prev
                break
            self.history_.append(coef_sparse.T)
            PW = np.tensordot(p, coef_sparse, axes=([3, 2], [0, 1]))

            # (m,A) update finished, append the result
            self.m_history_.append(m)
            self.A_history_.append(A)
            eigvals, eigvecs = np.linalg.eig(PW)
            self.PW_history_.append(PW)
            self.PWeigs_history_.append(np.sort(eigvals))

            # update objective
            objective_history.append(self._objective(x, y, coef_sparse, A, PW, k))

            if (
                self._m_convergence_criterion() < self.tol_m
                and self._convergence_criterion() < self.tol
            ):
                # Could not (further) select important features
                break
        if k == self.max_iter - 1:
            warnings.warn(
                "TrappingSR3._reduce did not converge after {} iters.".format(
                    self.max_iter
                ),
                ConvergenceWarning,
            )

        self.coef_ = coef_sparse.T
        self.objective_history = objective_history