From 066bde3309b6b1b685298d4226152bca65067825 Mon Sep 17 00:00:00 2001 From: statmlben Date: Tue, 13 Jan 2026 11:56:21 +0800 Subject: [PATCH 1/2] Add monotonic constraint docs and apply code formatting - Document monotonic constraint option in constraint.rst - Add tau parameter documentation for Huber loss - Apply consistent code formatting (trailing whitespace, line wrapping) - Mark GridSearchCV as complete in to-do.md --- doc/source/tutorials/constraint.rst | 11 +- doc/source/tutorials/loss.rst | 9 +- rehline/_base.py | 298 +++++++++++++++++----------- rehline/_sklearn_mixin.py | 117 ++++++----- to-do.md | 4 +- 5 files changed, 268 insertions(+), 171 deletions(-) diff --git a/doc/source/tutorials/constraint.rst b/doc/source/tutorials/constraint.rst index 4e3f9b5a..024b3841 100644 --- a/doc/source/tutorials/constraint.rst +++ b/doc/source/tutorials/constraint.rst @@ -7,8 +7,8 @@ Usage ----- .. code:: python - - # list of + + # list of # name (str): name of the custom linear constraints # loss_kwargs: more keys and values for constraint parameters constraint = [{'name': , <**constraint_kwargs>}, ...] @@ -20,7 +20,7 @@ Usage * - constraint - | args - - | Example + - | Example * - **nonnegative** - | ``name``: 'nonnegative' or '>=0' @@ -32,6 +32,11 @@ Usage | ``tol_sen``: 1d array [p] of tolerance for fairness - | ``constraint=[{'name': 'fair', 'sen_idx': sen_idx, 'tol_sen': tol_sen}]`` + * - **monotonic** + - | ``name``: 'monotonic' or 'monotonicity' + | ``decreasing`` (*bool*): False (default) + - | ``constraint=[{'name': 'monotonic', 'decreasing': True}]`` + * - **custom** - | ``name``: 'custom' | ``A``: 2d array [K x d] for linear constraint coefficients diff --git a/doc/source/tutorials/loss.rst b/doc/source/tutorials/loss.rst index 1fe815c2..23bd9f08 100644 --- a/doc/source/tutorials/loss.rst +++ b/doc/source/tutorials/loss.rst @@ -7,7 +7,7 @@ Usage ----- .. code:: python - + # name (str): name of the custom loss function # loss_kwargs: more keys and values for loss parameters loss = {'name': , <**loss_kwargs>} @@ -24,7 +24,7 @@ Classification loss * - loss - | args - - | Example + - | Example * - **SVM** - | ``name``: 'hinge' / 'svm' / 'SVM' @@ -48,7 +48,7 @@ Regression loss * - loss - | args - - | Example + - | Example * - **Quantile Reg** - | ``name``: 'check' / 'quantile' / 'QR' @@ -57,7 +57,8 @@ Regression loss * - **Huber** - | ``name``: 'huber' / 'Huber' - - | ``loss={'name': 'huber'}`` + | ``tau`` (*float*): 1.0 (default) + - | ``loss={'name': 'huber', 'tau': 1.0}`` * - **SVR** - | ``name``: 'SVR' / 'svr' diff --git a/rehline/_base.py b/rehline/_base.py index ea03f274..7de3c28f 100644 --- a/rehline/_base.py +++ b/rehline/_base.py @@ -20,26 +20,26 @@ class _BaseReHLine(BaseEstimator): .. math:: - \min_{\mathbf{\beta} \in \mathbb{R}^d} \sum_{i=1}^n \sum_{l=1}^L \text{ReLU}( u_{li} \mathbf{x}_i^\intercal \mathbf{\beta} + v_{li}) + \sum_{i=1}^n \sum_{h=1}^H {\text{ReHU}}_{\tau_{hi}}( s_{hi} \mathbf{x}_i^\intercal \mathbf{\beta} + t_{hi}) + \frac{1}{2} \| \mathbf{\beta} \|_2^2, \\ \text{ s.t. } + \min_{\mathbf{\beta} \in \mathbb{R}^d} \sum_{i=1}^n \sum_{l=1}^L \text{ReLU}( u_{li} \mathbf{x}_i^\intercal \mathbf{\beta} + v_{li}) + \sum_{i=1}^n \sum_{h=1}^H {\text{ReHU}}_{\tau_{hi}}( s_{hi} \mathbf{x}_i^\intercal \mathbf{\beta} + t_{hi}) + \frac{1}{2} \| \mathbf{\beta} \|_2^2, \\ \text{ s.t. 
} \mathbf{A} \mathbf{\beta} + \mathbf{b} \geq \mathbf{0}, - - where :math:`\mathbf{U} = (u_{li}),\mathbf{V} = (v_{li}) \in \mathbb{R}^{L \times n}` - and :math:`\mathbf{S} = (s_{hi}),\mathbf{T} = (t_{hi}),\mathbf{\tau} = (\tau_{hi}) \in \mathbb{R}^{H \times n}` + + where :math:`\mathbf{U} = (u_{li}),\mathbf{V} = (v_{li}) \in \mathbb{R}^{L \times n}` + and :math:`\mathbf{S} = (s_{hi}),\mathbf{T} = (t_{hi}),\mathbf{\tau} = (\tau_{hi}) \in \mathbb{R}^{H \times n}` are the ReLU-ReHU loss parameters, and :math:`(\mathbf{A},\mathbf{b})` are the constraint parameters. - + Parameters ---------- C : float, default=1.0 Regularization parameter. The strength of the regularization is - inversely proportional to C. Must be strictly positive. + inversely proportional to C. Must be strictly positive. U, V: array of shape (L, n_samples), default=np.empty(shape=(0, 0)) The parameters pertaining to the ReLU part in the loss function. Tau, S, T: array of shape (H, n_samples), default=np.empty(shape=(0, 0)) The parameters pertaining to the ReHU part in the loss function. - + A: array of shape (K, n_features), default=np.empty(shape=(0, 0)) The coefficient matrix in the linear constraint. @@ -48,11 +48,18 @@ class _BaseReHLine(BaseEstimator): """ - def __init__(self, *, C=1., - U=np.empty(shape=(0,0)), V=np.empty(shape=(0,0)), - Tau=np.empty(shape=(0,0)), - S=np.empty(shape=(0,0)), T=np.empty(shape=(0,0)), - A=np.empty(shape=(0,0)), b=np.empty(shape=(0))): + def __init__( + self, + *, + C=1.0, + U=np.empty(shape=(0, 0)), + V=np.empty(shape=(0, 0)), + Tau=np.empty(shape=(0, 0)), + S=np.empty(shape=(0, 0)), + T=np.empty(shape=(0, 0)), + A=np.empty(shape=(0, 0)), + b=np.empty(shape=(0)), + ): self.C = C self._U = U self._V = V @@ -67,15 +74,15 @@ def __init__(self, *, C=1., def get_params(self, deep=True): """Get parameters for this estimator. - + Override the default get_params to exclude computation-only parameters. - + Parameters ---------- deep : bool, default=True If True, will return the parameters for this estimator and contained subobjects that are estimators. - + Returns ------- params : dict @@ -83,11 +90,26 @@ def get_params(self, deep=True): """ out = dict() for key in self._get_param_names(): - if key not in ['U', 'V', 'S', 'T', 'Tau', 'A', 'b', 'Lambda', 'Gamma', 'xi']: + if key not in [ + "U", + "V", + "S", + "T", + "Tau", + "A", + "b", + "Lambda", + "Gamma", + "xi", + ]: value = getattr(self, key) - if deep and hasattr(value, 'get_params') and not isinstance(value, type): + if ( + deep + and hasattr(value, "get_params") + and not isinstance(value, type) + ): deep_items = value.get_params().items() - out.update((key + '__' + k, val) for k, val in deep_items) + out.update((key + "__" + k, val) for k, val in deep_items) out[key] = value return out @@ -131,10 +153,10 @@ def cast_sample_weight(self, sample_weight=None): the sample weight with the ReLU and ReHU parameters. If sample_weight is None, then the sample weight is set to the weight parameter C. 
""" - + self.auto_shape() - - sample_weight = self.C*sample_weight + + sample_weight = self.C * sample_weight if self.L > 0: U_weight = self._U * sample_weight @@ -173,9 +195,9 @@ def call_ReLHLoss(self, score): relu_input = np.zeros((self.L, n)) rehu_input = np.zeros((self.H, n)) if self.L > 0: - relu_input = (self._U.T * score[:,np.newaxis]).T + self._V + relu_input = (self._U.T * score[:, np.newaxis]).T + self._V if self.H > 0: - rehu_input = (self._S.T * score[:,np.newaxis]).T + self._T + rehu_input = (self._S.T * score[:, np.newaxis]).T + self._T return np.sum(_relu(relu_input), 0) + np.sum(_rehu(rehu_input), 0) @abstractmethod @@ -201,6 +223,7 @@ def decision_function(self, X): X = check_array(X) + def _relu(x): """ Evaluation of ReLU given a vector. @@ -236,7 +259,7 @@ def _rehu(x, cut=1): Returns ------- - array of shape (n_samples, ) + array of shape (n_samples, ) The result of the ReHU function. """ @@ -246,31 +269,49 @@ def _rehu(x, cut=1): u = np.maximum(x, 0) return huber(cut, u) + def _check_relu(relu_coef, relu_intercept): - assert relu_coef.shape == relu_intercept.shape, "`relu_coef` and `relu_intercept` should be the same shape!" + assert relu_coef.shape == relu_intercept.shape, ( + "`relu_coef` and `relu_intercept` should be the same shape!" + ) + def _check_rehu(rehu_coef, rehu_intercept, rehu_cut): - assert rehu_coef.shape == rehu_intercept.shape, "`rehu_coef` and `rehu_intercept` should be the same shape!" + assert rehu_coef.shape == rehu_intercept.shape, ( + "`rehu_coef` and `rehu_intercept` should be the same shape!" + ) if len(rehu_coef) > 0: assert (rehu_cut >= 0.0).all(), "`rehu_cut` must be non-negative!" -def ReHLine_solver(X, U, V, - Tau=np.empty(shape=(0, 0)), - S=np.empty(shape=(0, 0)), T=np.empty(shape=(0, 0)), - A=np.empty(shape=(0, 0)), b=np.empty(shape=(0)), - Lambda=np.empty(shape=(0, 0)), - Gamma=np.empty(shape=(0, 0)), - xi=np.empty(shape=(0, 0)), - max_iter=1000, tol=1e-4, shrink=1, verbose=1, trace_freq=100): +def ReHLine_solver( + X, + U, + V, + Tau=np.empty(shape=(0, 0)), + S=np.empty(shape=(0, 0)), + T=np.empty(shape=(0, 0)), + A=np.empty(shape=(0, 0)), + b=np.empty(shape=(0)), + Lambda=np.empty(shape=(0, 0)), + Gamma=np.empty(shape=(0, 0)), + xi=np.empty(shape=(0, 0)), + max_iter=1000, + tol=1e-4, + shrink=1, + verbose=1, + trace_freq=100, +): result = rehline_result() - if len(Lambda)>0: + if len(Lambda) > 0: result.Lambda = np.maximum(0, np.minimum(Lambda, 1.0)) - if len(Gamma)>0: + if len(Gamma) > 0: result.Gamma = np.maximum(0, np.minimum(Gamma, Tau)) - if len(xi)>0: + if len(xi) > 0: result.xi = np.maximum(xi, 0.0) - rehline_internal(result, X, A, b, U, V, S, T, Tau, max_iter, tol, shrink, verbose, trace_freq) + rehline_internal( + result, X, A, b, U, V, S, T, Tau, max_iter, tol, shrink, verbose, trace_freq + ) return result @@ -292,7 +333,7 @@ def _make_loss_rehline_param(loss, X, y): ---------- loss : dict A dictionary containing the loss function parameters. - + Keys: - 'name' : str, the name of the loss function (e.g. 'hinge', 'svm', 'QR', etc.) 
- 'loss_kwargs': more keys and values for loss parameters @@ -308,33 +349,33 @@ def _make_loss_rehline_param(loss, X, y): n = len(y) ## initialization of ReHLine params - U=np.empty(shape=(0,0)) - V=np.empty(shape=(0,0)) - Tau=np.empty(shape=(0,0)) - S=np.empty(shape=(0,0)) - T=np.empty(shape=(0,0)) + U = np.empty(shape=(0, 0)) + V = np.empty(shape=(0, 0)) + Tau = np.empty(shape=(0, 0)) + S = np.empty(shape=(0, 0)) + T = np.empty(shape=(0, 0)) # _dummy_X = False - if (loss['name'] == 'hinge') or (loss['name'] == 'svm')\ - or (loss['name'] == 'SVM'): - U = -y.reshape(1,-1) - V = (np.array(np.ones(n))).reshape(1,-1) - - elif (loss['name'] == 'check') \ - or (loss['name'] == 'quantile') \ - or (loss['name'] == 'quantile regression') \ - or (loss['name'] == 'QR'): + if (loss["name"] == "hinge") or (loss["name"] == "svm") or (loss["name"] == "SVM"): + U = -y.reshape(1, -1) + V = (np.array(np.ones(n))).reshape(1, -1) - qt = loss['qt'] + elif ( + (loss["name"] == "check") + or (loss["name"] == "quantile") + or (loss["name"] == "quantile regression") + or (loss["name"] == "QR") + ): + qt = loss["qt"] U = np.ones((2, n)) V = np.ones((2, n)) - U[0] = - qt*U[0] - U[1] = (1-qt)*U[1] - V[0] = qt*V[0]*y - V[1] = -(1-qt)*V[1]*y + U[0] = -qt * U[0] + U[1] = (1 - qt) * U[1] + V[0] = qt * V[0] * y + V[1] = -(1 - qt) * V[1] * y # elif (loss['name'] == 'CQR') \ @@ -352,71 +393,81 @@ def _make_loss_rehline_param(loss, X, y): # X_fake[l*n:(l+1)*n,:d] = X # X_fake[l*n:(l+1)*n,d+l] = 1. - - elif (loss['name'] == 'sSVM') \ - or (loss['name'] == 'smooth SVM') \ - or (loss['name'] == 'smooth hinge'): + + elif ( + (loss["name"] == "sSVM") + or (loss["name"] == "smooth SVM") + or (loss["name"] == "smooth hinge") + ): S = np.ones((1, n)) T = np.ones((1, n)) Tau = np.ones((1, n)) - S[0] = - y + S[0] = -y - elif loss['name'] == 'TV': + elif loss["name"] == "TV": U = np.ones((2, n)) V = np.ones((2, n)) - U[1] = - U[1] + U[1] = -U[1] - V[0] = - X.dot(y) + V[0] = -X.dot(y) V[1] = X.dot(y) - elif (loss['name'] == 'huber') or (loss['name'] == 'Huber'): + elif (loss["name"] == "huber") or (loss["name"] == "Huber"): S = np.ones((2, n)) T = np.ones((2, n)) - Tau = loss['tau'] * np.ones((2, n)) + tau_tmp = loss.get("tau", 1.0) + Tau = tau_tmp * np.ones((2, n)) S[0] = -S[0] T[0] = y T[1] = -y - elif (loss['name'] in ['SVR', 'svr']): + elif loss["name"] in ["SVR", "svr"]: U = np.ones((2, n)) V = np.ones((2, n)) U[1] = -U[1] - V[0] = -(y + loss['epsilon']) - V[1] = (y - loss['epsilon']) + V[0] = -(y + loss["epsilon"]) + V[1] = y - loss["epsilon"] - - elif (loss['name'] == 'MAE') \ - or (loss['name'] == 'mae') \ - or (loss['name'] == 'mean absolute error'): + elif ( + (loss["name"] == "MAE") + or (loss["name"] == "mae") + or (loss["name"] == "mean absolute error") + ): U = np.array([[1.0] * n, [-1.0] * n]) - V = np.array([-y , y]) - - elif (loss['name'] == 'squared SVM') \ - or (loss['name'] == 'squared svm') \ - or (loss['name'] == 'squared hinge'): - Tau = np.inf * np.ones((1, n)) - S = - np.sqrt(2) * y.reshape(1,-1) - T = np.sqrt(2) * np.ones((1, n)) - - elif (loss['name'] == 'MSE') \ - or (loss['name'] == 'mse') \ - or (loss['name'] == 'mean squared error'): - Tau = np.inf * np.ones((2, n)) + V = np.array([-y, y]) + + elif ( + (loss["name"] == "squared SVM") + or (loss["name"] == "squared svm") + or (loss["name"] == "squared hinge") + ): + Tau = np.inf * np.ones((1, n)) + S = -np.sqrt(2) * y.reshape(1, -1) + T = np.sqrt(2) * np.ones((1, n)) + + elif ( + (loss["name"] == "MSE") + or (loss["name"] == "mse") + or (loss["name"] == 
"mean squared error") + ): + Tau = np.inf * np.ones((2, n)) S = np.array([[np.sqrt(2)] * n, [-np.sqrt(2)] * n]) - T = np.array([-np.sqrt(2) * y , np.sqrt(2) * y]) + T = np.array([-np.sqrt(2) * y, np.sqrt(2) * y]) - else: - raise Exception("Sorry, ReHLine currently does not support this loss function, \ - but you can manually set ReHLine params to solve the problem via `ReHLine` class.") + raise Exception( + "Sorry, ReHLine currently does not support this loss function, \ + but you can manually set ReHLine params to solve the problem via `ReHLine` class." + ) return U, V, Tau, S, T + def _make_constraint_rehline_param(constraint, X, y=None): """The `_make_constraint_rehline_param` function generates constraint parameters for the ReHLine solver. - + Parameters ---------- constraint : list of dict @@ -449,31 +500,49 @@ def _make_constraint_rehline_param(constraint, X, y=None): b = np.empty(shape=(0)) for constr_tmp in constraint: - if (constr_tmp['name'] == 'nonnegative') or (constr_tmp['name'] == '>=0'): + if (constr_tmp["name"] == "nonnegative") or (constr_tmp["name"] == ">=0"): A_tmp = np.identity(d) b_tmp = np.zeros(d) - elif (constr_tmp['name'] == 'fair') or (constr_tmp['name'] == 'fairness'): - sen_idx = constr_tmp['sen_idx'] # list of indices - tol_sen = constr_tmp['tol_sen'] + elif (constr_tmp["name"] == "fair") or (constr_tmp["name"] == "fairness"): + sen_idx = constr_tmp["sen_idx"] # list of indices + tol_sen = constr_tmp["tol_sen"] tol_sen = np.array(tol_sen).reshape(-1) X_sen = X[:, sen_idx] X_sen = X_sen.reshape(n, -1) - assert X_sen.shape[1] == len(tol_sen), "dim of X_sen and len of tol_sen must be equal" + assert X_sen.shape[1] == len(tol_sen), ( + "dim of X_sen and len of tol_sen must be equal" + ) A_tmp = np.repeat(X_sen.T @ X, repeats=[2], axis=0) / n A_tmp[::2] = -A_tmp[::2] b_tmp = np.repeat(tol_sen, repeats=[2], axis=0) - elif (constr_tmp['name'] == 'custom'): - A_tmp = constr_tmp['A'] - b_tmp = constr_tmp['b'] + elif (constr_tmp["name"] == "monotonic") or ( + constr_tmp["name"] == "monotonicity" + ): + decreasing = constr_tmp.get("decreasing", False) + idx = np.arange(d - 1) + A_tmp = np.zeros((d - 1, d)) + if decreasing: + A_tmp[idx, idx] = 1.0 + A_tmp[idx, idx + 1] = -1.0 + else: + A_tmp[idx, idx] = -1.0 + A_tmp[idx, idx + 1] = 1.0 + b_tmp = np.zeros(d - 1) + + elif constr_tmp["name"] == "custom": + A_tmp = constr_tmp["A"] + b_tmp = constr_tmp["b"] else: - raise Exception("Sorry, ReHLine currently does not support this constraint, \ - but you can add it by manually setting A and b via {'name': 'custom', 'A': A, 'b': b}") + raise Exception( + "Sorry, ReHLine currently does not support this constraint, \ + but you can add it by manually setting A and b via {'name': 'custom', 'A': A, 'b': b}" + ) A = np.vstack([A, A_tmp]) if A.size else A_tmp b = np.hstack([b, b_tmp]) if b.size else b_tmp @@ -482,14 +551,15 @@ def _make_constraint_rehline_param(constraint, X, y=None): def _make_penalty_rehline_param(self, penalty=None, X=None): - """The `_make_penalty_rehline_param` function generates penalty parameters for the ReHLine solver. - """ - raise Exception("Sorry, `_make_penalty_rehline_param` feature is currently under development.") + """The `_make_penalty_rehline_param` function generates penalty parameters for the ReHLine solver.""" + raise Exception( + "Sorry, `_make_penalty_rehline_param` feature is currently under development." + ) def _cast_sample_bias(U, V, Tau, S, T, sample_bias=None): """Cast sample bias to ReHLine parameters by injecting bias into V and T. 
- + This function modifies the ReHLine parameters to incorporate individual sample biases through linear transformations of the intercept parameters. @@ -521,7 +591,7 @@ def _cast_sample_bias(U, V, Tau, S, T, sample_bias=None): V_bias : array-like of shape (L, n_samples) Biased ReLU intercept vector: V + U * sample_bias - Tau_bias : array-like of shape (H, n_samples) + Tau_bias : array-like of shape (H, n_samples) Biased ReHU cutpoint matrix, actually doesn't change S_bias : array-like of shape (H, n_samples) @@ -535,12 +605,12 @@ def _cast_sample_bias(U, V, Tau, S, T, sample_bias=None): The transformation applies the sample bias through: - V_bias = V + U ⊙ sample_bias - T_bias = T + S ⊙ sample_bias - + where ⊙ denotes element-wise multiplication with broadcasting. """ if sample_bias is None: return U, V, Tau, S, T - + else: sample_bias = sample_bias.reshape(1, -1) U_bias = U @@ -574,7 +644,7 @@ def _cast_sample_weight(U, V, Tau, S, T, C=1.0, sample_weight=None): C : float, default=1.0 Regularization parameter. The strength of the regularization is - inversely proportional to C. Must be strictly positive. + inversely proportional to C. Must be strictly positive. sample_weight : array-like of shape (n_samples,), default=None Individual sample weight. If None, then samples are equally weighted. @@ -630,7 +700,7 @@ def _cast_sample_weight(U, V, Tau, S, T, C=1.0, sample_weight=None): # .. math:: -# \min_{\mathbf{\beta} \in \mathbb{R}^d} \sum_{i=1}^n \sum_{l=1}^L \text{ReLU}( u_{li} \mathbf{x}_i^\intercal \mathbf{\beta} + v_{li}) + \sum_{i=1}^n \sum_{h=1}^H {\text{ReHU}}_{\tau_{hi}}( s_{hi} \mathbf{x}_i^\intercal \mathbf{\beta} + t_{hi}) + \frac{1}{2} \| \mathbf{\beta} \|_2^2 + \lambda_1 \| \mathbf{\beta} \|_1, \\ \text{ s.t. } +# \min_{\mathbf{\beta} \in \mathbb{R}^d} \sum_{i=1}^n \sum_{l=1}^L \text{ReLU}( u_{li} \mathbf{x}_i^\intercal \mathbf{\beta} + v_{li}) + \sum_{i=1}^n \sum_{h=1}^H {\text{ReHU}}_{\tau_{hi}}( s_{hi} \mathbf{x}_i^\intercal \mathbf{\beta} + t_{hi}) + \frac{1}{2} \| \mathbf{\beta} \|_2^2 + \lambda_1 \| \mathbf{\beta} \|_1, \\ \text{ s.t. } # \mathbf{A} \mathbf{\beta} + \mathbf{b} \geq \mathbf{0}, # where :math:`\lambda_1` is associated with `l1_pen`. @@ -648,8 +718,8 @@ def _cast_sample_weight(U, V, Tau, S, T, C=1.0, sample_weight=None): # ------- # X_fake: ndarray of shape (n_samples+n_features, n_features) -# The manipulated data matrix. It has been padded with -# identity matrix, allowing the correctly structured data to be input +# The manipulated data matrix. It has been padded with +# identity matrix, allowing the correctly structured data to be input # into `self.fit` or other modelling processes. 
# Examples diff --git a/rehline/_sklearn_mixin.py b/rehline/_sklearn_mixin.py index 690f7da5..2b992b87 100644 --- a/rehline/_sklearn_mixin.py +++ b/rehline/_sklearn_mixin.py @@ -1,15 +1,14 @@ import numpy as np from sklearn.base import ClassifierMixin, RegressorMixin -from sklearn.utils.validation import check_array, check_is_fitted, check_X_y -from sklearn.utils.multiclass import check_classification_targets from sklearn.preprocessing import LabelEncoder -from sklearn.utils.class_weight import compute_class_weight from sklearn.utils._tags import ClassifierTags, RegressorTags +from sklearn.utils.class_weight import compute_class_weight +from sklearn.utils.multiclass import check_classification_targets +from sklearn.utils.validation import check_array, check_is_fitted, check_X_y from ._class import plqERM_Ridge - class plq_Ridge_Classifier(plqERM_Ridge, ClassifierMixin): """ Empirical Risk Minimization (ERM) Classifier with a Piecewise Linear-Quadratic (PLQ) loss @@ -84,32 +83,41 @@ class plq_Ridge_Classifier(plqERM_Ridge, ClassifierMixin): Attributes ---------- - coef\_ : ndarray of shape (n_features,) + coef_ : ndarray of shape (n_features,) Coefficients excluding the intercept. - intercept\_ : float + intercept_ : float Intercept term. 0.0 if ``fit_intercept=False``. - classes\_ : ndarray of shape (2,) + classes_ : ndarray of shape (2,) Unique class labels in the original label space. _label_encoder : LabelEncoder Encodes original labels into {0,1} for internal training. """ - def __init__(self, - loss, - constraint=[], - C=1., - U=np.empty((0, 0)), V=np.empty((0, 0)), - Tau=np.empty((0, 0)), S=np.empty((0, 0)), T=np.empty((0, 0)), - A=np.empty((0, 0)), b=np.empty((0,)), - max_iter=1000, tol=1e-4, shrink=1, warm_start=0, - verbose=0, trace_freq=100, - fit_intercept=True, - intercept_scaling=1.0, - class_weight=None): - + def __init__( + self, + loss, + constraint=[], + C=1.0, + U=np.empty((0, 0)), + V=np.empty((0, 0)), + Tau=np.empty((0, 0)), + S=np.empty((0, 0)), + T=np.empty((0, 0)), + A=np.empty((0, 0)), + b=np.empty((0,)), + max_iter=1000, + tol=1e-4, + shrink=1, + warm_start=0, + verbose=0, + trace_freq=100, + fit_intercept=True, + intercept_scaling=1.0, + class_weight=None, + ): self.loss = loss self.constraint = constraint self.C = C @@ -163,7 +171,8 @@ def fit(self, X, y, sample_weight=None): """ # Validate input (dense only) and set n_features_in_ X, y = check_X_y( - X, y, + X, + y, accept_sparse=False, dtype=np.float64, order="C", @@ -180,7 +189,7 @@ def fit(self, X, y, sample_weight=None): f"but received {self.classes_.size} classes: {self.classes_}." ) - # Compute class weights on original labels + # Compute class weights on original labels if self.class_weight is not None: cw_vec = compute_class_weight( class_weight=self.class_weight, @@ -189,7 +198,9 @@ def fit(self, X, y, sample_weight=None): ) cw_map = {c: w for c, w in zip(self.classes_, cw_vec)} sw_cw = np.asarray([cw_map[yi] for yi in y], dtype=np.float64) - sample_weight = sw_cw if sample_weight is None else (np.asarray(sample_weight) * sw_cw) + sample_weight = ( + sw_cw if sample_weight is None else (np.asarray(sample_weight) * sw_cw) + ) # Encode -> {0,1} -> {-1,+1} le = LabelEncoder().fit(self.classes_) @@ -228,7 +239,9 @@ def decision_function(self, X): ndarray of shape (n_samples,) Continuous scores for each sample. 
""" - check_is_fitted(self, attributes=["coef_", "intercept_", "_label_encoder", "classes_"]) + check_is_fitted( + self, attributes=["coef_", "intercept_", "_label_encoder", "classes_"] + ) X = check_array(X, accept_sparse=False, dtype=np.float64, order="C") return X @ self.coef_ + self.intercept_ @@ -254,11 +267,11 @@ def __sklearn_tags__(self): """ Return scikit-learn estimator tags for compatibility. """ - tags = super().__sklearn_tags__() + tags = super().__sklearn_tags__() tags.estimator_type = "classifier" tags.classifier_tags = ClassifierTags() - tags.target_tags.required = True - tags.input_tags.sparse = False + tags.target_tags.required = True + tags.input_tags.sparse = False return tags @@ -270,8 +283,8 @@ class plq_Ridge_Regressor(plqERM_Ridge, RegressorMixin): This wrapper adds standard sklearn conveniences while delegating loss/constraint construction to :class:`plqERM_Ridge` (via `_make_loss_rehline_param` / `_make_constraint_rehline_param`). - Key behavior - ------------ + Notes + ----- - **Intercept handling**: if ``fit_intercept=True``, a constant column (value = ``intercept_scaling``) is appended to the right of the design matrix before calling the base solver. The last learned coefficient is then split out as ``intercept_``. @@ -293,7 +306,7 @@ class plq_Ridge_Regressor(plqERM_Ridge, RegressorMixin): - ``{'name': 'nonnegative'}`` or ``{'name': '>=0'}`` - ``{'name': 'fair', 'sen_idx': list[int], 'tol_sen': list[float]}`` - ``{'name': 'custom', 'A': ndarray[K, d], 'b': ndarray[K]}`` - + Note: when ``fit_intercept=True``, a constant column is appended **as the last column**; since you index sensitive columns by ``sen_idx`` on the *original* features, indices stay valid. C : float, default=1.0 @@ -323,11 +336,11 @@ class plq_Ridge_Regressor(plqERM_Ridge, RegressorMixin): Attributes ---------- - coef\_ : ndarray of shape (n_features,) + coef_ : ndarray of shape (n_features,) Learned linear coefficients (excluding the intercept term). - intercept\_ : float + intercept_ : float Intercept term extracted from the last coefficient when ``fit_intercept=True``, otherwise 0.0. - n_features_in\_ : int + n_features_in_ : int Number of input features seen during :meth:`fit` (before intercept augmentation). Notes @@ -337,18 +350,27 @@ class plq_Ridge_Regressor(plqERM_Ridge, RegressorMixin): densifies data (at the cost of memory). 
""" - def __init__(self, - loss={'name': 'QR', 'qt': 0.5}, - constraint=[], - C=1., - U=np.empty((0, 0)), V=np.empty((0, 0)), - Tau=np.empty((0, 0)), S=np.empty((0, 0)), T=np.empty((0, 0)), - A=np.empty((0, 0)), b=np.empty((0,)), - max_iter=1000, tol=1e-4, shrink=1, warm_start=0, - verbose=0, trace_freq=100, - fit_intercept=True, - intercept_scaling=1.0): - + def __init__( + self, + loss={"name": "QR", "qt": 0.5}, + constraint=[], + C=1.0, + U=np.empty((0, 0)), + V=np.empty((0, 0)), + Tau=np.empty((0, 0)), + S=np.empty((0, 0)), + T=np.empty((0, 0)), + A=np.empty((0, 0)), + b=np.empty((0,)), + max_iter=1000, + tol=1e-4, + shrink=1, + warm_start=0, + verbose=0, + trace_freq=100, + fit_intercept=True, + intercept_scaling=1.0, + ): self.loss = loss self.constraint = constraint self.C = C @@ -376,7 +398,6 @@ def __init__(self, self.fit_intercept = fit_intercept self.intercept_scaling = float(intercept_scaling) - def fit(self, X, y, sample_weight=None): """ If ``fit_intercept=True``, a constant column (value = ``intercept_scaling``) is appended @@ -423,7 +444,7 @@ def fit(self, X, y, sample_weight=None): return self def decision_function(self, X): - """Compute f(X) = X @ coef\_ + intercept\_. + """Compute f(X) = X @ coef_ + intercept_. Parameters ---------- @@ -442,7 +463,7 @@ def decision_function(self, X): def predict(self, X): """ Predict targets as the linear decision function. - + Parameters ---------- X : ndarray of shape (n_samples, n_features) diff --git a/to-do.md b/to-do.md index c57cef23..36e4478e 100644 --- a/to-do.md +++ b/to-do.md @@ -2,7 +2,7 @@ ## src - [x] warmstarting -- [ ] GridSearchCV +- [x] GridSearchCV ## Class - [ ] sklearn Classifier and Regressor Estimator @@ -15,4 +15,4 @@ ## Constraint - [ ] box constraints -- [ ] Monotonic constraints \ No newline at end of file +- [ ] Monotonic constraints From 213d10154957d7eb5edeabb27a83a37b576a98a2 Mon Sep 17 00:00:00 2001 From: statmlben Date: Tue, 13 Jan 2026 11:56:38 +0800 Subject: [PATCH 2/2] Add tests for monotonic constraint in ridge regression --- tests/_test_monotonic.py | 50 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) create mode 100644 tests/_test_monotonic.py diff --git a/tests/_test_monotonic.py b/tests/_test_monotonic.py new file mode 100644 index 00000000..587e6abf --- /dev/null +++ b/tests/_test_monotonic.py @@ -0,0 +1,50 @@ +import numpy as np +from sklearn.datasets import make_regression + +from rehline import plq_Ridge_Regressor + + +def test_monotonic_increasing(): + """Test monotonic increasing constraint.""" + # Generate synthetic data + X, y = make_regression(n_samples=100, n_features=10, noise=0.1, random_state=42) + + # Define monotonic increasing constraint + constraint = [{"name": "monotonic", "decreasing": False}] + + # Fit model + clf = plq_Ridge_Regressor(loss={"name": "huber"}, constraint=constraint, C=1.0) + clf.fit(X, y) + + # Check if coefficients are non-decreasing + coef = clf.coef_ + diffs = np.diff(coef) + + # Allow for small numerical errors + assert np.all(diffs >= -1e-3), f"Coefficients are not monotonic increasing: {coef}" + + +def test_monotonic_decreasing(): + """Test monotonic decreasing constraint.""" + # Generate synthetic data + X, y = make_regression(n_samples=100, n_features=10, noise=0.1, random_state=42) + + # Define monotonic decreasing constraint + constraint = [{"name": "monotonic", "decreasing": True}] + + # Fit model + clf = plq_Ridge_Regressor(loss={"name": "huber"}, constraint=constraint, C=1.0) + clf.fit(X, y) + + # Check if coefficients are 
non-increasing + coef = clf.coef_ + diffs = np.diff(coef) + + # Allow for small numerical errors + assert np.all(diffs <= 1e-3), f"Coefficients are not monotonic decreasing: {coef}" + + +if __name__ == "__main__": + test_monotonic_increasing() + test_monotonic_decreasing() + print("All monotonic constraint tests passed!")
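
A quick usage sketch of the new monotonic constraint, mirroring the tests above. It assumes only what this patch provides (the `plq_Ridge_Regressor` API, the Huber loss with its documented `tau` default of 1.0, and the `{'name': 'monotonic', 'decreasing': ...}` constraint entry); the synthetic data and the numerical tolerance are illustrative, not prescriptive.

.. code:: python

    import numpy as np
    from rehline import plq_Ridge_Regressor

    # Illustrative synthetic data with increasing true coefficients
    rng = np.random.default_rng(0)
    X = rng.normal(size=(200, 5))
    y = X @ np.array([0.1, 0.2, 0.3, 0.4, 0.5]) + 0.05 * rng.normal(size=200)

    # Huber loss (tau defaults to 1.0) with a monotonic-increasing coefficient constraint
    est = plq_Ridge_Regressor(
        loss={"name": "huber", "tau": 1.0},
        constraint=[{"name": "monotonic", "decreasing": False}],
        C=1.0,
    )
    est.fit(X, y)

    # Fitted coefficients should be non-decreasing up to small numerical error,
    # matching the tolerance used in tests/_test_monotonic.py
    assert np.all(np.diff(est.coef_) >= -1e-3)

Internally, the ``'monotonic'`` entry expands to the (d-1) x d first-difference matrix built in ``_make_constraint_rehline_param``, so it stacks with any other constraints passed in the same list.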