skops-dev · adrinjalali · Dec 2, 2024 · Nov 4, 2024 · Nov 4, 2024 · Nov 4, 2024
diff --git a/pyproject.toml b/pyproject.toml
@@ -25,6 +25,14 @@ filterwarnings = [
     "ignore:DataFrameGroupBy.apply operated on the grouping columns.:DeprecationWarning",
     # Ignore Pandas 2.2 warning on PyArrow. It might be reverted in a later release.
     "ignore:\\s*Pyarrow will become a required dependency of pandas.*:DeprecationWarning",
+    # LightGBM sklearn 1.6 deprecation warning, fixed in the next release
+    "ignore:'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.:FutureWarning",
+    # RandomForestQuantileRegressor tags deprecation warning in sklearn 1.7
+    "ignore:The RandomForestQuantileRegressor or classes from which it inherits use `_get_tags` and `_more_tags`:FutureWarning",
+    # ExtraTreesQuantileRegressor tags deprecation warning in sklearn 1.7
+    "ignore:The ExtraTreesQuantileRegressor or classes from which it inherits use `_get_tags` and `_more_tags`:FutureWarning",
+    # BaseEstimator._validate_data deprecation warning in sklearn 1.6. TODO: remove once a new release of quantile-forest is out.
+    "ignore:`BaseEstimator._validate_data` is deprecated in 1.6 and will be removed in 1.7:FutureWarning",
 ]
 markers = [
     "network: marks tests as requiring internet (deselect with '-m \"not network\"')",

diff --git a/scripts/check_file_size.py b/scripts/check_file_size.py
@@ -20,7 +20,6 @@
 from zipfile import ZIP_DEFLATED, ZipFile
 
 import pandas as pd
-from sklearn.utils._tags import _safe_tags
 from sklearn.utils._testing import set_random_state
 
 import skops.io as sio
@@ -29,6 +28,7 @@
     _tested_estimators,
     get_input,
 )
+from skops.utils._fixes import get_tags
 
 TOPK = 10  # number of largest estimators reported
 MAX_ALLOWED_SIZE = 1024  # maximum allowed file size in kb
@@ -46,8 +46,7 @@ def check_file_size() -> None:
         set_random_state(estimator, random_state=0)
 
         X, y = get_input(estimator)
-        tags = _safe_tags(estimator)
-        if tags.get("requires_fit", True):
+        if get_tags(estimator).requires_fit:
             with warnings.catch_warnings():
                 warnings.filterwarnings("ignore", module="sklearn")
                 if y is not None:

diff --git a/scripts/check_persistence_performance.py b/scripts/check_persistence_performance.py
@@ -15,7 +15,6 @@
 from typing import Any
 
 import pandas as pd
-from sklearn.utils._tags import _safe_tags
 from sklearn.utils._testing import set_random_state
 
 import skops.io as sio
@@ -24,6 +23,7 @@
     _tested_estimators,
     get_input,
 )
+from skops.utils._fixes import get_tags
 
 ATOL = 1  # seconds absolute difference allowed at max
 NUM_REPS = 10  # number of times the check is repeated
@@ -43,8 +43,7 @@ def check_persist_performance() -> None:
         set_random_state(estimator, random_state=0)
 
         X, y = get_input(estimator)
-        tags = _safe_tags(estimator)
-        if tags.get("requires_fit", True):
+        if get_tags(estimator).requires_fit:
             with warnings.catch_warnings():
                 warnings.filterwarnings("ignore", module="sklearn")
                 if y is not None:

diff --git a/skops/_min_dependencies.py b/skops/_min_dependencies.py
@@ -33,7 +33,9 @@
     # required for persistence tests of external libraries
     "lightgbm": ("3", "tests", None),
     "xgboost": ("1.6", "tests", None),
-    "catboost": ("1.0", "tests", None),
+    # TODO: remove the python_version constraint once catboost supports Python 3.13
+    # https://github.com/catboost/catboost/issues/2748
+    "catboost": ("1.0", "tests", 'python_version < "3.13"'),
     "fairlearn": ("0.7.0", "docs, tests", None),
     "rich": ("12", "tests, rich", None),
 }

diff --git a/skops/io/_sklearn.py b/skops/io/_sklearn.py
@@ -3,44 +3,94 @@
 from typing import Any, Optional, Sequence, Type
 
 from sklearn.cluster import Birch
+from sklearn.tree._tree import Tree
 
-from ._general import TypeNode
+from ._audit import Node, get_tree
+from ._general import TypeNode, unsupported_get_state
 from ._protocol import PROTOCOL
+from ._utils import LoadContext, SaveContext, get_module, get_state, gettype
+from .exceptions import UnsupportedTypeException
 
 try:
     # TODO: remove once support for sklearn<1.2 is dropped. See #187
     from sklearn.covariance._graph_lasso import _DictWithDeprecatedKeys
 except ImportError:
     _DictWithDeprecatedKeys = None
+
 from sklearn.linear_model._sgd_fast import (
     EpsilonInsensitive,
     Hinge,
-    Huber,
-    Log,
-    LossFunction,
     ModifiedHuber,
     SquaredEpsilonInsensitive,
     SquaredHinge,
-    SquaredLoss,
 )
-from sklearn.tree._tree import Tree
 
-from ._audit import Node, get_tree
-from ._general import unsupported_get_state
-from ._utils import LoadContext, SaveContext, get_module, get_state, gettype
-from .exceptions import UnsupportedTypeException
-
-ALLOWED_SGD_LOSSES = {
-    ModifiedHuber,
-    Hinge,
-    SquaredHinge,
-    Log,
-    SquaredLoss,
-    Huber,
+ALLOWED_LOSSES = {
     EpsilonInsensitive,
+    Hinge,
+    ModifiedHuber,
     SquaredEpsilonInsensitive,
+    SquaredHinge,
 }
 
+try:
+    # TODO: remove once support for sklearn<1.6 is dropped.
+    from sklearn.linear_model._sgd_fast import (
+        Huber,
+        Log,
+        SquaredLoss,
+    )
+
+    ALLOWED_LOSSES |= {
+        Huber,
+        Log,
+        SquaredLoss,
+    }
+except ImportError:
+    pass
+
+try:
+    # sklearn>=1.6
+    from sklearn._loss._loss import (
+        CyAbsoluteError,
+        CyExponentialLoss,
+        CyHalfBinomialLoss,
+        CyHalfGammaLoss,
+        CyHalfMultinomialLoss,
+        CyHalfPoissonLoss,
+        CyHalfSquaredError,
+        CyHalfTweedieLoss,
+        CyHalfTweedieLossIdentity,
+        CyHuberLoss,
+        CyPinballLoss,
+    )
+
+    ALLOWED_LOSSES |= {
+        CyAbsoluteError,
+        CyExponentialLoss,
+        CyHalfBinomialLoss,
+        CyHalfGammaLoss,
+        CyHalfMultinomialLoss,
+        CyHalfPoissonLoss,
+        CyHalfSquaredError,
+        CyHalfTweedieLoss,
+        CyHalfTweedieLossIdentity,
+        CyHuberLoss,
+        CyPinballLoss,
+    }
+except ImportError:
+    pass
+
+# Import the common parent class of all loss functions. It is the type that
+# the get-state dispatch table matches on, so one entry covers every loss.
+try:
+    # From sklearn>=1.6
+    from sklearn._loss._loss import CyLossFunction as ParentLossClass
+except ImportError:
+    # sklearn<1.6
+    from sklearn.linear_model._sgd_fast import LossFunction as ParentLossClass
+
+
 UNSUPPORTED_TYPES = {Birch}
 
 
@@ -163,13 +213,13 @@ def __init__(
         super().__init__(state, load_context, constructor=Tree, trusted=self.trusted)
 
 
-def sgd_loss_get_state(obj: Any, save_context: SaveContext) -> dict[str, Any]:
+def loss_get_state(obj: Any, save_context: SaveContext) -> dict[str, Any]:
     state = reduce_get_state(obj, save_context)
-    state["__loader__"] = "SGDNode"
+    state["__loader__"] = "LossNode"
     return state
 
 
-class SGDNode(ReduceNode):
+class LossNode(ReduceNode):
     def __init__(
         self,
         state: dict[str, Any],
@@ -178,7 +228,7 @@ def __init__(
     ) -> None:
         # TODO: make sure trusted here makes sense and used.
         self.trusted = self._get_trusted(
-            trusted, [get_module(x) + "." + x.__name__ for x in ALLOWED_SGD_LOSSES]
+            trusted, [get_module(x) + "." + x.__name__ for x in ALLOWED_LOSSES]
         )
         super().__init__(
             state,
@@ -240,15 +290,16 @@ def _construct(self):
 
 # tuples of type and function that gets the state of that type
 GET_STATE_DISPATCH_FUNCTIONS = [
-    (LossFunction, sgd_loss_get_state),
+    (ParentLossClass, loss_get_state),
     (Tree, tree_get_state),
 ]
+
 for type_ in UNSUPPORTED_TYPES:
     GET_STATE_DISPATCH_FUNCTIONS.append((type_, unsupported_get_state))
 
 # tuples of type and function that creates the instance of that type
-NODE_TYPE_MAPPING = {
-    ("SGDNode", PROTOCOL): SGDNode,
+NODE_TYPE_MAPPING: dict[tuple[str, int], Any] = {
+    ("LossNode", PROTOCOL): LossNode,
     ("TreeNode", PROTOCOL): TreeNode,
 }