Modify the TargetEncoder class to accept CV splitters for the `cv` parameter

DuarteSJ · DuarteSJ · commit dcd46455a561 · 2025-05-06T21:18:45.000+01:00
diff --git a/sklearn/preprocessing/_target_encoder.py b/sklearn/preprocessing/_target_encoder.py
@@ -92,11 +92,15 @@ class TargetEncoder(OneToOneFeatureMixin, _BaseEncoder):
         more weight on the global target mean.
         If `"auto"`, then `smooth` is set to an empirical Bayes estimate.
 
-    cv : int, default=5
+    cv : int or cross-validation generator, default=5
         Determines the number of folds in the :term:`cross fitting` strategy used in
         :meth:`fit_transform`. For classification targets, `StratifiedKFold` is used
         and for continuous targets, `KFold` is used.
 
+        If an integer is provided, it is the number of folds and must be at least 2.
+        If a cross-validation generator is provided, it must be a :term:`CV splitter`,
+        i.e. compatible with scikit-learn's cross-validation interface.
+
     shuffle : bool, default=True
         Whether to shuffle the data in :meth:`fit_transform` before splitting into
         folds. Note that the samples within each split will not be shuffled.
@@ -191,7 +195,10 @@ class TargetEncoder(OneToOneFeatureMixin, _BaseEncoder):
         "categories": [StrOptions({"auto"}), list],
         "target_type": [StrOptions({"auto", "continuous", "binary", "multiclass"})],
         "smooth": [StrOptions({"auto"}), Interval(Real, 0, None, closed="left")],
-        "cv": [Interval(Integral, 2, None, closed="left")],
+        "cv": [
+            Interval(Integral, 2, None, closed="left"),
+            "cv_object",
+        ],
         "shuffle": ["boolean"],
         "random_state": ["random_state"],
     }