scikit-learn
diff --git a/‎doc/conf.py
Lines changed: 1 addition & 0 deletions b/‎doc/conf.py
Lines changed: 1 addition & 0 deletions
diff --git a/‎doc/model_persistence.rst
Lines changed: 31 additions & 0 deletions b/‎doc/model_persistence.rst
Lines changed: 31 additions & 0 deletions
diff --git a/‎doc/related_projects.rst
Lines changed: 6 additions & 0 deletions b/‎doc/related_projects.rst
Lines changed: 6 additions & 0 deletions
diff --git a/‎doc/whats_new/v1.3.rst
Lines changed: 12 additions & 1 deletion b/‎doc/whats_new/v1.3.rst
Lines changed: 12 additions & 1 deletion
diff --git a/‎sklearn/compose/_column_transformer.py
Lines changed: 19 additions & 10 deletions b/‎sklearn/compose/_column_transformer.py
Lines changed: 19 additions & 10 deletions
diff --git a/‎sklearn/compose/tests/test_column_transformer.py
Lines changed: 17 additions & 18 deletions b/‎sklearn/compose/tests/test_column_transformer.py
Lines changed: 17 additions & 18 deletions
diff --git a/‎sklearn/datasets/_kddcup99.py
Lines changed: 1 addition & 0 deletions b/‎sklearn/datasets/_kddcup99.py
Lines changed: 1 addition & 0 deletions
@@ -331,6 +331,7 @@
     "pandas": ("https://pandas.pydata.org/pandas-docs/stable/", None),
     "joblib": ("https://joblib.readthedocs.io/en/latest/", None),
     "seaborn": ("https://seaborn.pydata.org/", None),
+    "skops": ("https://skops.readthedocs.io/en/stable/", None),
 }
 
 v = parse(release)
 
@@ -92,6 +92,37 @@ serialization methods, please refer to this
 `talk by Alex Gaynor
 <https://pyvideo.org/video/2566/pickles-are-for-delis-not-software>`_.
 
+
+A more secure format: `skops`
+.............................
+
+`skops <https://skops.readthedocs.io/en/stable/>`__ provides a more secure
+format via the :mod:`skops.io` module. It avoids using :mod:`pickle` and only
+loads files which have types and references to functions which are trusted
+either by default or by the user. The API is very similar to ``pickle``, and
+you can persist your models as explained in the `docs
+<https://skops.readthedocs.io/en/stable/persistence.html>`__ using
+:func:`skops.io.dump` and :func:`skops.io.dumps`::
+
+    import skops.io as sio
+    obj = sio.dumps(clf)
+
+And you can load them back using :func:`skops.io.load` and
+:func:`skops.io.loads`. However, you need to specify the types which are
+trusted by you. You can get existing unknown types in a dumped object / file
+using :func:`skops.io.get_untrusted_types`, and after checking its contents,
+pass it to the load function::
+
+    unknown_types = sio.get_untrusted_types(obj)
+    clf = sio.loads(obj, trusted=unknown_types)
+
+If you trust the source of the file / object, you can pass ``trusted=True``::
+
+    clf = sio.loads(obj, trusted=True)
+
+Please report issues and feature requests related to this format on the `skops
+issue tracker <https://github.com/skops-dev/skops/issues>`__.
+
 Interoperable formats
 ---------------------
 
 
@@ -115,6 +115,10 @@ enhance the functionality of scikit-learn's estimators.
   Scikit-learn pipelines to `ONNX <https://onnx.ai/>`_ for interchange and
   prediction.
 
+- `skops.io <https://skops.readthedocs.io/en/stable/persistence.html>`__ A
+  persistence model more secure than pickle, which can be used instead of
+  pickle in most common cases.
+
 - `sklearn2pmml <https://github.com/jpmml/sklearn2pmml>`_
   Serialization of a wide variety of scikit-learn estimators and transformers
   into PMML with the help of `JPMML-SkLearn <https://github.com/jpmml/jpmml-sklearn>`_
@@ -356,6 +360,8 @@ and promote community efforts.
   (`source <https://github.com/mehrdad-dev/scikit-learn>`__)
 - `Spanish translation <https://qu4nt.github.io/sklearn-doc-es/>`_
   (`source <https://github.com/qu4nt/sklearn-doc-es>`__)
+- `Korean translation <https://panda5176.github.io/scikit-learn-korean/>`_
+  (`source <https://github.com/panda5176/scikit-learn-korean>`__)
 
 
 .. rubric:: Footnotes
 
@@ -19,6 +19,11 @@ parameters, may produce different models from the previous version. This often
 occurs due to changes in the modelling logic (bug fixes or enhancements), or in
 random sampling procedures.
 
+- |Fix| The `categories_` attribute of :class:`preprocessing.OneHotEncoder` now
+  always contains an array of dtype `object` when using predefined categories that
+  are strings. Predefined categories encoded as bytes will no longer work
+  with `X` encoded as strings. :pr:`25174` by :user:`Tim Head <betatim>`.
+
 Changes impacting all modules
 -----------------------------
 
@@ -36,6 +41,13 @@ Changelog
     :pr:`123456` by :user:`Joe Bloggs <joeongithub>`.
     where 123456 is the *pull request* number, not the issue number.
 
+:mod:`sklearn.ensemble`
+.......................
+- |Feature| Compute a custom out-of-bag score by passing a callable to
+  :class:`ensemble.RandomForestClassifier`, :class:`ensemble.RandomForestRegressor`,
+  :class:`ensemble.ExtraTreesClassifier` and :class:`ensemble.ExtraTreesRegressor`.
+  :pr:`25177` by :user:`Tim Head <betatim>`.
+
 :mod:`sklearn.pipeline`
 .......................
 - |Feature| :class:`pipeline.FeatureUnion` can now use indexing notation (e.g.
@@ -44,7 +56,6 @@ Changelog
 
 :mod:`sklearn.preprocessing`
 ............................
-
 - |Enhancement| Added support for `sample_weight` in
   :class:`preprocessing.KBinsDiscretizer`. This allows specifying the parameter
   `sample_weight` for each sample to be used while fitting. The option is only
 
@@ -6,6 +6,7 @@
 # Author: Andreas Mueller
 #         Joris Van den Bossche
 # License: BSD
+from numbers import Integral, Real
 from itertools import chain
 from collections import Counter
 
@@ -20,6 +21,7 @@
 from ..utils import Bunch
 from ..utils import _safe_indexing
 from ..utils import _get_column_indices
+from ..utils._param_validation import HasMethods, Interval, StrOptions, Hidden
 from ..utils._set_output import _get_output_config, _safe_set_output
 from ..utils import check_pandas_support
 from ..utils.metaestimators import _BaseComposition
@@ -212,6 +214,20 @@ class ColumnTransformer(TransformerMixin, _BaseComposition):
 
     _required_parameters = ["transformers"]
 
+    _parameter_constraints: dict = {
+        "transformers": [list, Hidden(tuple)],
+        "remainder": [
+            StrOptions({"drop", "passthrough"}),
+            HasMethods(["fit", "transform"]),
+            HasMethods(["fit_transform", "transform"]),
+        ],
+        "sparse_threshold": [Interval(Real, 0, 1, closed="both")],
+        "n_jobs": [Integral, None],
+        "transformer_weights": [dict, None],
+        "verbose": ["verbose"],
+        "verbose_feature_names_out": ["boolean"],
+    }
+
     def __init__(
         self,
         transformers,
@@ -406,6 +422,7 @@ def _validate_transformers(self):
             if not (hasattr(t, "fit") or hasattr(t, "fit_transform")) or not hasattr(
                 t, "transform"
             ):
+                # Used to validate the transformers in the `transformers` list
                 raise TypeError(
                     "All estimators should implement fit and "
                     "transform, or can be 'drop' or 'passthrough' "
@@ -432,16 +449,6 @@ def _validate_remainder(self, X):
         Validates ``remainder`` and defines ``_remainder`` targeting
         the remaining columns.
         """
-        is_transformer = (
-            hasattr(self.remainder, "fit") or hasattr(self.remainder, "fit_transform")
-        ) and hasattr(self.remainder, "transform")
-        if self.remainder not in ("drop", "passthrough") and not is_transformer:
-            raise ValueError(
-                "The remainder keyword needs to be one of 'drop', "
-                "'passthrough', or estimator. '%s' was passed instead"
-                % self.remainder
-            )
-
         self._n_features = X.shape[1]
         cols = set(chain(*self._transformer_to_input_indices.values()))
         remaining = sorted(set(range(self._n_features)) - cols)
@@ -688,6 +695,7 @@ def fit(self, X, y=None):
         self : ColumnTransformer
             This estimator.
         """
+        self._validate_params()
         # we use fit_transform to make sure to set sparse_output_ (for which we
         # need the transformed data) to have consistent output type in predict
         self.fit_transform(X, y=y)
@@ -714,6 +722,7 @@ def fit_transform(self, X, y=None):
             any result is a sparse matrix, everything will be converted to
             sparse matrices.
         """
+        self._validate_params()
         self._check_feature_names(X, reset=True)
 
         X = _check_X(X)
 
@@ -137,6 +137,23 @@ def test_column_transformer():
     assert len(both.transformers_) == 1
 
 
+def test_column_transformer_tuple_transformers_parameter():
+    X_array = np.array([[0, 1, 2], [2, 4, 6]]).T
+
+    transformers = [("trans1", Trans(), [0]), ("trans2", Trans(), [1])]
+
+    ct_with_list = ColumnTransformer(transformers)
+    ct_with_tuple = ColumnTransformer(tuple(transformers))
+
+    assert_array_equal(
+        ct_with_list.fit_transform(X_array), ct_with_tuple.fit_transform(X_array)
+    )
+    assert_array_equal(
+        ct_with_list.fit(X_array).transform(X_array),
+        ct_with_tuple.fit(X_array).transform(X_array),
+    )
+
+
 def test_column_transformer_dataframe():
     pd = pytest.importorskip("pandas")
 
@@ -812,15 +829,6 @@ def test_column_transformer_special_strings():
     assert len(ct.transformers_) == 2
     assert ct.transformers_[-1][0] != "remainder"
 
-    # None itself / other string is not valid
-    for val in [None, "other"]:
-        ct = ColumnTransformer([("trans1", Trans(), [0]), ("trans2", None, [1])])
-        msg = "All estimators should implement"
-        with pytest.raises(TypeError, match=msg):
-            ct.fit_transform(X_array)
-        with pytest.raises(TypeError, match=msg):
-            ct.fit(X_array)
-
 
 def test_column_transformer_remainder():
     X_array = np.array([[0, 1, 2], [2, 4, 6]]).T
@@ -865,15 +873,6 @@ def test_column_transformer_remainder():
     assert ct.transformers_[-1][1] == "passthrough"
     assert_array_equal(ct.transformers_[-1][2], [1])
 
-    # error on invalid arg
-    ct = ColumnTransformer([("trans1", Trans(), [0])], remainder=1)
-    msg = "remainder keyword needs to be one of 'drop', 'passthrough', or estimator."
-    with pytest.raises(ValueError, match=msg):
-        ct.fit(X_array)
-
-    with pytest.raises(ValueError, match=msg):
-        ct.fit_transform(X_array)
-
     # check default for make_column_transformer
     ct = make_column_transformer((Trans(), [0]))
     assert ct.remainder == "drop"
 
@@ -81,6 +81,7 @@ def fetch_kddcup99(
     data_home : str, default=None
         Specify another download and cache folder for the datasets. By default
         all scikit-learn data is stored in '~/scikit_learn_data' subfolders.
+
         .. versionadded:: 0.19
 
     shuffle : bool, default=False
Original file line number	Diff line number	Diff line change
`@@ -331,6 +331,7 @@`
`331`	`331`	`"pandas": ("https://pandas.pydata.org/pandas-docs/stable/", None),`
`332`	`332`	`"joblib": ("https://joblib.readthedocs.io/en/latest/", None),`
`333`	`333`	`"seaborn": ("https://seaborn.pydata.org/", None),`
	`334`	`+ "skops": ("https://skops.readthedocs.io/en/stable/", None),`
`334`	`335`	`}`
`335`	`336`
`336`	`337`	`v = parse(release)`