Merge branch 'scikit-learn:main' into update-scikit-learn · scikit-learn/scikit-learn@c37cf78 · GitHub

Commit c37cf78

Merge branch 'scikit-learn:main' into update-scikit-learn

2 parents 5e76ebd + de968ed

36 files changed: +597 / -364 lines

doc/modules/classes.rst

Lines changed: 1 addition & 1 deletion
@@ -1122,7 +1122,7 @@ See the :ref:`visualizations` section of the user guide for further details.

 .. autosummary::
    :toctree: generated/
-   :template: display.rst
+   :template: display_all_class_methods.rst

    metrics.ConfusionMatrixDisplay
    metrics.DetCurveDisplay

doc/whats_new/v1.3.rst

Lines changed: 8 additions & 2 deletions
@@ -240,6 +240,12 @@ Changelog
   dataframe.
   :pr:`25931` by :user:`Yao Xiao <Charlie-XIAO>`.

+- |Fix| :class:`ensemble.HistGradientBoostingRegressor` and
+  :class:`ensemble.HistGradientBoostingClassifier` treats negative values for
+  categorical features consistently as missing values, following LightGBM's and
+  pandas' conventions.
+  :pr:`25629` by `Thomas Fan`_.
+
 :mod:`sklearn.exception`
 ........................
 - |Feature| Added :class:`exception.InconsistentVersionWarning` which is raised
@@ -284,8 +290,8 @@ Changelog
   estimators consistent with the rest of estimators.
   :pr:`25697` by :user:`John Pangas <jpangas>`.

-- |Enhancement| The `n_iter_` attribute has been included in
-  :class:`linear_model.ARDRegression` to expose the actual number of iterations
+- |Enhancement| The `n_iter_` attribute has been included in
+  :class:`linear_model.ARDRegression` to expose the actual number of iterations
   required to reach the stopping criterion.
   :pr:`25697` by :user:`John Pangas <jpangas>`.
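A quick illustration of the new behavior described in the |Fix| entry above (a hypothetical snippet, not part of the commit): once a column is declared categorical, a negative entry is routed to the same missing-values bin as NaN.

import numpy as np
from sklearn.ensemble import HistGradientBoostingClassifier

rng = np.random.RandomState(0)
X = rng.randint(0, 4, size=(200, 1)).astype(float)  # one categorical feature
y = (X[:, 0] >= 2).astype(int)
clf = HistGradientBoostingClassifier(categorical_features=[0]).fit(X, y)

# After this fix, -1 and NaN follow the same code path (missing-values bin),
# so they yield identical predictions.
X_test = np.array([[-1.0], [np.nan]])
assert clf.predict(X_test)[0] == clf.predict(X_test)[1]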

sklearn/calibration.py

Lines changed: 23 additions & 36 deletions
@@ -30,16 +30,16 @@
 from .utils import (
     column_or_1d,
     indexable,
-    check_matplotlib_support,
     _safe_indexing,
 )
-from .utils._response import _get_response_values_binary

-from .utils.multiclass import check_classification_targets, type_of_target
+from .utils.multiclass import check_classification_targets
 from .utils.parallel import delayed, Parallel
 from .utils._param_validation import StrOptions, HasMethods, Hidden
+from .utils._plotting import _BinaryClassifierCurveDisplayMixin
 from .utils.validation import (
     _check_fit_params,
+    _check_pos_label_consistency,
     _check_sample_weight,
     _num_samples,
     check_consistent_length,
@@ -48,7 +48,6 @@
 from .isotonic import IsotonicRegression
 from .svm import LinearSVC
 from .model_selection import check_cv, cross_val_predict
-from .metrics._base import _check_pos_label_consistency


 class CalibratedClassifierCV(ClassifierMixin, MetaEstimatorMixin, BaseEstimator):
@@ -1013,7 +1012,7 @@ def calibration_curve(
     return prob_true, prob_pred


-class CalibrationDisplay:
+class CalibrationDisplay(_BinaryClassifierCurveDisplayMixin):
     """Calibration curve (also known as reliability diagram) visualization.

     It is recommended to use
@@ -1124,13 +1123,8 @@ def plot(self, *, ax=None, name=None, ref_line=True, **kwargs):
         display : :class:`~sklearn.calibration.CalibrationDisplay`
             Object that stores computed values.
         """
-        check_matplotlib_support("CalibrationDisplay.plot")
-        import matplotlib.pyplot as plt
+        self.ax_, self.figure_, name = self._validate_plot_params(ax=ax, name=name)

-        if ax is None:
-            fig, ax = plt.subplots()
-
-        name = self.estimator_name if name is None else name
         info_pos_label = (
             f"(Positive class: {self.pos_label})" if self.pos_label is not None else ""
         )
@@ -1141,20 +1135,20 @@ def plot(self, *, ax=None, name=None, ref_line=True, **kwargs):
         line_kwargs.update(**kwargs)

         ref_line_label = "Perfectly calibrated"
-        existing_ref_line = ref_line_label in ax.get_legend_handles_labels()[1]
+        existing_ref_line = ref_line_label in self.ax_.get_legend_handles_labels()[1]
         if ref_line and not existing_ref_line:
-            ax.plot([0, 1], [0, 1], "k:", label=ref_line_label)
-        self.line_ = ax.plot(self.prob_pred, self.prob_true, "s-", **line_kwargs)[0]
+            self.ax_.plot([0, 1], [0, 1], "k:", label=ref_line_label)
+        self.line_ = self.ax_.plot(self.prob_pred, self.prob_true, "s-", **line_kwargs)[
+            0
+        ]

         # We always have to show the legend for at least the reference line
-        ax.legend(loc="lower right")
+        self.ax_.legend(loc="lower right")

         xlabel = f"Mean predicted probability {info_pos_label}"
         ylabel = f"Fraction of positives {info_pos_label}"
-        ax.set(xlabel=xlabel, ylabel=ylabel)
+        self.ax_.set(xlabel=xlabel, ylabel=ylabel)

-        self.ax_ = ax
-        self.figure_ = ax.figure
         return self

     @classmethod
@@ -1260,15 +1254,15 @@ def from_estimator(
         >>> disp = CalibrationDisplay.from_estimator(clf, X_test, y_test)
         >>> plt.show()
         """
-        method_name = f"{cls.__name__}.from_estimator"
-        check_matplotlib_support(method_name)
-
-        check_is_fitted(estimator)
-        y_prob, pos_label = _get_response_values_binary(
-            estimator, X, response_method="predict_proba", pos_label=pos_label
+        y_prob, pos_label, name = cls._validate_and_get_response_values(
+            estimator,
+            X,
+            y,
+            response_method="predict_proba",
+            pos_label=pos_label,
+            name=name,
         )

-        name = name if name is not None else estimator.__class__.__name__
         return cls.from_predictions(
             y,
             y_prob,
@@ -1378,26 +1372,19 @@ def from_predictions(
         >>> disp = CalibrationDisplay.from_predictions(y_test, y_prob)
         >>> plt.show()
         """
-        method_name = f"{cls.__name__}.from_predictions"
-        check_matplotlib_support(method_name)
-
-        target_type = type_of_target(y_true)
-        if target_type != "binary":
-            raise ValueError(
-                f"The target y is not binary. Got {target_type} type of target."
-            )
+        pos_label_validated, name = cls._validate_from_predictions_params(
+            y_true, y_prob, sample_weight=None, pos_label=pos_label, name=name
+        )

         prob_true, prob_pred = calibration_curve(
             y_true, y_prob, n_bins=n_bins, strategy=strategy, pos_label=pos_label
         )
-        name = "Classifier" if name is None else name
-        pos_label = _check_pos_label_consistency(pos_label, y_true)

         disp = cls(
             prob_true=prob_true,
             prob_pred=prob_pred,
             y_prob=y_prob,
             estimator_name=name,
-            pos_label=pos_label,
+            pos_label=pos_label_validated,
         )
         return disp.plot(ax=ax, ref_line=ref_line, **kwargs)
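Note on the refactor: the matplotlib availability check, the binary-target and pos_label validation, and the default display name all move into the shared _BinaryClassifierCurveDisplayMixin; the public API is unchanged. A minimal usage sketch, mirroring the docstring examples in the diff:

import matplotlib.pyplot as plt
from sklearn.calibration import CalibrationDisplay
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

X, y = make_classification(random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
clf = LogisticRegression().fit(X_train, y_train)

# Fitted-estimator check, predict_proba lookup, and name defaulting now all
# happen inside the mixin before anything is drawn.
disp = CalibrationDisplay.from_estimator(clf, X_test, y_test, n_bins=5)
plt.show()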

sklearn/compose/_column_transformer.py

Lines changed: 2 additions & 0 deletions
@@ -936,6 +936,8 @@ def _get_transformer_list(estimators):
     return transformer_list


+# This function is not validated using validate_params because
+# it's just a factory for ColumnTransformer.
 def make_column_transformer(
     *transformers,
     remainder="drop",
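For context on why the comment is enough here: the factory only forwards its arguments to ColumnTransformer, whose own constructor and fit do the validation, so decorating the factory with validate_params would duplicate that work. A typical call:

from sklearn.compose import make_column_transformer
from sklearn.preprocessing import OneHotEncoder, StandardScaler

ct = make_column_transformer(
    (StandardScaler(), [0, 1]),  # scale two numeric columns
    (OneHotEncoder(), [2]),      # one-hot encode a categorical column
    remainder="drop",            # validated by ColumnTransformer itself
)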

sklearn/discriminant_analysis.py

Lines changed: 1 addition & 1 deletion
@@ -640,7 +640,7 @@ def fit(self, X, y):
                 intercept_ = xp.asarray(
                     self.intercept_[1] - self.intercept_[0], dtype=X.dtype
                 )
-                self.intercept_ = xp.reshape(intercept_, 1)
+                self.intercept_ = xp.reshape(intercept_, (1,))
         self._n_features_out = self._max_components
         return self
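The tuple matters for array API compatibility: the standard specifies reshape's shape argument as a tuple of ints, so the bare 1 only worked because NumPy is lenient; strict array API namespaces reject the scalar form. In NumPy terms:

import numpy as np

x = np.asarray([3.0])
np.reshape(x, (1,))  # portable: shape given as a tuple, per the array API
np.reshape(x, 1)     # NumPy-only convenience; strict namespaces reject it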

sklearn/ensemble/_hist_gradient_boosting/_binning.pyx

Lines changed: 11 additions & 1 deletion
@@ -8,6 +8,7 @@ from .common cimport X_DTYPE_C, X_BINNED_DTYPE_C

 def _map_to_bins(const X_DTYPE_C [:, :] data,
                  list binning_thresholds,
+                 const unsigned char[::1] is_categorical,
                  const unsigned char missing_values_bin_idx,
                  int n_threads,
                  X_BINNED_DTYPE_C [::1, :] binned):
@@ -23,6 +24,8 @@ def _map_to_bins(const X_DTYPE_C [:, :] data,
     binning_thresholds : list of arrays
         For each feature, stores the increasing numeric values that are
         used to separate the bins.
+    is_categorical : ndarray of unsigned char of shape (n_features,)
+        Indicates categorical features.
     n_threads : int
         Number of OpenMP threads to use.
     binned : ndarray, shape (n_samples, n_features)
@@ -34,13 +37,15 @@ def _map_to_bins(const X_DTYPE_C [:, :] data,
     for feature_idx in range(data.shape[1]):
         _map_col_to_bins(data[:, feature_idx],
                          binning_thresholds[feature_idx],
+                         is_categorical[feature_idx],
                          missing_values_bin_idx,
                          n_threads,
                          binned[:, feature_idx])


 cdef void _map_col_to_bins(const X_DTYPE_C [:] data,
                            const X_DTYPE_C [:] binning_thresholds,
+                           const unsigned char is_categorical,
                            const unsigned char missing_values_bin_idx,
                            int n_threads,
                            X_BINNED_DTYPE_C [:] binned):
@@ -53,7 +58,12 @@ cdef void _map_col_to_bins(const X_DTYPE_C [:] data,

     for i in prange(data.shape[0], schedule='static', nogil=True,
                     num_threads=n_threads):
-        if isnan(data[i]):
+        if (
+            isnan(data[i]) or
+            # To follow LightGBM's conventions, negative values for
+            # categorical features are considered as missing values.
+            (is_categorical and data[i] < 0)
+        ):
             binned[i] = missing_values_bin_idx
         else:
             # for known values, use binary search
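A rough NumPy sketch of the per-column logic above (not the compiled code path; the function name is ours): the new is_categorical flag folds negative values into the missing-values bin, while everything else keeps going through binary search.

import numpy as np

def map_col_to_bins_py(col, binning_thresholds, is_categorical,
                       missing_values_bin_idx):
    # Binary search: index of the first threshold >= value, matching the
    # Cython loop after the isnan check.
    binned = np.searchsorted(binning_thresholds, col, side="left")
    missing = np.isnan(col)
    if is_categorical:
        # LightGBM convention: negative categorical values are missing.
        missing |= col < 0
    binned[missing] = missing_values_bin_idx
    return binned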

sklearn/ensemble/_hist_gradient_boosting/binning.py

Lines changed: 6 additions & 1 deletion
@@ -275,7 +275,12 @@ def transform(self, X):
         n_threads = _openmp_effective_n_threads(self.n_threads)
         binned = np.zeros_like(X, dtype=X_BINNED_DTYPE, order="F")
         _map_to_bins(
-            X, self.bin_thresholds_, self.missing_values_bin_idx_, n_threads, binned
+            X,
+            self.bin_thresholds_,
+            self.is_categorical_,
+            self.missing_values_bin_idx_,
+            n_threads,
+            binned,
         )
         return binned

sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py

Lines changed: 9 additions & 6 deletions
@@ -269,6 +269,11 @@ def _check_categories(self, X):
             if missing.any():
                 categories = categories[~missing]

+            # Treat negative values for categorical features as missing values.
+            negative_categories = categories < 0
+            if negative_categories.any():
+                categories = categories[~negative_categories]
+
             if hasattr(self, "feature_names_in_"):
                 feature_name = f"'{self.feature_names_in_[f_idx]}'"
             else:
@@ -1265,9 +1270,8 @@ class HistGradientBoostingRegressor(RegressorMixin, BaseHistGradientBoosting):
         data has feature names).

         For each categorical feature, there must be at most `max_bins` unique
-        categories, and each categorical value must be in [0, max_bins -1].
-        During prediction, categories encoded as a negative value are treated as
-        missing values.
+        categories, and each categorical value must be less then `max_bins - 1`.
+        Negative values for categorical features are treated as missing values.

         Read more in the :ref:`User Guide <categorical_support_gbdt>`.
@@ -1623,9 +1627,8 @@ class HistGradientBoostingClassifier(ClassifierMixin, BaseHistGradientBoosting):
         data has feature names).

         For each categorical feature, there must be at most `max_bins` unique
-        categories, and each categorical value must be in [0, max_bins -1].
-        During prediction, categories encoded as a negative value are treated as
-        missing values.
+        categories, and each categorical value must be less then `max_bins - 1`.
+        Negative values for categorical features are treated as missing values.

         Read more in the :ref:`User Guide <categorical_support_gbdt>`.
16311634

sklearn/ensemble/_hist_gradient_boosting/tests/test_binning.py

Lines changed: 31 additions & 5 deletions
@@ -95,8 +95,9 @@ def test_map_to_bins(max_bins):
         _find_binning_thresholds(DATA[:, i], max_bins=max_bins) for i in range(2)
     ]
     binned = np.zeros_like(DATA, dtype=X_BINNED_DTYPE, order="F")
+    is_categorical = np.zeros(2, dtype=np.uint8)
     last_bin_idx = max_bins
-    _map_to_bins(DATA, bin_thresholds, last_bin_idx, n_threads, binned)
+    _map_to_bins(DATA, bin_thresholds, is_categorical, last_bin_idx, n_threads, binned)
     assert binned.shape == DATA.shape
     assert binned.dtype == np.uint8
     assert binned.flags.f_contiguous
@@ -357,10 +358,35 @@ def test_categorical_feature(n_bins):
     expected_trans = np.array([[0, 1, 2, n_bins - 1, 3, 4, 5]]).T
     assert_array_equal(bin_mapper.transform(X), expected_trans)

-    # For unknown categories, the mapping is incorrect / undefined. This never
-    # happens in practice. This check is only for illustration purpose.
-    X = np.array([[-1, 100]], dtype=X_DTYPE).T
-    expected_trans = np.array([[0, 6]]).T
+    # Negative categories are mapped to the missing values' bin
+    # (i.e. the bin of index `missing_values_bin_idx_ == n_bins - 1).
+    # Unknown positive categories does not happen in practice and tested
+    # for illustration purpose.
+    X = np.array([[-4, -1, 100]], dtype=X_DTYPE).T
+    expected_trans = np.array([[n_bins - 1, n_bins - 1, 6]]).T
+    assert_array_equal(bin_mapper.transform(X), expected_trans)
+
+
+def test_categorical_feature_negative_missing():
+    """Make sure bin mapper treats negative categories as missing values."""
+    X = np.array(
+        [[4] * 500 + [1] * 3 + [5] * 10 + [-1] * 3 + [np.nan] * 4], dtype=X_DTYPE
+    ).T
+    bin_mapper = _BinMapper(
+        n_bins=4,
+        is_categorical=np.array([True]),
+        known_categories=[np.array([1, 4, 5], dtype=X_DTYPE)],
+    ).fit(X)
+
+    assert bin_mapper.n_bins_non_missing_ == [3]
+
+    X = np.array([[-1, 1, 3, 5, np.nan]], dtype=X_DTYPE).T
+
+    # Negative values for categorical features are considered as missing values.
+    # They are mapped to the bin of index `bin_mapper.missing_values_bin_idx_`,
+    # which is 3 here.
+    assert bin_mapper.missing_values_bin_idx_ == 3
+    expected_trans = np.array([[3, 0, 1, 2, 3]]).T
     assert_array_equal(bin_mapper.transform(X), expected_trans)

sklearn/ensemble/_hist_gradient_boosting/tests/test_gradient_boosting.py

Lines changed: 6 additions & 3 deletions
@@ -950,7 +950,10 @@ def test_staged_predict(HistGradientBoosting, X, y):
     "Est", (HistGradientBoostingRegressor, HistGradientBoostingClassifier)
 )
 @pytest.mark.parametrize("bool_categorical_parameter", [True, False])
-def test_unknown_categories_nan(insert_missing, Est, bool_categorical_parameter):
+@pytest.mark.parametrize("missing_value", [np.nan, -1])
+def test_unknown_categories_nan(
+    insert_missing, Est, bool_categorical_parameter, missing_value
+):
     # Make sure no error is raised at predict if a category wasn't seen during
     # fit. We also make sure they're treated as nans.

@@ -970,7 +973,7 @@ def test_unknown_categories_nan(insert_missing, Est, bool_categorical_parameter)
     if insert_missing:
         mask = rng.binomial(1, 0.01, size=X.shape).astype(bool)
         assert mask.sum() > 0
-        X[mask] = np.nan
+        X[mask] = missing_value

     est = Est(max_iter=20, categorical_features=categorical_features).fit(X, y)
     assert_array_equal(est.is_categorical_, [False, True])
@@ -979,7 +982,7 @@ def test_unknown_categories_nan(insert_missing, Est, bool_categorical_parameter)
     # unknown categories will be treated as nans
     X_test = np.zeros((10, X.shape[1]), dtype=float)
     X_test[:5, 1] = 30
-    X_test[5:, 1] = np.nan
+    X_test[5:, 1] = missing_value
     assert len(np.unique(est.predict(X_test))) == 1
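Stacked parametrize decorators take the cross product, so the added missing_value parameter doubles the test matrix; assuming insert_missing is also a two-value parametrize, the test now runs 2 x 2 x 2 x 2 = 16 times. A stripped-down illustration:

import numpy as np
import pytest

@pytest.mark.parametrize("missing_value", [np.nan, -1])
@pytest.mark.parametrize("flag", [True, False])
def test_cross_product(flag, missing_value):
    # pytest generates 2 x 2 = 4 cases from the stacked decorators.
    assert flag in (True, False)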

sklearn/impute/_base.py

Lines changed: 3 additions & 3 deletions
@@ -11,7 +11,7 @@
 from scipy import sparse as sp

 from ..base import BaseEstimator, TransformerMixin
-from ..utils._param_validation import StrOptions, Hidden
+from ..utils._param_validation import StrOptions, Hidden, MissingValues
 from ..utils.fixes import _mode
 from ..utils.sparsefuncs import _get_median
 from ..utils.validation import check_is_fitted
@@ -78,7 +78,7 @@ class _BaseImputer(TransformerMixin, BaseEstimator):
     """

     _parameter_constraints: dict = {
-        "missing_values": ["missing_values"],
+        "missing_values": [MissingValues()],
         "add_indicator": ["boolean"],
         "keep_empty_features": ["boolean"],
     }
@@ -800,7 +800,7 @@ class MissingIndicator(TransformerMixin, BaseEstimator):
     """

     _parameter_constraints: dict = {
-        "missing_values": [numbers.Real, numbers.Integral, str, None],
+        "missing_values": [MissingValues()],
         "features": [StrOptions({"missing-only", "all"})],
         "sparse": ["boolean", StrOptions({"auto"})],
         "error_on_new": ["boolean"],

sklearn/linear_model/_base.py

Lines changed: 1 addition & 1 deletion
@@ -399,7 +399,7 @@ def decision_function(self, X):

         X = self._validate_data(X, accept_sparse="csr", reset=False)
         scores = safe_sparse_dot(X, self.coef_.T, dense_output=True) + self.intercept_
-        return xp.reshape(scores, -1) if scores.shape[1] == 1 else scores
+        return xp.reshape(scores, (-1,)) if scores.shape[1] == 1 else scores

     def predict(self, X):
         """
