From 7ed785aa448ae9739817d831aff3e76d18bdb2c3 Mon Sep 17 00:00:00 2001
From: Christian Lorentzen
Date: Fri, 8 Dec 2023 16:40:16 +0100
Subject: [PATCH 1/8] ENH add subsample to HGBT

---
 .../gradient_boosting.py            | 41 +++++++++++++++++++
 .../tests/test_gradient_boosting.py | 39 ++++++++++++++++++
 2 files changed, 80 insertions(+)

diff --git a/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py b/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py
index a83b1dbd0f4b9..f48c349e5a463 100644
--- a/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py
+++ b/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py
@@ -148,6 +148,7 @@ class BaseHistGradientBoosting(BaseEstimator, ABC):
         "min_samples_leaf": [Interval(Integral, 1, None, closed="left")],
         "l2_regularization": [Interval(Real, 0, None, closed="left")],
         "max_features": [Interval(RealNotInt, 0, 1, closed="right")],
+        "subsample": [Interval(Real, 0.0, 1.0, closed="right")],
         "monotonic_cst": ["array-like", dict, None],
         "interaction_cst": [
             list,
@@ -188,6 +189,7 @@ def __init__(
         min_samples_leaf,
         l2_regularization,
         max_features,
+        subsample,
         max_bins,
         categorical_features,
         monotonic_cst,
@@ -209,6 +211,7 @@ def __init__(
         self.min_samples_leaf = min_samples_leaf
         self.l2_regularization = l2_regularization
         self.max_features = max_features
+        self.subsample = subsample
         self.max_bins = max_bins
         self.monotonic_cst = monotonic_cst
         self.interaction_cst = interaction_cst
@@ -577,6 +580,7 @@ def fit(self, X, y, sample_weight=None):
         self._random_seed = rng.randint(np.iinfo(np.uint32).max, dtype="u8")
         feature_subsample_seed = rng.randint(np.iinfo(np.uint32).max, dtype="u8")
         self._feature_subsample_rng = np.random.default_rng(feature_subsample_seed)
+        self._bagging_subsample_rng = self._feature_subsample_rng.spawn(1)[0]
 
         self._validate_parameters()
         monotonic_cst = _check_monotonic_cst(self, self.monotonic_cst)
@@ -836,6 +840,18 @@ def fit(self, X, y, sample_weight=None):
 
             begin_at_stage = self.n_iter_
 
+        # Out of bag settings
+        do_oob = self.subsample < 1.0
+        if do_oob:
+            n_inbag = max(1, int(self.subsample * n_samples))
+            # Same dtype as sample_weight.
+            sample_mask = np.zeros((n_samples,), dtype=np.float64)
+            sample_mask[:n_inbag] = 1
+            if sample_weight_train is None:
+                sample_weight_train_original = np.ones(n_samples)
+            else:
+                sample_weight_train_original = sample_weight_train
+
         # initialize gradients and hessians (empty arrays).
         # shape = (n_samples, n_trees_per_iteration).
         gradient, hessian = self._loss.init_gradient_and_hessian(
@@ -849,6 +865,11 @@ def fit(self, X, y, sample_weight=None):
                     "[{}/{}] ".format(iteration + 1, self.max_iter), end="", flush=True
                 )
 
+            # Do out of bag if required
+            if do_oob:
+                self._bagging_subsample_rng.shuffle(sample_mask)
+                sample_weight_train = sample_weight_train_original * sample_mask
+
             # Update gradients and hessians, inplace
             # Note that self._loss expects shape (n_samples,) for
             # n_trees_per_iteration = 1 else shape (n_samples, n_trees_per_iteration).
@@ -1487,6 +1508,14 @@ class HistGradientBoostingRegressor(RegressorMixin, BaseHistGradientBoosting):
 
         .. versionadded:: 1.4
 
+    subsample : float, default=1.0
+        The fraction of randomly chosen samples used to fit the individual
+        tree(s) in each boosting iteration. If smaller than 1.0, this results
+        in stochastic gradient boosting, i.e. bagging of the training samples.
+        Values must be in the range `(0.0, 1.0]`.
+
+        .. versionadded:: 1.5
+
     max_bins : int, default=255
         The maximum number of bins to use for non-missing values.
         Before training, each feature of the input array `X` is binned into
@@ -1695,6 +1724,7 @@ def __init__(
         min_samples_leaf=20,
         l2_regularization=0.0,
         max_features=1.0,
+        subsample=1.0,
         max_bins=255,
         categorical_features="warn",
         monotonic_cst=None,
@@ -1717,6 +1747,7 @@ def __init__(
             min_samples_leaf=min_samples_leaf,
             l2_regularization=l2_regularization,
            max_features=max_features,
+            subsample=subsample,
             max_bins=max_bins,
             monotonic_cst=monotonic_cst,
             interaction_cst=interaction_cst,
@@ -1863,6 +1894,14 @@ class HistGradientBoostingClassifier(ClassifierMixin, BaseHistGradientBoosting):
 
         .. versionadded:: 1.4
 
+    subsample : float, default=1.0
+        The fraction of randomly chosen samples used to fit the individual
+        tree(s) in each boosting iteration. If smaller than 1.0, this results
+        in stochastic gradient boosting, i.e. bagging of the training samples.
+        Values must be in the range `(0.0, 1.0]`.
+
+        .. versionadded:: 1.5
+
     max_bins : int, default=255
         The maximum number of bins to use for non-missing values.
         Before training, each feature of the input array `X` is binned into
@@ -2073,6 +2112,7 @@ def __init__(
         min_samples_leaf=20,
         l2_regularization=0.0,
         max_features=1.0,
+        subsample=1.0,
         max_bins=255,
         categorical_features="warn",
         monotonic_cst=None,
@@ -2096,6 +2136,7 @@ def __init__(
             min_samples_leaf=min_samples_leaf,
             l2_regularization=l2_regularization,
             max_features=max_features,
+            subsample=subsample,
             max_bins=max_bins,
             categorical_features=categorical_features,
             monotonic_cst=monotonic_cst,
diff --git a/sklearn/ensemble/_hist_gradient_boosting/tests/test_gradient_boosting.py b/sklearn/ensemble/_hist_gradient_boosting/tests/test_gradient_boosting.py
index 8adc0a19dc483..fc084fe37973a 100644
--- a/sklearn/ensemble/_hist_gradient_boosting/tests/test_gradient_boosting.py
+++ b/sklearn/ensemble/_hist_gradient_boosting/tests/test_gradient_boosting.py
@@ -1580,3 +1580,42 @@ def test_categorical_features_warn():
     msg = "The categorical_features parameter will change to 'from_dtype' in v1.6"
     with pytest.warns(FutureWarning, match=msg):
         hist.fit(X, y)
+
+
+@pytest.mark.parametrize(
+    "problem", ("regression", "binary_classification", "multiclass_classification")
+)
+def test_bagging(problem):
+    """Test subsample/bagging."""
+    n_samples = 100
+    n_features = 2
+    params = dict(max_iter=10, early_stopping=False, validation_fraction=None)
+    if problem == "regression":
+        X, y = make_regression(
+            n_samples=n_samples,
+            n_features=n_features,
+            n_informative=n_features,
+            random_state=0,
+        )
+        model = HistGradientBoostingRegressor
+    else:
+        n_classes = 2 if problem == "binary_classification" else 3
+        X, y = make_classification(
+            n_samples=n_samples,
+            n_features=n_features,
+            n_informative=n_features,
+            n_redundant=0,
+            n_clusters_per_class=1,
+            n_classes=n_classes,
+            random_state=0,
+        )
+        model = HistGradientBoostingClassifier
+
+    est = model(**params).fit(X, y)
+    est_bag_75 = model(subsample=0.75, **params).fit(X, y)
+    est_bag_50 = model(subsample=0.5, **params).fit(X, y)
+    est_bag_25 = model(subsample=0.25, **params).fit(X, y)
+
+    assert np.all(est.train_score_ > est_bag_75.train_score_)
+    assert np.all(est_bag_75.train_score_ > est_bag_50.train_score_)
+    assert np.all(est_bag_50.train_score_ > est_bag_25.train_score_)
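A minimal usage sketch of the new parameter added by the patch above, assuming an
environment built from this branch; the dataset and the hyperparameter values are
illustrative only::

    from sklearn.datasets import make_regression
    from sklearn.ensemble import HistGradientBoostingRegressor

    X, y = make_regression(n_samples=1_000, n_features=10, random_state=0)

    # Each boosting iteration fits its tree on a fresh random 50% of the rows.
    est = HistGradientBoostingRegressor(subsample=0.5, max_iter=100, random_state=0)
    est.fit(X, y)
    print(est.score(X, y))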
From a5970684c5f96f452f595d47326fe07b9ea31bb1 Mon Sep 17 00:00:00 2001
From: Christian Lorentzen
Date: Thu, 4 Jan 2024 18:52:10 +0100
Subject: [PATCH 2/8] FIX numpy.random.Generator.spawn for numpy<1.25

---
 .../_hist_gradient_boosting/gradient_boosting.py | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py b/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py
index f48c349e5a463..80624ee1a9571 100644
--- a/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py
+++ b/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py
@@ -36,6 +36,7 @@
 from ...utils import check_random_state, compute_sample_weight, is_scalar_nan, resample
 from ...utils._openmp_helpers import _openmp_effective_n_threads
 from ...utils._param_validation import Hidden, Interval, RealNotInt, StrOptions
+from ...utils.fixes import parse_version
 from ...utils.multiclass import check_classification_targets
 from ...utils.validation import (
     _check_monotonic_cst,
@@ -580,7 +581,17 @@ def fit(self, X, y, sample_weight=None):
         self._random_seed = rng.randint(np.iinfo(np.uint32).max, dtype="u8")
         feature_subsample_seed = rng.randint(np.iinfo(np.uint32).max, dtype="u8")
         self._feature_subsample_rng = np.random.default_rng(feature_subsample_seed)
-        self._bagging_subsample_rng = self._feature_subsample_rng.spawn(1)[0]
+        if parse_version(np.__version__) >= parse_version("1.25"):
+            self._bagging_subsample_rng = self._feature_subsample_rng.spawn(1)[0]
+        else:
+            # See numpy Generator.spawn(self, int n_children).
+            n_children = 1
+            self._bagging_subsample_rng = [
+                type(self._feature_subsample_rng)(g)
+                for g in self._feature_subsample_rng._bit_generator.spawn(
+                    n_children
+                )
+            ][0]
 
         self._validate_parameters()
         monotonic_cst = _check_monotonic_cst(self, self.monotonic_cst)
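The fallback above (and its rework in the next patch) rebuilds a child Generator from
the parent's private attributes, because ``Generator.spawn`` only exists in NumPy >= 1.25.
For reference, a sketch of the same idea using only documented NumPy API, with an
arbitrary seed value::

    import numpy as np

    ss = np.random.SeedSequence(42)
    parent_seed, child_seed = ss.spawn(2)  # statistically independent child sequences
    parent = np.random.default_rng(parent_seed)
    child = np.random.default_rng(child_seed)
    print(parent.integers(0, 10, 3), child.integers(0, 10, 3))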
From 851c4f2b249c82d3ac6c0cd83c00d01b1af43b91 Mon Sep 17 00:00:00 2001
From: Christian Lorentzen
Date: Thu, 4 Jan 2024 20:14:59 +0100
Subject: [PATCH 3/8] FIX spawning older Generators, 2nd try

---
 .../gradient_boosting.py | 26 +++++++++++++------
 1 file changed, 18 insertions(+), 8 deletions(-)

diff --git a/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py b/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py
index 80624ee1a9571..df74d16cf3e2e 100644
--- a/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py
+++ b/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py
@@ -581,17 +581,27 @@ def fit(self, X, y, sample_weight=None):
         self._random_seed = rng.randint(np.iinfo(np.uint32).max, dtype="u8")
         feature_subsample_seed = rng.randint(np.iinfo(np.uint32).max, dtype="u8")
         self._feature_subsample_rng = np.random.default_rng(feature_subsample_seed)
+        # TODO: Remove this condition, once numpy 1.25 is the minimum version.
         if parse_version(np.__version__) >= parse_version("1.25"):
             self._bagging_subsample_rng = self._feature_subsample_rng.spawn(1)[0]
         else:
-            # See numpy Generator.spawn(self, int n_children).
-            n_children = 1
-            self._bagging_subsample_rng = [
-                type(self._feature_subsample_rng)(g)
-                for g in self._feature_subsample_rng._bit_generator.spawn(
-                    n_children
-                )
-            ][0]
+            # See numpy Generator.spawn(self, int n_children) and
+            # numpy BitGenerator.spawn
+
+            def spawnGenerator(self, n_children):
+                return [
+                    type(self)(g)
+                    for g in spawnBitGenerator(self._bit_generator, n_children)
+                ]
+
+            def spawnBitGenerator(self, n_children):
+                return [
+                    type(self)(seed=s) for s in self._seed_seq.spawn(n_children)
+                ]
+
+            self._bagging_subsample_rng = spawnGenerator(
+                self._feature_subsample_rng, 1
+            )[0]
 
         self._validate_parameters()
         monotonic_cst = _check_monotonic_cst(self, self.monotonic_cst)

From b873839ec759ebd3300f5699ba162b0c3c36d621 Mon Sep 17 00:00:00 2001
From: Christian Lorentzen
Date: Mon, 8 Jan 2024 22:44:54 +0100
Subject: [PATCH 4/8] CLN rename arg self

---
 .../_hist_gradient_boosting/gradient_boosting.py | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py b/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py
index df74d16cf3e2e..035157a43161f 100644
--- a/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py
+++ b/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py
@@ -588,18 +588,21 @@ def fit(self, X, y, sample_weight=None):
             # See numpy Generator.spawn(self, int n_children) and
             # numpy BitGenerator.spawn
 
-            def spawnGenerator(self, n_children):
+            def spawn_generator(generator, n_children):
                 return [
-                    type(self)(g)
-                    for g in spawnBitGenerator(self._bit_generator, n_children)
+                    type(generator)(g)
+                    for g in spawn_bit_generator(
+                        generator._bit_generator, n_children
+                    )
                 ]
 
-            def spawnBitGenerator(self, n_children):
+            def spawn_bit_generator(_bit_generator, n_children):
                 return [
-                    type(self)(seed=s) for s in self._seed_seq.spawn(n_children)
+                    type(_bit_generator)(seed=s)
+                    for s in _bit_generator._seed_seq.spawn(n_children)
                 ]
 
-            self._bagging_subsample_rng = spawnGenerator(
+            self._bagging_subsample_rng = spawn_generator(
                 self._feature_subsample_rng, 1
             )[0]
From 7d5e42639c028d5d2b6fa22ac4d56c92bb3cb2c9 Mon Sep 17 00:00:00 2001
From: Christian Lorentzen
Date: Wed, 10 Jan 2024 13:50:23 +0100
Subject: [PATCH 5/8] DOC gradient boosting regularization

---
 .../plot_gradient_boosting_regularization.py | 106 ++++++++++--------
 1 file changed, 58 insertions(+), 48 deletions(-)

diff --git a/examples/ensemble/plot_gradient_boosting_regularization.py b/examples/ensemble/plot_gradient_boosting_regularization.py
index 218d69d5ac7d7..4293033432c72 100644
--- a/examples/ensemble/plot_gradient_boosting_regularization.py
+++ b/examples/ensemble/plot_gradient_boosting_regularization.py
@@ -4,9 +4,10 @@
 ================================
 
 Illustration of the effect of different regularization strategies
-for Gradient Boosting. The example is taken from Hastie et al 2009 [1]_.
+for Gradient Boosting. The example is taken from Chapter 10.12 of
+Hastie et al 2009 [1]_.
 
-The loss function used is binomial deviance. Regularization via
+The loss function used is log loss, aka binomial deviance. Regularization via
 shrinkage (``learning_rate < 1.0``) improves performance considerably.
 In combination with shrinkage, stochastic gradient boosting
 (``subsample < 1.0``) can produce more accurate models by reducing the
@@ -21,14 +22,13 @@
 """
 
-# Author: Peter Prettenhofer
-#
 # License: BSD 3 clause
 
 import matplotlib.pyplot as plt
 import numpy as np
 
-from sklearn import datasets, ensemble
+from sklearn import datasets
+from sklearn.ensemble import GradientBoostingClassifier, HistGradientBoostingClassifier
 from sklearn.metrics import log_loss
 from sklearn.model_selection import train_test_split
 
@@ -44,48 +44,58 @@
     "max_leaf_nodes": 4,
     "max_depth": None,
     "random_state": 2,
-    "min_samples_split": 5,
+    "min_samples_leaf": 2,
 }
 
-plt.figure()
-
-for label, color, setting in [
-    ("No shrinkage", "orange", {"learning_rate": 1.0, "subsample": 1.0}),
-    ("learning_rate=0.2", "turquoise", {"learning_rate": 0.2, "subsample": 1.0}),
-    ("subsample=0.5", "blue", {"learning_rate": 1.0, "subsample": 0.5}),
-    (
-        "learning_rate=0.2, subsample=0.5",
-        "gray",
-        {"learning_rate": 0.2, "subsample": 0.5},
-    ),
-    (
-        "learning_rate=0.2, max_features=2",
-        "magenta",
-        {"learning_rate": 0.2, "max_features": 2},
-    ),
-]:
-    params = dict(original_params)
-    params.update(setting)
-
-    clf = ensemble.GradientBoostingClassifier(**params)
-    clf.fit(X_train, y_train)
-
-    # compute test set deviance
-    test_deviance = np.zeros((params["n_estimators"],), dtype=np.float64)
-
-    for i, y_proba in enumerate(clf.staged_predict_proba(X_test)):
-        test_deviance[i] = 2 * log_loss(y_test, y_proba[:, 1])
-
-    plt.plot(
-        (np.arange(test_deviance.shape[0]) + 1)[::5],
-        test_deviance[::5],
-        "-",
-        color=color,
-        label=label,
-    )
-
-plt.legend(loc="upper right")
-plt.xlabel("Boosting Iterations")
-plt.ylabel("Test Set Deviance")
-
-plt.show()
+fig, axes = plt.subplots(ncols=2, figsize=(10, 5), sharex=True, sharey=True)
+
+for j, model_class in enumerate(
+    [GradientBoostingClassifier, HistGradientBoostingClassifier]
+):
+    for label, color, setting in [
+        ("No shrinkage", "orange", {"learning_rate": 1.0, "subsample": 1.0}),
+        ("learning_rate=0.2", "turquoise", {"learning_rate": 0.2, "subsample": 1.0}),
+        ("subsample=0.5", "blue", {"learning_rate": 1.0, "subsample": 0.5}),
+        (
+            "learning_rate=0.2, subsample=0.5",
+            "gray",
+            {"learning_rate": 0.2, "subsample": 0.5},
+        ),
+        (
+            "learning_rate=0.2, max_features=2",
+            "magenta",
+            {"learning_rate": 0.2, "max_features": 2},
+        ),
+    ]:
+        params = dict(original_params)
+        params.update(setting)
+        n_iter = params["n_estimators"]
+        if model_class == HistGradientBoostingClassifier:
+            params["max_iter"] = params.pop("n_estimators")
+            if "max_features" in params:
+                params["max_features"] = float(
+                    params["max_features"] / X_train.shape[1]
+                )
+
+        clf = model_class(**params)
+        clf.fit(X_train, y_train)
+
+        # compute test set log loss
+        test_loss = np.zeros((n_iter,), dtype=np.float64)
+
+        for i, y_proba in enumerate(clf.staged_predict_proba(X_test)):
+            test_loss[i] = 2 * log_loss(y_test, y_proba[:, 1])
+
+        axes[j].plot(
+            (np.arange(test_loss.shape[0]) + 1)[::5],
+            test_loss[::5],
+            "-",
+            color=color,
+            label=label,
+        )
+
+    axes[j].set_ylim(None, 2)
+    axes[j].legend(loc="upper right")
+    axes[j].set_xlabel("Boosting Iterations")
+    axes[j].set_ylabel("Test Set Log Loss")
+    axes[j].set_title(model_class.__name__)
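The quantity plotted as ``2 * log_loss`` above is the binomial deviance of the original
example; a tiny numeric check of that identity, with made-up probabilities::

    import numpy as np
    from sklearn.metrics import log_loss

    y_true = np.array([0, 1, 1, 0])
    proba = np.array([0.1, 0.8, 0.6, 0.3])
    # mean binomial deviance = -2 * mean(y * log(p) + (1 - y) * log(1 - p))
    deviance = -2 * np.mean(y_true * np.log(proba) + (1 - y_true) * np.log(1 - proba))
    print(np.isclose(deviance, 2 * log_loss(y_true, proba)))  # True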
From eab8e8253c87f49ed6fa969c65fd82a22a3ebc3f Mon Sep 17 00:00:00 2001
From: Christian Lorentzen
Date: Fri, 16 Feb 2024 22:37:48 +0100
Subject: [PATCH 6/8] ENH use subsampled X, g, h instead of zero sample_weight

With zero sample_weight, the count statistics of the histograms are
wrong.
---
 .../_gradient_boosting.pyx | 21 +++++++++----
 .../gradient_boosting.py   | 30 +++++++++++--------
 2 files changed, 34 insertions(+), 17 deletions(-)

diff --git a/sklearn/ensemble/_hist_gradient_boosting/_gradient_boosting.pyx b/sklearn/ensemble/_hist_gradient_boosting/_gradient_boosting.pyx
index fe234958e631a..79f3a86d4c300 100644
--- a/sklearn/ensemble/_hist_gradient_boosting/_gradient_boosting.pyx
+++ b/sklearn/ensemble/_hist_gradient_boosting/_gradient_boosting.pyx
@@ -5,12 +5,14 @@ import numpy as np
 
 from .common import Y_DTYPE
 from .common cimport Y_DTYPE_C
+from ...utils._typedefs cimport int64_t
 
 
 def _update_raw_predictions(
     Y_DTYPE_C [::1] raw_predictions,  # OUT
     grower,
     n_threads,
+    sample_idx,
 ):
     """Update raw_predictions with the predictions of the newest tree.
 
@@ -35,7 +37,7 @@ def _update_raw_predictions(
     values = np.array([leaf.value for leaf in leaves], dtype=Y_DTYPE)
 
     _update_raw_predictions_helper(raw_predictions, starts, stops, partition,
-                                   values, n_threads)
+                                   values, n_threads, sample_idx)
 
 
 cdef inline void _update_raw_predictions_helper(
@@ -45,6 +47,7 @@ cdef inline void _update_raw_predictions_helper(
     const unsigned int [::1] partition,
     const Y_DTYPE_C [::1] values,
     int n_threads,
+    const int64_t [::1] sample_idx=None,
 ):
 
     cdef:
@@ -52,7 +55,15 @@ cdef inline void _update_raw_predictions_helper(
         int leaf_idx
         int n_leaves = starts.shape[0]
 
-    for leaf_idx in prange(n_leaves, schedule='static', nogil=True,
-                           num_threads=n_threads):
-        for position in range(starts[leaf_idx], stops[leaf_idx]):
-            raw_predictions[partition[position]] += values[leaf_idx]
+    if sample_idx is None:
+        for leaf_idx in prange(
+            n_leaves, schedule='static', nogil=True, num_threads=n_threads
+        ):
+            for position in range(starts[leaf_idx], stops[leaf_idx]):
+                raw_predictions[partition[position]] += values[leaf_idx]
+    else:
+        for leaf_idx in prange(
+            n_leaves, schedule='static', nogil=True, num_threads=n_threads
+        ):
+            for position in range(starts[leaf_idx], stops[leaf_idx]):
+                raw_predictions[sample_idx[partition[position]]] += values[leaf_idx]
diff --git a/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py b/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py
index 035157a43161f..2ba16b040b55c 100644
--- a/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py
+++ b/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py
@@ -867,14 +867,15 @@ def spawn_bit_generator(_bit_generator, n_children):
         # Out of bag settings
         do_oob = self.subsample < 1.0
         if do_oob:
+            # Note that setting sample_weight to zero for the corresponding samples
+            # would result in false "count" statistics of the histograms. Therefore,
+            # we take copies (fancy-indexed numpy arrays) for the subsampling.
             n_inbag = max(1, int(self.subsample * n_samples))
-            # Same dtype as sample_weight.
-            sample_mask = np.zeros((n_samples,), dtype=np.float64)
-            sample_mask[:n_inbag] = 1
-            if sample_weight_train is None:
-                sample_weight_train_original = np.ones(n_samples)
-            else:
-                sample_weight_train_original = sample_weight_train
+            sample_mask = np.zeros((n_samples,), dtype=np.bool)
+            sample_mask[:n_inbag] = True
+        else:
+            sample_mask = slice(None)
+            sample_mask_idx = None
 
         # initialize gradients and hessians (empty arrays).
         # shape = (n_samples, n_trees_per_iteration).
@@ -892,7 +893,7 @@ def spawn_bit_generator(_bit_generator, n_children):
             # Do out of bag if required
             if do_oob:
                 self._bagging_subsample_rng.shuffle(sample_mask)
-                sample_weight_train = sample_weight_train_original * sample_mask
+                sample_mask_idx = np.flatnonzero(sample_mask)
 
             # Update gradients and hessians, inplace
             # Note that self._loss expects shape (n_samples,) for
@@ -930,9 +931,9 @@ def spawn_bit_generator(_bit_generator, n_children):
             # Build `n_trees_per_iteration` trees.
             for k in range(self.n_trees_per_iteration_):
                 grower = TreeGrower(
-                    X_binned=X_binned_train,
-                    gradients=g_view[:, k],
-                    hessians=h_view[:, k],
+                    X_binned=X_binned_train[sample_mask],
+                    gradients=g_view[sample_mask, k],
+                    hessians=h_view[sample_mask, k],
                     n_bins=n_bins,
                     n_bins_non_missing=self._bin_mapper.n_bins_non_missing_,
                     has_missing_values=has_missing_values,
@@ -971,7 +972,12 @@ def spawn_bit_generator(_bit_generator, n_children):
                 # Update raw_predictions with the predictions of the newly
                 # created tree.
                 tic_pred = time()
-                _update_raw_predictions(raw_predictions[:, k], grower, n_threads)
+                _update_raw_predictions(
+                    raw_predictions[:, k],
+                    grower,
+                    n_threads,
+                    sample_idx=sample_mask_idx,
+                )
                 toc_pred = time()
                 acc_prediction_time += toc_pred - tic_pred
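A toy illustration of the ``sample_idx`` mapping introduced above: the grower only sees
the in-bag rows, so the leaf partitions hold local positions ``0..n_inbag-1``, and
``sample_idx`` translates them back to row indices of the full training set. All names
and sizes below are made up for the illustration::

    import numpy as np

    n_samples, n_inbag = 10, 4
    sample_mask = np.zeros(n_samples, dtype=bool)
    sample_mask[:n_inbag] = True
    np.random.default_rng(0).shuffle(sample_mask)
    sample_idx = np.flatnonzero(sample_mask)  # local position -> global row index

    raw_predictions = np.zeros(n_samples)
    leaf_value = 0.25
    local_positions = np.array([0, 2])  # rows of the subsample falling into one leaf
    raw_predictions[sample_idx[local_positions]] += leaf_value
    print(raw_predictions)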
From a37bbe4cd366b7cd3d0d4972970019a0854e54f4 Mon Sep 17 00:00:00 2001
From: Christian Lorentzen
Date: Fri, 16 Feb 2024 22:50:27 +0100
Subject: [PATCH 7/8] FIX bool instead of np.bool and enforce f-contiguity

---
 .../ensemble/_hist_gradient_boosting/gradient_boosting.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py b/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py
index 41f4fe2d7360d..b44a7612d828d 100644
--- a/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py
+++ b/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py
@@ -873,7 +873,7 @@ def spawn_bit_generator(_bit_generator, n_children):
             # would result in false "count" statistics of the histograms. Therefore,
             # we take copies (fancy-indexed numpy arrays) for the subsampling.
             n_inbag = max(1, int(self.subsample * n_samples))
-            sample_mask = np.zeros((n_samples,), dtype=np.bool)
+            sample_mask = np.zeros((n_samples,), dtype=bool)
             sample_mask[:n_inbag] = True
         else:
             sample_mask = slice(None)
             sample_mask_idx = None
@@ -933,9 +933,9 @@ def spawn_bit_generator(_bit_generator, n_children):
             # Build `n_trees_per_iteration` trees.
             for k in range(self.n_trees_per_iteration_):
                 grower = TreeGrower(
-                    X_binned=X_binned_train[sample_mask],
-                    gradients=g_view[sample_mask, k],
-                    hessians=h_view[sample_mask, k],
+                    X_binned=np.asfortranarray(X_binned_train[sample_mask]),
+                    gradients=np.asfortranarray(g_view[sample_mask, k]),
+                    hessians=np.asfortranarray(h_view[sample_mask, k]),
                     n_bins=n_bins,
                     n_bins_non_missing=self._bin_mapper.n_bins_non_missing_,
                     has_missing_values=has_missing_values,

From 7fb2f72130b3228efc4bf72502c6af9b71db4582 Mon Sep 17 00:00:00 2001
From: Christian Lorentzen
Date: Fri, 16 Feb 2024 23:11:41 +0100
Subject: [PATCH 8/8] FIX special case constant hessian for subsample indices

---
 .../gradient_boosting.py | 27 +++++++++++++------
 1 file changed, 19 insertions(+), 8 deletions(-)

diff --git a/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py b/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py
index b44a7612d828d..13799e6d9e14d 100644
--- a/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py
+++ b/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py
@@ -892,11 +892,6 @@ def spawn_bit_generator(_bit_generator, n_children):
                     "[{}/{}] ".format(iteration + 1, self.max_iter), end="", flush=True
                 )
 
-            # Do out of bag if required
-            if do_oob:
-                self._bagging_subsample_rng.shuffle(sample_mask)
-                sample_mask_idx = np.flatnonzero(sample_mask)
-
             # Update gradients and hessians, inplace
             # Note that self._loss expects shape (n_samples,) for
             # n_trees_per_iteration = 1 else shape (n_samples, n_trees_per_iteration).
@@ -930,12 +925,28 @@ def spawn_bit_generator(_bit_generator, n_children):
                 g_view = gradient
                 h_view = hessian
 
+            # Do out of bag if required
+            if do_oob:
+                self._bagging_subsample_rng.shuffle(sample_mask)
+                sample_mask_idx = np.flatnonzero(sample_mask)
+                X_binned_grow = np.asfortranarray(X_binned_train[sample_mask])
+                g_grow = np.asfortranarray(g_view[sample_mask])
+                if self._loss.constant_hessian:
+                    h_grow = h_view
+                else:
+                    h_grow = np.asfortranarray(h_view[sample_mask])
+
+            else:
+                X_binned_grow = X_binned_train
+                g_grow = g_view
+                h_grow = h_view
+
             # Build `n_trees_per_iteration` trees.
             for k in range(self.n_trees_per_iteration_):
                 grower = TreeGrower(
-                    X_binned=np.asfortranarray(X_binned_train[sample_mask]),
-                    gradients=np.asfortranarray(g_view[sample_mask, k]),
-                    hessians=np.asfortranarray(h_view[sample_mask, k]),
+                    X_binned=X_binned_grow,
+                    gradients=g_grow[:, k],
+                    hessians=h_grow[:, k],
                     n_bins=n_bins,
                     n_bins_non_missing=self._bin_mapper.n_bins_non_missing_,
                     has_missing_values=has_missing_values,
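The ``constant_hessian`` special case above assumes the hessian is stored once and
broadcast against the per-sample gradients rather than materialized per sample, so it
must not be fancy-indexed like the gradients. A small sketch of that shape logic, with
illustrative array sizes::

    import numpy as np

    g = np.random.default_rng(0).normal(size=8)  # per-sample gradients
    h = np.ones(1)                               # constant hessian, stored once
    sample_mask = np.zeros(8, dtype=bool)
    sample_mask[:4] = True

    g_grow = g[sample_mask]  # gradients must be subsampled
    h_grow = h               # a length-1 hessian broadcasts against any subsample size
    print((g_grow / h_grow).shape)  # (4,)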