From 25f863124d5ddef03c916b5757754cdcd345432b Mon Sep 17 00:00:00 2001 From: Christian Lorentzen Date: Sat, 15 Apr 2023 21:31:05 +0200 Subject: [PATCH 01/12] ENH reuse parent histogram --- .../_hist_gradient_boosting/grower.py | 11 ++- .../_hist_gradient_boosting/histogram.pyx | 68 ++++++++++++++++++- 2 files changed, 75 insertions(+), 4 deletions(-) diff --git a/sklearn/ensemble/_hist_gradient_boosting/grower.py b/sklearn/ensemble/_hist_gradient_boosting/grower.py index 4ed6041ecaa30..8370dfc17e4ca 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/grower.py +++ b/sklearn/ensemble/_hist_gradient_boosting/grower.py @@ -574,9 +574,13 @@ def split_next(self): if n_samples_left < n_samples_right: smallest_child = left_child_node largest_child = right_child_node + split_bin_start = 0 + split_bin_end = node.split_info.bin_idx + 1 else: smallest_child = right_child_node largest_child = left_child_node + split_bin_start = node.split_info.bin_idx + 1 + split_bin_end = self.histogram_builder.n_bins # We use the brute O(n_samples) method on the child that has the # smallest number of samples, and the subtraction trick O(n_bins) @@ -584,7 +588,12 @@ def split_next(self): # Note that both left and right child have the same allowed_features. tic = time() smallest_child.histograms = self.histogram_builder.compute_histograms_brute( - smallest_child.sample_indices, smallest_child.allowed_features + smallest_child.sample_indices, + smallest_child.allowed_features, + node.split_info.feature_idx, + split_bin_start, + split_bin_end, + node.histograms, ) largest_child.histograms = ( self.histogram_builder.compute_histograms_subtraction( diff --git a/sklearn/ensemble/_hist_gradient_boosting/histogram.pyx b/sklearn/ensemble/_hist_gradient_boosting/histogram.pyx index 336ba372cb53a..6ee9305f6e669 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/histogram.pyx +++ b/sklearn/ensemble/_hist_gradient_boosting/histogram.pyx @@ -104,6 +104,10 @@ cdef class HistogramBuilder: HistogramBuilder self, const unsigned int [::1] sample_indices, # IN const unsigned int [:] allowed_features=None, # IN + const int split_feature_idx=-1, # IN + const unsigned int split_bin_start=0, # IN + const unsigned int split_bin_end=0, # IN + const hist_struct [:, ::1] parent_histograms=None, # IN ): """Compute the histograms of the node by scanning through all the data. @@ -118,6 +122,18 @@ cdef class HistogramBuilder: Indices of the features that are allowed by interaction constraints to be split. + split_feature_idx : int + Feature index of the feature that the parent node was split on. + + split_bin_start : unsigned int + Start of the bin indices belonging to the feature that was split on. + + split_bin_end : unsigned int + End (+1) of the bin indices belonging to the feature that was split on. + + parent_histograms : ndarray of HISTOGRAM_DTYPE, shape (n_features, n_bins) + The histograms of the parent. 
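The parent_histograms parameter introduced here enables the actual shortcut: for the feature the parent node was split on, the samples reaching a child occupy only that child's side of the split (leaving aside subtleties such as missing-value handling), so the child's histogram for that single feature is the parent's histogram restricted to the child's bin range, with all other bins zeroed. A minimal NumPy sketch of that bin-wise copy, with illustrative names and a stand-in for HISTOGRAM_DTYPE (not the Cython implementation itself):

    import numpy as np

    # Illustrative stand-in for sklearn's HISTOGRAM_DTYPE.
    HIST_DTYPE = np.dtype(
        [("sum_gradients", "f8"), ("sum_hessians", "f8"), ("count", "u4")]
    )

    def child_hist_from_parent(parent_hist, split_bin_start, split_bin_end):
        """Derive one feature's child histogram from the parent's.

        parent_hist has shape (n_bins,); bins in [split_bin_start,
        split_bin_end) belong to this child and are copied verbatim,
        every other bin stays at zero.
        """
        child = np.zeros(parent_hist.shape[0], dtype=HIST_DTYPE)
        child[split_bin_start:split_bin_end] = parent_hist[split_bin_start:split_bin_end]
        return child

    # Left child of a split at bin_idx:  split_bin_start=0, split_bin_end=bin_idx + 1
    # Right child:                       split_bin_start=bin_idx + 1, split_bin_end=n_bins

This is the same copy that _compute_histogram_of_split_feature performs bin by bin under nogil, only vectorized for readability.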
+ Returns ------- histograms : ndarray of HISTOGRAM_DTYPE, shape (n_features, n_bins) @@ -141,6 +157,7 @@ cdef class HistogramBuilder: dtype=HISTOGRAM_DTYPE ) bint has_interaction_cst = allowed_features is not None + bint has_parent_hist = split_feature_idx >= 0 int n_threads = self.n_threads if has_interaction_cst: @@ -172,12 +189,57 @@ cdef class HistogramBuilder: else: feature_idx = f_idx - self._compute_histogram_brute_single_feature( - feature_idx, sample_indices, histograms - ) + if has_parent_hist and feature_idx == split_feature_idx: + self._compute_histogram_of_split_feature( + feature_idx, + sample_indices, + histograms, + split_bin_start, + split_bin_end, + parent_histograms, + ) + else: + self._compute_histogram_brute_single_feature( + feature_idx, sample_indices, histograms + ) return histograms + cdef void _compute_histogram_of_split_feature( + HistogramBuilder self, + const int feature_idx, + const unsigned int [::1] sample_indices, # IN + hist_struct [:, ::1] histograms, # OUT + const unsigned int split_bin_start, # IN + const unsigned int split_bin_end, # IN + const hist_struct [:, ::1] parent_histograms, # IN + ) noexcept nogil: # OUT + """Compute the histogram for the feature that was split on.""" + cdef: + unsigned int bin_idx = 0 + + if split_bin_start == 0: + for bin_idx in range(split_bin_end, self.n_bins): + histograms[feature_idx, bin_idx].sum_gradients = 0. + histograms[feature_idx, bin_idx].sum_hessians = 0. + histograms[feature_idx, bin_idx].count = 0 + else: + for bin_idx in range(split_bin_start): + histograms[feature_idx, bin_idx].sum_gradients = 0. + histograms[feature_idx, bin_idx].sum_hessians = 0. + histograms[feature_idx, bin_idx].count = 0 + + for bin_idx in range(split_bin_start, split_bin_end): + histograms[feature_idx, bin_idx].sum_gradients = ( + parent_histograms[feature_idx, bin_idx].sum_gradients + ) + histograms[feature_idx, bin_idx].sum_hessians = ( + parent_histograms[feature_idx, bin_idx].sum_hessians + ) + histograms[feature_idx, bin_idx].count = ( + parent_histograms[feature_idx, bin_idx].count + ) + cdef void _compute_histogram_brute_single_feature( HistogramBuilder self, const int feature_idx, From b1efb34516f4458894027638e2982a6bced8a297 Mon Sep 17 00:00:00 2001 From: Christian Lorentzen Date: Wed, 6 Sep 2023 22:32:40 +0200 Subject: [PATCH 02/12] CLN address review comments --- .../_hist_gradient_boosting/histogram.pyx | 43 +++++++++---------- 1 file changed, 20 insertions(+), 23 deletions(-) diff --git a/sklearn/ensemble/_hist_gradient_boosting/histogram.pyx b/sklearn/ensemble/_hist_gradient_boosting/histogram.pyx index 6ee9305f6e669..924950a052392 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/histogram.pyx +++ b/sklearn/ensemble/_hist_gradient_boosting/histogram.pyx @@ -190,13 +190,12 @@ cdef class HistogramBuilder: feature_idx = f_idx if has_parent_hist and feature_idx == split_feature_idx: - self._compute_histogram_of_split_feature( - feature_idx, - sample_indices, - histograms, - split_bin_start, - split_bin_end, - parent_histograms, + self._compute_histogram_single_feature_from_parent( + feature_idx=feature_idx, + split_bin_start=split_bin_start, + split_bin_end=split_bin_end, + histograms=histograms, + parent_histograms=parent_histograms, ) else: self._compute_histogram_brute_single_feature( @@ -205,29 +204,27 @@ cdef class HistogramBuilder: return histograms - cdef void _compute_histogram_of_split_feature( + cdef void _compute_histogram_single_feature_from_parent( HistogramBuilder self, const int feature_idx, - const 
unsigned int [::1] sample_indices, # IN - hist_struct [:, ::1] histograms, # OUT - const unsigned int split_bin_start, # IN - const unsigned int split_bin_end, # IN + const unsigned int split_bin_start, # IN + const unsigned int split_bin_end, # IN const hist_struct [:, ::1] parent_histograms, # IN - ) noexcept nogil: # OUT + hist_struct [:, ::1] histograms, # OUT + ) noexcept nogil: """Compute the histogram for the feature that was split on.""" cdef: unsigned int bin_idx = 0 - if split_bin_start == 0: - for bin_idx in range(split_bin_end, self.n_bins): - histograms[feature_idx, bin_idx].sum_gradients = 0. - histograms[feature_idx, bin_idx].sum_hessians = 0. - histograms[feature_idx, bin_idx].count = 0 - else: - for bin_idx in range(split_bin_start): - histograms[feature_idx, bin_idx].sum_gradients = 0. - histograms[feature_idx, bin_idx].sum_hessians = 0. - histograms[feature_idx, bin_idx].count = 0 + for bin_idx in range(split_bin_start): + histograms[feature_idx, bin_idx].sum_gradients = 0. + histograms[feature_idx, bin_idx].sum_hessians = 0. + histograms[feature_idx, bin_idx].count = 0 + + for bin_idx in range(split_bin_end, self.n_bins): + histograms[feature_idx, bin_idx].sum_gradients = 0. + histograms[feature_idx, bin_idx].sum_hessians = 0. + histograms[feature_idx, bin_idx].count = 0 for bin_idx in range(split_bin_start, split_bin_end): histograms[feature_idx, bin_idx].sum_gradients = ( From eec5d2b2ace757c765a20b274db252492a0676bc Mon Sep 17 00:00:00 2001 From: Christian Lorentzen Date: Wed, 6 Sep 2023 22:45:38 +0200 Subject: [PATCH 03/12] CLN make linter happy --- sklearn/ensemble/_hist_gradient_boosting/histogram.pyx | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sklearn/ensemble/_hist_gradient_boosting/histogram.pyx b/sklearn/ensemble/_hist_gradient_boosting/histogram.pyx index 924950a052392..6c48ad1341ff4 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/histogram.pyx +++ b/sklearn/ensemble/_hist_gradient_boosting/histogram.pyx @@ -207,10 +207,10 @@ cdef class HistogramBuilder: cdef void _compute_histogram_single_feature_from_parent( HistogramBuilder self, const int feature_idx, - const unsigned int split_bin_start, # IN - const unsigned int split_bin_end, # IN - const hist_struct [:, ::1] parent_histograms, # IN - hist_struct [:, ::1] histograms, # OUT + const unsigned int split_bin_start, # IN + const unsigned int split_bin_end, # IN + const hist_struct [:, ::1] parent_histograms, # IN + hist_struct [:, ::1] histograms, # OUT ) noexcept nogil: """Compute the histogram for the feature that was split on.""" cdef: From f9d22e2cd742121ac821874bb31f951f79c11e41 Mon Sep 17 00:00:00 2001 From: Christian Lorentzen Date: Tue, 14 Nov 2023 21:34:10 +0100 Subject: [PATCH 04/12] MNT move split_info_struct to common.pxd --- .../_hist_gradient_boosting/common.pxd | 20 +++++++++++++++++ .../_hist_gradient_boosting/splitting.pyx | 22 ++----------------- 2 files changed, 22 insertions(+), 20 deletions(-) diff --git a/sklearn/ensemble/_hist_gradient_boosting/common.pxd b/sklearn/ensemble/_hist_gradient_boosting/common.pxd index 3e71f2dc56060..ccc4cbe53712c 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/common.pxd +++ b/sklearn/ensemble/_hist_gradient_boosting/common.pxd @@ -38,6 +38,26 @@ cdef packed struct node_struct: # Only used if is_categorical is True unsigned int bitset_idx + +cdef struct split_info_struct: + # Same as the SplitInfo class, but we need a C struct to use it in the + # nogil sections and to use in arrays. 
+ Y_DTYPE_C gain + int feature_idx + unsigned int bin_idx + unsigned char missing_go_to_left + Y_DTYPE_C sum_gradient_left + Y_DTYPE_C sum_gradient_right + Y_DTYPE_C sum_hessian_left + Y_DTYPE_C sum_hessian_right + unsigned int n_samples_left + unsigned int n_samples_right + Y_DTYPE_C value_left + Y_DTYPE_C value_right + unsigned char is_categorical + BITSET_DTYPE_C left_cat_bitset + + cpdef enum MonotonicConstraint: NO_CST = 0 POS = 1 diff --git a/sklearn/ensemble/_hist_gradient_boosting/splitting.pyx b/sklearn/ensemble/_hist_gradient_boosting/splitting.pyx index 17f5769dfaf14..ac1d2f4e56ba5 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/splitting.pyx +++ b/sklearn/ensemble/_hist_gradient_boosting/splitting.pyx @@ -16,34 +16,16 @@ from libc.string cimport memcpy from .common cimport X_BINNED_DTYPE_C from .common cimport Y_DTYPE_C -from .common cimport hist_struct from .common cimport BITSET_INNER_DTYPE_C from .common cimport BITSET_DTYPE_C from .common cimport MonotonicConstraint +from .common cimport hist_struct +from .common cimport split_info_struct from ._bitset cimport init_bitset from ._bitset cimport set_bitset from ._bitset cimport in_bitset -cdef struct split_info_struct: - # Same as the SplitInfo class, but we need a C struct to use it in the - # nogil sections and to use in arrays. - Y_DTYPE_C gain - int feature_idx - unsigned int bin_idx - unsigned char missing_go_to_left - Y_DTYPE_C sum_gradient_left - Y_DTYPE_C sum_gradient_right - Y_DTYPE_C sum_hessian_left - Y_DTYPE_C sum_hessian_right - unsigned int n_samples_left - unsigned int n_samples_right - Y_DTYPE_C value_left - Y_DTYPE_C value_right - unsigned char is_categorical - BITSET_DTYPE_C left_cat_bitset - - # used in categorical splits for sorting categories by increasing values of # sum_gradients / sum_hessians cdef struct categorical_info: From 65b852be4537063cc420a801eee96367b14b5c93 Mon Sep 17 00:00:00 2001 From: Christian Lorentzen Date: Tue, 14 Nov 2023 21:59:18 +0100 Subject: [PATCH 05/12] ENH support categorical parent feature histograms --- .../_hist_gradient_boosting/grower.py | 13 +- .../_hist_gradient_boosting/histogram.pyx | 128 ++++++++++++------ 2 files changed, 92 insertions(+), 49 deletions(-) diff --git a/sklearn/ensemble/_hist_gradient_boosting/grower.py b/sklearn/ensemble/_hist_gradient_boosting/grower.py index c0705842bd6e3..1d95b6061b1ba 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/grower.py +++ b/sklearn/ensemble/_hist_gradient_boosting/grower.py @@ -574,13 +574,11 @@ def split_next(self): if n_samples_left < n_samples_right: smallest_child = left_child_node largest_child = right_child_node - split_bin_start = 0 - split_bin_end = node.split_info.bin_idx + 1 + is_left_child = True else: smallest_child = right_child_node largest_child = left_child_node - split_bin_start = node.split_info.bin_idx + 1 - split_bin_end = self.histogram_builder.n_bins + is_left_child = False # We use the brute O(n_samples) method on the child that has the # smallest number of samples, and the subtraction trick O(n_bins) @@ -590,10 +588,9 @@ def split_next(self): smallest_child.histograms = self.histogram_builder.compute_histograms_brute( smallest_child.sample_indices, smallest_child.allowed_features, - node.split_info.feature_idx, - split_bin_start, - split_bin_end, - node.histograms, + parent_split_info=node.split_info, + parent_histograms=node.histograms, + is_left_child=is_left_child, ) largest_child.histograms = ( self.histogram_builder.compute_histograms_subtraction( diff --git 
a/sklearn/ensemble/_hist_gradient_boosting/histogram.pyx b/sklearn/ensemble/_hist_gradient_boosting/histogram.pyx index 6c48ad1341ff4..8de1ff0659ece 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/histogram.pyx +++ b/sklearn/ensemble/_hist_gradient_boosting/histogram.pyx @@ -7,10 +7,12 @@ from cython.parallel import prange import numpy as np +from .common cimport BITSET_INNER_DTYPE_C from .common import HISTOGRAM_DTYPE -from .common cimport hist_struct from .common cimport X_BINNED_DTYPE_C from .common cimport G_H_DTYPE_C +from .common cimport hist_struct +from ._bitset cimport in_bitset # Notes: @@ -102,12 +104,11 @@ cdef class HistogramBuilder: def compute_histograms_brute( HistogramBuilder self, - const unsigned int [::1] sample_indices, # IN - const unsigned int [:] allowed_features=None, # IN - const int split_feature_idx=-1, # IN - const unsigned int split_bin_start=0, # IN - const unsigned int split_bin_end=0, # IN + const unsigned int [::1] sample_indices, # IN + const unsigned int [:] allowed_features=None, # IN + object parent_split_info=None, # IN const hist_struct [:, ::1] parent_histograms=None, # IN + const bint is_left_child=True, # IN ): """Compute the histograms of the node by scanning through all the data. @@ -122,18 +123,16 @@ cdef class HistogramBuilder: Indices of the features that are allowed by interaction constraints to be split. - split_feature_idx : int - Feature index of the feature that the parent node was split on. - - split_bin_start : unsigned int - Start of the bin indices belonging to the feature that was split on. - - split_bin_end : unsigned int - End (+1) of the bin indices belonging to the feature that was split on. + parent_split_info : split_info_struct + The split_info of the parent node. parent_histograms : ndarray of HISTOGRAM_DTYPE, shape (n_features, n_bins) The histograms of the parent. + is_left_child : bool + True if the histogram of a left child is being computed, False for a right + child. + Returns ------- histograms : ndarray of HISTOGRAM_DTYPE, shape (n_features, n_bins) @@ -157,9 +156,30 @@ cdef class HistogramBuilder: dtype=HISTOGRAM_DTYPE ) bint has_interaction_cst = allowed_features is not None - bint has_parent_hist = split_feature_idx >= 0 + # Feature index of the feature that the parent node was split on. + int split_feature_idx + # Start of the bin indices belonging to the feature that was split on. + unsigned int split_bin_start + # End (+1) of the bin indices belonging to the feature that was split on. 
+ unsigned int split_bin_end + unsigned char is_categorical + BITSET_INNER_DTYPE_C [:] left_cat_bitset + bint has_parent_hist = False int n_threads = self.n_threads + if parent_split_info is not None: + has_parent_hist = True + split_feature_idx = parent_split_info.feature_idx + is_categorical = parent_split_info.is_categorical + if is_left_child: + split_bin_start = 0 + split_bin_end = parent_split_info.bin_idx + 1 + else: + split_bin_start = parent_split_info.bin_idx + 1 + split_bin_end = self.n_bins + if is_categorical: + left_cat_bitset = parent_split_info.left_cat_bitset + if has_interaction_cst: n_allowed_features = allowed_features.shape[0] @@ -194,6 +214,9 @@ cdef class HistogramBuilder: feature_idx=feature_idx, split_bin_start=split_bin_start, split_bin_end=split_bin_end, + is_categorical=is_categorical, + left_cat_bitset=left_cat_bitset, + is_left_child=is_left_child, histograms=histograms, parent_histograms=parent_histograms, ) @@ -204,38 +227,61 @@ cdef class HistogramBuilder: return histograms - cdef void _compute_histogram_single_feature_from_parent( + cpdef void _compute_histogram_single_feature_from_parent( HistogramBuilder self, const int feature_idx, - const unsigned int split_bin_start, # IN - const unsigned int split_bin_end, # IN - const hist_struct [:, ::1] parent_histograms, # IN - hist_struct [:, ::1] histograms, # OUT + const unsigned int split_bin_start, # IN + const unsigned int split_bin_end, # IN + const unsigned char is_categorical, # IN + const BITSET_INNER_DTYPE_C [:] left_cat_bitset, # IN + const bint is_left_child, # IN + const hist_struct [:, ::1] parent_histograms, # IN + hist_struct [:, ::1] histograms, # OUT ) noexcept nogil: """Compute the histogram for the feature that was split on.""" cdef: unsigned int bin_idx = 0 - - for bin_idx in range(split_bin_start): - histograms[feature_idx, bin_idx].sum_gradients = 0. - histograms[feature_idx, bin_idx].sum_hessians = 0. - histograms[feature_idx, bin_idx].count = 0 - - for bin_idx in range(split_bin_end, self.n_bins): - histograms[feature_idx, bin_idx].sum_gradients = 0. - histograms[feature_idx, bin_idx].sum_hessians = 0. - histograms[feature_idx, bin_idx].count = 0 - - for bin_idx in range(split_bin_start, split_bin_end): - histograms[feature_idx, bin_idx].sum_gradients = ( - parent_histograms[feature_idx, bin_idx].sum_gradients - ) - histograms[feature_idx, bin_idx].sum_hessians = ( - parent_histograms[feature_idx, bin_idx].sum_hessians - ) - histograms[feature_idx, bin_idx].count = ( - parent_histograms[feature_idx, bin_idx].count - ) + unsigned char in_left_binset + BITSET_INNER_DTYPE_C* p_left_cat_bitset = &left_cat_bitset[0] + + if is_categorical: + for bin_idx in range(self.n_bins): + in_left_binset = in_bitset(p_left_cat_bitset, bin_idx) + if (is_left_child and in_left_binset) or (not is_left_child and not in_left_binset): + histograms[feature_idx, bin_idx].sum_gradients = ( + parent_histograms[feature_idx, bin_idx].sum_gradients + ) + histograms[feature_idx, bin_idx].sum_hessians = ( + parent_histograms[feature_idx, bin_idx].sum_hessians + ) + histograms[feature_idx, bin_idx].count = ( + parent_histograms[feature_idx, bin_idx].count + ) + else: + histograms[feature_idx, bin_idx].sum_gradients = 0. + histograms[feature_idx, bin_idx].sum_hessians = 0. + histograms[feature_idx, bin_idx].count = 0 + else: + for bin_idx in range(split_bin_start): + histograms[feature_idx, bin_idx].sum_gradients = 0. + histograms[feature_idx, bin_idx].sum_hessians = 0. 
+ histograms[feature_idx, bin_idx].count = 0 + + for bin_idx in range(split_bin_end, self.n_bins): + histograms[feature_idx, bin_idx].sum_gradients = 0. + histograms[feature_idx, bin_idx].sum_hessians = 0. + histograms[feature_idx, bin_idx].count = 0 + + for bin_idx in range(split_bin_start, split_bin_end): + histograms[feature_idx, bin_idx].sum_gradients = ( + parent_histograms[feature_idx, bin_idx].sum_gradients + ) + histograms[feature_idx, bin_idx].sum_hessians = ( + parent_histograms[feature_idx, bin_idx].sum_hessians + ) + histograms[feature_idx, bin_idx].count = ( + parent_histograms[feature_idx, bin_idx].count + ) cdef void _compute_histogram_brute_single_feature( HistogramBuilder self, From b512c3ac2813d25ee15bb1b7e605cd161cc734e4 Mon Sep 17 00:00:00 2001 From: Christian Lorentzen Date: Tue, 14 Nov 2023 22:00:39 +0100 Subject: [PATCH 06/12] TST add test for _compute_histogram_single_feature_from_parent --- .../tests/test_histogram.py | 89 +++++++++++++++++++ 1 file changed, 89 insertions(+) diff --git a/sklearn/ensemble/_hist_gradient_boosting/tests/test_histogram.py b/sklearn/ensemble/_hist_gradient_boosting/tests/test_histogram.py index 99f74b0f542ee..86d25276995d3 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/tests/test_histogram.py +++ b/sklearn/ensemble/_hist_gradient_boosting/tests/test_histogram.py @@ -2,12 +2,14 @@ import pytest from numpy.testing import assert_allclose, assert_array_equal +from sklearn.ensemble._hist_gradient_boosting._bitset import set_bitset_memoryview from sklearn.ensemble._hist_gradient_boosting.common import ( G_H_DTYPE, HISTOGRAM_DTYPE, X_BINNED_DTYPE, ) from sklearn.ensemble._hist_gradient_boosting.histogram import ( + HistogramBuilder, _build_histogram, _build_histogram_naive, _build_histogram_no_hessian, @@ -15,6 +17,7 @@ _build_histogram_root_no_hessian, _subtract_histograms, ) +from sklearn.ensemble._hist_gradient_boosting.splitting import SplitInfo @pytest.mark.parametrize("build_func", [_build_histogram_naive, _build_histogram]) @@ -237,3 +240,89 @@ def test_hist_subtraction(constant_hessian): for key in ("count", "sum_hessians", "sum_gradients"): assert_allclose(hist_left[key], hist_left_sub[key], rtol=1e-6) assert_allclose(hist_right[key], hist_right_sub[key], rtol=1e-6) + + +@pytest.mark.parametrize("is_categorical", [False, True]) +def test_compute_histogram_single_feature_from_parent(is_categorical): + """Test _compute_histogram_single_feature_from_parent.""" + n_bins = 4 + X_binned = np.array([0, 1, 2, 3, 0, 1, 2, 3], dtype=X_BINNED_DTYPE)[:, None] + gradients = np.array([-2, -1, 1, 2, -2, -1, 1, 2], dtype=G_H_DTYPE) + hessians = np.array([-4, -2, 1, 2, -4, -2, 1, 2], dtype=G_H_DTYPE) + # Only bins 0 and 1 go to (child) histogram. 
+ sample_indices = np.array([0, 1, 4, 5]).astype(np.uint32) + left_cat_bitset = np.zeros(shape=(8,), dtype=np.uint32) + set_bitset_memoryview(left_cat_bitset, 0) + set_bitset_memoryview(left_cat_bitset, 1) + assert left_cat_bitset[0] == 3 # 2**0 + 2**1 for bins 0 and 1 + + histogram_builder = HistogramBuilder( + X_binned, + n_bins, + gradients, + hessians, + hessians_are_constant=False, + n_threads=1, + ) + split_info = SplitInfo( + gain=1, # irrelevant for now + feature_idx=0, + bin_idx=1, + missing_go_to_left=True, # irrelevant for now + sum_gradient_left=0, # irrelevant for now + sum_hessian_left=0, # irrelevant for now + sum_gradient_right=0, # irrelevant for now + sum_hessian_right=0, # irrelevant for now + n_samples_left=0, # irrelevant for now + n_samples_right=0, # irrelevant for now + value_left=0, # irrelevant for now + value_right=0, # irrelevant for now + is_categorical=is_categorical, + left_cat_bitset=left_cat_bitset, + ) + hist_parent = np.zeros((1, n_bins), dtype=HISTOGRAM_DTYPE) + hist_parent[0, :]["count"] = 2 + hist_parent[0, 0]["sum_gradients"] = -2 * 2 + hist_parent[0, 1]["sum_gradients"] = -1 * 2 + hist_parent[0, 2]["sum_gradients"] = 1 * 2 + hist_parent[0, 3]["sum_gradients"] = 2 * 2 + hist_parent[0, 0]["sum_hessians"] = -4 * 2 + hist_parent[0, 1]["sum_hessians"] = -2 * 2 + hist_parent[0, 2]["sum_hessians"] = 1 * 2 + hist_parent[0, 3]["sum_hessians"] = 2 * 2 + + hist1 = np.asarray( + histogram_builder.compute_histograms_brute( + sample_indices=sample_indices, + allowed_features=None, + parent_split_info=None, + parent_histograms=None, + is_left_child=True, + ) + ) + + hist2 = np.asanyarray( + histogram_builder.compute_histograms_brute( + sample_indices=sample_indices, + allowed_features=None, + parent_split_info=split_info, + parent_histograms=hist_parent, + is_left_child=True, + ) + ) + + hist3 = np.zeros((1, n_bins), dtype=HISTOGRAM_DTYPE) + histogram_builder._compute_histogram_single_feature_from_parent( + feature_idx=0, + split_bin_start=0, + split_bin_end=1 + 1, + is_categorical=is_categorical, + left_cat_bitset=left_cat_bitset, + is_left_child=True, + histograms=hist3, + parent_histograms=hist_parent, + ) + + for key in ("count", "sum_hessians", "sum_gradients"): + assert_allclose(hist2[key], hist1[key], rtol=1e-6) + assert_allclose(hist3[key], hist1[key], rtol=1e-6) From a90bdc0b03aee3b51049c3c2358471ec4df6ac57 Mon Sep 17 00:00:00 2001 From: Christian Lorentzen Date: Tue, 14 Nov 2023 22:46:54 +0100 Subject: [PATCH 07/12] DOC add whatsnew --- doc/whats_new/v1.4.rst | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/doc/whats_new/v1.4.rst b/doc/whats_new/v1.4.rst index 5a9df10e8c49f..91f976465d95d 100644 --- a/doc/whats_new/v1.4.rst +++ b/doc/whats_new/v1.4.rst @@ -303,6 +303,13 @@ Changelog :pr:`13649` by :user:`Samuel Ronsin `, initiated by :user:`Patrick O'Reilly `. +- |Efficiency| :class:`ensemble.HistGradientBoostingClassifier` and + :class:`ensemble.HistGradientBoostingRegressor` are faster, roughly `1/n_features` + faster to before with a single thread. The estimators now reuse the parent's node + histogram for the single feature that was split on, i.e. just copy the parent's node + histogram values for the corresponding bins. + :pr:`26189` by :user:`Christian Lorentzen `. + - |Efficiency| :class:`ensemble.GradientBoostingClassifier` is faster, for binary and in particular for multiclass problems thanks to the private loss function module. 
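The efficiency entry above can be made concrete with a bit of arithmetic: per non-root node, only n_features - 1 of the n_features per-feature histograms still need an O(n_samples) scan, the remaining one being copied from the parent, so the histogram-building work per node shrinks by roughly a fraction 1/n_features (about 10% for 10 features); the end-to-end effect on fit time depends on how much of the fit is spent building histograms. A rough way to observe it is to time a single-threaded fit before and after this series (sketch only; it assumes OMP_NUM_THREADS=1 is exported to pin scikit-learn to one thread, and absolute numbers depend on machine and data):

    from time import perf_counter

    from sklearn.datasets import make_regression
    from sklearn.ensemble import HistGradientBoostingRegressor

    X, y = make_regression(n_samples=100_000, n_features=10, random_state=0)
    est = HistGradientBoostingRegressor(max_iter=100, random_state=0)

    tic = perf_counter()
    est.fit(X, y)
    print(f"fit time: {perf_counter() - tic:.2f} s")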
From 8e4e1ce3c9427dde33377bb9abe7495a4a6b0ed2 Mon Sep 17 00:00:00 2001 From: Christian Lorentzen Date: Tue, 14 Nov 2023 22:49:42 +0100 Subject: [PATCH 08/12] DOC correct formula for speed up --- doc/whats_new/v1.4.rst | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/doc/whats_new/v1.4.rst b/doc/whats_new/v1.4.rst index 91f976465d95d..023d2f687fa9c 100644 --- a/doc/whats_new/v1.4.rst +++ b/doc/whats_new/v1.4.rst @@ -304,10 +304,10 @@ Changelog initiated by :user:`Patrick O'Reilly `. - |Efficiency| :class:`ensemble.HistGradientBoostingClassifier` and - :class:`ensemble.HistGradientBoostingRegressor` are faster, roughly `1/n_features` - faster to before with a single thread. The estimators now reuse the parent's node - histogram for the single feature that was split on, i.e. just copy the parent's node - histogram values for the corresponding bins. + :class:`ensemble.HistGradientBoostingRegressor` are faster, roughly + `1 - 1/n_features` faster to before with a single thread. The estimators now reuse + the parent's node histogram for the single feature that was split on, i.e. just copy + the parent's node histogram values for the corresponding bins. :pr:`26189` by :user:`Christian Lorentzen `. - |Efficiency| :class:`ensemble.GradientBoostingClassifier` is faster, From 5f13884d90c5fa1627cd1f5e243bb1d273feb880 Mon Sep 17 00:00:00 2001 From: Christian Lorentzen Date: Mon, 18 Dec 2023 18:46:23 +0100 Subject: [PATCH 09/12] FIX assign pointer to left_cat_bitset only if exist --- sklearn/ensemble/_hist_gradient_boosting/histogram.pyx | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sklearn/ensemble/_hist_gradient_boosting/histogram.pyx b/sklearn/ensemble/_hist_gradient_boosting/histogram.pyx index 37cf475a6ce82..f8976533403e0 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/histogram.pyx +++ b/sklearn/ensemble/_hist_gradient_boosting/histogram.pyx @@ -242,9 +242,10 @@ cdef class HistogramBuilder: cdef: unsigned int bin_idx = 0 unsigned char in_left_binset - BITSET_INNER_DTYPE_C* p_left_cat_bitset = &left_cat_bitset[0] + BITSET_INNER_DTYPE_C* p_left_cat_bitset if is_categorical: + p_left_cat_bitset = &left_cat_bitset[0] for bin_idx in range(self.n_bins): in_left_binset = in_bitset(p_left_cat_bitset, bin_idx) if (is_left_child and in_left_binset) or (not is_left_child and not in_left_binset): From 7be6bfea00f7c8e05f7c23ae70bd13c7d17a42cf Mon Sep 17 00:00:00 2001 From: Christian Lorentzen Date: Sat, 22 Feb 2025 18:18:11 +0100 Subject: [PATCH 10/12] CLN address review comments --- .../_hist_gradient_boosting/grower.py | 15 +++++------ .../_hist_gradient_boosting/histogram.pyx | 27 +++++++++---------- 2 files changed, 20 insertions(+), 22 deletions(-) diff --git a/sklearn/ensemble/_hist_gradient_boosting/grower.py b/sklearn/ensemble/_hist_gradient_boosting/grower.py index eebfd7909c651..ac291b616bbb5 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/grower.py +++ b/sklearn/ensemble/_hist_gradient_boosting/grower.py @@ -581,14 +581,13 @@ def split_next(self): # (using histogram subtraction). 
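The histogram subtraction mentioned in this comment is the complement of the parent-histogram copy above: every parent sample lands in exactly one child, so the sibling's histogram is the parent's minus the child's, an O(n_features * n_bins) operation independent of n_samples. A field-by-field sketch with illustrative names (not the library's _subtract_histograms routine):

    import numpy as np

    def subtract_histograms(parent_hist, child_hist):
        # Works on structured arrays of shape (n_features, n_bins) holding
        # sum_gradients, sum_hessians and count per bin.
        sibling = np.empty_like(parent_hist)
        for field in ("sum_gradients", "sum_hessians", "count"):
            sibling[field] = parent_hist[field] - child_hist[field]
        return sibling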
n_samples_left = left_child_node.sample_indices.shape[0] n_samples_right = right_child_node.sample_indices.shape[0] - if n_samples_left < n_samples_right: - smallest_child = left_child_node - largest_child = right_child_node - is_left_child = True - else: - smallest_child = right_child_node - largest_child = left_child_node - is_left_child = False + is_left_child = n_samples_left < n_samples_right + if is_left_child: + smallest_child = left_child_node + largest_child = right_child_node + else: + smallest_child = right_child_node + largest_child = left_child_node # We use the brute O(n_samples) method on the child that has the # smallest number of samples, and the subtraction trick O(n_bins) diff --git a/sklearn/ensemble/_hist_gradient_boosting/histogram.pyx b/sklearn/ensemble/_hist_gradient_boosting/histogram.pyx index f8976533403e0..b72bea5034159 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/histogram.pyx +++ b/sklearn/ensemble/_hist_gradient_boosting/histogram.pyx @@ -157,26 +157,25 @@ cdef class HistogramBuilder: ) bint has_interaction_cst = allowed_features is not None # Feature index of the feature that the parent node was split on. - int split_feature_idx + int parent_split_feature_idx # Start of the bin indices belonging to the feature that was split on. - unsigned int split_bin_start + unsigned int parent_split_bin_start # End (+1) of the bin indices belonging to the feature that was split on. - unsigned int split_bin_end + unsigned int parent_split_bin_end unsigned char is_categorical BITSET_INNER_DTYPE_C [:] left_cat_bitset - bint has_parent_hist = False + bint has_parent_hist = parent_split_info is not None int n_threads = self.n_threads - if parent_split_info is not None: - has_parent_hist = True - split_feature_idx = parent_split_info.feature_idx + if has_parent_hist: + parent_split_feature_idx = parent_split_info.feature_idx is_categorical = parent_split_info.is_categorical if is_left_child: - split_bin_start = 0 - split_bin_end = parent_split_info.bin_idx + 1 + parent_split_bin_start = 0 + parent_split_bin_end = parent_split_info.bin_idx + 1 else: - split_bin_start = parent_split_info.bin_idx + 1 - split_bin_end = self.n_bins + parent_split_bin_start = parent_split_info.bin_idx + 1 + parent_split_bin_end = self.n_bins if is_categorical: left_cat_bitset = parent_split_info.left_cat_bitset @@ -209,11 +208,11 @@ cdef class HistogramBuilder: else: feature_idx = f_idx - if has_parent_hist and feature_idx == split_feature_idx: + if has_parent_hist and feature_idx == parent_split_feature_idx: self._compute_histogram_single_feature_from_parent( feature_idx=feature_idx, - split_bin_start=split_bin_start, - split_bin_end=split_bin_end, + split_bin_start=parent_split_bin_start, + split_bin_end=parent_split_bin_end, is_categorical=is_categorical, left_cat_bitset=left_cat_bitset, is_left_child=is_left_child, From ec8ca5dc87d61bd55c3132df9a89cc3d68d3bd69 Mon Sep 17 00:00:00 2001 From: Christian Lorentzen Date: Sat, 22 Feb 2025 18:29:36 +0100 Subject: [PATCH 11/12] DOC new whatsnew entry --- .../upcoming_changes/sklearn.ensemble/26189.efficiency.rst | 6 ++++++ doc/whats_new/v1.4.rst | 7 ------- 2 files changed, 6 insertions(+), 7 deletions(-) create mode 100644 doc/whats_new/upcoming_changes/sklearn.ensemble/26189.efficiency.rst diff --git a/doc/whats_new/upcoming_changes/sklearn.ensemble/26189.efficiency.rst b/doc/whats_new/upcoming_changes/sklearn.ensemble/26189.efficiency.rst new file mode 100644 index 0000000000000..3b83889e3e226 --- /dev/null +++ 
b/doc/whats_new/upcoming_changes/sklearn.ensemble/26189.efficiency.rst @@ -0,0 +1,6 @@ +- :class:`ensemble.HistGradientBoostingClassifier` and + :class:`ensemble.HistGradientBoostingRegressor` are faster, roughly + `1 - 1/n_features` faster to before with a single thread. The estimators now reuse + the parent's node histogram for the single feature that was split on, i.e. just copy + the parent's node histogram values for the corresponding bins. + :pr:`26189` by :user:`Christian Lorentzen `. diff --git a/doc/whats_new/v1.4.rst b/doc/whats_new/v1.4.rst index f8fca7e458330..29d4d87e68748 100644 --- a/doc/whats_new/v1.4.rst +++ b/doc/whats_new/v1.4.rst @@ -645,13 +645,6 @@ Changelog In effect, less memory has to be allocated and deallocated. :pr:`27865` by :user:`Christian Lorentzen `. -- |Efficiency| :class:`ensemble.HistGradientBoostingClassifier` and - :class:`ensemble.HistGradientBoostingRegressor` are faster, roughly - `1 - 1/n_features` faster to before with a single thread. The estimators now reuse - the parent's node histogram for the single feature that was split on, i.e. just copy - the parent's node histogram values for the corresponding bins. - :pr:`26189` by :user:`Christian Lorentzen `. - - |Efficiency| :class:`ensemble.GradientBoostingClassifier` is faster, for binary and in particular for multiclass problems thanks to the private loss function module. From 392d553798b4b683cf812ce699e1727a7cb446d3 Mon Sep 17 00:00:00 2001 From: Christian Lorentzen Date: Sat, 22 Feb 2025 18:35:20 +0100 Subject: [PATCH 12/12] FIX code indentation --- sklearn/ensemble/_hist_gradient_boosting/grower.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/sklearn/ensemble/_hist_gradient_boosting/grower.py b/sklearn/ensemble/_hist_gradient_boosting/grower.py index 8b5830030b16a..ca30342616ddd 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/grower.py +++ b/sklearn/ensemble/_hist_gradient_boosting/grower.py @@ -591,12 +591,12 @@ def split_next(self): n_samples_left = left_child_node.sample_indices.shape[0] n_samples_right = right_child_node.sample_indices.shape[0] is_left_child = n_samples_left < n_samples_right - if is_left_child: - smallest_child = left_child_node - largest_child = right_child_node - else: - smallest_child = right_child_node - largest_child = left_child_node + if is_left_child: + smallest_child = left_child_node + largest_child = right_child_node + else: + smallest_child = right_child_node + largest_child = left_child_node # We use the brute O(n_samples) method on the child that has the # smallest number of samples, and the subtraction trick O(n_bins)
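For the categorical branch added in this series, which bins of the split feature keep their parent statistics is decided by left_cat_bitset rather than by a contiguous bin range: a bin (category) is copied into the left child's histogram exactly when its bit is set, and into the right child's exactly when it is not. A plain-Python sketch of that final behavior, complementing the bin-range sketch given earlier for the numerical case (in_bitset here is a pure-Python stand-in for the Cython helper of the same name, and HIST_DTYPE mirrors HISTOGRAM_DTYPE):

    import numpy as np

    HIST_DTYPE = np.dtype(
        [("sum_gradients", "f8"), ("sum_hessians", "f8"), ("count", "u4")]
    )

    def in_bitset(bitset, value):
        # One bit per category, packed into 32-bit words; setting bits 0 and 1
        # gives bitset[0] == 3, as asserted in the new histogram test.
        return (bitset[value // 32] >> (value % 32)) & 1

    def child_hist_from_parent_categorical(parent_hist, left_cat_bitset, is_left_child):
        child = np.zeros(parent_hist.shape[0], dtype=HIST_DTYPE)
        for bin_idx in range(parent_hist.shape[0]):
            goes_left = bool(in_bitset(left_cat_bitset, bin_idx))
            # Copy the parent bin iff its category belongs to this child.
            if goes_left == is_left_child:
                child[bin_idx] = parent_hist[bin_idx]
        return child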