diff --git a/doc/whats_new/v1.4.rst b/doc/whats_new/v1.4.rst
index 8944c1c292284..204cbb9e9c890 100644
--- a/doc/whats_new/v1.4.rst
+++ b/doc/whats_new/v1.4.rst
@@ -353,6 +353,12 @@ Changelog
   :pr:`13649` by :user:`Samuel Ronsin <samronsin>`,
   initiated by :user:`Patrick O'Reilly <pat-oreilly>`.
 
+- |Efficiency| :class:`ensemble.HistGradientBoostingClassifier` and
+  :class:`ensemble.HistGradientBoostingRegressor` are now a bit faster by reusing
+  the parent node's histogram as children node's histogram in the subtraction trick.
+  In effect, less memory has to be allocated and deallocated.
+  :pr:`27865` by :user:`Christian Lorentzen <lorentzenchr>`.
+
 - |Efficiency| :class:`ensemble.GradientBoostingClassifier` is faster,
   for binary and in particular for multiclass problems thanks to the private loss
   function module.
diff --git a/sklearn/ensemble/_hist_gradient_boosting/grower.py b/sklearn/ensemble/_hist_gradient_boosting/grower.py
index 44392362fd60c..8cf40d2a64539 100644
--- a/sklearn/ensemble/_hist_gradient_boosting/grower.py
+++ b/sklearn/ensemble/_hist_gradient_boosting/grower.py
@@ -603,6 +603,9 @@ def split_next(self):
                     smallest_child.allowed_features,
                 )
             )
+            # node.histograms is reused in largest_child.histograms. To break cyclic
+            # memory references and help garbage collection, we set it to None.
+            node.histograms = None
             self.total_compute_hist_time += time() - tic
 
             tic = time()
diff --git a/sklearn/ensemble/_hist_gradient_boosting/histogram.pyx b/sklearn/ensemble/_hist_gradient_boosting/histogram.pyx
index 336ba372cb53a..5af972d85f0c4 100644
--- a/sklearn/ensemble/_hist_gradient_boosting/histogram.pyx
+++ b/sklearn/ensemble/_hist_gradient_boosting/histogram.pyx
@@ -224,7 +224,7 @@ cdef class HistogramBuilder:
 
     def compute_histograms_subtraction(
         HistogramBuilder self,
-        hist_struct [:, ::1] parent_histograms,        # IN
+        hist_struct [:, ::1] parent_histograms,        # IN and OUT
         hist_struct [:, ::1] sibling_histograms,       # IN
         const unsigned int [:] allowed_features=None,  # IN
     ):
@@ -252,16 +252,14 @@ cdef class HistogramBuilder:
         -------
         histograms : ndarray of HISTOGRAM_DTYPE, shape(n_features, n_bins)
             The computed histograms of the current node.
+            We repurpose parent_histograms for this and don't need to allocate new
+            memory.
         """
 
         cdef:
             int feature_idx
             int f_idx
             int n_allowed_features = self.n_features
-            hist_struct [:, ::1] histograms = np.empty(
-                shape=(self.n_features, self.n_bins),
-                dtype=HISTOGRAM_DTYPE
-            )
             bint has_interaction_cst = allowed_features is not None
             int n_threads = self.n_threads
 
@@ -281,9 +279,8 @@ cdef class HistogramBuilder:
                 self.n_bins,
                 parent_histograms,
                 sibling_histograms,
-                histograms,
             )
-        return histograms
+        return parent_histograms
 
 
 cpdef void _build_histogram_naive(
@@ -313,25 +310,16 @@ cpdef void _build_histogram_naive(
 cpdef void _subtract_histograms(
         const int feature_idx,
         unsigned int n_bins,
-        hist_struct [:, ::1] hist_a,  # IN
+        hist_struct [:, ::1] hist_a,  # IN and OUT
         hist_struct [:, ::1] hist_b,  # IN
-        hist_struct [:, ::1] out) noexcept nogil:  # OUT
-    """compute (hist_a - hist_b) in out"""
+) noexcept nogil:  # OUT
+    """compute hist_a = hist_a - hist_b"""
     cdef:
         unsigned int i = 0
     for i in range(n_bins):
-        out[feature_idx, i].sum_gradients = (
-            hist_a[feature_idx, i].sum_gradients -
-            hist_b[feature_idx, i].sum_gradients
-        )
-        out[feature_idx, i].sum_hessians = (
-            hist_a[feature_idx, i].sum_hessians -
-            hist_b[feature_idx, i].sum_hessians
-        )
-        out[feature_idx, i].count = (
-            hist_a[feature_idx, i].count -
-            hist_b[feature_idx, i].count
-        )
+        hist_a[feature_idx, i].sum_gradients -= hist_b[feature_idx, i].sum_gradients
+        hist_a[feature_idx, i].sum_hessians -= hist_b[feature_idx, i].sum_hessians
+        hist_a[feature_idx, i].count -= hist_b[feature_idx, i].count
 
 
 cpdef void _build_histogram(
diff --git a/sklearn/ensemble/_hist_gradient_boosting/tests/test_histogram.py b/sklearn/ensemble/_hist_gradient_boosting/tests/test_histogram.py
index 99f74b0f542ee..22375c7d4ea2c 100644
--- a/sklearn/ensemble/_hist_gradient_boosting/tests/test_histogram.py
+++ b/sklearn/ensemble/_hist_gradient_boosting/tests/test_histogram.py
@@ -229,10 +229,10 @@ def test_hist_subtraction(constant_hessian):
             hist_right,
         )
 
-    hist_left_sub = np.zeros((1, n_bins), dtype=HISTOGRAM_DTYPE)
-    hist_right_sub = np.zeros((1, n_bins), dtype=HISTOGRAM_DTYPE)
-    _subtract_histograms(0, n_bins, hist_parent, hist_right, hist_left_sub)
-    _subtract_histograms(0, n_bins, hist_parent, hist_left, hist_right_sub)
+    hist_left_sub = np.copy(hist_parent)
+    hist_right_sub = np.copy(hist_parent)
+    _subtract_histograms(0, n_bins, hist_left_sub, hist_right)
+    _subtract_histograms(0, n_bins, hist_right_sub, hist_left)
 
     for key in ("count", "sum_hessians", "sum_gradients"):
         assert_allclose(hist_left[key], hist_left_sub[key], rtol=1e-6)