From 418494f1de3e43036bdcbc768fddf2e13849aa9e Mon Sep 17 00:00:00 2001
From: Nicolas Hug <contact@nicolas-hug.com>
Date: Wed, 17 Jul 2019 09:44:53 -0400
Subject: [PATCH 1/3] Better histogram initialization routines

---
 .../_hist_gradient_boosting/grower.py         |  5 +++
 .../_hist_gradient_boosting/histogram.pyx     | 43 +++++++++++++++----
 2 files changed, 40 insertions(+), 8 deletions(-)

diff --git a/sklearn/ensemble/_hist_gradient_boosting/grower.py b/sklearn/ensemble/_hist_gradient_boosting/grower.py
index 7eec680082e97..7816187cfdfad 100644
--- a/sklearn/ensemble/_hist_gradient_boosting/grower.py
+++ b/sklearn/ensemble/_hist_gradient_boosting/grower.py
@@ -389,6 +389,11 @@ def split_next(self):
                 self._compute_best_split_and_push(right_child_node)
             self.total_find_split_time += time() - tic
 
+        # Both children now have their split_info computed. We don't need the
+        # histogram of the parent anymore, so we can reuse its allocated space
+        # for other histograms.
+        self.histogram_builder.mark_as_available(node.histograms)
+
         return left_child_node, right_child_node
 
     def _finalize_leaf(self, node):
diff --git a/sklearn/ensemble/_hist_gradient_boosting/histogram.pyx b/sklearn/ensemble/_hist_gradient_boosting/histogram.pyx
index c83fa0c79db71..63ab091097840 100644
--- a/sklearn/ensemble/_hist_gradient_boosting/histogram.pyx
+++ b/sklearn/ensemble/_hist_gradient_boosting/histogram.pyx
@@ -83,6 +83,7 @@ cdef class HistogramBuilder:
         G_H_DTYPE_C [::1] ordered_gradients
         G_H_DTYPE_C [::1] ordered_hessians
         unsigned char hessians_are_constant
+        list available_histograms
 
     def __init__(self, const X_BINNED_DTYPE_C [::1, :] X_binned,
                  unsigned int max_bins, G_H_DTYPE_C [::1] gradients,
@@ -101,6 +102,27 @@ cdef class HistogramBuilder:
         self.ordered_hessians = hessians.copy()
         self.hessians_are_constant = hessians_are_constant
 
+        # list of histograms that can be re-used for other nodes. These are the
+        # histograms of nodes for which the split_info of both children has
+        # already been computed.
+        self.available_histograms = []
+
+    def allocate_or_reuse_histograms(HistogramBuilder self):
+        """Return a non-initialized histograms array.
+
+        The array is allocated only if needed.
+        """
+        if self.available_histograms:
+            return self.available_histograms.pop()
+        else:
+            return np.empty(
+                shape=(self.n_features, self.max_bins),
+                dtype=HISTOGRAM_DTYPE
+            )
+
+    def mark_as_available(HistogramBuilder self, histograms):
+        self.available_histograms.append(histograms)
+
     def compute_histograms_brute(
             HistogramBuilder self,
             const unsigned int [::1] sample_indices):  # IN
@@ -130,10 +152,9 @@ cdef class HistogramBuilder:
             G_H_DTYPE_C [::1] gradients = self.gradients
             G_H_DTYPE_C [::1] ordered_hessians = self.ordered_hessians
             G_H_DTYPE_C [::1] hessians = self.hessians
-            hist_struct [:, ::1] histograms = np.zeros(
-                shape=(self.n_features, self.max_bins),
-                dtype=HISTOGRAM_DTYPE
-            )
+            hist_struct [:, ::1] histograms
+
+        histograms = self.allocate_or_reuse_histograms()
 
         with nogil:
             n_samples = sample_indices.shape[0]
@@ -175,6 +196,13 @@ cdef class HistogramBuilder:
                 self.ordered_hessians[:n_samples]
             unsigned char hessians_are_constant = \
                 self.hessians_are_constant
+            unsigned int bin_idx = 0
+
+        # Need to initialize histograms to 0 since all the helpers use +=
+        for bin_idx in range(self.max_bins):
+            histograms[feature_idx, bin_idx].sum_gradients = 0.
+            histograms[feature_idx, bin_idx].sum_hessians = 0.
+            histograms[feature_idx, bin_idx].count = 0
 
         if root_node:
             if hessians_are_constant:
@@ -225,10 +253,9 @@ cdef class HistogramBuilder:
         cdef:
             int feature_idx
             int n_features = self.n_features
-            hist_struct [:, ::1] histograms = np.zeros(
-                shape=(self.n_features, self.max_bins),
-                dtype=HISTOGRAM_DTYPE
-            )
+            hist_struct [:, ::1] histograms
+
+        histograms = self.allocate_or_reuse_histograms()
 
         for feature_idx in prange(n_features, schedule='static', nogil=True):
             # Compute histogram of each feature

From 7e9fe384821d9928e3c3ca9c0e662915071d7a88 Mon Sep 17 00:00:00 2001
From: Nicolas Hug <contact@nicolas-hug.com>
Date: Mon, 7 Sep 2020 10:40:34 -0400
Subject: [PATCH 2/3] Changed name + whatsnew

---
 doc/whats_new/v0.24.rst                       | 29 +++++++++++--------
 .../_hist_gradient_boosting/histogram.pyx     |  6 ++--
 2 files changed, 20 insertions(+), 15 deletions(-)

diff --git a/doc/whats_new/v0.24.rst b/doc/whats_new/v0.24.rst
index c57f097ec3218..6d1078b584854 100644
--- a/doc/whats_new/v0.24.rst
+++ b/doc/whats_new/v0.24.rst
@@ -188,19 +188,24 @@ Changelog
   method `staged_predict`, which allows monitoring of each stage.
   :pr:`16985` by :user:`Hao Chun Chang <haochunchang>`.
 
-- |Efficiency| break cyclic references in the tree nodes used internally in
+- |Efficiency| Various improvements were made to
   :class:`ensemble.HistGradientBoostingRegressor` and
-  :class:`ensemble.HistGradientBoostingClassifier` to allow for the timely
-  garbage collection of large intermediate datastructures and to improve memory
-  usage in `fit`. :pr:`18334` by `Olivier Grisel`_ `Nicolas Hug`_, `Thomas
-  Fan`_ and `Andreas Müller`_.
-
-- |Efficiency| Histogram initialization is now done in parallel in
-  :class:`ensemble.HistGradientBoostingRegressor` and
-  :class:`ensemble.HistGradientBoostingClassifier` which results in speed
-  improvement for problems that build a lot of nodes on multicore machines.
-  :pr:`18341` by `Olivier Grisel`_, `Nicolas Hug`_, `Thomas Fan`_, and
-  :user:`Egor Smirnov <SmirnovEgorRu>`.
+  :class:`ensemble.HistGradientBoostingClassifier` which lead to less memory
+  usage, as well as faster training times:
+
+  - Break cyclic references in the tree nodes used internally to allow for
+    the timely garbage collection of large intermediate data structures and to
+    improve memory usage in `fit`. :pr:`18334` by `Olivier Grisel`_, `Nicolas
+    Hug`_, `Thomas Fan`_ and `Andreas Müller`_.
+
+  - Histogram initialization is now done in parallel which results in speed
+    improvement on multicore machines, for problems that build a lot of nodes.
+    :pr:`18341` by `Olivier Grisel`_, `Nicolas Hug`_, `Thomas Fan`_, and
+    :user:`Egor Smirnov <SmirnovEgorRu>`.
+  
+  - Allocated histograms can be reused by other nodes of the same tree,
+    leading to less memory allocations. :pr:`14392` by `Olivier Grisel`_,
+    `Nicolas Hug`_, `Thomas Fan`_.
 
 - |API|: The parameter ``n_classes_`` is now deprecated in
   :class:`ensemble.GradientBoostingRegressor` and returns `1`.
diff --git a/sklearn/ensemble/_hist_gradient_boosting/histogram.pyx b/sklearn/ensemble/_hist_gradient_boosting/histogram.pyx
index c935fc11b08c7..587cde90abb88 100644
--- a/sklearn/ensemble/_hist_gradient_boosting/histogram.pyx
+++ b/sklearn/ensemble/_hist_gradient_boosting/histogram.pyx
@@ -107,7 +107,7 @@ cdef class HistogramBuilder:
         # list of histograms that can be re-used for other nodes.
         self.available_histograms = []
 
-    def get_empty_histograms(HistogramBuilder self):
+    def allocate_or_reuse_hists(HistogramBuilder self):
         """Return a non-initialized histograms array.
 
         The array is allocated only if needed.
@@ -154,7 +154,7 @@ cdef class HistogramBuilder:
             G_H_DTYPE_C [::1] ordered_hessians = self.ordered_hessians
             G_H_DTYPE_C [::1] hessians = self.hessians
             # Histograms will be initialized to zero later within a prange
-            hist_struct [:, ::1] histograms = self.get_empty_histograms()
+            hist_struct [:, ::1] histograms = self.allocate_or_reuse_hists()
 
         with nogil:
             n_samples = sample_indices.shape[0]
@@ -252,7 +252,7 @@ cdef class HistogramBuilder:
         cdef:
             int feature_idx
             int n_features = self.n_features
-            hist_struct [:, ::1] histograms = self.get_empty_histograms()
+            hist_struct [:, ::1] histograms = self.allocate_or_reuse_hists()
 
         for feature_idx in prange(n_features, schedule='static', nogil=True):
             # Compute histogram of each feature

From 22877fc61594628e3f28616a016c5318acc3bea4 Mon Sep 17 00:00:00 2001
From: Olivier Grisel <olivier.grisel@ensta.org>
Date: Tue, 8 Sep 2020 10:00:43 +0200
Subject: [PATCH 3/3] less => fewer (countable)

---
 doc/whats_new/v0.24.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/whats_new/v0.24.rst b/doc/whats_new/v0.24.rst
index 6d1078b584854..fc80554109877 100644
--- a/doc/whats_new/v0.24.rst
+++ b/doc/whats_new/v0.24.rst
@@ -204,7 +204,7 @@ Changelog
     :user:`Egor Smirnov <SmirnovEgorRu>`.
   
   - Allocated histograms can be reused by other nodes of the same tree,
-    leading to less memory allocations. :pr:`14392` by `Olivier Grisel`_,
+    leading to fewer memory allocations. :pr:`14392` by `Olivier Grisel`_,
     `Nicolas Hug`_, `Thomas Fan`_.
 
 - |API|: The parameter ``n_classes_`` is now deprecated in