From 418494f1de3e43036bdcbc768fddf2e13849aa9e Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Wed, 17 Jul 2019 09:44:53 -0400 Subject: [PATCH 1/3] Better histogram initialization routines --- .../_hist_gradient_boosting/grower.py | 5 +++ .../_hist_gradient_boosting/histogram.pyx | 43 +++++++++++++++---- 2 files changed, 40 insertions(+), 8 deletions(-) diff --git a/sklearn/ensemble/_hist_gradient_boosting/grower.py b/sklearn/ensemble/_hist_gradient_boosting/grower.py index 7eec680082e97..7816187cfdfad 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/grower.py +++ b/sklearn/ensemble/_hist_gradient_boosting/grower.py @@ -389,6 +389,11 @@ def split_next(self): self._compute_best_split_and_push(right_child_node) self.total_find_split_time += time() - tic + # Both children now have their split_info computed. We don't need the + # histogram of the parent anymore, so we can reuse its allocated space + # for other histograms. + self.histogram_builder.mark_as_available(node.histograms) + return left_child_node, right_child_node def _finalize_leaf(self, node): diff --git a/sklearn/ensemble/_hist_gradient_boosting/histogram.pyx b/sklearn/ensemble/_hist_gradient_boosting/histogram.pyx index c83fa0c79db71..63ab091097840 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/histogram.pyx +++ b/sklearn/ensemble/_hist_gradient_boosting/histogram.pyx @@ -83,6 +83,7 @@ cdef class HistogramBuilder: G_H_DTYPE_C [::1] ordered_gradients G_H_DTYPE_C [::1] ordered_hessians unsigned char hessians_are_constant + list available_histograms def __init__(self, const X_BINNED_DTYPE_C [::1, :] X_binned, unsigned int max_bins, G_H_DTYPE_C [::1] gradients, @@ -101,6 +102,27 @@ cdef class HistogramBuilder: self.ordered_hessians = hessians.copy() self.hessians_are_constant = hessians_are_constant + # list of histograms that can be re-used for other nodes. These are the + # histograms of the nodes whose both children's split_info have been + # computed. 
+ self.available_histograms = [] + + def allocate_or_reuse_histograms(HistogramBuilder self): + """Return a non-initialized histograms array. + + The array is allocated only if needed. + """ + if self.available_histograms: + return self.available_histograms.pop() + else: + return np.empty( + shape=(self.n_features, self.max_bins), + dtype=HISTOGRAM_DTYPE + ) + + def mark_as_available(HistogramBuilder self, histograms): + self.available_histograms.append(histograms) + def compute_histograms_brute( HistogramBuilder self, const unsigned int [::1] sample_indices): # IN @@ -130,10 +152,9 @@ cdef class HistogramBuilder: G_H_DTYPE_C [::1] gradients = self.gradients G_H_DTYPE_C [::1] ordered_hessians = self.ordered_hessians G_H_DTYPE_C [::1] hessians = self.hessians - hist_struct [:, ::1] histograms = np.zeros( - shape=(self.n_features, self.max_bins), - dtype=HISTOGRAM_DTYPE - ) + hist_struct [:, ::1] histograms + + histograms = self.allocate_or_reuse_histograms() with nogil: n_samples = sample_indices.shape[0] @@ -175,6 +196,13 @@ cdef class HistogramBuilder: self.ordered_hessians[:n_samples] unsigned char hessians_are_constant = \ self.hessians_are_constant + unsigned int bin_idx = 0 + + # Need to initialize histograms to 0 since all the helpers use += + for bin_idx in range(self.max_bins): + histograms[feature_idx, bin_idx].sum_gradients = 0. + histograms[feature_idx, bin_idx].sum_hessians = 0. 
+ histograms[feature_idx, bin_idx].count = 0 if root_node: if hessians_are_constant: @@ -225,10 +253,9 @@ cdef class HistogramBuilder: cdef: int feature_idx int n_features = self.n_features - hist_struct [:, ::1] histograms = np.zeros( - shape=(self.n_features, self.max_bins), - dtype=HISTOGRAM_DTYPE - ) + hist_struct [:, ::1] histograms + + histograms = self.allocate_or_reuse_histograms() for feature_idx in prange(n_features, schedule='static', nogil=True): # Compute histogram of each feature From 7e9fe384821d9928e3c3ca9c0e662915071d7a88 Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Mon, 7 Sep 2020 10:40:34 -0400 Subject: [PATCH 2/3] Changed name + whatsnew --- doc/whats_new/v0.24.rst | 29 +++++++++++-------- .../_hist_gradient_boosting/histogram.pyx | 6 ++-- 2 files changed, 20 insertions(+), 15 deletions(-) diff --git a/doc/whats_new/v0.24.rst b/doc/whats_new/v0.24.rst index c57f097ec3218..6d1078b584854 100644 --- a/doc/whats_new/v0.24.rst +++ b/doc/whats_new/v0.24.rst @@ -188,19 +188,24 @@ Changelog method `staged_predict`, which allows monitoring of each stage. :pr:`16985` by :user:`Hao Chun Chang <haochunchang>`. -- |Efficiency| break cyclic references in the tree nodes used internally in +- |Efficiency| Various improvements were made to :class:`ensemble.HistGradientBoostingRegressor` and - :class:`ensemble.HistGradientBoostingClassifier` to allow for the timely - garbage collection of large intermediate datastructures and to improve memory - usage in `fit`. :pr:`18334` by `Olivier Grisel`_ `Nicolas Hug`_, `Thomas - Fan`_ and `Andreas Müller`_. - -- |Efficiency| Histogram initialization is now done in parallel in - :class:`ensemble.HistGradientBoostingRegressor` and - :class:`ensemble.HistGradientBoostingClassifier` which results in speed - improvement for problems that build a lot of nodes on multicore machines. - :pr:`18341` by `Olivier Grisel`_, `Nicolas Hug`_, `Thomas Fan`_, and - :user:`Egor Smirnov <SmirnovEgorRu>`. 
+ :class:`ensemble.HistGradientBoostingClassifier` which lead to less memory + usage, as well as faster training times: + + - break cyclic references in the tree nodes used internally to allow for + the timely garbage collection of large intermediate datastructures and to + improve memory usage in `fit`. :pr:`18334` by `Olivier Grisel`_ `Nicolas + Hug`_, `Thomas Fan`_ and `Andreas Müller`_. + + - Histogram initialization is now done in parallel which results in speed + improvement on multicore machines, for problems that build a lot of nodes. + :pr:`18341` by `Olivier Grisel`_, `Nicolas Hug`_, `Thomas Fan`_, and + :user:`Egor Smirnov <SmirnovEgorRu>`. + + - Allocated histograms can be reused by other nodes of the same tree, + leading to less memory allocations. :pr:`14392` by `Olivier Grisel`_, + `Nicolas Hug`_, `Thomas Fan`_. - |API|: The parameter ``n_classes_`` is now deprecated in :class:`ensemble.GradientBoostingRegressor` and returns `1`. diff --git a/sklearn/ensemble/_hist_gradient_boosting/histogram.pyx b/sklearn/ensemble/_hist_gradient_boosting/histogram.pyx index c935fc11b08c7..587cde90abb88 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/histogram.pyx +++ b/sklearn/ensemble/_hist_gradient_boosting/histogram.pyx @@ -107,7 +107,7 @@ cdef class HistogramBuilder: # list of histograms that can be re-used for other nodes. self.available_histograms = [] - def get_empty_histograms(HistogramBuilder self): + def allocate_or_reuse_hists(HistogramBuilder self): """Return a non-initialized histograms array. The array is allocated only if needed. 
@@ -154,7 +154,7 @@ cdef class HistogramBuilder: G_H_DTYPE_C [::1] ordered_hessians = self.ordered_hessians G_H_DTYPE_C [::1] hessians = self.hessians # Histograms will be initialized to zero later within a prange - hist_struct [:, ::1] histograms = self.get_empty_histograms() + hist_struct [:, ::1] histograms = self.allocate_or_reuse_hists() with nogil: n_samples = sample_indices.shape[0] @@ -252,7 +252,7 @@ cdef class HistogramBuilder: cdef: int feature_idx int n_features = self.n_features - hist_struct [:, ::1] histograms = self.get_empty_histograms() + hist_struct [:, ::1] histograms = self.allocate_or_reuse_hists() for feature_idx in prange(n_features, schedule='static', nogil=True): # Compute histogram of each feature From 22877fc61594628e3f28616a016c5318acc3bea4 Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Tue, 8 Sep 2020 10:00:43 +0200 Subject: [PATCH 3/3] less => fewer (countable) --- doc/whats_new/v0.24.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/whats_new/v0.24.rst b/doc/whats_new/v0.24.rst index 6d1078b584854..fc80554109877 100644 --- a/doc/whats_new/v0.24.rst +++ b/doc/whats_new/v0.24.rst @@ -204,7 +204,7 @@ Changelog :user:`Egor Smirnov <SmirnovEgorRu>`. - Allocated histograms can be reused by other nodes of the same tree, - leading to less memory allocations. :pr:`14392` by `Olivier Grisel`_, + leading to fewer memory allocations. :pr:`14392` by `Olivier Grisel`_, `Nicolas Hug`_, `Thomas Fan`_. - |API|: The parameter ``n_classes_`` is now deprecated in