diff --git a/doc/whats_new/v0.24.rst b/doc/whats_new/v0.24.rst index c57f097ec3218..fc80554109877 100644 --- a/doc/whats_new/v0.24.rst +++ b/doc/whats_new/v0.24.rst @@ -188,19 +188,24 @@ Changelog method `staged_predict`, which allows monitoring of each stage. :pr:`16985` by :user:`Hao Chun Chang <haochunchang>`. -- |Efficiency| break cyclic references in the tree nodes used internally in +- |Efficiency| Various improvements were made to :class:`ensemble.HistGradientBoostingRegressor` and - :class:`ensemble.HistGradientBoostingClassifier` to allow for the timely - garbage collection of large intermediate datastructures and to improve memory - usage in `fit`. :pr:`18334` by `Olivier Grisel`_ `Nicolas Hug`_, `Thomas - Fan`_ and `Andreas Müller`_. - -- |Efficiency| Histogram initialization is now done in parallel in - :class:`ensemble.HistGradientBoostingRegressor` and - :class:`ensemble.HistGradientBoostingClassifier` which results in speed - improvement for problems that build a lot of nodes on multicore machines. - :pr:`18341` by `Olivier Grisel`_, `Nicolas Hug`_, `Thomas Fan`_, and - :user:`Egor Smirnov <SmirnovEgorRu>`. + :class:`ensemble.HistGradientBoostingClassifier` which lead to less memory + usage, as well as faster training times: + + - break cyclic references in the tree nodes used internally to allow for + the timely garbage collection of large intermediate datastructures and to + improve memory usage in `fit`. :pr:`18334` by `Olivier Grisel`_, `Nicolas + Hug`_, `Thomas Fan`_ and `Andreas Müller`_. + + - Histogram initialization is now done in parallel which results in speed + improvement on multicore machines, for problems that build a lot of nodes. + :pr:`18341` by `Olivier Grisel`_, `Nicolas Hug`_, `Thomas Fan`_, and + :user:`Egor Smirnov <SmirnovEgorRu>`. + + - Allocated histograms can be reused by other nodes of the same tree, + leading to fewer memory allocations. :pr:`14392` by `Olivier Grisel`_, + `Nicolas Hug`_, `Thomas Fan`_. 
- |API|: The parameter ``n_classes_`` is now deprecated in :class:`ensemble.GradientBoostingRegressor` and returns `1`. diff --git a/sklearn/ensemble/_hist_gradient_boosting/grower.py b/sklearn/ensemble/_hist_gradient_boosting/grower.py index 473dc37674684..ed3ddd3144301 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/grower.py +++ b/sklearn/ensemble/_hist_gradient_boosting/grower.py @@ -485,10 +485,12 @@ def split_next(self): # for leaf nodes since they won't be split. for child in (left_child_node, right_child_node): if child.is_leaf: + self.histogram_builder.release(child.histograms) del child.histograms # Release memory used by histograms as they are no longer needed for # internal nodes once children histograms have been computed. + self.histogram_builder.release(node.histograms) del node.histograms return left_child_node, right_child_node diff --git a/sklearn/ensemble/_hist_gradient_boosting/histogram.pyx b/sklearn/ensemble/_hist_gradient_boosting/histogram.pyx index 625aecc4f09f5..587cde90abb88 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/histogram.pyx +++ b/sklearn/ensemble/_hist_gradient_boosting/histogram.pyx @@ -85,6 +85,7 @@ cdef class HistogramBuilder: G_H_DTYPE_C [::1] ordered_gradients G_H_DTYPE_C [::1] ordered_hessians unsigned char hessians_are_constant + list available_histograms def __init__(self, const X_BINNED_DTYPE_C [::1, :] X_binned, unsigned int n_bins, G_H_DTYPE_C [::1] gradients, @@ -103,6 +104,26 @@ cdef class HistogramBuilder: self.ordered_hessians = hessians.copy() self.hessians_are_constant = hessians_are_constant + # list of histograms that can be re-used for other nodes. + self.available_histograms = [] + + def allocate_or_reuse_hists(HistogramBuilder self): + """Return a non-initialized histograms array. + + The array is allocated only if needed. 
+ """ + if self.available_histograms: + return self.available_histograms.pop() + else: + return np.empty( + shape=(self.n_features, self.n_bins), + dtype=HISTOGRAM_DTYPE + ) + + def release(HistogramBuilder self, histograms): + """Mark a histograms array as available so it can be reused by other nodes""" + self.available_histograms.append(histograms) + def compute_histograms_brute( HistogramBuilder self, const unsigned int [::1] sample_indices): # IN @@ -133,10 +154,7 @@ cdef class HistogramBuilder: G_H_DTYPE_C [::1] ordered_hessians = self.ordered_hessians G_H_DTYPE_C [::1] hessians = self.hessians # Histograms will be initialized to zero later within a prange - hist_struct [:, ::1] histograms = np.empty( - shape=(self.n_features, self.n_bins), - dtype=HISTOGRAM_DTYPE - ) + hist_struct [:, ::1] histograms = self.allocate_or_reuse_hists() with nogil: n_samples = sample_indices.shape[0] @@ -234,10 +252,7 @@ cdef class HistogramBuilder: cdef: int feature_idx int n_features = self.n_features - hist_struct [:, ::1] histograms = np.empty( - shape=(self.n_features, self.n_bins), - dtype=HISTOGRAM_DTYPE - ) + hist_struct [:, ::1] histograms = self.allocate_or_reuse_hists() for feature_idx in prange(n_features, schedule='static', nogil=True): # Compute histogram of each feature