FIX make deepcopy of parameters in ensemble to avoid sharing mutable instances by aadamson · Pull Request #18985 · scikit-learn/scikit-learn · GitHub

FIX make deepcopy of parameters in ensemble to avoid sharing mutable instances #18985

Closed · wants to merge 1 commit
9 changes: 9 additions & 0 deletions doc/whats_new/v1.0.rst
@@ -44,6 +44,15 @@ Changelog
:pr:`123456` by :user:`Joe Bloggs <joeongithub>`.
where 123456 is the *pull request* number, not the issue number.

:mod:`sklearn.ensemble`
.......................

- |Fix| Fix a bug in `_make_estimator` of :class:`ensemble.BaseEnsemble` that
  could cause segmentation faults when a mutable parameter instance (such as a
  criterion object) was shared by sub-estimators fitted in parallel.
  `_make_estimator` now deep copies each element of `estimator_params` when
  creating estimators, so sub-estimators no longer share mutable parameter
  instances.
  :pr:`18985` by :user:`Alex Adamson <aadamson>` and
  :user:`Wil Yegelwel <wyegelwel>`.


Code and Documentation Contributors
5 changes: 4 additions & 1 deletion sklearn/ensemble/_base.py
@@ -4,6 +4,7 @@
# License: BSD 3 clause

from abc import ABCMeta, abstractmethod
import copy
import numbers
from typing import List

@@ -148,7 +149,9 @@ def _make_estimator(self, append=True, random_state=None):
sub-estimators.
"""
estimator = clone(self.base_estimator_)
estimator.set_params(**{p: getattr(self, p)
# Make a deepcopy in case one of the base estimators has a mutable
# parameter that might be shared and modified during parallel fitting
estimator.set_params(**{p: copy.deepcopy(getattr(self, p))
for p in self.estimator_params})

if random_state is not None:
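To illustrate the behavior this hunk changes, here is a minimal sketch (not scikit-learn code; the MutableParam class is hypothetical) of how handing every sub-estimator the same parameter object differs from giving each one a deep copy:

import copy


class MutableParam:
    """Hypothetical stand-in for a stateful parameter such as a Criterion."""

    def __init__(self):
        self.node_count = 0


shared = MutableParam()

# Without the fix: every sub-estimator receives the very same instance,
# so concurrent in-place mutation during parallel fitting races on it.
params_before = [shared for _ in range(3)]
assert all(p is shared for p in params_before)

# With the fix: each sub-estimator gets its own deep copy and can mutate
# it freely without affecting the others or the ensemble's own attribute.
params_after = [copy.deepcopy(shared) for _ in range(3)]
assert all(p is not shared for p in params_after)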
19 changes: 19 additions & 0 deletions sklearn/ensemble/tests/test_forest.py
@@ -1377,3 +1377,22 @@ def test_little_tree_with_small_max_samples(ForestClass):

msg = "Tree without `max_samples` restriction should have more nodes"
assert tree1.node_count > tree2.node_count, msg


@pytest.mark.parametrize('Forest', FOREST_REGRESSORS)
def test_mse_criterion_object_segfault_smoke_test(Forest):
# Ensure that we can pass a mutable criterion while using parallel fit
# Non-regression test for:
# https://github.com/scikit-learn/scikit-learn/issues/12623
from sklearn.tree._classes import CRITERIA_REG

X = np.random.random((1000, 3))
y = np.random.random((1000, 1))

n_samples, n_outputs = y.shape
mse_criterion = CRITERIA_REG['mse'](n_outputs, n_samples)
est = FOREST_REGRESSORS[Forest](
n_estimators=2, n_jobs=2, criterion=mse_criterion
Member

If FOREST_REGRESSORS[Forest] knows that it is going to mutate a hyper-parameter such as criterion, I think est.fit should have the responsibility of copying criterion.

WDYT @glemaitre ?
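
As a hedged sketch of what that alternative could look like (a hypothetical ToyTree, not scikit-learn code), the estimator's fit would make its own copy of the mutable hyper-parameter before touching it:

import copy


class ToyTree:
    """Hypothetical estimator whose fit() owns the copy of `criterion`."""

    def __init__(self, criterion):
        self.criterion = criterion

    def fit(self, X, y):
        # Work on a private copy so the caller's (or a parent ensemble's)
        # object is never mutated, even when several trees fit in parallel.
        criterion = copy.deepcopy(self.criterion)
        criterion["node_count"] = len(X)  # stand-in for in-place mutation
        self.criterion_ = criterion
        return self


shared = {"node_count": 0}
trees = [ToyTree(shared).fit([[0.0], [1.0]], [0, 1]) for _ in range(2)]
assert shared == {"node_count": 0}  # the shared object is untouched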

Contributor

Hi @thomasjpfan - I'm resuming this for Alex and was wondering what you think we should do next.

If I remember correctly, since the underlying issue is specific to an assumption of thread-safety when instantiating estimators, we all settled on fixing it within _make_estimator(...), as suggested by the comments within that method.

Member

I was thinking the deepcopy may be costly for certain objects and a more targeted solution would be to copy in BaseDecisionTree:

criterion = self.criterion
if not isinstance(criterion, Criterion):
    if is_classification:
        criterion = CRITERIA_CLF[self.criterion](self.n_outputs_,
                                                 self.n_classes_)
    else:
        criterion = CRITERIA_REG[self.criterion](self.n_outputs_,
                                                 n_samples)

if not isinstance(criterion, Criterion): 
    ...
else:
    criterion = copy.deepcopy(criterion)

Contributor

This would work for us, although I'm not familiar enough with the rest of the ensemble implementation to know if it categorically covers all potential multi-threading issues.

I will update this fix and create a PR asap. Thanks again for looking into this.

)

est.fit(X, y)