From a2dce00d811a4ab7327a26e10bbfc19bb6d82d8e Mon Sep 17 00:00:00 2001 From: siavrez Date: Tue, 23 Nov 2021 02:02:06 +0330 Subject: [PATCH 01/18] accelerate plot_iterative_imputer_variants_comparison.py added bootstraping to ETrees and changed folds to 3 --- .../impute/plot_iterative_imputer_variants_comparison.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/examples/impute/plot_iterative_imputer_variants_comparison.py b/examples/impute/plot_iterative_imputer_variants_comparison.py index 9aef74343871c..81533b3c2cc28 100644 --- a/examples/impute/plot_iterative_imputer_variants_comparison.py +++ b/examples/impute/plot_iterative_imputer_variants_comparison.py @@ -56,7 +56,7 @@ from sklearn.pipeline import make_pipeline from sklearn.model_selection import cross_val_score -N_SPLITS = 5 +N_SPLITS = 3 rng = np.random.RandomState(0) @@ -98,7 +98,9 @@ estimators = [ BayesianRidge(), DecisionTreeRegressor(max_features="sqrt", random_state=0), - ExtraTreesRegressor(n_estimators=10, random_state=0), + ExtraTreesRegressor( + n_estimators=10, random_state=0, bootstrap=True, max_samples=0.75 + ), KNeighborsRegressor(n_neighbors=15), ] score_iterative_imputer = pd.DataFrame() @@ -109,7 +111,6 @@ score_iterative_imputer[impute_estimator.__class__.__name__] = cross_val_score( estimator, X_missing, y_missing, scoring="neg_mean_squared_error", cv=N_SPLITS ) - scores = pd.concat( [score_full_data, score_simple_imputer, score_iterative_imputer], keys=["Original", "SimpleImputer", "IterativeImputer"], From 7893fc9d9ab7d68c4a6aad3ce4a7bf5ac3613009 Mon Sep 17 00:00:00 2001 From: siavrez Date: Tue, 23 Nov 2021 16:04:15 +0330 Subject: [PATCH 02/18] Added more params to ETregressor to reduce runtime --- .../impute/plot_iterative_imputer_variants_comparison.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/examples/impute/plot_iterative_imputer_variants_comparison.py b/examples/impute/plot_iterative_imputer_variants_comparison.py index 81533b3c2cc28..f2ef1479b1b07 100644 --- a/examples/impute/plot_iterative_imputer_variants_comparison.py +++ b/examples/impute/plot_iterative_imputer_variants_comparison.py @@ -99,7 +99,14 @@ BayesianRidge(), DecisionTreeRegressor(max_features="sqrt", random_state=0), ExtraTreesRegressor( - n_estimators=10, random_state=0, bootstrap=True, max_samples=0.75 + # We tuned the hyperparameters of the ExtraTreesRegressor to minimize + # the execution time + n_estimators=4, + max_depth=10, + bootstrap=True, + max_samples=0.5, + n_jobs=2, + random_state=0, ), KNeighborsRegressor(n_neighbors=15), ] From 2452df21127a03352a51a043a624e36261531ce2 Mon Sep 17 00:00:00 2001 From: siavrez Date: Tue, 23 Nov 2021 17:51:58 +0330 Subject: [PATCH 03/18] removed bootstrap=True, with max_sample param bootstrap is changed to True --- examples/impute/plot_iterative_imputer_variants_comparison.py | 1 - 1 file changed, 1 deletion(-) diff --git a/examples/impute/plot_iterative_imputer_variants_comparison.py b/examples/impute/plot_iterative_imputer_variants_comparison.py index f2ef1479b1b07..b6442b6174f4e 100644 --- a/examples/impute/plot_iterative_imputer_variants_comparison.py +++ b/examples/impute/plot_iterative_imputer_variants_comparison.py @@ -103,7 +103,6 @@ # the execution time n_estimators=4, max_depth=10, - bootstrap=True, max_samples=0.5, n_jobs=2, random_state=0, From 64eec520fed61313e83c05165de867ac2e5fa72a Mon Sep 17 00:00:00 2001 From: siavrez Date: Tue, 23 Nov 2021 21:01:49 +0330 Subject: [PATCH 04/18] change the folds to 5 --- examples/impute/plot_iterative_imputer_variants_comparison.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/impute/plot_iterative_imputer_variants_comparison.py b/examples/impute/plot_iterative_imputer_variants_comparison.py index b6442b6174f4e..ba57a62208ff7 100644 --- a/examples/impute/plot_iterative_imputer_variants_comparison.py +++ b/examples/impute/plot_iterative_imputer_variants_comparison.py @@ -56,7 +56,7 @@ from sklearn.pipeline import make_pipeline from sklearn.model_selection import cross_val_score -N_SPLITS = 3 +N_SPLITS = 5 rng = np.random.RandomState(0) From 441564850941677b65ddbe55f430964beb1d132f Mon Sep 17 00:00:00 2001 From: siavrez Date: Wed, 24 Nov 2021 19:56:00 +0330 Subject: [PATCH 05/18] changing tree with random forest add tolerance for each model, change max_iter --- ...t_iterative_imputer_variants_comparison.py | 23 ++++++++++++++----- 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/examples/impute/plot_iterative_imputer_variants_comparison.py b/examples/impute/plot_iterative_imputer_variants_comparison.py index ba57a62208ff7..13443e767ff79 100644 --- a/examples/impute/plot_iterative_imputer_variants_comparison.py +++ b/examples/impute/plot_iterative_imputer_variants_comparison.py @@ -50,8 +50,7 @@ from sklearn.impute import SimpleImputer from sklearn.impute import IterativeImputer from sklearn.linear_model import BayesianRidge -from sklearn.tree import DecisionTreeRegressor -from sklearn.ensemble import ExtraTreesRegressor +from sklearn.ensemble import ExtraTreesRegressor, RandomForestRegressor from sklearn.neighbors import KNeighborsRegressor from sklearn.pipeline import make_pipeline from sklearn.model_selection import cross_val_score @@ -66,7 +65,7 @@ X_full = X_full[::10] y_full = y_full[::10] n_samples, n_features = X_full.shape - +# X_full = MinMaxScaler().fit_transform(X_full) # Estimate the score on the entire dataset, with no missing values br_estimator = BayesianRidge() score_full_data = pd.DataFrame( @@ -97,7 +96,15 @@ # with different estimators estimators = [ BayesianRidge(), - DecisionTreeRegressor(max_features="sqrt", random_state=0), + RandomForestRegressor( + # We tuned the hyperparameters of the ExtraTreesRegressor to minimize + # the execution time + n_estimators=4, + max_depth=10, + max_samples=0.5, + n_jobs=2, + random_state=0, + ), ExtraTreesRegressor( # We tuned the hyperparameters of the ExtraTreesRegressor to minimize # the execution time @@ -110,9 +117,13 @@ KNeighborsRegressor(n_neighbors=15), ] score_iterative_imputer = pd.DataFrame() -for impute_estimator in estimators: +tolerances = (1e-3, 1e-1, 1e-1, 1e-2) +for impute_estimator, tol in zip(estimators, tolerances): estimator = make_pipeline( - IterativeImputer(random_state=0, estimator=impute_estimator), br_estimator + IterativeImputer( + random_state=0, estimator=impute_estimator, max_iter=25, tol=tol + ), + br_estimator, ) score_iterative_imputer[impute_estimator.__class__.__name__] = cross_val_score( estimator, X_missing, y_missing, scoring="neg_mean_squared_error", cv=N_SPLITS From 0f70fdb7f4ec5546fd70669d9f921c2ea28aa814 Mon Sep 17 00:00:00 2001 From: siavrez Date: Thu, 25 Nov 2021 20:14:31 +0330 Subject: [PATCH 06/18] removed comment --- examples/impute/plot_iterative_imputer_variants_comparison.py | 1 - 1 file changed, 1 deletion(-) diff --git a/examples/impute/plot_iterative_imputer_variants_comparison.py b/examples/impute/plot_iterative_imputer_variants_comparison.py index 13443e767ff79..abe91f18a0e0d 100644 --- a/examples/impute/plot_iterative_imputer_variants_comparison.py +++ b/examples/impute/plot_iterative_imputer_variants_comparison.py @@ -65,7 +65,6 @@ X_full = X_full[::10] y_full = y_full[::10] n_samples, n_features = X_full.shape -# X_full = MinMaxScaler().fit_transform(X_full) # Estimate the score on the entire dataset, with no missing values br_estimator = BayesianRidge() score_full_data = pd.DataFrame( From f591ec0470bac2cac34256a994f85a0434f04538 Mon Sep 17 00:00:00 2001 From: siavrez Date: Thu, 25 Nov 2021 20:54:23 +0330 Subject: [PATCH 07/18] added bootstrap=True to randomforest --- examples/impute/plot_iterative_imputer_variants_comparison.py | 1 + 1 file changed, 1 insertion(+) diff --git a/examples/impute/plot_iterative_imputer_variants_comparison.py b/examples/impute/plot_iterative_imputer_variants_comparison.py index abe91f18a0e0d..8865284f055ad 100644 --- a/examples/impute/plot_iterative_imputer_variants_comparison.py +++ b/examples/impute/plot_iterative_imputer_variants_comparison.py @@ -100,6 +100,7 @@ # the execution time n_estimators=4, max_depth=10, + bootstrap=True, max_samples=0.5, n_jobs=2, random_state=0, From 8e36e9b9efddc52cbe522cf80a901b793ec45dc0 Mon Sep 17 00:00:00 2001 From: siavrez Date: Thu, 25 Nov 2021 21:15:40 +0330 Subject: [PATCH 08/18] added comment for tolerance --- examples/impute/plot_iterative_imputer_variants_comparison.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/examples/impute/plot_iterative_imputer_variants_comparison.py b/examples/impute/plot_iterative_imputer_variants_comparison.py index 8865284f055ad..5eeab85fa6e79 100644 --- a/examples/impute/plot_iterative_imputer_variants_comparison.py +++ b/examples/impute/plot_iterative_imputer_variants_comparison.py @@ -118,6 +118,8 @@ ] score_iterative_imputer = pd.DataFrame() tolerances = (1e-3, 1e-1, 1e-1, 1e-2) +# iterative imputer is sensible to the tolerance and +# dependent on the estimator used internally. for impute_estimator, tol in zip(estimators, tolerances): estimator = make_pipeline( IterativeImputer( From a4855ccfd160901a42516e7450b5105fc9563f6b Mon Sep 17 00:00:00 2001 From: siavrez Date: Thu, 25 Nov 2021 21:29:32 +0330 Subject: [PATCH 09/18] added bootstrap=True to ET --- examples/impute/plot_iterative_imputer_variants_comparison.py | 1 + 1 file changed, 1 insertion(+) diff --git a/examples/impute/plot_iterative_imputer_variants_comparison.py b/examples/impute/plot_iterative_imputer_variants_comparison.py index 5eeab85fa6e79..b621482dc9fff 100644 --- a/examples/impute/plot_iterative_imputer_variants_comparison.py +++ b/examples/impute/plot_iterative_imputer_variants_comparison.py @@ -110,6 +110,7 @@ # the execution time n_estimators=4, max_depth=10, + bootstrap=True, max_samples=0.5, n_jobs=2, random_state=0, From 4509282baf59755eec4ee939183f6660f830a961 Mon Sep 17 00:00:00 2001 From: siavrez Date: Thu, 25 Nov 2021 21:45:45 +0330 Subject: [PATCH 10/18] changed tolerance comment place --- examples/impute/plot_iterative_imputer_variants_comparison.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/impute/plot_iterative_imputer_variants_comparison.py b/examples/impute/plot_iterative_imputer_variants_comparison.py index b621482dc9fff..851ead4e30daa 100644 --- a/examples/impute/plot_iterative_imputer_variants_comparison.py +++ b/examples/impute/plot_iterative_imputer_variants_comparison.py @@ -118,9 +118,9 @@ KNeighborsRegressor(n_neighbors=15), ] score_iterative_imputer = pd.DataFrame() -tolerances = (1e-3, 1e-1, 1e-1, 1e-2) # iterative imputer is sensible to the tolerance and # dependent on the estimator used internally. +tolerances = (1e-3, 1e-1, 1e-1, 1e-2) for impute_estimator, tol in zip(estimators, tolerances): estimator = make_pipeline( IterativeImputer( From b8947b64f273d2083bcb366ab7839a41f3a1a1dc Mon Sep 17 00:00:00 2001 From: siavrez Date: Thu, 25 Nov 2021 22:04:21 +0330 Subject: [PATCH 11/18] change ET with Ny-Ridge pipeline --- ...ot_iterative_imputer_variants_comparison.py | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/examples/impute/plot_iterative_imputer_variants_comparison.py b/examples/impute/plot_iterative_imputer_variants_comparison.py index 851ead4e30daa..69eed655e79fd 100644 --- a/examples/impute/plot_iterative_imputer_variants_comparison.py +++ b/examples/impute/plot_iterative_imputer_variants_comparison.py @@ -49,8 +49,9 @@ from sklearn.datasets import fetch_california_housing from sklearn.impute import SimpleImputer from sklearn.impute import IterativeImputer -from sklearn.linear_model import BayesianRidge -from sklearn.ensemble import ExtraTreesRegressor, RandomForestRegressor +from sklearn.linear_model import BayesianRidge, Ridge +from sklearn.kernel_approximation import Nystroem +from sklearn.ensemble import RandomForestRegressor from sklearn.neighbors import KNeighborsRegressor from sklearn.pipeline import make_pipeline from sklearn.model_selection import cross_val_score @@ -105,22 +106,15 @@ n_jobs=2, random_state=0, ), - ExtraTreesRegressor( - # We tuned the hyperparameters of the ExtraTreesRegressor to minimize - # the execution time - n_estimators=4, - max_depth=10, - bootstrap=True, - max_samples=0.5, - n_jobs=2, - random_state=0, + make_pipeline( + Nystroem(kernel="polynomial", degree=2, random_state=0), Ridge(alpha=1e3) ), KNeighborsRegressor(n_neighbors=15), ] score_iterative_imputer = pd.DataFrame() +tolerances = (1e-3, 1e-1, 1e-1, 1e-2) # iterative imputer is sensible to the tolerance and # dependent on the estimator used internally. -tolerances = (1e-3, 1e-1, 1e-1, 1e-2) for impute_estimator, tol in zip(estimators, tolerances): estimator = make_pipeline( IterativeImputer( From 84e77a2ecdff6ef87fbbe4e33ae69bfe245de0d7 Mon Sep 17 00:00:00 2001 From: siavrez Date: Wed, 8 Dec 2021 13:52:59 +0330 Subject: [PATCH 12/18] Changed docstring and added a comment about HistGradientBoosting ability to deal with missing values --- ...t_iterative_imputer_variants_comparison.py | 25 ++++++++++++------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/examples/impute/plot_iterative_imputer_variants_comparison.py b/examples/impute/plot_iterative_imputer_variants_comparison.py index 69eed655e79fd..c6de4f7124085 100644 --- a/examples/impute/plot_iterative_imputer_variants_comparison.py +++ b/examples/impute/plot_iterative_imputer_variants_comparison.py @@ -13,17 +13,16 @@ imputation with :class:`~impute.IterativeImputer`: * :class:`~linear_model.BayesianRidge`: regularized linear regression -* :class:`~tree.DecisionTreeRegressor`: non-linear regression -* :class:`~ensemble.ExtraTreesRegressor`: similar to missForest in R +* :class:`~tree.RandomForestRegressor`: Forests of randomized trees regression +* :func:`~pipeline.make_pipeline`(:class:`~kernel_approximation.Nystroem`, +:class:`~linear_model.Ridge`): a pipeline with the expansion of a degree 2 +polynomial kernel and regularized linear regression * :class:`~neighbors.KNeighborsRegressor`: comparable to other KNN imputation approaches Of particular interest is the ability of :class:`~impute.IterativeImputer` to mimic the behavior of missForest, a -popular imputation package for R. In this example, we have chosen to use -:class:`~ensemble.ExtraTreesRegressor` instead of -:class:`~ensemble.RandomForestRegressor` (as in missForest) due to its -increased speed. +popular imputation package for R. Note that :class:`~neighbors.KNeighborsRegressor` is different from KNN imputation, which learns from samples with missing values by using a distance @@ -35,8 +34,13 @@ dataset with a single value randomly removed from each row. For this particular pattern of missing values we see that -:class:`~ensemble.ExtraTreesRegressor` and -:class:`~linear_model.BayesianRidge` give the best results. +:class:`~linear_model.BayesianRidge` and +:class:`~ensemble.RandomForestRegressor` give the best results. + +It shoud be noted that some estimators such as +:class:`~ensemble.HistGradientBoostingRegressor` can natively deal with missing +features and are often recommended over building pipelines with complex and +costly missing values imputation strategies. """ @@ -97,7 +101,7 @@ estimators = [ BayesianRidge(), RandomForestRegressor( - # We tuned the hyperparameters of the ExtraTreesRegressor to minimize + # We tuned the hyperparameters of the RandomForestRegressor to minimize # the execution time n_estimators=4, max_depth=10, @@ -115,6 +119,9 @@ tolerances = (1e-3, 1e-1, 1e-1, 1e-2) # iterative imputer is sensible to the tolerance and # dependent on the estimator used internally. +# we tuned the tolerance to keep this example run with limited computational +# resources while not changing the results too much compared to keeping the +# stricter default value for the tolerance parameter. for impute_estimator, tol in zip(estimators, tolerances): estimator = make_pipeline( IterativeImputer( From 5bc1ae2ab7efdd7531de598ff12aa2fc43fd0ad4 Mon Sep 17 00:00:00 2001 From: siavrez Date: Fri, 10 Dec 2021 00:07:15 +0330 Subject: [PATCH 13/18] Changed docstring --- .../impute/plot_iterative_imputer_variants_comparison.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/examples/impute/plot_iterative_imputer_variants_comparison.py b/examples/impute/plot_iterative_imputer_variants_comparison.py index c6de4f7124085..1066d46c94d27 100644 --- a/examples/impute/plot_iterative_imputer_variants_comparison.py +++ b/examples/impute/plot_iterative_imputer_variants_comparison.py @@ -38,9 +38,9 @@ :class:`~ensemble.RandomForestRegressor` give the best results. It shoud be noted that some estimators such as -:class:`~ensemble.HistGradientBoostingRegressor` can natively deal with missing -features and are often recommended over building pipelines with complex and -costly missing values imputation strategies. +:class:`~ensemble.HistGradientBoostingRegressor` can natively deal with +missing features and are often recommended over building pipelines with +complex and costly missing values imputation strategies. """ From bbf468c99a821ec9dc917c6f99abb905bf8875e8 Mon Sep 17 00:00:00 2001 From: siavrez Date: Wed, 15 Dec 2021 20:51:35 +0330 Subject: [PATCH 14/18] Update examples/impute/plot_iterative_imputer_variants_comparison.py Co-authored-by: Guillaume Lemaitre --- examples/impute/plot_iterative_imputer_variants_comparison.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/impute/plot_iterative_imputer_variants_comparison.py b/examples/impute/plot_iterative_imputer_variants_comparison.py index 1066d46c94d27..1263a95bf2ad5 100644 --- a/examples/impute/plot_iterative_imputer_variants_comparison.py +++ b/examples/impute/plot_iterative_imputer_variants_comparison.py @@ -15,8 +15,8 @@ * :class:`~linear_model.BayesianRidge`: regularized linear regression * :class:`~tree.RandomForestRegressor`: Forests of randomized trees regression * :func:`~pipeline.make_pipeline`(:class:`~kernel_approximation.Nystroem`, -:class:`~linear_model.Ridge`): a pipeline with the expansion of a degree 2 -polynomial kernel and regularized linear regression + :class:`~linear_model.Ridge`): a pipeline with the expansion of a degree 2 + polynomial kernel and regularized linear regression * :class:`~neighbors.KNeighborsRegressor`: comparable to other KNN imputation approaches From e8817d532d93f9f488e64182841c4ac2375b11f3 Mon Sep 17 00:00:00 2001 From: siavrez Date: Wed, 15 Dec 2021 20:51:50 +0330 Subject: [PATCH 15/18] Update examples/impute/plot_iterative_imputer_variants_comparison.py Co-authored-by: Guillaume Lemaitre --- examples/impute/plot_iterative_imputer_variants_comparison.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/impute/plot_iterative_imputer_variants_comparison.py b/examples/impute/plot_iterative_imputer_variants_comparison.py index 1263a95bf2ad5..0bfe615bd36fd 100644 --- a/examples/impute/plot_iterative_imputer_variants_comparison.py +++ b/examples/impute/plot_iterative_imputer_variants_comparison.py @@ -116,12 +116,12 @@ KNeighborsRegressor(n_neighbors=15), ] score_iterative_imputer = pd.DataFrame() -tolerances = (1e-3, 1e-1, 1e-1, 1e-2) # iterative imputer is sensible to the tolerance and # dependent on the estimator used internally. # we tuned the tolerance to keep this example run with limited computational # resources while not changing the results too much compared to keeping the # stricter default value for the tolerance parameter. +tolerances = (1e-3, 1e-1, 1e-1, 1e-2) for impute_estimator, tol in zip(estimators, tolerances): estimator = make_pipeline( IterativeImputer( From 0babf502c65bc464efb43a746ed2904f39af14b4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=A9mie=20du=20Boisberranger?= <34657725+jeremiedbb@users.noreply.github.com> Date: Wed, 23 Feb 2022 13:46:22 +0100 Subject: [PATCH 16/18] Update examples/impute/plot_iterative_imputer_variants_comparison.py --- examples/impute/plot_iterative_imputer_variants_comparison.py | 1 + 1 file changed, 1 insertion(+) diff --git a/examples/impute/plot_iterative_imputer_variants_comparison.py b/examples/impute/plot_iterative_imputer_variants_comparison.py index 0bfe615bd36fd..27f8ba0137d45 100644 --- a/examples/impute/plot_iterative_imputer_variants_comparison.py +++ b/examples/impute/plot_iterative_imputer_variants_comparison.py @@ -70,6 +70,7 @@ X_full = X_full[::10] y_full = y_full[::10] n_samples, n_features = X_full.shape + # Estimate the score on the entire dataset, with no missing values br_estimator = BayesianRidge() score_full_data = pd.DataFrame( From 804b3a0fb993dc2e65b4c465b556c3ae2c54cf14 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=A9mie=20du=20Boisberranger?= <34657725+jeremiedbb@users.noreply.github.com> Date: Wed, 23 Feb 2022 13:46:30 +0100 Subject: [PATCH 17/18] Update examples/impute/plot_iterative_imputer_variants_comparison.py --- examples/impute/plot_iterative_imputer_variants_comparison.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/impute/plot_iterative_imputer_variants_comparison.py b/examples/impute/plot_iterative_imputer_variants_comparison.py index 27f8ba0137d45..2527235dfb0cb 100644 --- a/examples/impute/plot_iterative_imputer_variants_comparison.py +++ b/examples/impute/plot_iterative_imputer_variants_comparison.py @@ -102,8 +102,8 @@ estimators = [ BayesianRidge(), RandomForestRegressor( - # We tuned the hyperparameters of the RandomForestRegressor to minimize - # the execution time + # We tuned the hyperparameters of the RandomForestRegressor to get a good + # enough predictive performance for a restricted execution time. n_estimators=4, max_depth=10, bootstrap=True, From b30b1281e0022eb3e0371c28671252ea2edf7f3c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=A9mie=20du=20Boisberranger?= <34657725+jeremiedbb@users.noreply.github.com> Date: Wed, 23 Feb 2022 13:46:40 +0100 Subject: [PATCH 18/18] Update examples/impute/plot_iterative_imputer_variants_comparison.py --- examples/impute/plot_iterative_imputer_variants_comparison.py | 1 + 1 file changed, 1 insertion(+) diff --git a/examples/impute/plot_iterative_imputer_variants_comparison.py b/examples/impute/plot_iterative_imputer_variants_comparison.py index 2527235dfb0cb..7f719f379d719 100644 --- a/examples/impute/plot_iterative_imputer_variants_comparison.py +++ b/examples/impute/plot_iterative_imputer_variants_comparison.py @@ -133,6 +133,7 @@ score_iterative_imputer[impute_estimator.__class__.__name__] = cross_val_score( estimator, X_missing, y_missing, scoring="neg_mean_squared_error", cv=N_SPLITS ) + scores = pd.concat( [score_full_data, score_simple_imputer, score_iterative_imputer], keys=["Original", "SimpleImputer", "IterativeImputer"],