From 2a6b40915542d8234785500a85c47786e441d055 Mon Sep 17 00:00:00 2001 From: Alexandre Perez-Lebel Date: Tue, 21 Jun 2022 16:29:01 +0200 Subject: [PATCH 1/6] Update see also part --- sklearn/impute/_base.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/sklearn/impute/_base.py b/sklearn/impute/_base.py index bb4bfed8098bf..c9acb8f143047 100644 --- a/sklearn/impute/_base.py +++ b/sklearn/impute/_base.py @@ -218,7 +218,10 @@ class SimpleImputer(_BaseImputer): See Also -------- - IterativeImputer : Multivariate imputation of missing values. + IterativeImputer : Multivariate imputer that estimates each feature + from all the others. + KNNImputer : Multivariate imputer that estimates missing features using + nearest samples. Notes ----- From aa4b433d342097284bad2bf0b00b0a330b4f30cb Mon Sep 17 00:00:00 2001 From: Alexandre Perez-Lebel Date: Tue, 21 Jun 2022 16:29:18 +0200 Subject: [PATCH 2/6] Add quick description of SimpleImputer --- sklearn/impute/_base.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/sklearn/impute/_base.py b/sklearn/impute/_base.py index c9acb8f143047..867f64ca82229 100644 --- a/sklearn/impute/_base.py +++ b/sklearn/impute/_base.py @@ -130,7 +130,10 @@ def _more_tags(self): class SimpleImputer(_BaseImputer): - """Imputation transformer for completing missing values. + """Univariate imputer for completing missing values with simple strategies. + + Replace missing values using the mean, median or most frequent value along + each column, or using a constant. Read more in the :ref:`User Guide <impute>`. 
From 2c2df18428ff09b5367ac571b4e35d8e4bbc1138 Mon Sep 17 00:00:00 2001 From: Alexandre Perez-Lebel Date: Tue, 21 Jun 2022 16:29:59 +0200 Subject: [PATCH 3/6] Add recommendation for using SimpleImputer in notes --- sklearn/impute/_base.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/sklearn/impute/_base.py b/sklearn/impute/_base.py index 867f64ca82229..30b712a8534ba 100644 --- a/sklearn/impute/_base.py +++ b/sklearn/impute/_base.py @@ -231,6 +231,11 @@ class SimpleImputer(_BaseImputer): Columns which only contained missing values at :meth:`fit` are discarded upon :meth:`transform` if strategy is not `"constant"`. + In a prediction context, simple imputation usually performs poorly when + associated with a weak learner. However, with a powerful learner, it can + lead to as good or better performance than complex imputation such as + :obj:`IterativeImputer` or :obj:`KNNImputer`. + Examples -------- >>> import numpy as np From b244fca53babb1c5c1c2a07c5ae24d95dc35ddad Mon Sep 17 00:00:00 2001 From: Alexandre Perez-Lebel <33580936+aperezlebel@users.noreply.github.com> Date: Thu, 23 Jun 2022 18:13:37 +0200 Subject: [PATCH 4/6] Update sklearn/impute/_base.py Co-authored-by: Guillaume Lemaitre --- sklearn/impute/_base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/impute/_base.py b/sklearn/impute/_base.py index 30b712a8534ba..ef772e8950585 100644 --- a/sklearn/impute/_base.py +++ b/sklearn/impute/_base.py @@ -234,7 +234,7 @@ class SimpleImputer(_BaseImputer): In a prediction context, simple imputation usually performs poorly when associated with a weak learner. However, with a powerful learner, it can lead to as good or better performance than complex imputation such as - :obj:`IterativeImputer` or :obj:`KNNImputer`. + :class:`~sklearn.impute.IterativeImputer` or :class:`~sklearn.impute.KNNImputer`. 
Examples -------- From a841045646c13b2d9b9ede297f96cbe89dc774cc Mon Sep 17 00:00:00 2001 From: Alexandre Perez-Lebel <33580936+aperezlebel@users.noreply.github.com> Date: Thu, 23 Jun 2022 18:14:07 +0200 Subject: [PATCH 5/6] Update sklearn/impute/_base.py Co-authored-by: Guillaume Lemaitre --- sklearn/impute/_base.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearn/impute/_base.py b/sklearn/impute/_base.py index ef772e8950585..46f971eee126a 100644 --- a/sklearn/impute/_base.py +++ b/sklearn/impute/_base.py @@ -132,8 +132,8 @@ def _more_tags(self): class SimpleImputer(_BaseImputer): """Univariate imputer for completing missing values with simple strategies. - Replace missing values using the mean, median or most frequent value along - each column, or using a constant. + Replace missing values using a descriptive statistic (e.g. mean, median, or + most frequent) along each column, or using a constant value. Read more in the :ref:`User Guide <impute>`. From c43746470f9029a959789c49d14a3821ba5ae8b4 Mon Sep 17 00:00:00 2001 From: Alexandre Perez-Lebel <33580936+aperezlebel@users.noreply.github.com> Date: Thu, 23 Jun 2022 18:22:12 +0200 Subject: [PATCH 6/6] Update sklearn/impute/_base.py Co-authored-by: Guillaume Lemaitre --- sklearn/impute/_base.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearn/impute/_base.py b/sklearn/impute/_base.py index 46f971eee126a..4a000d0f11573 100644 --- a/sklearn/impute/_base.py +++ b/sklearn/impute/_base.py @@ -221,8 +221,8 @@ class SimpleImputer(_BaseImputer): See Also -------- - IterativeImputer : Multivariate imputer that estimates each feature - from all the others. + IterativeImputer : Multivariate imputer that estimates values to impute for + each feature with missing values from all the others. KNNImputer : Multivariate imputer that estimates missing features using nearest samples.