Merge branch 'master' into pls_float_preserve · scikit-learn/scikit-learn@4850707 · GitHub
[go: up one dir, main page]

Skip to content

Commit 4850707

Browse files
authored
Merge branch 'master' into pls_float_preserve
2 parents aecae5f + f1aedf6 commit 4850707

File tree

4 files changed

+19
-7
lines changed

4 files changed

+19
-7
lines changed

doc/whats_new/v0.20.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,9 @@ Preprocessing
9494
other features in a round-robin fashion. :issue:`8478` by
9595
:user:`Sergey Feldman <sergeyf>`.
9696

97+
- Updated :class:`preprocessing.MinMaxScaler` to pass through NaN values. :issue:`10404`
98+
by :user:`Lucija Gregov <LucijaGregov>`.
99+
97100
Model evaluation
98101

99102
- Added the :func:`metrics.balanced_accuracy_score` metric and a corresponding

sklearn/preprocessing/data.py

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -276,6 +276,9 @@ class MinMaxScaler(BaseEstimator, TransformerMixin):
276276
277277
Notes
278278
-----
279+
NaNs are treated as missing values: disregarded in fit, and maintained in
280+
transform.
281+
279282
For a comparison of the different scalers, transformers, and normalizers,
280283
see :ref:`examples/preprocessing/plot_all_scaling.py
281284
<sphx_glr_auto_examples_preprocessing_plot_all_scaling.py>`.
@@ -340,10 +343,11 @@ def partial_fit(self, X, y=None):
340343
"You may consider to use MaxAbsScaler instead.")
341344

342345
X = check_array(X, copy=self.copy, warn_on_dtype=True,
343-
estimator=self, dtype=FLOAT_DTYPES)
346+
estimator=self, dtype=FLOAT_DTYPES,
347+
force_all_finite="allow-nan")
344348

345-
data_min = np.min(X, axis=0)
346-
data_max = np.max(X, axis=0)
349+
data_min = np.nanmin(X, axis=0)
350+
data_max = np.nanmax(X, axis=0)
347351

348352
# First pass
349353
if not hasattr(self, 'n_samples_seen_'):
@@ -373,7 +377,8 @@ def transform(self, X):
373377
"""
374378
check_is_fitted(self, 'scale_')
375379

376-
X = check_array(X, copy=self.copy, dtype=FLOAT_DTYPES)
380+
X = check_array(X, copy=self.copy, dtype=FLOAT_DTYPES,
381+
force_all_finite="allow-nan")
377382

378383
X *= self.scale_
379384
X += self.min_
@@ -389,7 +394,8 @@ def inverse_transform(self, X):
389394
"""
390395
check_is_fitted(self, 'scale_')
391396

392-
X = check_array(X, copy=self.copy, dtype=FLOAT_DTYPES)
397+
X = check_array(X, copy=self.copy, dtype=FLOAT_DTYPES,
398+
force_all_finite="allow-nan")
393399

394400
X -= self.min_
395401
X /= self.scale_

sklearn/preprocessing/tests/test_common.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
from sklearn.datasets import load_iris
55
from sklearn.model_selection import train_test_split
66
from sklearn.preprocessing import QuantileTransformer
7+
from sklearn.preprocessing import MinMaxScaler
78
from sklearn.utils.testing import assert_array_equal
89
from sklearn.utils.testing import assert_allclose
910

@@ -12,7 +13,8 @@
1213

1314
@pytest.mark.parametrize(
1415
"est",
15-
[QuantileTransformer(n_quantiles=10, random_state=42)]
16+
[MinMaxScaler(),
17+
QuantileTransformer(n_quantiles=10, random_state=42)]
1618
)
1719
def test_missing_value_handling(est):
1820
# check that the preprocessing method lets NaN values pass through

sklearn/utils/estimator_checks.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,8 @@
7373
'RANSACRegressor', 'RadiusNeighborsRegressor',
7474
'RandomForestRegressor', 'Ridge', 'RidgeCV']
7575

76-
ALLOW_NAN = ['QuantileTransformer', 'Imputer', 'SimpleImputer', 'MICEImputer']
76+
ALLOW_NAN = ['Imputer', 'SimpleImputer', 'MICEImputer',
77+
'MinMaxScaler', 'QuantileTransformer']
7778
SKIP_DTYPE_CONVERSION_CHECK = [
7879
'AdditiveChi2Sampler', 'BernoulliRBM', 'Binarizer', 'Birch',
7980
'CCA', 'DictionaryLearning', 'FactorAnalysis', 'FastICA',

0 commit comments

Comments
 (0)
0