Merge branch 'master' into pls_float_preserve · scikit-learn/scikit-learn@508d95e · GitHub
[go: up one dir, main page]

Skip to content

Commit 508d95e

Browse files
authored
Merge branch 'master' into pls_float_preserve
2 parents 0ff938f + 7124d87 commit 508d95e

File tree

6 files changed

+35
-12
lines changed

6 files changed

+35
-12
lines changed

doc/whats_new/v0.20.rst

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -91,9 +91,12 @@ Preprocessing
9191

9292
- Added :class:`MICEImputer`, which is a strategy for imputing missing
9393
values by modeling each feature with missing values as a function of
94-
other features in a round-robin fashion. :issue:`8478` by
94+
other features in a round-robin fashion. :issue:`8478` by
9595
:user:`Sergey Feldman <sergeyf>`.
9696

97+
- Updated :class:`preprocessing.MinMaxScaler` to pass through NaN values. :issue:`10404`
98+
by :user:`Lucija Gregov <LucijaGregov>`.
99+
97100
Model evaluation
98101

99102
- Added the :func:`metrics.balanced_accuracy_score` metric and a corresponding
@@ -411,6 +414,12 @@ Preprocessing
411414
``inverse_transform`` on unseen labels. :issue:`9816` by :user:`Charlie Newey
412415
<newey01c>`.
413416

417+
Model evaluation and meta-estimators
418+
419+
- Add improved error message in :func:`model_selection.cross_val_score` when
420+
multiple metrics are passed in the ``scoring`` keyword.
421+
:issue:`11006` by :user:`Ming Li <minggli>`.
422+
414423
Datasets
415424

416425
- Fixed a bug in :func:`datasets.load_boston` which had a wrong data point.

sklearn/metrics/scorer.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
# License: Simplified BSD
2020

2121
from abc import ABCMeta, abstractmethod
22+
from collections import Iterable
2223
import warnings
2324

2425
import numpy as np
@@ -300,6 +301,10 @@ def check_scoring(estimator, scoring=None, allow_none=False):
300301
"If no scoring is specified, the estimator passed should "
301302
"have a 'score' method. The estimator %r does not."
302303
% estimator)
304+
elif isinstance(scoring, Iterable):
305+
raise ValueError("For evaluating multiple scores, use "
306+
"sklearn.model_selection.cross_validate instead. "
307+
"{0} was passed.".format(scoring))
303308
else:
304309
raise ValueError("scoring value should either be a callable, string or"
305310
" None. %r was passed" % scoring)

sklearn/model_selection/tests/test_search.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1217,10 +1217,10 @@ def test_fit_grid_point():
12171217
assert_equal(n_test_samples, test.size)
12181218

12191219
# Should raise an error upon multimetric scorer
1220-
assert_raise_message(ValueError, "scoring value should either be a "
1221-
"callable, string or None.", fit_grid_point, X, y,
1222-
svc, params, train, test, {'score': scorer},
1223-
verbose=True)
1220+
assert_raise_message(ValueError, "For evaluating multiple scores, use "
1221+
"sklearn.model_selection.cross_validate instead.",
1222+
fit_grid_point, X, y, svc, params, train, test,
1223+
{'score': scorer}, verbose=True)
12241224

12251225

12261226
def test_pickle():

sklearn/preprocessing/data.py

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -276,6 +276,9 @@ class MinMaxScaler(BaseEstimator, TransformerMixin):
276276
277277
Notes
278278
-----
279+
NaNs are treated as missing values: disregarded in fit, and maintained in
280+
transform.
281+
279282
For a comparison of the different scalers, transformers, and normalizers,
280283
see :ref:`examples/preprocessing/plot_all_scaling.py
281284
<sphx_glr_auto_examples_preprocessing_plot_all_scaling.py>`.
@@ -340,10 +343,11 @@ def partial_fit(self, X, y=None):
340343
"You may consider to use MaxAbsScaler instead.")
341344

342345
X = check_array(X, copy=self.copy, warn_on_dtype=True,
343-
estimator=self, dtype=FLOAT_DTYPES)
346+
estimator=self, dtype=FLOAT_DTYPES,
347+
force_all_finite="allow-nan")
344348

345-
data_min = np.min(X, axis=0)
346-
data_max = np.max(X, axis=0)
349+
data_min = np.nanmin(X, axis=0)
350+
data_max = np.nanmax(X, axis=0)
347351

348352
# First pass
349353
if not hasattr(self, 'n_samples_seen_'):
@@ -373,7 +377,8 @@ def transform(self, X):
373377
"""
374378
check_is_fitted(self, 'scale_')
375379

376-
X = check_array(X, copy=self.copy, dtype=FLOAT_DTYPES)
380+
X = check_array(X, copy=self.copy, dtype=FLOAT_DTYPES,
381+
force_all_finite="allow-nan")
377382

378383
X *= self.scale_
379384
X += self.min_
@@ -389,7 +394,8 @@ def inverse_transform(self, X):
389394
"""
390395
check_is_fitted(self, 'scale_')
391396

392-
X = check_array(X, copy=self.copy, dtype=FLOAT_DTYPES)
397+
X = check_array(X, copy=self.copy, dtype=FLOAT_DTYPES,
398+
force_all_finite="allow-nan")
393399

394400
X -= self.min_
395401
X /= self.scale_

sklearn/preprocessing/tests/test_common.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
from sklearn.datasets import load_iris
55
from sklearn.model_selection import train_test_split
66
from sklearn.preprocessing import QuantileTransformer
7+
from sklearn.preprocessing import MinMaxScaler
78
from sklearn.utils.testing import assert_array_equal
89
from sklearn.utils.testing import assert_allclose
910

@@ -12,7 +13,8 @@
1213

1314
@pytest.mark.parametrize(
1415
"est",
15-
[QuantileTransformer(n_quantiles=10, random_state=42)]
16+
[MinMaxScaler(),
17+
QuantileTransformer(n_quantiles=10, random_state=42)]
1618
)
1719
def test_missing_value_handling(est):
1820
# check that the preprocessing method lets NaN values pass through

sklearn/utils/estimator_checks.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,8 @@
7373
'RANSACRegressor', 'RadiusNeighborsRegressor',
7474
'RandomForestRegressor', 'Ridge', 'RidgeCV']
7575

76-
ALLOW_NAN = ['QuantileTransformer', 'Imputer', 'SimpleImputer', 'MICEImputer']
76+
ALLOW_NAN = ['Imputer', 'SimpleImputer', 'MICEImputer',
77+
'MinMaxScaler', 'QuantileTransformer']
7778
SKIP_DTYPE_CONVERSION_CHECK = [
7879
'AdditiveChi2Sampler', 'BernoulliRBM', 'Binarizer', 'Birch',
7980
'CCA', 'DictionaryLearning', 'FactorAnalysis', 'FastICA',

0 commit comments

Comments
 (0)
0