Revert "Deprecate "warn_on_dtype" from check_array (#13382)" · xhluca/scikit-learn@4a110c4 · GitHub
[go: up one dir, main page]

Skip to content

Commit 4a110c4

Browse files
author
Xing
authored
Revert "Deprecate "warn_on_dtype" from check_array (scikit-learn#13382)"
This reverts commit ac23178.
1 parent 6174872 commit 4a110c4

File tree

6 files changed

+27
-90
lines changed

6 files changed

+27
-90
lines changed

doc/whats_new/v0.21.rst

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -627,15 +627,6 @@ Support for Python 3.4 and below has been officially dropped.
627627
affects all ensemble methods using decision trees.
628628
:issue:`12344` by :user:`Adrin Jalali <adrinjalali>`.
629629

630-
:mod:`sklearn.utils`
631-
...................
632-
633-
- |API| Deprecated ``warn_on_dtype`` parameter from :func:`utils.check_array`
634-
and :func:`utils.check_X_y`. Added explicit warning for dtype conversion
635-
in :func:`check_pairwise_arrays` if the ``metric`` being passed is a
636-
pairwise boolean metric.
637-
:issue:`13382` by :user:`Prathmesh Savale <praths007>`.
638-
639630
Multiple modules
640631
................
641632

sklearn/metrics/pairwise.py

Lines changed: 4 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,6 @@
3030
from ..utils._joblib import effective_n_jobs
3131

3232
from .pairwise_fast import _chi2_kernel_fast, _sparse_manhattan
33-
from ..exceptions import DataConversionWarning
3433

3534

3635
# Utility Functions
@@ -100,18 +99,19 @@ def check_pairwise_arrays(X, Y, precomputed=False, dtype=None):
10099
"""
101100
X, Y, dtype_float = _return_float_dtype(X, Y)
102101

102+
warn_on_dtype = dtype is not None
103103
estimator = 'check_pairwise_arrays'
104104
if dtype is None:
105105
dtype = dtype_float
106106

107107
if Y is X or Y is None:
108108
X = Y = check_array(X, accept_sparse='csr', dtype=dtype,
109-
estimator=estimator)
109+
warn_on_dtype=warn_on_dtype, estimator=estimator)
110110
else:
111111
X = check_array(X, accept_sparse='csr', dtype=dtype,
112-
estimator=estimator)
112+
warn_on_dtype=warn_on_dtype, estimator=estimator)
113113
Y = check_array(Y, accept_sparse='csr', dtype=dtype,
114-
estimator=estimator)
114+
warn_on_dtype=warn_on_dtype, estimator=estimator)
115115

116116
if precomputed:
117117
if X.shape[1] != Y.shape[0]:
@@ -1421,11 +1421,6 @@ def pairwise_distances(X, Y=None, metric="euclidean", n_jobs=None, **kwds):
14211421
" support sparse matrices.")
14221422

14231423
dtype = bool if metric in PAIRWISE_BOOLEAN_FUNCTIONS else None
1424-
1425-
if dtype == bool and (X.dtype != bool or Y.dtype != bool):
1426-
msg = "Data was converted to boolean for metric %s" % metric
1427-
warnings.warn(msg, DataConversionWarning)
1428-
14291424
X, Y = check_pairwise_arrays(X, Y, dtype=dtype)
14301425

14311426
# precompute data-derived metric params

sklearn/metrics/tests/test_pairwise.py

Lines changed: 0 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -138,21 +138,6 @@ def test_pairwise_boolean_distance(metric):
138138
res[np.isnan(res)] = 0
139139
assert np.sum(res != 0) == 0
140140

141-
# non-boolean arrays are converted to boolean for boolean
142-
# distance metrics with a data conversion warning
143-
msg = "Data was converted to boolean for metric %s" % metric
144-
with pytest.warns(DataConversionWarning, match=msg):
145-
pairwise_distances(X, metric=metric)
146-
147-
148-
def test_no_data_conversion_warning():
149-
# No warnings issued if metric is not a boolean distance function
150-
rng = np.random.RandomState(0)
151-
X = rng.randn(5, 4)
152-
with pytest.warns(None) as records:
153-
pairwise_distances(X, metric="minkowski")
154-
assert len(records) == 0
155-
156141

157142
@pytest.mark.parametrize('func', [pairwise_distances, pairwise_kernels])
158143
def test_pairwise_precomputed(func):

sklearn/preprocessing/data.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -137,8 +137,8 @@ def scale(X, axis=0, with_mean=True, with_std=True, copy=True):
137137
138138
""" # noqa
139139
X = check_array(X, accept_sparse='csc', copy=copy, ensure_2d=False,
140-
estimator='the scale function', dtype=FLOAT_DTYPES,
141-
force_all_finite='allow-nan')
140+
warn_on_dtype=False, estimator='the scale function',
141+
dtype=FLOAT_DTYPES, force_all_finite='allow-nan')
142142
if sparse.issparse(X):
143143
if with_mean:
144144
raise ValueError(
@@ -348,7 +348,7 @@ def partial_fit(self, X, y=None):
348348
raise TypeError("MinMaxScaler does no support sparse input. "
349349
"You may consider to use MaxAbsScaler instead.")
350350

351-
X = check_array(X, copy=self.copy,
351+
X = check_array(X, copy=self.copy, warn_on_dtype=False,
352352
estimator=self, dtype=FLOAT_DTYPES,
353353
force_all_finite="allow-nan")
354354

@@ -468,7 +468,7 @@ def minmax_scale(X, feature_range=(0, 1), axis=0, copy=True):
468468
""" # noqa
469469
# Unlike the scaler object, this function allows 1d input.
470470
# If copy is required, it will be done inside the scaler object.
471-
X = check_array(X, copy=False, ensure_2d=False,
471+
X = check_array(X, copy=False, ensure_2d=False, warn_on_dtype=False,
472472
dtype=FLOAT_DTYPES, force_all_finite='allow-nan')
473473
original_ndim = X.ndim
474474

@@ -659,8 +659,8 @@ def partial_fit(self, X, y=None):
659659
Ignored
660660
"""
661661
X = check_array(X, accept_sparse=('csr', 'csc'), copy=self.copy,
662-
estimator=self, dtype=FLOAT_DTYPES,
663-
force_all_finite='allow-nan')
662+
warn_on_dtype=False, estimator=self,
663+
dtype=FLOAT_DTYPES, force_all_finite='allow-nan')
664664

665665
# Even in the case of `with_mean=False`, we update the mean anyway
666666
# This is needed for the incremental computation of the var
@@ -753,7 +753,7 @@ def transform(self, X, copy=None):
753753
check_is_fitted(self, 'scale_')
754754

755755
copy = copy if copy is not None else self.copy
756-
X = check_array(X, accept_sparse='csr', copy=copy,
756+
X = check_array(X, accept_sparse='csr', copy=copy, warn_on_dtype=False,
757757
estimator=self, dtype=FLOAT_DTYPES,
758758
force_all_finite='allow-nan')
759759

sklearn/utils/tests/test_validation.py

Lines changed: 11 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -387,15 +387,12 @@ def test_check_array_dtype_warning():
387387
assert_equal(X_checked.dtype, np.float64)
388388

389389
for X in float64_data:
390-
with pytest.warns(None) as record:
391-
warnings.simplefilter("ignore", DeprecationWarning) # 0.23
392-
X_checked = check_array(X, dtype=np.float64,
393-
accept_sparse=True, warn_on_dtype=True)
394-
assert_equal(X_checked.dtype, np.float64)
395-
X_checked = check_array(X, dtype=np.float64,
396-
accept_sparse=True, warn_on_dtype=False)
397-
assert_equal(X_checked.dtype, np.float64)
398-
assert len(record) == 0
390+
X_checked = assert_no_warnings(check_array, X, dtype=np.float64,
391+
accept_sparse=True, warn_on_dtype=True)
392+
assert_equal(X_checked.dtype, np.float64)
393+
X_checked = assert_no_warnings(check_array, X, dtype=np.float64,
394+
accept_sparse=True, warn_on_dtype=False)
395+
assert_equal(X_checked.dtype, np.float64)
399396

400397
for X in float32_data:
401398
X_checked = assert_no_warnings(check_array, X,
@@ -420,17 +417,6 @@ def test_check_array_dtype_warning():
420417
assert_equal(X_checked.format, 'csr')
421418

422419

423-
def test_check_array_warn_on_dtype_deprecation():
424-
X = np.asarray([[0.0], [1.0]])
425-
Y = np.asarray([[2.0], [3.0]])
426-
with pytest.warns(DeprecationWarning,
427-
match="'warn_on_dtype' is deprecated"):
428-
check_array(X, warn_on_dtype=True)
429-
with pytest.warns(DeprecationWarning,
430-
match="'warn_on_dtype' is deprecated"):
431-
check_X_y(X, Y, warn_on_dtype=True)
432-
433-
434420
def test_check_array_accept_sparse_type_exception():
435421
X = [[1, 2], [3, 4]]
436422
X_csr = sp.csr_matrix(X)
@@ -704,7 +690,8 @@ def test_suppress_validation():
704690
def test_check_array_series():
705691
# regression test that check_array works on pandas Series
706692
pd = importorskip("pandas")
707-
res = check_array(pd.Series([1, 2, 3]), ensure_2d=False)
693+
res = check_array(pd.Series([1, 2, 3]), ensure_2d=False,
694+
warn_on_dtype=True)
708695
assert_array_equal(res, np.array([1, 2, 3]))
709696

710697
# with categorical dtype (not a numpy dtype) (GH12699)
@@ -725,10 +712,7 @@ def test_check_dataframe_warns_on_dtype():
725712
check_array, df, dtype=np.float64, warn_on_dtype=True)
726713
assert_warns(DataConversionWarning, check_array, df,
727714
dtype='numeric', warn_on_dtype=True)
728-
with pytest.warns(None) as record:
729-
warnings.simplefilter("ignore", DeprecationWarning) # 0.23
730-
check_array(df, dtype='object', warn_on_dtype=True)
731-
assert len(record) == 0
715+
assert_no_warnings(check_array, df, dtype='object', warn_on_dtype=True)
732716

733717
# Also check that it raises a warning for mixed dtypes in a DataFrame.
734718
df_mixed = pd.DataFrame([['1', 2, 3], ['4', 5, 6]])
@@ -744,11 +728,8 @@ def test_check_dataframe_warns_on_dtype():
744728
df_mixed_numeric = pd.DataFrame([[1., 2, 3], [4., 5, 6]])
745729
assert_warns(DataConversionWarning, check_array, df_mixed_numeric,
746730
dtype='numeric', warn_on_dtype=True)
747-
with pytest.warns(None) as record:
748-
warnings.simplefilter("ignore", DeprecationWarning) # 0.23
749-
check_array(df_mixed_numeric.astype(int),
750-
dtype='numeric', warn_on_dtype=True)
751-
assert len(record) == 0
731+
assert_no_warnings(check_array, df_mixed_numeric.astype(int),
732+
dtype='numeric', warn_on_dtype=True)
752733

753734

754735
class DummyMemory:

sklearn/utils/validation.py

Lines changed: 5 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -332,7 +332,7 @@ def _ensure_no_complex_data(array):
332332
def check_array(array, accept_sparse=False, accept_large_sparse=True,
333333
dtype="numeric", order=None, copy=False, force_all_finite=True,
334334
ensure_2d=True, allow_nd=False, ensure_min_samples=1,
335-
ensure_min_features=1, warn_on_dtype=None, estimator=None):
335+
ensure_min_features=1, warn_on_dtype=False, estimator=None):
336336

337337
"""Input validation on an array, list, sparse matrix or similar.
338338
@@ -407,30 +407,19 @@ def check_array(array, accept_sparse=False, accept_large_sparse=True,
407407
dimensions or is originally 1D and ``ensure_2d`` is True. Setting to 0
408408
disables this check.
409409
410-
warn_on_dtype : boolean or None, optional (default=None)
410+
warn_on_dtype : boolean (default=False)
411411
Raise DataConversionWarning if the dtype of the input data structure
412412
does not match the requested dtype, causing a memory copy.
413413
414-
.. deprecated:: 0.21
415-
``warn_on_dtype`` is deprecated in version 0.21 and will be
416-
removed in 0.23.
417-
418414
estimator : str or estimator instance (default=None)
419415
If passed, include the name of the estimator in warning messages.
420416
421417
Returns
422418
-------
423419
array_converted : object
424420
The converted and validated array.
425-
"""
426-
# warn_on_dtype deprecation
427-
if warn_on_dtype is not None:
428-
warnings.warn(
429-
"'warn_on_dtype' is deprecated in version 0.21 and will be "
430-
"removed in 0.23. Don't set `warn_on_dtype` to remove this "
431-
"warning.",
432-
DeprecationWarning)
433421
422+
"""
434423
# store reference to original array to check if copy is needed when
435424
# function returns
436425
array_orig = array
@@ -601,7 +590,7 @@ def check_X_y(X, y, accept_sparse=False, accept_large_sparse=True,
601590
dtype="numeric", order=None, copy=False, force_all_finite=True,
602591
ensure_2d=True, allow_nd=False, multi_output=False,
603592
ensure_min_samples=1, ensure_min_features=1, y_numeric=False,
604-
warn_on_dtype=None, estimator=None):
593+
warn_on_dtype=False, estimator=None):
605594
"""Input validation for standard estimators.
606595
607596
Checks X and y for consistent length, enforces X to be 2D and y 1D. By
@@ -686,14 +675,10 @@ def check_X_y(X, y, accept_sparse=False, accept_large_sparse=True,
686675
it is converted to float64. Should only be used for regression
687676
algorithms.
688677
689-
warn_on_dtype : boolean or None, optional (default=None)
678+
warn_on_dtype : boolean (default=False)
690679
Raise DataConversionWarning if the dtype of the input data structure
691680
does not match the requested dtype, causing a memory copy.
692681
693-
.. deprecated:: 0.21
694-
``warn_on_dtype`` is deprecated in version 0.21 and will be
695-
removed in 0.23.
696-
697682
estimator : str or estimator instance (default=None)
698683
If passed, include the name of the estimator in warning messages.
699684

0 commit comments

Comments
 (0)
0