FIX raise error for float to int casting with NaN, inf in check_array… · scikit-learn/scikit-learn@e6a4dc9 · GitHub
[go: up one dir, main page]

Skip to content

Commit e6a4dc9

Browse files
rth authored and glemaitre committed
FIX raise error for float to int casting with NaN, inf in check_array (#14872)
1 parent 97185ec commit e6a4dc9

File tree

3 files changed

+41
-3
lines changed

3 files changed

+41
-3
lines changed

doc/whats_new/v0.22.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -554,6 +554,10 @@ Changelog
554554
and sparse matrix.
555555
:pr:`14538` by :user:`Jérémie du Boisberranger <jeremiedbb>`.
556556

557+
- |Fix| :func:`utils.check_array` is now raising an error instead of casting
558+
NaN to integer.
559+
:pr:`14872` by `Roman Yurchak`_.
560+
557561
:mod:`sklearn.metrics`
558562
..................................
559563

sklearn/utils/tests/test_validation.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -202,6 +202,26 @@ def test_check_array_force_all_finite_object():
202202
check_array(X, dtype=None, force_all_finite=True)
203203

204204

205+
@pytest.mark.parametrize(
206+
"X, err_msg",
207+
[(np.array([[1, np.nan]]),
208+
"Input contains NaN, infinity or a value too large for.*int"),
209+
(np.array([[1, np.nan]]),
210+
"Input contains NaN, infinity or a value too large for.*int"),
211+
(np.array([[1, np.inf]]),
212+
"Input contains NaN, infinity or a value too large for.*int"),
213+
(np.array([[1, np.nan]], dtype=np.object),
214+
"cannot convert float NaN to integer")]
215+
)
216+
@pytest.mark.parametrize("force_all_finite", [True, False])
217+
def test_check_array_force_all_finite_object_unsafe_casting(
218+
X, err_msg, force_all_finite):
219+
# casting a float array containing NaN or inf to int dtype should
220+
# raise an error irrespective of the force_all_finite parameter.
221+
with pytest.raises(ValueError, match=err_msg):
222+
check_array(X, dtype=np.int, force_all_finite=force_all_finite)
223+
224+
205225
@ignore_warnings
206226
def test_check_array():
207227
# accept_sparse == False

sklearn/utils/validation.py

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@
3232
warnings.simplefilter('ignore', NonBLASDotWarning)
3333

3434

35-
def _assert_all_finite(X, allow_nan=False):
35+
def _assert_all_finite(X, allow_nan=False, msg_dtype=None):
3636
"""Like assert_all_finite, but only for ndarray."""
3737
# validation is also imported in extmath
3838
from .extmath import _safe_accumulator_op
@@ -52,7 +52,11 @@ def _assert_all_finite(X, allow_nan=False):
5252
if (allow_nan and np.isinf(X).any() or
5353
not allow_nan and not np.isfinite(X).all()):
5454
type_err = 'infinity' if allow_nan else 'NaN, infinity'
55-
raise ValueError(msg_err.format(type_err, X.dtype))
55+
raise ValueError(
56+
msg_err.format
57+
(type_err,
58+
msg_dtype if msg_dtype is not None else X.dtype)
59+
)
5660
# for object dtype data, we only check for NaNs (GH-13254)
5761
elif X.dtype == np.dtype('object') and not allow_nan:
5862
if _object_dtype_isnan(X).any():
@@ -494,7 +498,17 @@ def check_array(array, accept_sparse=False, accept_large_sparse=True,
494498
with warnings.catch_warnings():
495499
try:
496500
warnings.simplefilter('error', ComplexWarning)
497-
array = np.asarray(array, dtype=dtype, order=order)
501+
if dtype is not None and np.dtype(dtype).kind in 'iu':
502+
# Conversion float -> int should not contain NaN or
503+
# inf (numpy#14412). We cannot use casting='safe' because
504+
# then conversion float -> int would be disallowed.
505+
array = np.asarray(array, order=order)
506+
if array.dtype.kind == 'f':
507+
_assert_all_finite(array, allow_nan=False,
508+
msg_dtype=dtype)
509+
array = array.astype(dtype, casting="unsafe", copy=False)
510+
else:
511+
array = np.asarray(array, order=order, dtype=dtype)
498512
except ComplexWarning:
499513
raise ValueError("Complex data not supported\n"
500514
"{}\n".format(array))

0 commit comments

Comments
 (0)
0