|
43 | 43 | check_scalar,
|
44 | 44 | _deprecate_positional_args,
|
45 | 45 | _check_sample_weight,
|
46 |
| - _allclose_dense_sparse) |
| 46 | + _allclose_dense_sparse, |
| 47 | + FLOAT_DTYPES) |
47 | 48 | import sklearn
|
48 | 49 |
|
49 | 50 | from sklearn.exceptions import NotFittedError
|
@@ -352,6 +353,45 @@ def test_check_array_pandas_dtype_object_conversion():
|
352 | 353 | assert check_array(X_df, ensure_2d=False).dtype.kind == "f"
|
353 | 354 |
|
354 | 355 |
|
| 356 | +def test_check_array_pandas_dtype_casting(): |
| 357 | + # test that data-frames with homogeneous dtype are not upcast |
| 358 | + pd = pytest.importorskip('pandas') |
| 359 | + X = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]], dtype=np.float32) |
| 360 | + X_df = pd.DataFrame(X) |
| 361 | + assert check_array(X_df).dtype == np.float32 |
| 362 | + assert check_array(X_df, dtype=FLOAT_DTYPES).dtype == np.float32 |
| 363 | + |
| 364 | + X_df.iloc[:, 0] = X_df.iloc[:, 0].astype(np.float16) |
| 365 | + assert_array_equal(X_df.dtypes, |
| 366 | + (np.float16, np.float32, np.float32)) |
| 367 | + assert check_array(X_df).dtype == np.float32 |
| 368 | + assert check_array(X_df, dtype=FLOAT_DTYPES).dtype == np.float32 |
| 369 | + |
| 370 | + X_df.iloc[:, 1] = X_df.iloc[:, 1].astype(np.int16) |
| 371 | + # float16, int16, float32 casts to float32 |
| 372 | + assert check_array(X_df).dtype == np.float32 |
| 373 | + assert check_array(X_df, dtype=FLOAT_DTYPES).dtype == np.float32 |
| 374 | + |
| 375 | + X_df.iloc[:, 2] = X_df.iloc[:, 2].astype(np.float16) |
| 376 | + # float16, int16, float16 casts to float32 |
| 377 | + assert check_array(X_df).dtype == np.float32 |
| 378 | + assert check_array(X_df, dtype=FLOAT_DTYPES).dtype == np.float32 |
| 379 | + |
| 380 | + X_df = X_df.astype(np.int16) |
| 381 | + assert check_array(X_df).dtype == np.int16 |
| 382 | + # we're not using upcasting rules for determining |
| 383 | + # the target type yet, so we cast to the default of float64 |
| 384 | + assert check_array(X_df, dtype=FLOAT_DTYPES).dtype == np.float64 |
| 385 | + |
| 386 | + # check that we handle pandas dtypes in a semi-reasonable way |
| 387 | + # this is actually tricky because we can't really know that this |
| 388 | + # should be integer ahead of converting it. |
| 389 | + cat_df = pd.DataFrame([pd.Categorical([1, 2, 3])]) |
| 390 | + assert (check_array(cat_df).dtype == np.int64) |
| 391 | + assert (check_array(cat_df, dtype=FLOAT_DTYPES).dtype |
| 392 | + == np.float64) |
| 393 | + |
| 394 | + |
355 | 395 | def test_check_array_on_mock_dataframe():
|
356 | 396 | arr = np.array([[0.2, 0.7], [0.6, 0.5], [0.4, 0.1], [0.7, 0.2]])
|
357 | 397 | mock_df = MockDataFrame(arr)
|
|
0 commit comments