Closed
Description
Hi!
Describe the bug
The `RandomOverSampler` crashes with a `Cannot cast DatetimeArray to dtype timedelta64[ns]` error when a `timedelta64[ns]` column containing only `NaT` values is present in the dataframe.
If even one of the elements is an actual (non-`NaT`) timedelta64 value, no error occurs.
Steps/Code to Reproduce
from datetime import timedelta
import numpy as np
import pandas as pd
from imblearn.over_sampling import RandomOverSampler
X = pd.DataFrame(
{
'col_str': ["abc", "def", "xyz"],
'col_timedelta': pd.to_timedelta([np.nan, np.nan, np.nan])
}
)
display(X.info())
# Data columns (total 2 columns):
# # Column Non-Null Count Dtype
# --- ------ -------------- -----
# 0 col_str 3 non-null object
# 1 col_timedelta 0 non-null timedelta64[ns]
# dtypes: object(1), timedelta64[ns](1)
# memory usage: 176.0+ bytes
y = [0, 0, 1]
RandomOverSampler().fit_resample(X, y)
Expected Results
No error should occur, and the column in the returned dataframe should keep the dtype `timedelta64[ns]`.
Actual Results
Here is the traceback:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
...
File /code_location/venv/lib/python3.10/site-packages/imblearn/base.py:208, in BaseSampler.fit_resample(self, X, y)
187 """Resample the dataset.
188
189 Parameters
(...)
205 The corresponding label of `X_resampled`.
206 """
207 self._validate_params()
--> 208 return super().fit_resample(X, y)
File /code_location/venv/lib/python3.10/site-packages/imblearn/base.py:118, in SamplerMixin.fit_resample(self, X, y)
112 output = self._fit_resample(X, y)
114 y_ = (
115 label_binarize(output[1], classes=np.unique(y)) if binarize_y else output[1]
116 )
--> 118 X_, y_ = arrays_transformer.transform(output[0], y_)
119 return (X_, y_) if len(output) == 2 else (X_, y_, output[2])
File /code_location/venv/lib/python3.10/site-packages/imblearn/utils/_validation.py:39, in ArraysTransformer.transform(self, X, y)
38 def transform(self, X, y):
---> 39 X = self._transfrom_one(X, self.x_props)
40 y = self._transfrom_one(y, self.y_props)
41 if self.x_props["type"].lower() == "dataframe" and self.y_props[
42 "type"
43 ].lower() in {"series", "dataframe"}:
44 # We lost the y.index during resampling. We can safely use X.index to align
45 # them.
File /code_location/venv/lib/python3.10/site-packages/imblearn/utils/_validation.py:65, in ArraysTransformer._transfrom_one(self, array, props)
62 import pandas as pd
64 ret = pd.DataFrame(array, columns=props["columns"])
---> 65 ret = ret.astype(props["dtypes"])
66 elif type_ == "series":
67 import pandas as pd
File /code_location/venv/lib/python3.10/site-packages/pandas/core/generic.py:6305, in NDFrame.astype(self, dtype, copy, errors)
6303 else:
6304 try:
-> 6305 res_col = col.astype(dtype=cdt, copy=copy, errors=errors)
6306 except ValueError as ex:
6307 ex.args = (
6308 f"{ex}: Error while type casting for column '{col_name}'",
6309 )
File /code_location/venv/lib/python3.10/site-packages/pandas/core/generic.py:6324, in NDFrame.astype(self, dtype, copy, errors)
6317 results = [
6318 self.iloc[:, i].astype(dtype, copy=copy)
6319 for i in range(len(self.columns))
6320 ]
6322 else:
6323 # else, only a single dtype is given
-> 6324 new_data = self._mgr.astype(dtype=dtype, copy=copy, errors=errors)
6325 return self._constructor(new_data).__finalize__(self, method="astype")
6327 # GH 33113: handle empty frame or series
File /code_location/venv/lib/python3.10/site-packages/pandas/core/internals/managers.py:451, in BaseBlockManager.astype(self, dtype, copy, errors)
448 elif using_copy_on_write():
449 copy = False
--> 451 return self.apply(
452 "astype",
453 dtype=dtype,
454 copy=copy,
455 errors=errors,
456 using_cow=using_copy_on_write(),
457 )
File /code_location/venv/lib/python3.10/site-packages/pandas/core/internals/managers.py:352, in BaseBlockManager.apply(self, f, align_keys, **kwargs)
350 applied = b.apply(f, **kwargs)
351 else:
--> 352 applied = getattr(b, f)(**kwargs)
353 result_blocks = extend_blocks(applied, result_blocks)
355 out = type(self).from_blocks(result_blocks, self.axes)
File /code_location/venv/lib/python3.10/site-packages/pandas/core/internals/blocks.py:511, in Block.astype(self, dtype, copy, errors, using_cow)
491 """
492 Coerce to the new dtype.
493
(...)
507 Block
508 """
509 values = self.values
--> 511 new_values = astype_array_safe(values, dtype, copy=copy, errors=errors)
513 new_values = maybe_coerce_values(new_values)
515 refs = None
File /code_location/venv/lib/python3.10/site-packages/pandas/core/dtypes/astype.py:242, in astype_array_safe(values, dtype, copy, errors)
239 dtype = dtype.numpy_dtype
241 try:
--> 242 new_values = astype_array(values, dtype, copy=copy)
243 except (ValueError, TypeError):
244 # e.g. _astype_nansafe can fail on object-dtype of strings
245 # trying to convert to float
246 if errors == "ignore":
File /code_location/venv/lib/python3.10/site-packages/pandas/core/dtypes/astype.py:184, in astype_array(values, dtype, copy)
180 return values
182 if not isinstance(values, np.ndarray):
183 # i.e. ExtensionArray
--> 184 values = values.astype(dtype, copy=copy)
186 else:
187 values = _astype_nansafe(values, dtype, copy=copy)
File /code_location/venv/lib/python3.10/site-packages/pandas/core/arrays/datetimes.py:701, in DatetimeArray.astype(self, dtype, copy)
699 elif is_period_dtype(dtype):
700 return self.to_period(freq=dtype.freq)
--> 701 return dtl.DatetimeLikeArrayMixin.astype(self, dtype, copy)
File /code_location/venv/lib/python3.10/site-packages/pandas/core/arrays/datetimelike.py:487, in DatetimeLikeArrayMixin.astype(self, dtype, copy)
480 elif (
481 is_datetime_or_timedelta_dtype(dtype)
482 and not is_dtype_equal(self.dtype, dtype)
483 ) or is_float_dtype(dtype):
484 # disallow conversion between datetime/timedelta,
485 # and conversions for any datetimelike to float
486 msg = f"Cannot cast {type(self).__name__} to dtype {dtype}"
--> 487 raise TypeError(msg)
488 else:
489 return np.asarray(self, dtype=dtype)
TypeError: Cannot cast DatetimeArray to dtype timedelta64[ns]
Thanks for your support!
Metadata
Metadata
Assignees
Labels
No labels