diff --git a/pandas/core/arrays/__init__.py b/pandas/core/arrays/__init__.py index ea8837332633a..1c028bba18eae 100644 --- a/pandas/core/arrays/__init__.py +++ b/pandas/core/arrays/__init__.py @@ -2,7 +2,7 @@ ExtensionOpsMixin, ExtensionScalarOpsMixin) from .categorical import Categorical # noqa -from .datetimes import DatetimeArrayMixin # noqa +from .datetimes import DatetimeArray # noqa from .interval import IntervalArray # noqa from .period import PeriodArray, period_array # noqa from .timedeltas import TimedeltaArrayMixin # noqa diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 3fa4f503d2dd5..84a9e90068add 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -39,7 +39,7 @@ from pandas.core.dtypes.missing import isna import pandas.core.common as com -from pandas.core.algorithms import checked_add_with_arr +from pandas.core.algorithms import checked_add_with_arr, take from .base import ExtensionOpsMixin from pandas.util._decorators import deprecate_kwarg @@ -127,6 +127,10 @@ def asi8(self): # ------------------------------------------------------------------ # Array-like Methods + @property + def nbytes(self): + return self.asi8.nbytes + @property def shape(self): return (len(self),) @@ -192,6 +196,107 @@ def astype(self, dtype, copy=True): return self._box_values(self.asi8) return super(DatetimeLikeArrayMixin, self).astype(dtype, copy) + # ------------------------------------------------------------------ + # ExtensionArray Interface + # isna + # __getitem__ + # __len__ + # nbytes + # take + # _concat_same_type + # copy + # _from_factorized + # factorize / _values_for_factorize + # _from_sequence + # unique + # + # dtype + # + # dropna + # + #* _formatting_values + #* fillna + #* argsort / _values_for_argsort + #* _reduce + + def unique(self): + from pandas.core.algorithms import unique1d + result = unique1d(self.asi8) + return self._shallow_copy(result) + + def _validate_fill_value(self, fill_value): + """ + If a fill_value is passed to `take` convert it to an i8 representation, + raising ValueError if this is not possible. + + Parameters + ---------- + fill_value : object + + Returns + ------- + fill_value : np.int64 + + Raises + ------ + ValueError + """ + raise AbstractMethodError(self) + + def take(self, indices, allow_fill=False, fill_value=None): + if allow_fill: + fill_value = self._validate_fill_value(fill_value) + + new_values = take(self.asi8, + indices, + allow_fill=allow_fill, + fill_value=fill_value) + + # TODO: use "infer"? Why does not passing freq cause + # failures in py37 but not py27? + freq = self.freq if is_period_dtype(self) else None + return self._shallow_copy(new_values, freq=freq) + + @classmethod + def _concat_same_type(cls, to_concat): + # for TimedeltaArray and PeriodArray; DatetimeArray overrides + freqs = {x.freq for x in to_concat} + assert len(freqs) == 1 + freq = list(freqs)[0] + values = np.concatenate([x.asi8 for x in to_concat]) + return cls._simple_new(values, freq=freq) + + def copy(self, deep=False): + # TODO: should `deep` determine whether we copy self.asi8? + if is_datetime64tz_dtype(self): + return type(self)(self.asi8.copy(), tz=self.tz, freq=self.freq) + return type(self)(self.asi8.copy(), freq=self.freq) + + # Following how PeriodArray does this + # TODO: ignoring `type`? + def view(self, dtype=None, type=None): + if dtype is None or dtype is __builtins__['type'](self): + return self + return self._ndarray_values.view(dtype=dtype) + + def _values_for_factorize(self): + return self.asi8, iNaT + + @classmethod + def _from_factorized(cls, values, original): + if is_datetime64tz_dtype(original): + return cls(values, tz=original.tz, freq=original.freq) + return cls(values, freq=original.freq) + + @classmethod + def _from_sequence(cls, scalars, dtype=None, copy=False): + arr = np.asarray(scalars, dtype=object) + if copy: + arr = arr.copy() + + # If necessary this will infer tz from dtype + return cls(arr, dtype=dtype) + # ------------------------------------------------------------------ # Null Handling @@ -736,8 +841,8 @@ def __rsub__(self, other): # we need to wrap in DatetimeArray/Index and flip the operation if not isinstance(other, DatetimeLikeArrayMixin): # Avoid down-casting DatetimeIndex - from pandas.core.arrays import DatetimeArrayMixin - other = DatetimeArrayMixin(other) + from pandas.core.arrays import DatetimeArray + other = DatetimeArray(other) return other - self elif (is_datetime64_any_dtype(self) and hasattr(other, 'dtype') and not is_datetime64_any_dtype(other)): diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 405056c628ceb..6b9834ae379e6 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -12,7 +12,7 @@ conversion, fields, timezones, resolution as libresolution) -from pandas.util._decorators import cache_readonly +from pandas.util._decorators import cache_readonly, Appender from pandas.errors import PerformanceWarning from pandas import compat @@ -34,6 +34,7 @@ from pandas.tseries.offsets import Tick, generate_range from pandas.core.arrays import datetimelike as dtl +from pandas.core.arrays.base import ExtensionArray _midnight = time(0, 0) @@ -122,7 +123,7 @@ def wrapper(self, other): except ValueError: other = np.array(other, dtype=np.object_) elif not isinstance(other, (np.ndarray, ABCIndexClass, ABCSeries, - DatetimeArrayMixin)): + DatetimeArray)): # Following Timestamp convention, __eq__ is all-False # and __ne__ is all True, others raise TypeError. return ops.invalid_comparison(self, other, op) @@ -158,7 +159,7 @@ def wrapper(self, other): return compat.set_function_name(wrapper, opname, cls) -class DatetimeArrayMixin(dtl.DatetimeLikeArrayMixin): +class DatetimeArray(dtl.DatetimeLikeArrayMixin, ExtensionArray): """ Assumes that subclass __new__/__init__ defines: tz @@ -221,7 +222,7 @@ def __new__(cls, values, freq=None, tz=None, dtype=None): # if dtype has an embedded tz, capture it tz = dtl.validate_tz_from_dtype(dtype, tz) - if isinstance(values, DatetimeArrayMixin): + if isinstance(values, DatetimeArray): # extract nanosecond unix timestamps values = values.asi8 if values.dtype == 'i8': @@ -295,7 +296,7 @@ def _generate_range(cls, start, end, periods, freq, tz=None, if tz is not None and index.tz is None: arr = conversion.tz_localize_to_utc( - ensure_int64(index.values), + ensure_int64(index.asi8), tz, ambiguous=ambiguous) index = cls(arr) @@ -318,7 +319,7 @@ def _generate_range(cls, start, end, periods, freq, tz=None, if not right_closed and len(index) and index[-1] == end: index = index[:-1] - return cls._simple_new(index.values, freq=freq, tz=tz) + return cls._simple_new(index.asi8, freq=freq, tz=tz) # ----------------------------------------------------------------- # Descriptive Properties @@ -411,6 +412,38 @@ def __iter__(self): for v in converted: yield v + # ---------------------------------------------------------------- + # ExtensionArray Interface + + @property + def _ndarray_values(self): + return self._data + + @Appender(dtl.DatetimeLikeArrayMixin._validate_fill_value.__doc__) + def _validate_fill_value(self, fill_value): + if isna(fill_value): + fill_value = iNaT + elif isinstance(fill_value, (datetime, np.datetime64)): + self._assert_tzawareness_compat(fill_value) + fill_value = Timestamp(fill_value).value + else: + raise ValueError("'fill_value' should be a Timestamp. " + "Got '{got}'.".format(got=fill_value)) + return fill_value + + @classmethod + def _concat_same_type(cls, to_concat): + freqs = {x.freq for x in to_concat} + assert len(freqs) == 1 + freq = list(freqs)[0] + + tzs = {x.tz for x in to_concat} + assert len(tzs) == 1 + tz = list(tzs)[0] + + values = np.concatenate([x.asi8 for x in to_concat]) + return cls._simple_new(values, freq=freq, tz=tz) + # ----------------------------------------------------------------- # Comparison Methods @@ -1378,8 +1411,8 @@ def to_julian_date(self): ) / 24.0) -DatetimeArrayMixin._add_comparison_ops() -DatetimeArrayMixin._add_datetimelike_methods() +DatetimeArray._add_comparison_ops() +DatetimeArray._add_datetimelike_methods() def _generate_regular_range(cls, start, end, periods, freq): diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index 482968fdb4766..88a7306743716 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -216,14 +216,6 @@ def _from_sequence(cls, scalars, dtype=None, copy=False): ordinals = libperiod.extract_ordinals(periods, freq) return cls(ordinals, freq=freq) - def _values_for_factorize(self): - return self.asi8, iNaT - - @classmethod - def _from_factorized(cls, values, original): - # type: (Sequence[Optional[Period]], PeriodArray) -> PeriodArray - return cls(values, freq=original.freq) - @classmethod def _from_datetime64(cls, data, freq, tz=None): """Construct a PeriodArray from a datetime64 array @@ -262,14 +254,6 @@ def _generate_range(cls, start, end, periods, freq, fields): return subarr, freq - @classmethod - def _concat_same_type(cls, to_concat): - freq = {x.freq for x in to_concat} - assert len(freq) == 1 - freq = list(freq)[0] - values = np.concatenate([x._data for x in to_concat]) - return cls(values, freq=freq) - # -------------------------------------------------------------------- # Data / Attributes @property @@ -379,22 +363,24 @@ def __setitem__( raise TypeError(msg) self._data[key] = value + @Appender(dtl.DatetimeLikeArrayMixin._validate_fill_value.__doc__) + def _validate_fill_value(self, fill_value): + if isna(fill_value): + fill_value = iNaT + elif isinstance(fill_value, Period): + if fill_value.freq != self.freq: + msg = DIFFERENT_FREQ_INDEX.format(self.freq.freqstr, + fill_value.freqstr) + raise IncompatibleFrequency(msg) + fill_value = fill_value.ordinal + else: + raise ValueError("'fill_value' should be a Period. " + "Got '{got}'.".format(got=fill_value)) + return fill_value + def take(self, indices, allow_fill=False, fill_value=None): if allow_fill: - if isna(fill_value): - fill_value = iNaT - elif isinstance(fill_value, Period): - if self.freq != fill_value.freq: - msg = DIFFERENT_FREQ_INDEX.format( - self.freq.freqstr, - fill_value.freqstr - ) - raise IncompatibleFrequency(msg) - - fill_value = fill_value.ordinal - else: - msg = "'fill_value' should be a Period. Got '{}'." - raise ValueError(msg.format(fill_value)) + fill_value = self._validate_fill_value(fill_value) new_values = algos.take(self._data, indices, @@ -438,9 +424,6 @@ def fillna(self, value=None, method=None, limit=None): new_values = self.copy() return new_values - def copy(self, deep=False): - return type(self)(self._data.copy(), freq=self.freq) - def value_counts(self, dropna=False): from pandas import Series, PeriodIndex @@ -582,7 +565,7 @@ def to_timestamp(self, freq=None, how='start'): ------- DatetimeArray/Index """ - from pandas.core.arrays import DatetimeArrayMixin + from pandas.core.arrays import DatetimeArray how = libperiod._validate_end_alias(how) @@ -606,7 +589,7 @@ def to_timestamp(self, freq=None, how='start'): new_data = self.asfreq(freq, how=how) new_data = libperiod.periodarr_to_dt64arr(new_data.asi8, base) - return DatetimeArrayMixin(new_data, freq='infer') + return DatetimeArray(new_data, freq='infer') # ------------------------------------------------------------------ # Formatting diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index cf3ba263d1f81..2130b22d6caba 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -4,10 +4,11 @@ import numpy as np from pandas._libs import tslibs -from pandas._libs.tslibs import Timedelta, Timestamp, NaT +from pandas._libs.tslibs import Timedelta, Timestamp, NaT, iNaT from pandas._libs.tslibs.fields import get_timedelta_field from pandas._libs.tslibs.timedeltas import array_to_timedelta64 +from pandas.util._decorators import Appender from pandas import compat from pandas.core.dtypes.common import ( @@ -182,6 +183,20 @@ def _generate_range(cls, start, end, periods, freq, closed=None): return cls._simple_new(index, freq=freq) + # ---------------------------------------------------------------- + # ExtensionArray Interface + + @Appender(dtl.DatetimeLikeArrayMixin._validate_fill_value.__doc__) + def _validate_fill_value(self, fill_value): + if isna(fill_value): + fill_value = iNaT + elif isinstance(fill_value, (timedelta, np.timedelta64, Tick)): + fill_value = Timedelta(fill_value).value + else: + raise ValueError("'fill_value' should be a Timedelta. " + "Got '{got}'.".format(got=fill_value)) + return fill_value + # ---------------------------------------------------------------- # Arithmetic Methods @@ -214,15 +229,15 @@ def _add_datetime_arraylike(self, other): """Add DatetimeArray/Index or ndarray[datetime64] to TimedeltaArray""" if isinstance(other, np.ndarray): # At this point we have already checked that dtype is datetime64 - from pandas.core.arrays import DatetimeArrayMixin - other = DatetimeArrayMixin(other) + from pandas.core.arrays import DatetimeArray + other = DatetimeArray(other) # defer to implementation in DatetimeArray return other + self def _add_datetimelike_scalar(self, other): # adding a timedeltaindex to a datetimelike - from pandas.core.arrays import DatetimeArrayMixin + from pandas.core.arrays import DatetimeArray assert other is not NaT other = Timestamp(other) @@ -230,13 +245,13 @@ def _add_datetimelike_scalar(self, other): # In this case we specifically interpret NaT as a datetime, not # the timedelta interpretation we would get by returning self + NaT result = self.asi8.view('m8[ms]') + NaT.to_datetime64() - return DatetimeArrayMixin(result) + return DatetimeArray(result) i8 = self.asi8 result = checked_add_with_arr(i8, other.value, arr_mask=self._isnan) result = self._maybe_mask_results(result) - return DatetimeArrayMixin(result, tz=other.tz) + return DatetimeArray(result, tz=other.tz) def _addsub_offset_array(self, other, op): # Add or subtract Array-like of DateOffset objects diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 59429488a7c2f..da734d42d90b6 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -23,6 +23,7 @@ is_bool_dtype, is_period_dtype, is_categorical_dtype, + is_timedelta64_dtype, is_datetime_or_timedelta_dtype, is_float_dtype, is_integer_dtype, @@ -224,6 +225,7 @@ def ceil(self, freq, ambiguous='raise', nonexistent='raise'): class DatetimeIndexOpsMixin(DatetimeLikeArrayMixin): """ common ops mixin to support a unified interface datetimelike Index """ + copy = Index.copy # DatetimeLikeArrayMixin assumes subclasses are mutable, so these are # properties there. They can be made into cache_readonly for Index @@ -771,7 +773,7 @@ def _ensure_datetimelike_to_i8(other, to_utc=False): try: return np.array(other, copy=False).view('i8') except TypeError: - # period array cannot be coerces to int + # period array cannot be coerced to int other = Index(other) return other.asi8 @@ -806,7 +808,10 @@ def wrap_array_method(method, pin_name=False): method """ def index_method(self, *args, **kwargs): - result = method(self, *args, **kwargs) + if is_timedelta64_dtype(self): + result = method(self, *args, **kwargs) + else: + result = method(self._data, *args, **kwargs) # Index.__new__ will choose the appropriate subclass to return result = Index(result) @@ -835,7 +840,11 @@ def wrap_field_accessor(prop): fget = prop.fget def f(self): - result = fget(self) + if is_timedelta64_dtype(self): + result = fget(self) + else: + result = fget(self._data) + if is_bool_dtype(result): # return numpy array b/c there is no BoolIndex return result diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 8da0672559006..bcab13af5c323 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -29,7 +29,7 @@ from pandas.core.dtypes.missing import isna import pandas.core.dtypes.concat as _concat -from pandas.core.arrays.datetimes import DatetimeArrayMixin, _to_m8 +from pandas.core.arrays.datetimes import DatetimeArray, _to_m8 from pandas.core.arrays import datetimelike as dtl from pandas.core.indexes.base import Index, _index_shared_docs @@ -68,7 +68,7 @@ def _new_DatetimeIndex(cls, d): return result -class DatetimeIndex(DatetimeArrayMixin, DatelikeOps, TimelikeOps, +class DatetimeIndex(DatetimeArray, DatelikeOps, TimelikeOps, DatetimeIndexOpsMixin, Int64Index): """ Immutable ndarray of datetime64 data, represented internally as int64, and @@ -182,7 +182,7 @@ class DatetimeIndex(DatetimeArrayMixin, DatelikeOps, TimelikeOps, pandas.to_datetime : Convert argument to datetime """ - _resolution = cache_readonly(DatetimeArrayMixin._resolution.fget) + _resolution = cache_readonly(DatetimeArray._resolution.fget) _typ = 'datetimeindex' _join_precedence = 10 @@ -227,8 +227,8 @@ def _join_i8_wrapper(joinf, **kwargs): _is_numeric_dtype = False _infer_as_myclass = True - _timezone = cache_readonly(DatetimeArrayMixin._timezone.fget) - is_normalized = cache_readonly(DatetimeArrayMixin.is_normalized.fget) + _timezone = cache_readonly(DatetimeArray._timezone.fget) + is_normalized = cache_readonly(DatetimeArray.is_normalized.fget) # -------------------------------------------------------------------- # Constructors @@ -262,7 +262,7 @@ def __new__(cls, data=None, return result if not isinstance(data, (np.ndarray, Index, ABCSeries, - DatetimeArrayMixin)): + DatetimeArray)): if is_scalar(data): raise ValueError('DatetimeIndex() must be called with a ' 'collection of some kind, %s was passed' @@ -280,7 +280,7 @@ def __new__(cls, data=None, data = tools.to_datetime(data, dayfirst=dayfirst, yearfirst=yearfirst) - if isinstance(data, DatetimeArrayMixin): + if isinstance(data, (DatetimeArray, DatetimeIndex)): if tz is None: tz = data.tz elif data.tz is None: @@ -344,22 +344,30 @@ def _simple_new(cls, values, name=None, freq=None, tz=None, # DatetimeArray._simple_new will accept either i8 or M8[ns] dtypes assert isinstance(values, np.ndarray), type(values) - result = super(DatetimeIndex, cls)._simple_new(values, freq, tz, - **kwargs) + arr = DatetimeArray(values, freq=freq, tz=tz) + result = object.__new__(cls) + result._data = arr result.name = name result._reset_identity() return result # -------------------------------------------------------------------- + @property + def _tz(self): + return self._data._tz + + @property + def _freq(self): + return self._data._freq + @property def _values(self): - # tz-naive -> ndarray - # tz-aware -> DatetimeIndex - if self.tz is not None: - return self - else: - return self.values + return self._data + + @property + def _ndarray_values(self): + return self._data._data # M8[ns] @property def tz(self): @@ -372,26 +380,36 @@ def tz(self, value): raise AttributeError("Cannot directly set timezone. Use tz_localize() " "or tz_convert() as appropriate") + @property + def asi8(self): + return self._data.asi8 + + def __getitem__(self, key): + result = self._data[key] + if is_scalar(result): + return result + return type(self)(result) + @property def size(self): # TODO: Remove this when we have a DatetimeTZArray # Necessary to avoid recursion error since DTI._values is a DTI # for TZ-aware - return self._ndarray_values.size + return self._data.size @property def shape(self): # TODO: Remove this when we have a DatetimeTZArray # Necessary to avoid recursion error since DTI._values is a DTI # for TZ-aware - return self._ndarray_values.shape + return self._data.shape @property def nbytes(self): # TODO: Remove this when we have a DatetimeTZArray # Necessary to avoid recursion error since DTI._values is a DTI # for TZ-aware - return self._ndarray_values.nbytes + return self._data.nbytes def _mpl_repr(self): # how to represent ourselves to matplotlib @@ -567,16 +585,11 @@ def snap(self, freq='S'): # TODO: what about self.name? if so, use shallow_copy? def unique(self, level=None): - # Override here since IndexOpsMixin.unique uses self._values.unique - # For DatetimeIndex with TZ, that's a DatetimeIndex -> recursion error - # So we extract the tz-naive DatetimeIndex, unique that, and wrap the - # result with out TZ. - if self.tz is not None: - naive = type(self)(self._ndarray_values, copy=False) - else: - naive = self - result = super(DatetimeIndex, naive).unique(level=level) - return self._shallow_copy(result.values) + if level is not None: + self._validate_index_level(level) + + result = self._data.unique() + return self._shallow_copy(result) def union(self, other): """ @@ -1124,43 +1137,44 @@ def slice_indexer(self, start=None, end=None, step=None, kind=None): # -------------------------------------------------------------------- # Wrapping DatetimeArray - year = wrap_field_accessor(DatetimeArrayMixin.year) - month = wrap_field_accessor(DatetimeArrayMixin.month) - day = wrap_field_accessor(DatetimeArrayMixin.day) - hour = wrap_field_accessor(DatetimeArrayMixin.hour) - minute = wrap_field_accessor(DatetimeArrayMixin.minute) - second = wrap_field_accessor(DatetimeArrayMixin.second) - microsecond = wrap_field_accessor(DatetimeArrayMixin.microsecond) - nanosecond = wrap_field_accessor(DatetimeArrayMixin.nanosecond) - weekofyear = wrap_field_accessor(DatetimeArrayMixin.weekofyear) + year = wrap_field_accessor(DatetimeArray.year) + month = wrap_field_accessor(DatetimeArray.month) + day = wrap_field_accessor(DatetimeArray.day) + hour = wrap_field_accessor(DatetimeArray.hour) + minute = wrap_field_accessor(DatetimeArray.minute) + second = wrap_field_accessor(DatetimeArray.second) + microsecond = wrap_field_accessor(DatetimeArray.microsecond) + nanosecond = wrap_field_accessor(DatetimeArray.nanosecond) + weekofyear = wrap_field_accessor(DatetimeArray.weekofyear) + week = weekofyear - dayofweek = wrap_field_accessor(DatetimeArrayMixin.dayofweek) + dayofweek = wrap_field_accessor(DatetimeArray.dayofweek) weekday = dayofweek - weekday_name = wrap_field_accessor(DatetimeArrayMixin.weekday_name) + weekday_name = wrap_field_accessor(DatetimeArray.weekday_name) - dayofyear = wrap_field_accessor(DatetimeArrayMixin.dayofyear) - quarter = wrap_field_accessor(DatetimeArrayMixin.quarter) - days_in_month = wrap_field_accessor(DatetimeArrayMixin.days_in_month) + dayofyear = wrap_field_accessor(DatetimeArray.dayofyear) + quarter = wrap_field_accessor(DatetimeArray.quarter) + days_in_month = wrap_field_accessor(DatetimeArray.days_in_month) daysinmonth = days_in_month - is_month_start = wrap_field_accessor(DatetimeArrayMixin.is_month_start) - is_month_end = wrap_field_accessor(DatetimeArrayMixin.is_month_end) - is_quarter_start = wrap_field_accessor(DatetimeArrayMixin.is_quarter_start) - is_quarter_end = wrap_field_accessor(DatetimeArrayMixin.is_quarter_end) - is_year_start = wrap_field_accessor(DatetimeArrayMixin.is_year_start) - is_year_end = wrap_field_accessor(DatetimeArrayMixin.is_year_end) - is_leap_year = wrap_field_accessor(DatetimeArrayMixin.is_leap_year) - - tz_localize = wrap_array_method(DatetimeArrayMixin.tz_localize, True) - tz_convert = wrap_array_method(DatetimeArrayMixin.tz_convert, True) - to_perioddelta = wrap_array_method(DatetimeArrayMixin.to_perioddelta, + is_month_start = wrap_field_accessor(DatetimeArray.is_month_start) + is_month_end = wrap_field_accessor(DatetimeArray.is_month_end) + is_quarter_start = wrap_field_accessor(DatetimeArray.is_quarter_start) + is_quarter_end = wrap_field_accessor(DatetimeArray.is_quarter_end) + is_year_start = wrap_field_accessor(DatetimeArray.is_year_start) + is_year_end = wrap_field_accessor(DatetimeArray.is_year_end) + is_leap_year = wrap_field_accessor(DatetimeArray.is_leap_year) + + tz_localize = wrap_array_method(DatetimeArray.tz_localize, True) + tz_convert = wrap_array_method(DatetimeArray.tz_convert, True) + to_perioddelta = wrap_array_method(DatetimeArray.to_perioddelta, False) - to_period = wrap_array_method(DatetimeArrayMixin.to_period, True) - normalize = wrap_array_method(DatetimeArrayMixin.normalize, True) - to_julian_date = wrap_array_method(DatetimeArrayMixin.to_julian_date, + to_period = wrap_array_method(DatetimeArray.to_period, True) + normalize = wrap_array_method(DatetimeArray.normalize, True) + to_julian_date = wrap_array_method(DatetimeArray.to_julian_date, False) - month_name = wrap_array_method(DatetimeArrayMixin.month_name, True) - day_name = wrap_array_method(DatetimeArrayMixin.day_name, True) + month_name = wrap_array_method(DatetimeArray.month_name, True) + day_name = wrap_array_method(DatetimeArray.day_name, True) # -------------------------------------------------------------------- diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index 128068959ebd3..3907573224ef6 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -902,7 +902,7 @@ def _add_datetimelike_methods(cls): add in the datetimelike methods (as we may have to override the superclass) """ - # TODO(DatetimeArray): move this up to DatetimeArrayMixin + # TODO(DatetimeArray): move this up to DatetimeArray def __add__(self, other): # dispatch to ExtensionArray implementation diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index 5b077a6984114..e8217b43af4cd 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -647,6 +647,12 @@ def delete(self, loc): return TimedeltaIndex(new_tds, name=self.name, freq=freq) + def unique(self, level=None): + if level is not None: + self._validate_index_level(level) + result = TimedeltaArrayMixin.unique(self) + return self._shallow_copy(result.values) + TimedeltaIndex._add_comparison_ops() TimedeltaIndex._add_numeric_methods() diff --git a/pandas/core/series.py b/pandas/core/series.py index 20e4720a3bde7..49906d63ae50c 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -4242,6 +4242,9 @@ def _try_cast(arr, take_fast_path): not (is_iterator(subarr) or isinstance(subarr, np.ndarray))): subarr = construct_1d_object_array_from_listlike(subarr) + elif type(subarr).__name__ == "DatetimeArray": + # kludge + pass elif not is_extension_type(subarr): subarr = construct_1d_ndarray_preserving_na(subarr, dtype, copy=copy) diff --git a/pandas/tests/arithmetic/conftest.py b/pandas/tests/arithmetic/conftest.py index cf1abc6f79101..a570d91389a55 100644 --- a/pandas/tests/arithmetic/conftest.py +++ b/pandas/tests/arithmetic/conftest.py @@ -5,7 +5,7 @@ import pandas as pd from pandas.compat import long -from pandas.core.arrays import PeriodArray, DatetimeArrayMixin as DatetimeArray +from pandas.core.arrays import PeriodArray, DatetimeArray @pytest.fixture(params=[1, np.array(1, dtype=np.int64)]) diff --git a/pandas/tests/arrays/test_datetimelike.py b/pandas/tests/arrays/test_datetimelike.py index 3fd03a351de7c..e816f2bd1fc56 100644 --- a/pandas/tests/arrays/test_datetimelike.py +++ b/pandas/tests/arrays/test_datetimelike.py @@ -4,7 +4,7 @@ import pandas as pd from pandas.core.arrays import ( - DatetimeArrayMixin, PeriodArray, TimedeltaArrayMixin) + DatetimeArray, PeriodArray, TimedeltaArrayMixin) import pandas.util.testing as tm @@ -55,12 +55,87 @@ def timedelta_index(request): return pd.TimedeltaIndex(['1 Day', '3 Hours', 'NaT']) +def index_to_array(index): + """ + Helper function to construct a Datetime/Timedelta/Period Array from an + instance of the corresponding Index subclass. + """ + if isinstance(index, pd.DatetimeIndex): + return DatetimeArray(index) + elif isinstance(index, pd.TimedeltaIndex): + return TimedeltaArrayMixin(index) + elif isinstance(index, pd.PeriodIndex): + return PeriodArray(index) + else: + raise TypeError(type(index)) + + +class SharedTests(object): + index_cls = None + + def test_take(self): + data = np.arange(100, dtype='i8') + np.random.shuffle(data) + + idx = self.index_cls._simple_new(data, freq='D') + arr = index_to_array(idx) + + takers = [1, 4, 94] + result = arr.take(takers) + expected = idx.take(takers) + + tm.assert_index_equal(self.index_cls(result), expected) + + takers = np.array([1, 4, 94]) + result = arr.take(takers) + expected = idx.take(takers) + + tm.assert_index_equal(self.index_cls(result), expected) + + def test_take_fill(self): + data = np.arange(10, dtype='i8') + + idx = self.index_cls._simple_new(data, freq='D') + arr = index_to_array(idx) + + result = arr.take([-1, 1], allow_fill=True, fill_value=None) + assert result[0] is pd.NaT + + result = arr.take([-1, 1], allow_fill=True, fill_value=np.nan) + assert result[0] is pd.NaT + + result = arr.take([-1, 1], allow_fill=True, fill_value=pd.NaT) + assert result[0] is pd.NaT + + with pytest.raises(ValueError): + arr.take([0, 1], allow_fill=True, fill_value=2) + + with pytest.raises(ValueError): + arr.take([0, 1], allow_fill=True, fill_value=2.0) + + with pytest.raises(ValueError): + arr.take([0, 1], allow_fill=True, + fill_value=pd.Timestamp.now().time) + + def test_concat_same_type(self): + data = np.arange(10, dtype='i8') + + idx = self.index_cls._simple_new(data, freq='D').insert(0, pd.NaT) + arr = index_to_array(idx) + + result = arr._concat_same_type([arr[:-1], arr[1:], arr]) + expected = idx._concat_same_dtype([idx[:-1], idx[1:], idx], None) + + tm.assert_index_equal(self.index_cls(result), expected) + + class TestDatetimeArray(object): + index_cls = pd.DatetimeIndex def test_from_dti(self, tz_naive_fixture): tz = tz_naive_fixture dti = pd.date_range('2016-01-01', periods=3, tz=tz) - arr = DatetimeArrayMixin(dti) + arr = DatetimeArray(dti) assert list(dti) == list(arr) # Check that Index.__new__ knows what to do with DatetimeArray @@ -71,7 +146,7 @@ def test_from_dti(self, tz_naive_fixture): def test_astype_object(self, tz_naive_fixture): tz = tz_naive_fixture dti = pd.date_range('2016-01-01', periods=3, tz=tz) - arr = DatetimeArrayMixin(dti) + arr = DatetimeArray(dti) asobj = arr.astype('O') assert isinstance(asobj, np.ndarray) assert asobj.dtype == 'O' @@ -81,7 +156,7 @@ def test_astype_object(self, tz_naive_fixture): def test_to_perioddelta(self, datetime_index, freqstr): # GH#23113 dti = datetime_index - arr = DatetimeArrayMixin(dti) + arr = DatetimeArray(dti) expected = dti.to_perioddelta(freq=freqstr) result = arr.to_perioddelta(freq=freqstr) @@ -94,7 +169,7 @@ def test_to_perioddelta(self, datetime_index, freqstr): @pytest.mark.parametrize('freqstr', ['D', 'B', 'W', 'M', 'Q', 'Y']) def test_to_period(self, datetime_index, freqstr): dti = datetime_index - arr = DatetimeArrayMixin(dti) + arr = DatetimeArray(dti) expected = dti.to_period(freq=freqstr) result = arr.to_period(freq=freqstr) @@ -108,7 +183,7 @@ def test_to_period(self, datetime_index, freqstr): def test_bool_properties(self, datetime_index, propname): # in this case _bool_ops is just `is_leap_year` dti = datetime_index - arr = DatetimeArrayMixin(dti) + arr = DatetimeArray(dti) assert dti.freq == arr.freq result = getattr(arr, propname) @@ -119,15 +194,52 @@ def test_bool_properties(self, datetime_index, propname): @pytest.mark.parametrize('propname', pd.DatetimeIndex._field_ops) def test_int_properties(self, datetime_index, propname): dti = datetime_index - arr = DatetimeArrayMixin(dti) + arr = DatetimeArray(dti) result = getattr(arr, propname) expected = np.array(getattr(dti, propname), dtype=result.dtype) tm.assert_numpy_array_equal(result, expected) + def test_take_fill_valid(self, datetime_index, tz_naive_fixture): + dti = datetime_index.tz_localize(tz_naive_fixture) + arr = index_to_array(dti) + + now = pd.Timestamp.now().tz_localize(dti.tz) + result = arr.take([-1, 1], allow_fill=True, fill_value=now) + assert result[0] == now + + with pytest.raises(ValueError): + # fill_value Timedelta invalid + arr.take([-1, 1], allow_fill=True, fill_value=now - now) + + with pytest.raises(ValueError): + # fill_value Period invalid + arr.take([-1, 1], allow_fill=True, fill_value=pd.Period('2014Q1')) + + tz = None if dti.tz is not None else 'US/Eastern' + now = pd.Timestamp.now().tz_localize(tz) + with pytest.raises(TypeError): + # Timestamp with mismatched tz-awareness + arr.take([-1, 1], allow_fill=True, fill_value=now) + + def test_concat_same_type_invalid(self, datetime_index): + # different timezones + dti = datetime_index + arr = DatetimeArray(dti) + + if arr.tz is None: + other = arr.tz_localize('UTC') + else: + other = arr.tz_localize(None) + + with pytest.raises(AssertionError): + arr._concat_same_type([arr, other]) + class TestTimedeltaArray(object): + index_cls = pd.TimedeltaIndex + def test_from_tdi(self): tdi = pd.TimedeltaIndex(['1 Day', '3 Hours']) arr = TimedeltaArrayMixin(tdi) @@ -174,8 +286,38 @@ def test_int_properties(self, timedelta_index, propname): tm.assert_numpy_array_equal(result, expected) + def test_take_fill_valid(self, timedelta_index): + tdi = timedelta_index + arr = index_to_array(tdi) + + td1 = pd.Timedelta(days=1) + result = arr.take([-1, 1], allow_fill=True, fill_value=td1) + assert result[0] == td1 + + now = pd.Timestamp.now() + with pytest.raises(ValueError): + # fill_value Timestamp invalid + arr.take([0, 1], allow_fill=True, fill_value=now) + + with pytest.raises(ValueError): + # fill_value Period invalid + arr.take([0, 1], allow_fill=True, fill_value=now.to_period('D')) + + def test_concat_same_type_invalid(self, timedelta_index): + # different freqs + tdi = timedelta_index + arr = TimedeltaArrayMixin(tdi) + other = pd.timedelta_range('1D', periods=5, freq='2D') + # FIXME: TimedeltaArray should inherit freq='2D' without specifying it + other = TimedeltaArrayMixin(other, freq='2D') + assert other.freq != arr.freq + + with pytest.raises(AssertionError): + arr._concat_same_type([arr, other]) + class TestPeriodArray(object): + index_cls = pd.PeriodIndex def test_from_pi(self, period_index): pi = period_index @@ -200,9 +342,9 @@ def test_to_timestamp(self, how, period_index): pi = period_index arr = PeriodArray(pi) - expected = DatetimeArrayMixin(pi.to_timestamp(how=how)) + expected = DatetimeArray(pi.to_timestamp(how=how)) result = arr.to_timestamp(how=how) - assert isinstance(result, DatetimeArrayMixin) + assert isinstance(result, DatetimeArray) # placeholder until these become actual EA subclasses and we can use # an EA-specific tm.assert_ function diff --git a/pandas/tests/arrays/test_datetimes.py b/pandas/tests/arrays/test_datetimes.py index a15295cfbd81a..bcb8ea915160a 100644 --- a/pandas/tests/arrays/test_datetimes.py +++ b/pandas/tests/arrays/test_datetimes.py @@ -6,7 +6,7 @@ import numpy as np import pandas as pd -from pandas.core.arrays import DatetimeArrayMixin as DatetimeArray +from pandas.core.arrays import DatetimeArray import pandas.util.testing as tm diff --git a/pandas/tests/dtypes/test_generic.py b/pandas/tests/dtypes/test_generic.py index 53fa482bdeaef..7994d738c074f 100644 --- a/pandas/tests/dtypes/test_generic.py +++ b/pandas/tests/dtypes/test_generic.py @@ -19,8 +19,8 @@ class TestABCClasses(object): sparse_series = pd.Series([1, 2, 3]).to_sparse() sparse_array = pd.SparseArray(np.random.randn(10)) sparse_frame = pd.SparseDataFrame({'a': [1, -1, None]}) - datetime_array = pd.core.arrays.DatetimeArrayMixin(datetime_index) - timedelta_array = pd.core.arrays.TimedeltaArrayMixin(timedelta_index) + datetime_array = pd.core.arrays.DatetimeArray(datetime_index) + timedelta_array = pd.core.arrays.TimedeltaArray(timedelta_index) def test_abc_types(self): assert isinstance(pd.Index(['a', 'b', 'c']), gt.ABCIndex) diff --git a/pandas/util/testing.py b/pandas/util/testing.py index fd7012c87040f..f51b0cac60779 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -36,7 +36,7 @@ TimedeltaIndex, bdate_range) from pandas.core.algorithms import take_1d from pandas.core.arrays import ( - DatetimeArrayMixin as DatetimeArray, ExtensionArray, IntervalArray, + DatetimeArray, ExtensionArray, IntervalArray, PeriodArray, period_array) import pandas.core.common as com