From c972dbcffeb5356a7f7a51b857efa4b79ae3acf9 Mon Sep 17 00:00:00 2001 From: "HE, Tao" Date: Thu, 7 Mar 2019 15:51:36 +0800 Subject: [PATCH 1/7] Add IntegerArray.size, fix #25580. Signed-off-by: HE, Tao --- pandas/core/arrays/integer.py | 4 ++++ pandas/tests/resample/test_datetime_index.py | 12 ++++++++++++ 2 files changed, 16 insertions(+) diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py index fd90aec3b5e8c..a38aeffc36f2b 100644 --- a/pandas/core/arrays/integer.py +++ b/pandas/core/arrays/integer.py @@ -400,6 +400,10 @@ def __setitem__(self, key, value): def __len__(self): return len(self._data) + @property + def size(self): + return self._data.size + @property def nbytes(self): return self._data.nbytes + self._mask.nbytes diff --git a/pandas/tests/resample/test_datetime_index.py b/pandas/tests/resample/test_datetime_index.py index ce675893d9907..d7b42047dd7c4 100644 --- a/pandas/tests/resample/test_datetime_index.py +++ b/pandas/tests/resample/test_datetime_index.py @@ -101,6 +101,18 @@ def test_resample_basic(series, closed, expected): assert_series_equal(result, expected) +def test_resample_integerarray(): + # resample on IntegerArray + ts = pd.Series(range(9), + index=pd.date_range('1/1/2000', periods=9, freq='T'), + dtype='Int64') + result = ts.resample('3T').sum() + expected = Series([3, 12, 21], + index=pd.date_range('1/1/2000', periods=3, freq='3T'), + dtype="Int64") + assert_series_equal(result, expected) + + def test_resample_basic_grouper(series): s = series result = s.resample('5Min').last() From 3f28f1e47e36b721b420f89216d7d90ce2795ed5 Mon Sep 17 00:00:00 2001 From: "HE, Tao" Date: Thu, 7 Mar 2019 16:04:17 +0800 Subject: [PATCH 2/7] Add what's new. Signed-off-by: HE, Tao --- doc/source/whatsnew/v0.24.2.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v0.24.2.rst b/doc/source/whatsnew/v0.24.2.rst index 4ca9d57f3a2e5..0311fec0c11b0 100644 --- a/doc/source/whatsnew/v0.24.2.rst +++ b/doc/source/whatsnew/v0.24.2.rst @@ -31,6 +31,7 @@ Fixed Regressions - Fixed regression in ``IntervalDtype`` construction where passing an incorrect string with 'Interval' as a prefix could result in a ``RecursionError``. (:issue:`25338`) - Fixed regression in :class:`Categorical`, where constructing it from a categorical ``Series`` and an explicit ``categories=`` that differed from that in the ``Series`` created an invalid object which could trigger segfaults. (:issue:`25318`) - Fixed pip installing from source into an environment without NumPy (:issue:`25193`) +- Fixed regression in :meth:`Series.resample` when underlying array is :class:`IntegerArray`. (:issue:`25580`) .. _whatsnew_0242.enhancements: From 19013fb5f31219ec22c50da08bb66d299cff7b39 Mon Sep 17 00:00:00 2001 From: "HE, Tao" Date: Thu, 7 Mar 2019 22:19:15 +0800 Subject: [PATCH 3/7] Move the method `size` to `ExtensionArray`. Signed-off-by: HE, Tao --- doc/source/whatsnew/v0.24.2.rst | 4 +--- pandas/core/arrays/base.py | 13 +++++++++++++ pandas/core/arrays/integer.py | 4 ---- 3 files changed, 14 insertions(+), 7 deletions(-) diff --git a/doc/source/whatsnew/v0.24.2.rst b/doc/source/whatsnew/v0.24.2.rst index 0311fec0c11b0..2e15a6743b9b5 100644 --- a/doc/source/whatsnew/v0.24.2.rst +++ b/doc/source/whatsnew/v0.24.2.rst @@ -31,15 +31,13 @@ Fixed Regressions - Fixed regression in ``IntervalDtype`` construction where passing an incorrect string with 'Interval' as a prefix could result in a ``RecursionError``. (:issue:`25338`) - Fixed regression in :class:`Categorical`, where constructing it from a categorical ``Series`` and an explicit ``categories=`` that differed from that in the ``Series`` created an invalid object which could trigger segfaults. (:issue:`25318`) - Fixed pip installing from source into an environment without NumPy (:issue:`25193`) -- Fixed regression in :meth:`Series.resample` when underlying array is :class:`IntegerArray`. (:issue:`25580`) .. _whatsnew_0242.enhancements: Enhancements ^^^^^^^^^^^^ -- -- +- Add :meth:`ExtensionArray.size` to return the number of elements in :class:`ExtensionArray` (:issue:`25580`) .. _whatsnew_0242.bug_fixes: diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index e770281596134..fcdbdfddbf2c0 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -46,6 +46,7 @@ class ExtensionArray(object): * __len__ * dtype * nbytes + * size * isna * take * copy @@ -318,6 +319,18 @@ def nbytes(self): # on the number of bytes needed. raise AbstractMethodError(self) + @property + def size(self): + # type: () -> int + """ + The number of elements in this array. + + Returns + ------- + size : int + """ + return len(self) + # ------------------------------------------------------------------------ # Additional Methods # ------------------------------------------------------------------------ diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py index a38aeffc36f2b..fd90aec3b5e8c 100644 --- a/pandas/core/arrays/integer.py +++ b/pandas/core/arrays/integer.py @@ -400,10 +400,6 @@ def __setitem__(self, key, value): def __len__(self): return len(self._data) - @property - def size(self): - return self._data.size - @property def nbytes(self): return self._data.nbytes + self._mask.nbytes From b5358573f457b5492aa32d9a61eb991348b78aa6 Mon Sep 17 00:00:00 2001 From: "HE, Tao" Date: Sat, 9 Mar 2019 00:00:32 +0800 Subject: [PATCH 4/7] Redefine `IndexOpsMixin.size` and revise whatsnew. Signed-off-by: HE, Tao --- doc/source/whatsnew/v0.24.2.rst | 4 +++- pandas/core/arrays/base.py | 13 ------------- pandas/core/base.py | 2 +- pandas/tests/resample/test_datetime_index.py | 2 +- 4 files changed, 5 insertions(+), 16 deletions(-) diff --git a/doc/source/whatsnew/v0.24.2.rst b/doc/source/whatsnew/v0.24.2.rst index 2e15a6743b9b5..e9c67becaaefe 100644 --- a/doc/source/whatsnew/v0.24.2.rst +++ b/doc/source/whatsnew/v0.24.2.rst @@ -31,13 +31,15 @@ Fixed Regressions - Fixed regression in ``IntervalDtype`` construction where passing an incorrect string with 'Interval' as a prefix could result in a ``RecursionError``. (:issue:`25338`) - Fixed regression in :class:`Categorical`, where constructing it from a categorical ``Series`` and an explicit ``categories=`` that differed from that in the ``Series`` created an invalid object which could trigger segfaults. (:issue:`25318`) - Fixed pip installing from source into an environment without NumPy (:issue:`25193`) +- Fixed bug where :meth:`core.base.IndexOpsMixin.size` could not return correct result for :class:`api.extensions.ExtensionArray` (:issue:`25580`) .. _whatsnew_0242.enhancements: Enhancements ^^^^^^^^^^^^ -- Add :meth:`ExtensionArray.size` to return the number of elements in :class:`ExtensionArray` (:issue:`25580`) +- +- .. _whatsnew_0242.bug_fixes: diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index fcdbdfddbf2c0..e770281596134 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -46,7 +46,6 @@ class ExtensionArray(object): * __len__ * dtype * nbytes - * size * isna * take * copy @@ -319,18 +318,6 @@ def nbytes(self): # on the number of bytes needed. raise AbstractMethodError(self) - @property - def size(self): - # type: () -> int - """ - The number of elements in this array. - - Returns - ------- - size : int - """ - return len(self) - # ------------------------------------------------------------------------ # Additional Methods # ------------------------------------------------------------------------ diff --git a/pandas/core/base.py b/pandas/core/base.py index f896596dd5216..92bccdb525a74 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -745,7 +745,7 @@ def nbytes(self): """ Return the number of bytes in the underlying data. """ - return self._values.nbytes + return len(self._values) @property def strides(self): diff --git a/pandas/tests/resample/test_datetime_index.py b/pandas/tests/resample/test_datetime_index.py index d7b42047dd7c4..ec05595536de4 100644 --- a/pandas/tests/resample/test_datetime_index.py +++ b/pandas/tests/resample/test_datetime_index.py @@ -102,7 +102,7 @@ def test_resample_basic(series, closed, expected): def test_resample_integerarray(): - # resample on IntegerArray + # GH 25580, resample on IntegerArray ts = pd.Series(range(9), index=pd.date_range('1/1/2000', periods=9, freq='T'), dtype='Int64') From 92cb9eb83905e59124f0129ad17f794add80fdf8 Mon Sep 17 00:00:00 2001 From: "HE, Tao" Date: Sat, 9 Mar 2019 09:33:07 +0800 Subject: [PATCH 5/7] Fix a mistake, add a test case for `size` and revise whatsnew. Signed-off-by: HE, Tao --- doc/source/whatsnew/v0.24.2.rst | 2 +- pandas/core/base.py | 4 ++-- pandas/tests/series/test_api.py | 7 +++++++ 3 files changed, 10 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v0.24.2.rst b/doc/source/whatsnew/v0.24.2.rst index e9c67becaaefe..74026aabbf751 100644 --- a/doc/source/whatsnew/v0.24.2.rst +++ b/doc/source/whatsnew/v0.24.2.rst @@ -31,7 +31,7 @@ Fixed Regressions - Fixed regression in ``IntervalDtype`` construction where passing an incorrect string with 'Interval' as a prefix could result in a ``RecursionError``. (:issue:`25338`) - Fixed regression in :class:`Categorical`, where constructing it from a categorical ``Series`` and an explicit ``categories=`` that differed from that in the ``Series`` created an invalid object which could trigger segfaults. (:issue:`25318`) - Fixed pip installing from source into an environment without NumPy (:issue:`25193`) -- Fixed bug where :meth:`core.base.IndexOpsMixin.size` could not return correct result for :class:`api.extensions.ExtensionArray` (:issue:`25580`) +- Fixed bug where :meth:`Index.size` could not return correct result for :class:`api.extensions.ExtensionArray` (:issue:`25580`) .. _whatsnew_0242.enhancements: diff --git a/pandas/core/base.py b/pandas/core/base.py index 92bccdb525a74..8a57904aecce4 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -745,7 +745,7 @@ def nbytes(self): """ Return the number of bytes in the underlying data. """ - return len(self._values) + return self._values.nbytes @property def strides(self): @@ -762,7 +762,7 @@ def size(self): """ Return the number of elements in the underlying data. """ - return self._values.size + return len(self._values) @property def flags(self): diff --git a/pandas/tests/series/test_api.py b/pandas/tests/series/test_api.py index 1f2e2b179c687..3ad9d54175f31 100644 --- a/pandas/tests/series/test_api.py +++ b/pandas/tests/series/test_api.py @@ -493,6 +493,13 @@ def test_tab_complete_warning(self, ip): with provisionalcompleter('ignore'): list(ip.Completer.completions('s.', 1)) + def test_integer_series_size(self): + # GH 25580 + s = Series(range(9)) + assert s.size == 9 + s = Series(range(9), dtype="Int64") + assert s.size == 9 + class TestCategoricalSeries(object): From e210931d35d0166fcef6c289a4eacf2dc1f64201 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Mon, 11 Mar 2019 06:09:08 -0500 Subject: [PATCH 6/7] Fixup whatsnew --- doc/source/whatsnew/v0.24.2.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v0.24.2.rst b/doc/source/whatsnew/v0.24.2.rst index 74026aabbf751..e7266813e8446 100644 --- a/doc/source/whatsnew/v0.24.2.rst +++ b/doc/source/whatsnew/v0.24.2.rst @@ -31,7 +31,6 @@ Fixed Regressions - Fixed regression in ``IntervalDtype`` construction where passing an incorrect string with 'Interval' as a prefix could result in a ``RecursionError``. (:issue:`25338`) - Fixed regression in :class:`Categorical`, where constructing it from a categorical ``Series`` and an explicit ``categories=`` that differed from that in the ``Series`` created an invalid object which could trigger segfaults. (:issue:`25318`) - Fixed pip installing from source into an environment without NumPy (:issue:`25193`) -- Fixed bug where :meth:`Index.size` could not return correct result for :class:`api.extensions.ExtensionArray` (:issue:`25580`) .. _whatsnew_0242.enhancements: @@ -101,7 +100,8 @@ Bug Fixes - Bug in :meth:`Series.is_unique` where single occurrences of ``NaN`` were not considered unique (:issue:`25180`) - Bug in :func:`merge` when merging an empty ``DataFrame`` with an ``Int64`` column or a non-empty ``DataFrame`` with an ``Int64`` column that is all ``NaN`` (:issue:`25183`) - Bug in ``IntervalTree`` where a ``RecursionError`` occurs upon construction due to an overflow when adding endpoints, which also causes :class:`IntervalIndex` to crash during indexing operations (:issue:`25485`) -- +- Bug in :meth:`Series.size` raising for some extension-array-backed ``Series``, rather than returning the size (:issue:`25580`) +- Bug in resampling raising for nullable integer-dtype columns (:issue:`25580`) .. _whatsnew_0242.contributors: From 8b6a6f4d8d6b60e27034d212b106dc4900e1af19 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Mon, 11 Mar 2019 06:09:35 -0500 Subject: [PATCH 7/7] Fixup --- doc/source/whatsnew/v0.24.2.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.24.2.rst b/doc/source/whatsnew/v0.24.2.rst index e7266813e8446..7a983db754fa9 100644 --- a/doc/source/whatsnew/v0.24.2.rst +++ b/doc/source/whatsnew/v0.24.2.rst @@ -100,7 +100,7 @@ Bug Fixes - Bug in :meth:`Series.is_unique` where single occurrences of ``NaN`` were not considered unique (:issue:`25180`) - Bug in :func:`merge` when merging an empty ``DataFrame`` with an ``Int64`` column or a non-empty ``DataFrame`` with an ``Int64`` column that is all ``NaN`` (:issue:`25183`) - Bug in ``IntervalTree`` where a ``RecursionError`` occurs upon construction due to an overflow when adding endpoints, which also causes :class:`IntervalIndex` to crash during indexing operations (:issue:`25485`) -- Bug in :meth:`Series.size` raising for some extension-array-backed ``Series``, rather than returning the size (:issue:`25580`) +- Bug in :attr:`Series.size` raising for some extension-array-backed ``Series``, rather than returning the size (:issue:`25580`) - Bug in resampling raising for nullable integer-dtype columns (:issue:`25580`) .. _whatsnew_0242.contributors: