8000 DEPR: numeric_only default in resampler ops (#47177) · pandas-dev/pandas@62b6d25 · GitHub
[go: up one dir, main page]

Skip to content

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Commit 62b6d25

Browse files
authored
DEPR: numeric_only default in resampler ops (#47177)
1 parent 21e6da3 commit 62b6d25

File tree

3 files changed

+132
-51
lines changed

3 files changed

+132
-51
lines changed

doc/source/whatsnew/v1.5.0.rst

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -605,7 +605,7 @@ In the case where ``df.columns`` is not unique, use :meth:`DataFrame.isetitem`:
605605
``numeric_only`` default value
606606
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
607607

608-
Across the DataFrame and DataFrameGroupBy operations such as
608+
Across the :class:`DataFrame`, :class:`.DataFrameGroupBy`, and :class:`.Resampler` operations such as
609609
``min``, ``sum``, and ``idxmax``, the default
610610
value of the ``numeric_only`` argument, if it exists at all, was inconsistent.
611611
Furthermore, operations with the default value ``None`` can lead to surprising
@@ -644,6 +644,11 @@ gained the ``numeric_only`` argument.
644644
- :meth:`.GroupBy.std`
645645
- :meth:`.GroupBy.sem`
646646
- :meth:`.DataFrameGroupBy.quantile`
647+
- :meth:`.Resampler.mean`
648+
- :meth:`.Resampler.median`
649+
- :meth:`.Resampler.sem`
650+
- :meth:`.Resampler.std`
651+
- :meth:`.Resampler.var`
647652

648653
.. _whatsnew_150.deprecations.other:
649654

pandas/core/resample.py

Lines changed: 90 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -393,7 +393,7 @@ def transform(self, arg, *args, **kwargs):
393393
"""
394394
return self._selected_obj.groupby(self.groupby).transform(arg, *args, **kwargs)
395395

396-
def _downsample(self, f):
396+
def _downsample(self, f, **kwargs):
397397
raise AbstractMethodError(self)
398398

399399
def _upsample(self, f, limit=None, fill_value=None):
@@ -937,25 +937,28 @@ def asfreq(self, fill_value=None):
937937
"""
938938
return self._upsample("asfreq", fill_value=fill_value)
939939

940-
def std(self, ddof=1, *args, **kwargs):
940+
def std(self, ddof=1, numeric_only: bool = False, *args, **kwargs):
941941
"""
942942
Compute standard deviation of groups, excluding missing values.
943943
944944
Parameters
945945
----------
946946
ddof : int, default 1
947947
Degrees of freedom.
948+
numeric_only : bool, default False
949+
Include only `float`, `int` or `boolean` data.
950+
951+
.. versionadded:: 1.5.0
948952
949953
Returns
950954
-------
951955
DataFrame or Series
952956
Standard deviation of values within each group.
953957
"""
954958
nv.validate_resampler_func("std", args, kwargs)
955-
# error: Unexpected keyword argument "ddof" for "_downsample"
956-
return self._downsample("std", ddof=ddof) # type: ignore[call-arg]
959+
return self._downsample("std", ddof=ddof, numeric_only=numeric_only)
957960

958-
def var(self, ddof=1, *args, **kwargs):
961+
def var(self, ddof=1, numeric_only: bool = False, *args, **kwargs):
959962
"""
960963
Compute variance of groups, excluding missing values.
961964
@@ -964,14 +967,18 @@ def var(self, ddof=1, *args, **kwargs):
964967
ddof : int, default 1
965968
Degrees of freedom.
966969
970+
numeric_only : bool, default False
971+
Include only `float`, `int` or `boolean` data.
972+
973+
.. versionadded:: 1.5.0
974+
967975
Returns
968976
-------
969977
DataFrame or Series
970978
Variance of values within each group.
971979
"""
972980
nv.validate_resampler_func("var", args, kwargs)
973-
# error: Unexpected keyword argument "ddof" for "_downsample"
974-
return self._downsample("var", ddof=ddof) # type: ignore[call-arg]
981+
return self._downsample("var", ddof=ddof, numeric_only=numeric_only)
975982

976983
@doc(GroupBy.size)
977984
def size(self):
@@ -1027,53 +1034,94 @@ def quantile(self, q=0.5, **kwargs):
10271034
Return a DataFrame, where the columns are groupby columns,
10281035
and the values are its quantiles.
10291036
"""
1030-
# error: Unexpected keyword argument "q" for "_downsample"
1031-
# error: Too many arguments for "_downsample"
1032-
return self._downsample("quantile", q=q, **kwargs) # type: ignore[call-arg]
1037+
return self._downsample("quantile", q=q, **kwargs)
10331038

10341039

1035-
# downsample methods
1036-
for method in ["sum", "prod", "min", "max", "first", "last"]:
1040+
def _add_downsample_kernel(
1041+
name: str, args: tuple[str, ...], docs_class: type = GroupBy
1042+
) -> None:
1043+
"""
1044+
Add a kernel to Resampler.
1045+
1046+
Parameters
1047+
----------
1048+
name : str
1049+
Name of the kernel.
1050+
args : tuple
1051+
Arguments of the method.
1052+
docs_class : type
1053+
Class to get kernel docstring from.
1054+
"""
1055+
assert args in (
1056+
("numeric_only", "min_count"),
1057+
("numeric_only",),
1058+
("ddof", "numeric_only"),
1059+
(),
1060+
)
10371061

1038-
def f(
1039-
self,
1040-
_method: str = method,
1041-
numeric_only: bool | lib.NoDefault = lib.no_default,
1042-
min_count: int = 0,
1043-
*args,
1044-
**kwargs,
1045-
):
1046-
if numeric_only is lib.no_default:
1047-
if _method != "sum":
1062+
# Explicitly provide args rather than args/kwargs for API docs
1063+
if args == ("numeric_only", "min_count"):
1064+
1065+
def f(
1066+
self,
1067+
numeric_only: bool | lib.NoDefault = lib.no_default,
1068+
min_count: int = 0,
1069+
*args,
1070+
**kwargs,
1071+
):
1072+
nv.validate_resampler_func(name, args, kwargs)
1073+
if numeric_only is lib.no_default and name != "sum":
10481074
# For DataFrameGroupBy, set it to be False for methods other than `sum`.
10491075
numeric_only = False
10501076

1051-
nv.validate_resampler_func(_method, args, kwargs)
1052-
return self._downsample(_method, numeric_only=numeric_only, min_count=min_count)
1053-
1054-
f.__doc__ = getattr(GroupBy, method).__doc__
1055-
setattr(Resampler, method, f)
1056-
1077+
return self._downsample(
1078+
name, numeric_only=numeric_only, min_count=min_count
1079+
)
10571080

1058-
# downsample methods
1059-
for method in ["mean", "sem", "median", "ohlc"]:
1081+
elif args == ("numeric_only",):
1082+
# error: All conditional function variants must have identical signatures
1083+
def f( # type: ignore[misc]
1084+
self, numeric_only: bool | lib.NoDefault = lib.no_default, *args, **kwargs
1085+
):
1086+
nv.validate_resampler_func(name, args, kwargs)
1087+
return self._downsample(name, numeric_only=numeric_only)
1088+
1089+
elif args == ("ddof", "numeric_only"):
1090+
# error: All conditional function variants must have identical signatures
1091+
def f( # type: ignore[misc]
1092+
self,
1093+
ddof: int = 1,
1094+
numeric_only: bool | lib.NoDefault = lib.no_default,
1095+
*args,
1096+
**kwargs,
1097+
):
1098+
nv.validate_resampler_func(name, args, kwargs)
1099+
return self._downsample(name, ddof=ddof, numeric_only=numeric_only)
10601100

1061-
def g(self, _method=method, *args, **kwargs):
1062-
nv.validate_resampler_func(_method, args, kwargs)
1063-
return self._downsample(_method)
1101+
else:
1102+
# error: All conditional function variants must have identical signatures
1103+
def f( # type: ignore[misc]
1104+
self,
1105+
*args,
1106+
**kwargs,
1107+
):
1108+
nv.validate_resampler_func(name, args, kwargs)
1109+
return self._downsample(name)
10641110

1065-
g.__doc__ = getattr(GroupBy, method).__doc__
1066-
setattr(Resampler, method, g)
1111+
f.__doc__ = getattr(docs_class, name).__doc__
1112+
setattr(Resampler, name, f)
10671113

10681114

1069-
# series only methods
1115+
for method in ["sum", "prod", "min", "max", "first", "last"]:
1116+
_add_downsample_kernel(method, ("numeric_only", "min_count"))
1117+
for method in ["mean", "median"]:
1118+
_add_downsample_kernel(method, ("numeric_only",))
1119+
for method in ["sem"]:
1120+
_add_downsample_kernel(method, ("ddof", "numeric_only"))
1121+
for method in ["ohlc"]:
1122+
_add_downsample_kernel(method, ())
10701123
for method in ["nunique"]:
1071-
1072-
def h(self, _method=method):
1073-
return self._downsample(_method)
1074-
1075-
h.__doc__ = getattr(SeriesGroupBy, method).__doc__
1076-
setattr(Resampler, method, h)
1124+
_add_downsample_kernel(method, (), SeriesGroupBy)
10771125

10781126

10791127
class _GroupByMixin(PandasObject):

pandas/tests/resample/test_resample_api.py

Lines changed: 36 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -814,6 +814,7 @@ def test_end_and_end_day_origin(
814814

815815

816816
@pytest.mark.parametrize(
817+
# expected_data is a string (the error message to match) when op raises a ValueError or TypeError
817818
"method, numeric_only, expected_data",
818819
[
819820
("sum", True, {"num": [25]}),
@@ -834,6 +835,21 @@ def test_end_and_end_day_origin(
834835
("last", True, {"num": [20]}),
835836
("last", False, {"cat": ["cat_2"], "num": [20]}),
836837
("last", lib.no_default, {"cat": ["cat_2"], "num": [20]}),
838+
("mean", True, {"num": [12.5]}),
839+
("mean", False, {"num": [12.5]}),
840+
("mean", lib.no_default, {"num": [12.5]}),
841+
("median", True, {"num": [12.5]}),
842+
("median", False, {"num": [12.5]}),
843+
("median", lib.no_default, {"num": [12.5]}),
844+
("std", True, {"num": [10.606601717798213]}),
845+
("std", False, "could not convert string to float"),
846+
("std", lib.no_default, {"num": [10.606601717798213]}),
847+
("var", True, {"num": [112.5]}),
848+
("var", False, "could not convert string to float"),
849+
("var", lib.no_default, {"num": [112.5]}),
850+
("sem", True, {"num": [7.5]}),
851+
("sem", False, "could not convert string to float"),
852+
("sem", lib.no_default, {"num": [7.5]}),
837853
],
838854
)
839855
def test_frame_downsample_method(method, numeric_only, expected_data):
@@ -845,20 +861,32 @@ def test_frame_downsample_method(method, numeric_only, expected_data):
845861
resampled = df.resample("Y")
846862

847863
func = getattr(resampled, method)
848-
if method == "prod" and numeric_only is not True:
864+
if numeric_only is lib.no_default and method not in (
865+
"min",
866+
"max",
867+
"first",
868+
"last",
869+
"prod",
870+
):
849871
warn = FutureWarning
850-
msg = "Dropping invalid columns in DataFrameGroupBy.prod is deprecated"
851-
elif method == "sum" and numeric_only is lib.no_default:
872+
msg = (
873+
f"default value of numeric_only in DataFrameGroupBy.{method} is deprecated"
874+
)
875+
elif method in ("prod", "mean", "median") and numeric_only is not True:
852876
warn = FutureWarning
853-
msg = "The default value of numeric_only in DataFrameGroupBy.sum is deprecated"
877+
msg = f"Dropping invalid columns in DataFrameGroupBy.{method} is deprecated"
854878
else:
855879
warn = None
856880
msg = ""
857881
with tm.assert_produces_warning(warn, match=msg):
858-
result = func(numeric_only=numeric_only)
859-
860-
expected = DataFrame(expected_data, index=expected_index)
861-
tm.assert_frame_equal(result, expected)
882+
if isinstance(expected_data, str):
883+
klass = TypeError if method == "var" else ValueError
884+
with pytest.raises(klass, match=expected_data):
885+
_ = func(numeric_only=numeric_only)
886+
else:
887+
result = func(numeric_only=numeric_only)
888+
expected = DataFrame(expected_data, index=expected_index)
889+
tm.assert_frame_equal(result, expected)
862890

863891

864892
@pytest.mark.parametrize(

0 commit comments

Comments
 (0)
0