From d877ae9243604d6adcce45192a0de962485a67d8 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 31 May 2019 16:30:18 -0500 Subject: [PATCH 1/8] ENH --- doc/source/user_guide/groupby.rst | 23 ++++++ doc/source/whatsnew/v0.25.0.rst | 1 + pandas/core/groupby/generic.py | 70 ++++++++++++++++ .../tests/groupby/aggregate/test_aggregate.py | 81 ++++++++++++++++--- 4 files changed, 166 insertions(+), 9 deletions(-) diff --git a/doc/source/user_guide/groupby.rst b/doc/source/user_guide/groupby.rst index 9895fc606f70d..87805b18fe9c2 100644 --- a/doc/source/user_guide/groupby.rst +++ b/doc/source/user_guide/groupby.rst @@ -568,6 +568,29 @@ For a grouped ``DataFrame``, you can rename in a similar manner: 'mean': 'bar', 'std': 'baz'})) +.. note:: + + In general, the output column names should be unique. You can't apply + the same function (or two functions with the same name) to the same + column. + + .. ipython:: python + :okexcept: + + grouped['C'].agg(['sum', 'sum']) + + + Pandas *does* allow you to provide multiple lambdas. In this case, pandas + will mangle the name of the (nameless) lambda functions, appending ``_`` + to each subsequent lambda. + + .. ipython:: python + + grouped['C'].agg([lambda x: x.max() - x.min(), + lambda x: x.median() - x.mean()]) + + + .. _groupby.aggregate.named: Named Aggregation diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 2b1a61186dca6..a8369b01045ad 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -95,6 +95,7 @@ Other Enhancements - :class:`datetime.timezone` objects are now supported as arguments to timezone methods and constructors (:issue:`25065`) - :meth:`DataFrame.query` and :meth:`DataFrame.eval` now supports quoting column names with backticks to refer to names with spaces (:issue:`6508`) - :func:`merge_asof` now gives a more clear error message when merge keys are categoricals that are not equal (:issue:`26136`) +- Supported for multiple lambdas in the same aggregation for :meth:`GroupBy.aggregate` (:issue:`26430`). - :meth:`pandas.core.window.Rolling` supports exponential (or Poisson) window type (:issue:`21303`) - Error message for missing required imports now includes the original import error's text (:issue:`23868`) - :class:`DatetimeIndex` and :class:`TimedeltaIndex` now have a ``mean`` method (:issue:`24757`) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 35ffa552913ae..f067bf82675ea 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -25,6 +25,7 @@ from pandas.core.dtypes.common import ( ensure_int64, ensure_platform_int, is_bool, is_datetimelike, is_integer_dtype, is_interval_dtype, is_numeric_dtype, is_scalar) +from pandas.core.dtypes.inference import is_dict_like, is_list_like from pandas.core.dtypes.missing import isna, notna from pandas._typing import FrameOrSeries @@ -208,6 +209,8 @@ def aggregate(self, func, *args, **kwargs): raise TypeError("Must provide 'func' or tuples of " "'(column, aggfunc).") + func = _maybe_mangle_lambdas(func) + result, how = self._aggregate(func, _level=_level, *args, **kwargs) if how is None: return result @@ -830,6 +833,7 @@ def aggregate(self, func_or_funcs=None, *args, **kwargs): if isinstance(func_or_funcs, abc.Iterable): # Catch instances of lists / tuples # but not the class list / tuple itself. + func_or_funcs = _maybe_mangle_lambdas(func_or_funcs) ret = self._aggregate_multiple_funcs(func_or_funcs, (_level or 0) + 1) if relabeling: @@ -1710,3 +1714,69 @@ def _normalize_keyword_aggregation(kwargs): order.append((column, com.get_callable_name(aggfunc) or aggfunc)) return aggspec, columns, order + + +def _make_lambda(func, i): + def f(*args, **kwargs): + return func(*args, **kwargs) + f.__name__ = "".format(i) + return f + + +def _managle_lambda_list(aggfuncs): + i = 0 + aggfuncs2 = [] + for aggfunc in aggfuncs: + if com.get_callable_name(aggfunc) == "": + if i > 0: + aggfunc = _make_lambda(aggfunc, i) + i += 1 + aggfuncs2.append(aggfunc) + + return aggfuncs2 + + +def _maybe_mangle_lambdas(agg_spec): + """ + Make new lambdas with unique names. + + Parameters + ---------- + agg_spec : Any + An argument to NDFrameGroupBy.agg. + Non-dict-like `agg_spec` are pass through as is. + For dict-like `agg_spec` a new spec is returned + with name-mangled lambdas. + + Returns + ------- + mangled : Any + Same type as the input. + + Examples + -------- + >>> _maybe_mangle_lambdas('sum') + 'sum' + + >>> _maybe_mangle_lambdas([lambda: 1, lambda: 2]) # doctest: +SKIP + [()>, + .f(*args, **kwargs)>] + """ + is_dict = is_dict_like(agg_spec) + if not (is_dict or is_list_like(agg_spec)): + return agg_spec + agg_spec2 = type(agg_spec)() # dict or OrderdDict + + if is_dict: + for key in agg_spec: + aggfuncs = agg_spec[key] + if is_list_like(aggfuncs) and not is_dict_like(aggfuncs): + aggfuncs2 = _managle_lambda_list(aggfuncs) + else: + aggfuncs2 = aggfuncs + + agg_spec2[key] = aggfuncs2 or aggfuncs + else: + agg_spec2 = _managle_lambda_list(agg_spec) + + return agg_spec2 diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py index 801b99fed5ce6..66ea36163746c 100644 --- a/pandas/tests/groupby/aggregate/test_aggregate.py +++ b/pandas/tests/groupby/aggregate/test_aggregate.py @@ -10,6 +10,7 @@ import pandas as pd from pandas import DataFrame, Index, MultiIndex, Series, compat, concat from pandas.core.base import SpecificationError +from pandas.core.groupby.generic import _maybe_mangle_lambdas from pandas.core.groupby.grouper import Grouping import pandas.util.testing as tm @@ -210,15 +211,6 @@ def test_multiple_functions_tuples_and_non_tuples(df): tm.assert_frame_equal(result, expected) -def test_agg_multiple_functions_too_many_lambdas(df): - grouped = df.groupby('A') - funcs = ['mean', lambda x: x.mean(), lambda x: x.std()] - - msg = 'Function names must be unique, found multiple named ' - with pytest.raises(SpecificationError, match=msg): - grouped.agg(funcs) - - def test_more_flexible_frame_multi_function(df): grouped = df.groupby('A') @@ -458,3 +450,74 @@ def test_agg_namedtuple(self): expected = df.groupby("A").agg(b=("B", "sum"), c=("B", "count")) tm.assert_frame_equal(result, expected) + + +class TestLambdaMangling: + + def test_maybe_mangle_lambdas_passthrough(self): + assert _maybe_mangle_lambdas('mean') == 'mean' + assert _maybe_mangle_lambdas(lambda x: x).__name__ == '' + assert [x.__name__ for x in _maybe_mangle_lambdas([lambda x: x]) + ] == [''] + + def test_maybe_mangle_lambdas_listlike(self): + aggfuncs = [lambda x: 1, lambda x: 2] + result = _maybe_mangle_lambdas(aggfuncs) + assert result[0].__name__ == '' + assert result[1].__name__ == '' + assert aggfuncs[0](None) == result[0](None) + assert aggfuncs[1](None) == result[1](None) + + def test_maybe_mangle_lambdas(self): + func = { + 'A': [lambda x: 0, lambda x: 1] + } + result = _maybe_mangle_lambdas(func) + assert result['A'][0].__name__ == '' + assert result['A'][1].__name__ == '' + + def test_maybe_mangle_lambdas_args(self): + func = { + 'A': [lambda x, a, b=1: (0, a, b), lambda x: 1] + } + result = _maybe_mangle_lambdas(func) + assert result['A'][0].__name__ == '' + assert result['A'][1].__name__ == '' + + assert func['A'][0](0, 1) == (0, 1, 1) + assert func['A'][0](0, 1, 2) == (0, 1, 2) + assert func['A'][0](0, 2, b=3) == (0, 2, 3) + + def test_maybe_mangle_lambdas_named(self): + func = OrderedDict([('C', np.mean), + ('D', OrderedDict([('foo', np.mean), + ('bar', np.mean)]))]) + result = _maybe_mangle_lambdas(func) + assert result == func + + def test_basic(self): + df = pd.DataFrame({"A": [0, 0, 1, 1], "B": [1, 2, 3, 4]}) + result = df.groupby("A").agg({"B": [lambda x: 0, lambda x: 1]}) + + expected = pd.DataFrame({("B", ""): [0, 0], + ("B", ""): [1, 1]}, + index=pd.Index([0, 1], name='A')) + tm.assert_frame_equal(result, expected) + + def test_mangle_series_groupby(self): + gr = pd.Series([1, 2, 3, 4]).groupby([0, 0, 1, 1]) + result = gr.agg([lambda x: 0, lambda x: 1]) + expected = pd.DataFrame({'': [0, 0], '': [1, 1]}) + tm.assert_frame_equal(result, expected) + + @pytest.mark.xfail(reason="GH-26611. kwargs for multi-agg.") + def test_with_kwargs(self): + f1 = lambda x, y, b=1: x.sum() + y + b + f2 = lambda x, y, b=2: x.sum() + y * b + result = pd.Series([1, 2]).groupby([0, 0]).agg([f1, f2], 0) + expected = pd.DataFrame({'': [4], '': [6]}) + tm.assert_frame_equal(result, expected) + + result = pd.Series([1, 2]).groupby([0, 0]).agg([f1, f2], 0, b=10) + expected = pd.DataFrame({'': [13], '': [30]}) + tm.assert_frame_equal(result, expected) From 54c36a141b8b6c8ec1a94398b27aea8e158df014 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Mon, 24 Jun 2019 11:00:13 -0500 Subject: [PATCH 2/8] updates --- doc/source/whatsnew/v0.25.0.rst | 20 +++++++ pandas/core/groupby/generic.py | 55 ++++++++++++++----- .../tests/groupby/aggregate/test_aggregate.py | 34 +++++++++--- 3 files changed, 87 insertions(+), 22 deletions(-) diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index d2d1c9b7ee546..3ccffd657bb1d 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -74,6 +74,26 @@ a dict to a Series groupby aggregation (:ref:`whatsnew_0200.api_breaking.depreca See :ref:`groupby.aggregate.named` for more. +.. _whatsnew_0250.enhancements.multiple_lambdas: + +Groupby Aggregation with multiple lambdas +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +You can now provide multiple lambda functions to a list-like aggregation in +:class:`pandas.core.groupby.GroupBy.agg` (:issue:`26430`). + +.. ipython:: python + + animals.groupby('kind').height.agg([ + lambda x: x.iloc[0], lambda x: x.iloc[-1] + ]) + + animals.groupby('kind').agg([ + lambda x: x.iloc[0] - x.iloc[1], + lambda x: x.iloc[0] + x.iloc[1] + ]) + +Previously, these raised a ``SpecificationError``. .. _whatsnew_0250.enhancements.multi_index_repr: diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index f067bf82675ea..b6e731d5f39c8 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -48,6 +48,7 @@ NamedAgg = namedtuple("NamedAgg", ["column", "aggfunc"]) # TODO(typing) the return value on this callable should be any *scalar*. AggScalar = Union[str, Callable[..., Any]] +ScalarResult = typing.TypeVar("ScalarResult") # TODO: fix & move to _typing. def whitelist_method_generator(base_class: Type[GroupBy], @@ -1716,24 +1717,52 @@ def _normalize_keyword_aggregation(kwargs): return aggspec, columns, order -def _make_lambda(func, i): +def _make_lambda( + func: Callable[..., ScalarResult], i: int +) -> Callable[..., ScalarResult]: + """ + Make a new function with name + + Parameters + ---------- + func : Callable + The lambda function to call. + i : int + The counter to use for the name. + + Returns + ------- + Callable + Same as the caller but with name + """ def f(*args, **kwargs): return func(*args, **kwargs) f.__name__ = "".format(i) return f -def _managle_lambda_list(aggfuncs): +def _managle_lambda_list( + aggfuncs: typing.Sequence[Callable[..., ScalarResult]] +) -> typing.Sequence[Callable[..., ScalarResult]]: + """ + Possibly mangle a list of aggfuncs. + + Notes + ----- + If just one aggfunc is passed, the name will not be mangeld. + """ + if len(aggfuncs) <= 1: + # don't mangle for .agg([lambda x: .]) + return aggfuncs i = 0 - aggfuncs2 = [] + mangled_aggfuncs = [] for aggfunc in aggfuncs: if com.get_callable_name(aggfunc) == "": - if i > 0: - aggfunc = _make_lambda(aggfunc, i) + aggfunc = _make_lambda(aggfunc, i) i += 1 - aggfuncs2.append(aggfunc) + mangled_aggfuncs.append(aggfunc) - return aggfuncs2 + return mangled_aggfuncs def _maybe_mangle_lambdas(agg_spec): @@ -1765,18 +1794,18 @@ def _maybe_mangle_lambdas(agg_spec): is_dict = is_dict_like(agg_spec) if not (is_dict or is_list_like(agg_spec)): return agg_spec - agg_spec2 = type(agg_spec)() # dict or OrderdDict + mangled_aggspec = type(agg_spec)() # dict or OrderdDict if is_dict: for key in agg_spec: aggfuncs = agg_spec[key] if is_list_like(aggfuncs) and not is_dict_like(aggfuncs): - aggfuncs2 = _managle_lambda_list(aggfuncs) + mangled_aggfuncs = _managle_lambda_list(aggfuncs) else: - aggfuncs2 = aggfuncs + mangled_aggfuncs = aggfuncs - agg_spec2[key] = aggfuncs2 or aggfuncs + mangled_aggspec[key] = mangled_aggfuncs or aggfuncs else: - agg_spec2 = _managle_lambda_list(agg_spec) + mangled_aggspec = _managle_lambda_list(agg_spec) - return agg_spec2 + return mangled_aggspec diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py index 66ea36163746c..5f62e037d6dc3 100644 --- a/pandas/tests/groupby/aggregate/test_aggregate.py +++ b/pandas/tests/groupby/aggregate/test_aggregate.py @@ -354,6 +354,12 @@ def test_series_named_agg_duplicates_raises(self): with pytest.raises(SpecificationError): gr.agg(a='sum', b='sum') + def test_mangled(self): + gr = pd.Series([1, 2, 3]).groupby([0, 0, 1]) + result = gr.agg(a=lambda x: 0, b=lambda x: 1) + expected = pd.Series([0, 1], index=['a', 'b']) + tm.assert_frame_equal(result, expected) + class TestNamedAggregationDataFrame: def test_agg_relabel(self): @@ -451,19 +457,29 @@ def test_agg_namedtuple(self): c=("B", "count")) tm.assert_frame_equal(result, expected) + def test_mangled(self): + df = pd.DataFrame({"A": [0, 1], "B": [1, 2], "C": [3, 4]}) + result = df.groupby("A").agg( + b=("B", lambda x: 0), + c=("C", lambda x: 1) + ) + expected = pd.DataFrame({"b": [0, 0], "c": [1, 1]}, + index=pd.Index([0, 1], name='A')) + tm.assert_frame_equal(result, expected) + class TestLambdaMangling: def test_maybe_mangle_lambdas_passthrough(self): assert _maybe_mangle_lambdas('mean') == 'mean' assert _maybe_mangle_lambdas(lambda x: x).__name__ == '' - assert [x.__name__ for x in _maybe_mangle_lambdas([lambda x: x]) - ] == [''] + # don't mangel single lambda. + assert _maybe_mangle_lambdas([lambda x: x])[0].__name__ == '' def test_maybe_mangle_lambdas_listlike(self): aggfuncs = [lambda x: 1, lambda x: 2] result = _maybe_mangle_lambdas(aggfuncs) - assert result[0].__name__ == '' + assert result[0].__name__ == '' assert result[1].__name__ == '' assert aggfuncs[0](None) == result[0](None) assert aggfuncs[1](None) == result[1](None) @@ -473,7 +489,7 @@ def test_maybe_mangle_lambdas(self): 'A': [lambda x: 0, lambda x: 1] } result = _maybe_mangle_lambdas(func) - assert result['A'][0].__name__ == '' + assert result['A'][0].__name__ == '' assert result['A'][1].__name__ == '' def test_maybe_mangle_lambdas_args(self): @@ -481,7 +497,7 @@ def test_maybe_mangle_lambdas_args(self): 'A': [lambda x, a, b=1: (0, a, b), lambda x: 1] } result = _maybe_mangle_lambdas(func) - assert result['A'][0].__name__ == '' + assert result['A'][0].__name__ == '' assert result['A'][1].__name__ == '' assert func['A'][0](0, 1) == (0, 1, 1) @@ -499,7 +515,7 @@ def test_basic(self): df = pd.DataFrame({"A": [0, 0, 1, 1], "B": [1, 2, 3, 4]}) result = df.groupby("A").agg({"B": [lambda x: 0, lambda x: 1]}) - expected = pd.DataFrame({("B", ""): [0, 0], + expected = pd.DataFrame({("B", ""): [0, 0], ("B", ""): [1, 1]}, index=pd.Index([0, 1], name='A')) tm.assert_frame_equal(result, expected) @@ -507,7 +523,7 @@ def test_basic(self): def test_mangle_series_groupby(self): gr = pd.Series([1, 2, 3, 4]).groupby([0, 0, 1, 1]) result = gr.agg([lambda x: 0, lambda x: 1]) - expected = pd.DataFrame({'': [0, 0], '': [1, 1]}) + expected = pd.DataFrame({'': [0, 0], '': [1, 1]}) tm.assert_frame_equal(result, expected) @pytest.mark.xfail(reason="GH-26611. kwargs for multi-agg.") @@ -515,9 +531,9 @@ def test_with_kwargs(self): f1 = lambda x, y, b=1: x.sum() + y + b f2 = lambda x, y, b=2: x.sum() + y * b result = pd.Series([1, 2]).groupby([0, 0]).agg([f1, f2], 0) - expected = pd.DataFrame({'': [4], '': [6]}) + expected = pd.DataFrame({'': [4], '': [6]}) tm.assert_frame_equal(result, expected) result = pd.Series([1, 2]).groupby([0, 0]).agg([f1, f2], 0, b=10) - expected = pd.DataFrame({'': [13], '': [30]}) + expected = pd.DataFrame({'': [13], '': [30]}) tm.assert_frame_equal(result, expected) From 9c2bcf2ed830b09b72495bf1481df3a083615cc1 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Mon, 24 Jun 2019 11:52:10 -0500 Subject: [PATCH 3/8] fix assert --- pandas/tests/groupby/aggregate/test_aggregate.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py index 5f62e037d6dc3..ea59cde54f17b 100644 --- a/pandas/tests/groupby/aggregate/test_aggregate.py +++ b/pandas/tests/groupby/aggregate/test_aggregate.py @@ -357,7 +357,7 @@ def test_series_named_agg_duplicates_raises(self): def test_mangled(self): gr = pd.Series([1, 2, 3]).groupby([0, 0, 1]) result = gr.agg(a=lambda x: 0, b=lambda x: 1) - expected = pd.Series([0, 1], index=['a', 'b']) + expected = pd.DataFrame({'a': [0, 0], 'b': [1, 1]}) tm.assert_frame_equal(result, expected) From d5490465dae7defe3f9f89d1e81afd588631cd9f Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 25 Jun 2019 06:38:56 -0500 Subject: [PATCH 4/8] updates * use functools.partial * remove dead or --- pandas/core/groupby/generic.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index b6e731d5f39c8..34e7889e4bee7 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -5,9 +5,9 @@ These are user facing as the result of the ``df.groupby(...)`` operations, which here returns a DataFrameGroupBy object. """ - from collections import OrderedDict, abc, namedtuple import copy +import functools from functools import partial from textwrap import dedent import typing @@ -1758,7 +1758,8 @@ def _managle_lambda_list( mangled_aggfuncs = [] for aggfunc in aggfuncs: if com.get_callable_name(aggfunc) == "": - aggfunc = _make_lambda(aggfunc, i) + aggfunc = functools.partial(aggfunc) + aggfunc.__name__ = ''.format(i) i += 1 mangled_aggfuncs.append(aggfunc) @@ -1788,7 +1789,7 @@ def _maybe_mangle_lambdas(agg_spec): 'sum' >>> _maybe_mangle_lambdas([lambda: 1, lambda: 2]) # doctest: +SKIP - [()>, + [, .f(*args, **kwargs)>] """ is_dict = is_dict_like(agg_spec) @@ -1797,14 +1798,13 @@ def _maybe_mangle_lambdas(agg_spec): mangled_aggspec = type(agg_spec)() # dict or OrderdDict if is_dict: - for key in agg_spec: - aggfuncs = agg_spec[key] + for key, aggfuncs in agg_spec.items(): if is_list_like(aggfuncs) and not is_dict_like(aggfuncs): mangled_aggfuncs = _managle_lambda_list(aggfuncs) else: mangled_aggfuncs = aggfuncs - mangled_aggspec[key] = mangled_aggfuncs or aggfuncs + mangled_aggspec[key] = mangled_aggfuncs else: mangled_aggspec = _managle_lambda_list(agg_spec) From 3ece0a5addc342827aaccaeff9e7f27dec731539 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 25 Jun 2019 14:03:41 -0500 Subject: [PATCH 5/8] fixups * remove type * remove dead code * remove release note --- doc/source/whatsnew/v0.25.0.rst | 1 - pandas/core/groupby/generic.py | 45 +++++++++++---------------------- 2 files changed, 15 insertions(+), 31 deletions(-) diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 3ccffd657bb1d..8835dc40a023f 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -147,7 +147,6 @@ Other Enhancements - :class:`datetime.timezone` objects are now supported as arguments to timezone methods and constructors (:issue:`25065`) - :meth:`DataFrame.query` and :meth:`DataFrame.eval` now supports quoting column names with backticks to refer to names with spaces (:issue:`6508`) - :func:`merge_asof` now gives a more clear error message when merge keys are categoricals that are not equal (:issue:`26136`) -- Supported for multiple lambdas in the same aggregation for :meth:`GroupBy.aggregate` (:issue:`26430`). - :meth:`pandas.core.window.Rolling` supports exponential (or Poisson) window type (:issue:`21303`) - Error message for missing required imports now includes the original import error's text (:issue:`23868`) - :class:`DatetimeIndex` and :class:`TimedeltaIndex` now have a ``mean`` method (:issue:`24757`) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 34e7889e4bee7..d13e15d167f2b 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -11,7 +11,7 @@ from functools import partial from textwrap import dedent import typing -from typing import Any, Callable, FrozenSet, Iterator, List, Type, Union +from typing import Any, Callable, FrozenSet, Iterator, Type, Union import warnings import numpy as np @@ -23,9 +23,9 @@ from pandas.core.dtypes.cast import maybe_downcast_to_dtype from pandas.core.dtypes.common import ( - ensure_int64, ensure_platform_int, is_bool, is_datetimelike, - is_integer_dtype, is_interval_dtype, is_numeric_dtype, is_scalar) -from pandas.core.dtypes.inference import is_dict_like, is_list_like + ensure_int64, ensure_platform_int, is_bool, is_datetimelike, is_dict_like, + is_integer_dtype, is_interval_dtype, is_list_like, is_numeric_dtype, + is_scalar) from pandas.core.dtypes.missing import isna, notna from pandas._typing import FrameOrSeries @@ -1703,7 +1703,10 @@ def _normalize_keyword_aggregation(kwargs): # process normally, then fixup the names. # TODO(Py35): When we drop python 3.5, change this to # defaultdict(list) - aggspec = OrderedDict() # type: typing.OrderedDict[str, List[AggScalar]] + # TODO: aggspec type: typing.OrderedDict[str, List[AggScalar]] + # May be hitting https://github.com/python/mypy/issues/5958 + # saying it doesn't have an attribute __name__ + aggspec = OrderedDict() order = [] columns, pairs = list(zip(*kwargs.items())) @@ -1717,39 +1720,21 @@ def _normalize_keyword_aggregation(kwargs): return aggspec, columns, order -def _make_lambda( - func: Callable[..., ScalarResult], i: int -) -> Callable[..., ScalarResult]: - """ - Make a new function with name - - Parameters - ---------- - func : Callable - The lambda function to call. - i : int - The counter to use for the name. - - Returns - ------- - Callable - Same as the caller but with name - """ - def f(*args, **kwargs): - return func(*args, **kwargs) - f.__name__ = "".format(i) - return f - - def _managle_lambda_list( aggfuncs: typing.Sequence[Callable[..., ScalarResult]] ) -> typing.Sequence[Callable[..., ScalarResult]]: """ Possibly mangle a list of aggfuncs. + Returns + ------- + mangled: list-like + A new AggSpec sequence, where lambdas have been converted + to have unique names. + Notes ----- - If just one aggfunc is passed, the name will not be mangeld. + If just one aggfunc is passed, the name will not be mangled. """ if len(aggfuncs) <= 1: # don't mangle for .agg([lambda x: .]) From dde761010dc3bbad4bd5c54eaece5dbf05457337 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 25 Jun 2019 14:15:04 -0500 Subject: [PATCH 6/8] fixup types --- pandas/core/groupby/generic.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index d13e15d167f2b..ce48b311228b1 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -48,7 +48,10 @@ NamedAgg = namedtuple("NamedAgg", ["column", "aggfunc"]) # TODO(typing) the return value on this callable should be any *scalar*. AggScalar = Union[str, Callable[..., Any]] -ScalarResult = typing.TypeVar("ScalarResult") # TODO: fix & move to _typing. +# TODO: validate types on ScalarResult and move to _typing +# Blocked from using by https://github.com/python/mypy/issues/1484 +# See note at _mangle_lambda_list +ScalarResult = typing.TypeVar("ScalarResult") def whitelist_method_generator(base_class: Type[GroupBy], @@ -1720,9 +1723,13 @@ def _normalize_keyword_aggregation(kwargs): return aggspec, columns, order -def _managle_lambda_list( - aggfuncs: typing.Sequence[Callable[..., ScalarResult]] -) -> typing.Sequence[Callable[..., ScalarResult]]: +# TODO: Can't use, because mypy doesn't like us setting __name__ +# error: "partial[Any]" has no attribute "__name__" +# the type is: +# typing.Sequence[Callable[..., ScalarResult]] +# -> typing.Sequence[Callable[..., ScalarResult]]: + +def _managle_lambda_list(aggfuncs): """ Possibly mangle a list of aggfuncs. From 6581abc177123949d0d8978a1a23fb64f9f0da23 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 26 Jun 2019 08:51:55 -0500 Subject: [PATCH 7/8] doc --- pandas/core/groupby/generic.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 5c728c291c6e7..e2ae83ff87996 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -1733,6 +1733,10 @@ def _managle_lambda_list(aggfuncs): """ Possibly mangle a list of aggfuncs. + Parameters + ---------- + aggfuncs : Sequence + Returns ------- mangled: list-like From be712d84f916e436ce5ebfb57a4c2e0b6e34a7a6 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 27 Jun 2019 09:08:46 -0500 Subject: [PATCH 8/8] types --- pandas/core/groupby/generic.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index e2ae83ff87996..f06b88b63f8b9 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -11,7 +11,7 @@ from functools import partial from textwrap import dedent import typing -from typing import Any, Callable, FrozenSet, Iterator, Type, Union +from typing import Any, Callable, FrozenSet, Iterator, Sequence, Type, Union import warnings import numpy as np @@ -1729,7 +1729,7 @@ def _normalize_keyword_aggregation(kwargs): # typing.Sequence[Callable[..., ScalarResult]] # -> typing.Sequence[Callable[..., ScalarResult]]: -def _managle_lambda_list(aggfuncs): +def _managle_lambda_list(aggfuncs: Sequence[Any]) -> Sequence[Any]: """ Possibly mangle a list of aggfuncs. @@ -1762,7 +1762,7 @@ def _managle_lambda_list(aggfuncs): return mangled_aggfuncs -def _maybe_mangle_lambdas(agg_spec): +def _maybe_mangle_lambdas(agg_spec: Any) -> Any: """ Make new lambdas with unique names.