ENH/BUG: Use Kleene logic for groupby any/all by mzeitlin11 · Pull Request #40819 · pandas-dev/pandas · GitHub
[go: up one dir, main page]

Skip to content

ENH/BUG: Use Kleene logic for groupby any/all #40819

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 40 commits into master from enh/any_all_kleene
Apr 13, 2021
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
40 commits
Select commit Hold shift + click to select a range
088ca14
WIP
mzeitlin11 Apr 6, 2021
2554921
TSTS: Consolidate groupby any, all
mzeitlin11 Apr 6, 2021
9a8f9c9
Fixture fixup
mzeitlin11 Apr 6, 2021
5ca9c4b
Unmove test
mzeitlin11 Apr 6, 2021
68fd995
Merge remote-tracking branch 'origin/master' into enh/any_all_kleene
mzeitlin11 Apr 6, 2021
6530491
Merge branch 'tst/any_all' into enh/any_all_kleene
mzeitlin11 Apr 6, 2021
26146c2
Add initial tests
mzeitlin11 Apr 6, 2021
20f475d
Add whatsnew, bench
mzeitlin11 Apr 6, 2021
924b38e
Clean up edge case
mzeitlin11 Apr 6, 2021
423f43f
Fix typo
mzeitlin11 Apr 6, 2021
b1408ac
Avoid copy if possible
mzeitlin11 Apr 6, 2021
47ef037
Fix old level test
mzeitlin11 Apr 7, 2021
4415060
Precommit fixup
mzeitlin11 Apr 7, 2021
bb04c1c
Clean up print
mzeitlin11 Apr 7, 2021
9c90886
Merge remote-tracking branch 'origin/master' into enh/any_all_kleene
mzeitlin11 Apr 7, 2021
ef3fbe2
precommit fixup
mzeitlin11 Apr 7, 2021
f4c8a8a
Merge remote-tracking branch 'origin/master' into enh/any_all_kleene
mzeitlin11 Apr 8, 2021
1c3cb7d
Split out test
mzeitlin11 Apr 8, 2021
7cbf85b
Split whatsnew
mzeitlin11 Apr 8, 2021
809b8a4
whatsnew typo
mzeitlin11 Apr 8, 2021
58fd33a
Modify dispatch, add mixed test
mzeitlin11 Apr 8, 2021
80a65bb
Fix post proc check
mzeitlin11 Apr 8, 2021
c9b9d5f
Address review comments
mzeitlin11 Apr 8, 2021
7514568
Merge remote-tracking branch 'origin/master' into enh/any_all_kleene
mzeitlin11 Apr 9, 2021
740ad7b
Merge remote-tracking branch 'origin/master' into enh/any_all_kleene
mzeitlin11 Apr 10, 2021
a116bed
Name arguments better
mzeitlin11 Apr 10, 2021
8a428d4
Use -1 as mask signal
mzeitlin11 Apr 10, 2021
8e3c5be
Consistent typing
mzeitlin11 Apr 10, 2021
b627618
Don't use inspect
mzeitlin11 Apr 10, 2021
23b3b64
precommit fixup
mzeitlin11 Apr 10, 2021
a30496c
Clean up docstring
mzeitlin11 Apr 10, 2021
3051a99
Merge remote-tracking branch 'origin/master' into enh/any_all_kleene
mzeitlin11 Apr 12, 2021
4cd2833
Update doc/source/whatsnew/v1.3.0.rst
mzeitlin11 Apr 13, 2021
98cd401
Update doc/source/whatsnew/v1.3.0.rst
mzeitlin11 Apr 13, 2021
a92c637
Update doc/source/whatsnew/v1.3.0.rst
mzeitlin11 Apr 13, 2021
7c5c8e6
Update pandas/tests/groupby/test_any_all.py
mzeitlin11 Apr 13, 2021
c66d1fd
Update pandas/tests/groupby/test_any_all.py
mzeitlin11 Apr 13, 2021
0950234
Merge branch 'master' into enh/any_all_kleene
mzeitlin11 Apr 13, 2021
c81c1a5
Simplify teasts
mzeitlin11 Apr 13, 2021
d2b8ad0
Merge branch 'enh/any_all_kleene' of github.com:/mzeitlin11/pandas in…
mzeitlin11 Apr 13, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Simplify teasts
  • Loading branch information
mzeitlin11 committed Apr 13, 2021
commit c81c1a59d7dff7ac48774cbeb5a8d7f0bf69bfa1
37 changes: 16 additions & 21 deletions pandas/tests/groupby/test_any_all.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from pandas import (
DataFrame,
Index,
Series,
isna,
)
import pandas._testing as tm
Expand Down Expand Up @@ -74,33 +75,27 @@ def test_bool_aggs_dup_column_labels(bool_agg_func):
@pytest.mark.parametrize("bool_agg_func", ["any", "all"])
@pytest.mark.parametrize("skipna", [True, False])
@pytest.mark.parametrize(
# expected_data indexed as [[skipna=False/any, skipna=False/all],
# [skipna=True/any, skipna=True/all]]
"data,expected_data",
"data",
[
([False, False, False], [[False, False], [False, False]]),
([True, True, True], [[True, True], [True, True]]),
([pd.NA, pd.NA, pd.NA], [[pd.NA, pd.NA], [False, True]]),
([False, pd.NA, False], [[pd.NA, False], [False, False]]),
([True, pd.NA, True], [[True, pd.NA], [True, True]]),
([True, pd.NA, False], [[True, False], [True, False]]),
[False, False, False],
[True, True, True],
[pd.NA, pd.NA, pd.NA],
[False, pd.NA, False],
[True, pd.NA, True],
[True, pd.NA, False],
],
)
def test_masked_kleene_logic(bool_agg_func, data, expected_data, skipna):
def test_masked_kleene_logic(bool_agg_func, skipna, data):
# GH#37506
df = DataFrame(data, dtype="boolean")
expected = DataFrame(
[expected_data[skipna][bool_agg_func == "all"]], dtype="boolean", index=[1]
)
ser = Series(data, dtype="boolean")

result = df.groupby([1, 1, 1]).agg(bool_agg_func, skipna=skipna)
tm.assert_frame_equal(result, expected)
# The result should match aggregating on the whole series. Correctness
# there is verified in test_reductions.py::test_any_all_boolean_kleene_logic
expected_data = getattr(ser, bool_agg_func)(skipna=skipna)
expected = Series(expected_data, dtype="boolean")

# The expected result we compared to should match aggregating on the whole
# series
result = getattr(df[0], bool_agg_func)(skipna=skipna)
expected = expected_data[skipna][bool_agg_func == "all"]
assert (result is pd.NA and expected is pd.NA) or result == expected
result = ser.groupby([0, 0, 0]).agg(bool_agg_func, skipna=skipna)
tm.assert_series_equal(result, expected)


@pytest.mark.parametrize(
Expand Down
37 changes: 23 additions & 14 deletions pandas/tests/reductions/test_reductions.py
Original file line number Diff line number Diff line change
Expand Up @@ -941,20 +941,29 @@ def test_all_any_params(self):
with pytest.raises(NotImplementedError, match=msg):
s.all(bool_only=True)

def test_all_any_boolean(self):
# Check skipna, with boolean type
s1 = Series([pd.NA, True], dtype="boolean")
s2 = Series([pd.NA, False], dtype="boolean")
assert s1.all(skipna=False) is pd.NA # NA && True => NA
assert s1.all(skipna=True)
assert s2.any(skipna=False) is pd.NA # NA || False => NA
assert not s2.any(skipna=True)

# GH-33253: all True / all False values buggy with skipna=False
s3 = Series([True, True], dtype="boolean")
s4 = Series([False, False], dtype="boolean")
assert s3.all(skipna=False)
assert not s4.any(skipna=False)
@pytest.mark.parametrize("bool_agg_func", ["any", "all"])
@pytest.mark.parametrize("skipna", [True, False])
@pytest.mark.parametrize(
# expected_data indexed as [[skipna=False/any, skipna=False/all],
# [skipna=True/any, skipna=True/all]]
"data,expected_data",
[
([False, False, False], [[False, False], [False, False]]),
([True, True, True], [[True, True], [True, True]]),
([pd.NA, pd.NA, pd.NA], [[pd.NA, pd.NA], [False, True]]),
([False, pd.NA, False], [[pd.NA, False], [False, False]]),
([True, pd.NA, True], [[True, pd.NA], [True, True]]),
([True, pd.NA, False], [[True, False], [True, False]]),
],
)
def test_any_all_boolean_kleene_logic(
self, bool_agg_func, skipna, data, expected_data
):
ser = Series(data, dtype="boolean")
expected = expected_data[skipna][bool_agg_func == "all"]

result = getattr(ser, bool_agg_func)(skipna=skipna)
assert (result is pd.NA and expected is pd.NA) or result == expected

@pytest.mark.parametrize(
"bool_agg_func,expected",
Expand Down
0