BUG: Groupby dropped nan groups from result when grouping over single column by phofl · Pull Request #36842 · pandas-dev/pandas · GitHub
[go: up one dir, main page]

Skip to content

BUG: Groupby dropped nan groups from result when grouping over single column #36842

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 26 commits into from
Nov 4, 2020
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
b384c51
Fix dropped NAs with one group column and dropna=False
phofl Oct 3, 2020
08c3088
Add whatsnew
phofl Oct 3, 2020
47649aa
Fix failing test
phofl Oct 4, 2020
c93590b
Address review
phofl Oct 6, 2020
c61ce7a
Address review comments
phofl Oct 6, 2020
ab333e4
Merge branch 'master' of https://github.com/pandas-dev/pandas into 35646
phofl Oct 7, 2020
dfa522a
Address review comments
phofl Oct 7, 2020
2e3e1bf
Change type hint
phofl Oct 7, 2020
d067280
Change type annotation
phofl Oct 7, 2020
5b5b673
Fix type hint to index
phofl Oct 9, 2020
b0a0372
Fix type hint because Index can not be imported
phofl Oct 9, 2020
721d3d6
Merge branch 'master' of https://github.com/pandas-dev/pandas into 35646
phofl Oct 9, 2020
3100511
Change import order
phofl Oct 9, 2020
6ca4324
Fix type hints
phofl Oct 10, 2020
06ce333
Merge branch 'master' of https://github.com/pandas-dev/pandas into 35646
phofl Oct 10, 2020
602d557
Merge branch 'master' of https://github.com/pandas-dev/pandas into 35646
phofl Oct 10, 2020
9d1c760
Merge branch 'master' of https://github.com/pandas-dev/pandas into 35646
phofl Oct 11, 2020
7e809da
Merge branch 'master' of https://github.com/pandas-dev/pandas into 35646
phofl Oct 14, 2020
551bec6
Merge branch 'master' of https://github.com/pandas-dev/pandas into 35646
phofl Oct 15, 2020
749161d
Merge branch 'master' of https://github.com/pandas-dev/pandas into 35646
phofl Oct 29, 2020
528fe0d
Fix pattern
phofl Oct 30, 2020
0d95d58
Merge branch 'master' of https://github.com/pandas-dev/pandas into 35646
phofl Oct 30, 2020
6dbd632
Merge branch 'master' into 35646
jreback Oct 31, 2020
9d7c403
Merge branch 'master' of https://github.com/pandas-dev/pandas into 35646
phofl Oct 31, 2020
00fe075
Merge branch '35646' of https://github.com/phofl/pandas into 35646
phofl Oct 31, 2020
f5b25cb
Add lost whatsnew
phofl Oct 31, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Address review comments
  • Loading branch information
phofl committed Oct 7, 2020
commit dfa522afd9e2e4dcf24da9a04bb2197b99005aab
6 changes: 3 additions & 3 deletions pandas/core/sorting.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,14 +16,14 @@

from pandas._libs import algos, hashtable, lib
from pandas._libs.hashtable import unique_label_indices
from pandas._typing import IndexKeyFunc
from pandas._typing import IndexKeyFunc, Label

from pandas.core.dtypes.common import (
ensure_int64,
ensure_platform_int,
is_extension_array_dtype,
)
from pandas.core.dtypes.generic import ABCIndex, ABCMultiIndex
from pandas.core.dtypes.generic import ABCMultiIndex
from pandas.core.dtypes.missing import isna

import pandas.core.algorithms as algorithms
Expand Down Expand Up @@ -519,7 +519,7 @@ def get_flattened_list(


def get_indexer_dict(
label_list: List[ABCIndex], keys: List[np.ndarray]
label_list: List[np.ndarray], keys: Label
) -> Dict[Union[str, Tuple], np.ndarray]:
"""
Returns
Expand Down
8 changes: 1 addition & 7 deletions pandas/tests/groupby/test_groupby_dropna.py
Original file line number Diff line number Diff line change
Expand Up @@ -339,7 +339,7 @@ def test_groupby_apply_with_dropna_for_multi_index(dropna, data, selected_data,


def test_groupby_nan_included():
# GH 35646, GH 35542
# GH 35646
data = {"group": ["g1", np.nan, "g1", "g2", np.nan], "B": [0, 1, 2, 3, 4]}
df = pd.DataFrame(data)
grouped = df.groupby("group", dropna=False)
Expand All @@ -354,9 +354,3 @@ def test_groupby_nan_included():
tm.assert_numpy_array_equal(result_values, expected_values)
assert np.isnan(list(result.keys())[2])
assert list(result.keys())[0:2] == ["g1", "g2"]

result = grouped.mean()
expected = pd.DataFrame(
{"B": [1.0, 3.0, 2.5]}, index=pd.Index(["g1", "g2", np.nan], name="group")
)
tm.assert_frame_equal(result, expected)
15 changes: 15 additions & 0 deletions pandas/tests/window/test_rolling.py
Original file line number Diff line number Diff line change
Expand Up @@ -868,3 +868,18 @@ def test_rolling_period_index(index, window, func, values):
result = getattr(ds.rolling(window, closed="left"), func)()
expected = pd.Series(values, index=index)
tm.assert_series_equal(result, expected)


def test_groupby_rolling_nan_included():
    # GH 35542
    # Rolling over a groupby built with dropna=False must keep the rows whose
    # group key is NaN instead of silently discarding them.
    frame = pd.DataFrame(
        {"group": ["g1", np.nan, "g1", "g2", np.nan], "B": [0, 1, 2, 3, 4]}
    )
    rolled = frame.groupby("group", dropna=False).rolling(1, min_periods=1)
    result = rolled.mean()

    # Expected rows are ordered by group key with the NaN group last; the
    # second index level carries each row's original label in ``frame``.
    expected_index = pd.MultiIndex.from_tuples(
        [("g1", 0), ("g1", 2), ("g2", 3), (np.nan, 1), (np.nan, 4)],
        names=["group", None],
    )
    expected = pd.DataFrame({"B": [0.0, 2.0, 3.0, 1.0, 4.0]}, index=expected_index)
    tm.assert_frame_equal(result, expected)
0