DEPR: groupby nuisance warnings by rhshadrach · Pull Request #46010 · pandas-dev/pandas · GitHub
[go: up one dir, main page]

Skip to content

DEPR: groupby nuisance warnings #46010

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Previous commit
Next commit
DEPR: Add warnings for deprecation of nuisance columns
  • Loading branch information
rhshadrach committed Feb 15, 2022
commit b4eba0f6b76d0a7fa9209357bb9759c77547c203
3 changes: 3 additions & 0 deletions pandas/core/groupby/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -3928,6 +3928,9 @@ def _insert_quantile_level(idx: Index, qs: npt.NDArray[np.float64]) -> MultiInde


def warn_dropping_nuisance_columns_deprecated(cls, how: str) -> None:
if how == "add":
# groupby internally uses "add" instead of "sum" in some places
how = "sum"
warnings.warn(
"Dropping invalid columns in "
f"{cls.__name__}.{how} is deprecated. "
Expand Down
3 changes: 2 additions & 1 deletion pandas/tests/extension/base/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,8 @@ def test_in_numeric_groupby(self, data_for_grouping):
"C": [1, 1, 1, 1, 1, 1, 1, 1],
}
)
result = df.groupby("A").sum().columns
with tm.assert_produces_warning(FutureWarning, match="Dropping invalid"):
result = df.groupby("A").sum().columns

if data_for_grouping.dtype._is_numeric:
expected = pd.Index(["B", "C"])
Expand Down
3 changes: 2 additions & 1 deletion pandas/tests/frame/test_stack_unstack.py
Original file line number Diff line number Diff line change
Expand Up @@ -1777,7 +1777,8 @@ def test_stack_multiple_bug(self):
multi = df.set_index(["DATE", "ID"])
multi.columns.name = "Params"
unst = multi.unstack("ID")
down = unst.resample("W-THU").mean()
with tm.assert_produces_warning(FutureWarning, match="Dropping invalid"):
down = unst.resample("W-THU").mean()

rs = down.stack("ID")
xp = unst.loc[:, ["VAR1"]].resample("W-THU").mean().stack("ID")
Expand Down
3 changes: 2 additions & 1 deletion pandas/tests/generic/test_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,8 @@ def test_metadata_propagation_indiv_groupby(self):
"D": np.random.randn(8),
}
)
result = df.groupby("A").sum()
with tm.assert_produces_warning(FutureWarning, match="Dropping invalid"):
result = df.groupby("A").sum()
tm.assert_metadata_equivalent(df, result)

def test_metadata_propagation_indiv_resample(self):
Expand Down
6 changes: 4 additions & 2 deletions pandas/tests/groupby/test_categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,8 @@ def test_basic(): # TODO: split this test
gb = df.groupby("A", observed=False)
exp_idx = CategoricalIndex(["a", "b", "z"], name="A", ordered=True)
expected = DataFrame({"values": Series([3, 7, 0], index=exp_idx)})
result = gb.sum()
with tm.assert_produces_warning(FutureWarning, match="Dropping invalid"):
result = gb.sum()
tm.assert_frame_equal(result, expected)

# GH 8623
Expand Down Expand Up @@ -344,7 +345,8 @@ def test_observed(observed):
gb = df.groupby(["A", "B"], observed=observed)
exp_index = MultiIndex.from_arrays([cat1, cat2], names=["A", "B"])
expected = DataFrame({"values": [1, 2, 3, 4]}, index=exp_index)
result = gb.sum()
with tm.assert_produces_warning(FutureWarning, match="Dropping invalid"):
result = gb.sum()
if not observed:
expected = cartesian_product_for_groupers(
expected, [cat1, cat2], list("AB"), fill_value=0
Expand Down
56 changes: 35 additions & 21 deletions pandas/tests/groupby/test_groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -668,8 +668,9 @@ def test_groupby_as_index_agg(df):
tm.assert_frame_equal(result, expected)

result2 = grouped.agg({"C": np.mean, "D": np.sum})
expected2 = grouped.mean()
expected2["D"] = grouped.sum()["D"]
with tm.assert_produces_warning(FutureWarning, match="Dropping invalid"):
expected2 = grouped.mean()
expected2["D"] = grouped.sum()["D"]
tm.assert_frame_equal(result2, expected2)

grouped = df.groupby("A", as_index=True)
Expand Down Expand Up @@ -762,7 +763,8 @@ def test_as_index_series_return_frame(df):
tm.assert_frame_equal(result2, expected2)

result = grouped["C"].sum()
expected = grouped.sum().loc[:, ["A", "C"]]
with tm.assert_produces_warning(FutureWarning, match="Dropping invalid"):
expected = grouped.sum().loc[:, ["A", "C"]]
assert isinstance(result, DataFrame)
tm.assert_frame_equal(result, expected)

Expand Down Expand Up @@ -916,8 +918,9 @@ def test_omit_nuisance_warnings(df):
def test_omit_nuisance_python_multiple(three_group):
grouped = three_group.groupby(["A", "B"])

agged = grouped.agg(np.mean)
exp = grouped.mean()
with tm.assert_produces_warning(FutureWarning, match="Dropping invalid"):
agged = grouped.agg(np.mean)
exp = grouped.mean()
tm.assert_frame_equal(agged, exp)


Expand All @@ -934,8 +937,9 @@ def test_empty_groups_corner(mframe):
)

grouped = df.groupby(["k1", "k2"])
result = grouped.agg(np.mean)
expected = grouped.mean()
with tm.assert_produces_warning(FutureWarning, match="Dropping invalid"):
result = grouped.agg(np.mean)
expected = grouped.mean()
tm.assert_frame_equal(result, expected)

grouped = mframe[3:5].groupby(level=0)
Expand All @@ -957,7 +961,8 @@ def test_wrap_aggregated_output_multindex(mframe):
df["baz", "two"] = "peekaboo"

keys = [np.array([0, 0, 1]), np.array([0, 0, 1])]
agged = df.groupby(keys).agg(np.mean)
with tm.assert_produces_warning(FutureWarning, match="Dropping invalid"):
agged = df.groupby(keys).agg(np.mean)
assert isinstance(agged.columns, MultiIndex)

def aggfun(ser):
Expand Down Expand Up @@ -1118,15 +1123,17 @@ def test_groupby_with_hier_columns():
# add a nuisance column
sorted_columns, _ = columns.sortlevel(0)
df["A", "foo"] = "bar"
result = df.groupby(level=0).mean()
with tm.assert_produces_warning(FutureWarning, match="Dropping invalid"):
result = df.groupby(level=0).mean()
tm.assert_index_equal(result.columns, df.columns[:-1])


def test_grouping_ndarray(df):
grouped = df.groupby(df["A"].values)

result = grouped.sum()
expected = df.groupby("A").sum()
with tm.assert_produces_warning(FutureWarning, match="Dropping invalid"):
result = grouped.sum()
expected = df.groupby("A").sum()
tm.assert_frame_equal(
result, expected, check_names=False
) # Note: no names when grouping by value
Expand Down Expand Up @@ -1154,13 +1161,15 @@ def test_groupby_wrong_multi_labels():


def test_groupby_series_with_name(df):
result = df.groupby(df["A"]).mean()
result2 = df.groupby(df["A"], as_index=False).mean()
with tm.assert_produces_warning(FutureWarning, match="Dropping invalid"):
result = df.groupby(df["A"]).mean()
result2 = df.groupby(df["A"], as_index=False).mean()
assert result.index.name == "A"
assert "A" in result2

result = df.groupby([df["A"], df["B"]]).mean()
result2 = df.groupby([df["A"], df["B"]], as_index=False).mean()
with tm.assert_produces_warning(FutureWarning, match="Dropping invalid"):
result = df.groupby([df["A"], df["B"]]).mean()
result2 = df.groupby([df["A"], df["B"]], as_index=False).mean()
assert result.index.names == ("A", "B")
assert "A" in result2
assert "B" in result2
Expand Down Expand Up @@ -1306,8 +1315,9 @@ def test_groupby_unit64_float_conversion():


def test_groupby_list_infer_array_like(df):
result = df.groupby(list(df["A"])).mean()
expected = df.groupby(df["A"]).mean()
with tm.assert_produces_warning(FutureWarning, match="Dropping invalid"):
result = df.groupby(list(df["A"])).mean()
expected = df.groupby(df["A"]).mean()
tm.assert_frame_equal(result, expected, check_names=False)

with pytest.raises(KeyError, match=r"^'foo'$"):
Expand Down Expand Up @@ -1420,7 +1430,8 @@ def test_groupby_2d_malformed():
d["zeros"] = [0, 0]
d["ones"] = [1, 1]
d["label"] = ["l1", "l2"]
tmp = d.groupby(["group"]).mean()
with tm.assert_produces_warning(FutureWarning, match="Dropping invalid"):
tmp = d.groupby(["group"]).mean()
res_values = np.array([[0.0, 1.0], [0.0, 1.0]])
tm.assert_index_equal(tmp.columns, Index(["zeros", "ones"]))
tm.assert_numpy_array_equal(tmp.values, res_values)
Expand Down Expand Up @@ -1586,10 +1597,12 @@ def f(group):

def test_no_dummy_key_names(df):
# see gh-1291
result = df.groupby(df["A"].values).sum()
with tm.assert_produces_warning(FutureWarning, match="Dropping invalid"):
result = df.groupby(df["A"].values).sum()
assert result.index.name is None

result = df.groupby([df["A"].values, df["B"].values]).sum()
with tm.assert_produces_warning(FutureWarning, match="Dropping invalid"):
result = df.groupby([df["A"].values, df["B"].values]).sum()
assert result.index.names == (None, None)


Expand Down Expand Up @@ -2566,7 +2579,8 @@ def test_groupby_aggregation_numeric_with_non_numeric_dtype():
)

gb = df.groupby(by=["x"])
result = gb.sum()
with tm.assert_produces_warning(FutureWarning, match="Dropping invalid"):
result = gb.sum()
tm.assert_frame_equal(result, expected)


Expand Down
3 changes: 2 additions & 1 deletion pandas/tests/groupby/test_groupby_subclass.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,5 +109,6 @@ def test_groupby_resample_preserves_subclass(obj):
df = df.set_index("Date")

# Confirm groupby.resample() preserves dataframe type
result = df.groupby("Buyer").resample("5D").sum()
with tm.assert_produces_warning(FutureWarning, match="Dropping invalid"):
result = df.groupby("Buyer").resample("5D").sum()
assert isinstance(result, obj)
30 changes: 18 additions & 12 deletions pandas/tests/groupby/test_grouping.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,8 +59,9 @@ def test_column_select_via_attr(self, df):
tm.assert_series_equal(result, expected)

df["mean"] = 1.5
result = df.groupby("A").mean()
expected = df.groupby("A").agg(np.mean)
with tm.assert_produces_warning(FutureWarning, match="Dropping invalid"):
result = df.groupby("A").mean()
expected = df.groupby("A").agg(np.mean)
tm.assert_frame_equal(result, expected)

def test_getitem_list_of_columns(self):
Expand Down Expand Up @@ -284,25 +285,29 @@ def test_grouper_column_and_index(self):
{"A": np.arange(6), "B": ["one", "one", "two", "two", "one", "one"]},
index=idx,
)
result = df_multi.groupby(["B", pd.Grouper(level="inner")]).mean()
expected = df_multi.reset_index().groupby(["B", "inner"]).mean()
with tm.assert_produces_warning(FutureWarning, match="Dropping invalid"):
result = df_multi.groupby(["B", pd.Grouper(level="inner")]).mean()
expected = df_multi.reset_index().groupby(["B", "inner"]).mean()
tm.assert_frame_equal(result, expected)

# Test the reverse grouping order
result = df_multi.groupby([pd.Grouper(level="inner"), "B"]).mean()
expected = df_multi.reset_index().groupby(["inner", "B"]).mean()
with tm.assert_produces_warning(FutureWarning, match="Dropping invalid"):
result = df_multi.groupby([pd.Grouper(level="inner"), "B"]).mean()
expected = df_multi.reset_index().groupby(["inner", "B"]).mean()
tm.assert_frame_equal(result, expected)

# Grouping a single-index frame by a column and the index should
# be equivalent to resetting the index and grouping by two columns
df_single = df_multi.reset_index("outer")
result = df_single.groupby(["B", pd.Grouper(level="inner")]).mean()
expected = df_single.reset_index().groupby(["B", "inner"]).mean()
with tm.assert_produces_warning(FutureWarning, match="Dropping invalid"):
result = df_single.groupby(["B", pd.Grouper(level="inner")]).mean()
expected = df_single.reset_index().groupby(["B", "inner"]).mean()
tm.assert_frame_equal(result, expected)

# Test the reverse grouping order
result = df_single.groupby([pd.Grouper(level="inner"), "B"]).mean()
expected = df_single.reset_index().groupby(["inner", "B"]).mean()
with tm.assert_produces_warning(FutureWarning, match="Dropping invalid"):
result = df_single.groupby([pd.Grouper(level="inner"), "B"]).mean()
expected = df_single.reset_index().groupby(["inner", "B"]).mean()
tm.assert_frame_equal(result, expected)

def test_groupby_levels_and_columns(self):
Expand Down Expand Up @@ -376,8 +381,9 @@ def test_empty_groups(self, df):
def test_groupby_grouper(self, df):
grouped = df.groupby("A")

result = df.groupby(grouped.grouper).mean()
expected = grouped.mean()
with tm.assert_produces_warning(FutureWarning, match="Dropping invalid"):
result = df.groupby(grouped.grouper).mean()
expected = grouped.mean()
tm.assert_frame_equal(result, expected)

def test_groupby_dict_mapping(self):
Expand Down
5 changes: 3 additions & 2 deletions pandas/tests/groupby/test_index_as_string.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,8 +47,9 @@ def series():
],
)
def test_grouper_index_level_as_string(frame, key_strs, groupers):
result = frame.groupby(key_strs).mean()
expected = frame.groupby(groupers).mean()
with tm.assert_produces_warning(FutureWarning, match="Dropping invalid"):
result = frame.groupby(key_strs).mean()
expected = frame.groupby(groupers).mean()
tm.assert_frame_equal(result, expected)


Expand Down
3 changes: 2 additions & 1 deletion pandas/tests/groupby/test_pipe.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,8 @@ def g(dfgb, arg2):
def h(df, arg3):
return df.x + df.y - arg3

result = df.groupby("group").pipe(f, 0).pipe(g, 10).pipe(h, 100)
with tm.assert_produces_warning(FutureWarning, match="Dropping invalid"):
result = df.groupby("group").pipe(f, 0).pipe(g, 10).pipe(h, 100)

# Assert the results here
index = Index(["A", "B", "C"], name="group")
Expand Down
31 changes: 21 additions & 10 deletions pandas/tests/groupby/test_timegrouper.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,14 +105,17 @@ def test_groupby_with_timegrouper(self):
)
expected.iloc[[0, 6, 18], 0] = np.array([24, 6, 9], dtype="int64")

result1 = df.resample("5D").sum()
with tm.assert_produces_warning(FutureWarning, match="Dropping invalid"):
result1 = df.resample("5D").sum()
tm.assert_frame_equal(result1, expected)

df_sorted = df.sort_index()
result2 = df_sorted.groupby(Grouper(freq="5D")).sum()
with tm.assert_produces_warning(FutureWarning, match="Dropping invalid"):
result2 = df_sorted.groupby(Grouper(freq="5D")).sum()
tm.assert_frame_equal(result2, expected)

result3 = df.groupby(Grouper(freq="5D")).sum()
with tm.assert_produces_warning(FutureWarning, match="Dropping invalid"):
result3 = df.groupby(Grouper(freq="5D")).sum()
tm.assert_frame_equal(result3, expected)

@pytest.mark.parametrize("should_sort", [True, False])
Expand Down Expand Up @@ -186,7 +189,8 @@ def test_timegrouper_with_reg_groups(self):
}
).set_index(["Date", "Buyer"])

result = df.groupby([Grouper(freq="A"), "Buyer"]).sum()
with tm.assert_produces_warning(FutureWarning, match="Dropping invalid"):
result = df.groupby([Grouper(freq="A"), "Buyer"]).sum()
tm.assert_frame_equal(result, expected)

expected = DataFrame(
Expand All @@ -201,7 +205,8 @@ def test_timegrouper_with_reg_groups(self):
],
}
).set_index(["Date", "Buyer"])
result = df.groupby([Grouper(freq="6MS"), "Buyer"]).sum()
with tm.assert_produces_warning(FutureWarning, match="Dropping invalid"):
result = df.groupby([Grouper(freq="6MS"), "Buyer"]).sum()
tm.assert_frame_equal(result, expected)

df_original = DataFrame(
Expand Down Expand Up @@ -239,10 +244,12 @@ def test_timegrouper_with_reg_groups(self):
}
).set_index(["Date", "Buyer"])

result = df.groupby([Grouper(freq="1D"), "Buyer"]).sum()
with tm.assert_produces_warning(FutureWarning, match="Dropping invalid"):
result = df.groupby([Grouper(freq="1D"), "Buyer"]).sum()
tm.assert_frame_equal(result, expected)

result = df.groupby([Grouper(freq="1M"), "Buyer"]).sum()
with tm.assert_produces_warning(FutureWarning, match="Dropping invalid"):
result = df.groupby([Grouper(freq="1M"), "Buyer"]).sum()
expected = DataFrame(
{
"Buyer": "Carl Joe Mark".split(),
Expand All @@ -258,11 +265,15 @@ def test_timegrouper_with_reg_groups(self):

# passing the name
df = df.reset_index()
result = df.groupby([Grouper(freq="1M", key="Date"), "Buyer"]).sum()
with tm.assert_produces_warning(FutureWarning, match="Dropping invalid"):
result = df.groupby([Grouper(freq="1M", key="Date"), "Buyer"]).sum()
tm.assert_frame_equal(result, expected)

with pytest.raises(KeyError, match="'The grouper name foo is not found'"):
df.groupby([Grouper(freq="1M", key="foo"), "Buyer"]).sum()
with tm.assert_produces_warning(FutureWarning, match="Dropping invalid"):
with pytest.raises(
KeyError, match="'The grouper name foo is not found'"
):
df.groupby([Grouper(freq="1M", key="foo"), "Buyer"]).sum()

# passing the level
df = df.set_index("Date")
Expand Down
Loading
0