BUG: CategoricalIndex.equals casting non-categories to np.nan by jbrockmendel · Pull Request #37667 · pandas-dev/pandas · GitHub
[go: up one dir, main page]

Skip to content

BUG: CategoricalIndex.equals casting non-categories to np.nan #37667

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Nov 8, 2020
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
BUG: CategoricalIndex.equals casting incorrectly
  • Loading branch information
jbrockmendel committed Nov 6, 2020
commit 6430336066da84bcfde32f59e7ffdd78fe28529b
16 changes: 10 additions & 6 deletions pandas/core/indexes/category.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
pandas_dtype,
)
from pandas.core.dtypes.dtypes import CategoricalDtype
from pandas.core.dtypes.missing import is_valid_nat_for_dtype, notna
from pandas.core.dtypes.missing import is_valid_nat_for_dtype, isna, notna

from pandas.core import accessor
from pandas.core.arrays.categorical import Categorical, contains
Expand Down Expand Up @@ -263,6 +263,7 @@ def _is_dtype_compat(self, other) -> Categorical:
values = other
if not is_list_like(values):
values = [values]

cat = Categorical(other, dtype=self.dtype)
other = CategoricalIndex(cat)
if not other.isin(values).all():
Expand All @@ -271,6 +272,12 @@ def _is_dtype_compat(self, other) -> Categorical:
)
other = other._values

if not ((other == values) | (isna(other) & isna(values))).all():
# GH#37667 see test_equals_non_category
raise TypeError(
"categories must match existing categories when appending"
)

return other

def equals(self, other: object) -> bool:
Expand All @@ -291,13 +298,10 @@ def equals(self, other: object) -> bool:

try:
other = self._is_dtype_compat(other)
if isinstance(other, type(self)):
other = other._data
return self._data.equals(other)
except (TypeError, ValueError):
pass
return False

return False
return self._data.equals(other)

# --------------------------------------------------------------------
# Rendering Methods
Expand Down
8 changes: 8 additions & 0 deletions pandas/tests/indexes/categorical/test_category.py
Original file line number Diff line number Diff line change
Expand Up @@ -444,6 +444,14 @@ def test_equals_categorical_unordered(self):
assert not a.equals(c)
assert not b.equals(c)

def test_equals_non_category(self):
# Regression test for GH#37667.
# Case where other contains a value not among ci's categories ("D") and
# also contains np.nan
# Casting other to ci's dtype would turn "D" into NaN; that NaN must not be
# treated as matching the NaN ci holds at the same position, so the two
# indexes have to compare as unequal.
ci = CategoricalIndex(["A", "B", np.nan, np.nan])
other = Index(["A", "B", "D", np.nan])

assert not ci.equals(other)

def test_frame_repr(self):
df = pd.DataFrame({"A": [1, 2, 3]}, index=CategoricalIndex(["a", "b", "c"]))
result = repr(df)
Expand Down
0