8000 BUG: DataFrame.merge(suffixes=) does not respect None by charlesdong1991 · Pull Request #24819 · pandas-dev/pandas · GitHub
[go: up one dir, main page]

Skip to content

BUG: DataFrame.merge(suffixes=) does not respect None #24819

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 11 commits into from
Feb 6, 2019
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
changes based on discussion
  • Loading branch information
charlesdong1991 committed Feb 2, 2019
commit 3f65bf19b2739612e068c3659ba5f2d6bdd13377
11 changes: 0 additions & 11 deletions pandas/core/internals/managers.py
Original file line number Diff line number Diff line change
Expand Up @@ -1967,17 +1967,6 @@ def items_overlap_with_suffix(left, lsuffix, right, rsuffix):
if len(to_rename) == 0:
return left, right
else:
# if column name is string, raise error if suffix is a combination of
# empty string and None, or two Nones
if isinstance(to_rename[0], str):
if not lsuffix and not rsuffix:
raise ValueError('columns overlap but no suffix specified: '
'{rename}'.format(rename=to_rename))
else:
# if not, only suffix with (None, None) will raise error
if lsuffix is None and rsuffix is None:
raise ValueError('columns overlap but no suffix specified: '
'{rename}'.format(rename=to_rename))

def renamer(x, suffix):
"""Rename the left and right indices.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you make a proper doc-string (Parameters / Returns)

Expand Down
7 changes: 4 additions & 3 deletions pandas/core/reshape/merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,13 +159,12 @@ def merge_ordered(left, right, on=None,
left DataFrame
fill_method : {'ffill', None}, default None
Interpolation method for data
suffixes : Sequence, default is ("_x", "_y")
suffixes : Sequence or None, default is ("_x", "_y")
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The or None can be removed I think (we removed that change from this PR)

A length-2 sequence where each element is optionally a string
indicating the suffix to add to overlapping column names in
`left` and `right` respectively. Pass a value of `None` instead
of a string to indicate that the column name from `left` or
`right` should be left as-is, with no suffix. At least one of the
values must not be None.
`right` should be left as-is, with no suffix.
how : {'left', 'right', 'outer', 'inner'}, default 'outer'
* left: use only keys from left frame (SQL: left outer join)
* right: use only keys from right frame (SQL: right outer join)
Expand Down Expand Up @@ -493,6 +492,8 @@ def __init__(self, left, right, how='inner', on=None,

self.copy = copy
self.suffixes = suffixes
if suffixes is None:
self.suffixes = (None, None)
self.sort = sort

self.left_index = left_index
Expand Down
28 changes: 9 additions & 19 deletions pandas/tests/reshape/merge/test_merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -1538,10 +1538,17 @@ def test_merge_series(on, left_on, right_on, left_index, right_index, nm):
("a", "a", dict(suffixes=("_x", None)), ["a_x", "a"]),
("a", "b", dict(suffixes=("_x", None)), ["a", "b"]),
("a", "a", dict(suffixes=[None, "_x"]), ["a", "a_x"]),
(0, 0, dict(suffixes=(["_a", None])), ["0_a", 0]),
(0, 0, dict(suffixes=["_a", None]), ["0_a", 0]),
(0, 0, dict(suffixes=('', None)), ["0", 0]),
("a", "a", dict(), ["a_x", "a_y"]),
(0, 0, dict(), ["0_x", "0_y"])
(0, 0, dict(), ["0_x", "0_y"]),
("a", "a", dict(suffixes=(None, None)), ["a", "a"]),
("a", "a", dict(suffixes=[None, None]), ["a", "a"]),
(0, 0, dict(suffixes=(None, None)), [0, 0]),
("a", "a", dict(suffixes=['', None]), ["a", "a"]),
(0, 0, dict(suffixes=['', None]), ["0", 0]),
(0, 0, dict(suffixes=None), [0, 0]),
("a", "a", dict(suffixes=None), ["a", "a"])
])
def test_merge_suffix(col1, col2, kwargs, expected_cols):
# issue: 24782
Expand All @@ -1556,20 +1563,3 @@ def test_merge_suffix(col1, col2, kwargs, expected_cols):

result = pd.merge(a, b, left_index=True, right_index=True, **kwargs)
tm.assert_frame_equal(result, expected)


@pytest.mark.parametrize("col, suffixes", [
('a', (None, None)),
('a', [None, None]),
('a', ('', None)),
('a', [None, '']),
(0, (None, None))
])
def test_merge_suffix_errors(col, suffixes):
# issue: 24782
a = pd.DataFrame({col: [1, 2, 3]})
b = pd.DataFrame({col: [4, 5, 6]})

with pytest.raises(ValueError,
match="columns overlap but no suffix specified"):
a.merge(b, left_index=True, right_index=True, suffixes=suffixes)
0