ENH: Allow for join between two multi-index dataframe instances by harisbal · Pull Request #20356 · pandas-dev/pandas · GitHub
[go: up one dir, main page]

Skip to content

ENH: Allow for join between two multi-index dataframe instances #20356

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 25 commits into from
Nov 15, 2018
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
b581789
Allow for join between two multi-index dataframe instances
Sep 19, 2018
2d61a12
Merge remote-tracking branch 'upstream/master' into multi-index-join
harisbal Sep 19, 2018
4d4acc5
Merge remote-tracking branch 'upstream/master' into multi-index-join
harisbal Oct 7, 2018
66d82fb
Review
harisbal Oct 8, 2018
c091bb4
Merge remote-tracking branch 'upstream/master' into multi-index-join
Oct 8, 2018
d56ebcd
Second review
harisbal Oct 9, 2018
0cdad73
Merge remote-tracking branch 'upstream/master' into multi-index-join
Oct 9, 2018
c2a65aa
Merge remote-tracking branch 'upstream/master' into multi-index-join
harisbal Oct 10, 2018
571fdf7
Merge remote-tracking branch 'upstream/master' into multi-index-join
Nov 1, 2018
ae2d8ad
Review
harisbal Nov 1, 2018
405c1a4
Merge remote-tracking branch 'upstream/master' into multi-index-join
harisbal Nov 1, 2018
1d2d9f3
Fix ci
harisbal Nov 3, 2018
f0ac24d
Merge branch 'master' into multi-index-join
Nov 3, 2018
5ac40ff
Merge remote-tracking branch 'upstream/master' into multi-index-join
harisbal Nov 3, 2018
be862c7
Update v0.24.0.txt
harisbal Nov 4, 2018
e10cbde
Update docstring _restore_dropped_levels_multijoin
harisbal Nov 4, 2018
06d48d0
Update docstring _restore_dropped_levels_multijoin
harisbal Nov 4, 2018
f54c151
Merge remote-tracking branch 'upstream/master' into multi-index-join
Nov 4, 2018
c75108d
Merge remote-tracking branch 'origin/multi-index-join' into multi-ind…
harisbal Nov 5, 2018
c690260
Merge remote-tracking branch 'upstream/master' into multi-index-join
harisbal Nov 6, 2018
4092b34
updated comments
harisbal Nov 6, 2018
cfd5fcc
Refactoring
harisbal Nov 6, 2018
6c8131d
Review
harisbal Nov 10, 2018
ecaf515
Merge remote-tracking branch 'upstream/master' into multi-index-join
harisbal Nov 10, 2018
8b5d0aa
Merge remote-tracking branch 'upstream/master' into harisbal-multi-in…
TomAugspurger Nov 14, 2018
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Merge remote-tracking branch 'upstream/master' into multi-index-join
  • Loading branch information
harisbal authored and harisbal committed Oct 7, 2018
commit 4d4acc5c20a68906cf458793adb4b94d89b1534b
4 changes: 2 additions & 2 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -3124,8 +3124,8 @@ def get_value(self, series, key):
iloc = self.get_loc(key)
return s[iloc]
except KeyError:
if (len(self) > 0
and (self.holds_integer() or self.is_boolean())):
if (len(self) > 0 and
(self.holds_integer() or self.is_boolean())):
raise
elif is_integer(key):
return s[key]
Expand Down
24 changes: 16 additions & 8 deletions pandas/core/reshape/merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -813,8 +813,11 @@ def _get_merge_keys(self):

left, right = self.left, self.right

is_lkey = lambda x: is_array_like(x) and len(x) == len(left)
is_rkey = lambda x: is_array_like(x) and len(x) == len(right)
def is_lkey(x):
return is_array_like(x) and len(x) == len(left)

def is_rkey(x):
return is_array_like(x) and len(x) == len(right)

# Note that pd.merge_asof() has separate 'on' and 'by' parameters. A
# user could, for example, request 'left_index' and 'left_by'. In a
Expand Down Expand Up @@ -968,11 +971,11 @@ def _maybe_coerce_merge_keys(self):

# boolean values are considered as numeric, but are still allowed
# to be merged on object boolean values
elif ((is_numeric_dtype(lk) and not is_bool_dtype(lk))
and not is_numeric_dtype(rk)):
elif ((is_numeric_dtype(lk) and not is_bool_dtype(lk)) and not
is_numeric_dtype(rk)):
raise ValueError(msg)
elif (not is_numeric_dtype(lk)
and (is_numeric_dtype(rk) and not is_bool_dtype(rk))):
elif (not is_numeric_dtype(lk) and
(is_numeric_dtype(rk) and not is_bool_dtype(rk))):
raise ValueError(msg)
elif is_datetimelike(lk) and not is_datetimelike(rk):
raise ValueError(msg)
Expand Down Expand Up @@ -1526,6 +1529,9 @@ def flip(xs):
def _get_multiindex_indexer(join_keys, index, sort):
from functools import partial

def i8copy(a):
return a.astype('i8', subok=False, copy=True)

# bind `sort` argument
fkeys = partial(_factorize_keys, sort=sort)

Expand All @@ -1534,7 +1540,6 @@ def _get_multiindex_indexer(join_keys, index, sort):
if sort:
rlab = list(map(np.take, rlab, index.labels))
else:
i8copy = lambda a: a.astype('i8', subok=False, copy=True)
rlab = list(map(i8copy, index.labels))

# fix right labels if there were any nulls
Expand Down Expand Up @@ -1680,8 +1685,11 @@ def _sort_labels(uniques, left, right):

def _get_join_keys(llab, rlab, shape, sort):

def pred(i):
return not is_int64_overflow_possible(shape[:i])

# how many levels can be done without overflow
pred = lambda i: not is_int64_overflow_possible(shape[:i])

nlev = next(filter(pred, range(len(shape), 0, -1)))

# get keys for the first `nlev` levels
Expand Down
8 changes: 6 additions & 2 deletions pandas/tests/reshape/merge/test_multi.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,8 +59,12 @@ def test_left_join_multi_index(self):
icols = ['1st', '2nd', '3rd']

def bind_cols(df):
iord = lambda a: 0 if a != a else ord(a)
f = lambda ts: ts.map(iord) - ord('a')
def iord(a):
return 0 if a != a else ord(a)

def f(ts):
return ts.map(iord) - ord('a')

return (f(df['1st']) + f(df['3rd']) * 1e2 +
df['2nd'].fillna(0) * 1e4)

Expand Down
You are viewing a condensed version of this merge commit. You can view the full changes here.
0