-
-
Notifications
You must be signed in to change notification settings - Fork 18.7k
ENH: Allow for join between two multi-index dataframe instances #20356
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
b581789
2d61a12
4d4acc5
66d82fb
c091bb4
d56ebcd
0cdad73
c2a65aa
571fdf7
ae2d8ad
405c1a4
1d2d9f3
f0ac24d
5ac40ff
be862c7
e10cbde
06d48d0
f54c151
c75108d
c690260
4092b34
cfd5fcc
6c8131d
ecaf515
8b5d0aa
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
- Loading branch information
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1141,12 +1141,12 @@ def _get_join_indexers(left_keys, right_keys, sort=False, how='inner', | |
return join_func(lkey, rkey, count, **kwargs) | ||
|
||
|
||
def _complete_multilevel_join(left, right, how, dropped_levels, | ||
join_idx, lidx, ridx): | ||
def _restore_dropped_levels_multijoin(left, right, dropped_level_names, | ||
join_idx, lidx, ridx): | ||
""" | ||
*this is an internal non-public method* | ||
|
||
Returns the levels, labels and names of a multilevel to multilevel join | ||
Returns the levels, labels and names of a multi-index to multi-index join. | ||
Depending on the type of join, this method restores the appropriate | ||
dropped levels of the joined multi-index. The method relies on lidx, ridx | ||
which hold the index positions of left and right, where a join was feasible | ||
|
@@ -1157,19 +1157,18 @@ def _complete_multilevel_join(left, right, how, dropped_levels, | |
left index | ||
right : Index | ||
right index | ||
dropped_level_names : str array | ||
list of non-common levels | ||
join_idx : Index | ||
the index of the join between the common levels of left and right | ||
how : {'left', 'right', 'outer', 'inner'} | ||
lidx : intp array | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We don't do these types formally, but I think something like There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can you update |
||
left indexer | ||
ridx : intp array | ||
right indexer | ||
dropped_levels : str array | ||
list of non-common levels | ||
|
||
Returns | ||
------- | ||
levels : intp array | ||
levels : intp ndarray | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Are these the correct return types? At a glance, it looks like they'd be There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. is this correct? |
||
levels of combined multiindexes | ||
labels : str array | ||
labels of combined multiindexes | ||
|
@@ -1178,12 +1177,20 @@ def _complete_multilevel_join(left, right, how, dropped_levels, | |
|
||
""" | ||
|
||
# Convert to 1 level multi-index if not | ||
if not isinstance(join_idx, MultiIndex): | ||
harisbal marked this conversation as resolved.
Show resolved
Hide resolved
|
||
levels = [join_idx.values] | ||
labels = [list(range(0, len(join_idx)))] | ||
harisbal marked this conversation as resolved.
Show resolved
Hide resolved
|
||
names = [join_idx.name] | ||
join_idx = MultiIndex(levels=levels, labels=labels, | ||
names=names, verify_integrity=False) | ||
|
||
join_levels = join_idx.levels | ||
join_labels = join_idx.labels | ||
join_names = join_idx.names | ||
|
||
# lidx and ridx hold the indexes where the join occurred | ||
# for left and right respectively. If left (right) is None it means that | ||
# for left and right respectively. If lidx (ridx) is None then | ||
# the join occurred on all indices of left (right) | ||
if lidx is None: | ||
lidx = range(0, len(left)) | ||
|
@@ -1192,27 +1199,26 @@ def _complete_multilevel_join(left, right, how, dropped_levels, | |
ridx = range(0, len(right)) | ||
|
||
# Iterate through the levels that must be restored | ||
for dl in dropped_levels: | ||
if dl in left.names: | ||
for dropped_level_name in dropped_level_names: | ||
if dropped_level_name in left.names: | ||
idx = left | ||
indexer = lidx | ||
else: | ||
idx = right | ||
indexer = ridx | ||
|
||
# The index of the level name to be restored | ||
name_idx = idx.names.index(dl) | ||
name_idx = idx.names.index(dropped_level_name) | ||
|
||
restore_levels = idx.levels[name_idx].values | ||
restore_labels = idx.labels[name_idx] | ||
|
||
join_levels = join_levels.__add__([restore_levels]) | ||
join_names = join_names.__add__([dl]) | ||
|
||
# Inject -1 in the labels list where a join was not possible | ||
# IOW indexer[i]=-1 | ||
labels = [restore_labels[i] if i != -1 else -1 for i in indexer] | ||
join_labels = join_labels.__add__([labels]) | ||
labels = idx.labels[name_idx] | ||
restore_labels = [labels[i] if i != -1 else -1 for i in indexer] | ||
harisbal marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
join_levels = join_levels.__add__([restore_levels]) | ||
harisbal marked this conversation as resolved.
Show resolved
Hide resolved
|
||
join_labels = join_labels.__add__([restore_labels]) | ||
join_names = join_names.__add__([dropped_level_name]) | ||
|
||
return join_levels, join_labels, join_names | ||
|
||
|
Uh oh!
There was an error while loading. Please reload this page.