PERF: SparseDataFrame._init_dict uses intermediary dict, not DataFrame by kernc · Pull Request #16883 · pandas-dev/pandas · GitHub
[go: up one dir, main page]

Skip to content

PERF: SparseDataFrame._init_dict uses intermediary dict, not DataFrame #16883

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
Jul 17, 2017
Prev Previous commit
Next Next commit
fixup! PERF: SparseDataFrame._init_dict uses intermediary dict, not DataFrame
  • Loading branch information
kernc committed Jul 12, 2017
commit b55b1a2fef4ab99036719cdc5d3c6dab70f20eb9
11 changes: 2 additions & 9 deletions pandas/core/sparse/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,7 @@ def _init_dict(self, data, index, columns, dtype=None):
v = [v.get(i, nan) for i in index]

v = sp_maker(v)
sdict[_nan_to_np_nan(k)] = v
sdict[k] = v

# TODO: figure out how to handle this case, all nan's?
# add in any other columns we want to have (completeness)
Expand Down Expand Up @@ -846,13 +846,6 @@ def applymap(self, func):
return self.apply(lambda x: lmap(func, x))


def _nan_to_np_nan(value):
"""Normalize nan values to singleton np.NaN object so that when NaNs are
used as dict keys, getitem works.
"""
return np.nan if is_float(value) and isnull(value) else value


def to_manager(sdf, columns, index):
""" create and return the block manager from a dataframe of series,
columns, index
Expand All @@ -862,7 +855,7 @@ def to_manager(sdf, columns, index):
axes = [_ensure_index(columns), _ensure_index(index)]

return create_block_manager_from_arrays(
[sdf[_nan_to_np_nan(c)] for c in columns], columns, axes)
[sdf[c] for c in columns], columns, axes)


def stack_sparse_frame(frame):
Expand Down
1 change: 1 addition & 0 deletions pandas/tests/sparse/test_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -1095,6 +1095,7 @@ def test_as_blocks(self):
assert list(df_blocks.keys()) == ['float64']
tm.assert_frame_equal(df_blocks['float64'], df)

@pytest.mark.xfail(reason='nan column names in _init_dict problematic')
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

same here

def test_nan_columnname(self):
# GH 8822
nan_colname = DataFrame(Series(1.0, index=[0]), columns=[nan])
Expand Down
0