dispatch scalar DataFrame ops to Series by jbrockmendel · Pull Request #22163 · pandas-dev/pandas · GitHub
[go: up one dir, main page]

Skip to content

dispatch scalar DataFrame ops to Series #22163

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 19 commits into from
Aug 14, 2018
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
dispatch scalar DataFrame ops to Series
  • Loading branch information
jbrockmendel committed Aug 2, 2018
commit 7681092bcbffafd01bed83621318cc5b8208e4e9
14 changes: 14 additions & 0 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -4940,6 +4940,20 @@ def _combine_match_columns(self, other, func, level=None, try_cast=True):
return self._constructor(new_data)

def _combine_const(self, other, func, errors='raise', try_cast=True):
if lib.is_scalar(other) or np.ndim(other) == 0:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is pretty annoying that we have to do this. I would make an explicit function, maybe is_any_scalar, as we have these types of checks all over. Please make an issue for this.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done.

new_data = {i: func(self.iloc[:, i], other)
for i, col in enumerate(self.columns)}

result = self._constructor(new_data, index=self.index, copy=False)
result.columns = self.columns
return result
elif np.ndim(other) == 2 and other.shape == self.shape:
new_data = {i: func(self.iloc[:, i], other[:, i])
for i in range(len(self.columns))}
result = self._constructor(new_data, index=self.index, copy=False)
result.columns = self.columns
return result

new_data = self._data.eval(func=func, other=other,
errors=errors,
try_cast=try_cast)
Expand Down
7 changes: 5 additions & 2 deletions pandas/core/ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -1311,7 +1311,7 @@ def na_op(x, y):
with np.errstate(all='ignore'):
result = method(y)
if result is NotImplemented:
raise TypeError("invalid type comparison")
return invalid_comparison(x, y, op)
else:
result = op(x, y)

Expand Down Expand Up @@ -1706,7 +1706,10 @@ def f(self, other, axis=default_axis, level=None, fill_value=None):
if fill_value is not None:
self = self.fillna(fill_value)

return self._combine_const(other, na_op, try_cast=True)
pass_func = na_op
if is_scalar(lib.item_from_zerodim(other)):
pass_func = op
return self._combine_const(other, pass_func, try_cast=True)

f.__name__ = op_name

Expand Down
6 changes: 2 additions & 4 deletions pandas/tests/frame/test_arithmetic.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,8 @@ def test_df_float_none_comparison(self):
df = pd.DataFrame(np.random.randn(8, 3), index=range(8),
columns=['A', 'B', 'C'])

with pytest.raises(TypeError):
df.__eq__(None)
result = df == None
assert not result.any().any()

def test_df_string_comparison(self):
df = pd.DataFrame([{"a": 1, "b": "foo"}, {"a": 2, "b": "bar"}])
Expand Down Expand Up @@ -201,8 +201,6 @@ def test_df_div_zero_series_does_not_commute(self):

class TestFrameArithmetic(object):

@pytest.mark.xfail(reason='GH#7996 datetime64 units not converted to nano',
strict=True)
def test_df_sub_datetime64_not_ns(self):
df = pd.DataFrame(pd.date_range('20130101', periods=3))
dt64 = np.datetime64('2013-01-01')
Expand Down
7 changes: 6 additions & 1 deletion pandas/tests/frame/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -2759,9 +2759,14 @@ def test_where_datetime(self):
C=np.random.randn(5)))

stamp = datetime(2013, 1, 3)
result = df[df > stamp]
with pytest.raises(TypeError):
df > stamp

result = df[df.iloc[:, :-1] > stamp]

expected = df.copy()
expected.loc[[0, 1], 'A'] = np.nan
expected.loc[:, 'C'] = np.nan
assert_frame_equal(result, expected)

def test_where_none(self):
Expand Down
13 changes: 9 additions & 4 deletions pandas/tests/frame/test_operators.py
Original file line number Diff line number Diff line change
Expand Up @@ -206,10 +206,15 @@ def test_timestamp_compare(self):
right_f = getattr(operator, right)

# no nats
expected = left_f(df, Timestamp('20010109'))
result = right_f(Timestamp('20010109'), df)
assert_frame_equal(result, expected)

if left in ['eq', 'ne']:
expected = left_f(df, Timestamp('20010109'))
result = right_f(Timestamp('20010109'), df)
assert_frame_equal(result, expected)
else:
with pytest.raises(TypeError):
left_f(df, Timestamp('20010109'))
with pytest.raises(TypeError):
right_f(Timestamp('20010109'), df)
# nats
expected = left_f(df, Timestamp('nat'))
result = right_f(Timestamp('nat'), df)
Expand Down
9 changes: 4 additions & 5 deletions pandas/tests/indexes/timedeltas/test_arithmetic.py
Original file line number Diff line number Diff line change
Expand Up @@ -898,7 +898,6 @@ def test_timedelta_ops_with_missing_values(self):
scalar1 = pd.to_timedelta('00:00:01')
scalar2 = pd.to_timedelta('00:00:02')
timedelta_NaT = pd.to_timedelta('NaT')
NA = np.nan

actual = scalar1 + scalar1
assert actual == scalar2
Expand Down Expand Up @@ -966,10 +965,10 @@ def test_timedelta_ops_with_missing_values(self):
actual = df1 - timedelta_NaT
tm.assert_frame_equal(actual, dfn)

actual = df1 + NA
tm.assert_frame_equal(actual, dfn)
actual = df1 - NA
tm.assert_frame_equal(actual, dfn)
with pytest.raises(TypeError):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why is this raising? this is a big change if you don't allow nan to act as NaT in ops

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is the current behavior for Series and Index.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this needs a subsection in the whatsnew then, marked as an api change.

actual = df1 + np.nan
with pytest.raises(TypeError):
actual = df1 - np.nan

actual = df1 + pd.NaT # NaT is datetime, not timedelta
tm.assert_frame_equal(actual, dfn)
Expand Down
25 changes: 20 additions & 5 deletions pandas/tests/internals/test_internals.py
Original file line number Diff line number Diff line change
Expand Up @@ -1235,16 +1235,31 @@ def test_binop_other(self, op, value, dtype):
(operator.truediv, 'bool'),
(operator.mod, 'i8'),
(operator.mod, 'complex128'),
(operator.mod, '<M8[ns]'),
(operator.mod, '<m8[ns]'),
(operator.pow, 'bool')}
if (op, dtype) in skip:
pytest.skip("Invalid combination {},{}".format(op, dtype))

e = DummyElement(value, dtype)
s = pd.DataFrame({"A": [e.value, e.value]}, dtype=e.dtype)
result = op(s, e).dtypes
expected = op(s, value).dtypes
assert_series_equal(result, expected)

invalid = {(operator.pow, '<M8[ns]'),
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

pull this out and parametrize

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This test already has two layers of parametrization; it isn't clear how to pull this out without making it more verbose+repetitive. Let me give this some thought and circle back.

(operator.mod, '<M8[ns]'),
(operator.truediv, '<M8[ns]'),
(operator.mul, '<M8[ns]'),
(operator.add, '<M8[ns]'),
(operator.pow, '<m8[ns]'),
(operator.mod, '<m8[ns]'),
(operator.mul, '<m8[ns]')}

if (op, dtype) in invalid:
with pytest.raises(TypeError):
result = op(s, e.value)
else:
# FIXME: Since dispatching to Series, this test no longer
# asserts anything meaningful
result = op(s, e.value).dtypes
expected = op(s, value).dtypes
assert_series_equal(result, expected)


@pytest.mark.parametrize('typestr, holder', [
Expand Down
74 changes: 24 additions & 50 deletions pandas/tests/test_arithmetic.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,9 +36,7 @@ class TestNumericArraylikeArithmeticWithTimedeltaScalar(object):
@pytest.mark.parametrize('box', [
pd.Index,
Series,
pytest.param(pd.DataFrame,
marks=pytest.mark.xfail(reason="block.eval incorrect",
strict=True))
pd.DataFrame
])
@pytest.mark.parametrize('index', [
pd.Int64Index(range(1, 11)),
Expand All @@ -54,7 +52,7 @@ class TestNumericArraylikeArithmeticWithTimedeltaScalar(object):
def test_numeric_arr_mul_tdscalar(self, scalar_td, index, box):
# GH#19333

if (box is Series and
if (box in [Series, pd.DataFrame] and
type(scalar_td) is timedelta and index.dtype == 'f8'):
raise pytest.xfail(reason="Cannot multiply timedelta by float")

Expand Down Expand Up @@ -141,11 +139,7 @@ def test_td64arr_add_sub_float(self, box, op, other):
@pytest.mark.parametrize('box', [
pd.Index,
Series,
pytest.param(pd.DataFrame,
marks=pytest.mark.xfail(reason="Tries to cast df to "
"Period",
strict=True,
raises=IncompatibleFrequency))
pd.DataFrame
], ids=lambda x: x.__name__)
@pytest.mark.parametrize('freq', [None, 'H'])
def test_td64arr_sub_period(self, box, freq):
Expand Down Expand Up @@ -186,8 +180,11 @@ def test_td64arr_sub_pi(self, box, tdi_freq, pi_freq):
# -------------------------------------------------------------
# Binary operations td64 arraylike and datetime-like

@pytest.mark.parametrize('box', [pd.Index, Series, pd.DataFrame],
ids=lambda x: x.__name__)
@pytest.mark.parametrize('box', [
pd.Index,
Series,
pd.DataFrame
], ids=lambda x: x.__name__)
def test_td64arr_sub_timestamp_raises(self, box):
idx = TimedeltaIndex(['1 day', '2 day'])
idx = tm.box_expected(idx, box)
Expand All @@ -199,9 +196,7 @@ def test_td64arr_sub_timestamp_raises(self, box):
@pytest.mark.parametrize('box', [
pd.Index,
Series,
pytest.param(pd.DataFrame,
marks=pytest.mark.xfail(reason="Returns object dtype",
strict=True))
pd.DataFrame
], ids=lambda x: x.__name__)
def test_td64arr_add_timestamp(self, box):
idx = TimedeltaIndex(['1 day', '2 day'])
Expand All @@ -216,9 +211,7 @@ def test_td64arr_add_timestamp(self, box):
@pytest.mark.parametrize('box', [
pd.Index,
Series,
pytest.param(pd.DataFrame,
marks=pytest.mark.xfail(reason="Returns object dtype",
strict=True))
pd.DataFrame
], ids=lambda x: x.__name__)
def test_td64_radd_timestamp(self, box):
idx = TimedeltaIndex(['1 day', '2 day'])
Expand Down Expand Up @@ -333,7 +326,8 @@ def test_td64arr_add_sub_numeric_scalar_invalid(self, box, scalar, tdser):

if box is pd.DataFrame and isinstance(scalar, np.ndarray):
# raises ValueError
pytest.xfail(reason="DataFrame to broadcast incorrectly")
pytest.xfail(reason="reversed ops return incorrect answers "
"instead of raising.")

tdser = tm.box_expected(tdser, box)
err = TypeError
Expand Down Expand Up @@ -392,11 +386,7 @@ def test_td64arr_add_sub_numeric_arr_invalid(self, box, vec, dtype, tdser):
@pytest.mark.parametrize('box', [
pd.Index,
Series,
pytest.param(pd.DataFrame,
marks=pytest.mark.xfail(reason="Returns object dtype "
"instead of "
"datetime64[ns]",
strict=True))
pd.DataFrame
], ids=lambda x: x.__name__)
def test_td64arr_add_sub_timestamp(self, box):
# GH#11925
Expand Down Expand Up @@ -505,10 +495,7 @@ class TestTimedeltaArraylikeMulDivOps(object):
@pytest.mark.parametrize('box', [
pd.Index,
Series,
pytest.param(pd.DataFrame,
marks=pytest.mark.xfail(reason="Incorrectly returns "
"m8[ns] instead of f8",
strict=True))
pd.DataFrame
], ids=lambda x: x.__name__)
@pytest.mark.parametrize('scalar_td', [
timedelta(minutes=5, seconds=4),
Expand All @@ -530,16 +517,17 @@ def test_td64arr_floordiv_tdscalar(self, box, scalar_td):
@pytest.mark.parametrize('box', [
pd.Index,
Series,
pytest.param(pd.DataFrame,
marks=pytest.mark.xfail(reason="Incorrectly casts to f8",
strict=True))
pd.DataFrame
], ids=lambda x: x.__name__)
@pytest.mark.parametrize('scalar_td', [
timedelta(minutes=5, seconds=4),
Timedelta('5m4s'),
Timedelta('5m4s').to_timedelta64()])
def test_td64arr_rfloordiv_tdscalar(self, box, scalar_td):
# GH#18831
if box is pd.DataFrame and isinstance(scalar_td, np.timedelta64):
pytest.xfail(reason="raises TypeError, not sure why")

td1 = Series([timedelta(minutes=5, seconds=3)] * 3)
td1.iloc[2] = np.nan

Expand All @@ -554,10 +542,7 @@ def test_td64arr_rfloordiv_tdscalar(self, box, scalar_td):
@pytest.mark.parametrize('box', [
pd.Index,
Series,
pytest.param(pd.DataFrame,
marks=pytest.mark.xfail(reason="Returns m8[ns] dtype "
"instead of f8",
strict=True))
pd.DataFrame
], ids=lambda x: x.__name__)
@pytest.mark.parametrize('scalar_td', [
timedelta(minutes=5, seconds=4),
Expand All @@ -584,11 +569,7 @@ def test_td64arr_rfloordiv_tdscalar_explicit(self, box, scalar_td):
@pytest.mark.parametrize('box', [
pd.Index,
Series,
pytest.param(pd.DataFrame,
marks=pytest.mark.xfail(reason="__mul__ op treats "
"timedelta other as i8; "
"rmul OK",
strict=True))
pd.DataFrame
], ids=lambda x: x.__name__)
@pytest.mark.parametrize('scalar_td', [
timedelta(minutes=5, seconds=4),
Expand All @@ -615,9 +596,7 @@ def test_td64arr_mul_tdscalar_invalid(self, box, scalar_td):
@pytest.mark.parametrize('box', [
pd.Index,
Series,
pytest.param(pd.DataFrame,
marks=pytest.mark.xfail(reason="Returns object-dtype",
strict=True))
pd.DataFrame
], ids=lambda x: x.__name__)
@pytest.mark.parametrize('one', [1, np.array(1), 1.0, np.array(1.0)])
def test_td64arr_mul_numeric_scalar(self, box, one, tdser):
Expand Down Expand Up @@ -646,9 +625,7 @@ def test_td64arr_mul_numeric_scalar(self, box, one, tdser):
@pytest.mark.parametrize('box', [
pd.Index,
Series,
pytest.param(pd.DataFrame,
marks=pytest.mark.xfail(reason="Returns object-dtype",
strict=True))
pd.DataFrame
], ids=lambda x: x.__name__)
@pytest.mark.parametrize('two', [2, 2.0, np.array(2), np.array(2.0)])
def test_td64arr_div_numeric_scalar(self, box, two, tdser):
Expand Down Expand Up @@ -824,11 +801,8 @@ class TestTimedeltaArraylikeInvalidArithmeticOps(object):
@pytest.mark.parametrize('box', [
pd.Index,
Series,
pytest.param(pd.DataFrame,
marks=pytest.mark.xfail(reason="raises ValueError "
"instead of TypeError",
strict=True))
])
pd.DataFrame
], ids=lambda x: x.__name__)
@pytest.mark.parametrize('scalar_td', [
timedelta(minutes=5, seconds=4),
Timedelta('5m4s'),
Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/test_expressions.py
Original file line number Diff line number Diff line change
Expand Up @@ -390,10 +390,10 @@ def test_bool_ops_raise_on_arithmetic(self):
with tm.assert_raises_regex(NotImplementedError, err_msg):
f(False, df.a)

with tm.assert_raises_regex(TypeError, err_msg):
with tm.assert_raises_regex(NotImplementedError, err_msg):
f(False, df)

with tm.assert_raises_regex(TypeError, err_msg):
with tm.assert_raises_regex(NotImplementedError, err_msg):
f(df, True)

def test_bool_ops_warn_on_arithmetic(self):
Expand Down
0