dispatch scalar DataFrame ops to Series

pandas-dev · jreback · Aug 14, 2018 · Aug 2, 2018 · Aug 2, 2018 · Aug 2, 2018
commit 7681092bcbffafd01bed83621318cc5b8208e4e9
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -4940,6 +4940,20 @@ def _combine_match_columns(self, other, func, level=None, try_cast=True):
         return self._constructor(new_data)
 
     def _combine_const(self, other, func, errors='raise', try_cast=True):
+        if lib.is_scalar(other) or np.ndim(other) == 0:
+            new_data = {i: func(self.iloc[:, i], other)
+                        for i, col in enumerate(self.columns)}
+
+            result = self._constructor(new_data, index=self.index, copy=False)
+            result.columns = self.columns
+            return result
+        elif np.ndim(other) == 2 and other.shape == self.shape:
+            new_data = {i: func(self.iloc[:, i], other[:, i])
+                        for i in range(len(self.columns))}
+            result = self._constructor(new_data, index=self.index, copy=False)
+            result.columns = self.columns
+            return result
+
         new_data = self._data.eval(func=func, other=other,
                                    errors=errors,
                                    try_cast=try_cast)

diff --git a/pandas/core/ops.py b/pandas/core/ops.py
@@ -1311,7 +1311,7 @@ def na_op(x, y):
                 with np.errstate(all='ignore'):
                     result = method(y)
                 if result is NotImplemented:
-                    raise TypeError("invalid type comparison")
+                    return invalid_comparison(x, y, op)
             else:
                 result = op(x, y)
 
@@ -1706,7 +1706,10 @@ def f(self, other, axis=default_axis, level=None, fill_value=None):
             if fill_value is not None:
                 self = self.fillna(fill_value)
 
-            return self._combine_const(other, na_op, try_cast=True)
+            pass_func = na_op
+            if is_scalar(lib.item_from_zerodim(other)):
+                pass_func = op
+            return self._combine_const(other, pass_func, try_cast=True)
 
     f.__name__ = op_name
 

diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py
@@ -27,8 +27,8 @@ def test_df_float_none_comparison(self):
         df = pd.DataFrame(np.random.randn(8, 3), index=range(8),
                           columns=['A', 'B', 'C'])
 
-        with pytest.raises(TypeError):
-            df.__eq__(None)
+        result = df == None
+        assert not result.any().any()
 
     def test_df_string_comparison(self):
         df = pd.DataFrame([{"a": 1, "b": "foo"}, {"a": 2, "b": "bar"}])
@@ -201,8 +201,6 @@ def test_df_div_zero_series_does_not_commute(self):
 
 class TestFrameArithmetic(object):
 
-    @pytest.mark.xfail(reason='GH#7996 datetime64 units not converted to nano',
-                       strict=True)
     def test_df_sub_datetime64_not_ns(self):
         df = pd.DataFrame(pd.date_range('20130101', periods=3))
         dt64 = np.datetime64('2013-01-01')

diff --git a/pandas/tests/frame/test_indexing.py b/pandas/tests/frame/test_indexing.py
@@ -2759,9 +2759,14 @@ def test_where_datetime(self):
                             C=np.random.randn(5)))
 
         stamp = datetime(2013, 1, 3)
-        result = df[df > stamp]
+        with pytest.raises(TypeError):
+            df > stamp
+
+        result = df[df.iloc[:, :-1] > stamp]
+
         expected = df.copy()
         expected.loc[[0, 1], 'A'] = np.nan
+        expected.loc[:, 'C'] = np.nan
         assert_frame_equal(result, expected)
 
     def test_where_none(self):

diff --git a/pandas/tests/frame/test_operators.py b/pandas/tests/frame/test_operators.py
@@ -206,10 +206,15 @@ def test_timestamp_compare(self):
             right_f = getattr(operator, right)
 
             # no nats
-            expected = left_f(df, Timestamp('20010109'))
-            result = right_f(Timestamp('20010109'), df)
-            assert_frame_equal(result, expected)
-
+            if left in ['eq', 'ne']:
+                expected = left_f(df, Timestamp('20010109'))
+                result = right_f(Timestamp('20010109'), df)
+                assert_frame_equal(result, expected)
+            else:
+                with pytest.raises(TypeError):
+                    left_f(df, Timestamp('20010109'))
+                with pytest.raises(TypeError):
+                    right_f(Timestamp('20010109'), df)
             # nats
             expected = left_f(df, Timestamp('nat'))
             result = right_f(Timestamp('nat'), df)

diff --git a/pandas/tests/indexes/timedeltas/test_arithmetic.py b/pandas/tests/indexes/timedeltas/test_arithmetic.py
@@ -898,7 +898,6 @@ def test_timedelta_ops_with_missing_values(self):
         scalar1 = pd.to_timedelta('00:00:01')
         scalar2 = pd.to_timedelta('00:00:02')
         timedelta_NaT = pd.to_timedelta('NaT')
-        NA = np.nan
 
         actual = scalar1 + scalar1
         assert actual == scalar2
@@ -966,10 +965,10 @@ def test_timedelta_ops_with_missing_values(self):
         actual = df1 - timedelta_NaT
         tm.assert_frame_equal(actual, dfn)
 
-        actual = df1 + NA
-        tm.assert_frame_equal(actual, dfn)
-        actual = df1 - NA
-        tm.assert_frame_equal(actual, dfn)
+        with pytest.raises(TypeError):
+            actual = df1 + np.nan
+        with pytest.raises(TypeError):
+            actual = df1 - np.nan
 
         actual = df1 + pd.NaT  # NaT is datetime, not timedelta
         tm.assert_frame_equal(actual, dfn)

diff --git a/pandas/tests/internals/test_internals.py b/pandas/tests/internals/test_internals.py
@@ -1235,16 +1235,31 @@ def test_binop_other(self, op, value, dtype):
                 (operator.truediv, 'bool'),
                 (operator.mod, 'i8'),
                 (operator.mod, 'complex128'),
-                (operator.mod, '<M8[ns]'),
-                (operator.mod, '<m8[ns]'),
                 (operator.pow, 'bool')}
         if (op, dtype) in skip:
             pytest.skip("Invalid combination {},{}".format(op, dtype))
+
         e = DummyElement(value, dtype)
         s = pd.DataFrame({"A": [e.value, e.value]}, dtype=e.dtype)
-        result = op(s, e).dtypes
-        expected = op(s, value).dtypes
-        assert_series_equal(result, expected)
+
+        invalid = {(operator.pow, '<M8[ns]'),
+                   (operator.mod, '<M8[ns]'),
+                   (operator.truediv, '<M8[ns]'),
+                   (operator.mul, '<M8[ns]'),
+                   (operator.add, '<M8[ns]'),
+                   (operator.pow, '<m8[ns]'),
+                   (operator.mod, '<m8[ns]'),
+                   (operator.mul, '<m8[ns]')}
+
+        if (op, dtype) in invalid:
+            with pytest.raises(TypeError):
+                result = op(s, e.value)
+        else:
+            # FIXME: Since dispatching to Series, this test no longer
+            # asserts anything meaningful
+            result = op(s, e.value).dtypes
+            expected = op(s, value).dtypes
+            assert_series_equal(result, expected)
 
 
 @pytest.mark.parametrize('typestr, holder', [

diff --git a/pandas/tests/test_arithmetic.py b/pandas/tests/test_arithmetic.py
@@ -36,9 +36,7 @@ class TestNumericArraylikeArithmeticWithTimedeltaScalar(object):
     @pytest.mark.parametrize('box', [
         pd.Index,
         Series,
-        pytest.param(pd.DataFrame,
-                     marks=pytest.mark.xfail(reason="block.eval incorrect",
-                                             strict=True))
+        pd.DataFrame
     ])
     @pytest.mark.parametrize('index', [
         pd.Int64Index(range(1, 11)),
@@ -54,7 +52,7 @@ class TestNumericArraylikeArithmeticWithTimedeltaScalar(object):
     def test_numeric_arr_mul_tdscalar(self, scalar_td, index, box):
         # GH#19333
 
-        if (box is Series and
+        if (box in [Series, pd.DataFrame] and
                 type(scalar_td) is timedelta and index.dtype == 'f8'):
             raise pytest.xfail(reason="Cannot multiply timedelta by float")
 
@@ -141,11 +139,7 @@ def test_td64arr_add_sub_float(self, box, op, other):
     @pytest.mark.parametrize('box', [
         pd.Index,
         Series,
-        pytest.param(pd.DataFrame,
-                     marks=pytest.mark.xfail(reason="Tries to cast df to "
-                                                    "Period",
-                                             strict=True,
-                                             raises=IncompatibleFrequency))
+        pd.DataFrame
     ], ids=lambda x: x.__name__)
     @pytest.mark.parametrize('freq', [None, 'H'])
     def test_td64arr_sub_period(self, box, freq):
@@ -186,8 +180,11 @@ def test_td64arr_sub_pi(self, box, tdi_freq, pi_freq):
     # -------------------------------------------------------------
     # Binary operations td64 arraylike and datetime-like
 
-    @pytest.mark.parametrize('box', [pd.Index, Series, pd.DataFrame],
-                             ids=lambda x: x.__name__)
+    @pytest.mark.parametrize('box', [
+        pd.Index,
+        Series,
+        pd.DataFrame
+    ], ids=lambda x: x.__name__)
     def test_td64arr_sub_timestamp_raises(self, box):
         idx = TimedeltaIndex(['1 day', '2 day'])
         idx = tm.box_expected(idx, box)
@@ -199,9 +196,7 @@ def test_td64arr_sub_timestamp_raises(self, box):
     @pytest.mark.parametrize('box', [
         pd.Index,
         Series,
-        pytest.param(pd.DataFrame,
-                     marks=pytest.mark.xfail(reason="Returns object dtype",
-                                             strict=True))
+        pd.DataFrame
     ], ids=lambda x: x.__name__)
     def test_td64arr_add_timestamp(self, box):
         idx = TimedeltaIndex(['1 day', '2 day'])
@@ -216,9 +211,7 @@ def test_td64arr_add_timestamp(self, box):
     @pytest.mark.parametrize('box', [
         pd.Index,
         Series,
-        pytest.param(pd.DataFrame,
-                     marks=pytest.mark.xfail(reason="Returns object dtype",
-                                             strict=True))
+        pd.DataFrame
     ], ids=lambda x: x.__name__)
     def test_td64_radd_timestamp(self, box):
         idx = TimedeltaIndex(['1 day', '2 day'])
@@ -333,7 +326,8 @@ def test_td64arr_add_sub_numeric_scalar_invalid(self, box, scalar, tdser):
 
         if box is pd.DataFrame and isinstance(scalar, np.ndarray):
             # raises ValueError
-            pytest.xfail(reason="DataFrame to broadcast incorrectly")
+            pytest.xfail(reason="reversed ops return incorrect answers "
+                                "instead of raising.")
 
         tdser = tm.box_expected(tdser, box)
         err = TypeError
@@ -392,11 +386,7 @@ def test_td64arr_add_sub_numeric_arr_invalid(self, box, vec, dtype, tdser):
     @pytest.mark.parametrize('box', [
         pd.Index,
         Series,
-        pytest.param(pd.DataFrame,
-                     marks=pytest.mark.xfail(reason="Returns object dtype "
-                                                    "instead of "
-                                                    "datetime64[ns]",
-                                             strict=True))
+        pd.DataFrame
     ], ids=lambda x: x.__name__)
     def test_td64arr_add_sub_timestamp(self, box):
         # GH#11925
@@ -505,10 +495,7 @@ class TestTimedeltaArraylikeMulDivOps(object):
     @pytest.mark.parametrize('box', [
         pd.Index,
         Series,
-        pytest.param(pd.DataFrame,
-                     marks=pytest.mark.xfail(reason="Incorrectly returns "
-                                                    "m8[ns] instead of f8",
-                                             strict=True))
+        pd.DataFrame
     ], ids=lambda x: x.__name__)
     @pytest.mark.parametrize('scalar_td', [
         timedelta(minutes=5, seconds=4),
@@ -530,16 +517,17 @@ def test_td64arr_floordiv_tdscalar(self, box, scalar_td):
     @pytest.mark.parametrize('box', [
         pd.Index,
         Series,
-        pytest.param(pd.DataFrame,
-                     marks=pytest.mark.xfail(reason="Incorrectly casts to f8",
-                                             strict=True))
+        pd.DataFrame
     ], ids=lambda x: x.__name__)
     @pytest.mark.parametrize('scalar_td', [
         timedelta(minutes=5, seconds=4),
         Timedelta('5m4s'),
         Timedelta('5m4s').to_timedelta64()])
     def test_td64arr_rfloordiv_tdscalar(self, box, scalar_td):
         # GH#18831
+        if box is pd.DataFrame and isinstance(scalar_td, np.timedelta64):
+            pytest.xfail(reason="raises TypeError, not sure why")
+
         td1 = Series([timedelta(minutes=5, seconds=3)] * 3)
         td1.iloc[2] = np.nan
 
@@ -554,10 +542,7 @@ def test_td64arr_rfloordiv_tdscalar(self, box, scalar_td):
     @pytest.mark.parametrize('box', [
         pd.Index,
         Series,
-        pytest.param(pd.DataFrame,
-                     marks=pytest.mark.xfail(reason="Returns m8[ns] dtype "
-                                                    "instead of f8",
-                                             strict=True))
+        pd.DataFrame
     ], ids=lambda x: x.__name__)
     @pytest.mark.parametrize('scalar_td', [
         timedelta(minutes=5, seconds=4),
@@ -584,11 +569,7 @@ def test_td64arr_rfloordiv_tdscalar_explicit(self, box, scalar_td):
     @pytest.mark.parametrize('box', [
         pd.Index,
         Series,
-        pytest.param(pd.DataFrame,
-                     marks=pytest.mark.xfail(reason="__mul__ op treats "
-                                                    "timedelta other as i8; "
-                                                    "rmul OK",
-                                             strict=True))
+        pd.DataFrame
     ], ids=lambda x: x.__name__)
     @pytest.mark.parametrize('scalar_td', [
         timedelta(minutes=5, seconds=4),
@@ -615,9 +596,7 @@ def test_td64arr_mul_tdscalar_invalid(self, box, scalar_td):
     @pytest.mark.parametrize('box', [
         pd.Index,
         Series,
-        pytest.param(pd.DataFrame,
-                     marks=pytest.mark.xfail(reason="Returns object-dtype",
-                                             strict=True))
+        pd.DataFrame
     ], ids=lambda x: x.__name__)
     @pytest.mark.parametrize('one', [1, np.array(1), 1.0, np.array(1.0)])
     def test_td64arr_mul_numeric_scalar(self, box, one, tdser):
@@ -646,9 +625,7 @@ def test_td64arr_mul_numeric_scalar(self, box, one, tdser):
     @pytest.mark.parametrize('box', [
         pd.Index,
         Series,
-        pytest.param(pd.DataFrame,
-                     marks=pytest.mark.xfail(reason="Returns object-dtype",
-                                             strict=True))
+        pd.DataFrame
     ], ids=lambda x: x.__name__)
     @pytest.mark.parametrize('two', [2, 2.0, np.array(2), np.array(2.0)])
     def test_td64arr_div_numeric_scalar(self, box, two, tdser):
@@ -824,11 +801,8 @@ class TestTimedeltaArraylikeInvalidArithmeticOps(object):
     @pytest.mark.parametrize('box', [
         pd.Index,
         Series,
-        pytest.param(pd.DataFrame,
-                     marks=pytest.mark.xfail(reason="raises ValueError "
-                                                    "instead of TypeError",
-                                             strict=True))
-    ])
+        pd.DataFrame
+    ], ids=lambda x: x.__name__)
     @pytest.mark.parametrize('scalar_td', [
         timedelta(minutes=5, seconds=4),
         Timedelta('5m4s'),

diff --git a/pandas/tests/test_expressions.py b/pandas/tests/test_expressions.py
@@ -390,10 +390,10 @@ def test_bool_ops_raise_on_arithmetic(self):
                 with tm.assert_raises_regex(NotImplementedError, err_msg):
                     f(False, df.a)
 
-                with tm.assert_raises_regex(TypeError, err_msg):
+                with tm.assert_raises_regex(NotImplementedError, err_msg):
                     f(False, df)
 
-                with tm.assert_raises_regex(TypeError, err_msg):
+                with tm.assert_raises_regex(NotImplementedError, err_msg):
                     f(df, True)
 
     def test_bool_ops_warn_on_arithmetic(self):