Reverted behavior change when input and output are the same kind

pandas-dev · jreback · May 3, 2021 · Apr 2, 2021 · Apr 10, 2021 · Apr 10, 2021
commit f2069a7b214ea0ac15b75ea854d09af825cac47d
diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py
@@ -304,7 +304,7 @@ def maybe_downcast_to_dtype(result: ArrayLike, dtype: str | np.dtype) -> ArrayLi
 
 
 def maybe_downcast_numeric(
-    result: ArrayLike, dtype: DtypeObj, do_round: bool = False
+    result: ArrayLike, dtype: DtypeObj, do_round: bool = False, same_kind: bool = False
 ) -> ArrayLike:
     """
     Subset of maybe_downcast_to_dtype restricted to numeric dtypes.
@@ -314,6 +314,9 @@ def maybe_downcast_numeric(
     result : ndarray or ExtensionArray
     dtype : np.dtype or ExtensionDtype
     do_round : bool
+    same_kind : bool, default False
+        If True, only attempt to downcast when result.dtype is the same kind
+        as dtype.
 
     Returns
     -------
@@ -332,6 +335,8 @@ def trans(x):
         # don't allow upcasts here (except if empty)
         if result.dtype.itemsize <= dtype.itemsize and result.size:
             return result
+    elif same_kind:
+        return result
 
     if is_bool_dtype(dtype) or is_integer_dtype(dtype):
 

diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
@@ -55,6 +55,7 @@
     is_dict_like,
     is_integer_dtype,
     is_interval_dtype,
+    is_numeric_dtype,
     is_scalar,
     needs_i8_conversion,
 )
@@ -578,6 +579,13 @@ def _transform_general(self, func, *args, **kwargs):
             result = self._set_result_index_ordered(concatenated)
         else:
             result = self.obj._constructor(dtype=np.float64)
+        # Only try to coerce the result dtype when it is numeric: functions
+        # reaching this point are *always* user-defined, whereas the cython
+        # implementations take a different path (and different casting).
+        if is_numeric_dtype(result.dtype):
+            result = maybe_downcast_numeric(
+                result, self._selected_obj.dtype, same_kind=True
+            )
 
         result.name = self._selected_obj.name
         return result

diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
@@ -1226,6 +1226,9 @@ def _python_agg_general(self, func, *args, **kwargs):
             assert result is not None
             key = base.OutputKey(label=name, position=idx)
 
+            if is_numeric_dtype(obj.dtype):
+                result = maybe_downcast_numeric(result, obj.dtype, same_kind=True)
+
             if self.grouper._filter_empty_groups:
                 mask = counts.ravel() > 0
 

diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
@@ -302,7 +302,11 @@ def f(x):
         return float(len(x))
 
     agged = grouped.agg(f)
-    expected = Series([4.0, 2.0], index=["bar", "foo"])
+
+    # precision will only be preserved when the input dtype is the same kind as output
+    expected = Series(
+        [4.0, 2.0], index=["bar", "foo"], dtype=dtype if dtype == "float32" else None
+    )
     tm.assert_series_equal(agged, expected)
 
 

diff --git a/pandas/tests/resample/test_datetime_index.py b/pandas/tests/resample/test_datetime_index.py
@@ -1204,9 +1204,6 @@ def test_resample_median_bug_1688():
 
         result = df.resample("T").apply(lambda x: x.mean())
         exp = df.asfreq("T")
-        if dtype == "float32":
-            # TODO: fastpath for apply comes back at float64
-            exp = exp.astype("float64")
         tm.assert_frame_equal(result, exp)
 
         result = df.resample("T").median()