BUG: DataFrameGroupBy.__getitem__ fails to propagate dropna by arw2019 · Pull Request #35078 · pandas-dev/pandas · GitHub


Merged
Changes from 1 commit (of 42 commits)
23c05f7
add values.dtype.kind==f branch to array_with_unit_datetime
arw2019 Jun 27, 2020
f564d48
revert pandas/_libs/tslib.pyx
arw2019 Jun 29, 2020
afe1869
added cast_from_unit definition for float
arw2019 Jun 27, 2020
c2594e0
revert accidental changes
arw2019 Jun 29, 2020
ef36084
revert changes
arw2019 Aug 7, 2020
cd92bc7
revert accidental changes
arw2019 Jun 29, 2020
077bd8e
update Grouping.indicies to return for nan values
arw2019 Jul 1, 2020
72f66d4
updated _GroupBy._get_index to return for nan values
arw2019 Jul 1, 2020
c5c3a28
revert accidental changes
arw2019 Jul 1, 2020
0214470
revert accidental changes
arw2019 Jul 1, 2020
2a3a86b
revert accidental changes
arw2019 Jul 1, 2020
cf71b75
styling change
arw2019 Jul 1, 2020
9051166
added tests
arw2019 Jul 2, 2020
84e04c0
fixed groupby/groupby.py's _get_indicies
arw2019 Jul 6, 2020
86ce781
removed debug statement
arw2019 Jul 6, 2020
7090e2d
fixed naming error in test
arw2019 Jul 7, 2020
68d9b78
remove type coercion block
arw2019 Jul 7, 2020
9caba29
added missing values handing for _GroupBy.get_group method
arw2019 Jul 7, 2020
8e3a460
updated indicies for case dropna=True
arw2019 Jul 7, 2020
b2ed957
cleaned up syntax
arw2019 Jul 7, 2020
6d87151
cleaned up syntax
arw2019 Jul 7, 2020
c67e072
removed print statements
arw2019 Jul 7, 2020
ed338e1
_transform_general: add a check that we don't accidentally upcast
arw2019 Jul 7, 2020
9ffef45
_transform_general: add int32, float32 to upcasting check
arw2019 Jul 7, 2020
736ac69
rewrite for loop as list comprehension
arw2019 Jul 7, 2020
68902eb
rewrote if statement as dict comp + ternary
arw2019 Jul 7, 2020
c6668f0
fixed small bug in list comp in groupby/groupby.py
arw2019 Jul 7, 2020
46949ea
deleted debug statement in groupby/groupby.py
arw2019 Jul 7, 2020
e16a495
rewrite _get_index using next_iter to set default value
arw2019 Jul 7, 2020
e00d71d
update exepcted test_groupby_nat_exclude for new missing values handling
arw2019 Jul 7, 2020
6d5a441
remove print statement
arw2019 Jul 7, 2020
9c24cf2
reworked solution
arw2019 Jul 9, 2020
5637c3e
fixed PEP8 issue
arw2019 Jul 9, 2020
29c13f6
run pre-commit checks
arw2019 Jul 9, 2020
2ea68af
styling fix
arw2019 Jul 9, 2020
3f5c6d6
update whatnew + styling improvements
arw2019 Jul 9, 2020
10147b0
move NaN handling to _get_indicies
arw2019 Jul 22, 2020
c9f6f7e
removed 1.1 release note
arw2019 Jul 22, 2020
9b536dd
redo solution - modify SeriesGroupBy._transform_general only
arw2019 Jul 22, 2020
2c9de8e
Merge remote-tracking branch 'upstream/master' into groupby-getitem-d…
arw2019 Aug 7, 2020
8d991d5
rewrite case + rewrite tests w fixtures
arw2019 Aug 7, 2020
570ce21
fix mypy error
arw2019 Aug 7, 2020
redo solution - modify SeriesGroupBy._transform_general only
arw2019 committed Aug 7, 2020
commit 9b536dd2d281e564474a8de91b38f672cea3c4f5
14 changes: 0 additions & 14 deletions doc/source/whatsnew/v1.1.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1017,7 +1017,6 @@ Indexing

Missing
^^^^^^^
<<<<<<< HEAD
- Calling :meth:`fillna` on an empty :class:`Series` now correctly returns a shallow copied object. The behaviour is now consistent with :class:`Index`, :class:`DataFrame` and a non-empty :class:`Series` (:issue:`32543`).
- Bug in :meth:`Series.replace` when argument ``to_replace`` is of type dict/list and is used on a :class:`Series` containing ``<NA>`` was raising a ``TypeError``. The method now handles this by ignoring ``<NA>`` values when doing the comparison for the replacement (:issue:`32621`)
- Bug in :meth:`~Series.any` and :meth:`~Series.all` incorrectly returning ``<NA>`` for all ``False`` or all ``True`` values using the nulllable Boolean dtype and with ``skipna=False`` (:issue:`33253`)
Expand All @@ -1026,19 +1025,6 @@ Missing
- Bug in :meth:`DataFrame.interpolate` when called on a :class:`DataFrame` with column names of string type was throwing a ValueError. The method is now independent of the type of the column names (:issue:`33956`)
- Passing :class:`NA` into a format string using format specs will now work. For example ``"{:.1f}".format(pd.NA)`` would previously raise a ``ValueError``, but will now return the string ``"<NA>"`` (:issue:`34740`)
- Bug in :meth:`Series.map` not raising on invalid ``na_action`` (:issue:`32815`)
=======
- Calling :meth:`fillna` on an empty Series now correctly returns a shallow copied object. The behaviour is now consistent with :class:`Index`, :class:`DataFrame` and a non-empty :class:`Series` (:issue:`32543`).
- Bug in :meth:`replace` when argument ``to_replace`` is of type dict/list and is used on a :class:`Series` containing ``<NA>`` was raising a ``TypeError``. The method now handles this by ignoring ``<NA>`` values when doing the comparison for the replacement (:issue:`32621`)
- Bug in :meth:`~Series.any` and :meth:`~Series.all` incorrectly returning ``<NA>`` for all ``False`` or all ``True`` values using the nulllable boolean dtype and with ``skipna=False`` (:issue:`33253`)
- Clarified documentation on interpolate with method =akima. The ``der`` parameter must be scalar or None (:issue:`33426`)
- :meth:`DataFrame.interpolate` uses the correct axis convention now. Previously interpolating along columns lead to interpolation along indices and vice versa. Furthermore interpolating with methods ``pad``, ``ffill``, ``bfill`` and ``backfill`` are identical to using these methods with :meth:`fillna` (:issue:`12918`, :issue:`29146`)
- Bug in :meth:`DataFrame.interpolate` when called on a DataFrame with column names of string type was throwing a ValueError. The method is no independing of the type of column names (:issue:`33956`)
- passing :class:`NA` will into a format string using format specs will now work. For example ``"{:.1f}".format(pd.NA)`` would previously raise a ``ValueError``, but will now return the string ``"<NA>"`` (:issue:`34740`)
<<<<<<< HEAD
- Bug in :meth:`SeriesGroupBy.transform` now correctly handles missing values for `dropna=False` (:issue:`35014`)
>>>>>>> 90e9b6a10... update whatnew + styling improvements
=======
>>>>>>> 8c11b6072... removed 1.1 release note

MultiIndex
^^^^^^^^^^
Expand Down
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v1.2.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ Indexing
Missing
^^^^^^^

-
- Bug in :meth:`SeriesGroupBy.transform` now correctly handles missing values for `dropna=False` (:issue:`35014`)
-

MultiIndex
Expand Down
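The v1.2.0 release note above can be exercised directly. A minimal sketch of the fixed behaviour (assuming pandas >= 1.1, where the `dropna` keyword was introduced):

```python
import numpy as np
import pandas as pd

# With dropna=False the NaN key forms its own group, and transform must
# return a result aligned with the original three-row index (GH35014).
df = pd.DataFrame({"key": ["a", "a", np.nan], "val": [1, 2, 3]})
out = df.groupby("key", dropna=False)["val"].transform(lambda s: s.sum())
print(out.tolist())  # [3, 3, 3]
```

Before the fix, the NaN-keyed rows were dropped from the transform result even with `dropna=False`, misaligning it with the caller's index.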
20 changes: 0 additions & 20 deletions pandas/_libs/tslib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -246,31 +246,11 @@ def array_with_unit_to_datetime(
if ((fvalues < Timestamp.min.value).any()
or (fvalues > Timestamp.max.value).any()):
raise OutOfBoundsDatetime(f"cannot convert input with unit '{unit}'")
<<<<<<< HEAD
<<<<<<< HEAD
=======
>>>>>>> 7df44d10f... revert accidental changes
result = (iresult * m).astype('M8[ns]')
iresult = result.view('i8')
iresult[mask] = NPY_NAT
return result, tz

<<<<<<< HEAD
=======
# GH20445
if values.dtype.kind == 'i':
result = (iresult * m).astype('M8[ns]')
iresult = result.view('i8')
iresult[mask] = NPY_NAT
return result, tz
elif values.dtype.kind == 'f':
result = (fresult * m_as_float).astype('M8[ns]')
fresult = result.view('f8')
fresult[mask] = NPY_NAT
return result, tz
>>>>>>> f1ae8f562... _libs/tslib.pyx added comments
=======
>>>>>>> 7df44d10f... revert accidental changes
result = np.empty(n, dtype='M8[ns]')
iresult = result.view('i8')

Expand Down
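The tslib.pyx hunk above only reverts stray conflict markers, but for context: `array_with_unit_to_datetime` is the machinery behind `pd.to_datetime(..., unit=...)`, where float inputs take the fractional branch these early commits touched and later reverted. A quick illustration:

```python
import pandas as pd

# A float value with an explicit unit routes through the fractional-unit
# path: 1.5 seconds since the epoch.
out = pd.to_datetime([1.5], unit="s")
print(out[0])  # 1970-01-01 00:00:01.500000
```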
4 changes: 0 additions & 4 deletions pandas/_libs/tslibs/conversion.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -120,11 +120,7 @@ cdef inline int64_t cast_from_unit(object ts, str unit) except? -1:
return <int64_t>(base * m) + <int64_t>(frac * m)


<<<<<<< HEAD
cpdef inline (int64_t, int) precision_from_unit(str unit):
=======
cpdef inline object precision_from_unit(str unit):
>>>>>>> 6b9d4de82... revert changes
"""
Return a casting of the unit represented to nanoseconds + the precision
to round the fractional part.
Expand Down
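`cast_from_unit` and `precision_from_unit` convert a scalar-plus-unit into an integer nanosecond count; the public `Timedelta` API exposes the same conversion, so a rough sketch of what the cast produces:

```python
import pandas as pd

# 1.5 seconds expressed as nanoseconds: the integer representation that
# cast_from_unit computes internally (base * m + frac * m in the diff).
ns = pd.Timedelta(1.5, unit="s").value
print(ns)  # 1500000000
```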
20 changes: 9 additions & 11 deletions pandas/core/groupby/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,6 @@
maybe_cast_result_dtype,
maybe_convert_objects,
maybe_downcast_numeric,
maybe_downcast_to_dtype,
)
from pandas.core.dtypes.common import (
ensure_int64,
Expand Down Expand Up @@ -535,26 +534,25 @@ def _transform_general(
if isinstance(res, (ABCDataFrame, ABCSeries)):
res = res._values

indexer = self._get_index(name)
ser = klass(res, indexer)
results.append(ser)
results.append(klass(res, index=group.index))

# check for empty "results" to avoid concat ValueError
if results:
from pandas.core.reshape.concat import concat

result = concat(results).sort_index()
concatenated = concat(results)
result = self._set_result_index_ordered(concatenated)
else:
result = self.obj._constructor(dtype=np.float64)

# we will only try to coerce the result type if
# we have a numeric dtype, as these are *always* user-defined funcs
# the cython take a different path (and casting)
# make sure we don't accidentally upcast (GH35014)
types = ["bool", "int32", "int64", "float32", "float64"]
dtype = self._selected_obj.dtype
if is_numeric_dtype(dtype) and types.index(dtype) < types.index(result.dtype):
result = maybe_downcast_to_dtype(result, dtype)
if is_numeric_dtype(result.dtype):
common_dtype = np.find_common_type(
[self._selected_obj.dtype, result.dtype], []
)
if common_dtype is result.dtype:
result = maybe_downcast_numeric(result, self._selected_obj.dtype)

result.name = self._selected_obj.name
result.index = self._selected_obj.index
Expand Down
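The replacement guard in `_transform_general` drops the hard-coded dtype ladder and instead asks NumPy for the common dtype of the input and the result. A standalone sketch of the same idea — note it uses `np.result_type` rather than the diff's `np.find_common_type`, which is deprecated in recent NumPy; the function name here is illustrative, not from the patch:

```python
import numpy as np

def can_try_downcast(original_dtype: np.dtype, result_dtype: np.dtype) -> bool:
    # Only attempt a downcast when the result dtype is already the common
    # dtype of the pair, i.e. the transform merely upcast the input and
    # casting back cannot silently change unrelated values.
    return np.result_type(original_dtype, result_dtype) == result_dtype

# int64 input whose transform came back float64: float64 is the common
# dtype, so a downcast back to int64 may be attempted.
print(can_try_downcast(np.dtype("int64"), np.dtype("float64")))  # True
# float64 input with an int64 result: int64 is not the common dtype.
print(can_try_downcast(np.dtype("float64"), np.dtype("int64")))  # False
```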
10 changes: 2 additions & 8 deletions pandas/core/groupby/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,6 @@ class providing the base-class of operations.
)
from pandas.core.dtypes.missing import isna, notna

import pandas as pd
from pandas.core import nanops
import pandas.core.algorithms as algorithms
from pandas.core.arrays import Categorical, DatetimeArray
Expand Down Expand Up @@ -624,12 +623,7 @@ def get_converter(s):
converter = get_converter(index_sample)
names = (converter(name) for name in names)

return [
self.indices.get(name, [])
if not isna(name)
else self.indices.get(pd.NaT, [])
for name in names
]
return [self.indices.get(name, []) for name in names]

def _get_index(self, name):
"""
Expand Down Expand Up @@ -813,7 +807,7 @@ def get_group(self, name, obj=None):
if obj is None:
obj = self._selected_obj

inds = self._get_index(pd.NaT) if pd.isna(name) else self._get_index(name)
inds = self._get_index(name)
if not len(inds):
raise KeyError(name)

Expand Down
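After this change `_get_indices` falls back to a plain `self.indices.get(name, [])` with no NaT special case. The `indices` mapping it consults is public API, so its shape is easy to see:

```python
import pandas as pd

ser = pd.Series([1, 2, 3], index=["a", "a", "b"])
gb = ser.groupby(level=0)
# .indices maps each group label to the positional indexer of its rows;
# _get_indices simply looks names up here, returning [] for misses.
print(gb.indices["a"].tolist())  # [0, 1]
print(gb.indices["b"].tolist())  # [2]
```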
12 changes: 1 addition & 11 deletions pandas/core/groupby/grouper.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
)
from pandas.core.dtypes.generic import ABCSeries

import pandas as pd
import pandas.core.algorithms as algorithms
from pandas.core.arrays import Categorical, ExtensionArray
import pandas.core.common as com
Expand Down Expand Up @@ -558,16 +557,7 @@ def indices(self):
return self.grouper.indices

values = Categorical(self.grouper)

# GH35014
reverse_indexer = values._reverse_indexer()
if not self.dropna and any(pd.isna(v) for v in values):
return {
**reverse_indexer,
pd.NaT: np.array([i for i, v in enumerate(values) if pd.isna(v)]),
}
else:
return reverse_indexer
return values._reverse_indexer()

@property
def codes(self) -> np.ndarray:
Expand Down
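`Categorical._reverse_indexer` is internal, but its output shape — one entry per category mapping to the positions where it occurs — can be reproduced with public API. A minimal sketch of what `Grouping.indices` returns after this revert:

```python
import pandas as pd

values = pd.Categorical(["a", "b", "a"])
# Equivalent of a reverse indexer built by hand: each category mapped to
# the positions at which it appears in the original ordering.
reverse = {
    cat: [i for i, v in enumerate(values) if v == cat]
    for cat in values.categories
}
print(reverse)  # {'a': [0, 2], 'b': [1]}
```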