check compatibility in JoinUnit.is_na

pandas-dev · jreback · Feb 12, 2021 · Feb 3, 2021 · Feb 7, 2021 · Feb 7, 2021
commit 1c63c05d4842dd71bcde19ffa221a532c6e7d3aa
diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py
@@ -73,7 +73,8 @@ def concat_compat(to_concat, axis: int = 0, ea_compat_axis: bool = False):
     to_concat : array of arrays
     axis : axis to provide concatenation
     ea_compat_axis : bool, default False
-        For ExtensionArray compat, behave as if axis == 1
+        For ExtensionArray compat, behave as if axis == 1 when determining
+        whether to drop empty arrays.
 
     Returns
     -------

diff --git a/pandas/core/internals/concat.py b/pandas/core/internals/concat.py
@@ -19,7 +19,7 @@
     is_sparse,
 )
 from pandas.core.dtypes.concat import concat_compat
-from pandas.core.dtypes.missing import isna_all
+from pandas.core.dtypes.missing import is_valid_nat_for_dtype, isna_all
 
 import pandas.core.algorithms as algos
 from pandas.core.arrays import DatetimeArray, ExtensionArray, TimedeltaArray
@@ -227,6 +227,24 @@ def dtype(self):
         else:
             return get_dtype(maybe_promote(self.block.dtype, self.block.fill_value)[0])
 
+    def is_valid_na_for(self, dtype: DtypeObj) -> bool:
+        """
+        Check that we are all-NA of a type/dtype compatible with the given dtype.
+        """
+        if not self.is_na:
+            return False
+        if self.block is None:
+            return True
+
+        if self.dtype == object:
+            values = self.block.values
+            return all(
+                is_valid_nat_for_dtype(x, dtype) for x in values.ravel(order="K")
+            )
+
+        na_value = self.block.fill_value
+        return is_valid_nat_for_dtype(na_value, dtype)
+
     @cache_readonly
     def is_na(self) -> bool:
         if self.block is None:
@@ -257,7 +275,7 @@ def get_reindexed_values(self, empty_dtype: DtypeObj, upcasted_na) -> ArrayLike:
         else:
             fill_value = upcasted_na
 
-            if self.is_na:
+            if self.is_valid_na_for(empty_dtype):
                 blk_dtype = getattr(self.block, "dtype", None)
 
                 if blk_dtype == np.dtype(object):
@@ -418,8 +436,12 @@ def _get_empty_dtype(join_units: Sequence[JoinUnit]) -> DtypeObj:
         return empty_dtype
 
     has_none_blocks = any(unit.block is None for unit in join_units)
-    dtypes = [None if unit.block is None else unit.dtype for unit in join_units]
-    dtypes = [x for x in dtypes if x is not None]
+
+    dtypes = [
+        unit.dtype for unit in join_units if unit.block is not None and not unit.is_na
+    ]
+    if not len(dtypes):
+        dtypes = [unit.dtype for unit in join_units if unit.block is not None]
 
     dtype = find_common_type(dtypes)
     if has_none_blocks:

diff --git a/pandas/tests/frame/methods/test_append.py b/pandas/tests/frame/methods/test_append.py
@@ -165,23 +165,23 @@ def test_append_dtypes(self):
         df2 = DataFrame({"bar": np.nan}, index=range(1, 2))
         result = df1.append(df2)
         expected = DataFrame(
-            {"bar": Series([Timestamp("20130101"), np.nan], dtype="object")}
+            {"bar": Series([Timestamp("20130101"), np.nan], dtype="M8[ns]")}
         )
         tm.assert_frame_equal(result, expected)
 
         df1 = DataFrame({"bar": Timestamp("20130101")}, index=range(1))
         df2 = DataFrame({"bar": np.nan}, index=range(1, 2), dtype=object)
         result = df1.append(df2)
         expected = DataFrame(
-            {"bar": Series([Timestamp("20130101"), np.nan], dtype="object")}
+            {"bar": Series([Timestamp("20130101"), np.nan], dtype="M8[ns]")}
         )
         tm.assert_frame_equal(result, expected)
 
         df1 = DataFrame({"bar": np.nan}, index=range(1))
         df2 = DataFrame({"bar": Timestamp("20130101")}, index=range(1, 2))
         result = df1.append(df2)
         expected = DataFrame(
-            {"bar": Series([np.nan, Timestamp("20130101")], dtype="object")}
+            {"bar": Series([np.nan, Timestamp("20130101")], dtype="M8[ns]")}
         )
         tm.assert_frame_equal(result, expected)
 

diff --git a/pandas/tests/indexing/test_partial.py b/pandas/tests/indexing/test_partial.py
@@ -160,8 +160,7 @@ def test_partial_setting_mixed_dtype(self):
         df = DataFrame(columns=["A", "B"])
         df.loc[0] = Series(1, index=["B"])
 
-        # TODO: having this be float64 would not be unreasonable
-        exp = DataFrame([[np.nan, 1]], columns=["A", "B"], index=[0], dtype="object")
+        exp = DataFrame([[np.nan, 1]], columns=["A", "B"], index=[0], dtype="float64")
         tm.assert_frame_equal(df, exp)
 
         # list-like must conform

diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py
@@ -670,7 +670,6 @@ def test_join_append_timedeltas(self):
                 "t": [timedelta(0, 22500), timedelta(0, 22500)],
             }
         )
-        expected = expected.astype(object)
         tm.assert_frame_equal(result, expected)
 
         td = np.timedelta64(300000000)