BUG: Handle zero-chunked pyarrow.ChunkedArray in StringArray

pandas-dev · jorisvandenbossche · Apr 21, 2021 · Apr 20, 2021 · Apr 20, 2021 · Apr 20, 2021
commit 3dab96d87f3eeba1b710e09624ff9e3730121f07
diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst
@@ -700,7 +700,7 @@ Conversion
 Strings
 ^^^^^^^
 
--
+- Bug in the conversion from ``pyarrow.ChunkedArray`` to :class:`StringArray` when the original had zero chunks (:issue:`41040`)
 -
 
 Interval

diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py
@@ -118,7 +118,10 @@ def __from_arrow__(
             str_arr = StringArray._from_sequence(np.array(arr))
             results.append(str_arr)
 
-        return StringArray._concat_same_type(results)
+        if len(results) > 0:
+            return StringArray._concat_same_type(results)
+        else:
+            return StringArray(np.array([], dtype="object"))
 
 
 class StringArray(PandasArray):

diff --git a/pandas/tests/arrays/string_/test_string.py b/pandas/tests/arrays/string_/test_string.py
@@ -476,6 +476,22 @@ def test_arrow_roundtrip(dtype, dtype_object):
     assert result.loc[2, "a"] is pd.NA
 
 
+@td.skip_if_no("pyarrow", min_version="0.15.1.dev")
+def test_arrow_load_from_zero_chunks(dtype, dtype_object):
+    # GH-41040
+    import pyarrow as pa
+
+    data = pd.array([], dtype=dtype)
+    df = pd.DataFrame({"a": data})
+    table = pa.table(df)
+    assert table.field("a").type == "string"
+    # Instantiate the same table with no chunks at all
+    table = pa.table([pa.chunked_array([], type=pa.string())], schema=table.schema)
+    result = table.to_pandas()
+    assert isinstance(result["a"].dtype, dtype_object)
+    tm.assert_frame_equal(result, df)
+
+
 def test_value_counts_na(dtype):
     arr = pd.array(["a", "b", "a", pd.NA], dtype=dtype)
     result = arr.value_counts(dropna=False)
-Original file line number
+Diff line change
@@ Expand Up / @@ -700,7 +700,7 @@ Conversion @@
     Strings
     ^^^^^^^
-    -
+    - Bug in the conversion from ``pyarrow.ChunkedArray`` to :class:`StringArray` when the original had zero chunks (:issue:`41040`)
     -
     Interval
@@ Expand Down @@