pandas-dev
diff --git a/‎pandas/conftest.py
Lines changed: 28 additions & 0 deletions b/‎pandas/conftest.py
Lines changed: 28 additions & 0 deletions
diff --git a/‎pandas/tests/apply/test_numba.py
Lines changed: 3 additions & 3 deletions b/‎pandas/tests/apply/test_numba.py
Lines changed: 3 additions & 3 deletions
diff --git a/‎pandas/tests/arrays/string_/test_string_arrow.py
Lines changed: 3 additions & 2 deletions b/‎pandas/tests/arrays/string_/test_string_arrow.py
Lines changed: 3 additions & 2 deletions
diff --git a/‎pandas/tests/base/test_misc.py
Lines changed: 1 addition & 3 deletions b/‎pandas/tests/base/test_misc.py
Lines changed: 1 addition & 3 deletions
diff --git a/‎pandas/tests/frame/indexing/test_indexing.py
Lines changed: 3 additions & 7 deletions b/‎pandas/tests/frame/indexing/test_indexing.py
Lines changed: 3 additions & 7 deletions
diff --git a/‎pandas/tests/frame/methods/test_rank.py
Lines changed: 7 additions & 7 deletions b/‎pandas/tests/frame/methods/test_rank.py
Lines changed: 7 additions & 7 deletions
diff --git a/‎pandas/tests/frame/test_constructors.py
Lines changed: 2 additions & 5 deletions b/‎pandas/tests/frame/test_constructors.py
Lines changed: 2 additions & 5 deletions
diff --git a/‎pandas/tests/groupby/methods/test_size.py
Lines changed: 2 additions & 11 deletions b/‎pandas/tests/groupby/methods/test_size.py
Lines changed: 2 additions & 11 deletions
diff --git a/‎pandas/tests/groupby/methods/test_value_counts.py
Lines changed: 3 additions & 11 deletions b/‎pandas/tests/groupby/methods/test_value_counts.py
Lines changed: 3 additions & 11 deletions
diff --git a/‎pandas/tests/groupby/test_groupby.py
Lines changed: 2 additions & 9 deletions b/‎pandas/tests/groupby/test_groupby.py
Lines changed: 2 additions & 9 deletions
@@ -1228,6 +1228,34 @@ def string_dtype(request):
     return request.param
 
 
+@pytest.fixture(
+    params=[
+        ("python", pd.NA),
+        pytest.param(("pyarrow", pd.NA), marks=td.skip_if_no("pyarrow")),
+        pytest.param(("pyarrow", np.nan), marks=td.skip_if_no("pyarrow")),
+        ("python", np.nan),
+    ],
+    ids=[
+        "string=string[python]",
+        "string=string[pyarrow]",
+        "string=str[pyarrow]",
+        "string=str[python]",
+    ],
+)
+def string_dtype_no_object(request):
+    """
+    Parametrized fixture for string dtypes.
+    * 'string[python]' (NA variant)
+    * 'string[pyarrow]' (NA variant)
+    * 'str' (NaN variant, with pyarrow)
+    * 'str' (NaN variant, without pyarrow)
+    """
+    # need to instantiate the StringDtype here instead of in the params
+    # to avoid importing pyarrow during test collection
+    storage, na_value = request.param
+    return pd.StringDtype(storage, na_value)
+
+
 @pytest.fixture(
     params=[
         "string[python]",
 
@@ -5,6 +5,7 @@
 
 import pandas.util._test_decorators as td
 
+import pandas as pd
 from pandas import (
     DataFrame,
     Index,
@@ -29,11 +30,10 @@ def test_numba_vs_python_noop(float_frame, apply_axis):
 
 def test_numba_vs_python_string_index():
     # GH#56189
-    pytest.importorskip("pyarrow")
     df = DataFrame(
         1,
-        index=Index(["a", "b"], dtype="string[pyarrow_numpy]"),
-        columns=Index(["x", "y"], dtype="string[pyarrow_numpy]"),
+        index=Index(["a", "b"], dtype=pd.StringDtype(na_value=np.nan)),
+        columns=Index(["x", "y"], dtype=pd.StringDtype(na_value=np.nan)),
     )
     func = lambda x: x
     result = df.apply(func, engine="numba", axis=0)
 
@@ -241,10 +241,11 @@ def test_setitem_invalid_indexer_raises():
         arr[[0, 1]] = ["foo", "bar", "baz"]
 
 
-@pytest.mark.parametrize("dtype", ["string[pyarrow]", "string[pyarrow_numpy]"])
-def test_pickle_roundtrip(dtype):
+@pytest.mark.parametrize("na_value", [pd.NA, np.nan])
+def test_pickle_roundtrip(na_value):
     # GH 42600
     pytest.importorskip("pyarrow")
+    dtype = StringDtype("pyarrow", na_value=na_value)
     expected = pd.Series(range(10), dtype=dtype)
     expected_sliced = expected.head(2)
     full_pickled = pickle.dumps(expected)
 
@@ -180,9 +180,7 @@ def test_access_by_position(index_flat):
     assert index[-1] == index[size - 1]
 
     msg = f"index {size} is out of bounds for axis 0 with size {size}"
-    if is_dtype_equal(index.dtype, "string[pyarrow]") or is_dtype_equal(
-        index.dtype, "string[pyarrow_numpy]"
-    ):
+    if isinstance(index.dtype, pd.StringDtype) and index.dtype.storage == "pyarrow":
         msg = "index out of bounds"
     with pytest.raises(IndexError, match=msg):
         index[size]
 
@@ -1955,13 +1955,11 @@ def test_adding_new_conditional_column() -> None:
     ("dtype", "infer_string"),
     [
         (object, False),
-        ("string[pyarrow_numpy]", True),
+        (pd.StringDtype(na_value=np.nan), True),
     ],
 )
 def test_adding_new_conditional_column_with_string(dtype, infer_string) -> None:
     # https://github.com/pandas-dev/pandas/issues/56204
-    pytest.importorskip("pyarrow")
-
     df = DataFrame({"a": [1, 2], "b": [3, 4]})
     with pd.option_context("future.infer_string", infer_string):
         df.loc[df["a"] == 1, "c"] = "1"
@@ -1971,16 +1969,14 @@ def test_adding_new_conditional_column_with_string(dtype, infer_string) -> None:
     tm.assert_frame_equal(df, expected)
 
 
-@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
 def test_add_new_column_infer_string():
     # GH#55366
-    pytest.importorskip("pyarrow")
     df = DataFrame({"x": [1]})
     with pd.option_context("future.infer_string", True):
         df.loc[df["x"] == 1, "y"] = "1"
     expected = DataFrame(
-        {"x": [1], "y": Series(["1"], dtype="string[pyarrow_numpy]")},
-        columns=Index(["x", "y"], dtype=object),
+        {"x": [1], "y": Series(["1"], dtype=pd.StringDtype(na_value=np.nan))},
+        columns=Index(["x", "y"], dtype="str"),
     )
     tm.assert_frame_equal(df, expected)
 
 
@@ -14,6 +14,7 @@
 )
 from pandas.compat import HAS_PYARROW
 
+import pandas as pd
 from pandas import (
     DataFrame,
     Index,
@@ -509,14 +510,13 @@ def test_rank_mixed_axis_zero(self, data, expected):
         result = df.rank(numeric_only=True)
         tm.assert_frame_equal(result, expected)
 
-    @pytest.mark.parametrize(
-        "dtype, exp_dtype",
-        [("string[pyarrow]", "Int64"), ("string[pyarrow_numpy]", "float64")],
-    )
-    def test_rank_string_dtype(self, dtype, exp_dtype):
+    def test_rank_string_dtype(self, string_dtype_no_object):
         # GH#55362
-        pytest.importorskip("pyarrow")
-        obj = Series(["foo", "foo", None, "foo"], dtype=dtype)
+        obj = Series(["foo", "foo", None, "foo"], dtype=string_dtype_no_object)
         result = obj.rank(method="first")
+        exp_dtype = "Int64" if string_dtype_no_object.na_value is pd.NA else "float64"
+        if string_dtype_no_object.storage == "python":
+            # TODO nullable string[python] should also return nullable Int64
+            exp_dtype = "float64"
         expected = Series([1, 2, None, 3], dtype=exp_dtype)
         tm.assert_series_equal(result, expected)
@@ -2721,8 +2721,7 @@ def test_construct_with_strings_and_none(self):
 
     def test_frame_string_inference(self):
         # GH#54430
-        pytest.importorskip("pyarrow")
-        dtype = "string[pyarrow_numpy]"
+        dtype = pd.StringDtype(na_value=np.nan)
         expected = DataFrame(
             {"a": ["a", "b"]}, dtype=dtype, columns=Index(["a"], dtype=dtype)
         )
@@ -2756,8 +2755,7 @@ def test_frame_string_inference(self):
 
     def test_frame_string_inference_array_string_dtype(self):
         # GH#54496
-        pytest.importorskip("pyarrow")
-        dtype = "string[pyarrow_numpy]"
+        dtype = pd.StringDtype(na_value=np.nan)
         expected = DataFrame(
             {"a": ["a", "b"]}, dtype=dtype, columns=Index(["a"], dtype=dtype)
         )
@@ -2781,7 +2779,6 @@ def test_frame_string_inference_array_string_dtype(self):
 
     def test_frame_string_inference_block_dim(self):
         # GH#55363
-        pytest.importorskip("pyarrow")
         with pd.option_context("future.infer_string", True):
             df = DataFrame(np.array([["hello", "goodbye"], ["hello", "Hello"]]))
         assert df._mgr.blocks[0].ndim == 2
 
@@ -3,8 +3,6 @@
 
 from pandas._config import using_string_dtype
 
-import pandas.util._test_decorators as td
-
 from pandas.core.dtypes.common import is_integer_dtype
 
 from pandas import (
@@ -111,16 +109,9 @@ def test_size_series_masked_type_returns_Int64(dtype):
 
 
 @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
-@pytest.mark.parametrize(
-    "dtype",
-    [
-        object,
-        pytest.param("string[pyarrow_numpy]", marks=td.skip_if_no("pyarrow")),
-        pytest.param("string[pyarrow]", marks=td.skip_if_no("pyarrow")),
-    ],
-)
-def test_size_strings(dtype):
+def test_size_strings(any_string_dtype):
     # GH#55627
+    dtype = any_string_dtype
     df = DataFrame({"a": ["a", "a", "b"], "b": "a"}, dtype=dtype)
     result = df.groupby("a")["b"].size()
     exp_dtype = "Int64" if dtype == "string[pyarrow]" else "int64"
 
@@ -8,8 +8,6 @@
 import numpy as np
 import pytest
 
-import pandas.util._test_decorators as td
-
 from pandas import (
     Categorical,
     CategoricalIndex,
@@ -389,14 +387,6 @@ def test_against_frame_and_seriesgroupby(
             tm.assert_frame_equal(result, expected)
 
 
-@pytest.mark.parametrize(
-    "dtype",
-    [
-        object,
-        pytest.param("string[pyarrow_numpy]", marks=td.skip_if_no("pyarrow")),
-        pytest.param("string[pyarrow]", marks=td.skip_if_no("pyarrow")),
-    ],
-)
 @pytest.mark.parametrize("normalize", [True, False])
 @pytest.mark.parametrize(
     "sort, ascending, expected_rows, expected_count, expected_group_size",
@@ -414,9 +404,10 @@ def test_compound(
     expected_rows,
     expected_count,
     expected_group_size,
-    dtype,
+    any_string_dtype,
     using_infer_string,
 ):
+    dtype = any_string_dtype
     education_df = education_df.astype(dtype)
     education_df.columns = education_df.columns.astype(dtype)
     # Multiple groupby keys and as_index=False
@@ -433,6 +424,7 @@ def test_compound(
         expected["proportion"] = expected_count
         expected["proportion"] /= expected_group_size
         if dtype == "string[pyarrow]":
+            # TODO(nullable) also string[python] should return nullable dtypes
             expected["proportion"] = expected["proportion"].convert_dtypes()
     else:
         expected["count"] = expected_count
 
@@ -2832,20 +2832,13 @@ def test_rolling_wrong_param_min_period():
         test_df.groupby("name")["val"].rolling(window=2, min_period=1).sum()
 
 
-@pytest.mark.parametrize(
-    "dtype",
-    [
-        object,
-        pytest.param("string[pyarrow_numpy]", marks=td.skip_if_no("pyarrow")),
-    ],
-)
-def test_by_column_values_with_same_starting_value(dtype):
+def test_by_column_values_with_same_starting_value(any_string_dtype):
     # GH29635
     df = DataFrame(
         {
             "Name": ["Thomas", "Thomas", "Thomas John"],
             "Credit": [1200, 1300, 900],
-            "Mood": Series(["sad", "happy", "happy"], dtype=dtype),
+            "Mood": Series(["sad", "happy", "happy"], dtype=any_string_dtype),
         }
     )
     aggregate_details = {"Mood": Series.mode, "Credit": "sum"}