From b455c30189be970c99b6ea069d20a079e386917f Mon Sep 17 00:00:00 2001
From: Chelsea Lin <chelsealin@google.com>
Date: Mon, 12 Aug 2024 20:06:59 +0000
Subject: [PATCH 1/5] feat: Series.str.__getitem__

---
 bigframes/core/compile/scalar_op_compiler.py  |  20 ++-
 bigframes/operations/__init__.py              |  34 ++++
 bigframes/operations/strings.py               |  14 ++
 tests/system/small/operations/test_strings.py | 145 ++++++++++++++----
 .../pandas/core/strings/accessor.py           |  31 ++++
 5 files changed, 216 insertions(+), 28 deletions(-)

diff --git a/bigframes/core/compile/scalar_op_compiler.py b/bigframes/core/compile/scalar_op_compiler.py
index 32749b32a6..e70c49e337 100644
--- a/bigframes/core/compile/scalar_op_compiler.py
+++ b/bigframes/core/compile/scalar_op_compiler.py
@@ -902,6 +902,24 @@ def array_to_string_op_impl(x: ibis_types.Value, op: ops.ArrayToStringOp):
     return typing.cast(ibis_types.ArrayValue, x).join(op.delimiter)
 
 
+@scalar_op_compiler.register_unary_op(ops.ArrayIndexOp, pass_op=True)
+def array_index_op_impl(x: ibis_types.Value, op: ops.ArrayIndexOp):
+    res = typing.cast(ibis_types.ArrayValue, x)[op.index]
+    if x.type().is_string():
+        return _null_or_value(res, res != ibis.literal(""))
+    else:
+        return res
+
+
+@scalar_op_compiler.register_unary_op(ops.ArraySliceOp, pass_op=True)
+def array_slice_op_impl(x: ibis_types.Value, op: ops.ArraySliceOp):
+    res = typing.cast(ibis_types.ArrayValue, x)[op.start : op.stop : op.step]
+    if x.type().is_string():
+        return _null_or_value(res, res != ibis.literal(""))
+    else:
+        return res
+
+
 # JSON Ops
 @scalar_op_compiler.register_binary_op(ops.JSONSet, pass_op=True)
 def json_set_op_impl(x: ibis_types.Value, y: ibis_types.Value, op: ops.JSONSet):
@@ -984,7 +1002,7 @@ def ne_op(
 
 
 def _null_or_value(value: ibis_types.Value, where_value: ibis_types.BooleanValue):
-    return ibis.where(
+    return ibis.ifelse(
         where_value,
         value,
         ibis.null(),
diff --git a/bigframes/operations/__init__.py b/bigframes/operations/__init__.py
index 4d4e40643d..fb333d7a53 100644
--- a/bigframes/operations/__init__.py
+++ b/bigframes/operations/__init__.py
@@ -602,6 +602,40 @@ def output_type(self, *input_types):
         return dtypes.STRING_DTYPE
 
 
+@dataclasses.dataclass(frozen=True)
+class ArrayIndexOp(UnaryOp):
+    name: typing.ClassVar[str] = "array_index"
+    index: int
+
+    def output_type(self, *input_types):
+        input_type = input_types[0]
+        if dtypes.is_string_like(input_type):
+            return dtypes.STRING_DTYPE
+        elif dtypes.is_array_like(input_type):
+            return dtypes.arrow_dtype_to_bigframes_dtype(
+                input_type.pyarrow_dtype.value_type
+            )
+        else:
+            raise TypeError("Input type must be an array or string-like type.")
+
+
+@dataclasses.dataclass(frozen=True)
+class ArraySliceOp(UnaryOp):
+    name: typing.ClassVar[str] = "array_slice"
+    start: int
+    stop: typing.Optional[int] = None
+    step: typing.Optional[int] = None
+
+    def output_type(self, *input_types):
+        input_type = input_types[0]
+        if dtypes.is_string_like(input_type):
+            return dtypes.STRING_DTYPE
+        elif dtypes.is_array_like(input_type):
+            return input_type
+        else:
+            raise TypeError("Input type must be an array or string-like type.")
+
+
 ## JSON Ops
 @dataclasses.dataclass(frozen=True)
 class JSONExtract(UnaryOp):
diff --git a/bigframes/operations/strings.py b/bigframes/operations/strings.py
index 22c325d7e0..91c849479f 100644
--- a/bigframes/operations/strings.py
+++ b/bigframes/operations/strings.py
@@ -38,6 +38,20 @@
 class StringMethods(bigframes.operations.base.SeriesMethods, vendorstr.StringMethods):
     __doc__ = vendorstr.StringMethods.__doc__
 
+    def __getitem__(self, key: int | slice) -> series.Series:
+        if isinstance(key, int):
+            return self._apply_unary_op(ops.ArrayIndexOp(index=key))
+        elif isinstance(key, slice):
+            return self._apply_unary_op(
+                ops.ArraySliceOp(
+                    start=key.start if key.start is not None else 0,
+                    stop=key.stop,
+                    step=key.step,
+                )
+            )
+        else:
+            raise ValueError(f"key must be an int or slice, got {type(key).__name__}")
+
     def find(
         self,
         sub: str,
diff --git a/tests/system/small/operations/test_strings.py b/tests/system/small/operations/test_strings.py
index b8a8ad2d1e..fbb24246b6 100644
--- a/tests/system/small/operations/test_strings.py
+++ b/tests/system/small/operations/test_strings.py
@@ -15,9 +15,11 @@
 import re
 
 import pandas as pd
+import pyarrow as pa
 import pytest
 
-import bigframes.series
+import bigframes.dtypes as dtypes
+import bigframes.pandas as bpd
 
 from ...utils import assert_series_equal
 
@@ -25,7 +27,7 @@
 def test_find(scalars_dfs):
     scalars_df, scalars_pandas_df = scalars_dfs
     col_name = "string_col"
-    bf_series: bigframes.series.Series = scalars_df[col_name]
+    bf_series: bpd.Series = scalars_df[col_name]
     bf_result = bf_series.str.find("W").to_pandas()
     pd_result = scalars_pandas_df[col_name].str.find("W")
 
@@ -50,7 +52,7 @@ def test_find(scalars_dfs):
 def test_str_contains(scalars_dfs, pat, case, flags, regex):
     scalars_df, scalars_pandas_df = scalars_dfs
     col_name = "string_col"
-    bf_series: bigframes.series.Series = scalars_df[col_name]
+    bf_series: bpd.Series = scalars_df[col_name]
 
     bf_result = bf_series.str.contains(
         pat, case=case, flags=flags, regex=regex
@@ -72,7 +74,7 @@ def test_str_contains(scalars_dfs, pat, case, flags, regex):
 def test_str_extract(scalars_dfs, pat):
     scalars_df, scalars_pandas_df = scalars_dfs
     col_name = "string_col"
-    bf_series: bigframes.series.Series = scalars_df[col_name]
+    bf_series: bpd.Series = scalars_df[col_name]
 
     bf_result = bf_series.str.extract(pat).to_pandas()
     pd_result = scalars_pandas_df[col_name].str.extract(pat)
@@ -101,7 +103,7 @@ def test_str_extract(scalars_dfs, pat):
 def test_str_replace(scalars_dfs, pat, repl, case, flags, regex):
     scalars_df, scalars_pandas_df = scalars_dfs
     col_name = "string_col"
-    bf_series: bigframes.series.Series = scalars_df[col_name]
+    bf_series: bpd.Series = scalars_df[col_name]
 
     bf_result = bf_series.str.replace(
         pat, repl=repl, case=case, flags=flags, regex=regex
@@ -132,7 +134,7 @@ def test_str_replace(scalars_dfs, pat, repl, case, flags, regex):
 def test_str_startswith(scalars_dfs, pat):
     scalars_df, scalars_pandas_df = scalars_dfs
     col_name = "string_col"
-    bf_series: bigframes.series.Series = scalars_df[col_name]
+    bf_series: bpd.Series = scalars_df[col_name]
     pd_series = scalars_pandas_df[col_name].astype("object")
 
     bf_result = bf_series.str.startswith(pat).to_pandas()
@@ -157,7 +159,7 @@ def test_str_startswith(scalars_dfs, pat):
 def test_str_endswith(scalars_dfs, pat):
     scalars_df, scalars_pandas_df = scalars_dfs
     col_name = "string_col"
-    bf_series: bigframes.series.Series = scalars_df[col_name]
+    bf_series: bpd.Series = scalars_df[col_name]
     pd_series = scalars_pandas_df[col_name].astype("object")
 
     bf_result = bf_series.str.endswith(pat).to_pandas()
@@ -169,7 +171,7 @@ def test_str_endswith(scalars_dfs, pat):
 def test_len(scalars_dfs):
     scalars_df, scalars_pandas_df = scalars_dfs
     col_name = "string_col"
-    bf_series: bigframes.series.Series = scalars_df[col_name]
+    bf_series: bpd.Series = scalars_df[col_name]
     bf_result = bf_series.str.len().to_pandas()
     pd_result = scalars_pandas_df[col_name].str.len()
 
@@ -188,7 +190,7 @@ def test_len_with_array_column(nested_df, nested_pandas_df):
     See: https://stackoverflow.com/a/41340543/101923
     """
     col_name = "event_sequence"
-    bf_series: bigframes.series.Series = nested_df[col_name]
+    bf_series: bpd.Series = nested_df[col_name]
     bf_result = bf_series.str.len().to_pandas()
     pd_result = nested_pandas_df[col_name].str.len()
 
@@ -204,7 +206,7 @@ def test_len_with_array_column(nested_df, nested_pandas_df):
 def test_lower(scalars_dfs):
     scalars_df, scalars_pandas_df = scalars_dfs
     col_name = "string_col"
-    bf_series: bigframes.series.Series = scalars_df[col_name]
+    bf_series: bpd.Series = scalars_df[col_name]
     bf_result = bf_series.str.lower().to_pandas()
     pd_result = scalars_pandas_df[col_name].str.lower()
 
@@ -217,7 +219,7 @@ def test_lower(scalars_dfs):
 def test_reverse(scalars_dfs):
     scalars_df, scalars_pandas_df = scalars_dfs
     col_name = "string_col"
-    bf_series: bigframes.series.Series = scalars_df[col_name]
+    bf_series: bpd.Series = scalars_df[col_name]
     bf_result = bf_series.str.reverse().to_pandas()
     pd_result = scalars_pandas_df[col_name].copy()
     for i in pd_result.index:
@@ -239,7 +241,7 @@ def test_reverse(scalars_dfs):
 def test_slice(scalars_dfs, start, stop):
     scalars_df, scalars_pandas_df = scalars_dfs
     col_name = "string_col"
-    bf_series: bigframes.series.Series = scalars_df[col_name]
+    bf_series: bpd.Series = scalars_df[col_name]
     bf_result = bf_series.str.slice(start, stop).to_pandas()
     pd_series = scalars_pandas_df[col_name]
     pd_result = pd_series.str.slice(start, stop)
@@ -253,7 +255,7 @@ def test_slice(scalars_dfs, start, stop):
 def test_strip(scalars_dfs):
     scalars_df, scalars_pandas_df = scalars_dfs
     col_name = "string_col"
-    bf_series: bigframes.series.Series = scalars_df[col_name]
+    bf_series: bpd.Series = scalars_df[col_name]
     bf_result = bf_series.str.strip().to_pandas()
     pd_result = scalars_pandas_df[col_name].str.strip()
 
@@ -266,7 +268,7 @@ def test_strip(scalars_dfs):
 def test_upper(scalars_dfs):
     scalars_df, scalars_pandas_df = scalars_dfs
     col_name = "string_col"
-    bf_series: bigframes.series.Series = scalars_df[col_name]
+    bf_series: bpd.Series = scalars_df[col_name]
     bf_result = bf_series.str.upper().to_pandas()
     pd_result = scalars_pandas_df[col_name].str.upper()
 
@@ -375,7 +377,7 @@ def test_isupper(weird_strings, weird_strings_pd):
 def test_rstrip(scalars_dfs):
     scalars_df, scalars_pandas_df = scalars_dfs
     col_name = "string_col"
-    bf_series: bigframes.series.Series = scalars_df[col_name]
+    bf_series: bpd.Series = scalars_df[col_name]
     bf_result = bf_series.str.rstrip().to_pandas()
     pd_result = scalars_pandas_df[col_name].str.rstrip()
 
@@ -388,7 +390,7 @@ def test_rstrip(scalars_dfs):
 def test_lstrip(scalars_dfs):
     scalars_df, scalars_pandas_df = scalars_dfs
     col_name = "string_col"
-    bf_series: bigframes.series.Series = scalars_df[col_name]
+    bf_series: bpd.Series = scalars_df[col_name]
     bf_result = bf_series.str.lstrip().to_pandas()
     pd_result = scalars_pandas_df[col_name].str.lstrip()
 
@@ -402,7 +404,7 @@ def test_lstrip(scalars_dfs):
 def test_repeat(scalars_dfs, repeats):
     scalars_df, scalars_pandas_df = scalars_dfs
     col_name = "string_col"
-    bf_series: bigframes.series.Series = scalars_df[col_name]
+    bf_series: bpd.Series = scalars_df[col_name]
     bf_result = bf_series.str.repeat(repeats).to_pandas()
     pd_result = scalars_pandas_df[col_name].str.repeat(repeats)
 
@@ -415,7 +417,7 @@ def test_repeat(scalars_dfs, repeats):
 def test_capitalize(scalars_dfs):
     scalars_df, scalars_pandas_df = scalars_dfs
     col_name = "string_col"
-    bf_series: bigframes.series.Series = scalars_df[col_name]
+    bf_series: bpd.Series = scalars_df[col_name]
     bf_result = bf_series.str.capitalize().to_pandas()
     pd_result = scalars_pandas_df[col_name].str.capitalize()
 
@@ -428,9 +430,9 @@ def test_capitalize(scalars_dfs):
 def test_cat_with_series(scalars_dfs):
     scalars_df, scalars_pandas_df = scalars_dfs
     col_name = "string_col"
-    bf_filter: bigframes.series.Series = scalars_df["bool_col"]
-    bf_left: bigframes.series.Series = scalars_df[col_name][bf_filter]
-    bf_right: bigframes.series.Series = scalars_df[col_name]
+    bf_filter: bpd.Series = scalars_df["bool_col"]
+    bf_left: bpd.Series = scalars_df[col_name][bf_filter]
+    bf_right: bpd.Series = scalars_df[col_name]
     bf_result = bf_left.str.cat(others=bf_right).to_pandas()
     pd_filter = scalars_pandas_df["bool_col"]
     pd_left = scalars_pandas_df[col_name][pd_filter]
@@ -447,7 +449,7 @@ def test_str_match(scalars_dfs):
     scalars_df, scalars_pandas_df = scalars_dfs
     col_name = "string_col"
     pattern = "[A-Z].*"
-    bf_series: bigframes.series.Series = scalars_df[col_name]
+    bf_series: bpd.Series = scalars_df[col_name]
     bf_result = bf_series.str.match(pattern).to_pandas()
     pd_result = scalars_pandas_df[col_name].str.match(pattern)
 
@@ -461,7 +463,7 @@ def test_str_fullmatch(scalars_dfs):
     scalars_df, scalars_pandas_df = scalars_dfs
     col_name = "string_col"
     pattern = "[A-Z].*!"
-    bf_series: bigframes.series.Series = scalars_df[col_name]
+    bf_series: bpd.Series = scalars_df[col_name]
     bf_result = bf_series.str.fullmatch(pattern).to_pandas()
     pd_result = scalars_pandas_df[col_name].str.fullmatch(pattern)
 
@@ -474,7 +476,7 @@ def test_str_fullmatch(scalars_dfs):
 def test_str_get(scalars_dfs):
     scalars_df, scalars_pandas_df = scalars_dfs
     col_name = "string_col"
-    bf_series: bigframes.series.Series = scalars_df[col_name]
+    bf_series: bpd.Series = scalars_df[col_name]
     bf_result = bf_series.str.get(8).to_pandas()
     pd_result = scalars_pandas_df[col_name].str.get(8)
 
@@ -487,7 +489,7 @@ def test_str_get(scalars_dfs):
 def test_str_pad(scalars_dfs):
     scalars_df, scalars_pandas_df = scalars_dfs
     col_name = "string_col"
-    bf_series: bigframes.series.Series = scalars_df[col_name]
+    bf_series: bpd.Series = scalars_df[col_name]
     bf_result = bf_series.str.pad(8, side="both", fillchar="%").to_pandas()
     pd_result = scalars_pandas_df[col_name].str.pad(8, side="both", fillchar="%")
 
@@ -510,7 +512,7 @@ def test_str_zfill(weird_strings, weird_strings_pd):
 def test_str_ljust(scalars_dfs):
     scalars_df, scalars_pandas_df = scalars_dfs
     col_name = "string_col"
-    bf_series: bigframes.series.Series = scalars_df[col_name]
+    bf_series: bpd.Series = scalars_df[col_name]
     bf_result = bf_series.str.ljust(7, fillchar="%").to_pandas()
     pd_result = scalars_pandas_df[col_name].str.ljust(7, fillchar="%")
 
@@ -523,7 +525,7 @@ def test_str_ljust(scalars_dfs):
 def test_str_rjust(scalars_dfs):
     scalars_df, scalars_pandas_df = scalars_dfs
     col_name = "string_col"
-    bf_series: bigframes.series.Series = scalars_df[col_name]
+    bf_series: bpd.Series = scalars_df[col_name]
     bf_result = bf_series.str.rjust(9, fillchar="%").to_pandas()
     pd_result = scalars_pandas_df[col_name].str.rjust(9, fillchar="%")
 
@@ -562,3 +564,92 @@ def test_str_split_raise_errors(scalars_dfs, pat, regex):
     pd_result = pd_result.apply(lambda x: [] if pd.isnull(x) is True else x)
 
     assert_series_equal(pd_result, bf_result, check_dtype=False)
+
+
+@pytest.mark.parametrize(
+    ("index"),
+    [
+        pytest.param(
+            "first", id="invalid_type", marks=pytest.mark.xfail(raises=ValueError)
+        ),
+        pytest.param(
+            slice(0, 2, 2),
+            id="only_support_step_one",
+            marks=pytest.mark.xfail(raises=ValueError),
+        ),
+    ],
+)
+def test_getitem_raise_errors(scalars_dfs, index):
+    scalars_df, _ = scalars_dfs
+    col_name = "string_col"
+    scalars_df[col_name].str[index]
+
+
+@pytest.mark.parametrize(
+    ("index"),
+    [
+        pytest.param(2, id="int"),
+        pytest.param(slice(None, None, None), id="default_start_slice"),
+        pytest.param(slice(0, None, 1), id="default_stop_slice"),
+        pytest.param(slice(0, 2, None), id="default_step_slice"),
+    ],
+)
+def test_getitem_w_string(scalars_dfs, index):
+    scalars_df, scalars_pandas_df = scalars_dfs
+    col_name = "string_col"
+    bf_result = scalars_df[col_name].str[index].to_pandas()
+    pd_result = scalars_pandas_df[col_name].str[index]
+
+    assert_series_equal(pd_result, bf_result)
+
+
+@pytest.mark.parametrize(
+    ("index"),
+    [
+        pytest.param(2, id="int"),
+        pytest.param(slice(None, None, None), id="default_start_slice"),
+        pytest.param(slice(0, None, 1), id="default_stop_slice"),
+        pytest.param(slice(0, 2, None), id="default_step_slice"),
+        pytest.param(slice(0, 0, None), id="single_one_slice"),
+    ],
+)
+def test_getitem_w_array(index):
+    data = [[1], [2, 3], [], [4, 5, 6]]
+    s = bpd.Series(data)
+    pd_s = pd.Series(data)
+
+    bf_result = s.str[index].to_pandas()
+    pd_result = pd_s.str[index]
+    # Skip dtype checks here because pandas returns `int64` while BF returns `Int64`.
+    assert_series_equal(pd_result, bf_result, check_dtype=False, check_index_type=False)
+
+
+def test_getitem_w_struct_array():
+    pa_struct = pa.struct(
+        [
+            ("name", pa.string()),
+            ("age", pa.int64()),
+        ]
+    )
+    data = [
+        [
+            {"name": "Alice", "age": 30},
+            {"name": "Bob", "age": 25},
+        ],
+        [
+            {"name": "Charlie", "age": 35},
+            {"name": "David", "age": 40},
+            {"name": "Eva", "age": 28},
+        ],
+        [],
+        [{"name": "Frank", "age": 50}],
+    ]
+    s = bpd.Series(data, dtype=bpd.ArrowDtype(pa.list_(pa_struct)))
+
+    result = s.str[1]
+    assert dtypes.is_struct_like(result.dtype)
+
+    expected_data = [item[1] if len(item) > 1 else None for item in data]
+    expected = bpd.Series(expected_data, dtype=bpd.ArrowDtype((pa_struct)))
+
+    assert_series_equal(result.to_pandas(), expected.to_pandas())
diff --git a/third_party/bigframes_vendored/pandas/core/strings/accessor.py b/third_party/bigframes_vendored/pandas/core/strings/accessor.py
index b02c23f945..fe820cf36d 100644
--- a/third_party/bigframes_vendored/pandas/core/strings/accessor.py
+++ b/third_party/bigframes_vendored/pandas/core/strings/accessor.py
@@ -13,6 +13,37 @@ class StringMethods:
     R's stringr package.
     """
 
+    def __getitem__(self, pat: str, flags: int = 0):
+        """
+        Index or slice string or list in the Series.
+
+        **Examples:**
+
+            >>> import bigframes.pandas as bpd
+            >>> bpd.options.display.progress_bar = None
+
+            >>> s = bpd.Series(['Alice', 'Bob', 'Charlie'])
+            >>> s.str[0]
+            0     A
+            1     B
+            2     C
+            dtype: string
+
+            >>> s.str[0:3]
+            0     Ali
+            1     Bob
+            2     Cha
+            dtype: string
+
+        Args:
+            key (int | slice):
+                Index or slice of indices to access from each string or list.
+
+        Returns:
+            bigframes.series.Series: The element(s) at the requested index or slice.
+        """
+        raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
+
     def extract(self, pat: str, flags: int = 0):
         """
         Extract capture groups in the regex `pat` as columns in a DataFrame.

From 8304d835b2b164073f68cc211c59cf87e1eb58f6 Mon Sep 17 00:00:00 2001
From: Chelsea Lin <chelsealin@google.com>
Date: Mon, 12 Aug 2024 21:05:12 +0000
Subject: [PATCH 2/5] handles error cases

---
 bigframes/operations/strings.py               | 13 +++++++++++++
 tests/system/small/operations/test_strings.py | 12 ++++++++++--
 2 files changed, 23 insertions(+), 2 deletions(-)

diff --git a/bigframes/operations/strings.py b/bigframes/operations/strings.py
index 91c849479f..c72cd751f7 100644
--- a/bigframes/operations/strings.py
+++ b/bigframes/operations/strings.py
@@ -40,8 +40,21 @@ class StringMethods(bigframes.operations.base.SeriesMethods, vendorstr.StringMet
 
     def __getitem__(self, key: int | slice) -> series.Series:
         if isinstance(key, int):
+            if key < 0:
+                raise NotImplementedError("Negative indexing is not supported.")
             return self._apply_unary_op(ops.ArrayIndexOp(index=key))
         elif isinstance(key, slice):
+            if key.step is not None and key.step != 1:
+                raise NotImplementedError(
+                    f"Only a step of 1 is allowed, got {key.step}"
+                )
+            if (key.start is not None and key.start < 0) or (
+                key.stop is not None and key.stop < 0
+            ):
+                raise NotImplementedError(
+                    "Slicing with negative numbers is not allowed."
+                )
+
             return self._apply_unary_op(
                 ops.ArraySliceOp(
                     start=key.start if key.start is not None else 0,
diff --git a/tests/system/small/operations/test_strings.py b/tests/system/small/operations/test_strings.py
index fbb24246b6..6509061cfd 100644
--- a/tests/system/small/operations/test_strings.py
+++ b/tests/system/small/operations/test_strings.py
@@ -572,10 +572,18 @@ def test_str_split_raise_errors(scalars_dfs, pat, regex):
         pytest.param(
             "first", id="invalid_type", marks=pytest.mark.xfail(raises=ValueError)
         ),
+        pytest.param(
+            -1, id="neg_index", marks=pytest.mark.xfail(raises=NotImplementedError)
+        ),
         pytest.param(
             slice(0, 2, 2),
-            id="only_support_step_one",
-            marks=pytest.mark.xfail(raises=ValueError),
+            id="only_allow_one_step",
+            marks=pytest.mark.xfail(raises=NotImplementedError),
+        ),
+        pytest.param(
+            slice(-1, None, None),
+            id="neg_slicing",
+            marks=pytest.mark.xfail(raises=NotImplementedError),
         ),
     ],
 )

From a931dcd8fb5ad715dcd4968e94ce915767985dcb Mon Sep 17 00:00:00 2001
From: Chelsea Lin <chelsealin@google.com>
Date: Mon, 12 Aug 2024 21:15:10 +0000
Subject: [PATCH 3/5] fixing mypy

---
 tests/system/small/operations/test_strings.py                  | 2 +-
 third_party/bigframes_vendored/pandas/core/strings/accessor.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/system/small/operations/test_strings.py b/tests/system/small/operations/test_strings.py
index 6509061cfd..2a9eeff3c4 100644
--- a/tests/system/small/operations/test_strings.py
+++ b/tests/system/small/operations/test_strings.py
@@ -639,7 +639,7 @@ def test_getitem_w_struct_array():
             ("age", pa.int64()),
         ]
     )
-    data = [
+    data: list[list[dict]] = [
         [
             {"name": "Alice", "age": 30},
             {"name": "Bob", "age": 25},
diff --git a/third_party/bigframes_vendored/pandas/core/strings/accessor.py b/third_party/bigframes_vendored/pandas/core/strings/accessor.py
index fe820cf36d..8d91baea9e 100644
--- a/third_party/bigframes_vendored/pandas/core/strings/accessor.py
+++ b/third_party/bigframes_vendored/pandas/core/strings/accessor.py
@@ -13,7 +13,7 @@ class StringMethods:
     R's stringr package.
     """
 
-    def __getitem__(self, pat: str, flags: int = 0):
+    def __getitem__(self, key: int | slice):
         """
         Index or slice string or list in the Series.
 

From ce8507c6c127867d118a0b37fec7158b7cd75c52 Mon Sep 17 00:00:00 2001
From: Chelsea Lin <chelsealin@google.com>
Date: Mon, 12 Aug 2024 21:26:25 +0000
Subject: [PATCH 4/5] fixing unit tests: use Union[int, slice] in annotations

---
 bigframes/operations/strings.py                                | 2 +-
 third_party/bigframes_vendored/pandas/core/strings/accessor.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/bigframes/operations/strings.py b/bigframes/operations/strings.py
index c72cd751f7..d3e9c7edc6 100644
--- a/bigframes/operations/strings.py
+++ b/bigframes/operations/strings.py
@@ -38,7 +38,7 @@
 class StringMethods(bigframes.operations.base.SeriesMethods, vendorstr.StringMethods):
     __doc__ = vendorstr.StringMethods.__doc__
 
-    def __getitem__(self, key: int | slice) -> series.Series:
+    def __getitem__(self, key: Union[int, slice]) -> series.Series:
         if isinstance(key, int):
             if key < 0:
                 raise NotImplementedError("Negative indexing is not supported.")
diff --git a/third_party/bigframes_vendored/pandas/core/strings/accessor.py b/third_party/bigframes_vendored/pandas/core/strings/accessor.py
index 8d91baea9e..bd5e78f415 100644
--- a/third_party/bigframes_vendored/pandas/core/strings/accessor.py
+++ b/third_party/bigframes_vendored/pandas/core/strings/accessor.py
@@ -13,7 +13,7 @@ class StringMethods:
     R's stringr package.
     """
 
-    def __getitem__(self, key: int | slice):
+    def __getitem__(self, key: typing.Union[int, slice]):
         """
         Index or slice string or list in the Series.
 

From e4f4dde7437f4e735291513be19742b27b4544cc Mon Sep 17 00:00:00 2001
From: Chelsea Lin <chelsealin@google.com>
Date: Tue, 13 Aug 2024 04:14:20 +0000
Subject: [PATCH 5/5] fixing system-tests

---
 tests/system/small/operations/test_strings.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/tests/system/small/operations/test_strings.py b/tests/system/small/operations/test_strings.py
index 2a9eeff3c4..3191adf920 100644
--- a/tests/system/small/operations/test_strings.py
+++ b/tests/system/small/operations/test_strings.py
@@ -14,6 +14,7 @@
 
 import re
 
+import packaging.version
 import pandas as pd
 import pyarrow as pa
 import pytest
@@ -633,6 +634,9 @@ def test_getitem_w_array(index):
 
 
 def test_getitem_w_struct_array():
+    if packaging.version.Version(pd.__version__) <= packaging.version.Version("1.5.0"):
+        pytest.skip("https://github.com/googleapis/python-bigquery/issues/1992")
+
     pa_struct = pa.struct(
         [
             ("name", pa.string()),