From b455c30189be970c99b6ea069d20a079e386917f Mon Sep 17 00:00:00 2001 From: Chelsea Lin Date: Mon, 12 Aug 2024 20:06:59 +0000 Subject: [PATCH 1/5] feat: Series.str.__getitem__ --- bigframes/core/compile/scalar_op_compiler.py | 20 ++- bigframes/operations/__init__.py | 34 ++++ bigframes/operations/strings.py | 14 ++ tests/system/small/operations/test_strings.py | 145 ++++++++++++++---- .../pandas/core/strings/accessor.py | 31 ++++ 5 files changed, 216 insertions(+), 28 deletions(-) diff --git a/bigframes/core/compile/scalar_op_compiler.py b/bigframes/core/compile/scalar_op_compiler.py index 32749b32a6..e70c49e337 100644 --- a/bigframes/core/compile/scalar_op_compiler.py +++ b/bigframes/core/compile/scalar_op_compiler.py @@ -902,6 +902,24 @@ def array_to_string_op_impl(x: ibis_types.Value, op: ops.ArrayToStringOp): return typing.cast(ibis_types.ArrayValue, x).join(op.delimiter) +@scalar_op_compiler.register_unary_op(ops.ArrayIndexOp, pass_op=True) +def array_index_op_impl(x: ibis_types.Value, op: ops.ArrayIndexOp): + res = typing.cast(ibis_types.ArrayValue, x)[op.index] + if x.type().is_string(): + return _null_or_value(res, res != ibis.literal("")) + else: + return res + + +@scalar_op_compiler.register_unary_op(ops.ArraySliceOp, pass_op=True) +def array_slice_op_impl(x: ibis_types.Value, op: ops.ArraySliceOp): + res = typing.cast(ibis_types.ArrayValue, x)[op.start : op.stop : op.step] + if x.type().is_string(): + return _null_or_value(res, res != ibis.literal("")) + else: + return res + + # JSON Ops @scalar_op_compiler.register_binary_op(ops.JSONSet, pass_op=True) def json_set_op_impl(x: ibis_types.Value, y: ibis_types.Value, op: ops.JSONSet): @@ -984,7 +1002,7 @@ def ne_op( def _null_or_value(value: ibis_types.Value, where_value: ibis_types.BooleanValue): - return ibis.where( + return ibis.ifelse( where_value, value, ibis.null(), diff --git a/bigframes/operations/__init__.py b/bigframes/operations/__init__.py index 4d4e40643d..fb333d7a53 100644 --- 
a/bigframes/operations/__init__.py +++ b/bigframes/operations/__init__.py @@ -602,6 +602,40 @@ def output_type(self, *input_types): return dtypes.STRING_DTYPE +@dataclasses.dataclass(frozen=True) +class ArrayIndexOp(UnaryOp): + name: typing.ClassVar[str] = "array_index" + index: int + + def output_type(self, *input_types): + input_type = input_types[0] + if dtypes.is_string_like(input_type): + return dtypes.STRING_DTYPE + elif dtypes.is_array_like(input_type): + return dtypes.arrow_dtype_to_bigframes_dtype( + input_type.pyarrow_dtype.value_type + ) + else: + raise TypeError("Input type must be an array or string-like type.") + + +@dataclasses.dataclass(frozen=True) +class ArraySliceOp(UnaryOp): + name: typing.ClassVar[str] = "array_slice" + start: int + stop: typing.Optional[int] = None + step: typing.Optional[int] = None + + def output_type(self, *input_types): + input_type = input_types[0] + if dtypes.is_string_like(input_type): + return dtypes.STRING_DTYPE + elif dtypes.is_array_like(input_type): + return input_type + else: + raise TypeError("Input type must be an array or string-like type.") + + ## JSON Ops @dataclasses.dataclass(frozen=True) class JSONExtract(UnaryOp): diff --git a/bigframes/operations/strings.py b/bigframes/operations/strings.py index 22c325d7e0..91c849479f 100644 --- a/bigframes/operations/strings.py +++ b/bigframes/operations/strings.py @@ -38,6 +38,20 @@ class StringMethods(bigframes.operations.base.SeriesMethods, vendorstr.StringMethods): __doc__ = vendorstr.StringMethods.__doc__ + def __getitem__(self, key: int | slice) -> series.Series: + if isinstance(key, int): + return self._apply_unary_op(ops.ArrayIndexOp(index=key)) + elif isinstance(key, slice): + return self._apply_unary_op( + ops.ArraySliceOp( + start=key.start if key.start is not None else 0, + stop=key.stop, + step=key.step, + ) + ) + else: + raise ValueError(f"key must be an int or slice, got {type(key).__name__}") + def find( self, sub: str, diff --git 
a/tests/system/small/operations/test_strings.py b/tests/system/small/operations/test_strings.py index b8a8ad2d1e..fbb24246b6 100644 --- a/tests/system/small/operations/test_strings.py +++ b/tests/system/small/operations/test_strings.py @@ -15,9 +15,11 @@ import re import pandas as pd +import pyarrow as pa import pytest -import bigframes.series +import bigframes.dtypes as dtypes +import bigframes.pandas as bpd from ...utils import assert_series_equal @@ -25,7 +27,7 @@ def test_find(scalars_dfs): scalars_df, scalars_pandas_df = scalars_dfs col_name = "string_col" - bf_series: bigframes.series.Series = scalars_df[col_name] + bf_series: bpd.Series = scalars_df[col_name] bf_result = bf_series.str.find("W").to_pandas() pd_result = scalars_pandas_df[col_name].str.find("W") @@ -50,7 +52,7 @@ def test_find(scalars_dfs): def test_str_contains(scalars_dfs, pat, case, flags, regex): scalars_df, scalars_pandas_df = scalars_dfs col_name = "string_col" - bf_series: bigframes.series.Series = scalars_df[col_name] + bf_series: bpd.Series = scalars_df[col_name] bf_result = bf_series.str.contains( pat, case=case, flags=flags, regex=regex @@ -72,7 +74,7 @@ def test_str_contains(scalars_dfs, pat, case, flags, regex): def test_str_extract(scalars_dfs, pat): scalars_df, scalars_pandas_df = scalars_dfs col_name = "string_col" - bf_series: bigframes.series.Series = scalars_df[col_name] + bf_series: bpd.Series = scalars_df[col_name] bf_result = bf_series.str.extract(pat).to_pandas() pd_result = scalars_pandas_df[col_name].str.extract(pat) @@ -101,7 +103,7 @@ def test_str_extract(scalars_dfs, pat): def test_str_replace(scalars_dfs, pat, repl, case, flags, regex): scalars_df, scalars_pandas_df = scalars_dfs col_name = "string_col" - bf_series: bigframes.series.Series = scalars_df[col_name] + bf_series: bpd.Series = scalars_df[col_name] bf_result = bf_series.str.replace( pat, repl=repl, case=case, flags=flags, regex=regex @@ -132,7 +134,7 @@ def test_str_replace(scalars_dfs, pat, repl, case, 
flags, regex): def test_str_startswith(scalars_dfs, pat): scalars_df, scalars_pandas_df = scalars_dfs col_name = "string_col" - bf_series: bigframes.series.Series = scalars_df[col_name] + bf_series: bpd.Series = scalars_df[col_name] pd_series = scalars_pandas_df[col_name].astype("object") bf_result = bf_series.str.startswith(pat).to_pandas() @@ -157,7 +159,7 @@ def test_str_startswith(scalars_dfs, pat): def test_str_endswith(scalars_dfs, pat): scalars_df, scalars_pandas_df = scalars_dfs col_name = "string_col" - bf_series: bigframes.series.Series = scalars_df[col_name] + bf_series: bpd.Series = scalars_df[col_name] pd_series = scalars_pandas_df[col_name].astype("object") bf_result = bf_series.str.endswith(pat).to_pandas() @@ -169,7 +171,7 @@ def test_str_endswith(scalars_dfs, pat): def test_len(scalars_dfs): scalars_df, scalars_pandas_df = scalars_dfs col_name = "string_col" - bf_series: bigframes.series.Series = scalars_df[col_name] + bf_series: bpd.Series = scalars_df[col_name] bf_result = bf_series.str.len().to_pandas() pd_result = scalars_pandas_df[col_name].str.len() @@ -188,7 +190,7 @@ def test_len_with_array_column(nested_df, nested_pandas_df): See: https://stackoverflow.com/a/41340543/101923 """ col_name = "event_sequence" - bf_series: bigframes.series.Series = nested_df[col_name] + bf_series: bpd.Series = nested_df[col_name] bf_result = bf_series.str.len().to_pandas() pd_result = nested_pandas_df[col_name].str.len() @@ -204,7 +206,7 @@ def test_len_with_array_column(nested_df, nested_pandas_df): def test_lower(scalars_dfs): scalars_df, scalars_pandas_df = scalars_dfs col_name = "string_col" - bf_series: bigframes.series.Series = scalars_df[col_name] + bf_series: bpd.Series = scalars_df[col_name] bf_result = bf_series.str.lower().to_pandas() pd_result = scalars_pandas_df[col_name].str.lower() @@ -217,7 +219,7 @@ def test_lower(scalars_dfs): def test_reverse(scalars_dfs): scalars_df, scalars_pandas_df = scalars_dfs col_name = "string_col" - bf_series: 
bigframes.series.Series = scalars_df[col_name] + bf_series: bpd.Series = scalars_df[col_name] bf_result = bf_series.str.reverse().to_pandas() pd_result = scalars_pandas_df[col_name].copy() for i in pd_result.index: @@ -239,7 +241,7 @@ def test_reverse(scalars_dfs): def test_slice(scalars_dfs, start, stop): scalars_df, scalars_pandas_df = scalars_dfs col_name = "string_col" - bf_series: bigframes.series.Series = scalars_df[col_name] + bf_series: bpd.Series = scalars_df[col_name] bf_result = bf_series.str.slice(start, stop).to_pandas() pd_series = scalars_pandas_df[col_name] pd_result = pd_series.str.slice(start, stop) @@ -253,7 +255,7 @@ def test_slice(scalars_dfs, start, stop): def test_strip(scalars_dfs): scalars_df, scalars_pandas_df = scalars_dfs col_name = "string_col" - bf_series: bigframes.series.Series = scalars_df[col_name] + bf_series: bpd.Series = scalars_df[col_name] bf_result = bf_series.str.strip().to_pandas() pd_result = scalars_pandas_df[col_name].str.strip() @@ -266,7 +268,7 @@ def test_strip(scalars_dfs): def test_upper(scalars_dfs): scalars_df, scalars_pandas_df = scalars_dfs col_name = "string_col" - bf_series: bigframes.series.Series = scalars_df[col_name] + bf_series: bpd.Series = scalars_df[col_name] bf_result = bf_series.str.upper().to_pandas() pd_result = scalars_pandas_df[col_name].str.upper() @@ -375,7 +377,7 @@ def test_isupper(weird_strings, weird_strings_pd): def test_rstrip(scalars_dfs): scalars_df, scalars_pandas_df = scalars_dfs col_name = "string_col" - bf_series: bigframes.series.Series = scalars_df[col_name] + bf_series: bpd.Series = scalars_df[col_name] bf_result = bf_series.str.rstrip().to_pandas() pd_result = scalars_pandas_df[col_name].str.rstrip() @@ -388,7 +390,7 @@ def test_rstrip(scalars_dfs): def test_lstrip(scalars_dfs): scalars_df, scalars_pandas_df = scalars_dfs col_name = "string_col" - bf_series: bigframes.series.Series = scalars_df[col_name] + bf_series: bpd.Series = scalars_df[col_name] bf_result = 
bf_series.str.lstrip().to_pandas() pd_result = scalars_pandas_df[col_name].str.lstrip() @@ -402,7 +404,7 @@ def test_lstrip(scalars_dfs): def test_repeat(scalars_dfs, repeats): scalars_df, scalars_pandas_df = scalars_dfs col_name = "string_col" - bf_series: bigframes.series.Series = scalars_df[col_name] + bf_series: bpd.Series = scalars_df[col_name] bf_result = bf_series.str.repeat(repeats).to_pandas() pd_result = scalars_pandas_df[col_name].str.repeat(repeats) @@ -415,7 +417,7 @@ def test_repeat(scalars_dfs, repeats): def test_capitalize(scalars_dfs): scalars_df, scalars_pandas_df = scalars_dfs col_name = "string_col" - bf_series: bigframes.series.Series = scalars_df[col_name] + bf_series: bpd.Series = scalars_df[col_name] bf_result = bf_series.str.capitalize().to_pandas() pd_result = scalars_pandas_df[col_name].str.capitalize() @@ -428,9 +430,9 @@ def test_capitalize(scalars_dfs): def test_cat_with_series(scalars_dfs): scalars_df, scalars_pandas_df = scalars_dfs col_name = "string_col" - bf_filter: bigframes.series.Series = scalars_df["bool_col"] - bf_left: bigframes.series.Series = scalars_df[col_name][bf_filter] - bf_right: bigframes.series.Series = scalars_df[col_name] + bf_filter: bpd.Series = scalars_df["bool_col"] + bf_left: bpd.Series = scalars_df[col_name][bf_filter] + bf_right: bpd.Series = scalars_df[col_name] bf_result = bf_left.str.cat(others=bf_right).to_pandas() pd_filter = scalars_pandas_df["bool_col"] pd_left = scalars_pandas_df[col_name][pd_filter] @@ -447,7 +449,7 @@ def test_str_match(scalars_dfs): scalars_df, scalars_pandas_df = scalars_dfs col_name = "string_col" pattern = "[A-Z].*" - bf_series: bigframes.series.Series = scalars_df[col_name] + bf_series: bpd.Series = scalars_df[col_name] bf_result = bf_series.str.match(pattern).to_pandas() pd_result = scalars_pandas_df[col_name].str.match(pattern) @@ -461,7 +463,7 @@ def test_str_fullmatch(scalars_dfs): scalars_df, scalars_pandas_df = scalars_dfs col_name = "string_col" pattern = "[A-Z].*!" 
- bf_series: bigframes.series.Series = scalars_df[col_name] + bf_series: bpd.Series = scalars_df[col_name] bf_result = bf_series.str.fullmatch(pattern).to_pandas() pd_result = scalars_pandas_df[col_name].str.fullmatch(pattern) @@ -474,7 +476,7 @@ def test_str_fullmatch(scalars_dfs): def test_str_get(scalars_dfs): scalars_df, scalars_pandas_df = scalars_dfs col_name = "string_col" - bf_series: bigframes.series.Series = scalars_df[col_name] + bf_series: bpd.Series = scalars_df[col_name] bf_result = bf_series.str.get(8).to_pandas() pd_result = scalars_pandas_df[col_name].str.get(8) @@ -487,7 +489,7 @@ def test_str_get(scalars_dfs): def test_str_pad(scalars_dfs): scalars_df, scalars_pandas_df = scalars_dfs col_name = "string_col" - bf_series: bigframes.series.Series = scalars_df[col_name] + bf_series: bpd.Series = scalars_df[col_name] bf_result = bf_series.str.pad(8, side="both", fillchar="%").to_pandas() pd_result = scalars_pandas_df[col_name].str.pad(8, side="both", fillchar="%") @@ -510,7 +512,7 @@ def test_str_zfill(weird_strings, weird_strings_pd): def test_str_ljust(scalars_dfs): scalars_df, scalars_pandas_df = scalars_dfs col_name = "string_col" - bf_series: bigframes.series.Series = scalars_df[col_name] + bf_series: bpd.Series = scalars_df[col_name] bf_result = bf_series.str.ljust(7, fillchar="%").to_pandas() pd_result = scalars_pandas_df[col_name].str.ljust(7, fillchar="%") @@ -523,7 +525,7 @@ def test_str_ljust(scalars_dfs): def test_str_rjust(scalars_dfs): scalars_df, scalars_pandas_df = scalars_dfs col_name = "string_col" - bf_series: bigframes.series.Series = scalars_df[col_name] + bf_series: bpd.Series = scalars_df[col_name] bf_result = bf_series.str.rjust(9, fillchar="%").to_pandas() pd_result = scalars_pandas_df[col_name].str.rjust(9, fillchar="%") @@ -562,3 +564,92 @@ def test_str_split_raise_errors(scalars_dfs, pat, regex): pd_result = pd_result.apply(lambda x: [] if pd.isnull(x) is True else x) assert_series_equal(pd_result, bf_result, 
check_dtype=False) + + +@pytest.mark.parametrize( + ("index"), + [ + pytest.param( + "first", id="invalid_type", marks=pytest.mark.xfail(raises=ValueError) + ), + pytest.param( + slice(0, 2, 2), + id="only_support_step_one", + marks=pytest.mark.xfail(raises=ValueError), + ), + ], +) +def test_getitem_raise_errors(scalars_dfs, index): + scalars_df, _ = scalars_dfs + col_name = "string_col" + scalars_df[col_name].str[index] + + +@pytest.mark.parametrize( + ("index"), + [ + pytest.param(2, id="int"), + pytest.param(slice(None, None, None), id="default_start_slice"), + pytest.param(slice(0, None, 1), id="default_stop_slice"), + pytest.param(slice(0, 2, None), id="default_step_slice"), + ], +) +def test_getitem_w_string(scalars_dfs, index): + scalars_df, scalars_pandas_df = scalars_dfs + col_name = "string_col" + bf_result = scalars_df[col_name].str[index].to_pandas() + pd_result = scalars_pandas_df[col_name].str[index] + + assert_series_equal(pd_result, bf_result) + + +@pytest.mark.parametrize( + ("index"), + [ + pytest.param(2, id="int"), + pytest.param(slice(None, None, None), id="default_start_slice"), + pytest.param(slice(0, None, 1), id="default_stop_slice"), + pytest.param(slice(0, 2, None), id="default_step_slice"), + pytest.param(slice(0, 0, None), id="single_one_slice"), + ], +) +def test_getitem_w_array(index): + data = [[1], [2, 3], [], [4, 5, 6]] + s = bpd.Series(data) + pd_s = pd.Series(data) + + bf_result = s.str[index].to_pandas() + pd_result = pd_s.str[index] + # Skip dtype checks here because pandas returns `int64` while BF returns `Int64`. 
+ assert_series_equal(pd_result, bf_result, check_dtype=False, check_index_type=False) + + +def test_getitem_w_struct_array(): + pa_struct = pa.struct( + [ + ("name", pa.string()), + ("age", pa.int64()), + ] + ) + data = [ + [ + {"name": "Alice", "age": 30}, + {"name": "Bob", "age": 25}, + ], + [ + {"name": "Charlie", "age": 35}, + {"name": "David", "age": 40}, + {"name": "Eva", "age": 28}, + ], + [], + [{"name": "Frank", "age": 50}], + ] + s = bpd.Series(data, dtype=bpd.ArrowDtype(pa.list_(pa_struct))) + + result = s.str[1] + assert dtypes.is_struct_like(result.dtype) + + expected_data = [item[1] if len(item) > 1 else None for item in data] + expected = bpd.Series(expected_data, dtype=bpd.ArrowDtype((pa_struct))) + + assert_series_equal(result.to_pandas(), expected.to_pandas()) diff --git a/third_party/bigframes_vendored/pandas/core/strings/accessor.py b/third_party/bigframes_vendored/pandas/core/strings/accessor.py index b02c23f945..fe820cf36d 100644 --- a/third_party/bigframes_vendored/pandas/core/strings/accessor.py +++ b/third_party/bigframes_vendored/pandas/core/strings/accessor.py @@ -13,6 +13,37 @@ class StringMethods: R's stringr package. """ + def __getitem__(self, pat: str, flags: int = 0): + """ + Index or slice string or list in the Series. + + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + >>> s = bpd.Series(['Alice', 'Bob', 'Charlie']) + >>> s.str[0] + 0 A + 1 B + 2 C + dtype: string + + >>> s.str[0:3] + 0 Ali + 1 Bob + 2 Cha + dtype: string + + Args: + key (int | slice): + Index or slice of indices to access from each string or list. + + Returns: + bigframes.series.Series: The element (for an int key) or the slice (for a slice key) taken from each string or list. + """ + raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) + def extract(self, pat: str, flags: int = 0): """ Extract capture groups in the regex `pat` as columns in a DataFrame. 
From 8304d835b2b164073f68cc211c59cf87e1eb58f6 Mon Sep 17 00:00:00 2001 From: Chelsea Lin Date: Mon, 12 Aug 2024 21:05:12 +0000 Subject: [PATCH 2/5] handles error cases --- bigframes/operations/strings.py | 13 +++++++++++++ tests/system/small/operations/test_strings.py | 12 ++++++++++-- 2 files changed, 23 insertions(+), 2 deletions(-) diff --git a/bigframes/operations/strings.py b/bigframes/operations/strings.py index 91c849479f..c72cd751f7 100644 --- a/bigframes/operations/strings.py +++ b/bigframes/operations/strings.py @@ -40,8 +40,21 @@ class StringMethods(bigframes.operations.base.SeriesMethods, vendorstr.StringMet def __getitem__(self, key: int | slice) -> series.Series: if isinstance(key, int): + if key < 0: + raise NotImplementedError("Negative indexing is not supported.") return self._apply_unary_op(ops.ArrayIndexOp(index=key)) elif isinstance(key, slice): + if key.step is not None and key.step != 1: + raise NotImplementedError( + f"Only a step of 1 is allowed, got {key.step}" + ) + if (key.start is not None and key.start < 0) or ( + key.stop is not None and key.stop < 0 + ): + raise NotImplementedError( + "Slicing with negative numbers is not allowed." 
+ ) + return self._apply_unary_op( ops.ArraySliceOp( start=key.start if key.start is not None else 0, diff --git a/tests/system/small/operations/test_strings.py b/tests/system/small/operations/test_strings.py index fbb24246b6..6509061cfd 100644 --- a/tests/system/small/operations/test_strings.py +++ b/tests/system/small/operations/test_strings.py @@ -572,10 +572,18 @@ def test_str_split_raise_errors(scalars_dfs, pat, regex): pytest.param( "first", id="invalid_type", marks=pytest.mark.xfail(raises=ValueError) ), + pytest.param( + -1, id="neg_index", marks=pytest.mark.xfail(raises=NotImplementedError) + ), pytest.param( slice(0, 2, 2), - id="only_support_step_one", - marks=pytest.mark.xfail(raises=ValueError), + id="only_allow_one_step", + marks=pytest.mark.xfail(raises=NotImplementedError), + ), + pytest.param( + slice(-1, None, None), + id="neg_slicing", + marks=pytest.mark.xfail(raises=NotImplementedError), ), ], ) From a931dcd8fb5ad715dcd4968e94ce915767985dcb Mon Sep 17 00:00:00 2001 From: Chelsea Lin Date: Mon, 12 Aug 2024 21:15:10 +0000 Subject: [PATCH 3/5] fixing mypy --- tests/system/small/operations/test_strings.py | 2 +- third_party/bigframes_vendored/pandas/core/strings/accessor.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/system/small/operations/test_strings.py b/tests/system/small/operations/test_strings.py index 6509061cfd..2a9eeff3c4 100644 --- a/tests/system/small/operations/test_strings.py +++ b/tests/system/small/operations/test_strings.py @@ -639,7 +639,7 @@ def test_getitem_w_struct_array(): ("age", pa.int64()), ] ) - data = [ + data: list[list[dict]] = [ [ {"name": "Alice", "age": 30}, {"name": "Bob", "age": 25}, diff --git a/third_party/bigframes_vendored/pandas/core/strings/accessor.py b/third_party/bigframes_vendored/pandas/core/strings/accessor.py index fe820cf36d..8d91baea9e 100644 --- a/third_party/bigframes_vendored/pandas/core/strings/accessor.py +++ 
b/third_party/bigframes_vendored/pandas/core/strings/accessor.py @@ -13,7 +13,7 @@ class StringMethods: R's stringr package. """ - def __getitem__(self, pat: str, flags: int = 0): + def __getitem__(self, key: int | slice): """ Index or slice string or list in the Series. From ce8507c6c127867d118a0b37fec7158b7cd75c52 Mon Sep 17 00:00:00 2001 From: Chelsea Lin Date: Mon, 12 Aug 2024 21:26:25 +0000 Subject: [PATCH 4/5] fixing unit --- bigframes/operations/strings.py | 2 +- third_party/bigframes_vendored/pandas/core/strings/accessor.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/bigframes/operations/strings.py b/bigframes/operations/strings.py index c72cd751f7..d3e9c7edc6 100644 --- a/bigframes/operations/strings.py +++ b/bigframes/operations/strings.py @@ -38,7 +38,7 @@ class StringMethods(bigframes.operations.base.SeriesMethods, vendorstr.StringMethods): __doc__ = vendorstr.StringMethods.__doc__ - def __getitem__(self, key: int | slice) -> series.Series: + def __getitem__(self, key: Union[int, slice]) -> series.Series: if isinstance(key, int): if key < 0: raise NotImplementedError("Negative indexing is not supported.") diff --git a/third_party/bigframes_vendored/pandas/core/strings/accessor.py b/third_party/bigframes_vendored/pandas/core/strings/accessor.py index 8d91baea9e..bd5e78f415 100644 --- a/third_party/bigframes_vendored/pandas/core/strings/accessor.py +++ b/third_party/bigframes_vendored/pandas/core/strings/accessor.py @@ -13,7 +13,7 @@ class StringMethods: R's stringr package. """ - def __getitem__(self, key: int | slice): + def __getitem__(self, key: typing.Union[int, slice]): """ Index or slice string or list in the Series. 
From e4f4dde7437f4e735291513be19742b27b4544cc Mon Sep 17 00:00:00 2001 From: Chelsea Lin Date: Tue, 13 Aug 2024 04:14:20 +0000 Subject: [PATCH 5/5] fixing system-tests --- tests/system/small/operations/test_strings.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/system/small/operations/test_strings.py b/tests/system/small/operations/test_strings.py index 2a9eeff3c4..3191adf920 100644 --- a/tests/system/small/operations/test_strings.py +++ b/tests/system/small/operations/test_strings.py @@ -14,6 +14,7 @@ import re +import packaging.version import pandas as pd import pyarrow as pa import pytest @@ -633,6 +634,9 @@ def test_getitem_w_array(index): def test_getitem_w_struct_array(): + if packaging.version.Version(pd.__version__) <= packaging.version.Version("1.5.0"): + pytest.skip("https://github.com/googleapis/python-bigquery/issues/1992") + pa_struct = pa.struct( [ ("name", pa.string()),