From e2b2dc03ee99afae65034512e8aaf26c3108d258 Mon Sep 17 00:00:00 2001
From: Trevor Bergeron
Date: Thu, 29 May 2025 17:31:39 +0000
Subject: [PATCH] feat: Support isin with bigframes.pandas.Index arg

---
 bigframes/core/indexes/base.py    |  4 ++++
 bigframes/series.py               |  4 +++-
 tests/system/small/test_index.py  | 34 ++++++++++++++++++++++++++++++-
 tests/system/small/test_series.py | 18 ++++++++++++++++
 4 files changed, 58 insertions(+), 2 deletions(-)

diff --git a/bigframes/core/indexes/base.py b/bigframes/core/indexes/base.py
index 6da68e2e8f..468ba9c612 100644
--- a/bigframes/core/indexes/base.py
+++ b/bigframes/core/indexes/base.py
@@ -458,6 +458,10 @@ def unique(self, level: Hashable | int | None = None) -> Index:
         return self.get_level_values(level).drop_duplicates()
 
     def isin(self, values) -> Index:
+        import bigframes.series as series
+
+        if isinstance(values, (series.Series, Index)):
+            return Index(self.to_series().isin(values))
         if not utils.is_list_like(values):
             raise TypeError(
                 "only list-like objects are allowed to be passed to "
diff --git a/bigframes/series.py b/bigframes/series.py
index 626cf2fc76..ba45211729 100644
--- a/bigframes/series.py
+++ b/bigframes/series.py
@@ -905,8 +905,10 @@ def nsmallest(self, n: int = 5, keep: str = "first") -> Series:
         )
 
     def isin(self, values) -> "Series" | None:
-        if isinstance(values, (Series,)):
+        if isinstance(values, Series):
             return Series(self._block.isin(values._block))
+        if isinstance(values, indexes.Index):
+            return Series(self._block.isin(values.to_series()._block))
         if not _is_list_like(values):
             raise TypeError(
                 "only list-like objects are allowed to be passed to "
diff --git a/tests/system/small/test_index.py b/tests/system/small/test_index.py
index 9f45c8465b..6e230974fe 100644
--- a/tests/system/small/test_index.py
+++ b/tests/system/small/test_index.py
@@ -375,7 +375,7 @@ def test_index_drop_duplicates(scalars_df_index, scalars_pandas_df_index, keep):
     )
 
 
-def test_index_isin(scalars_df_index, scalars_pandas_df_index):
+def test_index_isin_list(scalars_df_index, scalars_pandas_df_index):
     col_name = "int64_col"
     bf_series = (
         scalars_df_index.set_index(col_name).index.isin([2, 55555, 4]).to_pandas()
@@ -389,6 +389,38 @@ def test_index_isin(scalars_df_index, scalars_pandas_df_index):
     )
 
 
+def test_index_isin_bf_series(scalars_df_index, scalars_pandas_df_index, session):
+    col_name = "int64_col"
+    bf_series = (
+        scalars_df_index.set_index(col_name)
+        .index.isin(bpd.Series([2, 55555, 4], session=session))
+        .to_pandas()
+    )
+    pd_result_array = scalars_pandas_df_index.set_index(col_name).index.isin(
+        [2, 55555, 4]
+    )
+    pd.testing.assert_index_equal(
+        pd.Index(pd_result_array).set_names(col_name),
+        bf_series,
+    )
+
+
+def test_index_isin_bf_index(scalars_df_index, scalars_pandas_df_index, session):
+    col_name = "int64_col"
+    bf_series = (
+        scalars_df_index.set_index(col_name)
+        .index.isin(bpd.Index([2, 55555, 4], session=session))
+        .to_pandas()
+    )
+    pd_result_array = scalars_pandas_df_index.set_index(col_name).index.isin(
+        [2, 55555, 4]
+    )
+    pd.testing.assert_index_equal(
+        pd.Index(pd_result_array).set_names(col_name),
+        bf_series,
+    )
+
+
 def test_multiindex_name_is_none(session):
     df = pd.DataFrame(
         {
diff --git a/tests/system/small/test_series.py b/tests/system/small/test_series.py
index b4c24e4ba9..347b7fd7d1 100644
--- a/tests/system/small/test_series.py
+++ b/tests/system/small/test_series.py
@@ -1368,6 +1368,24 @@ def test_isin_bigframes_values(scalars_dfs, col_name, test_set, session):
     )
 
 
+def test_isin_bigframes_index(scalars_dfs, session):
+    scalars_df, scalars_pandas_df = scalars_dfs
+    bf_result = (
+        scalars_df["string_col"]
+        .isin(bigframes.pandas.Index(["Hello, World!", "Hi", "こんにちは"], session=session))
+        .to_pandas()
+    )
+    pd_result = (
+        scalars_pandas_df["string_col"]
+        .isin(pd.Index(["Hello, World!", "Hi", "こんにちは"]))
+        .astype("boolean")
+    )
+    pd.testing.assert_series_equal(
+        pd_result,
+        bf_result,
+    )
+
+
 @pytest.mark.parametrize(
     (
         "col_name",