diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py index 35b6ff4ddf..abab9fd268 100644 --- a/bigframes/dataframe.py +++ b/bigframes/dataframe.py @@ -4108,7 +4108,7 @@ def map(self, func, na_action: Optional[str] = None) -> DataFrame: ) def apply(self, func, *, axis=0, args: typing.Tuple = (), **kwargs): - # In Bigframes remote function, DataFrame '.apply' method is specifically + # In Bigframes BigQuery function, DataFrame '.apply' method is specifically # designed to work with row-wise or column-wise operations, where the input # to the applied function should be a Series, not a scalar. @@ -4116,24 +4116,18 @@ def apply(self, func, *, axis=0, args: typing.Tuple = (), **kwargs): msg = bfe.format_message("axis=1 scenario is in preview.") warnings.warn(msg, category=bfe.PreviewWarning) - # TODO(jialuo): Deprecate the "bigframes_remote_function" attribute. - # We have some tests using pre-defined remote_function that were - # defined based on "bigframes_remote_function" instead of - # "bigframes_bigquery_function". So we need to fix those pre-defined - # remote functions before deprecating the "bigframes_remote_function" - # attribute. Check if the function is a remote function. - if not hasattr(func, "bigframes_remote_function") and not hasattr( - func, "bigframes_bigquery_function" - ): - raise ValueError("For axis=1 a bigframes function must be used.") + if not hasattr(func, "bigframes_bigquery_function"): + raise ValueError( + "For axis=1 a BigFrames BigQuery function must be used." + ) is_row_processor = getattr(func, "is_row_processor") if is_row_processor: # Early check whether the dataframe dtypes are currently supported - # in the remote function + # in the bigquery function # NOTE: Keep in sync with the value converters used in the gcf code # generated in function_template.py - remote_function_supported_dtypes = ( + bigquery_function_supported_dtypes = ( bigframes.dtypes.INT_DTYPE, bigframes.dtypes.FLOAT_DTYPE, bigframes.dtypes.BOOL_DTYPE, @@ -4142,18 +4136,18 @@ def apply(self, func, *, axis=0, args: typing.Tuple = (), **kwargs): ) supported_dtypes_types = tuple( type(dtype) - for dtype in remote_function_supported_dtypes + for dtype in bigquery_function_supported_dtypes if not isinstance(dtype, pandas.ArrowDtype) ) # Check ArrowDtype separately since multiple BigQuery types map to # ArrowDtype, including BYTES and TIMESTAMP. supported_arrow_types = tuple( dtype.pyarrow_dtype - for dtype in remote_function_supported_dtypes + for dtype in bigquery_function_supported_dtypes if isinstance(dtype, pandas.ArrowDtype) ) supported_dtypes_hints = tuple( - str(dtype) for dtype in remote_function_supported_dtypes + str(dtype) for dtype in bigquery_function_supported_dtypes ) for dtype in self.dtypes: @@ -4186,10 +4180,11 @@ def apply(self, func, *, axis=0, args: typing.Tuple = (), **kwargs): ) else: # This is a special case where we are providing not-pandas-like - # extension. If the remote function can take one or more params - # then we assume that here the user intention is to use the - # column values of the dataframe as arguments to the function. - # For this to work the following condition must be true: + # extension. If the bigquery function can take one or more + # params then we assume that here the user intention is to use + # the column values of the dataframe as arguments to the + # function. For this to work the following condition must be + # true: # 1. The number or input params in the function must be same # as the number of columns in the dataframe # 2. The dtypes of the columns in the dataframe must be @@ -4231,15 +4226,16 @@ def apply(self, func, *, axis=0, args: typing.Tuple = (), **kwargs): return result_series - # At this point column-wise or element-wise remote function operation will + # At this point column-wise or element-wise bigquery function operation will # be performed (not supported). - if hasattr(func, "bigframes_remote_function"): + if hasattr(func, "bigframes_bigquery_function"): raise formatter.create_exception_with_feedback_link( NotImplementedError, - "BigFrames DataFrame '.apply()' does not support remote function " - "for column-wise (i.e. with axis=0) operations, please use a " - "regular python function instead. For element-wise operations of " - "the remote function, please use '.map()'.", + "BigFrames DataFrame '.apply()' does not support BigFrames " + "BigQuery function for column-wise (i.e. with axis=0) " + "operations, please use a regular python function instead. For " + "element-wise operations of the BigFrames BigQuery function, " + "please use '.map()'.", ) # Per-column apply diff --git a/bigframes/functions/_function_session.py b/bigframes/functions/_function_session.py index 2a211ad7c1..1444457c90 100644 --- a/bigframes/functions/_function_session.py +++ b/bigframes/functions/_function_session.py @@ -741,7 +741,7 @@ def wrapper(func): # with that name and would directly manage their lifecycle. if created_new and (not name): self._update_temp_artifacts( - func.bigframes_remote_function, func.bigframes_cloud_function + func.bigframes_bigquery_function, func.bigframes_cloud_function ) return func diff --git a/bigframes/functions/function.py b/bigframes/functions/function.py index fa4da1c0e4..fd2f512f97 100644 --- a/bigframes/functions/function.py +++ b/bigframes/functions/function.py @@ -219,7 +219,11 @@ def func(*bigframes_args, **bigframes_kwargs): database=routine_ref.dataset_id, signature=(ibis_signature.input_types, ibis_signature.output_type), ) # type: ignore - func.bigframes_remote_function = str(routine_ref) # type: ignore + func.bigframes_bigquery_function = str(routine_ref) # type: ignore + + # We will keep the "bigframes_remote_function" attr for remote function. + if hasattr(routine, "remote_function_options") and routine.remote_function_options: + func.bigframes_remote_function = func.bigframes_bigquery_function # type: ignore # set input bigframes data types has_unknown_dtypes = False diff --git a/bigframes/series.py b/bigframes/series.py index 2c37913679..34ac3c3de9 100644 --- a/bigframes/series.py +++ b/bigframes/series.py @@ -68,9 +68,9 @@ LevelsType = typing.Union[LevelType, typing.Sequence[LevelType]] -_remote_function_recommendation_message = ( +_bigquery_function_recommendation_message = ( "Your functions could not be applied directly to the Series." - " Try converting it to a remote function." + " Try converting it to a BigFrames BigQuery function." ) _list = list # Type alias to escape Series.list property @@ -1530,25 +1530,20 @@ def apply( if not callable(func): raise ValueError( - "Only a ufunc (a function that applies to the entire Series) or a remote function that only works on single values are supported." + "Only a ufunc (a function that applies to the entire Series) or" + " a BigFrames BigQuery function that only works on single values" + " are supported." ) - # TODO(jialuo): Deprecate the "bigframes_remote_function" attribute. - # We have some tests using pre-defined remote_function that were defined - # based on "bigframes_remote_function" instead of - # "bigframes_bigquery_function". So we need to fix those pre-defined - # remote functions before deprecating the "bigframes_remote_function" - # attribute. - if not hasattr(func, "bigframes_remote_function") and not hasattr( - func, "bigframes_bigquery_function" - ): + if not hasattr(func, "bigframes_bigquery_function"): # It is neither a remote function nor a managed function. # Then it must be a vectorized function that applies to the Series # as a whole. if by_row: raise ValueError( - "A vectorized non-remote function can be provided only with by_row=False." - " For element-wise operation it must be a remote function." + "A vectorized non-BigFrames BigQuery function can be " + "provided only with by_row=False. For element-wise operation " + "it must be a BigFrames BigQuery function." ) try: @@ -1556,12 +1551,12 @@ def apply( except Exception as ex: # This could happen if any of the operators in func is not # supported on a Series. Let's guide the customer to use a - # remote function instead + # bigquery function instead if hasattr(ex, "message"): - ex.message += f"\n{_remote_function_recommendation_message}" + ex.message += f"\n{_bigquery_function_recommendation_message}" raise - # We are working with remote function at this point + # We are working with bigquery function at this point result_series = self._apply_unary_op( ops.RemoteFunctionOp(func=func, apply_on_null=True) ) @@ -1590,21 +1585,21 @@ def combine( ) -> Series: if not callable(func): raise ValueError( - "Only a ufunc (a function that applies to the entire Series) or a remote function that only works on single values are supported." + "Only a ufunc (a function that applies to the entire Series) or" + " a BigFrames BigQuery function that only works on single values" + " are supported." ) - if not hasattr(func, "bigframes_remote_function") and not hasattr( - func, "bigframes_bigquery_function" - ): + if not hasattr(func, "bigframes_bigquery_function"): # Keep this in sync with .apply try: return func(self, other) except Exception as ex: # This could happen if any of the operators in func is not # supported on a Series. Let's guide the customer to use a - # remote function instead + # bigquery function instead if hasattr(ex, "message"): - ex.message += f"\n{_remote_function_recommendation_message}" + ex.message += f"\n{_bigquery_function_recommendation_message}" raise result_series = self._apply_binary_op( @@ -1749,10 +1744,10 @@ def duplicated(self, keep: str = "first") -> Series: def mask(self, cond, other=None) -> Series: if callable(cond): - if hasattr(cond, "bigframes_remote_function"): + if hasattr(cond, "bigframes_bigquery_function"): cond = self.apply(cond) else: - # For non-remote function assume that it is applicable on Series + # For non-BigQuery function assume that it is applicable on Series cond = self.apply(cond, by_row=False) if not isinstance(cond, Series): diff --git a/tests/system/large/functions/test_managed_function.py b/tests/system/large/functions/test_managed_function.py index 0af6810fa9..a217921abd 100644 --- a/tests/system/large/functions/test_managed_function.py +++ b/tests/system/large/functions/test_managed_function.py @@ -147,6 +147,18 @@ def func(x, y): .to_pandas() ) pandas.testing.assert_series_equal(bf_result, pd_result) + + # Make sure the read_gbq_function path works for this function. + managed_func_ref = session.read_gbq_function( + managed_func.bigframes_bigquery_function + ) + bf_result_gbq = ( + scalars_df["string_col"] + .combine(scalars_df["int64_col"], managed_func_ref) + .to_pandas() + ) + pandas.testing.assert_series_equal(bf_result_gbq, pd_result) + finally: # clean up the gcp assets created for the managed function. cleanup_function_assets( @@ -181,6 +193,23 @@ def featurize(x: int) -> list[array_dtype]: # type: ignore # Ignore any dtype disparity. pandas.testing.assert_series_equal(pd_result, bf_result, check_dtype=False) + # Make sure the read_gbq_function path works for this function. + featurize_ref = session.read_gbq_function(featurize.bigframes_bigquery_function) + + assert hasattr(featurize_ref, "bigframes_bigquery_function") + assert not hasattr(featurize_ref, "bigframes_remote_function") + assert ( + featurize_ref.bigframes_bigquery_function + == featurize.bigframes_bigquery_function + ) + + # Test on the function from read_gbq_function. + got = featurize_ref(10) + assert got == [array_dtype(i) for i in [10, 11, 12]] + + bf_result_gbq = bf_int64_col.apply(featurize_ref).to_pandas() + pandas.testing.assert_series_equal(bf_result_gbq, pd_result, check_dtype=False) + finally: # Clean up the gcp assets created for the managed function. cleanup_function_assets( @@ -295,6 +324,22 @@ def foo(x, y, z): expected_result, bf_result, check_dtype=False, check_index_type=False ) + # Make sure the read_gbq_function path works for this function. + foo_ref = session.read_gbq_function(foo.bigframes_bigquery_function) + + assert hasattr(foo_ref, "bigframes_bigquery_function") + assert not hasattr(foo_ref, "bigframes_remote_function") + assert foo_ref.bigframes_bigquery_function == foo.bigframes_bigquery_function + + # Test on the function from read_gbq_function. + got = foo_ref(10, 38, "hello") + assert got == ["10", "38.0", "hello"] + + bf_result_gbq = bf_df.apply(foo_ref, axis=1).to_pandas() + pandas.testing.assert_series_equal( + bf_result_gbq, expected_result, check_dtype=False, check_index_type=False + ) + finally: # Clean up the gcp assets created for the managed function. cleanup_function_assets(foo, session.bqclient, session.cloudfunctionsclient) diff --git a/tests/system/large/functions/test_remote_function.py b/tests/system/large/functions/test_remote_function.py index 0d7f888306..1e5e7ede26 100644 --- a/tests/system/large/functions/test_remote_function.py +++ b/tests/system/large/functions/test_remote_function.py @@ -882,6 +882,7 @@ def square(x): # The remote function should reflect the explicitly provided name assert square_remote.bigframes_remote_function == expected_remote_function + assert square_remote.bigframes_bigquery_function == expected_remote_function # Now the expected BQ remote function should exist session.bqclient.get_routine(expected_remote_function) @@ -1013,6 +1014,7 @@ def test_internal(rf, udf): # The remote function should reflect the explicitly provided name assert square_remote1.bigframes_remote_function == expected_remote_function + assert square_remote1.bigframes_bigquery_function == expected_remote_function # Now the expected BQ remote function should exist routine = session.bqclient.get_routine(expected_remote_function) @@ -1037,6 +1039,7 @@ def test_internal(rf, udf): # The new remote function should still reflect the explicitly provided name assert square_remote2.bigframes_remote_function == expected_remote_function + assert square_remote2.bigframes_bigquery_function == expected_remote_function # The expected BQ remote function should still exist routine = session.bqclient.get_routine(expected_remote_function) @@ -1080,6 +1083,7 @@ def plusone(x): # The new remote function should still reflect the explicitly provided name assert plusone_remote.bigframes_remote_function == expected_remote_function + assert plusone_remote.bigframes_bigquery_function == expected_remote_function # The expected BQ remote function should still exist routine = session.bqclient.get_routine(expected_remote_function) @@ -1234,7 +1238,7 @@ def square(x): return x * x assert ( - bigquery.Routine(square.bigframes_remote_function).dataset_id + bigquery.Routine(square.bigframes_bigquery_function).dataset_id == session._anonymous_dataset.dataset_id ) @@ -1495,7 +1499,7 @@ def square(x): )(square) bq_routine = session.bqclient.get_routine( - square_remote.bigframes_remote_function + square_remote.bigframes_bigquery_function ) assert bq_routine.remote_function_options.max_batching_rows == max_batching_rows @@ -1642,7 +1646,7 @@ def serialize_row(row): # Let's make sure the read_gbq_function path works for this function serialize_row_reuse = session.read_gbq_function( - serialize_row_remote.bigframes_remote_function, is_row_processor=True + serialize_row_remote.bigframes_bigquery_function, is_row_processor=True ) bf_result = scalars_df[columns].apply(serialize_row_reuse, axis=1).to_pandas() pandas.testing.assert_series_equal(pd_result, bf_result, check_dtype=False) @@ -1950,6 +1954,8 @@ def foo(x: int) -> int: # ensure that remote function artifacts are created assert foo.bigframes_remote_function is not None session.bqclient.get_routine(foo.bigframes_remote_function) is not None + assert foo.bigframes_bigquery_function is not None + session.bqclient.get_routine(foo.bigframes_bigquery_function) is not None assert foo.bigframes_cloud_function is not None session.cloudfunctionsclient.get_function( name=foo.bigframes_cloud_function @@ -1960,7 +1966,7 @@ def foo(x: int) -> int: # ensure that the bq remote function is deleted with pytest.raises(google.cloud.exceptions.NotFound): - session.bqclient.get_routine(foo.bigframes_remote_function) + session.bqclient.get_routine(foo.bigframes_bigquery_function) # the deletion of cloud function happens in a non-blocking way, ensure that # it either exists in a being-deleted state, or is already deleted @@ -1990,6 +1996,8 @@ def foo(x: int) -> int: # ensure that remote function artifacts are created assert foo.bigframes_remote_function is not None session.bqclient.get_routine(foo.bigframes_remote_function) is not None + assert foo.bigframes_bigquery_function is not None + session.bqclient.get_routine(foo.bigframes_bigquery_function) is not None assert foo.bigframes_cloud_function is not None session.cloudfunctionsclient.get_function( name=foo.bigframes_cloud_function @@ -1999,7 +2007,7 @@ def foo(x: int) -> int: session.close() # ensure that the bq remote function still exists - session.bqclient.get_routine(foo.bigframes_remote_function) is not None + session.bqclient.get_routine(foo.bigframes_bigquery_function) is not None # the deletion of cloud function happens in a non-blocking way, ensure # that it was not deleted and still exists in active state @@ -2038,6 +2046,8 @@ def foo_named(x: int) -> int: for foo in [foo_unnamed, foo_named]: assert foo.bigframes_remote_function is not None session.bqclient.get_routine(foo.bigframes_remote_function) is not None + assert foo.bigframes_bigquery_function is not None + session.bqclient.get_routine(foo.bigframes_bigquery_function) is not None assert foo.bigframes_cloud_function is not None session.cloudfunctionsclient.get_function( name=foo.bigframes_cloud_function @@ -2051,7 +2061,7 @@ def foo_named(x: int) -> int: # ensure that the unnamed bq remote function is deleted along with its # corresponding cloud function with pytest.raises(google.cloud.exceptions.NotFound): - session.bqclient.get_routine(foo_unnamed.bigframes_remote_function) + session.bqclient.get_routine(foo_unnamed.bigframes_bigquery_function) try: gcf = session.cloudfunctionsclient.get_function( name=foo_unnamed.bigframes_cloud_function @@ -2062,7 +2072,7 @@ def foo_named(x: int) -> int: # ensure that the named bq remote function still exists along with its # corresponding cloud function - session.bqclient.get_routine(foo_named.bigframes_remote_function) is not None + session.bqclient.get_routine(foo_named.bigframes_bigquery_function) is not None gcf = session.cloudfunctionsclient.get_function( name=foo_named.bigframes_cloud_function ) @@ -2139,7 +2149,7 @@ def foo(x, y, z): ) # Let's make sure the read_gbq_function path works for this function - foo_reuse = session.read_gbq_function(foo.bigframes_remote_function) + foo_reuse = session.read_gbq_function(foo.bigframes_bigquery_function) bf_result = bf_df.apply(foo_reuse, axis=1).to_pandas() pandas.testing.assert_series_equal( expected_result, bf_result, check_dtype=False, check_index_type=False @@ -2225,7 +2235,7 @@ def foo(x, y, z): ) # Let's make sure the read_gbq_function path works for this function - foo_reuse = session.read_gbq_function(foo.bigframes_remote_function) + foo_reuse = session.read_gbq_function(foo.bigframes_bigquery_function) bf_result = bf_df.apply(foo_reuse, axis=1).to_pandas() pandas.testing.assert_series_equal( expected_result, bf_result, check_dtype=False, check_index_type=False @@ -2325,7 +2335,7 @@ def generate_stats(row: pandas.Series) -> list[int]: # Let's make sure the read_gbq_function path works for this function generate_stats_reuse = session.read_gbq_function( - generate_stats.bigframes_remote_function, + generate_stats.bigframes_bigquery_function, is_row_processor=True, ) bf_result = scalars_df[columns].apply(generate_stats_reuse, axis=1).to_pandas() @@ -2468,7 +2478,7 @@ def add_one(x: int) -> int: )(add_one) temporary_bigquery_remote_function = ( - add_one_remote_temp.bigframes_remote_function + add_one_remote_temp.bigframes_bigquery_function ) assert temporary_bigquery_remote_function is not None assert ( @@ -2545,7 +2555,7 @@ def add_one(x: int) -> int: )(add_one) persistent_bigquery_remote_function = ( - add_one_remote_persist.bigframes_remote_function + add_one_remote_persist.bigframes_bigquery_function ) assert persistent_bigquery_remote_function is not None assert ( @@ -2626,7 +2636,7 @@ def featurize(x: int) -> list[array_dtype]: # type: ignore # Let's make sure the read_gbq_function path works for this function featurize_reuse = session.read_gbq_function( - featurize.bigframes_remote_function # type: ignore + featurize.bigframes_bigquery_function # type: ignore ) bf_result = scalars_df["int64_too"].apply(featurize_reuse).to_pandas() pandas.testing.assert_series_equal(pd_result, bf_result, check_dtype=False) @@ -2664,7 +2674,7 @@ def featurize(x: float) -> list[float]: # type: ignore # Let's make sure the read_gbq_function path works for this function featurize_reuse = unordered_session.read_gbq_function( - featurize.bigframes_remote_function # type: ignore + featurize.bigframes_bigquery_function # type: ignore ) bf_int64_col = scalars_df["float64_col"].dropna() bf_result = bf_int64_col.apply(featurize_reuse).to_pandas() diff --git a/tests/system/small/functions/test_managed_function.py b/tests/system/small/functions/test_managed_function.py index b0d89b4cd4..54cf494f79 100644 --- a/tests/system/small/functions/test_managed_function.py +++ b/tests/system/small/functions/test_managed_function.py @@ -18,6 +18,7 @@ import bigframes.exceptions from bigframes.functions import _function_session as bff_session +from bigframes.functions import function as bff from bigframes.functions._utils import get_python_version from bigframes.pandas import udf import bigframes.pandas as bpd @@ -44,6 +45,7 @@ ], ) def test_managed_function_series_apply( + session, typ, scalars_dfs, dataset_id_permanent, @@ -80,6 +82,22 @@ def foo(x): assert_pandas_df_equal(bf_result, pd_result, check_dtype=False) + # Make sure the read_gbq_function path works for this function. + foo_ref = bff.read_gbq_function( + function_name=foo.bigframes_bigquery_function, # type: ignore + session=session, + ) + assert hasattr(foo_ref, "bigframes_bigquery_function") + assert not hasattr(foo_ref, "bigframes_remote_function") + assert foo.bigframes_bigquery_function == foo_ref.bigframes_bigquery_function # type: ignore + + bf_result_col_gbq = scalars_df["int64_too"].apply(foo_ref) + bf_result_gbq = ( + scalars_df["int64_too"].to_frame().assign(result=bf_result_col_gbq).to_pandas() + ) + + assert_pandas_df_equal(bf_result_gbq, pd_result, check_dtype=False) + def test_managed_function_series_combine(dataset_id_permanent, scalars_dfs): # This function is deliberately written to not work with NA input. @@ -164,7 +182,9 @@ def foo_list(x): assert_pandas_df_equal(bf_result, pd_result, check_dtype=False) -def test_managed_function_series_combine_list_output(dataset_id_permanent, scalars_dfs): +def test_managed_function_series_combine_list_output( + session, dataset_id_permanent, scalars_dfs +): def add_list(x: int, y: int) -> list[int]: return [x, y] @@ -198,6 +218,31 @@ def add_list(x: int, y: int) -> list[int]: # Ignore any dtype difference. pd.testing.assert_series_equal(pd_result, bf_result, check_dtype=False) + # Make sure the read_gbq_function path works for this function. + add_list_managed_func_ref = bff.read_gbq_function( + function_name=add_list_managed_func.bigframes_bigquery_function, # type: ignore + session=session, + ) + + assert hasattr(add_list_managed_func_ref, "bigframes_bigquery_function") + assert not hasattr(add_list_managed_func_ref, "bigframes_remote_function") + assert ( + add_list_managed_func_ref.bigframes_bigquery_function + == add_list_managed_func.bigframes_bigquery_function + ) + + # Test on the function from read_gbq_function. + got = add_list_managed_func_ref(10, 38) + assert got == [10, 38] + + bf_result_gbq = ( + bf_df[bf_filter][int_col_name_with_nulls] + .combine(bf_df[bf_filter][int_col_name_no_nulls], add_list_managed_func_ref) + .to_pandas() + ) + + pd.testing.assert_series_equal(bf_result_gbq, pd_result, check_dtype=False) + def test_managed_function_dataframe_map(scalars_dfs, dataset_id_permanent): def add_one(x): @@ -266,7 +311,9 @@ def add_ints(x, y): ) -def test_managed_function_dataframe_map_list_output(scalars_dfs, dataset_id_permanent): +def test_managed_function_dataframe_map_list_output( + session, scalars_dfs, dataset_id_permanent +): def add_one_list(x): return [x + 1] * 3 @@ -291,6 +338,15 @@ def add_one_list(x): # Ignore any dtype difference. assert_pandas_df_equal(bf_result, pd_result, check_dtype=False) + # Make sure the read_gbq_function path works for this function. + mf_add_one_list_ref = bff.read_gbq_function( + function_name=mf_add_one_list.bigframes_bigquery_function, # type: ignore + session=session, + ) + + bf_result_gbq = bf_int64_df_filtered.map(mf_add_one_list_ref).to_pandas() + assert_pandas_df_equal(bf_result_gbq, pd_result, check_dtype=False) + def test_managed_function_dataframe_apply_axis_1_list_output( session, scalars_dfs, dataset_id_permanent @@ -326,3 +382,27 @@ def add_ints_list(x, y): # Ignore any dtype difference. pd.testing.assert_series_equal(pd_result, bf_result, check_dtype=False) + + # Make sure the read_gbq_function path works for this function. + add_ints_list_mf_ref = bff.read_gbq_function( + function_name=add_ints_list_mf.bigframes_bigquery_function, # type: ignore + session=session, + ) + assert hasattr(add_ints_list_mf_ref, "bigframes_bigquery_function") + assert not hasattr(add_ints_list_mf_ref, "bigframes_remote_function") + assert ( + add_ints_list_mf_ref.bigframes_bigquery_function + == add_ints_list_mf.bigframes_bigquery_function + ) + + with pytest.warns( + bigframes.exceptions.PreviewWarning, + match="axis=1 scenario is in preview.", + ): + bf_result_gbq = ( + bpd.DataFrame({"x": series, "y": series}) + .apply(add_ints_list_mf_ref, axis=1) + .to_pandas() + ) + + pd.testing.assert_series_equal(bf_result_gbq, pd_result, check_dtype=False) diff --git a/tests/system/small/functions/test_remote_function.py b/tests/system/small/functions/test_remote_function.py index a68b0231f5..0af7f4e42e 100644 --- a/tests/system/small/functions/test_remote_function.py +++ b/tests/system/small/functions/test_remote_function.py @@ -125,6 +125,7 @@ def square(x): # Function should have extra metadata attached for remote execution. assert hasattr(square, "bigframes_remote_function") + assert hasattr(square, "bigframes_bigquery_function") assert hasattr(square, "bigframes_cloud_function") assert hasattr(square, "ibis_node") @@ -666,7 +667,7 @@ def square1(x): assert square1(2) == 4 square2 = bff.read_gbq_function( - function_name=square1.bigframes_remote_function, # type: ignore + function_name=square1.bigframes_bigquery_function, # type: ignore session=session, ) @@ -674,13 +675,17 @@ def square1(x): # cloud function associated with it, while the read-back version (square2) # should only have a remote function. assert square1.bigframes_remote_function # type: ignore + assert square1.bigframes_bigquery_function # type: ignore assert square1.bigframes_cloud_function # type: ignore assert square2.bigframes_remote_function + assert square2.bigframes_bigquery_function assert not hasattr(square2, "bigframes_cloud_function") # They should point to the same function. assert square1.bigframes_remote_function == square2.bigframes_remote_function # type: ignore + assert square1.bigframes_bigquery_function == square2.bigframes_bigquery_function # type: ignore + assert square2.bigframes_remote_function == square2.bigframes_bigquery_function # type: ignore # The result of applying them should be the same. int64_col = scalars_df_index["int64_col"] @@ -854,7 +859,7 @@ def test_read_gbq_function_reads_udfs(session, bigquery_client, dataset_id): ) # It should point to the named routine and yield the expected results. - assert square.bigframes_remote_function == str(routine.reference) + assert square.bigframes_bigquery_function == str(routine.reference) assert square.input_dtypes == (bigframes.dtypes.INT_DTYPE,) assert square.output_dtype == bigframes.dtypes.INT_DTYPE assert ( @@ -1088,10 +1093,11 @@ def test_df_apply_scalar_func(session, scalars_dfs): with pytest.raises(NotImplementedError) as context: bdf.apply(func_ref) assert str(context.value) == ( - "BigFrames DataFrame '.apply()' does not support remote function for " - "column-wise (i.e. with axis=0) operations, please use a regular python " - "function instead. For element-wise operations of the remote function, " - f"please use '.map()'. {constants.FEEDBACK_LINK}" + "BigFrames DataFrame '.apply()' does not support BigFrames BigQuery " + "function for column-wise (i.e. with axis=0) operations, please use a " + "regular python function instead. For element-wise operations of the " + "BigFrames BigQuery function, please use '.map()'. " + f"{constants.FEEDBACK_LINK}" ) @@ -1135,6 +1141,7 @@ def add_ints(row): name=get_function_name(add_ints, is_row_processor=True), )(add_ints) assert add_ints_remote.bigframes_remote_function # type: ignore + assert add_ints_remote.bigframes_bigquery_function # type: ignore assert add_ints_remote.bigframes_cloud_function # type: ignore with pytest.warns( @@ -1156,11 +1163,13 @@ def add_ints(row): # Read back the deployed BQ remote function using read_gbq_function. func_ref = session.read_gbq_function( - function_name=add_ints_remote.bigframes_remote_function, # type: ignore + function_name=add_ints_remote.bigframes_bigquery_function, # type: ignore is_row_processor=True, ) assert func_ref.bigframes_remote_function == add_ints_remote.bigframes_remote_function # type: ignore + assert func_ref.bigframes_bigquery_function == add_ints_remote.bigframes_bigquery_function # type: ignore + assert func_ref.bigframes_remote_function == func_ref.bigframes_bigquery_function # type: ignore bf_result_gbq = scalars_df[columns].apply(func_ref, axis=1).to_pandas() pd.testing.assert_series_equal( @@ -1248,7 +1257,7 @@ def add_ints(row): scalars_pandas_df.apply(add_ints, axis=1) with pytest.raises( - ValueError, match="For axis=1 a bigframes function must be used." + ValueError, match="For axis=1 a BigFrames BigQuery function must be used." ): scalars_df[columns].apply(add_ints, axis=1)