diff --git a/bigframes/functions/_function_session.py b/bigframes/functions/_function_session.py
index c04de54be6..17d2a1a0d0 100644
--- a/bigframes/functions/_function_session.py
+++ b/bigframes/functions/_function_session.py
@@ -237,6 +237,7 @@ def _try_delattr(self, func: Callable, attr: str) -> None:
     # https://github.com/ibis-project/ibis/blob/master/ibis/backends/bigquery/udf/__init__.py
     def remote_function(
         self,
+        *,
         input_types: Union[None, type, Sequence[type]] = None,
         output_type: Optional[type] = None,
         session: Optional[Session] = None,
@@ -251,7 +252,7 @@ def remote_function(
         reuse: bool = True,
         name: Optional[str] = None,
         packages: Optional[Sequence[str]] = None,
-        cloud_function_service_account: Optional[str] = None,
+        cloud_function_service_account: str,
         cloud_function_kms_key_name: Optional[str] = None,
         cloud_function_docker_repository: Optional[str] = None,
         max_batching_rows: Optional[int] = 1000,
@@ -384,8 +385,8 @@ def remote_function(
                 Explicit name of the external package dependencies. Each dependency
                 is added to the `requirements.txt` as is, and can be of the form
                 supported in https://pip.pypa.io/en/stable/reference/requirements-file-format/.
-            cloud_function_service_account (str, Optional):
-                Service account to use for the cloud functions. If not provided then
+            cloud_function_service_account (str):
+                Service account to use for the cloud functions. If "default" is provided then
                 the default service account would be used. See
                 https://cloud.google.com/functions/docs/securing/function-identity
                 for more details. Please make sure the service account has the
@@ -455,22 +456,12 @@ def remote_function(
         # Some defaults may be used from the session if not provided otherwise.
         session = self._resolve_session(session)

-        # raise a UserWarning if user does not explicitly set cloud_function_service_account to a
-        # user-managed cloud_function_service_account of to default
-        msg = bfe.format_message(
-            "You have not explicitly set a user-managed `cloud_function_service_account`. "
-            "Using the default Compute Engine service account. "
-            "In BigFrames 2.0 onwards, you would have to explicitly set `cloud_function_service_account` "
-            'either to a user-managed service account (preferred) or to `"default"` '
-            "to use the default Compute Engine service account (discouraged). "
-            "See, https://cloud.google.com/functions/docs/securing/function-identity."
-        )
-
+        # If the user explicitly sets the cloud function service account to
+        # None, raise an exception.
         if cloud_function_service_account is None:
-            warnings.warn(msg, stacklevel=2, category=FutureWarning)
-
-        if cloud_function_service_account == "default":
-            cloud_function_service_account = None
+            raise ValueError(
+                'You must provide a user managed cloud_function_service_account, or "default" if you would like to let the default service account be used.'
+            )

         # A BigQuery client is required to perform BQ operations.
        bigquery_client = self._resolve_bigquery_client(session, bigquery_client)
@@ -615,7 +606,9 @@ def wrapper(func):
                 bq_connection_manager,
                 cloud_function_region,
                 cloud_functions_client,
-                cloud_function_service_account,
+                None
+                if cloud_function_service_account == "default"
+                else cloud_function_service_account,
                 cloud_function_kms_key_name,
                 cloud_function_docker_repository,
                 session=session,  # type: ignore
diff --git a/bigframes/pandas/__init__.py b/bigframes/pandas/__init__.py
index 730c287e1f..8a7e5ba7d7 100644
--- a/bigframes/pandas/__init__.py
+++ b/bigframes/pandas/__init__.py
@@ -65,6 +65,7 @@
 def remote_function(
+    *,
     input_types: Union[None, type, Sequence[type]] = None,
     output_type: Optional[type] = None,
     dataset: Optional[str] = None,
@@ -72,7 +73,7 @@ def remote_function(
     reuse: bool = True,
     name: Optional[str] = None,
     packages: Optional[Sequence[str]] = None,
-    cloud_function_service_account: Optional[str] = None,
+    cloud_function_service_account: str,
     cloud_function_kms_key_name: Optional[str] = None,
     cloud_function_docker_repository: Optional[str] = None,
     max_batching_rows: Optional[int] = 1000,
diff --git a/bigframes/session/__init__.py b/bigframes/session/__init__.py
index 3ac9b75039..a7b4ab729e 100644
--- a/bigframes/session/__init__.py
+++ b/bigframes/session/__init__.py
@@ -1202,6 +1202,7 @@ def _check_file_size(self, filepath: str):
     def remote_function(
         self,
+        *,
         input_types: Union[None, type, Sequence[type]] = None,
         output_type: Optional[type] = None,
         dataset: Optional[str] = None,
@@ -1209,7 +1210,7 @@ def remote_function(
         reuse: bool = True,
         name: Optional[str] = None,
         packages: Optional[Sequence[str]] = None,
-        cloud_function_service_account: Optional[str] = None,
+        cloud_function_service_account: str,
         cloud_function_kms_key_name: Optional[str] = None,
         cloud_function_docker_repository: Optional[str] = None,
         max_batching_rows: Optional[int] = 1000,
@@ -1327,8 +1328,8 @@ def remote_function(
                 Explicit name of the external package dependencies. Each dependency
                 is added to the `requirements.txt` as is, and can be of the form
                 supported in https://pip.pypa.io/en/stable/reference/requirements-file-format/.
-            cloud_function_service_account (str, Optional):
-                Service account to use for the cloud functions. If not provided
+            cloud_function_service_account (str):
+                Service account to use for the cloud functions. If "default" is provided
                 then the default service account would be used. See
                 https://cloud.google.com/functions/docs/securing/function-identity
                 for more details. Please make sure the service account has the
@@ -1406,8 +1407,8 @@ def remote_function(
             `bigframes_remote_function` - The bigquery remote function capable of calling into `bigframes_cloud_function`.
         """
         return self._function_session.remote_function(
-            input_types,
-            output_type,
+            input_types=input_types,
+            output_type=output_type,
             session=self,
             dataset=dataset,
             bigquery_connection=bigquery_connection,
@@ -1499,8 +1500,8 @@ def udf(
             deployed for the user defined code.
         """
         return self._function_session.udf(
-            input_types,
-            output_type,
+            input_types=input_types,
+            output_type=output_type,
             session=self,
             dataset=dataset,
             bigquery_connection=bigquery_connection,
@@ -1593,7 +1594,7 @@ def read_gbq_function(
            Another use case is to define your own remote function and use it
            later. For example, define the remote function:

-            >>> @bpd.remote_function()
+            >>> @bpd.remote_function(cloud_function_service_account="default")
            ... def tenfold(num: int) -> float:
            ...     return num * 10
@@ -1620,7 +1621,7 @@ def read_gbq_function(
            note, row processor implies that the function has only one input
            parameter.

-            >>> @bpd.remote_function()
+            >>> @bpd.remote_function(cloud_function_service_account="default")
            ... def row_sum(s: bpd.Series) -> float:
            ...     return s['a'] + s['b'] + s['c']
diff --git a/notebooks/apps/synthetic_data_generation.ipynb b/notebooks/apps/synthetic_data_generation.ipynb
index c190f219af..f830e35c16 100644
--- a/notebooks/apps/synthetic_data_generation.ipynb
+++ b/notebooks/apps/synthetic_data_generation.ipynb
@@ -248,8 +248,8 @@
   },
   "outputs": [],
   "source": [
-    "@bpd.remote_function([int], str, packages=['faker', 'pandas'])\n",
-    "def data_generator(id):\n",
+    "@bpd.remote_function(packages=['faker', 'pandas'], cloud_function_service_account=\"default\")\n",
+    "def data_generator(id: int) -> str:\n",
    "    context = {}\n",
    "    exec(code, context)\n",
    "    result_df = context.get(\"result_df\")\n",
diff --git a/notebooks/generative_ai/bq_dataframes_llm_code_generation.ipynb b/notebooks/generative_ai/bq_dataframes_llm_code_generation.ipynb
index 88633f8635..788111cfe6 100644
--- a/notebooks/generative_ai/bq_dataframes_llm_code_generation.ipynb
+++ b/notebooks/generative_ai/bq_dataframes_llm_code_generation.ipynb
@@ -914,8 +914,8 @@
   },
   "outputs": [],
   "source": [
-    "@bf.remote_function([str], str)\n",
-    "def extract_code(text: str):\n",
+    "@bf.remote_function(cloud_function_service_account=\"default\")\n",
+    "def extract_code(text: str) -> str:\n",
    "  try:\n",
    "    res = text[text.find('\\n')+1:text.find('```', 3)]\n",
    "    res = res.replace(\"import pandas as pd\", \"import bigframes.pandas as bf\")\n",
diff --git a/notebooks/getting_started/getting_started_bq_dataframes.ipynb b/notebooks/getting_started/getting_started_bq_dataframes.ipynb
index c5deeef1c5..a8158bcb85 100644
--- a/notebooks/getting_started/getting_started_bq_dataframes.ipynb
+++ b/notebooks/getting_started/getting_started_bq_dataframes.ipynb
@@ -1485,8 +1485,8 @@
   },
   "outputs": [],
   "source": [
-    "@bpd.remote_function([float], str)\n",
-    "def get_bucket(num):\n",
+    "@bpd.remote_function(cloud_function_service_account=\"default\")\n",
+    "def get_bucket(num: float) -> str:\n",
    "  if not num: return \"NA\"\n",
    "  boundary = 4000\n",
    "  return \"at_or_above_4000\" if num >= boundary else \"below_4000\""
diff --git a/notebooks/location/regionalized.ipynb b/notebooks/location/regionalized.ipynb
index 1b138c6a66..066cd18136 100644
--- a/notebooks/location/regionalized.ipynb
+++ b/notebooks/location/regionalized.ipynb
@@ -1475,8 +1475,8 @@
   }
  ],
  "source": [
-    "@bpd.remote_function([float], str, bigquery_connection='bigframes-rf-conn')\n",
-    "def get_bucket(num):\n",
+    "@bpd.remote_function(bigquery_connection='bigframes-rf-conn', cloud_function_service_account=\"default\")\n",
+    "def get_bucket(num: float) -> str:\n",
    "  if not num: return \"NA\"\n",
    "  boundary = 4000\n",
    "  return \"at_or_above_4000\" if num >= boundary else \"below_4000\""
diff --git a/notebooks/remote_functions/remote_function.ipynb b/notebooks/remote_functions/remote_function.ipynb
index 2114311e10..e2bc88ecae 100644
--- a/notebooks/remote_functions/remote_function.ipynb
+++ b/notebooks/remote_functions/remote_function.ipynb
@@ -174,7 +174,7 @@
  "source": [
    "# User defined function\n",
    "# https://www.codespeedy.com/find-nth-prime-number-in-python/\n",
-    "def nth_prime(n):\n",
+    "def nth_prime(n: int) -> int:\n",
    "    prime_numbers = [2,3]\n",
    "    i=3\n",
    "    if(0 int:\n",
    "    prime_numbers = [2,3]\n",
    "    i=3\n",
    "    if(0 str:\n",
    "    if duration_minutes < 90:\n",
    "        return \"short\"\n",
@@ -466,7 +466,7 @@
   }
  ],
  "source": [
-    "@bpd.remote_function(reuse=False)\n",
+    "@bpd.remote_function(reuse=False, cloud_function_service_account=\"default\")\n",
    "def duration_category(duration_minutes: int) -> str:\n",
    "    if duration_minutes < 90:\n",
    "        return DURATION_CATEGORY_SHORT\n",
@@ -675,7 +675,7 @@
   }
  ],
  "source": [
-    "@bpd.remote_function(reuse=False)\n",
+    "@bpd.remote_function(reuse=False, cloud_function_service_account=\"default\")\n",
    "def duration_category(duration_minutes: int) -> str:\n",
    "    duration_hours = mymath.ceil(duration_minutes / 60)\n",
    "    return f\"{duration_hours}h\"\n",
@@ -886,7 +886,7 @@
   }
  ],
  "source": [
-    "@bpd.remote_function(reuse=False)\n",
+    "@bpd.remote_function(reuse=False, cloud_function_service_account=\"default\")\n",
    "def duration_category(duration_minutes: int) -> str:\n",
    "    duration_hours = get_hour_ceiling(duration_minutes)\n",
    "    return f\"{duration_hours} hrs\"\n",
@@ -1068,7 +1068,7 @@
   }
  ],
  "source": [
-    "@bpd.remote_function(reuse=False, packages=[\"cryptography\"])\n",
+    "@bpd.remote_function(reuse=False, packages=[\"cryptography\"], cloud_function_service_account=\"default\")\n",
    "def get_hash(input: str) -> str:\n",
    "    from cryptography.fernet import Fernet\n",
    "\n",
@@ -1271,7 +1271,7 @@
   }
  ],
  "source": [
-    "@bpd.remote_function(reuse=False, packages=[\"humanize\"])\n",
+    "@bpd.remote_function(reuse=False, packages=[\"humanize\"], cloud_function_service_account=\"default\")\n",
    "def duration_category(duration_minutes: int) -> str:\n",
    "    timedelta = dt.timedelta(minutes=duration_minutes)\n",
    "    return humanize.naturaldelta(timedelta)\n",
@@ -1442,7 +1442,7 @@
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
-   "version": "3.9.19"
+   "version": "3.11.4"
  }
 },
 "nbformat": 4,
diff --git a/notebooks/remote_functions/remote_function_vertex_claude_model.ipynb b/notebooks/remote_functions/remote_function_vertex_claude_model.ipynb
index 78f0d27474..605f879bc7 100644
--- a/notebooks/remote_functions/remote_function_vertex_claude_model.ipynb
+++ b/notebooks/remote_functions/remote_function_vertex_claude_model.ipynb
@@ -286,7 +286,9 @@
  "source": [
    "@bpd.remote_function(packages=[\"anthropic[vertex]\", \"google-auth[requests]\"],\n",
    "                     max_batching_rows=1, \n",
-    "                     bigquery_connection=\"bigframes-dev.us-east5.bigframes-rf-conn\") # replace with your connection\n",
+    "                     bigquery_connection=\"bigframes-dev.us-east5.bigframes-rf-conn\", # replace with your connection\n",
+    "                     cloud_function_service_account=\"default\",\n",
+    ")\n",
    "def anthropic_transformer(message: str) -> str:\n",
    "    from anthropic import AnthropicVertex\n",
    "    client = AnthropicVertex(region=LOCATION, project_id=PROJECT)\n",
diff --git a/samples/snippets/remote_function.py b/samples/snippets/remote_function.py
index c35daf35fc..3a7031ef89 100644
--- a/samples/snippets/remote_function.py
+++ b/samples/snippets/remote_function.py
@@ -47,9 +47,8 @@ def run_remote_function_and_read_gbq_function(project_id: str) -> None:
     # of the penguins, which is a real number, into a category, which is a
     # string.
     @bpd.remote_function(
-        float,
-        str,
         reuse=False,
+        cloud_function_service_account="default",
     )
     def get_bucket(num: float) -> str:
         if not num:
@@ -91,10 +90,9 @@ def get_bucket(num: float) -> str:
     # as a remote function. The custom function in this example has external
     # package dependency, which can be specified via `packages` parameter.
     @bpd.remote_function(
-        str,
-        str,
         reuse=False,
         packages=["cryptography"],
+        cloud_function_service_account="default",
     )
     def get_hash(input: str) -> str:
         from cryptography.fernet import Fernet
diff --git a/tests/system/large/functions/test_remote_function.py b/tests/system/large/functions/test_remote_function.py
index 1e5e7ede26..bf83fbc480 100644
--- a/tests/system/large/functions/test_remote_function.py
+++ b/tests/system/large/functions/test_remote_function.py
@@ -17,11 +17,9 @@
 import inspect
 import math  # must keep this at top level to test udf referring global import
 import os.path
-import re
 import shutil
 import tempfile
 import textwrap
-import typing
 import warnings

 import google.api_core.exceptions
@@ -111,11 +109,12 @@ def test_remote_function_multiply_with_ibis(
     try:

         @session.remote_function(
-            [int, int],
-            int,
-            dataset_id,
-            bq_cf_connection,
+            input_types=[int, int],
+            output_type=int,
+            dataset=dataset_id,
+            bigquery_connection=bq_cf_connection,
             reuse=False,
+            cloud_function_service_account="default",
         )
         def multiply(x, y):
             return x * y
@@ -165,11 +164,12 @@ def test_remote_function_stringify_with_ibis(
     try:

         @session.remote_function(
-            [int],
-            str,
-            dataset_id,
-            bq_cf_connection,
+            input_types=[int],
+            output_type=str,
+            dataset=dataset_id,
+            bigquery_connection=bq_cf_connection,
             reuse=False,
+            cloud_function_service_account="default",
         )
         def stringify(x):
             return f"I got {x}"
@@ -213,11 +213,12 @@ def func(x, y):
         return x * abs(y % 4)

     remote_func = session.remote_function(
-        [str, int],
-        str,
-        dataset_id,
-        bq_cf_connection,
+        input_types=[str, int],
+        output_type=str,
+        dataset=dataset_id,
+        bigquery_connection=bq_cf_connection,
         reuse=False,
+        cloud_function_service_account="default",
     )(func)

     scalars_df, scalars_pandas_df = scalars_dfs
@@ -250,11 +251,12 @@ def func(x, y):
         return [len(x), abs(y % 4)]

     remote_func = session.remote_function(
-        [str, int],
-        list[int],
-        dataset_id,
-        bq_cf_connection,
+        input_types=[str, int],
+        output_type=list[int],
+        dataset=dataset_id,
+        bigquery_connection=bq_cf_connection,
         reuse=False,
+        cloud_function_service_account="default",
     )(func)

     scalars_df, scalars_pandas_df = scalars_dfs
@@ -284,11 +286,12 @@ def test_remote_function_decorator_with_bigframes_series(
     try:

         @session.remote_function(
-            [int],
-            int,
-            dataset_id,
-            bq_cf_connection,
+            input_types=[int],
+            output_type=int,
+            dataset=dataset_id,
+            bigquery_connection=bq_cf_connection,
             reuse=False,
+            cloud_function_service_account="default",
         )
         def square(x):
             return x * x
@@ -330,11 +333,12 @@ def add_one(x):
         return x + 1

     remote_add_one = session.remote_function(
-        [int],
-        int,
-        dataset_id,
-        bq_cf_connection,
+        input_types=[int],
+        output_type=int,
+        dataset=dataset_id,
+        bigquery_connection=bq_cf_connection,
         reuse=False,
+        cloud_function_service_account="default",
     )(add_one)

     scalars_df, scalars_pandas_df = scalars_dfs
@@ -380,7 +384,12 @@ def test_remote_function_input_types(session, scalars_dfs, input_types):
     def add_one(x):
         return x + 1

-    remote_add_one = session.remote_function(input_types, int, reuse=False)(add_one)
+    remote_add_one = session.remote_function(
+        input_types=input_types,
+        output_type=int,
+        reuse=False,
+        cloud_function_service_account="default",
+    )(add_one)
     assert remote_add_one.input_dtypes == (bigframes.dtypes.INT_DTYPE,)

     scalars_df, scalars_pandas_df = scalars_dfs
@@ -406,11 +415,12 @@ def test_remote_function_explicit_dataset_not_created(
     try:

         @session.remote_function(
-            [int],
-            int,
-            dataset_id_not_created,
-            bq_cf_connection,
+            input_types=[int],
+            output_type=int,
+            dataset=dataset_id_not_created,
+            bigquery_connection=bq_cf_connection,
             reuse=False,
+            cloud_function_service_account="default",
         )
         def square(x):
             return x * x
@@ -459,11 +469,12 @@ def sign(num):
         return NO_SIGN

     remote_sign = session.remote_function(
-        [int],
-        int,
-        dataset_id,
-        bq_cf_connection,
+        input_types=[int],
+        output_type=int,
+        dataset=dataset_id,
+        bigquery_connection=bq_cf_connection,
         reuse=False,
+        cloud_function_service_account="default",
     )(sign)

     scalars_df, scalars_pandas_df = scalars_dfs
@@ -506,11 +517,12 @@ def circumference(radius):
         return 2 * mymath.pi * radius

     remote_circumference = session.remote_function(
-        [float],
-        float,
-        dataset_id,
-        bq_cf_connection,
+        input_types=[float],
+        output_type=float,
+        dataset=dataset_id,
+        bigquery_connection=bq_cf_connection,
         reuse=False,
+        cloud_function_service_account="default",
     )(circumference)

     scalars_df, scalars_pandas_df = scalars_dfs
@@ -555,11 +567,12 @@ def find_team(num):
         return _team_pi

     remote_find_team = session.remote_function(
-        [float],
-        str,
-        dataset_id,
-        bq_cf_connection,
+        input_types=[float],
+        output_type=str,
+        dataset=dataset_id,
+        bigquery_connection=bq_cf_connection,
         reuse=False,
+        cloud_function_service_account="default",
     )(find_team)

     scalars_df, scalars_pandas_df = scalars_dfs
@@ -627,11 +640,12 @@ def add_one(x):
     # The first time both the cloud function and the bq remote function don't
     # exist and would be created
     remote_add_one = session.remote_function(
-        [int],
-        int,
-        dataset_id,
-        bq_cf_connection,
+        input_types=[int],
+        output_type=int,
+        dataset=dataset_id,
+        bigquery_connection=bq_cf_connection,
         reuse=True,
+        cloud_function_service_account="default",
     )(add_one_uniq)

     # There should have been excactly one cloud function created at this point
@@ -697,11 +711,12 @@ def inner_test():
     # exist even though the remote function exists, and goes ahead and recreates
     # the cloud function
     remote_add_one = session.remote_function(
-        [int],
-        int,
-        dataset_id,
-        bq_cf_connection,
+        input_types=[int],
+        output_type=int,
+        dataset=dataset_id,
+        bigquery_connection=bq_cf_connection,
         reuse=True,
+        cloud_function_service_account="default",
     )(add_one_uniq)

     # There should be excactly one cloud function again
@@ -743,11 +758,12 @@ def is_odd(num):
         return flag

     is_odd_remote = session.remote_function(
-        [int],
-        bool,
-        dataset_id,
-        bq_cf_connection,
+        input_types=[int],
+        output_type=bool,
+        dataset=dataset_id,
+        bigquery_connection=bq_cf_connection,
         reuse=False,
+        cloud_function_service_account="default",
     )(is_odd)

     scalars_df, scalars_pandas_df = scalars_dfs
@@ -783,11 +799,12 @@ def is_odd(num):
         return flag

     is_odd_remote = session.remote_function(
-        [int],
-        bool,
-        dataset_id,
-        bq_cf_connection,
+        input_types=[int],
+        output_type=bool,
+        dataset=dataset_id,
+        bigquery_connection=bq_cf_connection,
         reuse=False,
+        cloud_function_service_account="default",
     )(is_odd)

     scalars_df, scalars_pandas_df = scalars_dfs
@@ -817,11 +834,12 @@ def test_remote_udf_lambda(session, scalars_dfs, dataset_id, bq_cf_connection):
     add_one_lambda = lambda x: x + 1  # noqa: E731

     add_one_lambda_remote = session.remote_function(
-        [int],
-        int,
-        dataset_id,
-        bq_cf_connection,
+        input_types=[int],
+        output_type=int,
+        dataset=dataset_id,
+        bigquery_connection=bq_cf_connection,
         reuse=False,
+        cloud_function_service_account="default",
     )(add_one_lambda)

     scalars_df, scalars_pandas_df = scalars_dfs
@@ -872,12 +890,13 @@ def square(x):
     # Create the remote function with the name provided explicitly
     square_remote = session.remote_function(
-        [int],
-        int,
-        dataset_id,
-        bq_cf_connection,
+        input_types=[int],
+        output_type=int,
+        dataset=dataset_id,
+        bigquery_connection=bq_cf_connection,
         reuse=False,
         name=rf_name,
+        cloud_function_service_account="default",
     )(square)

     # The remote function should reflect the explicitly provided name
@@ -925,12 +944,13 @@ def pd_np_foo(x):
     # Create the remote function with the name provided explicitly
     pd_np_foo_remote = session.remote_function(
-        [int],
-        float,
-        dataset_id,
-        bq_cf_connection,
+        input_types=[int],
+        output_type=float,
+        dataset=dataset_id,
+        bigquery_connection=bq_cf_connection,
         reuse=False,
         packages=["numpy", "pandas >= 2.0.0"],
+        cloud_function_service_account="default",
     )(pd_np_foo)

     # The behavior of the created remote function should be as expected
@@ -1005,11 +1025,12 @@ def test_internal(rf, udf):
     # Create a new remote function with the name provided explicitly
     square_remote1 = session.remote_function(
-        [int],
-        int,
-        dataset_id,
-        bq_cf_connection,
+        input_types=[int],
+        output_type=int,
+        dataset=dataset_id,
+        bigquery_connection=bq_cf_connection,
         name=rf_name,
+        cloud_function_service_account="default",
     )(square_uniq)

     # The remote function should reflect the explicitly provided name
@@ -1030,11 +1051,12 @@ def test_internal(rf, udf):
     # explicitly. Since reuse is True by default, the previously created
     # remote function with the same name will be reused.
     square_remote2 = session.remote_function(
-        [int],
-        int,
-        dataset_id,
-        bq_cf_connection,
+        input_types=[int],
+        output_type=int,
+        dataset=dataset_id,
+        bigquery_connection=bq_cf_connection,
         name=rf_name,
+        cloud_function_service_account="default",
     )(square_uniq)

     # The new remote function should still reflect the explicitly provided name
@@ -1074,11 +1096,12 @@ def plusone(x):
     # created remote function with the same name should not be reused since
     # this time it is a different user code.
     plusone_remote = session.remote_function(
-        [int],
-        int,
-        dataset_id,
-        bq_cf_connection,
+        input_types=[int],
+        output_type=int,
+        dataset=dataset_id,
+        bigquery_connection=bq_cf_connection,
         name=rf_name,
+        cloud_function_service_account="default",
     )(plusone_uniq)

     # The new remote function should still reflect the explicitly provided name
@@ -1139,7 +1162,13 @@ def test_remote_function_via_session_context_connection_setter(
     # unique dataset_id, even though the cloud function would be reused, the bq
     # remote function would still be created, making use of the bq connection
     # set in the BigQueryOptions above.
-    @session.remote_function([int], int, dataset=dataset_id, reuse=False)
+    @session.remote_function(
+        input_types=[int],
+        output_type=int,
+        dataset=dataset_id,
+        reuse=False,
+        cloud_function_service_account="default",
+    )
     def square(x):
         return x * x
@@ -1174,7 +1203,13 @@ def square(x):
 def test_remote_function_default_connection(session, scalars_dfs, dataset_id):
     try:

-        @session.remote_function([int], int, dataset=dataset_id, reuse=False)
+        @session.remote_function(
+            input_types=[int],
+            output_type=int,
+            dataset=dataset_id,
+            reuse=False,
+            cloud_function_service_account="default",
+        )
         def square(x):
             return x * x
@@ -1209,7 +1244,13 @@ def square(x):
 def test_remote_function_runtime_error(session, scalars_dfs, dataset_id):
     try:

-        @session.remote_function([int], int, dataset=dataset_id, reuse=False)
+        @session.remote_function(
+            input_types=[int],
+            output_type=int,
+            dataset=dataset_id,
+            reuse=False,
+            cloud_function_service_account="default",
+        )
         def square(x):
             return x * x
@@ -1233,7 +1274,12 @@ def test_remote_function_anonymous_dataset(session, scalars_dfs):
     # function in the bigframes session's anonymous dataset. Use reuse=False
     # param to make sure parallel instances of the test don't step over each
     # other due to the common anonymous dataset.
-    @session.remote_function([int], int, reuse=False)
+    @session.remote_function(
+        input_types=[int],
+        output_type=int,
+        reuse=False,
+        cloud_function_service_account="default",
+    )
     def square(x):
         return x * x
@@ -1291,7 +1337,10 @@ def test_remote_function_via_session_custom_sa(scalars_dfs):
     try:

         @rf_session.remote_function(
-            [int], int, reuse=False, cloud_function_service_account=gcf_service_account
+            input_types=[int],
+            output_type=int,
+            reuse=False,
+            cloud_function_service_account=gcf_service_account,
         )
         def square_num(x):
             if x is None:
@@ -1322,38 +1371,12 @@ def square_num(x):
     )


-@pytest.mark.parametrize(
-    ("remote_function_args"),
-    [
-        pytest.param(
-            {},
-            id="no-set",
-        ),
-        pytest.param(
-            {"cloud_function_service_account": None},
-            id="set-none",
-        ),
-    ],
-)
-def test_remote_function_warns_default_cloud_function_service_account(
-    session, remote_function_args
-):
-    with pytest.warns(FutureWarning) as record:
-        session.remote_function(**remote_function_args)
-
-    len(
-        [
-            warn
-            for warn in record
-            if re.search(
-                (
-                    "You have not explicitly set a user-managed.*Using the default Compute Engine.*service account"
-                ),
-                typing.cast(FutureWarning, warn.message).args[0],
-                re.DOTALL,
-            )
-        ]
-    ) == 1
+def test_remote_function_throws_none_cloud_function_service_account(session):
+    with pytest.raises(
+        ValueError,
+        match='^You must provide a user managed cloud_function_service_account, or "default" if you would like to let the default service account be used.$',
+    ):
+        session.remote_function(cloud_function_service_account=None)


 @pytest.mark.flaky(retries=2, delay=120)
@@ -1378,9 +1401,10 @@ def test_remote_function_with_gcf_cmek():
     try:

         @session.remote_function(
-            [int],
-            int,
+            input_types=[int],
+            output_type=int,
             reuse=False,
+            cloud_function_service_account="default",
             cloud_function_kms_key_name=cmek,
             cloud_function_docker_repository=docker_repository,
         )
@@ -1453,7 +1477,11 @@ def square_num(x):
         return x * x

     square_num_remote = rf_session.remote_function(
-        [int], int, reuse=False, cloud_function_vpc_connector=gcf_vpc_connector
+        input_types=[int],
+        output_type=int,
+        reuse=False,
+        cloud_function_service_account="default",
+        cloud_function_vpc_connector=gcf_vpc_connector,
     )(square_num)

     scalars_df, scalars_pandas_df = scalars_dfs
@@ -1495,7 +1523,11 @@ def square(x):
         return x * x

     square_remote = session.remote_function(
-        [int], int, reuse=False, max_batching_rows=max_batching_rows
+        input_types=[int],
+        output_type=int,
+        reuse=False,
+        max_batching_rows=max_batching_rows,
+        cloud_function_service_account="default",
     )(square)

     bq_routine = session.bqclient.get_routine(
@@ -1534,7 +1566,11 @@ def square(x):
         return x * x

     square_remote = session.remote_function(
-        [int], int, reuse=False, **timeout_args
+        input_types=[int],
+        output_type=int,
+        reuse=False,
+        cloud_function_service_account="default",
+        **timeout_args,
     )(square)

     # Assert that the GCF is created with the intended maximum timeout
@@ -1560,7 +1596,13 @@ def square(x):
 def test_remote_function_gcf_timeout_max_supported_exceeded(session):
     with pytest.raises(ValueError):

-        @session.remote_function([int], int, reuse=False, cloud_function_timeout=1201)
+        @session.remote_function(
+            input_types=[int],
+            output_type=int,
+            reuse=False,
+            cloud_function_service_account="default",
+            cloud_function_timeout=1201,
+        )
         def square(x):
             return x * x
@@ -1583,7 +1625,11 @@ def square(x):
         return x * x

     square_remote = session.remote_function(
-        [int], int, reuse=False, **max_instances_args
+        input_types=[int],
+        output_type=int,
+        reuse=False,
+        cloud_function_service_account="default",
+        **max_instances_args,
     )(square)

     # Assert that the GCF is created with the intended max instance count
@@ -1632,7 +1678,10 @@ def serialize_row(row):
     )

     serialize_row_remote = session.remote_function(
-        bigframes.series.Series, str, reuse=False
+        input_types=bigframes.series.Series,
+        output_type=str,
+        reuse=False,
+        cloud_function_service_account="default",
     )(serialize_row)

     assert getattr(serialize_row_remote, "is_row_processor")
@@ -1678,7 +1727,10 @@ def analyze(row):
     )

     analyze_remote = session.remote_function(
-        bigframes.series.Series, str, reuse=False
+        input_types=bigframes.series.Series,
+        output_type=str,
+        reuse=False,
+        cloud_function_service_account="default",
     )(analyze)

     assert getattr(analyze_remote, "is_row_processor")
@@ -1799,7 +1851,10 @@ def serialize_row(row):
     )

     serialize_row_remote = session.remote_function(
-        bigframes.series.Series, str, reuse=False
+        input_types=bigframes.series.Series,
+        output_type=str,
+        reuse=False,
+        cloud_function_service_account="default",
     )(serialize_row)

     assert getattr(serialize_row_remote, "is_row_processor")
@@ -1856,7 +1911,10 @@ def float_parser(row):
         return float(row["text"])

     float_parser_remote = session.remote_function(
-        bigframes.series.Series, float, reuse=False
+        input_types=bigframes.series.Series,
+        output_type=float,
+        reuse=False,
+        cloud_function_service_account="default",
     )(float_parser)

     assert getattr(float_parser_remote, "is_row_processor")
@@ -1901,7 +1959,9 @@ def test_remote_function_gcf_memory(
     def square(x: int) -> int:
         return x * x

-    square_remote = session.remote_function(reuse=False, **memory_mib_args)(square)
+    square_remote = session.remote_function(
+        reuse=False, cloud_function_service_account="default", **memory_mib_args
+    )(square)

     # Assert that the GCF is created with the intended memory
     gcf = session.cloudfunctionsclient.get_function(
@@ -1936,7 +1996,11 @@ def test_remote_function_gcf_memory_unsupported(session, memory_mib):
         match="Invalid value specified for container memory",
     ):

-        @session.remote_function(reuse=False, cloud_function_memory_mib=memory_mib)
+        @session.remote_function(
+            reuse=False,
+            cloud_function_service_account="default",
+            cloud_function_memory_mib=memory_mib,
+        )
         def square(x: int) -> int:
             return x * x
@@ -1947,7 +2011,7 @@ def test_remote_function_unnamed_removed_w_session_cleanup():
     session = bigframes.connect()

     # create an unnamed remote function in the session
-    @session.remote_function(reuse=False)
+    @session.remote_function(reuse=False, cloud_function_service_account="default")
     def foo(x: int) -> int:
         return x + 1
@@ -1989,7 +2053,9 @@ def test_remote_function_named_perists_w_session_cleanup():
     name = test_utils.prefixer.Prefixer("bigframes", "").create_prefix()

     # create an unnamed remote function in the session
-    @session.remote_function(reuse=False, name=name)
+    @session.remote_function(
+        reuse=False, name=name, cloud_function_service_account="default"
+    )
     def foo(x: int) -> int:
         return x + 1
@@ -2030,14 +2096,16 @@ def test_remote_function_clean_up_by_session_id():
     # without it, and later confirm that the former is deleted when the session
     # is cleaned up by session id, but the latter remains
     ## unnamed
-    @session.remote_function(reuse=False)
+    @session.remote_function(reuse=False, cloud_function_service_account="default")
     def foo_unnamed(x: int) -> int:
         return x + 1

     ## named
     rf_name = test_utils.prefixer.Prefixer("bigframes", "").create_prefix()

-    @session.remote_function(reuse=False, name=rf_name)
+    @session.remote_function(
+        reuse=False, name=rf_name, cloud_function_service_account="default"
+    )
     def foo_named(x: int) -> int:
         return x + 2
@@ -2104,7 +2172,12 @@ def test_df_apply_axis_1_multiple_params(session):
     try:

-        @session.remote_function([int, float, str], str, reuse=False)
+        @session.remote_function(
+            input_types=[int, float, str],
+            output_type=str,
+            reuse=False,
+            cloud_function_service_account="default",
+        )
         def foo(x, y, z):
             return f"I got {x}, {y} and {z}"
@@ -2179,7 +2252,12 @@ def test_df_apply_axis_1_multiple_params_array_output(session):
     try:

-        @session.remote_function([int, float, str], list[str], reuse=False)
+        @session.remote_function(
+            input_types=[int, float, str],
+            output_type=list[str],
+            reuse=False,
+            cloud_function_service_account="default",
+        )
         def foo(x, y, z):
             return [str(x), str(y), z]
@@ -2259,7 +2337,12 @@ def test_df_apply_axis_1_single_param_non_series(session):
     try:

-        @session.remote_function([int], str, reuse=False)
+        @session.remote_function(
+            input_types=[int],
+            output_type=str,
+            reuse=False,
+            cloud_function_service_account="default",
+        )
         def foo(x):
             return f"I got {x}"
@@ -2313,7 +2396,7 @@ def test_df_apply_axis_1_array_output(session, scalars_dfs):
     scalars_df, scalars_pandas_df = scalars_dfs
     try:

-        @session.remote_function(reuse=False)
+        @session.remote_function(reuse=False, cloud_function_service_account="default")
         def generate_stats(row: pandas.Series) -> list[int]:
             import pandas as pd
@@ -2402,7 +2485,9 @@ def square(x: int) -> int:
         return x * x

     square_remote = session.remote_function(
-        reuse=False, **ingress_settings_args
+        reuse=False,
+        cloud_function_service_account="default",
+        **ingress_settings_args,
     )(square)

     default_ingress_setting_warnings = [
@@ -2443,7 +2528,11 @@ def test_remote_function_ingress_settings_unsupported(session):
         ValueError, match="'unknown' not one of the supported ingress settings values"
     ):

-        @session.remote_function(reuse=False, cloud_function_ingress_settings="unknown")
+        @session.remote_function(
+            reuse=False,
+            cloud_function_service_account="default",
+            cloud_function_ingress_settings="unknown",
+        )
         def square(x: int) -> int:
             return x * x
@@ -2475,6 +2564,7 @@ def add_one(x: int) -> int:
         dataset=dataset_id,
         bigquery_connection=bq_cf_connection,
         reuse=False,
+        cloud_function_service_account="default",
     )(add_one)

     temporary_bigquery_remote_function = (
@@ -2552,6 +2642,7 @@ def add_one(x: int) -> int:
         bigquery_connection=bq_cf_connection,
         reuse=False,
         name=name,
+        cloud_function_service_account="default",
     )(add_one)

     persistent_bigquery_remote_function = (
@@ -2619,6 +2710,7 @@ def test_remote_function_array_output(
         dataset=dataset_id,
         bigquery_connection=bq_cf_connection,
         reuse=False,
+        cloud_function_service_account="default",
     )
     def featurize(x: int) -> list[array_dtype]:  # type: ignore
         return [array_dtype(i) for i in [x, x + 1, x + 2]]
@@ -2657,6 +2749,7 @@ def test_remote_function_array_output_partial_ordering_mode(
         dataset=dataset_id,
         bigquery_connection=bq_cf_connection,
         reuse=False,
+        cloud_function_service_account="default",
     )
     def featurize(x: float) -> list[float]:  # type: ignore
         return [x, x + 1, x + 2]
@@ -2698,6 +2791,7 @@ def test_remote_function_array_output_multiindex(
         dataset=dataset_id,
         bigquery_connection=bq_cf_connection,
         reuse=False,
+        cloud_function_service_account="default",
     )
     def featurize(x: int) -> list[float]:
         return [x, x + 0.5, x + 0.33]
diff --git a/tests/system/small/functions/test_remote_function.py b/tests/system/small/functions/test_remote_function.py
index 0af7f4e42e..8387f96303 100644
--- a/tests/system/small/functions/test_remote_function.py
+++ b/tests/system/small/functions/test_remote_function.py
@@ -107,8 +107,8 @@ def square(x):
         return x * x

     square = bff.remote_function(
-        int,
-        int,
+        input_types=int,
+        output_type=int,
         bigquery_client=bigquery_client,
         bigquery_connection_client=bigqueryconnection_client,
         cloud_functions_client=cloudfunctions_client,
@@ -118,6 +118,7 @@ def square(x):
         # See e2e tests for tests that actually deploy the Cloud Function.
         reuse=True,
         name=get_function_name(square),
+        cloud_function_service_account="default",
     )(square)

     # Function should still work normally.
@@ -167,8 +168,8 @@ def square(x):
         return x * x

     square = bff.remote_function(
-        int,
-        int,
+        input_types=int,
+        output_type=int,
         bigquery_client=bigquery_client,
         bigquery_connection_client=bigqueryconnection_client,
         cloud_functions_client=cloudfunctions_client,
@@ -178,6 +179,7 @@ def square(x):
         # See e2e tests for tests that actually deploy the Cloud Function.
         reuse=True,
         name=get_function_name(square),
+        cloud_function_service_account="default",
     )(square)

     # Function should still work normally.
@@ -226,8 +228,8 @@ def square(x):
         match=re.escape("The location does not match BigQuery connection location:"),
     ):
         bff.remote_function(
-            int,
-            int,
+            input_types=int,
+            output_type=int,
             bigquery_client=bigquery_client,
             bigquery_connection_client=bigqueryconnection_client,
             cloud_functions_client=cloudfunctions_client,
@@ -237,6 +239,7 @@ def square(x):
             # See e2e tests for tests that actually deploy the Cloud Function.
             reuse=True,
             name=get_function_name(square),
+            cloud_function_service_account="default",
         )(square)


@@ -254,8 +257,8 @@ def square(x):
         return x * x

     square = bff.remote_function(
-        int,
-        int,
+        input_types=int,
+        output_type=int,
         bigquery_client=bigquery_client,
         bigquery_connection_client=bigqueryconnection_client,
         cloud_functions_client=cloudfunctions_client,
@@ -265,6 +268,7 @@ def square(x):
         # See e2e tests for tests that actually deploy the Cloud Function.
         reuse=True,
         name=get_function_name(square),
+        cloud_function_service_account="default",
     )(square)

     # Function should still work normally.
@@ -315,8 +319,8 @@ def square(x):
         ),
     ):
         bff.remote_function(
-            int,
-            int,
+            input_types=int,
+            output_type=int,
             bigquery_client=bigquery_client,
             bigquery_connection_client=bigqueryconnection_client,
             cloud_functions_client=cloudfunctions_client,
@@ -326,6 +330,7 @@ def square(x):
             # See e2e tests for tests that actually deploy the Cloud Function.
             reuse=True,
             name=get_function_name(square),
+            cloud_function_service_account="default",
         )(square)


@@ -337,11 +342,12 @@ def square(x):
         return x * x

     square = bff.remote_function(
-        int,
-        int,
+        input_types=int,
+        output_type=int,
         session=session_with_bq_connection,
         dataset=dataset_id_permanent,
         name=get_function_name(square),
+        cloud_function_service_account="default",
     )(square)

     # Function should still work normally.
@@ -386,7 +392,11 @@ def square(x):
     # udf is same as the one used in other tests in this file so the underlying
     # cloud function would be common and quickly reused.
     square = session_with_bq_connection.remote_function(
-        int, int, dataset_id_permanent, name=get_function_name(square)
+        input_types=int,
+        output_type=int,
+        dataset=dataset_id_permanent,
+        name=get_function_name(square),
+        cloud_function_service_account="default",
     )(square)

     # Function should still work normally.
@@ -424,13 +434,14 @@ def square(x):
         return x * x

     square = session.remote_function(
-        int,
-        int,
-        dataset_id_permanent,
-        bq_cf_connection,
+        input_types=int,
+        output_type=int,
+        dataset=dataset_id_permanent,
+        bigquery_connection=bq_cf_connection,
         # See e2e tests for tests that actually deploy the Cloud Function.
         reuse=True,
         name=get_function_name(square),
+        cloud_function_service_account="default",
     )(square)

     # Function should still work normally.
@@ -468,7 +479,11 @@ def add_one(x):
         return x + 1

     remote_add_one = session_with_bq_connection.remote_function(
-        [int], int, dataset_id_permanent, name=get_function_name(add_one)
+        input_types=[int],
+        output_type=int,
+        dataset=dataset_id_permanent,
+        name=get_function_name(add_one),
+        cloud_function_service_account="default",
     )(add_one)

     scalars_df, scalars_pandas_df = scalars_dfs
@@ -499,7 +514,11 @@ def add_one(x):
         return x + 1

     remote_add_one = session_with_bq_connection.remote_function(
-        [int], int, dataset_id_permanent, name=get_function_name(add_one)
+        input_types=[int],
+        output_type=int,
+        dataset=dataset_id_permanent,
+        name=get_function_name(add_one),
+        cloud_function_service_account="default",
     )(add_one)

     scalars_df, scalars_pandas_df = scalars_dfs
@@ -530,7 +549,11 @@ def add_one(x):
         return x + 1

     remote_add_one = session_with_bq_connection.remote_function(
-        [int], int, dataset_id_permanent, name=get_function_name(add_one)
+        input_types=[int],
+        output_type=int,
+        dataset=dataset_id_permanent,
+        name=get_function_name(add_one),
+        cloud_function_service_account="default",
     )(add_one)

     scalars_df, scalars_pandas_df = scalars_dfs
@@ -576,6 +599,7 @@ def bytes_to_hex(mybytes: bytes) -> bytes:
         dataset=dataset_id_permanent,
         name=get_function_name(bytes_to_hex, package_requirements=packages),
         packages=packages,
+        cloud_function_service_account="default",
     )(bytes_to_hex)

     bf_result = scalars_df.bytes_col.map(remote_bytes_to_hex).to_pandas()
@@ -618,10 +642,11 @@ def add_one(x):
         return x + 1  # pragma: NO COVER

     session.remote_function(
-        [int],
-        int,
+        input_types=[int],
+        output_type=int,
         dataset=dataset_id_permanent,
         name=get_function_name(add_one),
+        cloud_function_service_account="default",
     )(add_one)


@@ -651,8 +676,8 @@ def square1(x):
         return x * x

     square1 = bff.remote_function(
-        [int],
-        int,
+        input_types=[int],
+        output_type=int,
         bigquery_client=bigquery_client,
         bigquery_connection_client=bigqueryconnection_client,
         dataset=dataset_id_permanent,
@@ -661,6 +686,7 @@ def square1(x):
         bigquery_connection=bq_cf_connection,
         reuse=True,
         name=get_function_name(square1),
+        cloud_function_service_account="default",
     )(square1)

     # Function should still work normally.
@@ -1135,10 +1161,11 @@ def add_ints(row):
         match="input_types=Series is in preview.",
     ):
         add_ints_remote = session.remote_function(
-            bigframes.series.Series,
-            int,
-            dataset_id_permanent,
+            input_types=bigframes.series.Series,
+            output_type=int,
+            dataset=dataset_id_permanent,
             name=get_function_name(add_ints, is_row_processor=True),
+            cloud_function_service_account="default",
         )(add_ints)
     assert add_ints_remote.bigframes_remote_function  # type: ignore
     assert add_ints_remote.bigframes_bigquery_function  # type: ignore
@@ -1187,10 +1214,11 @@ def add_ints(row):
         return row["int64_col"] + row["int64_too"]

     add_ints_remote = session.remote_function(
-        bigframes.series.Series,
-        int,
-        dataset_id_permanent,
+        input_types=bigframes.series.Series,
+        output_type=int,
+        dataset=dataset_id_permanent,
         name=get_function_name(add_ints, is_row_processor=True),
+        cloud_function_service_account="default",
     )(add_ints)

     bf_result = (
@@ -1226,10 +1254,11 @@ def add_numbers(row):
         return row["x"] + row["y"]

     add_numbers_remote = session.remote_function(
-        bigframes.series.Series,
-        float,
-        dataset_id_permanent,
+        input_types=bigframes.series.Series,
+        output_type=float,
+        dataset=dataset_id_permanent,
         name=get_function_name(add_numbers, is_row_processor=True),
+        cloud_function_service_account="default",
     )(add_numbers)

     bf_result = bf_df.apply(add_numbers_remote, axis=1).to_pandas()
@@ -1279,10 +1308,11 @@ def echo_len(row):
         return len(row)

     echo_len_remote = session.remote_function(
-        bigframes.series.Series,
-        float,
-        dataset_id_permanent,
+        input_types=bigframes.series.Series,
+        output_type=float,
+        dataset=dataset_id_permanent,
         name=get_function_name(echo_len, is_row_processor=True),
+        cloud_function_service_account="default",
     )(echo_len)

     for column in columns_with_not_supported_dtypes:
@@ -1315,7 +1345,9 @@ def should_mask(name: str) -> bool:
     assert "name" in inspect.signature(should_mask).parameters

     should_mask = session.remote_function(
-        dataset=dataset_id_permanent, name=get_function_name(should_mask)
+        dataset=dataset_id_permanent,
+        name=get_function_name(should_mask),
+        cloud_function_service_account="default",
     )(should_mask)

     s = bigframes.series.Series(["Alice", "Bob", "Caroline"])
@@ -1374,7 +1406,9 @@ def is_odd(x: int) -> bool:
     # create a remote function
     is_odd_remote = session.remote_function(
-        dataset=dataset_id_permanent, name=get_function_name(is_odd)
+        dataset=dataset_id_permanent,
+        name=get_function_name(is_odd),
+        cloud_function_service_account="default",
     )(is_odd)

     # with nulls in the series the remote function application would fail
@@ -1424,7 +1458,9 @@ def add(x: int, y: int) -> int:
     # create a remote function
     add_remote = session.remote_function(
-        dataset=dataset_id_permanent, name=get_function_name(add)
+        dataset=dataset_id_permanent,
+        name=get_function_name(add),
+        cloud_function_service_account="default",
     )(add)

     # with nulls in the series the remote function application would fail
@@ -1477,7 +1513,9 @@ def add(x: int, y: int, z: float) -> float:
     # create a remote function
     add_remote = session.remote_function(
-        dataset=dataset_id_permanent, name=get_function_name(add)
+        dataset=dataset_id_permanent,
+        name=get_function_name(add),
+        cloud_function_service_account="default",
     )(add)

     # pandas does not support nary functions, so let's create a proxy function
@@ -1533,6 +1571,7 @@ def is_long_duration(minutes: int) -> bool:
     is_long_duration = unordered_session.remote_function(
         dataset=dataset_id_permanent,
         name=get_function_name(is_long_duration),
+        cloud_function_service_account="default",
     )(is_long_duration)

     method = getattr(df["duration_minutes"], method)
@@ -1551,7 +1590,9 @@ def combiner(x: int, y: int) -> int:
         return x

     combiner = unordered_session.remote_function(
-        dataset=dataset_id_permanent, name=get_function_name(combiner)
+        dataset=dataset_id_permanent,
+        name=get_function_name(combiner),
+        cloud_function_service_account="default",
     )(combiner)

     df = scalars_df_index[["int64_col", "int64_too", "float64_col", "string_col"]]
@@ -1567,7 +1608,9 @@ def processor(x: int, y: int, z: float, w: str) -> str:
         return f"I got x={x}, y={y}, z={z} and w={w}"

     processor = unordered_session.remote_function(
-        dataset=dataset_id_permanent, name=get_function_name(processor)
+        dataset=dataset_id_permanent,
+        name=get_function_name(processor),
+        cloud_function_service_account="default",
     )(processor)

     df = scalars_df_index[["int64_col", "int64_too", "float64_col", "string_col"]]
diff --git a/tests/unit/functions/test_remote_function.py b/tests/unit/functions/test_remote_function.py
index d377fb4d49..56003abf2d 100644
--- a/tests/unit/functions/test_remote_function.py
+++ b/tests/unit/functions/test_remote_function.py
@@ -42,7 +42,9 @@ def test_series_input_types_to_str(series_type):
     """Check that is_row_processor=True uses str as the input type to serialize a row."""
     session = resources.create_bigquery_session()
-    remote_function_decorator = bff.remote_function(session=session)
+    remote_function_decorator = bff.remote_function(
+        session=session, cloud_function_service_account="default"
+    )

     with pytest.warns(
         bigframes.exceptions.PreviewWarning,
@@ -79,7 +81,9 @@ def test_supported_types_correspond():
 def test_missing_input_types():
     session = resources.create_bigquery_session()
-    remote_function_decorator = bff.remote_function(session=session)
+    remote_function_decorator = bff.remote_function(
+        session=session, cloud_function_service_account="default"
+    )

     def function_without_parameter_annotations(myparam) -> str:
         return str(myparam)
@@ -95,7 +99,9 @@ def function_without_parameter_annotations(myparam) -> str:
 def test_missing_output_type():
     session = resources.create_bigquery_session()
-    remote_function_decorator = bff.remote_function(session=session)
+    remote_function_decorator = bff.remote_function(
+        session=session, cloud_function_service_account="default"
+    )

     def function_without_return_annotation(myparam: int):
         return str(myparam)
diff --git a/third_party/bigframes_vendored/pandas/core/frame.py b/third_party/bigframes_vendored/pandas/core/frame.py
index e59232ee85..8f3e150606 100644
--- a/third_party/bigframes_vendored/pandas/core/frame.py
+++ b/third_party/bigframes_vendored/pandas/core/frame.py
@@ -4433,7 +4433,7 @@ def map(self, func, na_action: Optional[str] = None) -> DataFrame:
         to potentially reuse a previously deployed ``remote_function`` from
         the same user defined function.

-        >>> @bpd.remote_function(reuse=False)
+        >>> @bpd.remote_function(reuse=False, cloud_function_service_account="default")
         ... def minutes_to_hours(x: int) -> float:
         ...     return x/60
@@ -4813,7 +4813,7 @@ def apply(self, func, *, axis=0, args=(), **kwargs):
         to select only the necessary columns before calling `apply()`.
         Note: This feature is currently in **preview**.

-        >>> @bpd.remote_function(reuse=False)
+        >>> @bpd.remote_function(reuse=False, cloud_function_service_account="default")
         ... def foo(row: pd.Series) -> int:
         ...     result = 1
         ...     result += row["col1"]
@@ -4828,7 +4828,7 @@ def apply(self, func, *, axis=0, args=(), **kwargs):
         You could return an array output for every input row from the remote
         function.

-        >>> @bpd.remote_function(reuse=False)
+        >>> @bpd.remote_function(reuse=False, cloud_function_service_account="default")
         ... def marks_analyzer(marks: pd.Series) -> list[float]:
         ...     import statistics
         ...     average = marks.mean()
@@ -4869,7 +4869,7 @@ def apply(self, func, *, axis=0, args=(), **kwargs):
         <BLANKLINE>
         [2 rows x 3 columns]

-        >>> @bpd.remote_function(reuse=False)
+        >>> @bpd.remote_function(reuse=False, cloud_function_service_account="default")
         ... def foo(x: int, y: int, z: int) -> float:
         ...     result = 1
         ...     result += x
diff --git a/third_party/bigframes_vendored/pandas/core/series.py b/third_party/bigframes_vendored/pandas/core/series.py
index 913a2e7c3e..a2d0983652 100644
--- a/third_party/bigframes_vendored/pandas/core/series.py
+++ b/third_party/bigframes_vendored/pandas/core/series.py
@@ -1854,7 +1854,7 @@ def apply(
         to potentially reuse a previously deployed `remote_function` from
         the same user defined function.

-        >>> @bpd.remote_function(reuse=False)
+        >>> @bpd.remote_function(reuse=False, cloud_function_service_account="default")
         ... def minutes_to_hours(x: int) -> float:
         ...     return x/60
@@ -1883,6 +1883,7 @@ def apply(
         >>> @bpd.remote_function(
         ...     reuse=False,
         ...     packages=["cryptography"],
+        ...     cloud_function_service_account="default"
         ... )
         ... def get_hash(input: str) -> str:
         ...     from cryptography.fernet import Fernet
@@ -1900,7 +1901,7 @@ def apply(
         You could return an array output from the remote function.

-        >>> @bpd.remote_function(reuse=False)
+        >>> @bpd.remote_function(reuse=False, cloud_function_service_account="default")
         ... def text_analyzer(text: str) -> list[int]:
         ...     words = text.count(" ") + 1
         ...     periods = text.count(".")
@@ -5069,7 +5070,7 @@ def mask(self, cond, other):
         condition is evaluated based on a complicated business logic which
         cannot be expressed in form of a Series.

-        >>> @bpd.remote_function(reuse=False)
+        >>> @bpd.remote_function(reuse=False, cloud_function_service_account="default")
         ... def should_mask(name: str) -> bool:
         ...     hash = 0
         ...     for char_ in name:
@@ -5665,7 +5666,7 @@ def map(
         It also accepts a remote function:

-        >>> @bpd.remote_function()
+        >>> @bpd.remote_function(cloud_function_service_account="default")
         ... def my_mapper(val: str) -> str:
         ...     vowels = ["a", "e", "i", "o", "u"]
         ...     if val:
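
Taken together, the change makes `cloud_function_service_account` a required keyword-only argument of `remote_function`: passing `None` now raises a `ValueError`, and the previous implicit behavior is only available by opting in with the literal string `"default"`. Below is a minimal sketch of the new calling convention, not part of the diff itself; it assumes an active BigQuery DataFrames session, and the project, dataset, and service-account address are placeholders.

```python
import bigframes.pandas as bpd

# After this change, input_types/output_type are keyword-only and
# cloud_function_service_account must be supplied explicitly.
@bpd.remote_function(
    input_types=[int],
    output_type=str,
    reuse=False,
    # Preferred: a user-managed service account (placeholder address).
    cloud_function_service_account="my-sa@my-project.iam.gserviceaccount.com",
)
def describe(x: int) -> str:
    return f"value={x}"

# Opting in to the default Compute Engine service account now requires
# the explicit sentinel string "default".
@bpd.remote_function(reuse=False, cloud_function_service_account="default")
def add_one(x: int) -> int:
    return x + 1

# Passing cloud_function_service_account=None raises ValueError; omitting
# the argument entirely raises TypeError (missing required keyword argument).
```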