From c0ac17a1294b3b4a0b6a521630a08897f743d745 Mon Sep 17 00:00:00 2001 From: Shobhit Singh Date: Tue, 25 Mar 2025 23:57:05 +0000 Subject: [PATCH 01/10] chore!: drop support for locational endpoints BREAKING CHANGE: Locational endpoints support is not available in BigFrames 2.0. --- bigframes/_config/bigquery_options.py | 10 ++---- bigframes/constants.py | 15 ++++---- bigframes/session/clients.py | 49 ++++++++++----------------- tests/system/large/test_location.py | 17 +++------- 4 files changed, 32 insertions(+), 59 deletions(-) diff --git a/bigframes/_config/bigquery_options.py b/bigframes/_config/bigquery_options.py index 84bc4f6d01..96febf268f 100644 --- a/bigframes/_config/bigquery_options.py +++ b/bigframes/_config/bigquery_options.py @@ -265,10 +265,6 @@ def use_regional_endpoints(self) -> bool: in regions "europe-west3", "europe-west8", "europe-west9", "me-central2", "us-central1", "us-central2", "us-east1", "us-east4", "us-east5", "us-east7", "us-south1", "us-west1", "us-west2", "us-west3" - and "us-west4". - - .. deprecated:: 0.13.0 - Use of locational endpoints is available only in selected projects. Requires that ``location`` is set. For supported regions, for example ``europe-west3``, you need to specify ``location='europe-west3'`` and @@ -276,9 +272,9 @@ def use_regional_endpoints(self) -> bool: connect to the BigQuery endpoint ``bigquery.europe-west3.rep.googleapis.com``. For not supported regions, for example ``asia-northeast1``, when you specify ``location='asia-northeast1'`` and ``use_regional_endpoints=True``, - a different endpoint (called locational endpoint, now deprecated, used - to provide weaker promise on the request remaining within the location - during transit) ``europe-west3-bigquery.googleapis.com`` would be used. + the global endpoint ``bigquery.googleapis.com`` would be used, which + does not promise any guarantee on the request remaining within the + location during transit. 
Returns: bool: diff --git a/bigframes/constants.py b/bigframes/constants.py index 8f5ed95e1a..c6025ac45d 100644 --- a/bigframes/constants.py +++ b/bigframes/constants.py @@ -96,22 +96,19 @@ } ) -# https://cloud.google.com/storage/docs/locational-endpoints -LEP_ENABLED_BIGQUERY_LOCATIONS = frozenset( +REP_NOT_ENABLED_BIGQUERY_LOCATIONS = frozenset( ALL_BIGQUERY_LOCATIONS - REP_ENABLED_BIGQUERY_LOCATIONS ) -LEP_DEPRECATION_WARNING_MESSAGE = textwrap.dedent( +REP_NOT_SUPPOERTED_WARNING_MESSAGE = textwrap.dedent( """ Support for regional endpoints is not yet available in the location {location} for BigQuery and BigQuery Storage APIs. For the supported locations and APIs see https://cloud.google.com/bigquery/docs/regional-endpoints. - For other locations and APIs, currently an older, now deprecated locational - endpoints are being used, which requires your project to be allowlisted. In - future version 2.0 onwards the locational endpoints will no longer be - supported automatically when you enable regional endpoints. However, if you - still need them, you will be able to override the endpoints directly by - doing the following: + For other locations we are falling back to the global APIs. If you have the + previously supported (now deprecated) locational endpoints enabled in your + project (which requires your project to be allowlisted), you can override + the endpoints directly by doing the following: bigframes.pandas.options.bigquery.client_endpoints_override = {{ "bqclient": "https://{location}-bigquery.googleapis.com", "bqconnectionclient": "{location}-bigqueryconnection.googleapis.com", diff --git a/bigframes/session/clients.py b/bigframes/session/clients.py index 5b707ad478..cb0d9275dc 100644 --- a/bigframes/session/clients.py +++ b/bigframes/session/clients.py @@ -41,13 +41,10 @@ # BigQuery is a REST API, which requires the protocol as part of the URL. 
-_BIGQUERY_LOCATIONAL_ENDPOINT = "https://{location}-bigquery.googleapis.com" _BIGQUERY_REGIONAL_ENDPOINT = "https://bigquery.{location}.rep.googleapis.com" # BigQuery Connection and Storage are gRPC APIs, which don't support the # https:// protocol in the API endpoint URL. -_BIGQUERYCONNECTION_LOCATIONAL_ENDPOINT = "{location}-bigqueryconnection.googleapis.com" -_BIGQUERYSTORAGE_LOCATIONAL_ENDPOINT = "{location}-bigquerystorage.googleapis.com" _BIGQUERYSTORAGE_REGIONAL_ENDPOINT = ( "https://bigquerystorage.{location}.rep.googleapis.com" ) @@ -104,12 +101,12 @@ def __init__( not in bigframes.constants.REP_ENABLED_BIGQUERY_LOCATIONS ): msg = bfe.format_message( - bigframes.constants.LEP_DEPRECATION_WARNING_MESSAGE.format( + bigframes.constants.REP_NOT_SUPPOERTED_WARNING_MESSAGE.format( location=location ), fill=False, ) - warnings.warn(msg, category=FutureWarning) + warnings.warn(msg, category=ResourceWarning) self._location = location self._use_regional_endpoints = use_regional_endpoints @@ -138,17 +135,14 @@ def _create_bigquery_client(self): bq_options = google.api_core.client_options.ClientOptions( api_endpoint=self._client_endpoints_override["bqclient"] ) - elif self._use_regional_endpoints: - endpoint_template = _BIGQUERY_REGIONAL_ENDPOINT - if ( - self._location is not None - and self._location.lower() - not in bigframes.constants.REP_ENABLED_BIGQUERY_LOCATIONS - ): - endpoint_template = _BIGQUERY_LOCATIONAL_ENDPOINT - + elif ( + self._use_regional_endpoints + and self._location is not None + and self._location.lower() + in bigframes.constants.REP_ENABLED_BIGQUERY_LOCATIONS + ): bq_options = google.api_core.client_options.ClientOptions( - api_endpoint=endpoint_template.format(location=self._location) + api_endpoint=_BIGQUERY_REGIONAL_ENDPOINT.format(location=self._location) ) bq_info = google.api_core.client_info.ClientInfo( @@ -195,12 +189,6 @@ def bqconnectionclient(self): bqconnection_options = google.api_core.client_options.ClientOptions( 
api_endpoint=self._client_endpoints_override["bqconnectionclient"] ) - elif self._use_regional_endpoints: - bqconnection_options = google.api_core.client_options.ClientOptions( - api_endpoint=_BIGQUERYCONNECTION_LOCATIONAL_ENDPOINT.format( - location=self._location - ) - ) bqconnection_info = google.api_core.gapic_v1.client_info.ClientInfo( user_agent=self._application_name @@ -223,17 +211,16 @@ def bqstoragereadclient(self): bqstorage_options = google.api_core.client_options.ClientOptions( api_endpoint=self._client_endpoints_override["bqstoragereadclient"] ) - elif self._use_regional_endpoints: - endpoint_template = _BIGQUERYSTORAGE_REGIONAL_ENDPOINT - if ( - self._location is not None - and self._location.lower() - not in bigframes.constants.REP_ENABLED_BIGQUERY_LOCATIONS - ): - endpoint_template = _BIGQUERYSTORAGE_LOCATIONAL_ENDPOINT - + elif ( + self._use_regional_endpoints + and self._location is not None + and self._location.lower() + in bigframes.constants.REP_ENABLED_BIGQUERY_LOCATIONS + ): bqstorage_options = google.api_core.client_options.ClientOptions( - api_endpoint=endpoint_template.format(location=self._location) + api_endpoint=_BIGQUERYSTORAGE_REGIONAL_ENDPOINT.format( + location=self._location + ) ) bqstorage_info = google.api_core.gapic_v1.client_info.ClientInfo( diff --git a/tests/system/large/test_location.py b/tests/system/large/test_location.py index 7801f5dada..0efdbe8068 100644 --- a/tests/system/large/test_location.py +++ b/tests/system/large/test_location.py @@ -127,7 +127,7 @@ def test_bq_rep_endpoints(bigquery_location): ) ) assert ( - len([warn for warn in record if isinstance(warn.message, FutureWarning)]) + len([warn for warn in record if isinstance(warn.message, ResourceWarning)]) == 0 ) @@ -150,15 +150,10 @@ def test_bq_rep_endpoints(bigquery_location): @pytest.mark.parametrize( "bigquery_location", # Sort the set to avoid nondeterminism. 
- sorted(bigframes.constants.LEP_ENABLED_BIGQUERY_LOCATIONS), + sorted(bigframes.constants.REP_NOT_ENABLED_BIGQUERY_LOCATIONS), ) -def test_bq_lep_endpoints(bigquery_location): - # We are not testing BigFrames Session for LEP endpoints because it involves - # query execution using the endpoint, which requires the project to be - # allowlisted for LEP access. We could hardcode one project which is - # allowlisted but then not every open source developer will have access to - # that. Let's rely on just creating the clients for LEP. - with pytest.warns(FutureWarning) as record: +def test_bq_non_rep_endpoints(bigquery_location): + with pytest.warns(ResourceWarning) as record: clients_provider = bigframes.session.clients.ClientsProvider( location=bigquery_location, use_regional_endpoints=True ) @@ -172,7 +167,5 @@ def test_bq_lep_endpoints(bigquery_location): assert clients_provider.bqclient.location == bigquery_location assert ( clients_provider.bqclient._connection.API_BASE_URL - == "https://{location}-bigquery.googleapis.com".format( - location=bigquery_location - ) + == "https://bigquery.googleapis.com" ) From 836d2c62ede92f628777c4fc57fcf9dcc5c84ed6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20Swe=C3=B1a=20=28Swast=29?= Date: Thu, 27 Mar 2025 09:59:36 -0500 Subject: [PATCH 02/10] Update bigframes/_config/bigquery_options.py --- bigframes/_config/bigquery_options.py | 1 + 1 file changed, 1 insertion(+) diff --git a/bigframes/_config/bigquery_options.py b/bigframes/_config/bigquery_options.py index 96febf268f..5418d20b61 100644 --- a/bigframes/_config/bigquery_options.py +++ b/bigframes/_config/bigquery_options.py @@ -265,6 +265,7 @@ def use_regional_endpoints(self) -> bool: in regions "europe-west3", "europe-west8", "europe-west9", "me-central2", "us-central1", "us-central2", "us-east1", "us-east4", "us-east5", "us-east7", "us-south1", "us-west1", "us-west2", "us-west3" + and "us-west4". Requires that ``location`` is set. 
For supported regions, for example ``europe-west3``, you need to specify ``location='europe-west3'`` and From 79d1c53bfce3d5ec7b0f5b2417b9a2dbc54578bc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20Swe=C3=B1a=20=28Swast=29?= Date: Thu, 27 Mar 2025 10:13:49 -0500 Subject: [PATCH 03/10] still try the regional endpoint even if not in the client-side list of supported regions --- bigframes/constants.py | 5 ++--- bigframes/session/clients.py | 22 ++++++++++------------ 2 files changed, 12 insertions(+), 15 deletions(-) diff --git a/bigframes/constants.py b/bigframes/constants.py index c6025ac45d..b279a88056 100644 --- a/bigframes/constants.py +++ b/bigframes/constants.py @@ -102,11 +102,10 @@ REP_NOT_SUPPOERTED_WARNING_MESSAGE = textwrap.dedent( """ - Support for regional endpoints is not yet available in the location + Support for regional endpoints may not be available in the location {location} for BigQuery and BigQuery Storage APIs. For the supported locations and APIs see https://cloud.google.com/bigquery/docs/regional-endpoints. - For other locations we are falling back to the global APIs. 
If you have the - previously supported (now deprecated) locational endpoints enabled in your + If you have the (deprecated) locational endpoints enabled in your project (which requires your project to be allowlisted), you can override the endpoints directly by doing the following: bigframes.pandas.options.bigquery.client_endpoints_override = {{ diff --git a/bigframes/session/clients.py b/bigframes/session/clients.py index cb0d9275dc..0174c8d394 100644 --- a/bigframes/session/clients.py +++ b/bigframes/session/clients.py @@ -135,12 +135,11 @@ def _create_bigquery_client(self): bq_options = google.api_core.client_options.ClientOptions( api_endpoint=self._client_endpoints_override["bqclient"] ) - elif ( - self._use_regional_endpoints - and self._location is not None - and self._location.lower() - in bigframes.constants.REP_ENABLED_BIGQUERY_LOCATIONS - ): + elif self._use_regional_endpoints: + if self._location: + raise ValueError( + "Must set bpd.options.bigquery.location to use regional endpoints. Got None." + ) bq_options = google.api_core.client_options.ClientOptions( api_endpoint=_BIGQUERY_REGIONAL_ENDPOINT.format(location=self._location) ) @@ -211,12 +210,11 @@ def bqstoragereadclient(self): bqstorage_options = google.api_core.client_options.ClientOptions( api_endpoint=self._client_endpoints_override["bqstoragereadclient"] ) - elif ( - self._use_regional_endpoints - and self._location is not None - and self._location.lower() - in bigframes.constants.REP_ENABLED_BIGQUERY_LOCATIONS - ): + elif self._use_regional_endpoints: + if self._location: + raise ValueError( + "Must set bpd.options.bigquery.location to use regional endpoints. Got None." 
+ ) bqstorage_options = google.api_core.client_options.ClientOptions( api_endpoint=_BIGQUERYSTORAGE_REGIONAL_ENDPOINT.format( location=self._location From 7be2e38c8c8f39a1b62847eba677136e68227e74 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20Swe=C3=B1a=20=28Swast=29?= Date: Thu, 27 Mar 2025 10:20:38 -0500 Subject: [PATCH 04/10] add bq connection regional url template --- bigframes/session/clients.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/bigframes/session/clients.py b/bigframes/session/clients.py index 0174c8d394..84da14c045 100644 --- a/bigframes/session/clients.py +++ b/bigframes/session/clients.py @@ -45,9 +45,8 @@ # BigQuery Connection and Storage are gRPC APIs, which don't support the # https:// protocol in the API endpoint URL. -_BIGQUERYSTORAGE_REGIONAL_ENDPOINT = ( - "https://bigquerystorage.{location}.rep.googleapis.com" -) +_BIGQUERYCONNECTION_REGIONAL_ENDPOINT = "bigqueryconnection.{location}.rep.googleapis.com" +_BIGQUERYSTORAGE_REGIONAL_ENDPOINT = "bigquerystorage.{location}.rep.googleapis.com" def _get_default_credentials_with_project(): @@ -188,6 +187,16 @@ def bqconnectionclient(self): bqconnection_options = google.api_core.client_options.ClientOptions( api_endpoint=self._client_endpoints_override["bqconnectionclient"] ) + elif self._use_regional_endpoints: + if self._location: + raise ValueError( + "Must set bpd.options.bigquery.location to use regional endpoints. Got None." 
+ ) + bqstorage_options = google.api_core.client_options.ClientOptions( + api_endpoint=_BIGQUERYCONNECTION_REGIONAL_ENDPOINT.format( + location=self._location + ) + ) bqconnection_info = google.api_core.gapic_v1.client_info.ClientInfo( user_agent=self._application_name From 157611fe01275602ff05c5adbee938d891169b52 Mon Sep 17 00:00:00 2001 From: Owl Bot Date: Thu, 27 Mar 2025 15:23:15 +0000 Subject: [PATCH 05/10] =?UTF-8?q?=F0=9F=A6=89=20Updates=20from=20OwlBot=20?= =?UTF-8?q?post-processor?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --- bigframes/session/clients.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/bigframes/session/clients.py b/bigframes/session/clients.py index 84da14c045..082753f523 100644 --- a/bigframes/session/clients.py +++ b/bigframes/session/clients.py @@ -45,7 +45,9 @@ # BigQuery Connection and Storage are gRPC APIs, which don't support the # https:// protocol in the API endpoint URL. 
-_BIGQUERYCONNECTION_REGIONAL_ENDPOINT = "bigqueryconnection.{location}.rep.googleapis.com" +_BIGQUERYCONNECTION_REGIONAL_ENDPOINT = ( + "bigqueryconnection.{location}.rep.googleapis.com" +) _BIGQUERYSTORAGE_REGIONAL_ENDPOINT = "bigquerystorage.{location}.rep.googleapis.com" def _get_default_credentials_with_project(): From 0ce3869a048fdba64a8dc853155602cce0a5df46 Mon Sep 17 00:00:00 2001 From: Shobhit Singh Date: Thu, 27 Mar 2025 18:59:49 +0000 Subject: [PATCH 06/10] move location validation to a common place --- bigframes/constants.py | 11 ++++++++- bigframes/session/clients.py | 45 ++++++++++-------------------------- 2 files changed, 22 insertions(+), 34 deletions(-) diff --git a/bigframes/constants.py b/bigframes/constants.py index b279a88056..7f4d5eb8d3 100644 --- a/bigframes/constants.py +++ b/bigframes/constants.py @@ -100,7 +100,16 @@ ALL_BIGQUERY_LOCATIONS - REP_ENABLED_BIGQUERY_LOCATIONS ) -REP_NOT_SUPPOERTED_WARNING_MESSAGE = textwrap.dedent( +LOCATION_NEEDED_FOR_REP_MESSAGE = textwrap.dedent( + """ + Must set location to use regional endpoints. + You can do it via bigframes.pandas.options.bigquery.location. + The supported locations can be found at + https://cloud.google.com/bigquery/docs/regional-endpoints#supported-locations. + """ +).strip() + +REP_NOT_SUPPORTED_MESSAGE = textwrap.dedent( """ Support for regional endpoints may not be available in the location {location} for BigQuery and BigQuery Storage APIs. 
For the supported diff --git a/bigframes/session/clients.py b/bigframes/session/clients.py index 082753f523..cdcf4f7314 100644 --- a/bigframes/session/clients.py +++ b/bigframes/session/clients.py @@ -17,7 +17,6 @@ import os import typing from typing import Optional -import warnings import google.api_core.client_info import google.api_core.client_options @@ -32,7 +31,6 @@ import pydata_google_auth import bigframes.constants -import bigframes.exceptions as bfe import bigframes.version _ENV_DEFAULT_PROJECT = "GOOGLE_CLOUD_PROJECT" @@ -95,19 +93,18 @@ def __init__( ) self._project = project - if ( - use_regional_endpoints - and location is not None - and location.lower() - not in bigframes.constants.REP_ENABLED_BIGQUERY_LOCATIONS - ): - msg = bfe.format_message( - bigframes.constants.REP_NOT_SUPPOERTED_WARNING_MESSAGE.format( - location=location - ), - fill=False, - ) - warnings.warn(msg, category=ResourceWarning) + if use_regional_endpoints: + if location is None: + raise ValueError(bigframes.constants.LOCATION_NEEDED_FOR_REP_MESSAGE) + elif ( + location.lower() + not in bigframes.constants.REP_ENABLED_BIGQUERY_LOCATIONS + ): + raise ValueError( + bigframes.constants.REP_NOT_SUPPORTED_MESSAGE.format( + location=location + ) + ) self._location = location self._use_regional_endpoints = use_regional_endpoints @@ -137,10 +134,6 @@ def _create_bigquery_client(self): api_endpoint=self._client_endpoints_override["bqclient"] ) elif self._use_regional_endpoints: - if self._location: - raise ValueError( - "Must set bpd.options.bigquery.location to use regional endpoints. Got None." 
- ) bq_options = google.api_core.client_options.ClientOptions( api_endpoint=_BIGQUERY_REGIONAL_ENDPOINT.format(location=self._location) ) @@ -189,16 +182,6 @@ def bqconnectionclient(self): bqconnection_options = google.api_core.client_options.ClientOptions( api_endpoint=self._client_endpoints_override["bqconnectionclient"] ) - elif self._use_regional_endpoints: - if self._location: - raise ValueError( - "Must set bpd.options.bigquery.location to use regional endpoints. Got None." - ) - bqstorage_options = google.api_core.client_options.ClientOptions( - api_endpoint=_BIGQUERYCONNECTION_REGIONAL_ENDPOINT.format( - location=self._location - ) - ) bqconnection_info = google.api_core.gapic_v1.client_info.ClientInfo( user_agent=self._application_name @@ -222,10 +205,6 @@ def bqstoragereadclient(self): api_endpoint=self._client_endpoints_override["bqstoragereadclient"] ) elif self._use_regional_endpoints: - if self._location: - raise ValueError( - "Must set bpd.options.bigquery.location to use regional endpoints. Got None." - ) bqstorage_options = google.api_core.client_options.ClientOptions( api_endpoint=_BIGQUERYSTORAGE_REGIONAL_ENDPOINT.format( location=self._location From 9fbba9e0dd14229a12d8d31e7c15ce8f35565d3d Mon Sep 17 00:00:00 2001 From: Shobhit Singh Date: Thu, 27 Mar 2025 22:46:50 +0000 Subject: [PATCH 07/10] add test for storage read client --- bigframes/session/clients.py | 3 -- tests/system/large/test_location.py | 84 ++++++++++++++++------------- tests/unit/session/test_clients.py | 20 +++++++ tests/unit/session/test_session.py | 20 +++++++ 4 files changed, 86 insertions(+), 41 deletions(-) diff --git a/bigframes/session/clients.py b/bigframes/session/clients.py index cdcf4f7314..4d022a9f7d 100644 --- a/bigframes/session/clients.py +++ b/bigframes/session/clients.py @@ -43,9 +43,6 @@ # BigQuery Connection and Storage are gRPC APIs, which don't support the # https:// protocol in the API endpoint URL. 
-_BIGQUERYCONNECTION_REGIONAL_ENDPOINT = ( - "bigqueryconnection.{location}.rep.googleapis.com" -) _BIGQUERYSTORAGE_REGIONAL_ENDPOINT = "bigquerystorage.{location}.rep.googleapis.com" diff --git a/tests/system/large/test_location.py b/tests/system/large/test_location.py index 0efdbe8068..17d77ac280 100644 --- a/tests/system/large/test_location.py +++ b/tests/system/large/test_location.py @@ -13,9 +13,11 @@ # limitations under the License. import typing -import warnings from google.cloud import bigquery +from google.cloud.bigquery_storage import types as bqstorage_types +import pandas +import pandas.testing import pytest import bigframes @@ -41,6 +43,7 @@ def _assert_bq_execution_location( assert typing.cast(bigquery.QueryJob, df.query_job).location == expected_location + # Ensure operation involving BQ client suceeds result = ( df[["name", "number"]] .groupby("name") @@ -53,6 +56,35 @@ def _assert_bq_execution_location( typing.cast(bigquery.QueryJob, result.query_job).location == expected_location ) + expected_result = pandas.DataFrame( + {"number": [444, 222]}, index=pandas.Index(["aaa", "bbb"], name="name") + ) + pandas.testing.assert_frame_equal( + expected_result, result.to_pandas(), check_dtype=False, check_index_type=False + ) + + # Ensure BQ Storage Read client operation succceeds + table = result.query_job.destination + requested_session = bqstorage_types.ReadSession( + table=f"projects/{table.project}/datasets/{table.dataset_id}/tables/{table.table_id}", + data_format=bqstorage_types.DataFormat.ARROW, + ) + read_session = session.bqstoragereadclient.create_read_session( + parent=f"projects/{table.project}", + read_session=requested_session, + max_stream_count=1, + ) + reader = session.bqstoragereadclient.read_rows(read_session.streams[0].name) + frames = [] + for message in reader.rows().pages: + frames.append(message.to_dataframe()) + read_dataframe = pandas.concat(frames) + # normalize before comparing since we lost some of the bigframes column + # 
naming abstractions in the direct read of the destination table + read_dataframe = read_dataframe.set_index("name") + read_dataframe.columns = result.columns + pandas.testing.assert_frame_equal(expected_result, read_dataframe) + def test_bq_location_default(): session = bigframes.Session() @@ -119,22 +151,14 @@ def test_bq_location_non_canonical(set_location, resolved_location): sorted(bigframes.constants.REP_ENABLED_BIGQUERY_LOCATIONS), ) def test_bq_rep_endpoints(bigquery_location): - with warnings.catch_warnings(record=True) as record: - warnings.simplefilter("always") - session = bigframes.Session( - context=bigframes.BigQueryOptions( - location=bigquery_location, use_regional_endpoints=True - ) - ) - assert ( - len([warn for warn in record if isinstance(warn.message, ResourceWarning)]) - == 0 + session = bigframes.Session( + context=bigframes.BigQueryOptions( + location=bigquery_location, use_regional_endpoints=True ) + ) - # Verify that location and endpoints are correctly set for the BigQuery API + # Verify that location and endpoint is correctly set for the BigQuery API # client - # TODO(shobs): Figure out if the same can be verified for the other API - # clients. assert session.bqclient.location == bigquery_location assert ( session.bqclient._connection.API_BASE_URL @@ -143,29 +167,13 @@ def test_bq_rep_endpoints(bigquery_location): ) ) - # assert that bigframes session honors the location - _assert_bq_execution_location(session) - - -@pytest.mark.parametrize( - "bigquery_location", - # Sort the set to avoid nondeterminism. 
- sorted(bigframes.constants.REP_NOT_ENABLED_BIGQUERY_LOCATIONS), -) -def test_bq_non_rep_endpoints(bigquery_location): - with pytest.warns(ResourceWarning) as record: - clients_provider = bigframes.session.clients.ClientsProvider( - location=bigquery_location, use_regional_endpoints=True - ) - assert len(record) == 1 - assert bigquery_location in typing.cast(Warning, record[0].message).args[0] - - # Verify that location and endpoints are correctly set for the BigQuery API - # client - # TODO(shobs): Figure out if the same can be verified for the other API - # clients. - assert clients_provider.bqclient.location == bigquery_location + # Verify that endpoint is correctly set for the BigQuery Storage API client + # TODO(shobs): Figure out if we can verify that location is set in the + # BigQuery Storage API client. assert ( - clients_provider.bqclient._connection.API_BASE_URL - == "https://bigquery.googleapis.com" + session.bqstoragereadclient.api_endpoint + == f"bigquerystorage.{bigquery_location}.rep.googleapis.com" ) + + # assert that bigframes session honors the location + _assert_bq_execution_location(session) diff --git a/tests/unit/session/test_clients.py b/tests/unit/session/test_clients.py index 30ba2f9091..fc68f597c2 100644 --- a/tests/unit/session/test_clients.py +++ b/tests/unit/session/test_clients.py @@ -25,7 +25,9 @@ import google.cloud.bigquery_storage_v1 import google.cloud.functions_v2 import google.cloud.resourcemanager_v3 +import pytest +import bigframes.constants as constants import bigframes.session.clients as clients import bigframes.version @@ -113,3 +115,21 @@ def test_user_agent_custom(monkeypatch): # We still need to include attribution to bigframes, even if there's also a # partner using the package. 
assert_clients_w_user_agent(provider, f"bigframes/{bigframes.version.__version__}") + + +def test_clients_provider_no_location(): + with pytest.raises(ValueError, match="Must set location to use regional endpoints"): + clients.ClientsProvider(use_regional_endpoints=True) + + +@pytest.mark.parametrize( + "bigquery_location", + # Sort the set to avoid nondeterminism. + sorted(constants.REP_NOT_ENABLED_BIGQUERY_LOCATIONS), +) +def test_clients_provider_use_regional_endpoints_non_rep_locations(bigquery_location): + with pytest.raises( + ValueError, + match="Support for regional endpoints may not be available in the location", + ): + clients.ClientsProvider(location=bigquery_location, use_regional_endpoints=True) diff --git a/tests/unit/session/test_session.py b/tests/unit/session/test_session.py index b35449f291..a16792b92a 100644 --- a/tests/unit/session/test_session.py +++ b/tests/unit/session/test_session.py @@ -28,6 +28,7 @@ import bigframes from bigframes import version +import bigframes.constants import bigframes.enums import bigframes.exceptions from tests.unit import resources @@ -462,6 +463,25 @@ def today(cls): resources.create_bigquery_session() +@pytest.mark.parametrize( + "bigquery_location", + # Sort the set to avoid nondeterminism. 
+ sorted(bigframes.constants.REP_NOT_ENABLED_BIGQUERY_LOCATIONS), +) +def test_session_init_fails_to_use_regional_endpoints_non_rep_endpoints( + bigquery_location, +): + with pytest.raises( + ValueError, + match="Support for regional endpoints may not be available in the location", + ): + bigframes.Session( + context=bigframes.BigQueryOptions( + location=bigquery_location, use_regional_endpoints=True + ) + ) + + @mock.patch("bigframes.session.MAX_INLINE_DF_BYTES", 1) def test_read_pandas_inline_exceeds_limit_raises_error(): session = resources.create_bigquery_session() From f86ee8d184ab6f286d6050bd26260c63dbd3e568 Mon Sep 17 00:00:00 2001 From: Shobhit Singh Date: Thu, 27 Mar 2025 23:22:47 +0000 Subject: [PATCH 08/10] `use_regional_endpoints` documentation update --- bigframes/_config/bigquery_options.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/bigframes/_config/bigquery_options.py b/bigframes/_config/bigquery_options.py index 5418d20b61..28c5ba8c29 100644 --- a/bigframes/_config/bigquery_options.py +++ b/bigframes/_config/bigquery_options.py @@ -258,7 +258,8 @@ def allow_large_results(self, value: bool): @property def use_regional_endpoints(self) -> bool: - """Flag to connect to regional API endpoints. + """Flag to connect to regional API endpoints for BigQuery API and + BigQuery Storage API. .. note:: Use of regional endpoints is a feature in Preview and available only @@ -267,12 +268,13 @@ def use_regional_endpoints(self) -> bool: "us-east5", "us-east7", "us-south1", "us-west1", "us-west2", "us-west3" and "us-west4". - Requires that ``location`` is set. For supported regions, for example - ``europe-west3``, you need to specify ``location='europe-west3'`` and - ``use_regional_endpoints=True``, and then BigQuery DataFrames would - connect to the BigQuery endpoint ``bigquery.europe-west3.rep.googleapis.com``. 
- For not supported regions, for example ``asia-northeast1``, when you - specify ``location='asia-northeast1'`` and ``use_regional_endpoints=True``, + Requires that ``location`` is set. For [supported regions](https://cloud.google.com/bigquery/docs/regional-endpoints), + for example ``europe-west3``, you need to specify + ``location='europe-west3'`` and ``use_regional_endpoints=True``, and + then BigQuery DataFrames would connect to the BigQuery endpoint + ``bigquery.europe-west3.rep.googleapis.com``. For not supported regions, + for example ``asia-northeast1``, when you specify + ``location='asia-northeast1'`` and ``use_regional_endpoints=True``, the global endpoint ``bigquery.googleapis.com`` would be used, which does not promise any guarantee on the request remaining within the location during transit. From 8ae694c7826977bf643a1b0c5dd4a6f201424257 Mon Sep 17 00:00:00 2001 From: Shobhit Singh Date: Fri, 28 Mar 2025 00:09:57 +0000 Subject: [PATCH 09/10] fix mypy --- tests/system/large/test_location.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/system/large/test_location.py b/tests/system/large/test_location.py index 17d77ac280..ef9207bdd5 100644 --- a/tests/system/large/test_location.py +++ b/tests/system/large/test_location.py @@ -65,9 +65,9 @@ def _assert_bq_execution_location( # Ensure BQ Storage Read client operation succceeds table = result.query_job.destination - requested_session = bqstorage_types.ReadSession( + requested_session = bqstorage_types.ReadSession( # type: ignore[attr-defined] table=f"projects/{table.project}/datasets/{table.dataset_id}/tables/{table.table_id}", - data_format=bqstorage_types.DataFormat.ARROW, + data_format=bqstorage_types.DataFormat.ARROW, # type: ignore[attr-defined] ) read_session = session.bqstoragereadclient.create_read_session( parent=f"projects/{table.project}", From 4f4f16aba337a2406cb67a7847614e31916c1a94 Mon Sep 17 00:00:00 2001 From: Shobhit Singh Date: Fri, 28 Mar 2025 01:45:47 +0000 
Subject: [PATCH 10/10] move the exception tests to large tests to test the real flow --- bigframes/constants.py | 12 ++++----- tests/system/large/test_location.py | 39 +++++++++++++++++++++++++++++ tests/unit/session/test_clients.py | 20 --------------- tests/unit/session/test_session.py | 20 --------------- 4 files changed, 45 insertions(+), 46 deletions(-) diff --git a/bigframes/constants.py b/bigframes/constants.py index 7f4d5eb8d3..89f27afd78 100644 --- a/bigframes/constants.py +++ b/bigframes/constants.py @@ -111,12 +111,12 @@ REP_NOT_SUPPORTED_MESSAGE = textwrap.dedent( """ - Support for regional endpoints may not be available in the location - {location} for BigQuery and BigQuery Storage APIs. For the supported - locations and APIs see https://cloud.google.com/bigquery/docs/regional-endpoints. - If you have the (deprecated) locational endpoints enabled in your - project (which requires your project to be allowlisted), you can override - the endpoints directly by doing the following: + Support for regional endpoints for BigQuery and BigQuery Storage APIs may + not be available in the location {location}. For the supported APIs and + locations see https://cloud.google.com/bigquery/docs/regional-endpoints. 
+ If you have the (deprecated) locational endpoints enabled in your project + (which requires your project to be allowlisted), you can override the + endpoints directly by doing the following: bigframes.pandas.options.bigquery.client_endpoints_override = {{ "bqclient": "https://{location}-bigquery.googleapis.com", "bqconnectionclient": "{location}-bigqueryconnection.googleapis.com", diff --git a/tests/system/large/test_location.py b/tests/system/large/test_location.py index ef9207bdd5..d4428c1f95 100644 --- a/tests/system/large/test_location.py +++ b/tests/system/large/test_location.py @@ -177,3 +177,42 @@ def test_bq_rep_endpoints(bigquery_location): # assert that bigframes session honors the location _assert_bq_execution_location(session) + + +def test_clients_provider_no_location(): + with pytest.raises(ValueError, match="Must set location to use regional endpoints"): + bigframes.session.clients.ClientsProvider(use_regional_endpoints=True) + + +@pytest.mark.parametrize( + "bigquery_location", + # Sort the set to avoid nondeterminism. + sorted(bigframes.constants.REP_NOT_ENABLED_BIGQUERY_LOCATIONS), +) +def test_clients_provider_use_regional_endpoints_non_rep_locations(bigquery_location): + with pytest.raises( + ValueError, + match=f"not .*available in the location {bigquery_location}", + ): + bigframes.session.clients.ClientsProvider( + location=bigquery_location, use_regional_endpoints=True + ) + + +@pytest.mark.parametrize( + "bigquery_location", + # Sort the set to avoid nondeterminism. 
+ sorted(bigframes.constants.REP_NOT_ENABLED_BIGQUERY_LOCATIONS), +) +def test_session_init_fails_to_use_regional_endpoints_non_rep_endpoints( + bigquery_location, +): + with pytest.raises( + ValueError, + match=f"not .*available in the location {bigquery_location}", + ): + bigframes.Session( + context=bigframes.BigQueryOptions( + location=bigquery_location, use_regional_endpoints=True + ) + ) diff --git a/tests/unit/session/test_clients.py b/tests/unit/session/test_clients.py index 0de0bb22c1..c9a12be584 100644 --- a/tests/unit/session/test_clients.py +++ b/tests/unit/session/test_clients.py @@ -26,9 +26,7 @@ import google.cloud.bigquery_storage_v1 import google.cloud.functions_v2 import google.cloud.resourcemanager_v3 -import pytest -import bigframes.constants as constants import bigframes.session.clients as clients import bigframes.version @@ -183,21 +181,3 @@ def test_user_agent_in_jupyter(monkeypatch): # We still need to include attribution to bigframes assert_clients_w_user_agent(provider, f"bigframes/{bigframes.version.__version__}") - - -def test_clients_provider_no_location(): - with pytest.raises(ValueError, match="Must set location to use regional endpoints"): - clients.ClientsProvider(use_regional_endpoints=True) - - -@pytest.mark.parametrize( - "bigquery_location", - # Sort the set to avoid nondeterminism. 
- sorted(constants.REP_NOT_ENABLED_BIGQUERY_LOCATIONS), -) -def test_clients_provider_use_regional_endpoints_non_rep_locations(bigquery_location): - with pytest.raises( - ValueError, - match="Support for regional endpoints may not be available in the location", - ): - clients.ClientsProvider(location=bigquery_location, use_regional_endpoints=True) diff --git a/tests/unit/session/test_session.py b/tests/unit/session/test_session.py index a16792b92a..b35449f291 100644 --- a/tests/unit/session/test_session.py +++ b/tests/unit/session/test_session.py @@ -28,7 +28,6 @@ import bigframes from bigframes import version -import bigframes.constants import bigframes.enums import bigframes.exceptions from tests.unit import resources @@ -463,25 +462,6 @@ def today(cls): resources.create_bigquery_session() -@pytest.mark.parametrize( - "bigquery_location", - # Sort the set to avoid nondeterminism. - sorted(bigframes.constants.REP_NOT_ENABLED_BIGQUERY_LOCATIONS), -) -def test_session_init_fails_to_use_regional_endpoints_non_rep_endpoints( - bigquery_location, -): - with pytest.raises( - ValueError, - match="Support for regional endpoints may not be available in the location", - ): - bigframes.Session( - context=bigframes.BigQueryOptions( - location=bigquery_location, use_regional_endpoints=True - ) - ) - - @mock.patch("bigframes.session.MAX_INLINE_DF_BYTES", 1) def test_read_pandas_inline_exceeds_limit_raises_error(): session = resources.create_bigquery_session()