diff --git a/bigframes/_config/bigquery_options.py b/bigframes/_config/bigquery_options.py index 7d33b7ba39..41f662c6c2 100644 --- a/bigframes/_config/bigquery_options.py +++ b/bigframes/_config/bigquery_options.py @@ -258,7 +258,8 @@ def allow_large_results(self, value: bool): @property def use_regional_endpoints(self) -> bool: - """Flag to connect to regional API endpoints. + """Flag to connect to regional API endpoints for BigQuery API and + BigQuery Storage API. .. note:: Use of regional endpoints is a feature in Preview and available only @@ -267,18 +268,16 @@ def use_regional_endpoints(self) -> bool: "us-east5", "us-east7", "us-south1", "us-west1", "us-west2", "us-west3" and "us-west4". - .. deprecated:: 0.13.0 - Use of locational endpoints is available only in selected projects. - - Requires that ``location`` is set. For supported regions, for example - ``europe-west3``, you need to specify ``location='europe-west3'`` and - ``use_regional_endpoints=True``, and then BigQuery DataFrames would - connect to the BigQuery endpoint ``bigquery.europe-west3.rep.googleapis.com``. - For not supported regions, for example ``asia-northeast1``, when you - specify ``location='asia-northeast1'`` and ``use_regional_endpoints=True``, - a different endpoint (called locational endpoint, now deprecated, used - to provide weaker promise on the request remaining within the location - during transit) ``europe-west3-bigquery.googleapis.com`` would be used. + Requires that ``location`` is set. For [supported regions](https://cloud.google.com/bigquery/docs/regional-endpoints), + for example ``europe-west3``, you need to specify + ``location='europe-west3'`` and ``use_regional_endpoints=True``, and + then BigQuery DataFrames would connect to the BigQuery endpoint + ``bigquery.europe-west3.rep.googleapis.com``. 
For not supported regions, + for example ``asia-northeast1``, when you specify + ``location='asia-northeast1'`` and ``use_regional_endpoints=True``, + session creation raises a ``ValueError`` rather than falling back + to the global endpoint ``bigquery.googleapis.com``, which does not + promise that requests remain within the location during transit. Returns: bool: diff --git a/bigframes/constants.py b/bigframes/constants.py index 8f5ed95e1a..89f27afd78 100644 --- a/bigframes/constants.py +++ b/bigframes/constants.py @@ -96,22 +96,27 @@ } ) -# https://cloud.google.com/storage/docs/locational-endpoints -LEP_ENABLED_BIGQUERY_LOCATIONS = frozenset( +REP_NOT_ENABLED_BIGQUERY_LOCATIONS = frozenset( ALL_BIGQUERY_LOCATIONS - REP_ENABLED_BIGQUERY_LOCATIONS ) -LEP_DEPRECATION_WARNING_MESSAGE = textwrap.dedent( +LOCATION_NEEDED_FOR_REP_MESSAGE = textwrap.dedent( """ - Support for regional endpoints is not yet available in the location - {location} for BigQuery and BigQuery Storage APIs. For the supported - locations and APIs see https://cloud.google.com/bigquery/docs/regional-endpoints. - For other locations and APIs, currently an older, now deprecated locational - endpoints are being used, which requires your project to be allowlisted. In - future version 2.0 onwards the locational endpoints will no longer be - supported automatically when you enable regional endpoints. However, if you - still need them, you will be able to override the endpoints directly by - doing the following: + Must set location to use regional endpoints. + You can do it via bigframes.pandas.options.bigquery.location. + The supported locations can be found at + https://cloud.google.com/bigquery/docs/regional-endpoints#supported-locations. + """ +).strip() + +REP_NOT_SUPPORTED_MESSAGE = textwrap.dedent( + """ + Support for regional endpoints for BigQuery and BigQuery Storage APIs may + not be available in the location {location}. For the supported APIs and + locations see https://cloud.google.com/bigquery/docs/regional-endpoints. 
+ If you have the (deprecated) locational endpoints enabled in your project + (which requires your project to be allowlisted), you can override the + endpoints directly by doing the following: bigframes.pandas.options.bigquery.client_endpoints_override = {{ "bqclient": "https://{location}-bigquery.googleapis.com", "bqconnectionclient": "{location}-bigqueryconnection.googleapis.com", diff --git a/bigframes/session/clients.py b/bigframes/session/clients.py index 2b24b6cb8b..86be8bd897 100644 --- a/bigframes/session/clients.py +++ b/bigframes/session/clients.py @@ -17,7 +17,6 @@ import os import typing from typing import Optional -import warnings import google.api_core.client_info import google.api_core.client_options @@ -32,7 +31,6 @@ import pydata_google_auth import bigframes.constants -import bigframes.exceptions as bfe import bigframes.version from . import environment @@ -43,16 +41,11 @@ # BigQuery is a REST API, which requires the protocol as part of the URL. -_BIGQUERY_LOCATIONAL_ENDPOINT = "https://{location}-bigquery.googleapis.com" _BIGQUERY_REGIONAL_ENDPOINT = "https://bigquery.{location}.rep.googleapis.com" # BigQuery Connection and Storage are gRPC APIs, which don't support the # https:// protocol in the API endpoint URL. 
-_BIGQUERYCONNECTION_LOCATIONAL_ENDPOINT = "{location}-bigqueryconnection.googleapis.com" -_BIGQUERYSTORAGE_LOCATIONAL_ENDPOINT = "{location}-bigquerystorage.googleapis.com" -_BIGQUERYSTORAGE_REGIONAL_ENDPOINT = ( - "https://bigquerystorage.{location}.rep.googleapis.com" -) +_BIGQUERYSTORAGE_REGIONAL_ENDPOINT = "bigquerystorage.{location}.rep.googleapis.com" def _get_default_credentials_with_project(): @@ -114,19 +107,18 @@ def __init__( ) self._project = project - if ( - use_regional_endpoints - and location is not None - and location.lower() - not in bigframes.constants.REP_ENABLED_BIGQUERY_LOCATIONS - ): - msg = bfe.format_message( - bigframes.constants.LEP_DEPRECATION_WARNING_MESSAGE.format( - location=location - ), - fill=False, - ) - warnings.warn(msg, category=FutureWarning) + if use_regional_endpoints: + if location is None: + raise ValueError(bigframes.constants.LOCATION_NEEDED_FOR_REP_MESSAGE) + elif ( + location.lower() + not in bigframes.constants.REP_ENABLED_BIGQUERY_LOCATIONS + ): + raise ValueError( + bigframes.constants.REP_NOT_SUPPORTED_MESSAGE.format( + location=location + ) + ) self._location = location self._use_regional_endpoints = use_regional_endpoints @@ -156,16 +148,8 @@ def _create_bigquery_client(self): api_endpoint=self._client_endpoints_override["bqclient"] ) elif self._use_regional_endpoints: - endpoint_template = _BIGQUERY_REGIONAL_ENDPOINT - if ( - self._location is not None - and self._location.lower() - not in bigframes.constants.REP_ENABLED_BIGQUERY_LOCATIONS - ): - endpoint_template = _BIGQUERY_LOCATIONAL_ENDPOINT - bq_options = google.api_core.client_options.ClientOptions( - api_endpoint=endpoint_template.format(location=self._location) + api_endpoint=_BIGQUERY_REGIONAL_ENDPOINT.format(location=self._location) ) bq_info = google.api_core.client_info.ClientInfo( @@ -212,12 +196,6 @@ def bqconnectionclient(self): bqconnection_options = google.api_core.client_options.ClientOptions( 
api_endpoint=self._client_endpoints_override["bqconnectionclient"] ) - elif self._use_regional_endpoints: - bqconnection_options = google.api_core.client_options.ClientOptions( - api_endpoint=_BIGQUERYCONNECTION_LOCATIONAL_ENDPOINT.format( - location=self._location - ) - ) bqconnection_info = google.api_core.gapic_v1.client_info.ClientInfo( user_agent=self._application_name @@ -241,16 +219,10 @@ def bqstoragereadclient(self): api_endpoint=self._client_endpoints_override["bqstoragereadclient"] ) elif self._use_regional_endpoints: - endpoint_template = _BIGQUERYSTORAGE_REGIONAL_ENDPOINT - if ( - self._location is not None - and self._location.lower() - not in bigframes.constants.REP_ENABLED_BIGQUERY_LOCATIONS - ): - endpoint_template = _BIGQUERYSTORAGE_LOCATIONAL_ENDPOINT - bqstorage_options = google.api_core.client_options.ClientOptions( - api_endpoint=endpoint_template.format(location=self._location) + api_endpoint=_BIGQUERYSTORAGE_REGIONAL_ENDPOINT.format( + location=self._location + ) ) bqstorage_info = google.api_core.gapic_v1.client_info.ClientInfo( diff --git a/tests/system/large/test_location.py b/tests/system/large/test_location.py index 7801f5dada..d4428c1f95 100644 --- a/tests/system/large/test_location.py +++ b/tests/system/large/test_location.py @@ -13,9 +13,11 @@ # limitations under the License. 
import typing -import warnings from google.cloud import bigquery +from google.cloud.bigquery_storage import types as bqstorage_types +import pandas +import pandas.testing import pytest import bigframes @@ -41,6 +43,7 @@ def _assert_bq_execution_location( assert typing.cast(bigquery.QueryJob, df.query_job).location == expected_location + # Ensure operation involving BQ client succeeds result = ( df[["name", "number"]] .groupby("name") @@ -53,6 +56,35 @@ def _assert_bq_execution_location( typing.cast(bigquery.QueryJob, result.query_job).location == expected_location ) + expected_result = pandas.DataFrame( + {"number": [444, 222]}, index=pandas.Index(["aaa", "bbb"], name="name") + ) + pandas.testing.assert_frame_equal( + expected_result, result.to_pandas(), check_dtype=False, check_index_type=False + ) + + # Ensure BQ Storage Read client operation succeeds + table = result.query_job.destination + requested_session = bqstorage_types.ReadSession( # type: ignore[attr-defined] + table=f"projects/{table.project}/datasets/{table.dataset_id}/tables/{table.table_id}", + data_format=bqstorage_types.DataFormat.ARROW, # type: ignore[attr-defined] + ) + read_session = session.bqstoragereadclient.create_read_session( + parent=f"projects/{table.project}", + read_session=requested_session, + max_stream_count=1, + ) + reader = session.bqstoragereadclient.read_rows(read_session.streams[0].name) + frames = [] + for message in reader.rows().pages: + frames.append(message.to_dataframe()) + read_dataframe = pandas.concat(frames) + # normalize before comparing since we lost some of the bigframes column + # naming abstractions in the direct read of the destination table + read_dataframe = read_dataframe.set_index("name") + read_dataframe.columns = result.columns + pandas.testing.assert_frame_equal(expected_result, read_dataframe) + def test_bq_location_default(): session = bigframes.Session() @@ -119,22 +151,14 @@ def test_bq_location_non_canonical(set_location, resolved_location): 
sorted(bigframes.constants.REP_ENABLED_BIGQUERY_LOCATIONS), ) def test_bq_rep_endpoints(bigquery_location): - with warnings.catch_warnings(record=True) as record: - warnings.simplefilter("always") - session = bigframes.Session( - context=bigframes.BigQueryOptions( - location=bigquery_location, use_regional_endpoints=True - ) - ) - assert ( - len([warn for warn in record if isinstance(warn.message, FutureWarning)]) - == 0 + session = bigframes.Session( + context=bigframes.BigQueryOptions( + location=bigquery_location, use_regional_endpoints=True ) + ) - # Verify that location and endpoints are correctly set for the BigQuery API + # Verify that location and endpoint is correctly set for the BigQuery API # client - # TODO(shobs): Figure out if the same can be verified for the other API - # clients. assert session.bqclient.location == bigquery_location assert ( session.bqclient._connection.API_BASE_URL @@ -143,36 +167,52 @@ def test_bq_rep_endpoints(bigquery_location): ) ) + # Verify that endpoint is correctly set for the BigQuery Storage API client + # TODO(shobs): Figure out if we can verify that location is set in the + # BigQuery Storage API client. + assert ( + session.bqstoragereadclient.api_endpoint + == f"bigquerystorage.{bigquery_location}.rep.googleapis.com" + ) + # assert that bigframes session honors the location _assert_bq_execution_location(session) +def test_clients_provider_no_location(): + with pytest.raises(ValueError, match="Must set location to use regional endpoints"): + bigframes.session.clients.ClientsProvider(use_regional_endpoints=True) + + @pytest.mark.parametrize( "bigquery_location", # Sort the set to avoid nondeterminism. 
- sorted(bigframes.constants.LEP_ENABLED_BIGQUERY_LOCATIONS), + sorted(bigframes.constants.REP_NOT_ENABLED_BIGQUERY_LOCATIONS), ) -def test_bq_lep_endpoints(bigquery_location): - # We are not testing BigFrames Session for LEP endpoints because it involves - # query execution using the endpoint, which requires the project to be - # allowlisted for LEP access. We could hardcode one project which is - # allowlisted but then not every open source developer will have access to - # that. Let's rely on just creating the clients for LEP. - with pytest.warns(FutureWarning) as record: - clients_provider = bigframes.session.clients.ClientsProvider( +def test_clients_provider_use_regional_endpoints_non_rep_locations(bigquery_location): + with pytest.raises( + ValueError, + match=f"not .*available in the location {bigquery_location}", + ): + bigframes.session.clients.ClientsProvider( location=bigquery_location, use_regional_endpoints=True ) - assert len(record) == 1 - assert bigquery_location in typing.cast(Warning, record[0].message).args[0] - # Verify that location and endpoints are correctly set for the BigQuery API - # client - # TODO(shobs): Figure out if the same can be verified for the other API - # clients. - assert clients_provider.bqclient.location == bigquery_location - assert ( - clients_provider.bqclient._connection.API_BASE_URL - == "https://{location}-bigquery.googleapis.com".format( - location=bigquery_location + +@pytest.mark.parametrize( + "bigquery_location", + # Sort the set to avoid nondeterminism. + sorted(bigframes.constants.REP_NOT_ENABLED_BIGQUERY_LOCATIONS), +) +def test_session_init_fails_to_use_regional_endpoints_non_rep_endpoints( + bigquery_location, +): + with pytest.raises( + ValueError, + match=f"not .*available in the location {bigquery_location}", + ): + bigframes.Session( + context=bigframes.BigQueryOptions( + location=bigquery_location, use_regional_endpoints=True + ) ) - )