8000 feat!: drop support for locational endpoints by shobsi · Pull Request #1542 · googleapis/python-bigquery-dataframes · GitHub
[go: up one dir, main page]

Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 12 additions & 13 deletions bigframes/_config/bigquery_options.py
Original file line number Diff line number Diff line change
Expand Up @@ -258,7 +258,8 @@ def allow_large_results(self, value: bool):

@property
def use_regional_endpoints(self) -> bool:
"""Flag to connect to regional API endpoints.
"""Flag to connect to regional API endpoints for BigQuery API and
BigQuery Storage API.

.. note::
Use of regional endpoints is a feature in Preview and available only
Expand All @@ -267,18 +268,16 @@ def use_regional_endpoints(self) -> bool:
"us-east5", "us-east7", "us-south1", "us-west1", "us-west2", "us-west3"
and "us-west4".

.. deprecated:: 0.13.0
Use of locational endpoints is available only in selected projects.

Requires that ``location`` is set. For supported regions, for example
``europe-west3``, you need to specify ``location='europe-west3'`` and
``use_regional_endpoints=True``, and then BigQuery DataFrames would
connect to the BigQuery endpoint ``bigquery.europe-west3.rep.googleapis.com``.
For not supported regions, for example ``asia-northeast1``, when you
specify ``location='asia-northeast1'`` and ``use_regional_endpoints=True``,
a different endpoint (called locational endpoint, now deprecated, used
to provide weaker promise on the request remaining within the location
during transit) ``europe-west3-bigquery.googleapis.com`` would be used.
Requires that ``location`` is set. For [supported regions](https://cloud.google.com/bigquery/docs/regional-endpoints),
for example ``europe-west3``, you need to specify
``location='europe-west3'`` and ``use_regional_endpoints=True``, and
then BigQuery DataFrames would connect to the BigQuery endpoint
``bigquery.europe-west3.rep.googleapis.com``. For not supported regions,
for example ``asia-northeast1``, when you specify
``location='asia-northeast1'`` and ``use_regional_endpoints=True``,
the global endpoint ``bigquery.googleapis.com`` would be used, which
does not promise any guarantee on the request remaining within the
location during transit.

Returns:
bool:
Expand Down
29 changes: 17 additions & 12 deletions bigframes/constants.py
10000
Original file line number Diff line number Diff line change
Expand Up @@ -96,22 +96,27 @@
}
)

# https://cloud.google.com/storage/docs/locational-endpoints
LEP_ENABLED_BIGQUERY_LOCATIONS = frozenset(
REP_NOT_ENABLED_BIGQUERY_LOCATIONS = frozenset(
ALL_BIGQUERY_LOCATIONS - REP_ENABLED_BIGQUERY_LOCATIONS
)

LEP_DEPRECATION_WARNING_MESSAGE = textwrap.dedent(
LOCATION_NEEDED_FOR_REP_MESSAGE = textwrap.dedent(
"""
Support for regional endpoints is not yet available in the location
{location} for BigQuery and BigQuery Storage APIs. For the supported
locations and APIs see https://cloud.google.com/bigquery/docs/regional-endpoints.
For other locations and APIs, currently an older, now deprecated locational
endpoints are being used, which requires your project to be allowlisted. In
future version 2.0 onwards the locational endpoints will no longer be
supported automatically when you enable regional endpoints. However, if you
still need them, you will be able to override the endpoints directly by
doing the following:
Must set location to use regional endpoints.
You can do it via bigframes.pandas.options.bigquery.location.
The supported locations can be found at
https://cloud.google.com/bigquery/docs/regional-endpoints#supported-locations.
"""
).strip()

REP_NOT_SUPPORTED_MESSAGE = textwrap.dedent(
"""
Support for regional endpoints for BigQuery and BigQuery Storage APIs may
not be available in the location {location}. For the supported APIs and
locations see https://cloud.google.com/bigquery/docs/regional-endpoints.
If you have the (deprecated) locational endpoints enabled in your project
(which requires your project to be allowlisted), you can override the
endpoints directly by doing the following:
bigframes.pandas.options.bigquery.client_endpoints_override = {{
"bqclient": "https://{location}-bigquery.googleapis.com",
"bqconnectionclient": "{location}-bigqueryconnection.googleapis.com",
Expand Down
62 changes: 17 additions & 45 deletions bigframes/session/clients.py
warnings.warn(msg, category=FutureWarning)
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@
import os
import typing
from typing import Optional
import warnings

import google.api_core.client_info
import google.api_core.client_options
Expand All @@ -32,7 +31,6 @@
import pydata_google_auth

import bigframes.constants
import bigframes.exceptions as bfe
import bigframes.version

from . import environment
Expand All @@ -43,16 +41,11 @@


# BigQuery is a REST API, which requires the protocol as part of the URL.
_BIGQUERY_LOCATIONAL_ENDPOINT = "https://{location}-bigquery.googleapis.com"
_BIGQUERY_REGIONAL_ENDPOINT = "https://bigquery.{location}.rep.googleapis.com"

# BigQuery Connection and Storage are gRPC APIs, which don't support the
# https:// protocol in the API endpoint URL.
_BIGQUERYCONNECTION_LOCATIONAL_ENDPOINT = "{location}-bigqueryconnection.googleapis.com"
_BIGQUERYSTORAGE_LOCATIONAL_ENDPOINT = "{location}-bigquerystorage.googleapis.com"
_BIGQUERYSTORAGE_REGIONAL_ENDPOINT = (
"https://bigquerystorage.{location}.rep.googleapis.com"
)
_BIGQUERYSTORAGE_REGIONAL_ENDPOINT = "bigquerystorage.{location}.rep.googleapis.com"


def _get_default_credentials_with_project():
Expand Down Expand Up @@ -114,19 +107,18 @@ def __init__(
)
self._project = project

if (
use_regional_endpoints
and location is not None
and location.lower()
not in bigframes.constants.REP_ENABLED_BIGQUERY_LOCATIONS
):
msg = bfe.format_message(
bigframes.constants.LEP_DEPRECATION_WARNING_MESSAGE.format(
location=location
),
fill=False,
)
if use_regional_endpoints:
if location is None:
raise ValueError(bigframes.constants.LOCATION_NEEDED_FOR_REP_MESSAGE)
elif (
location.lower()
not in bigframes.constants.REP_ENABLED_BIGQUERY_LOCATIONS
):
raise ValueError(
bigframes.constants.REP_NOT_SUPPORTED_MESSAGE.format(
location=location
)
)
self._location = location
self._use_regional_endpoints = use_regional_endpoints

Expand Down Expand Up @@ -156,16 +148,8 @@ def _create_bigquery_client(self):
api_endpoint=self._client_endpoints_override["bqclient"]
)
elif self._use_regional_endpoints:
endpoint_template = _BIGQUERY_REGIONAL_ENDPOINT
if (
self._location is not None
and self._location.lower()
not in bigframes.constants.REP_ENABLED_BIGQUERY_LOCATIONS
):
endpoint_template = _BIGQUERY_LOCATIONAL_ENDPOINT

bq_options = google.api_core.client_options.ClientOptions(
api_endpoint=endpoint_template.format(location=self._location)
api_endpoint=_BIGQUERY_REGIONAL_ENDPOINT.format(location=self._location)
)

bq_info = google.api_core.client_info.ClientInfo(
Expand Down Expand Up @@ -212,12 +196,6 @@ def bqconnectionclient(self):
bqconnection_options = google.api_core.client_options.ClientOptions(
api_endpoint=self._client_endpoints_override["bqconnectionclient"]
)
elif self._use_regional_endpoints:
bqconnection_options = google.api_core.client_options.ClientOptions(
api_endpoint=_BIGQUERYCONNECTION_LOCATIONAL_ENDPOINT.format(
location=self._location
)
)

bqconnection_info = google.api_core.gapic_v1.client_info.ClientInfo(
user_agent=self._application_name
Expand All @@ -241,16 +219,10 @@ def bqstoragereadclient(self):
api_endpoint=self._client_endpoints_override["bqstoragereadclient"]
)
elif self._use_regional_endpoints:
endpoint_template = _BIGQUERYSTORAGE_REGIONAL_ENDPOINT
if (
self._location is not None
and self._location.lower()
not in bigframes.constants.REP_ENABLED_BIGQUERY_LOCATIONS
):
endpoint_template = _BIGQUERYSTORAGE_LOCATIONAL_ENDPOINT

bqstorage_options = google.api_core.client_options.ClientOptions(
api_endpoint=endpoint_template.format(location=self._location)
api_endpoint=_BIGQUERYSTORAGE_REGIONAL_ENDPOINT.format(
location=self._location
)
)

bqstorage_info = google.api_core.gapic_v1.client_info.ClientInfo(
Expand Down
9E81 110 changes: 75 additions & 35 deletions tests/system/large/test_location.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,11 @@
# limitations under the License.

import typing
import warnings

from google.cloud import bigquery
from google.cloud.bigquery_storage import types as bqstorage_types
import pandas
import pandas.testing
import pytest

import bigframes
Expand All @@ -41,6 +43,7 @@ def _assert_bq_execution_location(

assert typing.cast(bigquery.QueryJob, df.query_job).location == expected_location

# Ensure operation involving BQ client succeeds
result = (
df[["name", "number"]]
.groupby("name")
Expand All @@ -53,6 +56,35 @@ def _assert_bq_execution_location(
typing.cast(bigquery.QueryJob, result.query_job).location == expected_location
)

expected_result = pandas.DataFrame(
{"number": [444, 222]}, index=pandas.Index(["aaa", "bbb"], name="name")
)
pandas.testing.assert_frame_equal(
expected_result, result.to_pandas(), check_dtype=False, check_index_type=False
)

# Ensure BQ Storage Read client operation succeeds
table = result.query_job.destination
requested_session = bqstorage_types.ReadSession( # type: ignore[attr-defined]
table=f"projects/{table.project}/datasets/{table.dataset_id}/tables/{table.table_id}",
data_format=bqstorage_types.DataFormat.ARROW, # type: ignore[attr-defined]
)
read_session = session.bqstoragereadclient.create_read_session(
parent=f"projects/{table.project}",
read_session=requested_session,
max_stream_count=1,
)
reader = session.bqstoragereadclient.read_rows(read_session.streams[0].name)
frames = []
for message in reader.rows().pages:
frames.append(message.to_dataframe())
read_dataframe = pandas.concat(frames)
# normalize before comparing since we lost some of the bigframes column
# naming abstractions in the direct read of the destination table
read_dataframe = read_dataframe.set_index("name")
read_dataframe.columns = result.columns
pandas.testing.assert_frame_equal(expected_result, read_dataframe)


def test_bq_location_default():
session = bigframes.Session()
Expand Down Expand Up @@ -119,22 +151,14 @@ def test_bq_location_non_canonical(set_location, resolved_location):
sorted(bigframes.constants.REP_ENABLED_BIGQUERY_LOCATIONS),
)
def test_bq_rep_endpoints(bigquery_location):
with warnings.catch_warnings(record=True) as record:
warnings.simplefilter("always")
session = bigframes.Session(
context=bigframes.BigQueryOptions(
location=bigquery_location, use_regional_endpoints=True
)
)
assert (
len([warn for warn in record if isinstance(warn.message, FutureWarning)])
== 0
session = bigframes.Session(
context=bigframes.BigQueryOptions(
location=bigquery_location, use_regional_endpoints=True
)
)

# Verify that location and endpoints are correctly set for the BigQuery API
# Verify that location and endpoint is correctly set for the BigQuery API
# client
# TODO(shobs): Figure out if the same can be verified for the other API
# clients.
assert session.bqclient.location == bigquery_location
assert (
session.bqclient._connection.API_BASE_URL
Expand All @@ -143,36 +167,52 @@ def test_bq_rep_endpoints(bigquery_location):
)
)

# Verify that endpoint is correctly set for the BigQuery Storage API client
# TODO(shobs): Figure out if we can verify that location is set in the
# BigQuery Storage API client.
assert (
session.bqstoragereadclient.api_endpoint
== f"bigquerystorage.{bigquery_location}.rep.googleapis.com"
)

# assert that bigframes session honors the location
_assert_bq_execution_location(session)


def test_clients_provider_no_location():
with pytest.raises(ValueError, match="Must set location to use regional endpoints"):
bigframes.session.clients.ClientsProvider(use_regional_endpoints=True)


@pytest.mark.parametrize(
"bigquery_location",
# Sort the set to avoid nondeterminism.
sorted(bigframes.constants.LEP_ENABLED_BIGQUERY_LOCATIONS),
sorted(bigframes.constants.REP_NOT_ENABLED_BIGQUERY_LOCATIONS),
)
def test_bq_lep_endpoints(bigquery_location):
# We are not testing BigFrames Session for LEP endpoints because it involves
# query execution using the endpoint, which requires the project to be
# allowlisted for LEP access. We could hardcode one project which is
# allowlisted but then not every open source developer will have access to
# that. Let's rely on just creating the clients for LEP.
with pytest.warns(FutureWarning) as record:
clients_provider = bigframes.session.clients.ClientsProvider(
def test_clients_provider_use_regional_endpoints_non_rep_locations(bigquery_location):
with pytest.raises(
ValueError,
match=f"not .*available in the location {bigquery_location}",
):
bigframes.session.clients.ClientsProvider(
location=bigquery_location, use_regional_endpoints=True
)
assert len(record) == 1
assert bigquery_location in typing.cast(Warning, record[0].message).args[0]

# Verify that location and endpoints are correctly set for the BigQuery API
# client
# TODO(shobs): Figure out if the same can be verified for the other API
# clients.
assert clients_provider.bqclient.location == bigquery_location
assert (
clients_provider.bqclient._connection.API_BASE_URL
== "https://{location}-bigquery.googleapis.com".format(
location=bigquery_location

@pytest.mark.parametrize(
"bigquery_location",
# Sort the set to avoid nondeterminism.
sorted(bigframes.constants.REP_NOT_ENABLED_BIGQUERY_LOCATIONS),
)
def test_session_init_fails_to_use_regional_endpoints_non_rep_endpoints(
bigquery_location,
):
with pytest.raises(
ValueError,
match=f"not .*available in the location {bigquery_location}",
):
bigframes.Session(
context=bigframes.BigQueryOptions(
location=bigquery_location, use_regional_endpoints=True
)
)
)
0