From abcd9c5fdc658670a828f1397d93d8bd2b004ea6 Mon Sep 17 00:00:00 2001 From: Shobhit Singh Date: Thu, 3 Oct 2024 01:28:55 +0000 Subject: [PATCH 1/3] fix: make invalid location warning case-insensitive --- bigframes/_config/bigquery_options.py | 55 ++++++++++++--------- bigframes/constants.py | 9 ++-- tests/unit/_config/test_bigquery_options.py | 4 ++ 3 files changed, 43 insertions(+), 25 deletions(-) diff --git a/bigframes/_config/bigquery_options.py b/bigframes/_config/bigquery_options.py index 502f103bb5..0f0259a8d8 100644 --- a/bigframes/_config/bigquery_options.py +++ b/bigframes/_config/bigquery_options.py @@ -36,26 +36,38 @@ UNKNOWN_LOCATION_MESSAGE = "The location '{location}' is set to an unknown value. Did you mean '{possibility}'?" -def _validate_location(value: Optional[str]): - - if value is None: - return - - if value not in bigframes.constants.ALL_BIGQUERY_LOCATIONS: - location = str(value) - possibility = min( - bigframes.constants.ALL_BIGQUERY_LOCATIONS, - key=lambda item: jellyfish.levenshtein_distance(location, item), - ) - warnings.warn( - UNKNOWN_LOCATION_MESSAGE.format(location=location, possibility=possibility), - # There are many layers before we get to (possibly) the user's code: - # -> bpd.options.bigquery.location = "us-central-1" - # -> location.setter - # -> _validate_location - stacklevel=3, - category=bigframes.exceptions.UnknownLocationWarning, - ) +def _get_validated_location(location: Optional[str]) -> Optional[str]: + + if location is None: + return location + + location = str(location) + if location in bigframes.constants.ALL_BIGQUERY_LOCATIONS: + return location + + location_lowercase = location.lower() + if location_lowercase in bigframes.constants.BIGQUERY_REGIONS: + return location_lowercase + + location_uppercase = location.upper() + if location_uppercase in bigframes.constants.BIGQUERY_MULTIREGIONS: + return location_uppercase + + possibility = min( + bigframes.constants.ALL_BIGQUERY_LOCATIONS, + key=lambda item: jellyfish.levenshtein_distance(location, item), + ) + warnings.warn( + UNKNOWN_LOCATION_MESSAGE.format(location=location, possibility=possibility), + # There are many layers before we get to (possibly) the user's code: + # -> bpd.options.bigquery.location = "us-central-1" + # -> location.setter + # -> _get_validated_location + stacklevel=3, + category=bigframes.exceptions.UnknownLocationWarning, + ) + + return location def _validate_ordering_mode(value: str) -> bigframes.enums.OrderingMode: @@ -135,8 +147,7 @@ def location(self) -> Optional[str]: def location(self, value: Optional[str]): if self._session_started and self._location != value: raise ValueError(SESSION_STARTED_MESSAGE.format(attribute="location")) - _validate_location(value) - self._location = value + self._location = _get_validated_location(value) @property def project(self) -> Optional[str]: diff --git a/bigframes/constants.py b/bigframes/constants.py index 4d5b6b8eb3..e0c8305079 100644 --- a/bigframes/constants.py +++ b/bigframes/constants.py @@ -22,9 +22,8 @@ DEFAULT_EXPIRATION = datetime.timedelta(days=7) # https://cloud.google.com/bigquery/docs/locations -ALL_BIGQUERY_LOCATIONS = frozenset( +BIGQUERY_REGIONS = frozenset( { - # regions "us-east5", "us-south1", "us-central1", @@ -68,11 +67,15 @@ "me-central1", "me-west1", "africa-south1", - # multi-regions + } +) +BIGQUERY_MULTIREGIONS = frozenset( + { "US", "EU", } ) +ALL_BIGQUERY_LOCATIONS = frozenset(BIGQUERY_REGIONS.union(BIGQUERY_MULTIREGIONS)) # https://cloud.google.com/storage/docs/regional-endpoints REP_ENABLED_BIGQUERY_LOCATIONS = frozenset( diff --git a/tests/unit/_config/test_bigquery_options.py b/tests/unit/_config/test_bigquery_options.py index b827b0723d..f40c140a9e 100644 --- a/tests/unit/_config/test_bigquery_options.py +++ b/tests/unit/_config/test_bigquery_options.py @@ -90,6 +90,10 @@ def test_setter_if_session_started_but_setting_the_same_value(attribute): [ (None,), ("us-central1",), + ("us-Central1",), + ("US-CENTRAL1",), + ("US",), + ("us",), ], ) def test_location_set_to_valid_no_warning(valid_location): From 562098c7278c43d7e884b79bf8a799dfb8ec8f60 Mon Sep 17 00:00:00 2001 From: Shobhit Singh Date: Thu, 3 Oct 2024 08:48:00 +0000 Subject: [PATCH 2/3] fix failing unit test --- bigframes/_config/bigquery_options.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/bigframes/_config/bigquery_options.py b/bigframes/_config/bigquery_options.py index 0f0259a8d8..e81557e343 100644 --- a/bigframes/_config/bigquery_options.py +++ b/bigframes/_config/bigquery_options.py @@ -36,14 +36,12 @@ UNKNOWN_LOCATION_MESSAGE = "The location '{location}' is set to an unknown value. Did you mean '{possibility}'?" -def _get_validated_location(location: Optional[str]) -> Optional[str]: +def _get_validated_location(value: Optional[str]) -> Optional[str]: - if location is None: - return location + if value is None or value in bigframes.constants.ALL_BIGQUERY_LOCATIONS: + return value - location = str(location) - if location in bigframes.constants.ALL_BIGQUERY_LOCATIONS: - return location + location = str(value) location_lowercase = location.lower() if location_lowercase in bigframes.constants.BIGQUERY_REGIONS: @@ -67,7 +65,7 @@ def _get_validated_location(location: Optional[str]) -> Optional[str]: category=bigframes.exceptions.UnknownLocationWarning, ) - return location + return value def _validate_ordering_mode(value: str) -> bigframes.enums.OrderingMode: From 70a211cc666acf618d22c6d766b2f8bc0eba2ffb Mon Sep 17 00:00:00 2001 From: Shobhit Singh Date: Thu, 3 Oct 2024 17:13:54 +0000 Subject: [PATCH 3/3] add system tests for non canonical location setting --- tests/system/large/test_location.py | 39 +++++++++++++++++++++++------ 1 file changed, 32 insertions(+), 7 deletions(-) diff --git a/tests/system/large/test_location.py b/tests/system/large/test_location.py index 204c6b7463..2ef002d7e0 100644 --- a/tests/system/large/test_location.py +++ b/tests/system/large/test_location.py @@ -22,7 +22,9 @@ import bigframes.session.clients -def _assert_bq_execution_location(session: bigframes.Session): +def _assert_bq_execution_location( + session: bigframes.Session, expected_location: typing.Optional[str] = None +): df = session.read_gbq( """ SELECT "aaa" as name, 111 as number @@ -33,10 +35,10 @@ def _assert_bq_execution_location(session: bigframes.Session): """ ) - assert ( - typing.cast(bigquery.QueryJob, df.query_job).location - == session.bqclient.location - ) + if expected_location is None: + expected_location = session._location + + assert typing.cast(bigquery.QueryJob, df.query_job).location == expected_location result = ( df[["name", "number"]] @@ -47,8 +49,7 @@ def _assert_bq_execution_location(session: bigframes.Session): ) assert ( - typing.cast(bigquery.QueryJob, result.query_job).location - == session.bqclient.location + typing.cast(bigquery.QueryJob, result.query_job).location == expected_location ) @@ -87,6 +88,30 @@ def test_bq_location(bigquery_location): _assert_bq_execution_location(session) +@pytest.mark.parametrize( + ("set_location", "resolved_location"), + # Sort the set to avoid nondeterminism. + [ + (loc.capitalize(), loc) + for loc in sorted(bigframes.constants.ALL_BIGQUERY_LOCATIONS) + ], +) +def test_bq_location_non_canonical(set_location, resolved_location): + session = bigframes.Session( + context=bigframes.BigQueryOptions(location=set_location) + ) + + assert session.bqclient.location == set_location + + # by default global endpoint is used + assert ( + session.bqclient._connection.API_BASE_URL == "https://bigquery.googleapis.com" + ) + + # assert that bigframes session honors the location + _assert_bq_execution_location(session, resolved_location) + + @pytest.mark.parametrize( "bigquery_location", # Sort the set to avoid nondeterminism.