From ab34415e88a16d36c495fb0b664647d9aac11cfb Mon Sep 17 00:00:00 2001 From: Your Name Date: Tue, 7 May 2024 12:45:35 -0500 Subject: [PATCH 1/2] fix: add jellyfish dependencies --- setup.py | 2 ++ testing/constraints-3.9.txt | 1 + 2 files changed, 3 insertions(+) diff --git a/setup.py b/setup.py index 2ccf63259c..d5d282d11a 100644 --- a/setup.py +++ b/setup.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. + import io import itertools import os @@ -45,6 +46,7 @@ "google-cloud-resource-manager >=1.10.3", "google-cloud-storage >=2.0.0", "ibis-framework[bigquery] >=8.0.0,<9.0.0dev", + "jellyfish >=0.8.9", # TODO: Relax upper bound once we have fixed `system_prerelease` tests. "pandas >=1.5.0", "pyarrow >=8.0.0", diff --git a/testing/constraints-3.9.txt b/testing/constraints-3.9.txt index f5007ed564..3c51668655 100644 --- a/testing/constraints-3.9.txt +++ b/testing/constraints-3.9.txt @@ -11,6 +11,7 @@ google-cloud-iam==2.12.1 google-cloud-resource-manager==1.10.3 google-cloud-storage==2.0.0 ibis-framework==8.0.0 +jellyfish==0.8.9 pandas==1.5.0 pyarrow==8.0.0 pydata-google-auth==1.8.2 From 85663ef50d452cf1d0cd2e413068151b9f875bb5 Mon Sep 17 00:00:00 2001 From: Your Name Date: Tue, 7 May 2024 14:34:05 -0500 Subject: [PATCH 2/2] feat: suggest correct options in bpd.options.bigquery.location --- bigframes/_config/bigquery_options.py | 11 +++++++++-- tests/unit/_config/test_bigquery_options.py | 11 ++++++----- 2 files changed, 15 insertions(+), 7 deletions(-) diff --git a/bigframes/_config/bigquery_options.py b/bigframes/_config/bigquery_options.py index 74561e6f24..6f841a36b3 100644 --- a/bigframes/_config/bigquery_options.py +++ b/bigframes/_config/bigquery_options.py @@ -21,6 +21,7 @@ import google.api_core.exceptions import google.auth.credentials +import jellyfish import bigframes.constants import bigframes.exceptions @@ -30,7 +31,8 @@ "Call bigframes.pandas.close_session() first, if you are using the bigframes.pandas API." ) -UNKNOWN_LOCATION_MESSAGE = "The location '{location}' is set to an unknown value." + +UNKNOWN_LOCATION_MESSAGE = "The location '{location}' is set to an unknown value. Did you mean '{possibility}'?" def _validate_location(value: Optional[str]): @@ -39,8 +41,13 @@ def _validate_location(value: Optional[str]): return if value not in bigframes.constants.ALL_BIGQUERY_LOCATIONS: + location = str(value) + possibility = min( + bigframes.constants.ALL_BIGQUERY_LOCATIONS, + key=lambda item: jellyfish.levenshtein_distance(location, item), + ) warnings.warn( - UNKNOWN_LOCATION_MESSAGE.format(location=value), + UNKNOWN_LOCATION_MESSAGE.format(location=location, possibility=possibility), # There are many layers before we get to (possibly) the user's code: # -> bpd.options.bigquery.location = "us-central-1" # -> location.setter diff --git a/tests/unit/_config/test_bigquery_options.py b/tests/unit/_config/test_bigquery_options.py index 7d9a452f42..b827b0723d 100644 --- a/tests/unit/_config/test_bigquery_options.py +++ b/tests/unit/_config/test_bigquery_options.py @@ -108,24 +108,25 @@ def test_location_set_to_valid_no_warning(valid_location): @pytest.mark.parametrize( [ "invalid_location", + "possibility", ], [ # Test with common mistakes, see article. # https://en.wikipedia.org/wiki/Edit_distance#Formal_definition_and_properties # Substitution - ("us-wist-3",), + ("us-wist3", "us-west3"), # Insertion - ("us-central-1",), + ("us-central-1", "us-central1"), # Deletion - ("asia-suth2",), + ("asia-suth2", "asia-south2"), ], ) -def test_location_set_to_invalid_warning(invalid_location): +def test_location_set_to_invalid_warning(invalid_location, possibility): options = bigquery_options.BigQueryOptions() with pytest.warns( bigframes.exceptions.UnknownLocationWarning, match=re.escape( - f"The location '{invalid_location}' is set to an unknown value." + f"The location '{invalid_location}' is set to an unknown value. Did you mean '{possibility}'?" ), ): options.location = invalid_location