From d93607057c6bc22704e8ebbd5dce1febb5c8da51 Mon Sep 17 00:00:00 2001 From: milkshakeiii Date: Tue, 21 May 2024 20:58:05 +0000 Subject: [PATCH 1/2] fix: warn and disable time travel for linked datasets --- bigframes/session/__init__.py | 27 +++++++++++++++++++++++++-- tests/system/small/test_session.py | 7 +++++++ 2 files changed, 32 insertions(+), 2 deletions(-) diff --git a/bigframes/session/__init__.py b/bigframes/session/__init__.py index ccdc3c5eeb..b7440cea64 100644 --- a/bigframes/session/__init__.py +++ b/bigframes/session/__init__.py @@ -676,7 +676,8 @@ def _read_gbq_table( # Fetch table metadata and validate # --------------------------------- - (time_travel_timestamp, table,) = bf_read_gbq_table.get_table_metadata( + time_travel_timestamp: Optional[datetime.datetime] = None + time_travel_timestamp, table = bf_read_gbq_table.get_table_metadata( self.bqclient, table_ref=table_ref, api_name=api_name, @@ -758,9 +759,31 @@ def _read_gbq_table( # Use a time travel to make sure the DataFrame is deterministic, even # if the underlying table changes. - # TODO(b/340540991): If a dry run query fails with time travel but + + # If a dry run query fails with time travel but # succeeds without it, omit the time travel clause and raise a warning # about potential non-determinism if the underlying tables are modified. + sql = bigframes.session._io.bigquery.to_query( + f"{table_ref.project}.{table_ref.dataset_id}.{table_ref.table_id}", + index_cols=index_cols, + columns=columns, + filters=filters, + time_travel_timestamp=time_travel_timestamp, + max_results=None, + ) + dry_run_config = bigquery.QueryJobConfig() + dry_run_config.dry_run = True + try: + self._start_query(sql, job_config=dry_run_config) + except google.api_core.exceptions.NotFound: + time_travel_timestamp = None + warnings.warn( + "NotFound error when reading table with time travel." + " Attempting query without time travel. Warning: Without" + " time travel, modifications to the underlying table may" + " result in errors or unexpected behavior." + ) + table_expression = bf_read_gbq_table.get_ibis_time_travel_table( ibis_client=self.ibis_client, table_ref=table_ref, diff --git a/tests/system/small/test_session.py b/tests/system/small/test_session.py index 2b7c6178ff..5026e6fb78 100644 --- a/tests/system/small/test_session.py +++ b/tests/system/small/test_session.py @@ -20,6 +20,7 @@ import time import typing from typing import List, Optional, Sequence +import warnings import google import google.cloud.bigquery as bigquery @@ -386,6 +387,12 @@ def test_read_gbq_twice_with_same_timestamp(session, penguins_table_id): assert df3 is not None +def test_read_gbq_on_linked_dataset_warns(session): + with warnings.catch_warnings(record=True) as warned: + session.read_gbq("bigframes-dev.thelook_ecommerce.orders") + assert len(warned) == 1 + + def test_read_gbq_table_clustered_with_filter(session: bigframes.Session): df = session.read_gbq_table( "bigquery-public-data.cloud_storage_geo_index.landsat_index", From e42aa8bf12cfc5c9910075923d3d7c40d8344843 Mon Sep 17 00:00:00 2001 From: milkshakeiii Date: Tue, 21 May 2024 22:14:15 +0000 Subject: [PATCH 2/2] add warning type --- bigframes/exceptions.py | 4 ++++ bigframes/session/__init__.py | 5 ++++- tests/system/small/test_session.py | 1 + 3 files changed, 9 insertions(+), 1 deletion(-) diff --git a/bigframes/exceptions.py b/bigframes/exceptions.py index 027b3a4236..1162217fc1 100644 --- a/bigframes/exceptions.py +++ b/bigframes/exceptions.py @@ -41,3 +41,7 @@ class PreviewWarning(Warning): class NullIndexError(ValueError): """Object has no index.""" + + +class TimeTravelDisabledWarning(Warning): + """A query was reattempted without time travel.""" diff --git a/bigframes/session/__init__.py b/bigframes/session/__init__.py index b7440cea64..58aaa3b746 100644 --- a/bigframes/session/__init__.py +++ b/bigframes/session/__init__.py @@ -776,12 +776,15 @@ def _read_gbq_table( try: self._start_query(sql, job_config=dry_run_config) except google.api_core.exceptions.NotFound: + # note that a notfound caused by a simple typo will be + # caught above when the metadata is fetched, not here time_travel_timestamp = None warnings.warn( "NotFound error when reading table with time travel." " Attempting query without time travel. Warning: Without" " time travel, modifications to the underlying table may" - " result in errors or unexpected behavior." + " result in errors or unexpected behavior.", + category=bigframes.exceptions.TimeTravelDisabledWarning, ) table_expression = bf_read_gbq_table.get_ibis_time_travel_table( diff --git a/tests/system/small/test_session.py b/tests/system/small/test_session.py index 5026e6fb78..c617eab7f5 100644 --- a/tests/system/small/test_session.py +++ b/tests/system/small/test_session.py @@ -391,6 +391,7 @@ def test_read_gbq_on_linked_dataset_warns(session): with warnings.catch_warnings(record=True) as warned: session.read_gbq("bigframes-dev.thelook_ecommerce.orders") assert len(warned) == 1 + assert warned[0].category == bigframes.exceptions.TimeTravelDisabledWarning def test_read_gbq_table_clustered_with_filter(session: bigframes.Session):