From ae538f699d9fbd25d8d4d0ff44b29909a019e723 Mon Sep 17 00:00:00 2001
From: Tres Seaver
Date: Tue, 25 Sep 2018 16:26:42 -0400
Subject: [PATCH 1/4] Script for scrubbing leaked datasets by prefix.

See #6098.
---
 bigquery/tests/scrub_datasets.py | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)
 create mode 100644 bigquery/tests/scrub_datasets.py

diff --git a/bigquery/tests/scrub_datasets.py b/bigquery/tests/scrub_datasets.py
new file mode 100644
index 000000000000..7a106124244a
--- /dev/null
+++ b/bigquery/tests/scrub_datasets.py
@@ -0,0 +1,27 @@
+import glob
+import re
+import sys
+
+from google.api_core.exceptions import NotFound
+from google.cloud.bigquery import Client
+
+
+def main(prefixes):
+    client = Client()
+
+    pattern = re.compile(
+        '|'.join('^{}.*$'.format(prefix) for prefix in prefixes))
+
+    ds_items = list(client.list_datasets())
+    for dataset in ds_items:
+        ds_id = dataset.dataset_id
+        if pattern.match(ds_id):
+            print("Deleting dataset: {}".format(ds_id))
+            try:
+                client.delete_dataset(dataset.reference, delete_contents=True)
+            except NotFound:
+                print("  NOT FOUND")
+
+
+if __name__ == '__main__':
+    main(sys.argv[1:])

From c7643fa6c9b4e2e4e95a7f72ba4e9b306b979798 Mon Sep 17 00:00:00 2001
From: Tres Seaver
Date: Tue, 25 Sep 2018 16:28:21 -0400
Subject: [PATCH 2/4] Don't leak dataset after intentionally failing to delete
 it.

Closes #6098.
---
 bigquery/tests/system.py | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/bigquery/tests/system.py b/bigquery/tests/system.py
index a94a672198b4..8e0763a08ab6 100644
--- a/bigquery/tests/system.py
+++ b/bigquery/tests/system.py
@@ -293,10 +293,7 @@ def test_delete_dataset_delete_contents_true(self):
 
     def test_delete_dataset_delete_contents_false(self):
         from google.api_core import exceptions
-        dataset_id = _make_dataset_id('delete_table_false')
-        dataset = retry_403(Config.CLIENT.create_dataset)(
-            Dataset(Config.CLIENT.dataset(dataset_id)))
-
+        dataset = self.temp_dataset(_make_dataset_id('delete_table_false'))
         table_id = 'test_table'
         table_arg = Table(dataset.table(table_id), schema=SCHEMA)
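The one-line fix in patch 2 leans on the system-test suite's temp_dataset helper, whose body is not part of this diff: instead of creating the dataset ad hoc, the helper registers it for teardown, so the dataset is deleted even when the test's intentional delete failure would otherwise leave it behind. A minimal sketch of such a helper, assuming the suite keeps a to_delete list that teardown drains (the body below is an assumption for illustration, not the repository's code):

    def temp_dataset(self, dataset_id):
        # Sketch only: create the dataset (retrying transient 403s, as the
        # removed lines did) and register it so teardown always deletes it,
        # whether the test passes or fails.
        dataset = retry_403(Config.CLIENT.create_dataset)(
            Dataset(Config.CLIENT.dataset(dataset_id)))
        self.to_delete.append(dataset)
        return dataset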
From c39b194b21513a7d55ecd537187d80ae474cb112 Mon Sep 17 00:00:00 2001
From: Tres Seaver
Date: Tue, 25 Sep 2018 16:29:04 -0400
Subject: [PATCH 3/4] Make dataset IDs unique for snippets / systests.

Helps in diagnosing leaks.
---
 bigquery/docs/snippets.py | 20 ++++++++++----------
 bigquery/tests/system.py  |  4 ++--
 2 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/bigquery/docs/snippets.py b/bigquery/docs/snippets.py
index 6093abe95c1c..f87e82309fb3 100644
--- a/bigquery/docs/snippets.py
+++ b/bigquery/docs/snippets.py
@@ -1212,8 +1212,8 @@ def test_table_insert_rows(client, to_delete):
 
 def test_load_table_from_file(client, to_delete):
     """Upload table data from a CSV file."""
-    dataset_id = 'table_upload_from_file_dataset_{}'.format(_millis())
-    table_id = 'table_upload_from_file_table_{}'.format(_millis())
+    dataset_id = 'load_table_from_file_dataset_{}'.format(_millis())
+    table_id = 'load_table_from_file_table_{}'.format(_millis())
     dataset = bigquery.Dataset(client.dataset(dataset_id))
     dataset.location = 'US'
     client.create_dataset(dataset)
@@ -1261,7 +1261,7 @@ def test_load_table_from_file(client, to_delete):
 
 
 def test_load_table_from_uri_csv(client, to_delete, capsys):
-    dataset_id = 'load_table_dataset_{}'.format(_millis())
+    dataset_id = 'load_table_from_uri_csv_{}'.format(_millis())
     dataset = bigquery.Dataset(client.dataset(dataset_id))
     client.create_dataset(dataset)
     to_delete.append(dataset)
@@ -1300,7 +1300,7 @@ def test_load_table_from_uri_csv(client, to_delete, capsys):
 
 
 def test_load_table_from_uri_json(client, to_delete, capsys):
-    dataset_id = 'load_table_dataset_{}'.format(_millis())
+    dataset_id = 'load_table_from_uri_json_{}'.format(_millis())
     dataset = bigquery.Dataset(client.dataset(dataset_id))
     dataset.location = 'US'
     client.create_dataset(dataset)
@@ -1381,7 +1381,7 @@ def test_load_table_from_uri_cmek(client, to_delete):
 
 
 def test_load_table_from_uri_parquet(client, to_delete, capsys):
-    dataset_id = 'load_table_dataset_{}'.format(_millis())
+    dataset_id = 'load_table_from_uri_parquet_{}'.format(_millis())
     dataset = bigquery.Dataset(client.dataset(dataset_id))
     client.create_dataset(dataset)
     to_delete.append(dataset)
@@ -1414,7 +1414,7 @@ def test_load_table_from_uri_parquet(client, to_delete, capsys):
 
 
 def test_load_table_from_uri_orc(client, to_delete, capsys):
-    dataset_id = 'load_table_dataset_{}'.format(_millis())
+    dataset_id = 'load_table_from_uri_orc_{}'.format(_millis())
     dataset = bigquery.Dataset(client.dataset(dataset_id))
     client.create_dataset(dataset)
     to_delete.append(dataset)
@@ -1458,7 +1458,7 @@ def test_load_table_from_uri_autodetect(client, to_delete, capsys):
     followed by more shared code. Note that only the last format in the
     format-specific code section will be tested in this test.
     """
-    dataset_id = 'load_table_dataset_{}'.format(_millis())
+    dataset_id = 'load_table_from_uri_auto_{}'.format(_millis())
     dataset = bigquery.Dataset(client.dataset(dataset_id))
     client.create_dataset(dataset)
     to_delete.append(dataset)
@@ -1524,7 +1524,7 @@ def test_load_table_from_uri_append(client, to_delete, capsys):
     followed by more shared code. Note that only the last format in the
     format-specific code section will be tested in this test.
     """
-    dataset_id = 'load_table_dataset_{}'.format(_millis())
+    dataset_id = 'load_table_from_uri_append_{}'.format(_millis())
     dataset = bigquery.Dataset(client.dataset(dataset_id))
     client.create_dataset(dataset)
     to_delete.append(dataset)
@@ -1603,7 +1603,7 @@ def test_load_table_from_uri_truncate(client, to_delete, capsys):
     followed by more shared code. Note that only the last format in the
     format-specific code section will be tested in this test.
     """
-    dataset_id = 'load_table_dataset_{}'.format(_millis())
+    dataset_id = 'load_table_from_uri_trunc_{}'.format(_millis())
     dataset = bigquery.Dataset(client.dataset(dataset_id))
     client.create_dataset(dataset)
     to_delete.append(dataset)
@@ -3051,7 +3051,7 @@ def test_list_rows_as_dataframe(client):
 @pytest.mark.skipif(pandas is None, reason='Requires `pandas`')
 @pytest.mark.skipif(pyarrow is None, reason='Requires `pyarrow`')
 def test_load_table_from_dataframe(client, to_delete):
-    dataset_id = 'load_table_dataframe_dataset_{}'.format(_millis())
+    dataset_id = 'load_table_from_dataframe_{}'.format(_millis())
     dataset = bigquery.Dataset(client.dataset(dataset_id))
     client.create_dataset(dataset)
     to_delete.append(dataset)
diff --git a/bigquery/tests/system.py b/bigquery/tests/system.py
index 8e0763a08ab6..0db041de56ef 100644
--- a/bigquery/tests/system.py
+++ b/bigquery/tests/system.py
@@ -1186,7 +1186,7 @@ def _load_table_for_dml(self, rows, dataset_id, table_id):
         self._fetch_single_page(table)
 
     def test_query_w_dml(self):
-        dataset_name = _make_dataset_id('dml_tests')
+        dataset_name = _make_dataset_id('dml_query')
         table_name = 'test_table'
         self._load_table_for_dml([('Hello World',)], dataset_name, table_name)
         query_template = """UPDATE {}.{}
@@ -1202,7 +1202,7 @@ def test_query_w_dml(self):
         self.assertEqual(query_job.num_dml_affected_rows, 1)
 
     def test_dbapi_w_dml(self):
-        dataset_name = _make_dataset_id('dml_tests')
+        dataset_name = _make_dataset_id('dml_dbapi')
         table_name = 'test_table'
         self._load_table_for_dml([('Hello World',)], dataset_name, table_name)
         query_template = """UPDATE {}.{}
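What patch 3's renames buy is that every dataset ID now pairs a test-specific prefix with a unique suffix, so a dataset that leaks names the test that created it. The _millis() and _make_dataset_id() helpers referenced above are defined elsewhere and are not part of this diff; a plausible sketch, assuming a millisecond wall-clock suffix (both bodies are assumptions for illustration, not the repository's code):

    import time


    def _millis():
        # Wall-clock milliseconds: coarse, but unique enough to keep
        # successive test runs from colliding on dataset IDs.
        return str(int(time.time() * 1000))


    def _make_dataset_id(prefix):
        # 'dml_query' -> e.g. 'dml_query_1537912144123': the prefix names
        # the responsible test, the suffix makes each run's ID unique.
        return '{}_{}'.format(prefix, _millis())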
""" - dataset_id = 'load_table_dataset_{}'.format(_millis()) + dataset_id = 'load_table_from_uri_trunc_{}'.format(_millis()) dataset = bigquery.Dataset(client.dataset(dataset_id)) client.create_dataset(dataset) to_delete.append(dataset) @@ -3051,7 +3051,7 @@ def test_list_rows_as_dataframe(client): @pytest.mark.skipif(pandas is None, reason='Requires `pandas`') @pytest.mark.skipif(pyarrow is None, reason='Requires `pyarrow`') def test_load_table_from_dataframe(client, to_delete): - dataset_id = 'load_table_dataframe_dataset_{}'.format(_millis()) + dataset_id = 'load_table_from_dataframe_{}'.format(_millis()) dataset = bigquery.Dataset(client.dataset(dataset_id)) client.create_dataset(dataset) to_delete.append(dataset) diff --git a/bigquery/tests/system.py b/bigquery/tests/system.py index 8e0763a08ab6..0db041de56ef 100644 --- a/bigquery/tests/system.py +++ b/bigquery/tests/system.py @@ -1186,7 +1186,7 @@ def _load_table_for_dml(self, rows, dataset_id, table_id): self._fetch_single_page(table) def test_query_w_dml(self): - dataset_name = _make_dataset_id('dml_tests') + dataset_name = _make_dataset_id('dml_query') table_name = 'test_table' self._load_table_for_dml([('Hello World',)], dataset_name, table_name) query_template = """UPDATE {}.{} @@ -1202,7 +1202,7 @@ def test_query_w_dml(self): self.assertEqual(query_job.num_dml_affected_rows, 1) def test_dbapi_w_dml(self): - dataset_name = _make_dataset_id('dml_tests') + dataset_name = _make_dataset_id('dml_dbapi') table_name = 'test_table' self._load_table_for_dml([('Hello World',)], dataset_name, table_name) query_template = """UPDATE {}.{} From ed76f148aca2173b4dca1eff2fd65f81f170ae80 Mon Sep 17 00:00:00 2001 From: Tres Seaver Date: Tue, 25 Sep 2018 17:16:30 -0400 Subject: [PATCH 4/4] Lint --- bigquery/tests/scrub_datasets.py | 1 - 1 file changed, 1 deletion(-) diff --git a/bigquery/tests/scrub_datasets.py b/bigquery/tests/scrub_datasets.py index 7a106124244a..2e8981aa62e3 100644 --- a/bigquery/tests/scrub_datasets.py +++ b/bigquery/tests/scrub_datasets.py @@ -1,4 +1,3 @@ -import glob import re import sys