feat: added --destination_table parameter to bigquery magic · googleapis/google-cloud-python@ea35144 · GitHub

Commit ea35144
1 parent 48359eb

File tree

2 files changed: +148 −0 lines changed

bigquery/google/cloud/bigquery/magics.py

Lines changed: 56 additions & 0 deletions
@@ -31,6 +31,10 @@
         this parameter is used. If an error occurs during the query execution,
         the corresponding ``QueryJob`` instance (if available) is stored in
         the variable instead.
+    * ``--destination_table`` (optional, line argument):
+        A dataset and table where the query results will be stored. If the
+        table does not exist, it will be created; if it already exists, its
+        data will be overwritten. Use the format ``<dataset_id>.<table_id>``.
     * ``--project <project>`` (optional, line argument):
         Project to use for running the query. Defaults to the context
         :attr:`~google.cloud.bigquery.magics.Context.project`.
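
For reference, a notebook cell using the new flag would look something like the following; the dataset, table, and public-data query are illustrative, not part of this commit:

```
%%bigquery --destination_table my_dataset.my_table
SELECT name, SUM(number) AS total
FROM `bigquery-public-data.usa_names.usa_1910_2013`
GROUP BY name
ORDER BY total DESC
LIMIT 10
```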
@@ -145,6 +149,7 @@
@@ -145,6 +149,7 @@
     raise ImportError("This module can only be loaded in IPython.")

 from google.api_core import client_info
+from google.api_core.exceptions import NotFound
 import google.auth
 from google.cloud import bigquery
 from google.cloud.bigquery.dbapi import _helpers
@@ -336,12 +341,48 @@ def _run_query(client, query, job_config=None):
     return query_job


+def _create_dataset_if_necessary(client, dataset_id):
+    """Creates a dataset in the current project if it doesn't exist.
+
+    Args:
+        client (google.cloud.bigquery.client.Client):
+            Client to bundle configuration needed for API requests.
+        dataset_id (str):
+            Dataset id.
+
+    Returns:
+        bool: ``True`` if the dataset was created, ``False`` if it already existed.
+    """
+    dataset_reference = bigquery.dataset.DatasetReference(client.project, dataset_id)
+    try:
+        client.get_dataset(dataset_reference)
+        return False
+    except NotFound:
+        pass
+    dataset = bigquery.Dataset(dataset_reference)
+    dataset.location = client.location
+    print("Creating dataset: {}".format(dataset_id))
+    client.create_dataset(dataset)
+    return True
+
+
 @magic_arguments.magic_arguments()
 @magic_arguments.argument(
     "destination_var",
     nargs="?",
     help=("If provided, save the output to this variable instead of displaying it."),
 )
+@magic_arguments.argument(
+    "--destination_table",
+    type=str,
+    default=None,
+    help=(
+        "If provided, save the output of the query to a new BigQuery table. "
+        "Use the format <dataset_id>.<table_id>. "
+        "If the table does not exist, it will be created. "
+        "If the table already exists, its data will be overwritten."
+    ),
+)
 @magic_arguments.argument(
     "--project",
     type=str,
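
The helper added above follows a plain check-then-create pattern: probe with `get_dataset` and create only on `NotFound`. A minimal sketch of the same pattern outside the magic, assuming default credentials and an illustrative dataset id:

```python
from google.api_core.exceptions import NotFound
from google.cloud import bigquery

client = bigquery.Client()
dataset_ref = bigquery.dataset.DatasetReference(client.project, "my_dataset")
try:
    client.get_dataset(dataset_ref)  # raises NotFound if the dataset is missing
except NotFound:
    client.create_dataset(bigquery.Dataset(dataset_ref))
```

Depending on the library version, `client.create_dataset(dataset, exists_ok=True)` collapses the probe and the create into a single call.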
@@ -485,6 +526,21 @@ def _cell_magic(line, query):
     job_config.use_legacy_sql = args.use_legacy_sql
     job_config.dry_run = args.dry_run

+    if args.destination_table:
+        split = args.destination_table.split(".")
+        if len(split) != 2:
+            raise ValueError(
+                "--destination_table should be in a <dataset_id>.<table_id> format."
+            )
+        dataset_id, table_id = split
+        job_config.allow_large_results = True
+        dataset_ref = client.dataset(dataset_id)
+        destination_table_ref = dataset_ref.table(table_id)
+        job_config.destination = destination_table_ref
+        job_config.create_disposition = "CREATE_IF_NEEDED"
+        job_config.write_disposition = "WRITE_TRUNCATE"
+        _create_dataset_if_necessary(client, dataset_id)
+
     if args.maximum_bytes_billed == "None":
         job_config.maximum_bytes_billed = 0
     elif args.maximum_bytes_billed is not None:
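
Outside the magic, the same job configuration can be assembled by hand. A sketch of roughly what `_cell_magic` now sets up when the flag is present, assuming an existing `Client` and illustrative ids:

```python
from google.cloud import bigquery

client = bigquery.Client()
job_config = bigquery.QueryJobConfig()
job_config.allow_large_results = True
job_config.destination = client.dataset("my_dataset").table("my_table")
job_config.create_disposition = "CREATE_IF_NEEDED"  # create the table if missing
job_config.write_disposition = "WRITE_TRUNCATE"  # overwrite any existing rows

query_job = client.query("SELECT 17 AS answer", job_config=job_config)
query_job.result()  # block until the destination table is written
```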

bigquery/tests/unit/test_magics.py

Lines changed: 92 additions & 0 deletions
@@ -39,6 +39,7 @@
     from google.cloud import bigquery_storage_v1beta1
 except ImportError:  # pragma: NO COVER
     bigquery_storage_v1beta1 = None
+from google.cloud import bigquery
 from google.cloud.bigquery import job
 from google.cloud.bigquery import table
 from google.cloud.bigquery import magics
@@ -336,6 +337,38 @@ def test__make_bqstorage_client_true_missing_gapic(missing_grpcio_lib):
     assert "grpcio" in str(exc_context.value)


+def test__create_dataset_if_necessary_exists():
+    project = "project_id"
+    dataset_id = "dataset_id"
+    dataset_reference = bigquery.dataset.DatasetReference(project, dataset_id)
+    dataset = bigquery.Dataset(dataset_reference)
+    client_patch = mock.patch(
+        "google.cloud.bigquery.magics.bigquery.Client", autospec=True
+    )
+    with client_patch as client_mock:
+        client_mock().project = project
+        client_mock().get_dataset.return_value = dataset
+        result = magics._create_dataset_if_necessary(client_mock(), dataset_id)
+        client_mock().create_dataset.assert_not_called()
+
+    assert result is False
+
+
+def test__create_dataset_if_necessary_not_exist():
+    project = "project_id"
+    dataset_id = "dataset_id"
+    client_patch = mock.patch(
+        "google.cloud.bigquery.magics.bigquery.Client", autospec=True
+    )
+    with client_patch as client_mock:
+        client_mock().location = "us"
+        client_mock().project = project
+        client_mock().get_dataset.side_effect = exceptions.NotFound("dataset not found")
+        result = magics._create_dataset_if_necessary(client_mock(), dataset_id)
+        client_mock().create_dataset.assert_called_once()
+    assert result is True
+
+
 @pytest.mark.usefixtures("ipython_interactive")
 def test_extension_load():
     ip = IPython.get_ipython()
@@ -1199,3 +1232,62 @@ def test_bigquery_magic_omits_tracebacks_from_error_message():
     assert "400 Syntax error in SQL query" in output
     assert "Traceback (most recent call last)" not in output
     assert "Syntax error" not in captured_io.stdout
+
+
+@pytest.mark.usefixtures("ipython_interactive")
+def test_bigquery_magic_w_destination_table_invalid_format():
+    ip = IPython.get_ipython()
+    ip.extension_manager.load_extension("google.cloud.bigquery")
+    magics.context._project = None
+
+    credentials_mock = mock.create_autospec(
+        google.auth.credentials.Credentials, instance=True
+    )
+    default_patch = mock.patch(
+        "google.auth.default", return_value=(credentials_mock, "general-project")
+    )
+
+    client_patch = mock.patch(
+        "google.cloud.bigquery.magics.bigquery.Client", autospec=True
+    )
+
+    with client_patch, default_patch, pytest.raises(ValueError) as exc_context:
+        ip.run_cell_magic(
+            "bigquery", "--destination_table dataset", "SELECT foo FROM WHERE LIMIT bar"
+        )
+    error_msg = str(exc_context.value)
+    assert (
+        "--destination_table should be in a "
+        "<dataset_id>.<table_id> format." in error_msg
+    )
+
+
+@pytest.mark.usefixtures("ipython_interactive")
+def test_bigquery_magic_w_destination_table():
+    ip = IPython.get_ipython()
+    ip.extension_manager.load_extension("google.cloud.bigquery")
+    magics.context.credentials = mock.create_autospec(
+        google.auth.credentials.Credentials, instance=True
+    )
+
+    create_dataset_if_necessary_patch = mock.patch(
+        "google.cloud.bigquery.magics._create_dataset_if_necessary", autospec=True
+    )
+
+    run_query_patch = mock.patch(
+        "google.cloud.bigquery.magics._run_query", autospec=True
+    )
+
+    with create_dataset_if_necessary_patch, run_query_patch as run_query_mock:
+        ip.run_cell_magic(
+            "bigquery",
+            "--destination_table dataset_id.table_id",
+            "SELECT foo FROM WHERE LIMIT bar",
+        )
+
+    job_config_used = run_query_mock.call_args_list[0][1]["job_config"]
+    assert job_config_used.allow_large_results is True
+    assert job_config_used.create_disposition == "CREATE_IF_NEEDED"
+    assert job_config_used.write_disposition == "WRITE_TRUNCATE"
+    assert job_config_used.destination.dataset_id == "dataset_id"
+    assert job_config_used.destination.table_id == "table_id"
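
To run only the tests added in this commit, a pytest invocation along these lines should work; the `-k` expression is illustrative:

```
pytest bigquery/tests/unit/test_magics.py -k "destination_table or create_dataset"
```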
