feat: Add UpdateMode to update_dataset by chalmerlowe · Pull Request #2204 · googleapis/python-bigquery
Merged
22 changes: 22 additions & 0 deletions google/cloud/bigquery/client.py
@@ -91,6 +91,7 @@
from google.cloud.bigquery.dataset import DatasetListItem
from google.cloud.bigquery.dataset import DatasetReference
from google.cloud.bigquery.enums import AutoRowIDs
from google.cloud.bigquery.enums import UpdateMode
from google.cloud.bigquery.format_options import ParquetOptions
from google.cloud.bigquery.job import (
CopyJob,
@@ -1198,6 +1199,7 @@ def update_dataset(
fields: Sequence[str],
retry: retries.Retry = DEFAULT_RETRY,
timeout: TimeoutType = DEFAULT_TIMEOUT,
update_mode: Optional[UpdateMode] = None,
) -> Dataset:
"""Change some fields of a dataset.

@@ -1237,6 +1239,20 @@ def update_dataset(
timeout (Optional[float]):
The number of seconds to wait for the underlying HTTP transport
before using ``retry``.
update_mode (Optional[google.cloud.bigquery.enums.UpdateMode]):
Specifies the kind of information to update in a dataset.
By default, dataset metadata (such as friendlyName, description,
and labels) and ACL information are updated. This argument accepts
the following enum values:

* :attr:`~google.cloud.bigquery.enums.UpdateMode.UPDATE_MODE_UNSPECIFIED`:
The default value. Behavior defaults to UPDATE_FULL.
* :attr:`~google.cloud.bigquery.enums.UpdateMode.UPDATE_METADATA`:
Includes metadata information for the dataset, such as friendlyName, description, labels, etc.
* :attr:`~google.cloud.bigquery.enums.UpdateMode.UPDATE_ACL`:
Includes ACL information for the dataset, which defines dataset access for one or more entities.
* :attr:`~google.cloud.bigquery.enums.UpdateMode.UPDATE_FULL`:
Includes both dataset metadata and ACL information.

Returns:
google.cloud.bigquery.dataset.Dataset:
@@ -1250,6 +1266,11 @@
path = dataset.path
span_attributes = {"path": path, "fields": fields}

if update_mode:
query_params = {"updateMode": update_mode.value}
else:
query_params = {}

api_response = self._call_api(
retry,
span_name="BigQuery.updateDataset",
@@ -1259,6 +1280,7 @@
data=partial,
headers=headers,
timeout=timeout,
query_params=query_params,
)
return Dataset.from_api_repr(api_response)

18 changes: 18 additions & 0 deletions google/cloud/bigquery/enums.py
@@ -409,6 +409,24 @@ class BigLakeTableFormat(object):
"""Apache Iceberg format."""


class UpdateMode(enum.Enum):
"""Specifies the kind of information to update in a dataset."""

UPDATE_MODE_UNSPECIFIED = "UPDATE_MODE_UNSPECIFIED"
"""The default value. Behavior defaults to UPDATE_FULL."""

UPDATE_METADATA = "UPDATE_METADATA"
"""Includes metadata information for the dataset, such as friendlyName,
description, labels, etc."""

UPDATE_ACL = "UPDATE_ACL"
"""Includes ACL information for the dataset, which defines dataset access
for one or more entities."""

UPDATE_FULL = "UPDATE_FULL"
"""Includes both dataset metadata and ACL information."""


class JobCreationMode(object):
"""Documented values for Job Creation Mode."""

93 changes: 92 additions & 1 deletion tests/unit/test_client.py
@@ -60,7 +60,8 @@

from google.cloud.bigquery import job as bqjob
import google.cloud.bigquery._job_helpers
from google.cloud.bigquery.dataset import DatasetReference, Dataset
from google.cloud.bigquery.enums import UpdateMode
from google.cloud.bigquery import exceptions
from google.cloud.bigquery import ParquetOptions
import google.cloud.bigquery.retry
@@ -2101,6 +2102,7 @@ def test_update_dataset(self):
},
path="/" + PATH,
timeout=7.5,
query_params={},
)
self.assertEqual(ds2.description, ds.description)
self.assertEqual(ds2.friendly_name, ds.friendly_name)
@@ -2114,6 +2116,94 @@ def test_update_dataset(self):
client.update_dataset(ds, [])
req = conn.api_request.call_args
self.assertEqual(req[1]["headers"]["If-Match"], "etag")
self.assertEqual(req[1].get("query_params"), {})

def test_update_dataset_w_update_mode(self):
PATH = f"projects/{self.PROJECT}/datasets/{self.DS_ID}"
creds = _make_credentials()
client = self._make_one(project=self.PROJECT, credentials=creds)

DESCRIPTION = "DESCRIPTION"
RESOURCE = {
"datasetReference": {"projectId": self.PROJECT, "datasetId": self.DS_ID},
"etag": "etag",
"description": DESCRIPTION,
}
dataset_ref = DatasetReference(self.PROJECT, self.DS_ID)
orig_dataset = Dataset(dataset_ref)
orig_dataset.description = DESCRIPTION
filter_fields = ["description"]

test_cases = [
(None, None),
(UpdateMode.UPDATE_MODE_UNSPECIFIED, "UPDATE_MODE_UNSPECIFIED"),
(UpdateMode.UPDATE_METADATA, "UPDATE_METADATA"),
(UpdateMode.UPDATE_ACL, "UPDATE_ACL"),
(UpdateMode.UPDATE_FULL, "UPDATE_FULL"),
]

for update_mode_arg, expected_param_value in test_cases:
with self.subTest(
update_mode_arg=update_mode_arg,
expected_param_value=expected_param_value,
):
conn = client._connection = make_connection(RESOURCE, RESOURCE)

new_dataset = client.update_dataset(
orig_dataset,
fields=filter_fields,
update_mode=update_mode_arg,
)
self.assertEqual(orig_dataset.description, new_dataset.description)

if expected_param_value:
expected_query_params = {"updateMode": expected_param_value}
else:
expected_query_params = {}

conn.api_request.assert_called_once_with(
method="PATCH",
path="/" + PATH,
data={"description": DESCRIPTION},
timeout=DEFAULT_TIMEOUT,
query_params=expected_query_params,
)

def test_update_dataset_w_invalid_update_mode(self):
creds = _make_credentials()
client = self._make_one(project=self.PROJECT, credentials=creds)

DESCRIPTION = "DESCRIPTION"
resource = {
"datasetReference": {"projectId": self.PROJECT, "datasetId": self.DS_ID},
"etag": "etag",
}

dataset_ref = DatasetReference(self.PROJECT, self.DS_ID)
orig_dataset = Dataset(dataset_ref)
orig_dataset.description = DESCRIPTION
filter_fields = ["description"] # A non-empty list of fields is required

# Mock the connection to prevent actual API calls
# and to provide a minimal valid response if the call were to proceed.
conn = client._connection = make_connection(resource)

test_cases = [
"INVALID_STRING",
123,
123.45,
object(),
]

for invalid_update_mode in test_cases:
with self.subTest(invalid_update_mode=invalid_update_mode):
conn.api_request.reset_mock() # Reset mock for each sub-test
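# update_dataset dereferences ``update_mode.value``, so any truthy
# object lacking a ``value`` attribute raises AttributeError before
# a request is made.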
with self.assertRaises(AttributeError):
client.update_dataset(
orig_dataset,
fields=filter_fields,
update_mode=invalid_update_mode,
)

def test_update_dataset_w_custom_property(self):
# The library should handle sending properties to the API that are not
@@ -2145,6 +2235,7 @@ def test_update_dataset_w_custom_property(self):
data={"newAlphaProperty": "unreleased property"},
path=path,
timeout=DEFAULT_TIMEOUT,
query_params={},
)

self.assertEqual(dataset.dataset_id, self.DS_ID)