8000 feat: add support for table clones by abecerrilsalas · Pull Request #1235 · googleapis/python-bigquery · GitHub
[go: up one dir, main page]

Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
ff9d5cd
feat: add support for table clones
abecerrilsalas Apr 29, 2022
4820c51
Merge branch 'main' of https://github.com/googleapis/python-bigquery …
abecerrilsalas Apr 29, 2022
7a97b1e
Merge branch 'googleapis:main' into apb_feat_add_support_for_table_cl…
abecerrilsalas Apr 29, 2022
ce6014e
feat: clone test
abecerrilsalas Apr 29, 2022
ce791db
Merge branch 'apb_feat_add_support_for_table_clones' of https://githu…
abecerrilsalas Apr 29, 2022
8915b1d
feat: debugging
abecerrilsalas May 2, 2022
e14a95f
feat: more debugging
abecerrilsalas May 2, 2022
ccddd7c
feat: more debugging
abecerrilsalas May 2, 2022
40c81d2
Merge branch 'main' into apb_feat_add_support_for_table_clones
abecerrilsalas May 2, 2022
c7fbaff
feat: even more debugging
abecerrilsalas May 2, 2022
625923d
Merge branch 'apb_feat_add_support_for_table_clones' of https://githu…
abecerrilsalas May 2, 2022
0328df9
feat: debugging test
abecerrilsalas May 2, 2022
911ba1e
feat: even more test debugging
abecerrilsalas May 2, 2022
d382bbb
feat: check
abecerrilsalas May 2, 2022
e705e16
feat: modify test
abecerrilsalas May 2, 2022
dc4f401
feat: deleting print statement
abecerrilsalas May 2, 2022
0bb9cbb
feat: testing
abecerrilsalas May 2, 2022
63e94e9
feat: test update
abecerrilsalas May 3, 2022
8c82e6c
feat: change table name
abecerrilsalas May 3, 2022
7302c2a
Merge branch 'main' into apb_feat_add_support_for_table_clones
abecerrilsalas May 3, 2022
e04b1c6
feat: changing table names
abecerrilsalas May 3, 2022
5370a38
Merge branch 'apb_feat_add_support_for_table_clones' of https://githu…
abecerrilsalas May 3, 2022
fbe0869
feat: debugging table name
abecerrilsalas May 3, 2022
c456b00
feat: cleaning up test
abecerrilsalas May 3, 2022
5b87d2e
feat: degubbing test
abecerrilsalas May 3, 2022
bf2d586
feat: add properties check to test
abecerrilsalas May 6, 2022
ce94065
feat: test change
abecerrilsalas May 6, 2022
78a45ca
feat: added more properties
abecerrilsalas May 6, 2022
ec14b3b
Merge branch 'main' into apb_feat_add_support_for_table_clones
abecerrilsalas May 6, 2022
b991af7
Update samples/snippets/requirements.txt
abecerrilsalas May 6, 2022
b61f18a
Merge branch 'main' into apb_feat_add_support_for_table_clones
abecerrilsalas May 9, 2022
2ed89bf
Merge branch 'main' into apb_feat_add_support_for_table_clones
steffnay May 10, 2022
572f440
Merge branch 'main' into apb_feat_add_support_for_table_clones
steffnay May 16, 2022
1f01314
Merge branch 'main' into apb_feat_add_support_for_table_clones
steffnay May 20, 2022
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/reference.rst
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,7 @@ Table
table.Row
table.RowIterator
table.SnapshotDefinition
table.CloneDefinition
table.Table
table.TableListItem
table.TableReference
Expand Down
2 changes: 2 additions & 0 deletions google/cloud/bigquery/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,7 @@
from google.cloud.bigquery.table import RangePartitioning
from google.cloud.bigquery.table import Row
from google.cloud.bigquery.table import SnapshotDefinition
from google.cloud.bigquery.table import CloneDefinition
from google.cloud.bigquery.table import Table
from google.cloud.bigquery.table import TableReference
from google.cloud.bigquery.table import TimePartitioningType
Expand Down Expand Up @@ -132,6 +133,7 @@
"RangePartitioning",
"Row",
"SnapshotDefinition",
"CloneDefinition",
"TimePartitioning",
"TimePartitioningType",
# Jobs
Expand Down
3 changes: 3 additions & 0 deletions google/cloud/bigquery/job/copy_.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,9 @@ class OperationType:
SNAPSHOT = "SNAPSHOT"
"""The source table type is TABLE and the destination table type is SNAPSHOT."""

CLONE = "CLONE"
"""The source table type is TABLE and the destination table type is CLONE."""

RESTORE = "RESTORE"
"""The source table type is SNAPSHOT and the destination table type is TABLE."""

Expand Down
37 changes: 37 additions & 0 deletions google/cloud/bigquery/table.py
Original file line number Diff line number Diff line change
Expand Up @@ -356,6 +356,7 @@ class Table(_TableBase):
"time_partitioning": "timePartitioning",
"schema": "schema",
"snapshot_definition": "snapshotDefinition",
"clone_definition": "cloneDefinition",
"streaming_buffer": "streamingBuffer",
"self_link": "selfLink",
"time_partitioning": "timePartitioning",
Expand Down Expand Up @@ -929,6 +930,19 @@ def snapshot_definition(self) -> Optional["SnapshotDefinition"]:
snapshot_info = SnapshotDefinition(snapshot_info)
return snapshot_info

@property
def clone_definition(self) -> Optional["CloneDefinition"]:
    """Clone metadata for this table, or ``None`` if the resource has none.

    The raw value is read from the table resource under the API field that
    ``_PROPERTY_TO_API_FIELD`` maps to ``clone_definition`` and, when
    present, wrapped in a :class:`CloneDefinition`.

    See: https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#Table.FIELDS.clone_definition
    """
    api_field = self._PROPERTY_TO_API_FIELD["clone_definition"]
    raw_definition = self._properties.get(api_field)
    if raw_definition is None:
        return None
    return CloneDefinition(raw_definition)

@classmethod
def from_string(cls, full_table_id: str) -> "Table":
"""Construct a table from fully-qualified table ID.
Expand Down Expand Up @@ -1304,6 +1318,29 @@ def __init__(self, resource: Dict[str, Any]):
)


class CloneDefinition:
    """Base table reference and clone time parsed from a clone definition.

    See https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#clonedefinition

    Args:
        resource: Clone definition representation returned from the API.
    """

    def __init__(self, resource: Dict[str, Any]):
        # Each attribute is ``None`` unless its key is present in ``resource``.
        self.base_table_reference = (
            TableReference.from_api_repr(resource["baseTableReference"])
            if "baseTableReference" in resource
            else None
        )
        self.clone_time = (
            google.cloud._helpers._rfc3339_to_datetime(resource["cloneTime"])
            if "cloneTime" in resource
            else None
        )


class Row(object):
"""A BigQuery row.

Expand Down
2 changes: 1 addition & 1 deletion samples/magics/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ grpcio==1.46.1
ipython===7.31.1; python_version == '3.7'
ipython===8.0.1; python_version == '3.8'
ipython==8.3.0; python_version >= '3.9'
matplotlib==3.5.2
matplotlib==3.5.1
pandas===1.3.5; python_version == '3.7'
pandas==1.4.2; python_version >= '3.8'
pyarrow==8.0.0
Expand Down
54 changes: 54 additions & 0 deletions tests/system/test_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -2190,3 +2190,57 @@ def test_table_snapshots(dataset_id):
rows_iter = client.list_rows(source_table_path)
rows = sorted(row.values() for row in rows_iter)
assert rows == [(1, "one"), (2, "two")]


def test_table_clones(dataset_id):
    """Clone a populated table via a copy job and compare it to its source."""
    from google.cloud.bigquery import CopyJobConfig
    from google.cloud.bigquery import OperationType

    client = Config.CLIENT

    table_path_source = f"{client.project}.{dataset_id}.test_table_clone"
    clone_table_path = f"{table_path_source}_clone"

    # Create the source table up front so the column order is fixed by the
    # schema given here.
    columns = [
        bigquery.SchemaField("foo", "INTEGER"),
        bigquery.SchemaField("bar", "STRING"),
    ]
    create_table = helpers.retry_403(client.create_table)
    source_table = create_table(Table(table_path_source, schema=columns))

    # Load the initial rows and wait for the load job to finish.
    initial_rows = [{"foo": 1, "bar": "one"}, {"foo": 2, "bar": "two"}]
    client.load_table_from_json(initial_rows, source_table).result()

    # Run a copy job with the CLONE operation type from source to destination.
    clone_config = CopyJobConfig()
    clone_config.operation_type = OperationType.CLONE
    clone_config.write_disposition = bigquery.WriteDisposition.WRITE_TRUNCATE

    client.copy_table(
        sources=table_path_source,
        destination=clone_table_path,
        job_config=clone_config,
    ).result()

    # Both the source table and the clone must hold the same two rows.
    for table_path in (table_path_source, clone_table_path):
        listed = sorted(row.values() for row in client.list_rows(table_path))
        assert listed == [(1, "one"), (2, "two")]

    # Table-level properties of the clone must match those of the source.
    source_table_props = client.get_table(table_path_source)
    clone_table_props = client.get_table(clone_table_path)

    assert source_table_props.schema == clone_table_props.schema
    assert source_table_props.num_bytes == clone_table_props.num_bytes
    assert source_table_props.num_rows == clone_table_props.num_rows
    assert source_table_props.description == clone_table_props.description
74 changes: 74 additions & 0 deletions tests/unit/test_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -841,6 +841,40 @@ def test_snapshot_definition_set(self):
2010, 9, 28, 10, 20, 30, 123000, tzinfo=UTC
)

def test_clone_definition_not_set(self):
    # A table built only from a reference carries no clone definition.
    table_ref = DatasetReference(self.PROJECT, self.DS_ID).table(self.TABLE_NAME)
    table = self._make_one(table_ref)

    assert table.clone_definition is None

def test_clone_definition_set(self):
    from google.cloud._helpers import UTC
    from google.cloud.bigquery.table import CloneDefinition

    table_ref = DatasetReference(self.PROJECT, self.DS_ID).table(self.TABLE_NAME)
    table = self._make_one(table_ref)

    # Inject a raw clone definition straight into the table resource.
    base_table_resource = {
        "projectId": "project_x",
        "datasetId": "dataset_y",
        "tableId": "table_z",
    }
    table._properties["cloneDefinition"] = {
        "baseTableReference": base_table_resource,
        "cloneTime": "2010-09-28T10:20:30.123Z",
    }

    clone = table.clone_definition

    expected_path = "/projects/project_x/datasets/dataset_y/tables/table_z"
    expected_clone_time = datetime.datetime(
        2010, 9, 28, 10, 20, 30, 123000, tzinfo=UTC
    )

    assert isinstance(clone, CloneDefinition)
    assert clone.base_table_reference.path == expected_path
    assert clone.clone_time == expected_clone_time

def test_description_setter_bad_value(self):
dataset = DatasetReference(self.PROJECT, self.DS_ID)
table_ref = dataset.table(self.TABLE_NAME)
Expand Down Expand Up @@ -1789,6 +1823,46 @@ def test_ctor_full_resource(self):
assert instance.snapshot_time == expected_time


class TestCloneDefinition:
    """Constructor tests for ``CloneDefinition``."""

    @staticmethod
    def _get_target_class():
        # Imported at call time rather than at module import.
        from google.cloud.bigquery.table import CloneDefinition

        return CloneDefinition

    @classmethod
    def _make_one(cls, *args, **kwargs):
        target_class = cls._get_target_class()
        return target_class(*args, **kwargs)

    def test_ctor_empty_resource(self):
        # With no keys in the resource, both attributes stay unset.
        clone = self._make_one(resource={})

        assert clone.base_table_reference is None
        assert clone.clone_time is None

    def test_ctor_full_resource(self):
        from google.cloud._helpers import UTC
        from google.cloud.bigquery.table import TableReference

        base_table_resource = {
            "projectId": "my-project",
            "datasetId": "your-dataset",
            "tableId": "our-table",
        }
        clone = self._make_one(
            {
                "baseTableReference": base_table_resource,
                "cloneTime": "2005-06-07T19:35:02.123Z",
            }
        )

        # The base table is parsed into a TableReference.
        assert clone.base_table_reference == TableReference.from_string(
            "my-project.your-dataset.our-table"
        )

        # The RFC 3339 timestamp is parsed into a UTC-aware datetime.
        assert clone.clone_time == datetime.datetime(
            2005, 6, 7, 19, 35, 2, 123000, tzinfo=UTC
        )


class TestRow(unittest.TestCase):
def test_row(self):
from google.cloud.bigquery.table import Row
Expand Down
0