Add bigquery snippets · saicheems/python-docs-samples@37ba6d3 · GitHub
[go: up one dir, main page]

Skip to content

Commit 37ba6d3

Browse files
author
Jon Wayne Parrott
committed
Add bigquery snippets
Change-Id: I148b9f444ad5e481d4f091b53121873b51de191e
1 parent d1ce64d commit 37ba6d3

File tree

2 files changed

+247
-0
lines changed

2 files changed

+247
-0
lines changed

bigquery/cloud-client/snippets.py

Lines changed: 172 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,172 @@
1+
#!/usr/bin/env python
2+
3+
# Copyright 2016 Google Inc. All Rights Reserved.
4+
#
5+
# Licensed under the Apache License, Version 2.0 (the "License");
6+
# you may not use this file except in compliance with the License.
7+
# You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing, software
12+
# distributed under the License is distributed on an "AS IS" BASIS,
13+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
# See the License for the specific language governing permissions and
15+
# limitations under the License.
16+
17+
"""Samples that demonstrate basic operations in the BigQuery API.
18+
19+
For more information, see the README.md under /bigquery.
20+
21+
Example invocation:
22+
$ python snippets.py list-datasets
23+
24+
The dataset and table should already exist.
25+
"""
26+
27+
import argparse
28+
29+
from gcloud import bigquery
30+
31+
32+
def list_projects():
    """Placeholder for listing projects; always raises NotImplementedError.

    The exception message is the URL of the upstream gcloud-python issue
    that this sample is waiting on.
    """
    tracking_issue = (
        'https://github.com/GoogleCloudPlatform/gcloud-python/issues/2143')
    raise NotImplementedError(tracking_issue)
35+
36+
37+
def list_datasets(project=None):
    """Lists all datasets in a given project.

    If no project is specified, then the currently active project is used
    """
    # NOTE: the docstring above is also passed to argparse as the help text
    # of the 'list-datasets' sub-command, so its wording is left untouched.
    client = bigquery.Client(project=project)

    # Walk every result page first; names are printed only once the whole
    # listing has been collected.
    found = []
    next_token = None
    has_more = True

    while has_more:
        page, next_token = client.list_datasets(page_token=next_token)
        found.extend(page)
        # An empty/None token means the page just read was the last one.
        has_more = bool(next_token)

    for entry in found:
        print(entry.name)
57+
58+
59+
def list_tables(dataset_name, project=None):
    """Lists all of the tables in a given dataset.

    If no project is specified, then the currently active project is used.
    """
    # NOTE: the docstring above is also passed to argparse as the help text
    # of the 'list-tables' sub-command, so it is kept short.
    #
    # dataset_name: name of the dataset whose tables are printed.
    # project: project used to build the client; None lets the client pick
    #     its default.
    # Returns None; output goes to stdout, one table name per line.
    bigquery_client = bigquery.Client(project=project)
    dataset = bigquery_client.dataset(dataset_name)

    if not dataset.exists():
        print('Dataset {} does not exist.'.format(dataset_name))
        # Stop here: without this return the code below would still try to
        # list the tables of a dataset that was just reported as missing.
        return

    # Collect every page before printing anything.
    tables = []
    page_token = None

    while True:
        results, page_token = dataset.list_tables(page_token=page_token)
        tables.extend(results)

        # No token means the page just read was the last one.
        if not page_token:
            break

    for table in tables:
        print(table.name)
82+
83+
84+
def list_rows(dataset_name, table_name, project=None):
    """Prints rows in the given table.

    Will print 25 rows at most for brevity as tables can contain large amounts
    of rows.

    If no project is specified, then the currently active project is used.
    """
    # NOTE: the docstring above is also passed to argparse as the help text
    # of the 'list-rows' sub-command, so it is kept short.
    #
    # dataset_name / table_name: identify the table whose rows are printed.
    # project: project used to build the client; None lets the client pick
    #     its default.
    # Returns None; output goes to stdout (header line, then one line per
    # row).
    row_limit = 25

    bigquery_client = bigquery.Client(project=project)
    dataset = bigquery_client.dataset(dataset_name)
    table = dataset.table(table_name)

    if not table.exists():
        print('Table {}:{} does not exist.'.format(dataset_name, table_name))
        # Stop here: reloading or fetching from a missing table would only
        # fail right after the message above.
        return

    # Reload the table so that the schema is available.
    table.reload()

    rows = []
    page_token = None

    while len(rows) < row_limit:
        # Ask only for the rows still missing so that a short first page
        # followed by a full second page cannot push the total past the cap.
        results, _, page_token = table.fetch_data(
            max_results=row_limit - len(rows), page_token=page_token)
        rows.extend(results)

        # No token means there is nothing left to fetch.
        if not page_token:
            break

    # Use format to create a simple table. The '!s' conversion turns every
    # value into a string before padding; without it the '<16' spec is handed
    # to each value's own __format__, which raises TypeError for None and is
    # interpreted as a strftime pattern by datetime objects.
    format_string = '{!s:<16} ' * len(table.schema)

    # Print schema field names
    field_names = [field.name for field in table.schema]
    print(format_string.format(*field_names))

    for row in rows:
        print(format_string.format(*row))
122+
123+
124+
def delete_table(dataset_name, table_name, project=None):
    """Deletes a table in a given dataset.

    If no project is specified, then the currently active project is used.
    """
    # NOTE: the docstring above is also passed to argparse as the help text
    # of the 'delete-table' sub-command, so its wording is left untouched.
    client = bigquery.Client(project=project)

    # Resolve dataset -> table and delete it in one chained expression.
    client.dataset(dataset_name).table(table_name).delete()

    # Only reached when delete() did not raise.
    confirmation = 'Table {}:{} deleted.'.format(dataset_name, table_name)
    print(confirmation)
136+
137+
138+
if __name__ == '__main__':
    # Command-line entry point: one sub-command per sample function, plus a
    # global --project option that is forwarded as the last argument.
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument('--project', default=None)

    subparsers = parser.add_subparsers(dest='command')

    # (sub-command name, sample function, positional argument names)
    command_table = (
        ('list-datasets', list_datasets, ()),
        ('list-tables', list_tables, ('dataset_name',)),
        ('list-rows', list_rows, ('dataset_name', 'table_name')),
        ('delete-table', delete_table, ('dataset_name', 'table_name')),
    )

    handlers = {}
    for command_name, handler, positionals in command_table:
        # Each function's docstring doubles as the sub-command help text.
        sub_parser = subparsers.add_parser(
            command_name, help=handler.__doc__)
        for positional in positionals:
            sub_parser.add_argument(positional)
        handlers[command_name] = (handler, positionals)

    args = parser.parse_args()

    # An unrecognised or absent command falls through without doing anything.
    selected = handlers.get(args.command)
    if selected is not None:
        handler, positionals = selected
        call_args = [getattr(args, positional) for positional in positionals]
        call_args.append(args.project)
        handler(*call_args)
Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
# Copyright 2015, Google, Inc.
2+
# Licensed under the Apache License, Version 2.0 (the "License");
3+
# you may not use this file except in compliance with the License.
4+
# You may obtain a copy of the License at
5+
#
6+
# http://www.apache.org/licenses/LICENSE-2.0
7+
#
8+
# Unless required by applicable law or agreed to in writing, software
9+
# distributed under the License is distributed on an "AS IS" BASIS,
10+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11+
# See the License for the specific language governing permissions and
12+
# limitations under the License.
13+
14+
from gcloud import bigquery
15+
import pytest
16+
import snippets
17+
18+
19+
DATASET_ID = 'test_dataset'
20+
TABLE_ID = 'test_import_table'
21+
22+
23+
@pytest.mark.xfail(
    reason='https://github.com/GoogleCloudPlatform/gcloud-python/issues/2143',
    strict=True)
def test_list_projects():
    """Calls the list_projects sample; marked as a strict expected failure."""
    # No need to check the output, lack of exception is enough.
    snippets.list_projects()
29+
30+
31+
def test_list_datasets(capsys):
    """Checks that the list_datasets sample prints the test dataset name."""
    # Requires the dataset to have been created in the test project.
    snippets.list_datasets()

    # Only stdout matters here; the second element (stderr) is ignored.
    captured_stdout = capsys.readouterr()[0]

    assert DATASET_ID in captured_stdout
38+
39+
40+
def test_list_tables(capsys):
    """Checks that the list_tables sample prints the test table name."""
    # Requires the dataset and table to have been created in the test project.
    snippets.list_tables(DATASET_ID)

    # Only stdout matters here; the second element (stderr) is ignored.
    captured_stdout = capsys.readouterr()[0]

    assert TABLE_ID in captured_stdout
47+
48+
49+
def test_list_rows(capsys):
    """Checks that the list_rows sample prints the table's column headers."""
    # Requires the dataset and table to have been created in the test project.
    #
    # Only the schema header is checked. It's okay if the table is empty as
    # long as there aren't any errors.
    snippets.list_rows(DATASET_ID, TABLE_ID)

    captured_stdout = capsys.readouterr()[0]

    for column_name in ('Name', 'Age'):
        assert column_name in captured_stdout
61+
62+
63+
def test_delete_table(capsys):
    """Checks that the delete_table sample removes an existing table."""
    # Make sure there is a table to delete, creating it if necessary.
    client = bigquery.Client()
    doomed_table = client.dataset(DATASET_ID).table('test_delete_table')

    table_missing = not doomed_table.exists()
    if table_missing:
        doomed_table.schema = [bigquery.SchemaField('id', 'INTEGER')]
        doomed_table.create()

    snippets.delete_table(DATASET_ID, doomed_table.name)

    assert not doomed_table.exists()

0 commit comments

Comments
 (0)
0