Update pandas/bqstorage samples to latest library changes. (#2413) · aldric/python-docs-samples@27eddc7 · GitHub
[go: up one dir, main page]

Skip to content

Commit 27eddc7

Browse files
authored
Update pandas/bqstorage samples to latest library changes. (GoogleCloudPlatform#2413)
1 parent 9e8ac90 commit 27eddc7

File tree

5 files changed

+78
-60
lines changed

5 files changed

+78
-60
lines changed
Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,5 @@
1-
google-cloud-bigquery[pandas,pyarrow]==1.9.0
2-
pandas-gbq==0.9.0
1+
google-cloud-bigquery==1.20.0
2+
google-cloud-bigquery-storage==0.7.0
3+
pandas==0.25.1
4+
pandas-gbq==0.11.0
5+
pyarrow==0.14.1

bigquery/pandas-gbq-migration/samples_test.py

Lines changed: 59 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,41 @@ def test_pandas_gbq_query():
8181
assert len(df) > 0
8282

8383

84+
def test_client_library_query_bqstorage():
85+
# [START bigquery_migration_client_library_query_bqstorage]
86+
import google.auth
87+
from google.cloud import bigquery
88+
from google.cloud import bigquery_storage_v1beta1
89+
90+
# Create a BigQuery client and a BigQuery Storage API client with the same
91+
# credentials to avoid authenticating twice.
92+
credentials, project_id = google.auth.default(
93+
scopes=["https://www.googleapis.com/auth/cloud-platform"]
94+
)
95+
client = bigquery.Client(credentials=credentials, project=project_id)
96+
bqstorage_client = bigquery_storage_v1beta1.BigQueryStorageClient(
97+
credentials=credentials
98+
)
99+
sql = "SELECT * FROM `bigquery-public-data.irs_990.irs_990_2012`"
100+
101+
# Use a BigQuery Storage API client to download results more quickly.
102+
df = client.query(sql).to_dataframe(bqstorage_client=bqstorage_client)
103+
# [END bigquery_migration_client_library_query_bqstorage]
104+
assert len(df) > 0
105+
106+
107+
def test_pandas_gbq_query_bqstorage():
108+
# [START bigquery_migration_pandas_gbq_query_bqstorage]
109+
import pandas
110+
111+
sql = "SELECT * FROM `bigquery-public-data.irs_990.irs_990_2012`"
112+
113+
# Use the BigQuery Storage API to download results more quickly.
114+
df = pandas.read_gbq(sql, dialect='standard', use_bqstorage_api=True)
115+
# [END bigquery_migration_pandas_gbq_query_bqstorage]
116+
assert len(df) > 0
117+
118+
84119
def test_client_library_legacy_query():
85120
# [START bigquery_migration_client_library_query_legacy]
86121
from google.cloud import bigquery
@@ -184,16 +219,28 @@ def test_client_library_upload_from_dataframe(temp_dataset):
184219
}
185220
)
186221
client = bigquery.Client()
187-
dataset_ref = client.dataset('my_dataset')
222+
table_id = 'my_dataset.new_table'
188223
# [END bigquery_migration_client_library_upload_from_dataframe]
189-
dataset_ref = client.dataset(temp_dataset.dataset_id)
224+
table_id = (
225+
temp_dataset.dataset_id
226+
+ ".test_client_library_upload_from_dataframe"
227+
)
190228
# [START bigquery_migration_client_library_upload_from_dataframe]
191-
table_ref = dataset_ref.table('new_table')
229+
# Since string columns use the "object" dtype, pass in a (partial) schema
230+
# to ensure the correct BigQuery data type.
231+
job_config = bigquery.LoadJobConfig(schema=[
232+
bigquery.SchemaField("my_string", "STRING"),
233+
])
234+
235+
job = client.load_table_from_dataframe(
236+
df, table_id, job_config=job_config
237+
)
192238

193-
client.load_table_from_dataframe(df, table_ref).result()
239+
# Wait for the load job to complete.
240+
job.result()
194241
# [END bigquery_migration_client_library_upload_from_dataframe]
195242
client = bigquery.Client()
196-
table = client.get_table(table_ref)
243+
table = client.get_table(table_id)
197244
assert table.num_rows == 3
198245

199246

@@ -209,16 +256,16 @@ def test_pandas_gbq_upload_from_dataframe(temp_dataset):
209256
'my_float64': [4.0, 5.0, 6.0],
210257
}
211258
)
212-
full_table_id = 'my_dataset.new_table'
213-
project_id = 'my-project-id'
259+
table_id = 'my_dataset.new_table'
214260
# [END bigquery_migration_pandas_gbq_upload_from_dataframe]
215-
table_id = 'new_table'
216-
full_table_id = '{}.{}'.format(temp_dataset.dataset_id, table_id)
217-
project_id = os.environ['GCLOUD_PROJECT']
261+
table_id = (
262+
temp_dataset.dataset_id
263+
+ ".test_pandas_gbq_upload_from_dataframe"
264+
)
218265
# [START bigquery_migration_pandas_gbq_upload_from_dataframe]
219266

220-
df.to_gbq(full_table_id, project_id=project_id)
267+
df.to_gbq(table_id)
221268
# [END bigquery_migration_pandas_gbq_upload_from_dataframe]
222269
client = bigquery.Client()
223-
table = client.get_table(temp_dataset.table(table_id))
270+
table = client.get_table(table_id)
224271
assert table.num_rows == 3

bigquery_storage/to_dataframe/jupyter_test.py

Lines changed: 8 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -75,9 +75,6 @@ def test_jupyter_small_query(ipython):
7575
assert "stackoverflow" in ip.user_ns # verify that variable exists
7676

7777

78-
@pytest.mark.skipif(
79-
"TRAVIS" in os.environ, reason="Not running long-running queries on Travis"
80-
)
8178
def test_jupyter_tutorial(ipython):
8279
ip = IPython.get_ipython()
8380
ip.extension_manager.load_extension("google.cloud.bigquery")
@@ -86,33 +83,18 @@ def test_jupyter_tutorial(ipython):
8683
# speed-up of using the BigQuery Storage API to download the results.
8784
sample = """
8885
# [START bigquerystorage_jupyter_tutorial_query]
89-
%%bigquery nodejs_deps --use_bqstorage_api
90-
SELECT
91-
dependency_name,
92-
dependency_platform,
93-
project_name,
94-
project_id,
95-
version_number,
96-
version_id,
97-
dependency_kind,
98-
optional_dependency,
99-
dependency_requirements,
100-
dependency_project_id
101-
FROM
102-
`bigquery-public-data.libraries_io.dependencies`
103-
WHERE
104-
LOWER(dependency_platform) = 'npm'
105-
LIMIT 2500000
86+
%%bigquery tax_forms --use_bqstorage_api
87+
SELECT * FROM `bigquery-public-data.irs_990.irs_990_2012`
10688
# [END bigquerystorage_jupyter_tutorial_query]
10789
"""
10890
result = ip.run_cell(_strip_region_tags(sample))
10991
result.raise_error() # Throws an exception if the cell failed.
11092

111-
assert "nodejs_deps" in ip.user_ns # verify that variable exists
112-
nodejs_deps = ip.user_ns["nodejs_deps"]
93+
assert "tax_forms" in ip.user_ns # verify that variable exists
94+
tax_forms = ip.user_ns["tax_forms"]
11395

11496
# [START bigquerystorage_jupyter_tutorial_results]
115-
nodejs_deps.head()
97+
tax_forms.head()
11698
# [END bigquerystorage_jupyter_tutorial_results]
11799

118100
# [START bigquerystorage_jupyter_tutorial_context]
@@ -123,26 +105,11 @@ def test_jupyter_tutorial(ipython):
123105

124106
sample = """
125107
# [START bigquerystorage_jupyter_tutorial_query_default]
126-
%%bigquery java_deps
127-
SELECT
128-
dependency_name,
129-
dependency_platform,
130-
project_name,
131-
project_id,
132-
version_number,
133-
version_id,
134-
dependency_kind,
135-
optional_dependency,
136-
dependency_requirements,
137-
dependency_project_id
138-
FROM
139-
`bigquery-public-data.libraries_io.dependencies`
140-
WHERE
141-
LOWER(dependency_platform) = 'maven'
142-
LIMIT 2500000
108+
%%bigquery tax_forms
109+
SELECT * FROM `bigquery-public-data.irs_990.irs_990_2012`
143110
# [END bigquerystorage_jupyter_tutorial_query_default]
144111
"""
145112
result = ip.run_cell(_strip_region_tags(sample))
146113
result.raise_error() # Throws an exception if the cell failed.
147114

148-
assert "java_deps" in ip.user_ns # verify that variable exists
115+
assert "tax_forms" in ip.user_ns # verify that variable exists
Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
google-auth==1.6.2
2-
google-cloud-bigquery-storage==0.6.0
3-
google-cloud-bigquery==1.17.0
4-
pyarrow==0.13.0
2+
google-cloud-bigquery-storage==0.7.0
3+
google-cloud-bigquery==1.20.0
4+
pyarrow==0.14.1
55
ipython==7.2.0
6-
pandas==0.24.2
6+
pandas==0.25.1

noxfile.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -167,7 +167,8 @@ def _setup_appengine_sdk(session):
167167
PY3_ONLY_SAMPLES = [
168168
sample for sample in ALL_TESTED_SAMPLES
169169
if (sample.startswith('./appengine/standard_python37')
170-
or sample.startswith('./functions/'))]
170+
or sample.startswith('./functions/')
171+
or sample.startswith('./bigquery/pandas-gbq-migration'))]
171172
NON_GAE_STANDARD_SAMPLES_PY2 = sorted(list((
172173
set(ALL_TESTED_SAMPLES) -
173174
set(GAE_STANDARD_SAMPLES)) -

0 commit comments

Comments
 (0)
0