From bcae4d28a889ca476c55c4484b8678c86ec46e2c Mon Sep 17 00:00:00 2001 From: Tim Swena Date: Wed, 14 May 2025 11:27:03 -0500 Subject: [PATCH] docs: use partial ordering mode in the quickstart sample --- samples/snippets/quickstart.py | 19 ++++++++++--------- samples/snippets/quickstart_test.py | 2 +- 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/samples/snippets/quickstart.py b/samples/snippets/quickstart.py index c26c6f4442..adc85fa92d 100644 --- a/samples/snippets/quickstart.py +++ b/samples/snippets/quickstart.py @@ -14,16 +14,7 @@ def run_quickstart(project_id: str) -> None: - import bigframes - - session_options = bigframes.BigQueryOptions() - session = bigframes.connect(session_options) - your_gcp_project_id = project_id - query_or_table = "bigquery-public-data.ml_datasets.penguins" - df_session = session.read_gbq(query_or_table) - average_body_mass = df_session["body_mass_g"].mean() - print(f"average_body_mass (df_session): {average_body_mass}") # [START bigquery_bigframes_quickstart] import bigframes.pandas as bpd @@ -33,10 +24,20 @@ def run_quickstart(project_id: str) -> None: # On BigQuery Studio, the project ID is automatically detected. bpd.options.bigquery.project = your_gcp_project_id + # Use "partial" ordering mode to generate more efficient queries, but the + # order of the rows in DataFrames may not be deterministic if you have not + # explicitly sorted it. Some operations that depend on the order, such as + # head(), will not function until you explicitly order the DataFrame. Set the + # ordering mode to "strict" (default) for more pandas compatibility. + bpd.options.bigquery.ordering_mode = "partial" + # Create a DataFrame from a BigQuery table query_or_table = "bigquery-public-data.ml_datasets.penguins" df = bpd.read_gbq(query_or_table) + # Efficiently preview the results using the .peek() method. 
+ df.peek() + # Use the DataFrame just as you would a pandas DataFrame, but calculations # happen in the BigQuery query engine instead of the local system. average_body_mass = df["body_mass_g"].mean() diff --git a/samples/snippets/quickstart_test.py b/samples/snippets/quickstart_test.py index 4abc87d011..a650f8365d 100644 --- a/samples/snippets/quickstart_test.py +++ b/samples/snippets/quickstart_test.py @@ -33,4 +33,4 @@ def test_quickstart( quickstart.run_quickstart(your_project_id) out, _ = capsys.readouterr() - assert "average_body_mass (df_session):" in out + assert "average_body_mass:" in out