feat: Add `bpd.options.display.repr_mode = "anywidget"` to create an interactive display of the results by shuoweil · Pull Request #1820 · googleapis/python-bigquery-dataframes · GitHub
[go: up one dir, main page]

Skip to content

feat: Add bpd.options.display.repr_mode = "anywidget" to create an interactive display of the results #1820

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 21 commits into from
Jun 18, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion bigframes/_config/display_options.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ class DisplayOptions:
max_columns: int = 20
max_rows: int = 25
progress_bar: Optional[str] = "auto"
repr_mode: Literal["head", "deferred"] = "head"
repr_mode: Literal["head", "deferred", "anywidget"] = "head"

max_info_columns: int = 100
max_info_rows: Optional[int] = 200000
Expand Down
4 changes: 3 additions & 1 deletion bigframes/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -251,7 +251,9 @@ def __repr__(self) -> str:
# metadata, like we do with DataFrame.
opts = bigframes.options.display
max_results = opts.max_rows
if opts.repr_mode == "deferred":
# anywidget mode uses the same display logic as the "deferred" mode
# for faster execution
if opts.repr_mode in ("deferred", "anywidget"):
_, dry_run_query_job = self._block._compute_dry_run()
return formatter.repr_query_job(dry_run_query_job)

Expand Down
21 changes: 20 additions & 1 deletion bigframes/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -725,7 +725,9 @@ def __repr__(self) -> str:

opts = bigframes.options.display
max_results = opts.max_rows
if opts.repr_mode == "deferred":
# anywidget mode uses the same display logic as the "deferred" mode
# for faster execution
if opts.repr_mode in ("deferred", "anywidget"):
return formatter.repr_query_job(self._compute_dry_run())

# TODO(swast): pass max_columns and get the true column count back. Maybe
Expand Down Expand Up @@ -774,6 +776,23 @@ def _repr_html_(self) -> str:
if opts.repr_mode == "deferred":
return formatter.repr_query_job(self._compute_dry_run())

if opts.repr_mode == "anywidget":
import anywidget # type: ignore

# create an iterator for the data batches
batches = self.to_pandas_batches()

# get the first page result
try:
first_page = next(iter(batches))
except StopIteration:
first_page = pandas.DataFrame(columns=self.columns)

# Instantiate and return the widget. The widget's frontend will
# handle the display of the table and pagination
return anywidget.AnyWidget(dataframe=first_page)

self._cached()
df = self.copy()
if bigframes.options.display.blob_display:
blob_cols = [
Expand Down
4 changes: 3 additions & 1 deletion bigframes/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -430,7 +430,9 @@ def __repr__(self) -> str:
# metadata, like we do with DataFrame.
opts = bigframes.options.display
max_results = opts.max_rows
if opts.repr_mode == "deferred":
# anywidget mode uses the same display logic as the "deferred" mode
# for faster execution
if opts.repr_mode in ("deferred", "anywidget"):
return formatter.repr_query_job(self._compute_dry_run())

self._cached()
Expand Down
3 changes: 3 additions & 0 deletions mypy.ini
Original file line number Diff line number Diff line change
Expand Up @@ -41,3 +41,6 @@ ignore_missing_imports = True

[mypy-google.cloud.bigtable]
ignore_missing_imports = True

[mypy-anywidget]
ignore_missing_imports = True
149 changes: 149 additions & 0 deletions notebooks/dataframes/anywidget_mode.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "d10bfca4",
"metadata": {},
"outputs": [],
"source": [
"# Copyright 2025 Google LLC\n",
"#\n",
"# Licensed under the Apache License, Version 2.0 (the \"License\");\n",
"# you may not use this file except in compliance with the License.\n",
"# You may obtain a copy of the License at\n",
"#\n",
"# https://www.apache.org/licenses/LICENSE-2.0\n",
"#\n",
"# Unless required by applicable law or agreed to in writing, software\n",
"# distributed under the License is distributed on an \"AS IS\" BASIS,\n",
"# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n",
"# See the License for the specific language governing permissions and\n",
"# limitations under the License."
]
},
{
"cell_type": "markdown",
"id": "acca43ae",
"metadata": {},
"source": [
"# Demo to Show Anywidget mode"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "ca22f059",
"metadata": {},
"outputs": [],
"source": [
"import bigframes.pandas as bpd"
]
},
{
"cell_type": "markdown",
"id": "04406a4d",
"metadata": {},
"source": [
"Set the display option to use anywidget"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "1bc5aaf3",
"metadata": {},
"outputs": [],
"source": [
"bpd.options.display.repr_mode = \"anywidget\""
]
},
{
"cell_type": "markdown",
"id": "0a354c69",
"metadata": {},
"source": [
"Display the dataframe in anywidget mode"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "f289d250",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"Query job 91997f19-1768-4360-afa7-4a431b3e2d22 is DONE. 0 Bytes processed. <a target=\"_blank\" href=\"https://console.cloud.google.com/bigquery?project=bigframes-dev&j=bq:US:91997f19-1768-4360-afa7-4a431b3e2d22&page=queryresults\">Open Job</a>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Computation deferred. Computation will process 171.4 MB\n"
]
}
],
"source": [
"df = bpd.read_gbq(\"bigquery-public-data.usa_names.usa_1910_2013\")\n",
"print(df)"
]
},
{
"cell_type": "markdown",
"id": "3a73e472",
"metadata": {},
"source": [
"Display Series in anywidget mode"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "42bb02ab",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Computation deferred. Computation will process 171.4 MB\n"
]
}
],
"source": [
"test_series = df[\"year\"]\n",
"print(test_series)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.15"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
15 changes: 11 additions & 4 deletions noxfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,9 +77,9 @@
]
UNIT_TEST_LOCAL_DEPENDENCIES: List[str] = []
UNIT_TEST_DEPENDENCIES: List[str] = []
UNIT_TEST_EXTRAS: List[str] = ["tests"]
UNIT_TEST_EXTRAS: List[str] = ["tests", "anywidget"]
UNIT_TEST_EXTRAS_BY_PYTHON: Dict[str, List[str]] = {
"3.12": ["tests", "polars", "scikit-learn"],
"3.12": ["tests", "polars", "scikit-learn", "anywidget"],
}

# 3.10 is needed for Windows tests as it is the only version installed in the
Expand All @@ -106,9 +106,9 @@
SYSTEM_TEST_DEPENDENCIES: List[str] = []
SYSTEM_TEST_EXTRAS: List[str] = []
SYSTEM_TEST_EXTRAS_BY_PYTHON: Dict[str, List[str]] = {
"3.9": ["tests"],
"3.9": ["tests", "anywidget"],
"3.10": ["tests"],
"3.12": ["tests", "scikit-learn", "polars"],
"3.12": ["tests", "scikit-learn", "polars", "anywidget"],
"3.13": ["tests", "polars"],
}

Expand Down Expand Up @@ -276,6 +276,7 @@ def mypy(session):
"types-setuptools",
"types-tabulate",
"polars",
"anywidget",
]
)
| set(SYSTEM_TEST_STANDARD_DEPENDENCIES)
Expand Down Expand Up @@ -518,6 +519,7 @@ def docs(session):
SPHINX_VERSION,
"alabaster",
"recommonmark",
"anywidget",
)

shutil.rmtree(os.path.join("docs", "_build"), ignore_errors=True)
Expand Down Expand Up @@ -560,6 +562,7 @@ def docfx(session):
"alabaster",
"recommonmark",
"gcp-sphinx-docfx-yaml==3.0.1",
"anywidget",
)

shutil.rmtree(os.path.join("docs", "_build"), ignore_errors=True)
Expand Down Expand Up @@ -763,6 +766,7 @@ def notebook(session: nox.Session):
"google-cloud-aiplatform",
"matplotlib",
"seaborn",
"anywidget",
)

notebooks_list = list(pathlib.Path("notebooks/").glob("*/*.ipynb"))
Expand Down Expand Up @@ -805,6 +809,9 @@ def notebook(session: nox.Session):
# continuously tested.
"notebooks/apps/synthetic_data_generation.ipynb",
"notebooks/multimodal/multimodal_dataframe.ipynb", # too slow
# This anywidget notebook uses deferred execution, so it won't
# produce metrics for the performance benchmark script.
"notebooks/dataframes/anywidget_mode.ipynb",
]

# TODO: remove exception for Python 3.13 cloud run adds a runtime for it (internal issue 333742751)
Expand Down
4 changes: 4 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,10 @@
"nox",
"google-cloud-testutils",
],
# install anywidget for SQL
"anywidget": [
"anywidget>=0.9.18",
],
}
extras["all"] = list(sorted(frozenset(itertools.chain.from_iterable(extras.values()))))

Expand Down
2 changes: 1 addition & 1 deletion tests/system/small/test_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -810,7 +810,7 @@ def test_repr_html_w_all_rows(scalars_dfs, session):
+ f"[{len(pandas_df.index)} rows x {len(pandas_df.columns)} columns in total]"
)
assert actual == expected
assert (executions_post - executions_pre) <= 2
assert (executions_post - executions_pre) <= 3


def test_df_column_name_with_space(scalars_dfs):
Expand Down
16 changes: 16 additions & 0 deletions tests/system/small/test_progress_bar.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

import numpy as np
import pandas as pd
import pytest

import bigframes as bf
import bigframes.formatting_helpers as formatting_helpers
Expand Down Expand Up @@ -164,3 +165,18 @@ def test_query_job_dry_run_series(penguins_df_default_index: bf.dataframe.DataFr
with bf.option_context("display.repr_mode", "deferred"):
series_result = repr(penguins_df_default_index["body_mass_g"])
assert EXPECTED_DRY_RUN_MESSAGE in series_result


def test_repr_anywidget_dataframe(penguins_df_default_index: bf.dataframe.DataFrame):
    """Check that repr() of a DataFrame in "anywidget" mode contains the dry-run message."""
    # Skip (rather than fail) when the optional anywidget package is missing.
    pytest.importorskip("anywidget")

    with bf.option_context("display.repr_mode", "anywidget"):
        dataframe_repr = repr(penguins_df_default_index)

    assert EXPECTED_DRY_RUN_MESSAGE in dataframe_repr


def test_repr_anywidget_idex(penguins_df_default_index: bf.dataframe.DataFrame):
    """Check that repr() of an Index in "anywidget" mode contains the dry-run message."""
    # Skip (rather than fail) when the optional anywidget package is missing.
    pytest.importorskip("anywidget")

    with bf.option_context("display.repr_mode", "anywidget"):
        index_repr = repr(penguins_df_default_index.index)

    assert EXPECTED_DRY_RUN_MESSAGE in index_repr
0