From 08f6c4c3746b95757cead06f9d542b5427d64b9c Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Mon, 23 Jun 2025 22:24:14 +0000 Subject: [PATCH 01/23] code update --- bigframes/dataframe.py | 20 +-- bigframes/display/__init__.py | 27 ++++ bigframes/display/anywidget.py | 174 ++++++++++++++++++++++ notebooks/dataframes/anywidget_mode.ipynb | 50 +++++-- noxfile.py | 2 + tests/system/small/test_progress_bar.py | 15 +- 6 files changed, 262 insertions(+), 26 deletions(-) create mode 100644 bigframes/display/__init__.py create mode 100644 bigframes/display/anywidget.py diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py index 69b251fd5b..79aadfc696 100644 --- a/bigframes/dataframe.py +++ b/bigframes/dataframe.py @@ -780,20 +780,16 @@ def _repr_html_(self) -> str: return formatter.repr_query_job(self._compute_dry_run()) if opts.repr_mode == "anywidget": - import anywidget # type: ignore - - # create an iterator for the data batches - batches = self.to_pandas_batches() - - # get the first page result try: - first_page = next(iter(batches)) - except StopIteration: - first_page = pandas.DataFrame(columns=self.columns) + from bigframes import display - # Instantiate and return the widget. The widget's frontend will - # handle the display of the table and pagination - return anywidget.AnyWidget(dataframe=first_page) + return display.TableWidget(self) + except AttributeError: + # Fallback if anywidget is not available + warnings.warn( + "Anywidget mode is not available, falling back to deferred mode." + ) + return formatter.repr_query_job(self._compute_dry_run()) self._cached() df = self.copy() diff --git a/bigframes/display/__init__.py b/bigframes/display/__init__.py new file mode 100644 index 0000000000..5cd8bbb6e4 --- /dev/null +++ b/bigframes/display/__init__.py @@ -0,0 +1,27 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from __future__ import annotations + +import warnings + +try: + import anywidget # noqa + + from .anywidget import TableWidget # noqa + + __all__ = ["TableWidget"] +except ImportError: + msg = "Anywidget mode not available as anywidget is not installed." + warnings.warn(msg) + __all__ = [] diff --git a/bigframes/display/anywidget.py b/bigframes/display/anywidget.py new file mode 100644 index 0000000000..1f108f29e3 --- /dev/null +++ b/bigframes/display/anywidget.py @@ -0,0 +1,174 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import uuid + +import anywidget # type: ignore +import pandas as pd +import traitlets + +import bigframes + + +class TableWidget(anywidget.AnyWidget): + """ + An interactive, paginated table widget for BigFrames DataFrames. + """ + + _esm = """ + function render({ model, el }) { + const container = document.createElement('div'); + container.innerHTML = model.get('table_html'); + + const buttonContainer = document.createElement('div'); + const prevPage = document.createElement('button'); + const label = document.createElement('span'); + const nextPage = document.createElement('button'); + prevPage.type = 'button'; + nextPage.type = 'button'; + prevPage.textContent = 'Prev'; + nextPage.textContent = 'Next'; + + // update button states and label + function updateButtonStates() { + const totalPages = Math.ceil(model.get('row_count') / model.get('page_size')); + const currentPage = model.get('page'); + + // Update label + label.textContent = `Page ${currentPage + 1} of ${totalPages}`; + + // Update button states + prevPage.disabled = currentPage === 0; + nextPage.disabled = currentPage >= totalPages - 1; + } + + // Initial button state setup + updateButtonStates(); + + prevPage.addEventListener('click', () => { + let newPage = model.get('page') - 1; + if (newPage < 0) { + newPage = 0; + } + console.log(`Setting page to ${newPage}`) + model.set('page', newPage); + model.save_changes(); + }); + + nextPage.addEventListener('click', () => { + const newPage = model.get('page') + 1; + console.log(`Setting page to ${newPage}`) + model.set('page', newPage); + model.save_changes(); + }); + + model.on('change:table_html', () => { + container.innerHTML = model.get('table_html'); + updateButtonStates(); // Update button states when table changes + }); + + buttonContainer.appendChild(prevPage); + buttonContainer.appendChild(label); + buttonContainer.appendChild(nextPage); + el.appendChild(container); + el.appendChild(buttonContainer); + } + export default { render }; + """ + + page = traitlets.Int(0).tag(sync=True) + page_size = traitlets.Int(25).tag(sync=True) + row_count = traitlets.Int(0).tag(sync=True) + table_html = traitlets.Unicode().tag(sync=True) + + def __init__(self, dataframe): + """ + Initialize the TableWidget. + + Args: + dataframe: The Bigframes Dataframe to display + """ + super().__init__() + self._dataframe = dataframe + + # respect display options + self.page_size = bigframes.options.display.max_rows + + self._batches = dataframe.to_pandas_batches(page_size=self.page_size) + self._cached_data = pd.DataFrame(columns=self._dataframe.columns) + self._table_id = str(uuid.uuid4()) + self._all_data_loaded = False + + # store the iterator as an instance variable + self._batch_iterator = None + + # len(dataframe) is expensive, since it will trigger a + # SELECT COUNT(*) query. It is a must have however. + self.row_count = len(dataframe) + + # get the initial page + self._set_table_html() + + def _get_next_batch(self): + """Gets the next batch of data from the batches generator.""" + if self._all_data_loaded: + return False + + try: + iterator = self._get_batch_iterator() + batch = next(iterator) + self._cached_data = pd.concat([self._cached_data, batch], ignore_index=True) + return True + except StopIteration: + self._all_data_loaded = True + # update row count if we loaded all data + if self.row_count == 0: + self.row_count = len(self._cached_data) + return False + except Exception as e: + raise RuntimeError(f"Error during batch processing: {str(e)}") from e + + def _get_batch_iterator(self): + """Get batch Iterator.""" + if self._batch_iterator is None: + self._batch_iterator = iter(self._batches) + return self._batch_iterator + + def _set_table_html(self): + """Sets the current html data based on the current page and page size.""" + start = self.page * self.page_size + end = start + self.page_size + + # fetch more dat if the requested page is outside our cache + while len(self._cached_data) < end: + prev_len = len(self._cached_data) + self._get_next_batch() + if len(self._cached_data) == prev_len: + break + # Get the data fro the current page + page_data = self._cached_data.iloc[start:end] + + # Generate HTML table + self.table_html = page_data.to_html( + index=False, + max_rows=None, + table_id=f"table-{self._table_id}", + classes="table table-striped table-hover", + escape=False, + ) + + @traitlets.observe("page") + def _page_changed(self, change): + """Handler for when the page nubmer is changed from the frontend""" + self._set_table_html() diff --git a/notebooks/dataframes/anywidget_mode.ipynb b/notebooks/dataframes/anywidget_mode.ipynb index c54f52da59..2ec8e485d1 100644 --- a/notebooks/dataframes/anywidget_mode.ipynb +++ b/notebooks/dataframes/anywidget_mode.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "id": "d10bfca4", "metadata": {}, "outputs": [], @@ -32,7 +32,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 1, "id": "ca22f059", "metadata": {}, "outputs": [], @@ -50,7 +50,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 2, "id": "1bc5aaf3", "metadata": {}, "outputs": [], @@ -68,14 +68,14 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 3, "id": "f289d250", "metadata": {}, "outputs": [ { "data": { "text/html": [ - "Query job 91997f19-1768-4360-afa7-4a431b3e2d22 is DONE. 0 Bytes processed. Open Job" + "Query job 4d7057f3-6b68-46b2-b1e3-1dc71cb4682b is DONE. 0 Bytes processed. Open Job" ], "text/plain": [ "" @@ -107,21 +107,45 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "id": "42bb02ab", "metadata": {}, + "outputs": [], + "source": [ + "test_series = df[\"year\"]\n", + "print(test_series)" + ] + }, + { + "cell_type": "markdown", + "id": "7bcf1bb7", + "metadata": {}, + "source": [ + "Interactive BigFrames TableWidget" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "ce250157", + "metadata": {}, "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - "Computation deferred. Computation will process 171.4 MB\n" - ] + "data": { + "text/html": [ + "Computation deferred. Computation will process 171.4 MB" + ], + "text/plain": [ + "Computation deferred. Computation will process 171.4 MB" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ - "test_series = df[\"year\"]\n", - "print(test_series)" + "df" ] } ], diff --git a/noxfile.py b/noxfile.py index 96b59d6776..2d0edfc1b0 100644 --- a/noxfile.py +++ b/noxfile.py @@ -431,6 +431,8 @@ def doctest(session: nox.sessions.Session): "bigframes/core/compile/polars", "--ignore", "bigframes/testing", + "--ignore", + "bigframes/display/anywidget.py", ), test_folder="bigframes", check_cov=True, diff --git a/tests/system/small/test_progress_bar.py b/tests/system/small/test_progress_bar.py index 1e35a2f80f..f96ff2030d 100644 --- a/tests/system/small/test_progress_bar.py +++ b/tests/system/small/test_progress_bar.py @@ -174,9 +174,22 @@ def test_repr_anywidget_dataframe(penguins_df_default_index: bf.dataframe.DataFr assert EXPECTED_DRY_RUN_MESSAGE in actual_repr -def test_repr_anywidget_idex(penguins_df_default_index: bf.dataframe.DataFrame): +def test_repr_anywidget_index(penguins_df_default_index: bf.dataframe.DataFrame): pytest.importorskip("anywidget") with bf.option_context("display.repr_mode", "anywidget"): index = penguins_df_default_index.index actual_repr = repr(index) assert EXPECTED_DRY_RUN_MESSAGE in actual_repr + + +def test_repr_anywidget_pagination_buttons_initial_state( + penguins_df_default_index: bf.dataframe.DataFrame, +): + pytest.importorskip("anywidget") + with bf.option_context("display.repr_mode", "anywidget"): + from bigframes.display import TableWidget + + widget = TableWidget(penguins_df_default_index) + assert widget.page == 0 + assert widget.page_size == bf.options.display.max_rows + assert widget.row_count > 0 From 782dee1baf8a8190390246c57d50864f90a283cb Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Mon, 23 Jun 2025 22:32:32 +0000 Subject: [PATCH 02/23] update init file --- bigframes/display/__init__.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/bigframes/display/__init__.py b/bigframes/display/__init__.py index 5cd8bbb6e4..da8ee598a9 100644 --- a/bigframes/display/__init__.py +++ b/bigframes/display/__init__.py @@ -18,10 +18,9 @@ try: import anywidget # noqa - from .anywidget import TableWidget # noqa + from bigframes.display.anywidget import TableWidget __all__ = ["TableWidget"] -except ImportError: +except Exception: msg = "Anywidget mode not available as anywidget is not installed." warnings.warn(msg) - __all__ = [] From 48eae05afc3c83ffa30311ac4986070c0807005e Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Mon, 23 Jun 2025 23:18:41 +0000 Subject: [PATCH 03/23] add more testcases and demo notebook --- bigframes/display/__init__.py | 1 + bigframes/display/anywidget.py | 31 +++--- notebooks/dataframes/anywidget_mode.ipynb | 128 ++++++++++++++++++---- tests/system/small/test_progress_bar.py | 13 --- 4 files changed, 126 insertions(+), 47 deletions(-) diff --git a/bigframes/display/__init__.py b/bigframes/display/__init__.py index da8ee598a9..882f1e6db5 100644 --- a/bigframes/display/__init__.py +++ b/bigframes/display/__init__.py @@ -11,6 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. + from __future__ import annotations import warnings diff --git a/bigframes/display/anywidget.py b/bigframes/display/anywidget.py index 1f108f29e3..056f4319b4 100644 --- a/bigframes/display/anywidget.py +++ b/bigframes/display/anywidget.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +from typing import Iterator import uuid import anywidget # type: ignore @@ -26,6 +27,8 @@ class TableWidget(anywidget.AnyWidget): An interactive, paginated table widget for BigFrames DataFrames. """ + # The _esm variable contains the JavaScript source code for the frontend + # component of the widget. _esm = """ function render({ model, el }) { const container = document.createElement('div'); @@ -97,7 +100,7 @@ def __init__(self, dataframe): Initialize the TableWidget. Args: - dataframe: The Bigframes Dataframe to display + dataframe: The Bigframes Dataframe to display. """ super().__init__() self._dataframe = dataframe @@ -105,12 +108,11 @@ def __init__(self, dataframe): # respect display options self.page_size = bigframes.options.display.max_rows + # Initialize data fetching attributes. self._batches = dataframe.to_pandas_batches(page_size=self.page_size) self._cached_data = pd.DataFrame(columns=self._dataframe.columns) self._table_id = str(uuid.uuid4()) self._all_data_loaded = False - - # store the iterator as an instance variable self._batch_iterator = None # len(dataframe) is expensive, since it will trigger a @@ -120,8 +122,13 @@ def __init__(self, dataframe): # get the initial page self._set_table_html() - def _get_next_batch(self): - """Gets the next batch of data from the batches generator.""" + def _get_next_batch(self) -> bool: + """ + Gets the next batch of data from the generator and appends to cache. + + Returns: + bool: True if a batch was successfully loaded, False otherwise. + """ if self._all_data_loaded: return False @@ -139,8 +146,8 @@ def _get_next_batch(self): except Exception as e: raise RuntimeError(f"Error during batch processing: {str(e)}") from e - def _get_batch_iterator(self): - """Get batch Iterator.""" + def _get_batch_iterator(self) -> Iterator[pd.DataFrame]: + """Lazily initializes and returns the batch iterator.""" if self._batch_iterator is None: self._batch_iterator = iter(self._batches) return self._batch_iterator @@ -150,12 +157,10 @@ def _set_table_html(self): start = self.page * self.page_size end = start + self.page_size - # fetch more dat if the requested page is outside our cache - while len(self._cached_data) < end: - prev_len = len(self._cached_data) + # fetch more data if the requested page is outside our cache + while len(self._cached_data) < end and not self._all_data_loaded: self._get_next_batch() - if len(self._cached_data) == prev_len: - break + # Get the data fro the current page page_data = self._cached_data.iloc[start:end] @@ -170,5 +175,5 @@ def _set_table_html(self): @traitlets.observe("page") def _page_changed(self, change): - """Handler for when the page nubmer is changed from the frontend""" + """Handler for when the page nubmer is changed from the frontend.""" self._set_table_html() diff --git a/notebooks/dataframes/anywidget_mode.ipynb b/notebooks/dataframes/anywidget_mode.ipynb index 2ec8e485d1..72452f11c8 100644 --- a/notebooks/dataframes/anywidget_mode.ipynb +++ b/notebooks/dataframes/anywidget_mode.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "id": "d10bfca4", "metadata": {}, "outputs": [], @@ -32,7 +32,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 2, "id": "ca22f059", "metadata": {}, "outputs": [], @@ -50,7 +50,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 3, "id": "1bc5aaf3", "metadata": {}, "outputs": [], @@ -63,19 +63,19 @@ "id": "0a354c69", "metadata": {}, "source": [ - "Display the dataframe in anywidget mode" + "Load Sample Data" ] }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "id": "f289d250", "metadata": {}, "outputs": [ { "data": { "text/html": [ - "Query job 4d7057f3-6b68-46b2-b1e3-1dc71cb4682b is DONE. 0 Bytes processed. Open Job" + "Query job 6fddde2a-5b5f-4920-a0b1-cb38e636bab3 is DONE. 0 Bytes processed. Open Job" ], "text/plain": [ "" @@ -83,13 +83,6 @@ }, "metadata": {}, "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Computation deferred. Computation will process 171.4 MB\n" - ] } ], "source": [ @@ -110,7 +103,18 @@ "execution_count": null, "id": "42bb02ab", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "Computation deferred. Computation will process 171.4 MB" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "test_series = df[\"year\"]\n", "print(test_series)" @@ -121,31 +125,113 @@ "id": "7bcf1bb7", "metadata": {}, "source": [ - "Interactive BigFrames TableWidget" + "Display with Pagination" ] }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 6, "id": "ce250157", "metadata": {}, "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/display/anywidget.py:138: FutureWarning: The behavior of DataFrame concatenation with empty or all-NA entries is deprecated. In a future version, this will no longer exclude empty or all-NA columns when determining the result dtypes. To retain the old behavior, exclude the relevant entries before the concat operation.\n", + " self._cached_data = pd.concat([self._cached_data, batch], ignore_index=True)\n", + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/venv/lib/python3.10/site-packages/IPython/core/formatters.py:429: FormatterWarning: text/html formatter returned invalid type (expected ) for object: Computation deferred. Computation will process 171.4 MB\n", + " warnings.warn(\n" + ] + }, { "data": { - "text/html": [ - "Computation deferred. Computation will process 171.4 MB" - ], "text/plain": [ "Computation deferred. Computation will process 171.4 MB" ] }, - "execution_count": 4, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "df" + "df.head(50)" + ] + }, + { + "cell_type": "markdown", + "id": "bb15bab6", + "metadata": {}, + "source": [ + "Progarmmatic Navigation Demo" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6920d49b", + "metadata": {}, + "outputs": [], + "source": [ + "from bigframes.display.anywidget import TableWidget\n", + "import math\n", + " \n", + "# Create widget programmatically \n", + "widget = TableWidget(df) \n", + "print(f\"Total pages: {math.ceil(widget.row_count / widget.page_size)}\") \n", + " \n", + "# Display the widget \n", + "widget" + ] + }, + { + "cell_type": "markdown", + "id": "02cbd1be", + "metadata": {}, + "source": [ + "Test Navigation Programmatically" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "12b68f15", + "metadata": {}, + "outputs": [], + "source": [ + "# Simulate button clicks programmatically \n", + "print(\"Current page:\", widget.page) \n", + " \n", + "# Go to next page \n", + "widget.page = 1 \n", + "print(\"After next:\", widget.page) \n", + " \n", + "# Go to previous page \n", + "widget.page = 0 \n", + "print(\"After prev:\", widget.page)" + ] + }, + { + "cell_type": "markdown", + "id": "9d310138", + "metadata": {}, + "source": [ + "Edge Case Demonstration" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a9d5d13a", + "metadata": {}, + "outputs": [], + "source": [ + "# Test with very small dataset \n", + "small_df = df.head(5) \n", + "small_widget = TableWidget(small_df) \n", + "print(f\"Small dataset pages: {math.ceil(small_widget.row_count / small_widget.page_size)}\") \n", + "small_widget" ] } ], diff --git a/tests/system/small/test_progress_bar.py b/tests/system/small/test_progress_bar.py index f96ff2030d..8a323831b5 100644 --- a/tests/system/small/test_progress_bar.py +++ b/tests/system/small/test_progress_bar.py @@ -180,16 +180,3 @@ def test_repr_anywidget_index(penguins_df_default_index: bf.dataframe.DataFrame) index = penguins_df_default_index.index actual_repr = repr(index) assert EXPECTED_DRY_RUN_MESSAGE in actual_repr - - -def test_repr_anywidget_pagination_buttons_initial_state( - penguins_df_default_index: bf.dataframe.DataFrame, -): - pytest.importorskip("anywidget") - with bf.option_context("display.repr_mode", "anywidget"): - from bigframes.display import TableWidget - - widget = TableWidget(penguins_df_default_index) - assert widget.page == 0 - assert widget.page_size == bf.options.display.max_rows - assert widget.row_count > 0 From b2c7d76d50d838c16537e57814f0f5c6fdd7eab1 Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Mon, 23 Jun 2025 23:19:15 +0000 Subject: [PATCH 04/23] add testcase for anywidget buttons --- tests/system/small/test_anywidget.py | 122 +++++++++++++++++++++++++++ 1 file changed, 122 insertions(+) create mode 100644 tests/system/small/test_anywidget.py diff --git a/tests/system/small/test_anywidget.py b/tests/system/small/test_anywidget.py new file mode 100644 index 0000000000..1f56cf43d9 --- /dev/null +++ b/tests/system/small/test_anywidget.py @@ -0,0 +1,122 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import math + +import pytest + +import bigframes as bf + + +def test_repr_anywidget_initial_state( + penguins_df_default_index: bf.dataframe.DataFrame, +): + pytest.importorskip("anywidget") + with bf.option_context("display.repr_mode", "anywidget"): + from bigframes.display import TableWidget + + widget = TableWidget(penguins_df_default_index) + assert widget.page == 0 + assert widget.page_size == bf.options.display.max_rows + assert widget.row_count > 0 + + +def test_repr_anywidget_pagination_navigation( + penguins_df_default_index: bf.dataframe.DataFrame, +): + """Test basic prev/next navigation functionality.""" + pytest.importorskip("anywidget") + with bf.option_context("display.repr_mode", "anywidget"): + from bigframes.display.anywidget import TableWidget + + widget = TableWidget(penguins_df_default_index) + + # Test initial state + assert widget.page == 0 + + # Simulate next page click + widget.page = 1 + assert widget.page == 1 + + # Simulate prev page click + widget.page = 0 + assert widget.page == 0 + + +def test_repr_anywidget_pagination_edge_cases( + penguins_df_default_index: bf.dataframe.DataFrame, +): + """Test pagination at boundaries.""" + pytest.importorskip("anywidget") + with bf.option_context("display.repr_mode", "anywidget"): + from bigframes.display.anywidget import TableWidget + + widget = TableWidget(penguins_df_default_index) + + # Test going below page 0 + widget.page = -1 + # Should stay at 0 (handled by frontend) + + # Test going beyond last page + total_pages = math.ceil(widget.row_count / widget.page_size) + widget.page = total_pages + 1 + # Should be clamped to last valid page + + +def test_repr_anywidget_pagination_different_page_sizes( + penguins_df_default_index: bf.dataframe.DataFrame, +): + """Test pagination with different page sizes.""" + pytest.importorskip("anywidget") + + # Test with smaller page size + with bf.option_context("display.repr_mode", "anywidget", "display.max_rows", 5): + from bigframes.display.anywidget import TableWidget + + widget = TableWidget(penguins_df_default_index) + + assert widget.page_size == 5 + total_pages = math.ceil(widget.row_count / 5) + assert total_pages > 1 # Should have multiple pages + + # Navigate through several pages + for page in range(min(3, total_pages)): + widget.page = page + assert widget.page == page + + +def test_repr_anywidget_pagination_buttons_functionality( + penguins_df_default_index: bf.dataframe.DataFrame, +): + """Test complete pagination button functionality.""" + pytest.importorskip("anywidget") + with bf.option_context("display.repr_mode", "anywidget", "display.max_rows", 10): + from bigframes.display.anywidget import TableWidget + + widget = TableWidget(penguins_df_default_index) + + # Test initial state + assert widget.page == 0 + assert widget.page_size == 10 + assert widget.row_count > 0 + + # Calculate expected pages + total_pages = math.ceil(widget.row_count / widget.page_size) + + # Test navigation through all pages + for page_num in range(min(total_pages, 5)): # Test first 5 pages + widget.page = page_num + assert widget.page == page_num + # Verify table_html is updated + assert len(widget.table_html) > 0 From 54089bdefbc9205343a7c4d3e165d797f1f399ef Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Tue, 24 Jun 2025 22:07:26 +0000 Subject: [PATCH 05/23] move js code to a separate file --- bigframes/display/anywidget.py | 67 ++-------------------- bigframes/display/table_widget.js | 94 +++++++++++++++++++++++++++++++ owlbot.py | 7 +++ 3 files changed, 106 insertions(+), 62 deletions(-) create mode 100644 bigframes/display/table_widget.js diff --git a/bigframes/display/anywidget.py b/bigframes/display/anywidget.py index 056f4319b4..e6d0ef9603 100644 --- a/bigframes/display/anywidget.py +++ b/bigframes/display/anywidget.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +from importlib import resources from typing import Iterator import uuid @@ -27,68 +28,10 @@ class TableWidget(anywidget.AnyWidget): An interactive, paginated table widget for BigFrames DataFrames. """ - # The _esm variable contains the JavaScript source code for the frontend - # component of the widget. - _esm = """ - function render({ model, el }) { - const container = document.createElement('div'); - container.innerHTML = model.get('table_html'); - - const buttonContainer = document.createElement('div'); - const prevPage = document.createElement('button'); - const label = document.createElement('span'); - const nextPage = document.createElement('button'); - prevPage.type = 'button'; - nextPage.type = 'button'; - prevPage.textContent = 'Prev'; - nextPage.textContent = 'Next'; - - // update button states and label - function updateButtonStates() { - const totalPages = Math.ceil(model.get('row_count') / model.get('page_size')); - const currentPage = model.get('page'); - - // Update label - label.textContent = `Page ${currentPage + 1} of ${totalPages}`; - - // Update button states - prevPage.disabled = currentPage === 0; - nextPage.disabled = currentPage >= totalPages - 1; - } - - // Initial button state setup - updateButtonStates(); - - prevPage.addEventListener('click', () => { - let newPage = model.get('page') - 1; - if (newPage < 0) { - newPage = 0; - } - console.log(`Setting page to ${newPage}`) - model.set('page', newPage); - model.save_changes(); - }); - - nextPage.addEventListener('click', () => { - const newPage = model.get('page') + 1; - console.log(`Setting page to ${newPage}`) - model.set('page', newPage); - model.save_changes(); - }); - - model.on('change:table_html', () => { - container.innerHTML = model.get('table_html'); - updateButtonStates(); // Update button states when table changes - }); - - buttonContainer.appendChild(prevPage); - buttonContainer.appendChild(label); - buttonContainer.appendChild(nextPage); - el.appendChild(container); - el.appendChild(buttonContainer); - } - export default { render }; - """ + @property + def _esm(self): + """Load JavaScript code from external file.""" + return resources.read_text(bigframes.display, "table_widget.js") page = traitlets.Int(0).tag(sync=True) page_size = traitlets.Int(25).tag(sync=True) diff --git a/bigframes/display/table_widget.js b/bigframes/display/table_widget.js new file mode 100644 index 0000000000..e8b3d732fd --- /dev/null +++ b/bigframes/display/table_widget.js @@ -0,0 +1,94 @@ +/** +* Copyright 2025 Google LLC +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*/ + +const ModelProperty = { + TABLE_HTML: 'table_html', + ROW_COUNT: 'row_count', + PAGE_SIZE: 'page_size', + PAGE: 'page', +}; + +const Event = { + CHANGE_TABLE_HTML: `change:${ModelProperty.TABLE_HTML}`, + CLICK: 'click', +}; + +/** + * Renders a paginated table and its controls into a given element. + * @param {{ + * model: !Backbone.Model, + * el: !HTMLElement + * }} options + */ +function render({model, el}) { + const container = document.createElement('div'); + container.innerHTML = model.get(ModelProperty.TABLE_HTML); + + const buttonContainer = document.createElement('div'); + const prevPage = document.createElement('button'); + const label = document.createElement('span'); + const nextPage = document.createElement('button'); + + prevPage.type = 'button'; + nextPage.type = 'button'; + prevPage.textContent = 'Prev'; + nextPage.textContent = 'Next'; + + /** Updates the button states and page label based on the model. */ + function updateButtonStates() { + const totalPages = Math.ceil( + model.get(ModelProperty.ROW_COUNT) / model.get(ModelProperty.PAGE_SIZE)); + const currentPage = model.get(ModelProperty.PAGE); + + label.textContent = `Page ${currentPage + 1} of ${totalPages}`; + prevPage.disabled = currentPage === 0; + nextPage.disabled = currentPage >= totalPages - 1; + } + + /** + * Updates the page in the model. + * @param {number} direction -1 for previous, 1 for next. + */ + function handlePageChange(direction) { + const currentPage = model.get(ModelProperty.PAGE); + const newPage = Math.max(0, currentPage + direction); + if (newPage !== currentPage) { + model.set(ModelProperty.PAGE, newPage); + model.save_changes(); + } + } + + prevPage.addEventListener(Event.CLICK, () => handlePageChange(-1)); + nextPage.addEventListener(Event.CLICK, () => handlePageChange(1)); + + model.on(Event.CHANGE_TABLE_HTML, () => { + // Note: Using innerHTML can be a security risk if the content is + // user-generated. Ensure 'table_html' is properly sanitized. + container.innerHTML = model.get(ModelProperty.TABLE_HTML); + updateButtonStates(); + }); + + // Initial setup + updateButtonStates(); + + buttonContainer.appendChild(prevPage); + buttonContainer.appendChild(label); + buttonContainer.appendChild(nextPage); + el.appendChild(container); + el.appendChild(buttonContainer); +} + +export default {render}; diff --git a/owlbot.py b/owlbot.py index fa5491ee20..e70eb14796 100644 --- a/owlbot.py +++ b/owlbot.py @@ -107,6 +107,13 @@ "BigQuery DataFrames provides DataFrame APIs on the BigQuery engine", ) +# Include JavaScript files for anywidget +assert 1 == s.replace( # MANIFEST.in + ["MANIFEST.in"], + "recursive-include bigframes *.json *.proto py.typed", + "recursive-include bigframes *.json *.proto *.js py.typed", +) + # Don't omit `*/core/*.py` when counting test coverages assert 1 == s.replace( # .coveragerc [".coveragerc"], From e939d030616786444fd2f6503d7efe313f13e87a Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Wed, 25 Jun 2025 00:27:45 +0000 Subject: [PATCH 06/23] fix owlbot.py --- owlbot.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/owlbot.py b/owlbot.py index e70eb14796..c8ed6866ae 100644 --- a/owlbot.py +++ b/owlbot.py @@ -100,6 +100,15 @@ "recursive-include third_party/bigframes_vendored *\nrecursive-include bigframes", ) + +# Include JavaScript files for display widgets +assert 1 == s.replace( # MANIFEST.in + ["MANIFEST.in"], + re.escape("recursive-include bigframes *.json *.proto py.typed"), + "recursive-include bigframes *.json *.proto *.js py.typed", +) + + # Fixup the documentation. assert 1 == s.replace( # docs/conf.py ["docs/conf.py"], @@ -107,12 +116,6 @@ "BigQuery DataFrames provides DataFrame APIs on the BigQuery engine", ) -# Include JavaScript files for anywidget -assert 1 == s.replace( # MANIFEST.in - ["MANIFEST.in"], - "recursive-include bigframes *.json *.proto py.typed", - "recursive-include bigframes *.json *.proto *.js py.typed", -) # Don't omit `*/core/*.py` when counting test coverages assert 1 == s.replace( # .coveragerc From 834cf33bf1cd52725361f48883856ddd047c50cb Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Wed, 25 Jun 2025 00:30:19 +0000 Subject: [PATCH 07/23] remove extra line --- owlbot.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/owlbot.py b/owlbot.py index c8ed6866ae..5dc57a35b8 100644 --- a/owlbot.py +++ b/owlbot.py @@ -100,7 +100,6 @@ "recursive-include third_party/bigframes_vendored *\nrecursive-include bigframes", ) - # Include JavaScript files for display widgets assert 1 == s.replace( # MANIFEST.in ["MANIFEST.in"], @@ -108,7 +107,6 @@ "recursive-include bigframes *.json *.proto *.js py.typed", ) - # Fixup the documentation. assert 1 == s.replace( # docs/conf.py ["docs/conf.py"], @@ -116,7 +114,6 @@ "BigQuery DataFrames provides DataFrame APIs on the BigQuery engine", ) - # Don't omit `*/core/*.py` when counting test coverages assert 1 == s.replace( # .coveragerc [".coveragerc"], From 8bd103a2ac8eaf32664b46f31815b3f586f23d7c Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Wed, 25 Jun 2025 21:58:44 +0000 Subject: [PATCH 08/23] modify testcase --- .pre-commit-config.yaml | 5 + bigframes/display/anywidget.py | 13 +- bigframes/display/table_widget.js | 137 ++++++------- tests/system/small/test_anywidget.py | 282 +++++++++++++++++++-------- 4 files changed, 289 insertions(+), 148 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 93cc5e4210..323ef8f07a 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -42,3 +42,8 @@ repos: additional_dependencies: [types-requests, types-tabulate, types-PyYAML, pandas-stubs<=2.2.3.241126] exclude: "^third_party" args: ["--check-untyped-defs", "--explicit-package-bases", "--ignore-missing-imports"] +- repo: https://github.com/biomejs/pre-commit + rev: v2.0.2 + hooks: + - id: biome-check + files: '\.js$' diff --git a/bigframes/display/anywidget.py b/bigframes/display/anywidget.py index e6d0ef9603..592b3923ee 100644 --- a/bigframes/display/anywidget.py +++ b/bigframes/display/anywidget.py @@ -13,6 +13,8 @@ # limitations under the License. from importlib import resources +import functools +import math from typing import Iterator import uuid @@ -28,7 +30,7 @@ class TableWidget(anywidget.AnyWidget): An interactive, paginated table widget for BigFrames DataFrames. """ - @property + @functools.cached_property def _esm(self): """Load JavaScript code from external file.""" return resources.read_text(bigframes.display, "table_widget.js") @@ -65,6 +67,15 @@ def __init__(self, dataframe): # get the initial page self._set_table_html() + @traitlets.validate("page") + def _validate_page(self, proposal): + """Validate and clamp page number to valid range.""" + value = proposal["value"] + if self.row_count == 0 or self.page_size == 0: + return 0 + max_page = max(0, math.ceil(self.row_count / self.page_size) - 1) + return max(0, min(value, max_page)) + def _get_next_batch(self) -> bool: """ Gets the next batch of data from the generator and appends to cache. diff --git a/bigframes/display/table_widget.js b/bigframes/display/table_widget.js index e8b3d732fd..71484af4d5 100644 --- a/bigframes/display/table_widget.js +++ b/bigframes/display/table_widget.js @@ -1,29 +1,29 @@ /** -* Copyright 2025 Google LLC -* -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. -* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. -*/ + * Copyright 2025 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ const ModelProperty = { - TABLE_HTML: 'table_html', - ROW_COUNT: 'row_count', - PAGE_SIZE: 'page_size', - PAGE: 'page', + TABLE_HTML: "table_html", + ROW_COUNT: "row_count", + PAGE_SIZE: "page_size", + PAGE: "page", }; const Event = { - CHANGE_TABLE_HTML: `change:${ModelProperty.TABLE_HTML}`, - CLICK: 'click', + CHANGE_TABLE_HTML: `change:${ModelProperty.TABLE_HTML}`, + CLICK: "click", }; /** @@ -33,62 +33,63 @@ const Event = { * el: !HTMLElement * }} options */ -function render({model, el}) { - const container = document.createElement('div'); - container.innerHTML = model.get(ModelProperty.TABLE_HTML); +function render({ model, el }) { + const container = document.createElement("div"); + container.innerHTML = model.get(ModelProperty.TABLE_HTML); - const buttonContainer = document.createElement('div'); - const prevPage = document.createElement('button'); - const label = document.createElement('span'); - const nextPage = document.createElement('button'); + const buttonContainer = document.createElement("div"); + const prevPage = document.createElement("button"); + const label = document.createElement("span"); + const nextPage = document.createElement("button"); - prevPage.type = 'button'; - nextPage.type = 'button'; - prevPage.textContent = 'Prev'; - nextPage.textContent = 'Next'; + prevPage.type = "button"; + nextPage.type = "button"; + prevPage.textContent = "Prev"; + nextPage.textContent = "Next"; - /** Updates the button states and page label based on the model. */ - function updateButtonStates() { - const totalPages = Math.ceil( - model.get(ModelProperty.ROW_COUNT) / model.get(ModelProperty.PAGE_SIZE)); - const currentPage = model.get(ModelProperty.PAGE); + /** Updates the button states and page label based on the model. */ + function updateButtonStates() { + const totalPages = Math.ceil( + model.get(ModelProperty.ROW_COUNT) / model.get(ModelProperty.PAGE_SIZE), + ); + const currentPage = model.get(ModelProperty.PAGE); - label.textContent = `Page ${currentPage + 1} of ${totalPages}`; - prevPage.disabled = currentPage === 0; - nextPage.disabled = currentPage >= totalPages - 1; - } + label.textContent = `Page ${currentPage + 1} of ${totalPages}`; + prevPage.disabled = currentPage === 0; + nextPage.disabled = currentPage >= totalPages - 1; + } - /** - * Updates the page in the model. - * @param {number} direction -1 for previous, 1 for next. - */ - function handlePageChange(direction) { - const currentPage = model.get(ModelProperty.PAGE); - const newPage = Math.max(0, currentPage + direction); - if (newPage !== currentPage) { - model.set(ModelProperty.PAGE, newPage); - model.save_changes(); - } - } + /** + * Updates the page in the model. + * @param {number} direction -1 for previous, 1 for next. + */ + function handlePageChange(direction) { + const currentPage = model.get(ModelProperty.PAGE); + const newPage = Math.max(0, currentPage + direction); + if (newPage !== currentPage) { + model.set(ModelProperty.PAGE, newPage); + model.save_changes(); + } + } - prevPage.addEventListener(Event.CLICK, () => handlePageChange(-1)); - nextPage.addEventListener(Event.CLICK, () => handlePageChange(1)); + prevPage.addEventListener(Event.CLICK, () => handlePageChange(-1)); + nextPage.addEventListener(Event.CLICK, () => handlePageChange(1)); - model.on(Event.CHANGE_TABLE_HTML, () => { - // Note: Using innerHTML can be a security risk if the content is - // user-generated. Ensure 'table_html' is properly sanitized. - container.innerHTML = model.get(ModelProperty.TABLE_HTML); - updateButtonStates(); - }); + model.on(Event.CHANGE_TABLE_HTML, () => { + // Note: Using innerHTML can be a security risk if the content is + // user-generated. Ensure 'table_html' is properly sanitized. + container.innerHTML = model.get(ModelProperty.TABLE_HTML); + updateButtonStates(); + }); - // Initial setup - updateButtonStates(); + // Initial setup + updateButtonStates(); - buttonContainer.appendChild(prevPage); - buttonContainer.appendChild(label); - buttonContainer.appendChild(nextPage); - el.appendChild(container); - el.appendChild(buttonContainer); + buttonContainer.appendChild(prevPage); + buttonContainer.appendChild(label); + buttonContainer.appendChild(nextPage); + el.appendChild(container); + el.appendChild(buttonContainer); } -export default {render}; +export default { render }; diff --git a/tests/system/small/test_anywidget.py b/tests/system/small/test_anywidget.py index 1f56cf43d9..f7dbbac9f2 100644 --- a/tests/system/small/test_anywidget.py +++ b/tests/system/small/test_anywidget.py @@ -12,111 +12,235 @@ # See the License for the specific language governing permissions and # limitations under the License. -import math - +import pandas as pd import pytest import bigframes as bf +pytest.importorskip("anywidget") + + +@pytest.fixture(scope="module") +def paginated_pandas_df() -> pd.DataFrame: + """Create a test DataFrame with exactly 3 pages of manually defined data.""" + test_data = pd.DataFrame( + { + "id": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], + "page_indicator": [ + # Page 1 (rows 1-5) + "page_1_row_1", + "page_1_row_2", + "page_1_row_3", + "page_1_row_4", + "page_1_row_5", + # Page 2 (rows 6-10) + "page_2_row_1", + "page_2_row_2", + "page_2_row_3", + "page_2_row_4", + "page_2_row_5", + # Page 3 (rows 11-15) + "page_3_row_1", + "page_3_row_2", + "page_3_row_3", + "page_3_row_4", + "page_3_row_5", + ], + "value": [ + "data_001", + "data_002", + "data_003", + "data_004", + "data_005", + "data_006", + "data_007", + "data_008", + "data_009", + "data_010", + "data_011", + "data_012", + "data_013", + "data_014", + "data_015", + ], + } + ) + return test_data + + +@pytest.fixture(scope="module") +def paginated_bf_df( + session: bf.Session, paginated_pandas_df: pd.DataFrame +) -> bf.dataframe.DataFrame: + return session.read_pandas(paginated_pandas_df) + + +@pytest.fixture(scope="module") +def table_widget(paginated_bf_df: bf.dataframe.DataFrame): + """ + Helper fixture to create a TableWidget instance with a fixed page size. + This reduces duplication across tests that use the same widget configuration. + """ + from bigframes.display import TableWidget + + with bf.option_context("display.repr_mode", "anywidget", "display.max_rows", 5): + widget = TableWidget(paginated_bf_df) + return widget -def test_repr_anywidget_initial_state( - penguins_df_default_index: bf.dataframe.DataFrame, + +def _assert_html_matches_pandas_slice( + table_html: str, + expected_pd_slice: pd.DataFrame, + full_pd_df: pd.DataFrame, +): + """ + Assertion helper to verify that the rendered HTML contains exactly the + rows from the expected pandas DataFrame slice and no others. This is + inspired by the pattern of comparing BigFrames output to pandas output. + """ + # Check that the unique indicator from each expected row is present. + for _, row in expected_pd_slice.iterrows(): + assert ( + row["page_indicator"] in table_html + ), f"Expected row '{row['page_indicator']}' to be in the table HTML." + + # Create a DataFrame of all rows that should NOT be present. + unexpected_pd_df = full_pd_df.drop(expected_pd_slice.index) + + # Check that no unique indicators from unexpected rows are present. + for _, row in unexpected_pd_df.iterrows(): + assert ( + row["page_indicator"] not in table_html + ), f"Expected row '{row['page_indicator']}' NOT to be in the table HTML." + + +def test_repr_anywidget_initialization_set_correct_defaults( + paginated_bf_df: bf.dataframe.DataFrame, + paginated_pandas_df: pd.DataFrame, ): - pytest.importorskip("anywidget") + """ + A TableWidget should initialize with correct default values. + """ with bf.option_context("display.repr_mode", "anywidget"): from bigframes.display import TableWidget - widget = TableWidget(penguins_df_default_index) - assert widget.page == 0 - assert widget.page_size == bf.options.display.max_rows - assert widget.row_count > 0 + widget = TableWidget(paginated_bf_df) + assert widget.page == 0, "Initial page should be 0." + assert ( + widget.page_size == bf.options.display.max_rows + ), "Page size should default to max_rows option." + assert widget.row_count == len( + paginated_pandas_df + ), "Row count should match the source DataFrame." -def test_repr_anywidget_pagination_navigation( - penguins_df_default_index: bf.dataframe.DataFrame, -): - """Test basic prev/next navigation functionality.""" - pytest.importorskip("anywidget") - with bf.option_context("display.repr_mode", "anywidget"): - from bigframes.display.anywidget import TableWidget - widget = TableWidget(penguins_df_default_index) +def test_repr_anywidget_display_first_page_on_load(table_widget, paginated_pandas_df): + """ + Given a widget, when it is first loaded, then it should display + the first page of data. + """ + expected_slice = paginated_pandas_df.iloc[0:5] - # Test initial state - assert widget.page == 0 + html = table_widget.table_html - # Simulate next page click - widget.page = 1 - assert widget.page == 1 + _assert_html_matches_pandas_slice(html, expected_slice, paginated_pandas_df) - # Simulate prev page click - widget.page = 0 - assert widget.page == 0 +def test_repr_anywidget_navigate_to_second_page(table_widget, paginated_pandas_df): + """ + Given a widget, when the page is set to 1, then it should display + the second page of data. + """ + expected_slice = paginated_pandas_df.iloc[5:10] -def test_repr_anywidget_pagination_edge_cases( - penguins_df_default_index: bf.dataframe.DataFrame, -): - """Test pagination at boundaries.""" - pytest.importorskip("anywidget") - with bf.option_context("display.repr_mode", "anywidget"): - from bigframes.display.anywidget import TableWidget + table_widget.page = 1 + html = table_widget.table_html - widget = TableWidget(penguins_df_default_index) + assert table_widget.page == 1 + _assert_html_matches_pandas_slice(html, expected_slice, paginated_pandas_df) - # Test going below page 0 - widget.page = -1 - # Should stay at 0 (handled by frontend) - # Test going beyond last page - total_pages = math.ceil(widget.row_count / widget.page_size) - widget.page = total_pages + 1 - # Should be clamped to last valid page +def test_repr_anywidget_navigate_to_last_page(table_widget, paginated_pandas_df): + """ + Given a widget, when the page is set to the last page (2), + then it should display the final page of data. + """ + expected_slice = paginated_pandas_df.iloc[10:15] + table_widget.page = 2 + html = table_widget.table_html -def test_repr_anywidget_pagination_different_page_sizes( - penguins_df_default_index: bf.dataframe.DataFrame, -): - """Test pagination with different page sizes.""" - pytest.importorskip("anywidget") + assert table_widget.page == 2 + _assert_html_matches_pandas_slice(html, expected_slice, paginated_pandas_df) - # Test with smaller page size - with bf.option_context("display.repr_mode", "anywidget", "display.max_rows", 5): - from bigframes.display.anywidget import TableWidget - widget = TableWidget(penguins_df_default_index) +def test_repr_anywidget_page_clamp_to_zero_for_negative_input( + table_widget, paginated_pandas_df +): + """ + Given a widget, when a negative page number is set, + then the page number should be clamped to 0 and display the first page. + """ + expected_slice = paginated_pandas_df.iloc[0:5] - assert widget.page_size == 5 - total_pages = math.ceil(widget.row_count / 5) - assert total_pages > 1 # Should have multiple pages + table_widget.page = -1 + html = table_widget.table_html - # Navigate through several pages - for page in range(min(3, total_pages)): - widget.page = page - assert widget.page == page + assert table_widget.page == 0, "Page should be clamped to 0." + _assert_html_matches_pandas_slice(html, expected_slice, paginated_pandas_df) -def test_repr_anywidget_pagination_buttons_functionality( - penguins_df_default_index: bf.dataframe.DataFrame, +def test_repr_anywidget_page_clamp_to_last_page_for_out_of_bounds_input( + table_widget, paginated_pandas_df ): - """Test complete pagination button functionality.""" - pytest.importorskip("anywidget") - with bf.option_context("display.repr_mode", "anywidget", "display.max_rows", 10): - from bigframes.display.anywidget import TableWidget - - widget = TableWidget(penguins_df_default_index) - - # Test initial state - assert widget.page == 0 - assert widget.page_size == 10 - assert widget.row_count > 0 - - # Calculate expected pages - total_pages = math.ceil(widget.row_count / widget.page_size) - - # Test navigation through all pages - for page_num in range(min(total_pages, 5)): # Test first 5 pages - widget.page = page_num - assert widget.page == page_num - # Verify table_html is updated - assert len(widget.table_html) > 0 + """ + Given a widget, when a page number greater than the max is set, + then the page number should be clamped to the last valid page. + """ + expected_slice = paginated_pandas_df.iloc[10:15] + + table_widget.page = 100 + html = table_widget.table_html + + assert table_widget.page == 2, "Page should be clamped to the last valid page." + _assert_html_matches_pandas_slice(html, expected_slice, paginated_pandas_df) + + +@pytest.mark.parametrize( + "page, start_row, end_row", + [ + (0, 0, 3), + (1, 3, 6), + (2, 6, 9), + (3, 9, 12), + (4, 12, 15), + ], + ids=[ + "Page 0 (Rows 1-3)", + "Page 1 (Rows 4-6)", + "Page 2 (Rows 7-9)", + "Page 3 (Rows 10-12)", + "Page 4 (Rows 13-15)", + ], +) +def test_repr_anywidget_paginate_correctly_with_custom_page_size( + paginated_bf_df, paginated_pandas_df, page, start_row, end_row +): + """ + A widget should paginate correctly with a custom page size of 3. + This uses pytest parameterization, a strong pattern from the examples. + """ + with bf.option_context("display.repr_mode", "anywidget", "display.max_rows", 3): + from bigframes.display import TableWidget + + widget = TableWidget(paginated_bf_df) + assert widget.page_size == 3 + + expected_slice = paginated_pandas_df.iloc[start_row:end_row] + + widget.page = page + html = widget.table_html + + assert widget.page == page + _assert_html_matches_pandas_slice(html, expected_slice, paginated_pandas_df) From 6f5433a3e3cb08e5a6e4cfd3439a5b4356c4c2bc Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Wed, 25 Jun 2025 23:28:16 +0000 Subject: [PATCH 09/23] add cleanup session --- tests/system/small/test_anywidget.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/tests/system/small/test_anywidget.py b/tests/system/small/test_anywidget.py index f7dbbac9f2..c13aaeffee 100644 --- a/tests/system/small/test_anywidget.py +++ b/tests/system/small/test_anywidget.py @@ -88,6 +88,18 @@ def table_widget(paginated_bf_df: bf.dataframe.DataFrame): return widget +@pytest.fixture(scope="module", autouse=True) +def cleanup_session(session): + """Ensure session cleanup happens after all tests in this module.""" + yield + # Force cleanup of all temporary resources if session is still active + try: + session.close() + except Exception: + # Session may already be closed by the global fixture + pass + + def _assert_html_matches_pandas_slice( table_html: str, expected_pd_slice: pd.DataFrame, From dbfa8f1338f359da516e1cada26886afe4fd58cd Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Wed, 25 Jun 2025 23:55:38 +0000 Subject: [PATCH 10/23] use traceback --- tests/system/small/test_anywidget.py | 39 +++++++++++++++++++--------- 1 file changed, 27 insertions(+), 12 deletions(-) diff --git a/tests/system/small/test_anywidget.py b/tests/system/small/test_anywidget.py index c13aaeffee..e6e42e19ed 100644 --- a/tests/system/small/test_anywidget.py +++ b/tests/system/small/test_anywidget.py @@ -12,6 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +import traceback + import pandas as pd import pytest @@ -20,6 +22,31 @@ pytest.importorskip("anywidget") +@pytest.fixture(scope="module", autouse=True) +def cleanup_session(session): + """Ensure comprehensive cleanup happens after all tests in this module.""" + yield + try: + # Force cleanup of anonymous dataset and all temporary tables + if hasattr(session, "_anon_dataset_manager") and session._anon_dataset_manager: + session._anon_dataset_manager.close() + + # Also call the main session cleanup + session.close() + except Exception as e: + traceback.print_exception(type(e), e, None) + # Try the BigFrames cleanup function as fallback + try: + import bigframes.pandas as bpd + + bpd.clean_up_by_session_id( + session.session_id, location=session._location, project=session._project + ) + except Exception as cleanup_error: + print(f"Warning: Fallback cleanup also failed: {cleanup_error}") + traceback.print_exception(type(cleanup_error), cleanup_error, None) + + @pytest.fixture(scope="module") def paginated_pandas_df() -> pd.DataFrame: """Create a test DataFrame with exactly 3 pages of manually defined data.""" @@ -88,18 +115,6 @@ def table_widget(paginated_bf_df: bf.dataframe.DataFrame): return widget -@pytest.fixture(scope="module", autouse=True) -def cleanup_session(session): - """Ensure session cleanup happens after all tests in this module.""" - yield - # Force cleanup of all temporary resources if session is still active - try: - session.close() - except Exception: - # Session may already be closed by the global fixture - pass - - def _assert_html_matches_pandas_slice( table_html: str, expected_pd_slice: pd.DataFrame, From fc247acc607462650378de474319d04f790e767b Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Thu, 26 Jun 2025 00:03:59 +0000 Subject: [PATCH 11/23] no need of fall back method --- tests/system/small/test_anywidget.py | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/tests/system/small/test_anywidget.py b/tests/system/small/test_anywidget.py index e6e42e19ed..df8ccec050 100644 --- a/tests/system/small/test_anywidget.py +++ b/tests/system/small/test_anywidget.py @@ -35,16 +35,6 @@ def cleanup_session(session): session.close() except Exception as e: traceback.print_exception(type(e), e, None) - # Try the BigFrames cleanup function as fallback - try: - import bigframes.pandas as bpd - - bpd.clean_up_by_session_id( - session.session_id, location=session._location, project=session._project - ) - except Exception as cleanup_error: - print(f"Warning: Fallback cleanup also failed: {cleanup_error}") - traceback.print_exception(type(cleanup_error), cleanup_error, None) @pytest.fixture(scope="module") From 77530435a43e3a174ba359fb5c2e3a8eb11e9f4e Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Thu, 26 Jun 2025 03:01:58 +0000 Subject: [PATCH 12/23] use test-specific small data instead --- tests/system/small/test_anywidget.py | 102 ++++++--------------------- 1 file changed, 23 insertions(+), 79 deletions(-) diff --git a/tests/system/small/test_anywidget.py b/tests/system/small/test_anywidget.py index df8ccec050..231e69bb26 100644 --- a/tests/system/small/test_anywidget.py +++ b/tests/system/small/test_anywidget.py @@ -12,8 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -import traceback - import pandas as pd import pytest @@ -22,64 +20,25 @@ pytest.importorskip("anywidget") -@pytest.fixture(scope="module", autouse=True) -def cleanup_session(session): - """Ensure comprehensive cleanup happens after all tests in this module.""" - yield - try: - # Force cleanup of anonymous dataset and all temporary tables - if hasattr(session, "_anon_dataset_manager") and session._anon_dataset_manager: - session._anon_dataset_manager.close() - - # Also call the main session cleanup - session.close() - except Exception as e: - traceback.print_exception(type(e), e, None) - - @pytest.fixture(scope="module") def paginated_pandas_df() -> pd.DataFrame: """Create a test DataFrame with exactly 3 pages of manually defined data.""" + """Create a minimal test DataFrame with exactly 3 pages of 2 rows each.""" test_data = pd.DataFrame( { - "id": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], + "id": [0, 1, 2, 3, 4, 5], "page_indicator": [ - # Page 1 (rows 1-5) + # Page 1 (rows 1-2) "page_1_row_1", "page_1_row_2", - "page_1_row_3", - "page_1_row_4", - "page_1_row_5", - # Page 2 (rows 6-10) + # Page 2 (rows 3-4) "page_2_row_1", "page_2_row_2", - "page_2_row_3", - "page_2_row_4", - "page_2_row_5", - # Page 3 (rows 11-15) + # Page 3 (rows 5-6) "page_3_row_1", "page_3_row_2", - "page_3_row_3", - "page_3_row_4", - "page_3_row_5", - ], - "value": [ - "data_001", - "data_002", - "data_003", - "data_004", - "data_005", - "data_006", - "data_007", - "data_008", - "data_009", - "data_010", - "data_011", - "data_012", - "data_013", - "data_014", - "data_015", ], + "value": [0, 1, 2, 3, 4, 5], } ) return test_data @@ -100,7 +59,7 @@ def table_widget(paginated_bf_df: bf.dataframe.DataFrame): """ from bigframes.display import TableWidget - with bf.option_context("display.repr_mode", "anywidget", "display.max_rows", 5): + with bf.option_context("display.repr_mode", "anywidget", "display.max_rows", 2): widget = TableWidget(paginated_bf_df) return widget @@ -117,18 +76,14 @@ def _assert_html_matches_pandas_slice( """ # Check that the unique indicator from each expected row is present. for _, row in expected_pd_slice.iterrows(): - assert ( - row["page_indicator"] in table_html - ), f"Expected row '{row['page_indicator']}' to be in the table HTML." + assert row["page_indicator"] in table_html # Create a DataFrame of all rows that should NOT be present. unexpected_pd_df = full_pd_df.drop(expected_pd_slice.index) # Check that no unique indicators from unexpected rows are present. for _, row in unexpected_pd_df.iterrows(): - assert ( - row["page_indicator"] not in table_html - ), f"Expected row '{row['page_indicator']}' NOT to be in the table HTML." + assert row["page_indicator"] not in table_html def test_repr_anywidget_initialization_set_correct_defaults( @@ -143,13 +98,9 @@ def test_repr_anywidget_initialization_set_correct_defaults( widget = TableWidget(paginated_bf_df) - assert widget.page == 0, "Initial page should be 0." - assert ( - widget.page_size == bf.options.display.max_rows - ), "Page size should default to max_rows option." - assert widget.row_count == len( - paginated_pandas_df - ), "Row count should match the source DataFrame." + assert widget.page == 0 + assert widget.page_size == bf.options.display.max_rows + assert widget.row_count == len(paginated_pandas_df) def test_repr_anywidget_display_first_page_on_load(table_widget, paginated_pandas_df): @@ -157,7 +108,7 @@ def test_repr_anywidget_display_first_page_on_load(table_widget, paginated_panda Given a widget, when it is first loaded, then it should display the first page of data. """ - expected_slice = paginated_pandas_df.iloc[0:5] + expected_slice = paginated_pandas_df.iloc[0:2] html = table_widget.table_html @@ -169,7 +120,7 @@ def test_repr_anywidget_navigate_to_second_page(table_widget, paginated_pandas_d Given a widget, when the page is set to 1, then it should display the second page of data. """ - expected_slice = paginated_pandas_df.iloc[5:10] + expected_slice = paginated_pandas_df.iloc[2:4] table_widget.page = 1 html = table_widget.table_html @@ -183,7 +134,7 @@ def test_repr_anywidget_navigate_to_last_page(table_widget, paginated_pandas_df) Given a widget, when the page is set to the last page (2), then it should display the final page of data. """ - expected_slice = paginated_pandas_df.iloc[10:15] + expected_slice = paginated_pandas_df.iloc[4:6] table_widget.page = 2 html = table_widget.table_html @@ -199,12 +150,12 @@ def test_repr_anywidget_page_clamp_to_zero_for_negative_input( Given a widget, when a negative page number is set, then the page number should be clamped to 0 and display the first page. """ - expected_slice = paginated_pandas_df.iloc[0:5] + expected_slice = paginated_pandas_df.iloc[0:2] table_widget.page = -1 html = table_widget.table_html - assert table_widget.page == 0, "Page should be clamped to 0." + assert table_widget.page == 0 _assert_html_matches_pandas_slice(html, expected_slice, paginated_pandas_df) @@ -215,30 +166,24 @@ def test_repr_anywidget_page_clamp_to_last_page_for_out_of_bounds_input( Given a widget, when a page number greater than the max is set, then the page number should be clamped to the last valid page. """ - expected_slice = paginated_pandas_df.iloc[10:15] + expected_slice = paginated_pandas_df.iloc[4:6] table_widget.page = 100 html = table_widget.table_html - assert table_widget.page == 2, "Page should be clamped to the last valid page." + assert table_widget.page == 2 _assert_html_matches_pandas_slice(html, expected_slice, paginated_pandas_df) @pytest.mark.parametrize( "page, start_row, end_row", [ - (0, 0, 3), - (1, 3, 6), - (2, 6, 9), - (3, 9, 12), - (4, 12, 15), + (0, 0, 3), # Page 0: rows 0-2 + (1, 3, 6), # Page 1: rows 3-5 ], ids=[ - "Page 0 (Rows 1-3)", - "Page 1 (Rows 4-6)", - "Page 2 (Rows 7-9)", - "Page 3 (Rows 10-12)", - "Page 4 (Rows 13-15)", + "Page 0 (Rows 0-2)", + "Page 1 (Rows 3-5)", ], ) def test_repr_anywidget_paginate_correctly_with_custom_page_size( @@ -246,7 +191,6 @@ def test_repr_anywidget_paginate_correctly_with_custom_page_size( ): """ A widget should paginate correctly with a custom page size of 3. - This uses pytest parameterization, a strong pattern from the examples. """ with bf.option_context("display.repr_mode", "anywidget", "display.max_rows", 3): from bigframes.display import TableWidget From 2863fd98447d2aac52d35c5551c6c4eead33f0fd Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Thu, 26 Jun 2025 04:51:14 +0000 Subject: [PATCH 13/23] testcase update --- tests/system/small/test_anywidget.py | 58 +++++++++++++++++++++------- 1 file changed, 43 insertions(+), 15 deletions(-) diff --git a/tests/system/small/test_anywidget.py b/tests/system/small/test_anywidget.py index 231e69bb26..cc56efc564 100644 --- a/tests/system/small/test_anywidget.py +++ b/tests/system/small/test_anywidget.py @@ -16,13 +16,13 @@ import pytest import bigframes as bf +from bigframes.display import TableWidget pytest.importorskip("anywidget") @pytest.fixture(scope="module") def paginated_pandas_df() -> pd.DataFrame: - """Create a test DataFrame with exactly 3 pages of manually defined data.""" """Create a minimal test DataFrame with exactly 3 pages of 2 rows each.""" test_data = pd.DataFrame( { @@ -52,13 +52,11 @@ def paginated_bf_df( @pytest.fixture(scope="module") -def table_widget(paginated_bf_df: bf.dataframe.DataFrame): +def table_widget(paginated_bf_df: bf.dataframe.DataFrame) -> TableWidget: """ Helper fixture to create a TableWidget instance with a fixed page size. This reduces duplication across tests that use the same widget configuration. """ - from bigframes.display import TableWidget - with bf.option_context("display.repr_mode", "anywidget", "display.max_rows", 2): widget = TableWidget(paginated_bf_df) return widget @@ -86,24 +84,46 @@ def _assert_html_matches_pandas_slice( assert row["page_indicator"] not in table_html -def test_repr_anywidget_initialization_set_correct_defaults( +def test_repr_anywidget_initialization_sets_page_to_zero( paginated_bf_df: bf.dataframe.DataFrame, - paginated_pandas_df: pd.DataFrame, ): - """ - A TableWidget should initialize with correct default values. - """ + """A TableWidget should initialize with the page number set to 0.""" with bf.option_context("display.repr_mode", "anywidget"): from bigframes.display import TableWidget widget = TableWidget(paginated_bf_df) assert widget.page == 0 + + +def test_repr_anywidget_initialization_sets_page_size_from_options( + paginated_bf_df: bf.dataframe.DataFrame, +): + """A TableWidget should initialize its page size from bf.options.""" + with bf.option_context("display.repr_mode", "anywidget"): + from bigframes.display import TableWidget + + widget = TableWidget(paginated_bf_df) + assert widget.page_size == bf.options.display.max_rows + + +def test_repr_anywidget_initialization_sets_row_count( + paginated_bf_df: bf.dataframe.DataFrame, + paginated_pandas_df: pd.DataFrame, +): + """A TableWidget should initialize with the correct total row count.""" + with bf.option_context("display.repr_mode", "anywidget"): + from bigframes.display import TableWidget + + widget = TableWidget(paginated_bf_df) + assert widget.row_count == len(paginated_pandas_df) -def test_repr_anywidget_display_first_page_on_load(table_widget, paginated_pandas_df): +def test_repr_anywidget_display_first_page_on_load( + table_widget: TableWidget, paginated_pandas_df: pd.DataFrame +): """ Given a widget, when it is first loaded, then it should display the first page of data. @@ -115,7 +135,9 @@ def test_repr_anywidget_display_first_page_on_load(table_widget, paginated_panda _assert_html_matches_pandas_slice(html, expected_slice, paginated_pandas_df) -def test_repr_anywidget_navigate_to_second_page(table_widget, paginated_pandas_df): +def test_repr_anywidget_navigate_to_second_page( + table_widget: TableWidget, paginated_pandas_df: pd.DataFrame +): """ Given a widget, when the page is set to 1, then it should display the second page of data. @@ -129,7 +151,9 @@ def test_repr_anywidget_navigate_to_second_page(table_widget, paginated_pandas_d _assert_html_matches_pandas_slice(html, expected_slice, paginated_pandas_df) -def test_repr_anywidget_navigate_to_last_page(table_widget, paginated_pandas_df): +def test_repr_anywidget_navigate_to_last_page( + table_widget: TableWidget, paginated_pandas_df: pd.DataFrame +): """ Given a widget, when the page is set to the last page (2), then it should display the final page of data. @@ -144,7 +168,7 @@ def test_repr_anywidget_navigate_to_last_page(table_widget, paginated_pandas_df) def test_repr_anywidget_page_clamp_to_zero_for_negative_input( - table_widget, paginated_pandas_df + table_widget: TableWidget, paginated_pandas_df: pd.DataFrame ): """ Given a widget, when a negative page number is set, @@ -160,7 +184,7 @@ def test_repr_anywidget_page_clamp_to_zero_for_negative_input( def test_repr_anywidget_page_clamp_to_last_page_for_out_of_bounds_input( - table_widget, paginated_pandas_df + table_widget: TableWidget, paginated_pandas_df: pd.DataFrame ): """ Given a widget, when a page number greater than the max is set, @@ -187,7 +211,11 @@ def test_repr_anywidget_page_clamp_to_last_page_for_out_of_bounds_input( ], ) def test_repr_anywidget_paginate_correctly_with_custom_page_size( - paginated_bf_df, paginated_pandas_df, page, start_row, end_row + paginated_bf_df: bf.dataframe.DataFrame, + paginated_pandas_df: pd.DataFrame, + page: int, + start_row: int, + end_row: int, ): """ A widget should paginate correctly with a custom page size of 3. From 88be9d02ae2ee296a83e6323b618c992f8ea5ab6 Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Thu, 26 Jun 2025 23:01:29 +0000 Subject: [PATCH 14/23] handle opitonal import --- bigframes/dataframe.py | 9 ++++-- bigframes/display/__init__.py | 5 +-- bigframes/display/anywidget.py | 48 ++++++++++++++++++---------- setup.py | 1 + tests/system/small/test_anywidget.py | 29 +++++++++-------- 5 files changed, 55 insertions(+), 37 deletions(-) diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py index 79aadfc696..cff151da78 100644 --- a/bigframes/dataframe.py +++ b/bigframes/dataframe.py @@ -783,9 +783,14 @@ def _repr_html_(self) -> str: try: from bigframes import display - return display.TableWidget(self) - except AttributeError: + # Store the widget for _repr_mimebundle_ to use + self._anywidget_instance = display.TableWidget(self) + # Return a fallback HTML string + return "Interactive table widget (anywidget mode)" + except (AttributeError, ValueError): # Fallback if anywidget is not available + import warnings + warnings.warn( "Anywidget mode is not available, falling back to deferred mode." ) diff --git a/bigframes/display/__init__.py b/bigframes/display/__init__.py index 882f1e6db5..48e52bc766 100644 --- a/bigframes/display/__init__.py +++ b/bigframes/display/__init__.py @@ -14,8 +14,6 @@ from __future__ import annotations -import warnings - try: import anywidget # noqa @@ -23,5 +21,4 @@ __all__ = ["TableWidget"] except Exception: - msg = "Anywidget mode not available as anywidget is not installed." - warnings.warn(msg) + pass diff --git a/bigframes/display/anywidget.py b/bigframes/display/anywidget.py index 592b3923ee..2f1ae18c6d 100644 --- a/bigframes/display/anywidget.py +++ b/bigframes/display/anywidget.py @@ -12,41 +12,45 @@ # See the License for the specific language governing permissions and # limitations under the License. +from __future__ import annotations + from importlib import resources import functools import math -from typing import Iterator +from typing import Iterator, TYPE_CHECKING import uuid -import anywidget # type: ignore import pandas as pd -import traitlets import bigframes +# Follow the same pattern as Polars +anywidget_installed = True +if TYPE_CHECKING: + import anywidget + import traitlets +else: + try: + import anywidget + import traitlets + except Exception: + anywidget_installed = False + class TableWidget(anywidget.AnyWidget): """ An interactive, paginated table widget for BigFrames DataFrames. """ - @functools.cached_property - def _esm(self): - """Load JavaScript code from external file.""" - return resources.read_text(bigframes.display, "table_widget.js") - - page = traitlets.Int(0).tag(sync=True) - page_size = traitlets.Int(25).tag(sync=True) - row_count = traitlets.Int(0).tag(sync=True) - table_html = traitlets.Unicode().tag(sync=True) - def __init__(self, dataframe): """ Initialize the TableWidget. - Args: dataframe: The Bigframes Dataframe to display. """ + if not anywidget_installed: + raise ValueError("Anywidget is not installed, cannot create TableWidget.") + super().__init__() self._dataframe = dataframe @@ -67,6 +71,17 @@ def __init__(self, dataframe): # get the initial page self._set_table_html() + # Use functools.cached_property instead of @property for _esm + @functools.cached_property + def _esm(self): + """Load JavaScript code from external file.""" + return resources.read_text(bigframes.display, "table_widget.js") + + page = traitlets.Int(0).tag(sync=True) + page_size = traitlets.Int(25).tag(sync=True) + row_count = traitlets.Int(0).tag(sync=True) + table_html = traitlets.Unicode().tag(sync=True) + @traitlets.validate("page") def _validate_page(self, proposal): """Validate and clamp page number to valid range.""" @@ -79,7 +94,6 @@ def _validate_page(self, proposal): def _get_next_batch(self) -> bool: """ Gets the next batch of data from the generator and appends to cache. - Returns: bool: True if a batch was successfully loaded, False otherwise. """ @@ -115,7 +129,7 @@ def _set_table_html(self): while len(self._cached_data) < end and not self._all_data_loaded: self._get_next_batch() - # Get the data fro the current page + # Get the data for the current page page_data = self._cached_data.iloc[start:end] # Generate HTML table @@ -129,5 +143,5 @@ def _set_table_html(self): @traitlets.observe("page") def _page_changed(self, change): - """Handler for when the page nubmer is changed from the frontend.""" + """Handler for when the page number is changed from the frontend.""" self._set_table_html() diff --git a/setup.py b/setup.py index ce0375527d..b1816a279f 100644 --- a/setup.py +++ b/setup.py @@ -89,6 +89,7 @@ # install anywidget for SQL "anywidget": [ "anywidget>=0.9.18", + "traitlets", ], } extras["all"] = list(sorted(frozenset(itertools.chain.from_iterable(extras.values())))) diff --git a/tests/system/small/test_anywidget.py b/tests/system/small/test_anywidget.py index cc56efc564..4bf23392d3 100644 --- a/tests/system/small/test_anywidget.py +++ b/tests/system/small/test_anywidget.py @@ -16,7 +16,6 @@ import pytest import bigframes as bf -from bigframes.display import TableWidget pytest.importorskip("anywidget") @@ -52,13 +51,15 @@ def paginated_bf_df( @pytest.fixture(scope="module") -def table_widget(paginated_bf_df: bf.dataframe.DataFrame) -> TableWidget: +def table_widget(paginated_bf_df: bf.dataframe.DataFrame): """ Helper fixture to create a TableWidget instance with a fixed page size. This reduces duplication across tests that use the same widget configuration. """ + from bigframes import display + with bf.option_context("display.repr_mode", "anywidget", "display.max_rows", 2): - widget = TableWidget(paginated_bf_df) + widget = display.TableWidget(paginated_bf_df) return widget @@ -89,9 +90,9 @@ def test_repr_anywidget_initialization_sets_page_to_zero( ): """A TableWidget should initialize with the page number set to 0.""" with bf.option_context("display.repr_mode", "anywidget"): - from bigframes.display import TableWidget + from bigframes import display - widget = TableWidget(paginated_bf_df) + widget = display.TableWidget(paginated_bf_df) assert widget.page == 0 @@ -101,9 +102,9 @@ def test_repr_anywidget_initialization_sets_page_size_from_options( ): """A TableWidget should initialize its page size from bf.options.""" with bf.option_context("display.repr_mode", "anywidget"): - from bigframes.display import TableWidget + from bigframes import display - widget = TableWidget(paginated_bf_df) + widget = display.TableWidget(paginated_bf_df) assert widget.page_size == bf.options.display.max_rows @@ -114,15 +115,15 @@ def test_repr_anywidget_initialization_sets_row_count( ): """A TableWidget should initialize with the correct total row count.""" with bf.option_context("display.repr_mode", "anywidget"): - from bigframes.display import TableWidget + from bigframes import display - widget = TableWidget(paginated_bf_df) + widget = display.TableWidget(paginated_bf_df) assert widget.row_count == len(paginated_pandas_df) def test_repr_anywidget_display_first_page_on_load( - table_widget: TableWidget, paginated_pandas_df: pd.DataFrame + table_widget, paginated_pandas_df: pd.DataFrame ): """ Given a widget, when it is first loaded, then it should display @@ -136,7 +137,7 @@ def test_repr_anywidget_display_first_page_on_load( def test_repr_anywidget_navigate_to_second_page( - table_widget: TableWidget, paginated_pandas_df: pd.DataFrame + table_widget, paginated_pandas_df: pd.DataFrame ): """ Given a widget, when the page is set to 1, then it should display @@ -152,7 +153,7 @@ def test_repr_anywidget_navigate_to_second_page( def test_repr_anywidget_navigate_to_last_page( - table_widget: TableWidget, paginated_pandas_df: pd.DataFrame + table_widget, paginated_pandas_df: pd.DataFrame ): """ Given a widget, when the page is set to the last page (2), @@ -168,7 +169,7 @@ def test_repr_anywidget_navigate_to_last_page( def test_repr_anywidget_page_clamp_to_zero_for_negative_input( - table_widget: TableWidget, paginated_pandas_df: pd.DataFrame + table_widget, paginated_pandas_df: pd.DataFrame ): """ Given a widget, when a negative page number is set, @@ -184,7 +185,7 @@ def test_repr_anywidget_page_clamp_to_zero_for_negative_input( def test_repr_anywidget_page_clamp_to_last_page_for_out_of_bounds_input( - table_widget: TableWidget, paginated_pandas_df: pd.DataFrame + table_widget, paginated_pandas_df: pd.DataFrame ): """ Given a widget, when a page number greater than the max is set, From 5c475b50425dba20827736da9c93459d772c3c26 Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Fri, 27 Jun 2025 02:36:39 +0000 Subject: [PATCH 15/23] polish python function --- bigframes/display/anywidget.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/bigframes/display/anywidget.py b/bigframes/display/anywidget.py index 2f1ae18c6d..65fd188d78 100644 --- a/bigframes/display/anywidget.py +++ b/bigframes/display/anywidget.py @@ -24,8 +24,7 @@ import bigframes -# Follow the same pattern as Polars -anywidget_installed = True +ANYWIDGET_INSTALLED = True if TYPE_CHECKING: import anywidget import traitlets @@ -34,7 +33,7 @@ import anywidget import traitlets except Exception: - anywidget_installed = False + ANYWIDGET_INSTALLED = False class TableWidget(anywidget.AnyWidget): @@ -42,13 +41,14 @@ class TableWidget(anywidget.AnyWidget): An interactive, paginated table widget for BigFrames DataFrames. """ - def __init__(self, dataframe): + def __init__(self, dataframe: bigframes.dataframe.DataFrame): """ Initialize the TableWidget. + Args: - dataframe: The Bigframes Dataframe to display. + dataframe: The Bigframes Dataframe to display in the widget. """ - if not anywidget_installed: + if not ANYWIDGET_INSTALLED: raise ValueError("Anywidget is not installed, cannot create TableWidget.") super().__init__() @@ -62,7 +62,7 @@ def __init__(self, dataframe): self._cached_data = pd.DataFrame(columns=self._dataframe.columns) self._table_id = str(uuid.uuid4()) self._all_data_loaded = False - self._batch_iterator = None + self._batch_iterator: Iterator[pd.DataFrame] | None = None # len(dataframe) is expensive, since it will trigger a # SELECT COUNT(*) query. It is a must have however. @@ -94,7 +94,8 @@ def _validate_page(self, proposal): def _get_next_batch(self) -> bool: """ Gets the next batch of data from the generator and appends to cache. - Returns: + + Return: bool: True if a batch was successfully loaded, False otherwise. """ if self._all_data_loaded: From 5333686b0185cd809c50ada5291f46555a4e8b74 Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Fri, 27 Jun 2025 02:40:06 +0000 Subject: [PATCH 16/23] remove duplicate warning --- bigframes/dataframe.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py index cff151da78..3bc5d49175 100644 --- a/bigframes/dataframe.py +++ b/bigframes/dataframe.py @@ -789,8 +789,6 @@ def _repr_html_(self) -> str: return "Interactive table widget (anywidget mode)" except (AttributeError, ValueError): # Fallback if anywidget is not available - import warnings - warnings.warn( "Anywidget mode is not available, falling back to deferred mode." ) From 701c05e065aee86249a3a850fae4ad094c7501bf Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Fri, 27 Jun 2025 02:53:42 +0000 Subject: [PATCH 17/23] finish touch up --- bigframes/display/anywidget.py | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/bigframes/display/anywidget.py b/bigframes/display/anywidget.py index 65fd188d78..c56f3f8737 100644 --- a/bigframes/display/anywidget.py +++ b/bigframes/display/anywidget.py @@ -17,7 +17,7 @@ from importlib import resources import functools import math -from typing import Iterator, TYPE_CHECKING +from typing import Any, Dict, Iterator, TYPE_CHECKING import uuid import pandas as pd @@ -49,7 +49,7 @@ def __init__(self, dataframe: bigframes.dataframe.DataFrame): dataframe: The Bigframes Dataframe to display in the widget. """ if not ANYWIDGET_INSTALLED: - raise ValueError("Anywidget is not installed, cannot create TableWidget.") + raise ImportError("Anywidget is not installed, cannot create TableWidget.") super().__init__() self._dataframe = dataframe @@ -71,7 +71,6 @@ def __init__(self, dataframe: bigframes.dataframe.DataFrame): # get the initial page self._set_table_html() - # Use functools.cached_property instead of @property for _esm @functools.cached_property def _esm(self): """Load JavaScript code from external file.""" @@ -83,12 +82,23 @@ def _esm(self): table_html = traitlets.Unicode().tag(sync=True) @traitlets.validate("page") - def _validate_page(self, proposal): - """Validate and clamp page number to valid range.""" + def _validate_page(self, proposal: Dict[str, Any]): + """ + Validate and clamp the page number to a valid range. + + Args: + proposal: + A dictionary from the traitlets library containing the proposed + change. The new value is in proposal["value"]. + """ value = proposal["value"] if self.row_count == 0 or self.page_size == 0: return 0 + + # Calculate the zero-indexed maximum page number. max_page = max(0, math.ceil(self.row_count / self.page_size) - 1) + + # Clamp the proposed value to the valid range [0, max_page]. return max(0, min(value, max_page)) def _get_next_batch(self) -> bool: @@ -96,7 +106,7 @@ def _get_next_batch(self) -> bool: Gets the next batch of data from the generator and appends to cache. Return: - bool: True if a batch was successfully loaded, False otherwise. + True if a batch was successfully loaded, False otherwise. """ if self._all_data_loaded: return False From 23877c601c540a8391765bdb870459074d029bb9 Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Fri, 27 Jun 2025 22:08:22 +0000 Subject: [PATCH 18/23] use WIDGET_BASE --- bigframes/display/anywidget.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/bigframes/display/anywidget.py b/bigframes/display/anywidget.py index c56f3f8737..01236ab322 100644 --- a/bigframes/display/anywidget.py +++ b/bigframes/display/anywidget.py @@ -17,7 +17,7 @@ from importlib import resources import functools import math -from typing import Any, Dict, Iterator, TYPE_CHECKING +from typing import Any, Dict, Iterator, Type, TYPE_CHECKING import uuid import pandas as pd @@ -35,8 +35,14 @@ except Exception: ANYWIDGET_INSTALLED = False +WIDGET_BASE: Type[Any] +if ANYWIDGET_INSTALLED: + WIDGET_BASE = anywidget.AnyWidget +else: + WIDGET_BASE = object + -class TableWidget(anywidget.AnyWidget): +class TableWidget(WIDGET_BASE): """ An interactive, paginated table widget for BigFrames DataFrames. """ From 799daf6c25d4ef1c9375486b01cb6c69994c27e0 Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Thu, 3 Jul 2025 08:51:22 +0000 Subject: [PATCH 19/23] code change, add more tests --- bigframes/dataframe.py | 55 +++++--- bigframes/display/anywidget.py | 72 +++++----- notebooks/dataframes/anywidget_mode.ipynb | 129 ++++++++++++++---- setup.py | 2 +- testing/constraints-3.9.txt | 3 + tests/system/small/test_anywidget.py | 157 +++++++++++++++++----- 6 files changed, 307 insertions(+), 111 deletions(-) diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py index 3bc5d49175..bf53c3d056 100644 --- a/bigframes/dataframe.py +++ b/bigframes/dataframe.py @@ -39,6 +39,7 @@ Union, ) import warnings +import weakref import bigframes_vendored.constants as constants import bigframes_vendored.pandas.core.frame as vendored_pandas_frame @@ -87,6 +88,7 @@ if typing.TYPE_CHECKING: from _typeshed import SupportsRichComparison + from bigframes.display.anywidget import TableWidget import bigframes.session SingleItemValue = Union[bigframes.series.Series, int, float, str, Callable] @@ -111,6 +113,9 @@ class DataFrame(vendored_pandas_frame.DataFrame): # Must be above 5000 for pandas to delegate to bigframes for binops __pandas_priority__ = 15000 + # Type annotation for anywidget instance + _anywidget_instance: Optional[weakref.ReferenceType["TableWidget"]] = None + def __init__( self, data=None, @@ -779,21 +784,7 @@ def _repr_html_(self) -> str: if opts.repr_mode == "deferred": return formatter.repr_query_job(self._compute_dry_run()) - if opts.repr_mode == "anywidget": - try: - from bigframes import display - - # Store the widget for _repr_mimebundle_ to use - self._anywidget_instance = display.TableWidget(self) - # Return a fallback HTML string - return "Interactive table widget (anywidget mode)" - except (AttributeError, ValueError): - # Fallback if anywidget is not available - warnings.warn( - "Anywidget mode is not available, falling back to deferred mode." - ) - return formatter.repr_query_job(self._compute_dry_run()) - + # Process blob columns first, regardless of display mode self._cached() df = self.copy() if bigframes.options.display.blob_display: @@ -805,7 +796,40 @@ def _repr_html_(self) -> str: for col in blob_cols: # TODO(garrettwu): Not necessary to get access urls for all the rows. Update when having a to get URLs from local data. df[col] = df[col].blob._get_runtime(mode="R", with_metadata=True) + else: + blob_cols = [] + if opts.repr_mode == "anywidget": + try: + from IPython.display import display as ipython_display + + from bigframes import display + + # Check if widget instance already exists and reuse it + widget = None + if ( + hasattr(self, "_anywidget_instance") + and self._anywidget_instance is not None + ): + widget = self._anywidget_instance() + + # If widget doesn't exist or was garbage collected, create a new one + if widget is None: + # Pass the processed dataframe (with blob URLs) to the widget + widget = display.TableWidget(df) + self._anywidget_instance = weakref.ref(widget) + + ipython_display(widget) + return "" # Return empty string since we used display() + + except (AttributeError, ValueError, ImportError): + # Fallback if anywidget is not available + warnings.warn( + "Anywidget mode is not available. Please `pip install anywidget traitlets` or `pip install 'bigframes[anywidget]'` to use interactive tables. Falling back to deferred mode." + ) + return formatter.repr_query_job(self._compute_dry_run()) + + # Continue with regular HTML rendering for non-anywidget modes # TODO(swast): pass max_columns and get the true column count back. Maybe # get 1 more column than we have requested so that pandas can add the # ... for us? @@ -814,7 +838,6 @@ def _repr_html_(self) -> str: ) self._set_internal_query_job(query_job) - column_count = len(pandas_df.columns) with display_options.pandas_repr(opts): diff --git a/bigframes/display/anywidget.py b/bigframes/display/anywidget.py index 01236ab322..fd363cd4d6 100644 --- a/bigframes/display/anywidget.py +++ b/bigframes/display/anywidget.py @@ -17,23 +17,21 @@ from importlib import resources import functools import math -from typing import Any, Dict, Iterator, Type, TYPE_CHECKING +from typing import Any, Dict, Iterator, List, Optional, Type import uuid import pandas as pd import bigframes -ANYWIDGET_INSTALLED = True -if TYPE_CHECKING: +# Simplified import structure as suggested in review +try: import anywidget import traitlets -else: - try: - import anywidget - import traitlets - except Exception: - ANYWIDGET_INSTALLED = False + + ANYWIDGET_INSTALLED = True +except Exception: + ANYWIDGET_INSTALLED = False WIDGET_BASE: Type[Any] if ANYWIDGET_INSTALLED: @@ -48,14 +46,15 @@ class TableWidget(WIDGET_BASE): """ def __init__(self, dataframe: bigframes.dataframe.DataFrame): - """ - Initialize the TableWidget. + """Initialize the TableWidget. Args: dataframe: The Bigframes Dataframe to display in the widget. """ if not ANYWIDGET_INSTALLED: - raise ImportError("Anywidget is not installed, cannot create TableWidget.") + raise ImportError( + "Please `pip install anywidget traitlets` or `pip install 'bigframes[anywidget]'` to use TableWidget." + ) super().__init__() self._dataframe = dataframe @@ -65,13 +64,20 @@ def __init__(self, dataframe: bigframes.dataframe.DataFrame): # Initialize data fetching attributes. self._batches = dataframe.to_pandas_batches(page_size=self.page_size) - self._cached_data = pd.DataFrame(columns=self._dataframe.columns) + + # Use list of DataFrames to avoid memory copies from concatenation + self._cached_batches: List[pd.DataFrame] = [] + + # Unique identifier for HTML table element self._table_id = str(uuid.uuid4()) self._all_data_loaded = False - self._batch_iterator: Iterator[pd.DataFrame] | None = None + # Renamed from _batch_iterator to _batch_iter to avoid naming conflict + self._batch_iter: Optional[Iterator[pd.DataFrame]] = None # len(dataframe) is expensive, since it will trigger a # SELECT COUNT(*) query. It is a must have however. + # TODO(b/428238610): Start iterating over the result of `to_pandas_batches()` + # before we get here so that the count might already be cached. self.row_count = len(dataframe) # get the initial page @@ -89,14 +95,13 @@ def _esm(self): @traitlets.validate("page") def _validate_page(self, proposal: Dict[str, Any]): - """ - Validate and clamp the page number to a valid range. + """Validate and clamp the page number to a valid range. Args: - proposal: - A dictionary from the traitlets library containing the proposed - change. The new value is in proposal["value"]. + proposal: A dictionary from the traitlets library containing the + proposed change. The new value is in proposal["value"]. """ + value = proposal["value"] if self.row_count == 0 or self.page_size == 0: return 0 @@ -120,22 +125,23 @@ def _get_next_batch(self) -> bool: try: iterator = self._get_batch_iterator() batch = next(iterator) - self._cached_data = pd.concat([self._cached_data, batch], ignore_index=True) + self._cached_batches.append(batch) return True except StopIteration: self._all_data_loaded = True - # update row count if we loaded all data - if self.row_count == 0: - self.row_count = len(self._cached_data) return False - except Exception as e: - raise RuntimeError(f"Error during batch processing: {str(e)}") from e def _get_batch_iterator(self) -> Iterator[pd.DataFrame]: """Lazily initializes and returns the batch iterator.""" - if self._batch_iterator is None: - self._batch_iterator = iter(self._batches) - return self._batch_iterator + if self._batch_iter is None: + self._batch_iter = iter(self._batches) + return self._batch_iter + + def _get_cached_data(self) -> pd.DataFrame: + """Combine all cached batches into a single DataFrame.""" + if not self._cached_batches: + return pd.DataFrame(columns=self._dataframe.columns) + return pd.concat(self._cached_batches, ignore_index=True) def _set_table_html(self): """Sets the current html data based on the current page and page size.""" @@ -143,11 +149,15 @@ def _set_table_html(self): end = start + self.page_size # fetch more data if the requested page is outside our cache - while len(self._cached_data) < end and not self._all_data_loaded: - self._get_next_batch() + cached_data = self._get_cached_data() + while len(cached_data) < end and not self._all_data_loaded: + if self._get_next_batch(): + cached_data = self._get_cached_data() + else: + break # Get the data for the current page - page_data = self._cached_data.iloc[start:end] + page_data = cached_data.iloc[start:end] # Generate HTML table self.table_html = page_data.to_html( diff --git a/notebooks/dataframes/anywidget_mode.ipynb b/notebooks/dataframes/anywidget_mode.ipynb index 72452f11c8..05a943da74 100644 --- a/notebooks/dataframes/anywidget_mode.ipynb +++ b/notebooks/dataframes/anywidget_mode.ipynb @@ -68,14 +68,14 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "id": "f289d250", "metadata": {}, "outputs": [ { "data": { "text/html": [ - "Query job 6fddde2a-5b5f-4920-a0b1-cb38e636bab3 is DONE. 0 Bytes processed. Open Job" + "Query job ad1f38a1-8cfa-4df8-ad88-f5ee052e135e is DONE. 0 Bytes processed. Open Job" ], "text/plain": [ "" @@ -83,6 +83,13 @@ }, "metadata": {}, "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Computation deferred. Computation will process 171.4 MB\n" + ] } ], "source": [ @@ -100,19 +107,16 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "id": "42bb02ab", "metadata": {}, "outputs": [ { - "data": { - "text/plain": [ - "Computation deferred. Computation will process 171.4 MB" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" + "name": "stdout", + "output_type": "stream", + "text": [ + "Computation deferred. Computation will process 171.4 MB\n" + ] } ], "source": [ @@ -135,17 +139,34 @@ "metadata": {}, "outputs": [ { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/display/anywidget.py:138: FutureWarning: The behavior of DataFrame concatenation with empty or all-NA entries is deprecated. In a future version, this will no longer exclude empty or all-NA columns when determining the result dtypes. To retain the old behavior, exclude the relevant entries before the concat operation.\n", - " self._cached_data = pd.concat([self._cached_data, batch], ignore_index=True)\n", - "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/venv/lib/python3.10/site-packages/IPython/core/formatters.py:429: FormatterWarning: text/html formatter returned invalid type (expected ) for object: Computation deferred. Computation will process 171.4 MB\n", - " warnings.warn(\n" - ] + "data": { + "text/html": [ + "Query job 51ab6180-0ea3-4f9c-9cf0-02ae45ebb1be is DONE. 171.4 MB processed. Open Job" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" }, { "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "e12cd42c7db14ef0aa882f20553b6318", + "version_major": 2, + "version_minor": 1 + }, + "text/plain": [ + "TableWidget(row_count=5552452, table_html='=0.9.18", - "traitlets", + "traitlets>=5.0.0", ], } extras["all"] = list(sorted(frozenset(itertools.chain.from_iterable(extras.values())))) diff --git a/testing/constraints-3.9.txt b/testing/constraints-3.9.txt index dff245d176..155d4388a4 100644 --- a/testing/constraints-3.9.txt +++ b/testing/constraints-3.9.txt @@ -33,3 +33,6 @@ pytz==2022.7 toolz==0.11 typing-extensions==4.5.0 rich==12.4.4 +# For anywidget mode +anywidget>=0.9.18 +traitlets==5.0.0 diff --git a/tests/system/small/test_anywidget.py b/tests/system/small/test_anywidget.py index 4bf23392d3..34d6309c6f 100644 --- a/tests/system/small/test_anywidget.py +++ b/tests/system/small/test_anywidget.py @@ -19,6 +19,11 @@ pytest.importorskip("anywidget") +# Test constants to avoid change detector tests +EXPECTED_ROW_COUNT = 6 +EXPECTED_PAGE_SIZE = 2 +EXPECTED_TOTAL_PAGES = 3 + @pytest.fixture(scope="module") def paginated_pandas_df() -> pd.DataFrame: @@ -50,7 +55,7 @@ def paginated_bf_df( return session.read_pandas(paginated_pandas_df) -@pytest.fixture(scope="module") +@pytest.fixture def table_widget(paginated_bf_df: bf.dataframe.DataFrame): """ Helper fixture to create a TableWidget instance with a fixed page size. @@ -59,8 +64,49 @@ def table_widget(paginated_bf_df: bf.dataframe.DataFrame): from bigframes import display with bf.option_context("display.repr_mode", "anywidget", "display.max_rows", 2): - widget = display.TableWidget(paginated_bf_df) - return widget + # Delay context manager cleanup of `max_rows` until after tests finish. + yield display.TableWidget(paginated_bf_df) + + +@pytest.fixture(scope="module") +def small_pandas_df() -> pd.DataFrame: + """Create a DataFrame smaller than the page size for edge case testing.""" + return pd.DataFrame( + { + "id": [0, 1], + "page_indicator": ["small_row_1", "small_row_2"], + "value": [0, 1], + } + ) + + +@pytest.fixture(scope="module") +def small_bf_df( + session: bf.Session, small_pandas_df: pd.DataFrame +) -> bf.dataframe.DataFrame: + return session.read_pandas(small_pandas_df) + + +@pytest.fixture +def small_widget(small_bf_df): + """Helper fixture for tests using a DataFrame smaller than the page size.""" + from bigframes import display + + with bf.option_context("display.repr_mode", "anywidget", "display.max_rows", 5): + yield display.TableWidget(small_bf_df) + + +@pytest.fixture(scope="module") +def empty_pandas_df() -> pd.DataFrame: + """Create an empty DataFrame for edge case testing.""" + return pd.DataFrame(columns=["id", "page_indicator", "value"]) + + +@pytest.fixture(scope="module") +def empty_bf_df( + session: bf.Session, empty_pandas_df: pd.DataFrame +) -> bf.dataframe.DataFrame: + return session.read_pandas(empty_pandas_df) def _assert_html_matches_pandas_slice( @@ -85,44 +131,24 @@ def _assert_html_matches_pandas_slice( assert row["page_indicator"] not in table_html -def test_repr_anywidget_initialization_sets_page_to_zero( +def test_widget_initialization_should_set_default_state( paginated_bf_df: bf.dataframe.DataFrame, ): - """A TableWidget should initialize with the page number set to 0.""" - with bf.option_context("display.repr_mode", "anywidget"): + """ + A TableWidget should initialize with correct default values for the page, + page size, and total row count. + """ + with bf.option_context("display.repr_mode", "anywidget", "display.max_rows", 2): from bigframes import display widget = display.TableWidget(paginated_bf_df) assert widget.page == 0 + assert widget.page_size == EXPECTED_PAGE_SIZE + assert widget.row_count == EXPECTED_ROW_COUNT -def test_repr_anywidget_initialization_sets_page_size_from_options( - paginated_bf_df: bf.dataframe.DataFrame, -): - """A TableWidget should initialize its page size from bf.options.""" - with bf.option_context("display.repr_mode", "anywidget"): - from bigframes import display - - widget = display.TableWidget(paginated_bf_df) - - assert widget.page_size == bf.options.display.max_rows - - -def test_repr_anywidget_initialization_sets_row_count( - paginated_bf_df: bf.dataframe.DataFrame, - paginated_pandas_df: pd.DataFrame, -): - """A TableWidget should initialize with the correct total row count.""" - with bf.option_context("display.repr_mode", "anywidget"): - from bigframes import display - - widget = display.TableWidget(paginated_bf_df) - - assert widget.row_count == len(paginated_pandas_df) - - -def test_repr_anywidget_display_first_page_on_load( +def test_widget_display_should_show_first_page_on_load( table_widget, paginated_pandas_df: pd.DataFrame ): """ @@ -136,7 +162,7 @@ def test_repr_anywidget_display_first_page_on_load( _assert_html_matches_pandas_slice(html, expected_slice, paginated_pandas_df) -def test_repr_anywidget_navigate_to_second_page( +def test_widget_navigation_should_display_second_page( table_widget, paginated_pandas_df: pd.DataFrame ): """ @@ -152,7 +178,7 @@ def test_repr_anywidget_navigate_to_second_page( _assert_html_matches_pandas_slice(html, expected_slice, paginated_pandas_df) -def test_repr_anywidget_navigate_to_last_page( +def test_widget_navigation_should_display_last_page( table_widget, paginated_pandas_df: pd.DataFrame ): """ @@ -168,7 +194,7 @@ def test_repr_anywidget_navigate_to_last_page( _assert_html_matches_pandas_slice(html, expected_slice, paginated_pandas_df) -def test_repr_anywidget_page_clamp_to_zero_for_negative_input( +def test_widget_navigation_should_clamp_to_zero_for_negative_input( table_widget, paginated_pandas_df: pd.DataFrame ): """ @@ -184,7 +210,7 @@ def test_repr_anywidget_page_clamp_to_zero_for_negative_input( _assert_html_matches_pandas_slice(html, expected_slice, paginated_pandas_df) -def test_repr_anywidget_page_clamp_to_last_page_for_out_of_bounds_input( +def test_widget_navigation_should_clamp_to_last_page_for_out_of_bounds_input( table_widget, paginated_pandas_df: pd.DataFrame ): """ @@ -211,7 +237,7 @@ def test_repr_anywidget_page_clamp_to_last_page_for_out_of_bounds_input( "Page 1 (Rows 3-5)", ], ) -def test_repr_anywidget_paginate_correctly_with_custom_page_size( +def test_widget_pagination_should_work_with_custom_page_size( paginated_bf_df: bf.dataframe.DataFrame, paginated_pandas_df: pd.DataFrame, page: int, @@ -234,3 +260,60 @@ def test_repr_anywidget_paginate_correctly_with_custom_page_size( assert widget.page == page _assert_html_matches_pandas_slice(html, expected_slice, paginated_pandas_df) + + +def test_widget_with_few_rows_should_display_all_rows(small_widget, small_pandas_df): + """ + Given a DataFrame smaller than the page size, the widget should + display all rows on the first page. + """ + html = small_widget.table_html + + _assert_html_matches_pandas_slice(html, small_pandas_df, small_pandas_df) + + +def test_widget_with_few_rows_should_have_only_one_page(small_widget): + """ + Given a DataFrame smaller than the page size, the widget should + clamp page navigation, effectively having only one page. + """ + assert small_widget.page == 0 + + # Attempt to navigate past the end + small_widget.page = 1 + + # Should be clamped back to the only valid page + assert small_widget.page == 0 + + +def test_widget_page_size_should_be_immutable_after_creation( + paginated_bf_df: bf.dataframe.DataFrame, +): + """ + A widget's page size should be fixed on creation and not be affected + by subsequent changes to global options. + """ + with bf.option_context("display.repr_mode", "anywidget", "display.max_rows", 2): + from bigframes.display import TableWidget + + widget = TableWidget(paginated_bf_df) + assert widget.page_size == 2 + + # Navigate to second page to ensure widget is in a non-default state + widget.page = 1 + assert widget.page == 1 + + # Change global max_rows - widget should not be affected + bf.options.display.max_rows = 10 + + assert widget.page_size == 2 # Should remain unchanged + assert widget.page == 1 # Should remain on same page + + +# TODO(b/428918844, shuowei): Add test for empty results once this bug is fixed +# This test is blocked by b/428918844 which causes to_pandas_batches() +# to return empty iterables for empty DataFrames. + +# TODO(shuowei): Add tests for custom index and multiindex +# This may not be necessary for the SQL Cell use case but should be +# considered for completeness. From fa09dc84a2dfd86215d0129f363cf56769c4b86e Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Mon, 7 Jul 2025 23:50:01 +0000 Subject: [PATCH 20/23] change function name --- bigframes/display/anywidget.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/bigframes/display/anywidget.py b/bigframes/display/anywidget.py index fd363cd4d6..88345278ff 100644 --- a/bigframes/display/anywidget.py +++ b/bigframes/display/anywidget.py @@ -24,7 +24,10 @@ import bigframes -# Simplified import structure as suggested in review +# anywidget and traitlets are optional dependencies. We don't want the import of this +# module to fail if they aren't installed, though. Instead, we try to limit the surface that +# these packages could affect. This makes unit testing easier and ensures we don't +# accidentally make these required packages. try: import anywidget import traitlets @@ -123,7 +126,7 @@ def _get_next_batch(self) -> bool: return False try: - iterator = self._get_batch_iterator() + iterator = self._batch_iterator() batch = next(iterator) self._cached_batches.append(batch) return True @@ -131,7 +134,7 @@ def _get_next_batch(self) -> bool: self._all_data_loaded = True return False - def _get_batch_iterator(self) -> Iterator[pd.DataFrame]: + def _batch_iterator(self) -> Iterator[pd.DataFrame]: """Lazily initializes and returns the batch iterator.""" if self._batch_iter is None: self._batch_iter = iter(self._batches) From 9dd2f1ef21ecff211be650dd422ab018d55e0516 Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Tue, 8 Jul 2025 22:54:29 +0000 Subject: [PATCH 21/23] remove widget reuse --- bigframes/dataframe.py | 17 ++------ bigframes/display/anywidget.py | 10 +++-- notebooks/dataframes/anywidget_mode.ipynb | 42 +++++++++---------- tests/system/small/test_anywidget.py | 50 +++++++++++++++++------ 4 files changed, 68 insertions(+), 51 deletions(-) diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py index bf53c3d056..57944756f0 100644 --- a/bigframes/dataframe.py +++ b/bigframes/dataframe.py @@ -805,19 +805,10 @@ def _repr_html_(self) -> str: from bigframes import display - # Check if widget instance already exists and reuse it - widget = None - if ( - hasattr(self, "_anywidget_instance") - and self._anywidget_instance is not None - ): - widget = self._anywidget_instance() - - # If widget doesn't exist or was garbage collected, create a new one - if widget is None: - # Pass the processed dataframe (with blob URLs) to the widget - widget = display.TableWidget(df) - self._anywidget_instance = weakref.ref(widget) + # Always create a new widget instance for each display call + # This ensures that each cell gets its own widget and prevents + # unintended sharing between cells + widget = display.TableWidget(df.copy()) ipython_display(widget) return "" # Return empty string since we used display() diff --git a/bigframes/display/anywidget.py b/bigframes/display/anywidget.py index 88345278ff..04d82c97fe 100644 --- a/bigframes/display/anywidget.py +++ b/bigframes/display/anywidget.py @@ -126,7 +126,7 @@ def _get_next_batch(self) -> bool: return False try: - iterator = self._batch_iterator() + iterator = self._batch_iterator batch = next(iterator) self._cached_batches.append(batch) return True @@ -134,13 +134,15 @@ def _get_next_batch(self) -> bool: self._all_data_loaded = True return False + @property def _batch_iterator(self) -> Iterator[pd.DataFrame]: """Lazily initializes and returns the batch iterator.""" if self._batch_iter is None: self._batch_iter = iter(self._batches) return self._batch_iter - def _get_cached_data(self) -> pd.DataFrame: + @property + def _cached_data(self) -> pd.DataFrame: """Combine all cached batches into a single DataFrame.""" if not self._cached_batches: return pd.DataFrame(columns=self._dataframe.columns) @@ -152,10 +154,10 @@ def _set_table_html(self): end = start + self.page_size # fetch more data if the requested page is outside our cache - cached_data = self._get_cached_data() + cached_data = self._cached_data while len(cached_data) < end and not self._all_data_loaded: if self._get_next_batch(): - cached_data = self._get_cached_data() + cached_data = self._cached_data else: break diff --git a/notebooks/dataframes/anywidget_mode.ipynb b/notebooks/dataframes/anywidget_mode.ipynb index 05a943da74..072e5c6504 100644 --- a/notebooks/dataframes/anywidget_mode.ipynb +++ b/notebooks/dataframes/anywidget_mode.ipynb @@ -75,7 +75,7 @@ { "data": { "text/html": [ - "Query job ad1f38a1-8cfa-4df8-ad88-f5ee052e135e is DONE. 0 Bytes processed. Open Job" + "Query job 0b22b0f5-b952-4546-a969-41a89e343e9b is DONE. 0 Bytes processed. Open Job" ], "text/plain": [ "" @@ -141,7 +141,7 @@ { "data": { "text/html": [ - "Query job 51ab6180-0ea3-4f9c-9cf0-02ae45ebb1be is DONE. 171.4 MB processed. Open Job" + "Query job 8e57da45-b6a7-44fb-8c4f-4b87058d94cb is DONE. 171.4 MB processed. Open Job" ], "text/plain": [ "" @@ -153,7 +153,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "e12cd42c7db14ef0aa882f20553b6318", + "model_id": "4d00aaf284984cbc97483c651b9c5110", "version_major": 2, "version_minor": 1 }, @@ -204,7 +204,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "b2fdf1fda7344d75aea47fc5de8d6729", + "model_id": "d4af4cf7d24d4f1c8e9c9b5f237df32b", "version_major": 2, "version_minor": 1 }, @@ -222,10 +222,10 @@ "import math\n", " \n", "# Create widget programmatically \n", - "widget = TableWidget(df) \n", - "print(f\"Total pages: {math.ceil(widget.row_count / widget.page_size)}\") \n", + "widget = TableWidget(df)\n", + "print(f\"Total pages: {math.ceil(widget.row_count / widget.page_size)}\")\n", " \n", - "# Display the widget \n", + "# Display the widget\n", "widget" ] }, @@ -254,15 +254,15 @@ } ], "source": [ - "# Simulate button clicks programmatically \n", - "print(\"Current page:\", widget.page) \n", - " \n", - "# Go to next page \n", - "widget.page = 1 \n", - "print(\"After next:\", widget.page) \n", - " \n", - "# Go to previous page \n", - "widget.page = 0 \n", + "# Simulate button clicks programmatically\n", + "print(\"Current page:\", widget.page)\n", + "\n", + "# Go to next page\n", + "widget.page = 1\n", + "print(\"After next:\", widget.page)\n", + "\n", + "# Go to previous page\n", + "widget.page = 0\n", "print(\"After prev:\", widget.page)" ] }, @@ -290,7 +290,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "612cf38f9ab74908a1435b180b69ee95", + "model_id": "0f04ad3c464145ee9735eba09f5107a9", "version_major": 2, "version_minor": 1 }, @@ -304,10 +304,10 @@ } ], "source": [ - "# Test with very small dataset \n", - "small_df = df.head(5) \n", - "small_widget = TableWidget(small_df) \n", - "print(f\"Small dataset pages: {math.ceil(small_widget.row_count / small_widget.page_size)}\") \n", + "# Test with very small dataset\n", + "small_df = df.head(5)\n", + "small_widget = TableWidget(small_df)\n", + "print(f\"Small dataset pages: {math.ceil(small_widget.row_count / small_widget.page_size)}\")\n", "small_widget" ] } diff --git a/tests/system/small/test_anywidget.py b/tests/system/small/test_anywidget.py index 34d6309c6f..b6dfb22934 100644 --- a/tests/system/small/test_anywidget.py +++ b/tests/system/small/test_anywidget.py @@ -131,21 +131,26 @@ def _assert_html_matches_pandas_slice( assert row["page_indicator"] not in table_html -def test_widget_initialization_should_set_default_state( +def test_widget_initialization_should_calculate_total_row_count( paginated_bf_df: bf.dataframe.DataFrame, ): - """ - A TableWidget should initialize with correct default values for the page, - page size, and total row count. - """ - with bf.option_context("display.repr_mode", "anywidget", "display.max_rows", 2): - from bigframes import display + """A TableWidget should correctly calculate the total row count on creation.""" + from bigframes import display + with bf.option_context("display.repr_mode", "anywidget", "display.max_rows", 2): widget = display.TableWidget(paginated_bf_df) - assert widget.page == 0 - assert widget.page_size == EXPECTED_PAGE_SIZE - assert widget.row_count == EXPECTED_ROW_COUNT + assert widget.row_count == EXPECTED_ROW_COUNT + + +def test_widget_initialization_should_set_default_pagination( + table_widget, +): + """A TableWidget should initialize with page 0 and the correct page size.""" + # The `table_widget` fixture already creates the widget. + # Assert its state. + assert table_widget.page == 0 + assert table_widget.page_size == EXPECTED_PAGE_SIZE def test_widget_display_should_show_first_page_on_load( @@ -310,9 +315,28 @@ def test_widget_page_size_should_be_immutable_after_creation( assert widget.page == 1 # Should remain on same page -# TODO(b/428918844, shuowei): Add test for empty results once this bug is fixed -# This test is blocked by b/428918844 which causes to_pandas_batches() -# to return empty iterables for empty DataFrames. +def test_empty_widget_should_have_zero_row_count(empty_bf_df: bf.dataframe.DataFrame): + """Given an empty DataFrame, the widget's row count should be 0.""" + with bf.option_context("display.repr_mode", "anywidget"): + from bigframes.display import TableWidget + + widget = TableWidget(empty_bf_df) + + assert widget.row_count == 0 + + +def test_empty_widget_should_render_table_headers(empty_bf_df: bf.dataframe.DataFrame): + """Given an empty DataFrame, the widget should still render table headers.""" + with bf.option_context("display.repr_mode", "anywidget"): + from bigframes.display import TableWidget + + widget = TableWidget(empty_bf_df) + + html = widget.table_html + + assert " Date: Fri, 11 Jul 2025 16:54:33 +0000 Subject: [PATCH 22/23] remove weakref import --- bigframes/dataframe.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py index 57944756f0..8e6b8efbc8 100644 --- a/bigframes/dataframe.py +++ b/bigframes/dataframe.py @@ -39,7 +39,6 @@ Union, ) import warnings -import weakref import bigframes_vendored.constants as constants import bigframes_vendored.pandas.core.frame as vendored_pandas_frame @@ -88,7 +87,6 @@ if typing.TYPE_CHECKING: from _typeshed import SupportsRichComparison - from bigframes.display.anywidget import TableWidget import bigframes.session SingleItemValue = Union[bigframes.series.Series, int, float, str, Callable] @@ -113,9 +111,6 @@ class DataFrame(vendored_pandas_frame.DataFrame): # Must be above 5000 for pandas to delegate to bigframes for binops __pandas_priority__ = 15000 - # Type annotation for anywidget instance - _anywidget_instance: Optional[weakref.ReferenceType["TableWidget"]] = None - def __init__( self, data=None, From d5ffd8682a62f337d2bc28459a710a7bd737ac4a Mon Sep 17 00:00:00 2001 From: Owl Bot Date: Fri, 11 Jul 2025 16:57:58 +0000 Subject: [PATCH 23/23] =?UTF-8?q?=F0=9F=A6=89=20Updates=20from=20OwlBot=20?= =?UTF-8?q?post-processor?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --- MANIFEST.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MANIFEST.in b/MANIFEST.in index 16a933a629..e0deb6deb2 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -17,7 +17,7 @@ # Generated by synthtool. DO NOT EDIT! include README.rst LICENSE recursive-include third_party/bigframes_vendored * -recursive-include bigframes *.json *.proto py.typed +recursive-include bigframes *.json *.proto *.js py.typed recursive-include tests * global-exclude *.py[co] global-exclude __pycache__