fix: Fix __repr__ caching with partial ordering by TrevorBergeron · Pull Request #1016 · googleapis/python-bigquery-dataframes · GitHub
[go: up one dir, main page]

Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions bigframes/core/tree_properties.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,8 @@ def can_fast_head(node: nodes.BigFrameNode) -> bool:
"""Can get head fast if can push head operator down to leafs and operators preserve rows."""
if isinstance(node, nodes.LeafNode):
return node.supports_fast_head
if isinstance(node, nodes.UnaryNode):
return node.row_preserving and can_fast_head(node.child)
if isinstance(node, (nodes.ProjectionNode, nodes.SelectionNode)):
return can_fast_head(node.child)
return False


Expand Down
1 change: 0 additions & 1 deletion bigframes/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -643,7 +643,6 @@ def __repr__(self) -> str:
if opts.repr_mode == "deferred":
return formatter.repr_query_job(self._compute_dry_run())

self._cached()
# TODO(swast): pass max_columns and get the true column count back. Maybe
# get 1 more column than we have requested so that pandas can add the
# ... for us?
Expand Down
5 changes: 0 additions & 5 deletions bigframes/session/executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -360,11 +360,6 @@ def _cache_with_cluster_cols(

def _cache_with_offsets(self, array_value: bigframes.core.ArrayValue):
"""Executes the query and uses the resulting table to rewrite future executions."""

if not self.strictly_ordered:
raise ValueError(
"Caching with offsets only supported in strictly ordered mode."
)
offset_column = bigframes.core.guid.generate_guid("bigframes_offsets")
w_offsets, offset_column = array_value.promote_offsets()
sql = self.compiler.compile_unordered(self._get_optimized_plan(w_offsets.node))
Expand Down
4 changes: 2 additions & 2 deletions tests/system/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,9 +154,9 @@ def session_load() -> Generator[bigframes.Session, None, None]:
session.close() # close generated session at cleanup time


@pytest.fixture(scope="session", params=["ordered", "unordered"])
@pytest.fixture(scope="session", params=["strict", "partial"])
def maybe_ordered_session(request) -> Generator[bigframes.Session, None, None]:
context = bigframes.BigQueryOptions(location="US", ordering_mode="partial")
context = bigframes.BigQueryOptions(location="US", ordering_mode=request.param)
session = bigframes.Session(context=context)
yield session
session.close() # close generated session at cleanup time
Expand Down
24 changes: 24 additions & 0 deletions tests/system/small/test_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -567,6 +567,30 @@ def test_repr_w_all_rows(scalars_dfs):
assert actual == expected


def test_join_repr(scalars_dfs_maybe_ordered):
    """Check that repr() of a joined, index-sorted frame matches pandas.

    The same join (``int64_col`` column joined against ``int64_too`` indexed
    by ``int64_col``) is built on both frames from the fixture, and the two
    string representations are compared for equality.
    """
    bf_df, pd_df = scalars_dfs_maybe_ordered

    # Frame under test: left side keeps the default index, right side is
    # re-indexed by int64_col before joining.
    bf_left = bf_df[["int64_col"]]
    bf_right = bf_df.set_index("int64_col")[["int64_too"]]
    bf_joined = bf_left.join(bf_right).sort_index()

    # Reference result, built with the identical sequence of operations.
    pd_left = pd_df[["int64_col"]]
    pd_right = pd_df.set_index("int64_col")[["int64_too"]]
    pd_joined = pd_left.join(pd_right).sort_index()
    # Pandas join result index name seems to depend on the index values in a
    # way that bigframes can't match exactly
    pd_joined.index.name = None

    bf_repr = repr(bf_joined)

    # Render the reference frame under the same display options.
    with display_options.pandas_repr(bigframes.options.display):
        pd_repr = repr(pd_joined)

    assert bf_repr == pd_repr


def test_repr_html_w_all_rows(scalars_dfs):
scalars_df, _ = scalars_dfs
# get a pandas df of the expected format
Expand Down
0