From 351fba1faa610c2b7a90e50acdc74909eea30be8 Mon Sep 17 00:00:00 2001
From: Trevor Bergeron <tbergeron@google.com>
Date: Tue, 24 Sep 2024 17:25:25 +0000
Subject: [PATCH] fix: Fix __repr__ caching with partial ordering

---
 bigframes/core/tree_properties.py    |  4 ++--
 bigframes/dataframe.py               |  1 -
 bigframes/session/executor.py        |  5 -----
 tests/system/conftest.py             |  4 ++--
 tests/system/small/test_dataframe.py | 24 ++++++++++++++++++++++++
 5 files changed, 28 insertions(+), 10 deletions(-)

diff --git a/bigframes/core/tree_properties.py b/bigframes/core/tree_properties.py
index 4978e75e38..1b0fe0d072 100644
--- a/bigframes/core/tree_properties.py
+++ b/bigframes/core/tree_properties.py
@@ -44,8 +44,8 @@ def can_fast_head(node: nodes.BigFrameNode) -> bool:
     """Can get head fast if can push head operator down to leafs and operators preserve rows."""
     if isinstance(node, nodes.LeafNode):
         return node.supports_fast_head
-    if isinstance(node, nodes.UnaryNode):
-        return node.row_preserving and can_fast_head(node.child)
+    if isinstance(node, (nodes.ProjectionNode, nodes.SelectionNode)):
+        return can_fast_head(node.child)
     return False
 
 
diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py
index 49a668f008..a59f6bf941 100644
--- a/bigframes/dataframe.py
+++ b/bigframes/dataframe.py
@@ -643,7 +643,6 @@ def __repr__(self) -> str:
         if opts.repr_mode == "deferred":
             return formatter.repr_query_job(self._compute_dry_run())
 
-        self._cached()
         # TODO(swast): pass max_columns and get the true column count back. Maybe
         # get 1 more column than we have requested so that pandas can add the
         # ... for us?
diff --git a/bigframes/session/executor.py b/bigframes/session/executor.py
index d2a2e0f1b2..f89b5aefec 100644
--- a/bigframes/session/executor.py
+++ b/bigframes/session/executor.py
@@ -360,11 +360,6 @@ def _cache_with_cluster_cols(
 
     def _cache_with_offsets(self, array_value: bigframes.core.ArrayValue):
         """Executes the query and uses the resulting table to rewrite future executions."""
-
-        if not self.strictly_ordered:
-            raise ValueError(
-                "Caching with offsets only supported in strictly ordered mode."
-            )
         offset_column = bigframes.core.guid.generate_guid("bigframes_offsets")
         w_offsets, offset_column = array_value.promote_offsets()
         sql = self.compiler.compile_unordered(self._get_optimized_plan(w_offsets.node))
diff --git a/tests/system/conftest.py b/tests/system/conftest.py
index 5ee2dc6397..d9246eecfb 100644
--- a/tests/system/conftest.py
+++ b/tests/system/conftest.py
@@ -154,9 +154,9 @@ def session_load() -> Generator[bigframes.Session, None, None]:
     session.close()  # close generated session at cleanup time
 
 
-@pytest.fixture(scope="session", params=["ordered", "unordered"])
+@pytest.fixture(scope="session", params=["strict", "partial"])
 def maybe_ordered_session(request) -> Generator[bigframes.Session, None, None]:
-    context = bigframes.BigQueryOptions(location="US", ordering_mode="partial")
+    context = bigframes.BigQueryOptions(location="US", ordering_mode=request.param)
     session = bigframes.Session(context=context)
     yield session
     session.close()  # close generated session at cleanup type
diff --git a/tests/system/small/test_dataframe.py b/tests/system/small/test_dataframe.py
index 0a637e983f..340df93791 100644
--- a/tests/system/small/test_dataframe.py
+++ b/tests/system/small/test_dataframe.py
@@ -567,6 +567,30 @@ def test_repr_w_all_rows(scalars_dfs):
     assert actual == expected
 
 
+def test_join_repr(scalars_dfs_maybe_ordered):
+    scalars_df, scalars_pandas_df = scalars_dfs_maybe_ordered
+
+    scalars_df = (
+        scalars_df[["int64_col"]]
+        .join(scalars_df.set_index("int64_col")[["int64_too"]])
+        .sort_index()
+    )
+    scalars_pandas_df = (
+        scalars_pandas_df[["int64_col"]]
+        .join(scalars_pandas_df.set_index("int64_col")[["int64_too"]])
+        .sort_index()
+    )
+    # The pandas join result's index name seems to depend on the index values in a way that bigframes can't match exactly, so clear it before comparing reprs.
+    scalars_pandas_df.index.name = None
+
+    actual = repr(scalars_df)
+
+    with display_options.pandas_repr(bigframes.options.display):
+        expected = repr(scalars_pandas_df)
+
+    assert actual == expected
+
+
 def test_repr_html_w_all_rows(scalars_dfs):
     scalars_df, _ = scalars_dfs
     # get a pandas df of the expected format