From b83c4fbc7ea9d646afbc684ae938540719d4ddd0 Mon Sep 17 00:00:00 2001
From: Duc Le Tu <tuduc93@gmail.com>
Date: Mon, 29 Jan 2024 17:36:29 +0000
Subject: [PATCH 1/4] fix merge df same source after filter

---
 bigframes/core/compile/single_column.py |  4 ++
 tests/system/conftest.py                |  7 +++
 tests/system/small/test_pandas_etc.py   | 71 +++++++++++++++++++++++++
 3 files changed, 82 insertions(+)
 create mode 100644 tests/system/small/test_pandas_etc.py

diff --git a/bigframes/core/compile/single_column.py b/bigframes/core/compile/single_column.py
index d26e71d1b4..895ac8b90f 100644
--- a/bigframes/core/compile/single_column.py
+++ b/bigframes/core/compile/single_column.py
@@ -64,6 +64,8 @@ def join_by_column_ordered(
             .equals(right._get_ibis_column(rcol).name("index"))
             for lcol, rcol in join.conditions
         )
+        and not left._predicates
+        and not right._predicates
     ):
         return bigframes.core.compile.row_identity.join_by_row_identity_ordered(
             left, right, join_def=join
@@ -165,6 +167,8 @@ def join_by_column_unordered(
             .equals(right._get_ibis_column(rcol).name("index"))
             for lcol, rcol in join.conditions
         )
+        and not left._predicates
+        and not right._predicates
     ):
         return bigframes.core.compile.row_identity.join_by_row_identity_unordered(
             left, right, join_def=join
diff --git a/tests/system/conftest.py b/tests/system/conftest.py
index 0ad4280497..97057d6f47 100644
--- a/tests/system/conftest.py
+++ b/tests/system/conftest.py
@@ -280,6 +280,13 @@ def scalars_table_id(test_data_tables) -> str:
     return test_data_tables["scalars"]
 
 
+@pytest.fixture(scope="session")
+def baseball_schedules_df(session: bigframes.Session) -> bigframes.dataframe.DataFrame:
+    """Public BQ table"""
+    df = session.read_gbq("bigquery-public-data.baseball.schedules")
+    return df
+
+
 @pytest.fixture(scope="session")
 def hockey_table_id(test_data_tables) -> str:
     return test_data_tables["hockey_players"]
diff --git a/tests/system/small/test_pandas_etc.py b/tests/system/small/test_pandas_etc.py
new file mode 100644
index 0000000000..b68fd67c6c
--- /dev/null
+++ b/tests/system/small/test_pandas_etc.py
@@ -0,0 +1,71 @@
+# Copyright 2023 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import pandas as pd
+import pytest
+
+import bigframes.pandas as bpd
+from tests.system.utils import assert_pandas_df_equal
+
+
+@pytest.mark.parametrize(
+    ("merge_how",),
+    [
+        ("inner",),
+        ("outer",),
+        ("left",),
+        ("right",),
+    ],
+)
+def test_merge_after_filter(baseball_schedules_df, merge_how):
+    on = ["awayTeamName"]
+    left_columns = [
+        "gameId",
+        "year",
+        "homeTeamName",
+        "awayTeamName",
+        "duration_minutes",
+    ]
+    right_columns = [
+        "gameId",
+        "year",
+        "homeTeamName",
+        "awayTeamName",
+        "duration_minutes",
+    ]
+
+    left = baseball_schedules_df[left_columns]
+    left = left[left["homeTeamName"] == "Rays"]
+    # Offset the rows somewhat so that outer join can have an effect.
+    right = baseball_schedules_df[right_columns]
+    right = right[right["homeTeamName"] == "White Sox"]
+
+    df = left.merge(right, on=on, how=merge_how)
+    bf_result = df.to_pandas()
+
+    left_pandas = baseball_schedules_df.to_pandas()[left_columns]
+    left_pandas = left_pandas[left_pandas["homeTeamName"] == "Rays"]
+
+    right_pandas = baseball_schedules_df.to_pandas()[right_columns]
+    right_pandas = right_pandas[right_pandas["homeTeamName"] == "White Sox"]
+
+    pd_result = pd.merge(
+        left_pandas,
+        right_pandas,
+        merge_how,
+        on,
+        sort=True,
+    )
+
+    assert_pandas_df_equal(bf_result, pd_result, ignore_order=True)

From 8fc2b6a6901cb7457aafa51460d3f6fdd05a84fe Mon Sep 17 00:00:00 2001
From: Duc Le Tu <tuduc93@gmail.com>
Date: Mon, 19 Feb 2024 15:12:51 +0700
Subject: [PATCH 2/4] Update test_pandas_etc.py

---
 tests/system/small/test_pandas_etc.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tests/system/small/test_pandas_etc.py b/tests/system/small/test_pandas_etc.py
index b68fd67c6c..737baad688 100644
--- a/tests/system/small/test_pandas_etc.py
+++ b/tests/system/small/test_pandas_etc.py
@@ -15,7 +15,6 @@
 import pandas as pd
 import pytest
 
-import bigframes.pandas as bpd
 from tests.system.utils import assert_pandas_df_equal
 
 

From 62ff7798d29130a721cb80386383eb086cd22d40 Mon Sep 17 00:00:00 2001
From: Tim Swast <swast@google.com>
Date: Thu, 7 Mar 2024 16:09:40 +0000
Subject: [PATCH 3/4] default row identity joins to false

---
 bigframes/core/__init__.py                             |  2 +-
 bigframes/core/compile/row_identity.py                 | 10 +++++++---
 bigframes/core/compile/single_column.py                |  8 ++------
 bigframes/core/nodes.py                                |  2 +-
 .../test_issue355_merge_after_filter.py}               |  2 +-
 5 files changed, 12 insertions(+), 12 deletions(-)
 rename tests/system/small/{test_pandas_etc.py => regression/test_issue355_merge_after_filter.py} (98%)

diff --git a/bigframes/core/__init__.py b/bigframes/core/__init__.py
index 4dc2e4d7af..9032993452 100644
--- a/bigframes/core/__init__.py
+++ b/bigframes/core/__init__.py
@@ -349,7 +349,7 @@ def join(
         self,
         other: ArrayValue,
         join_def: join_def.JoinDefinition,
-        allow_row_identity_join: bool = True,
+        allow_row_identity_join: bool = False,
     ):
         return ArrayValue(
             nodes.JoinNode(
diff --git a/bigframes/core/compile/row_identity.py b/bigframes/core/compile/row_identity.py
index f46e2f9463..a376b59b06 100644
--- a/bigframes/core/compile/row_identity.py
+++ b/bigframes/core/compile/row_identity.py
@@ -28,7 +28,7 @@
 import bigframes.core.joins as joining
 import bigframes.core.ordering as orderings
 
-SUPPORTED_ROW_IDENTITY_HOW = {"outer", "left", "inner"}
+SUPPORTED_ROW_IDENTITY_HOW = {"outer", "left", "right", "inner"}
 
 
 def join_by_row_identity_unordered(
@@ -39,7 +39,8 @@ def join_by_row_identity_unordered(
     """Compute join when we are joining by row identity not a specific column."""
     if join_def.type not in SUPPORTED_ROW_IDENTITY_HOW:
         raise NotImplementedError(
-            f"Only how='outer','left','inner' currently supported. {constants.FEEDBACK_LINK}"
+            f"Only how={repr(SUPPORTED_ROW_IDENTITY_HOW)} currently supported. "
+            + constants.FEEDBACK_LINK
         )
 
     if not left._table.equals(right._table):
@@ -98,7 +99,8 @@ def join_by_row_identity_ordered(
     """Compute join when we are joining by row identity not a specific column."""
     if join_def.type not in SUPPORTED_ROW_IDENTITY_HOW:
         raise NotImplementedError(
-            f"Only how='outer','left','inner' currently supported. {constants.FEEDBACK_LINK}"
+            f"Only how={repr(SUPPORTED_ROW_IDENTITY_HOW)} currently supported. "
+            + constants.FEEDBACK_LINK
         )
 
     if not left._table.equals(right._table):
@@ -225,6 +227,8 @@ def _join_predicates(
         return (joined_predicates,)
     if join_type == "left":
         return tuple(left_predicates)
+    if join_type == "right":
+        return tuple(right_predicates)
     if join_type == "inner":
         _, right_relative_predicates = _get_relative_predicates(
             left_predicates, right_predicates
diff --git a/bigframes/core/compile/single_column.py b/bigframes/core/compile/single_column.py
index 895ac8b90f..7beebfcb66 100644
--- a/bigframes/core/compile/single_column.py
+++ b/bigframes/core/compile/single_column.py
@@ -33,7 +33,7 @@ def join_by_column_ordered(
     left: compiled.OrderedIR,
     right: compiled.OrderedIR,
     join: join_defs.JoinDefinition,
-    allow_row_identity_join: bool = True,
+    allow_row_identity_join: bool = False,
 ) -> compiled.OrderedIR:
     """Join two expressions by column equality.
 
@@ -64,8 +64,6 @@ def join_by_column_ordered(
             .equals(right._get_ibis_column(rcol).name("index"))
             for lcol, rcol in join.conditions
         )
-        and not left._predicates
-        and not right._predicates
     ):
         return bigframes.core.compile.row_identity.join_by_row_identity_ordered(
             left, right, join_def=join
@@ -136,7 +134,7 @@ def join_by_column_unordered(
     left: compiled.UnorderedIR,
     right: compiled.UnorderedIR,
     join: join_defs.JoinDefinition,
-    allow_row_identity_join: bool = True,
+    allow_row_identity_join: bool = False,
 ) -> compiled.UnorderedIR:
     """Join two expressions by column equality.
 
@@ -167,8 +165,6 @@ def join_by_column_unordered(
             .equals(right._get_ibis_column(rcol).name("index"))
             for lcol, rcol in join.conditions
         )
-        and not left._predicates
-        and not right._predicates
     ):
         return bigframes.core.compile.row_identity.join_by_row_identity_unordered(
             left, right, join_def=join
diff --git a/bigframes/core/nodes.py b/bigframes/core/nodes.py
index f637177a94..1cd3277cbc 100644
--- a/bigframes/core/nodes.py
+++ b/bigframes/core/nodes.py
@@ -115,7 +115,7 @@ class JoinNode(BigFrameNode):
     left_child: BigFrameNode
     right_child: BigFrameNode
     join: JoinDefinition
-    allow_row_identity_join: bool = True
+    allow_row_identity_join: bool = False
 
     @property
     def row_preserving(self) -> bool:
diff --git a/tests/system/small/test_pandas_etc.py b/tests/system/small/regression/test_issue355_merge_after_filter.py
similarity index 98%
rename from tests/system/small/test_pandas_etc.py
rename to tests/system/small/regression/test_issue355_merge_after_filter.py
index 737baad688..24ee01cb7f 100644
--- a/tests/system/small/test_pandas_etc.py
+++ b/tests/system/small/regression/test_issue355_merge_after_filter.py
@@ -1,4 +1,4 @@
-# Copyright 2023 Google LLC
+# Copyright 2024 Google LLC
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.

From c1810eea3cdae396b8bc716689a2f0f57496786b Mon Sep 17 00:00:00 2001
From: Tim Swast <swast@google.com>
Date: Thu, 7 Mar 2024 16:12:22 +0000
Subject: [PATCH 4/4] revert row identity join changes

---
 bigframes/core/compile/row_identity.py | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/bigframes/core/compile/row_identity.py b/bigframes/core/compile/row_identity.py
index a376b59b06..f46e2f9463 100644
--- a/bigframes/core/compile/row_identity.py
+++ b/bigframes/core/compile/row_identity.py
@@ -28,7 +28,7 @@
 import bigframes.core.joins as joining
 import bigframes.core.ordering as orderings
 
-SUPPORTED_ROW_IDENTITY_HOW = {"outer", "left", "right", "inner"}
+SUPPORTED_ROW_IDENTITY_HOW = {"outer", "left", "inner"}
 
 
 def join_by_row_identity_unordered(
@@ -39,8 +39,7 @@ def join_by_row_identity_unordered(
     """Compute join when we are joining by row identity not a specific column."""
     if join_def.type not in SUPPORTED_ROW_IDENTITY_HOW:
         raise NotImplementedError(
-            f"Only how={repr(SUPPORTED_ROW_IDENTITY_HOW)} currently supported. "
-            + constants.FEEDBACK_LINK
+            f"Only how='outer','left','inner' currently supported. {constants.FEEDBACK_LINK}"
         )
 
     if not left._table.equals(right._table):
@@ -99,8 +98,7 @@ def join_by_row_identity_ordered(
     """Compute join when we are joining by row identity not a specific column."""
     if join_def.type not in SUPPORTED_ROW_IDENTITY_HOW:
         raise NotImplementedError(
-            f"Only how={repr(SUPPORTED_ROW_IDENTITY_HOW)} currently supported. "
-            + constants.FEEDBACK_LINK
+            f"Only how='outer','left','inner' currently supported. {constants.FEEDBACK_LINK}"
         )
 
     if not left._table.equals(right._table):
@@ -227,8 +225,6 @@ def _join_predicates(
         return (joined_predicates,)
     if join_type == "left":
         return tuple(left_predicates)
-    if join_type == "right":
-        return tuple(right_predicates)
     if join_type == "inner":
         _, right_relative_predicates = _get_relative_predicates(
             left_predicates, right_predicates