From 7116e3e60ef0bb5ce70e7752351e76e29351e2fa Mon Sep 17 00:00:00 2001
From: Chelsea Lin
Date: Tue, 19 Mar 2024 21:28:09 +0000
Subject: [PATCH 1/4] fix: re-enable to_csv and to_json related tests

---
 tests/system/small/test_dataframe_io.py | 12 ++++++++----
 tests/system/small/test_series.py       |  6 ++----
 2 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/tests/system/small/test_dataframe_io.py b/tests/system/small/test_dataframe_io.py
index adc729565e..afb97c3595 100644
--- a/tests/system/small/test_dataframe_io.py
+++ b/tests/system/small/test_dataframe_io.py
@@ -115,7 +115,6 @@ def test_to_pandas_batches_w_correct_dtypes(scalars_df_default_index):
         pd.testing.assert_series_equal(actual, expected)
 
 
-@pytest.mark.skip(reason="Disable to unblock kokoro tests")
 @pytest.mark.parametrize(
     ("index"),
     [True, False],
@@ -154,6 +153,7 @@ def test_to_csv_index(
         dtype=dtype,
         date_format={"timestamp_col": "YYYY-MM-DD HH:MM:SS Z"},
         index_col=index_col,
+        storage_options=dict(expand=True),
     )
     convert_pandas_dtypes(gcs_df, bytes_col=True)
     gcs_df.index.name = scalars_df.index.name
@@ -164,7 +164,6 @@
     pd.testing.assert_frame_equal(gcs_df, scalars_pandas_df)
 
 
-@pytest.mark.skip(reason="Disable to unblock kokoro tests")
 def test_to_csv_tabs(
     scalars_dfs: Tuple[bigframes.dataframe.DataFrame, pd.DataFrame],
     gcs_folder: str,
@@ -194,6 +193,7 @@
         dtype=dtype,
         date_format={"timestamp_col": "YYYY-MM-DD HH:MM:SS Z"},
         index_col=index_col,
+        storage_options=dict(expand=True),
     )
     convert_pandas_dtypes(gcs_df, bytes_col=True)
     gcs_df.index.name = scalars_df.index.name
@@ -415,7 +415,6 @@ def test_to_json_index_invalid_lines(
         scalars_df.to_json(path, index=index)
 
 
-@pytest.mark.skip(reason="Disable to unblock kokoro tests")
 @pytest.mark.parametrize(
     ("index"),
     [True, False],
@@ -435,7 +434,12 @@ def test_to_json_index_records_orient(
     """ Test the `to_json` API with `orient` is `records` and `lines` is True"""
     scalars_df.to_json(path, index=index, orient="records", lines=True)
 
-    gcs_df = pd.read_json(path, lines=True, convert_dates=["datetime_col"])
+    gcs_df = pd.read_json(
+        path,
+        lines=True,
+        convert_dates=["datetime_col"],
+        storage_options=dict(expand=True),
+    )
     convert_pandas_dtypes(gcs_df, bytes_col=True)
     if index and scalars_df.index.name is not None:
         gcs_df = gcs_df.set_index(scalars_df.index.name)
diff --git a/tests/system/small/test_series.py b/tests/system/small/test_series.py
index f63ea977ff..3c593a75bf 100644
--- a/tests/system/small/test_series.py
+++ b/tests/system/small/test_series.py
@@ -2390,11 +2390,10 @@ def test_to_frame(scalars_dfs):
     assert_pandas_df_equal(bf_result, pd_result)
 
 
-@pytest.mark.skip(reason="Disable to unblock kokoro tests")
 def test_to_json(gcs_folder, scalars_df_index, scalars_pandas_df_index):
     path = gcs_folder + "test_series_to_json*.jsonl"
     scalars_df_index["int64_col"].to_json(path, lines=True, orient="records")
-    gcs_df = pd.read_json(path, lines=True)
+    gcs_df = pd.read_json(path, lines=True, storage_options=dict(expand=True))
 
     pd.testing.assert_series_equal(
         gcs_df["int64_col"].astype(pd.Int64Dtype()),
@@ -2404,11 +2403,10 @@
     )
 
 
-@pytest.mark.skip(reason="Disable to unblock kokoro tests")
 def test_to_csv(gcs_folder, scalars_df_index, scalars_pandas_df_index):
     path = gcs_folder + "test_series_to_csv*.csv"
    scalars_df_index["int64_col"].to_csv(path)
-    gcs_df = pd.read_csv(path)
+    gcs_df = pd.read_csv(path, storage_options=dict(expand=True))
 
     pd.testing.assert_series_equal(
         gcs_df["int64_col"].astype(pd.Int64Dtype()),

From b867d24008c5d01df5727a6efe8996f28f017f99 Mon Sep 17 00:00:00 2001
From: Chelsea Lin
Date: Wed, 20 Mar 2024 03:32:09 +0000
Subject: [PATCH 2/4] fix gcs file path

---
 tests/system/small/test_dataframe_io.py | 9 +++------
 tests/system/small/test_series.py       | 4 ++--
 2 files changed, 5 insertions(+), 8 deletions(-)

diff --git a/tests/system/small/test_dataframe_io.py b/tests/system/small/test_dataframe_io.py
index afb97c3595..401798d8aa 100644
--- a/tests/system/small/test_dataframe_io.py
+++ b/tests/system/small/test_dataframe_io.py
@@ -149,11 +149,10 @@ def test_to_csv_index(
     # read_csv will decode into bytes inproperly, convert_pandas_dtypes will encode properly from string
     dtype.pop("bytes_col")
     gcs_df = pd.read_csv(
-        path,
+        path.replace("*", "000000000000"),
         dtype=dtype,
         date_format={"timestamp_col": "YYYY-MM-DD HH:MM:SS Z"},
         index_col=index_col,
-        storage_options=dict(expand=True),
     )
     convert_pandas_dtypes(gcs_df, bytes_col=True)
     gcs_df.index.name = scalars_df.index.name
@@ -188,12 +187,11 @@ def test_to_csv_tabs(
     # read_csv will decode into bytes inproperly, convert_pandas_dtypes will encode properly from string
     dtype.pop("bytes_col")
     gcs_df = pd.read_csv(
-        path,
+        path.replace("*", "000000000000"),
         sep="\t",
         dtype=dtype,
         date_format={"timestamp_col": "YYYY-MM-DD HH:MM:SS Z"},
         index_col=index_col,
-        storage_options=dict(expand=True),
     )
     convert_pandas_dtypes(gcs_df, bytes_col=True)
     gcs_df.index.name = scalars_df.index.name
@@ -435,10 +433,9 @@
     scalars_df.to_json(path, index=index, orient="records", lines=True)
 
     gcs_df = pd.read_json(
-        path,
+        path.replace("*", "000000000000"),
         lines=True,
         convert_dates=["datetime_col"],
-        storage_options=dict(expand=True),
     )
     convert_pandas_dtypes(gcs_df, bytes_col=True)
     if index and scalars_df.index.name is not None:
diff --git a/tests/system/small/test_series.py b/tests/system/small/test_series.py
index 3c593a75bf..06db72700c 100644
--- a/tests/system/small/test_series.py
+++ b/tests/system/small/test_series.py
@@ -2393,7 +2393,7 @@ def test_to_frame(scalars_dfs):
 def test_to_json(gcs_folder, scalars_df_index, scalars_pandas_df_index):
     path = gcs_folder + "test_series_to_json*.jsonl"
     scalars_df_index["int64_col"].to_json(path, lines=True, orient="records")
-    gcs_df = pd.read_json(path, lines=True, storage_options=dict(expand=True))
+    gcs_df = pd.read_json(path.replace("*", "000000000000"), lines=True)
 
     pd.testing.assert_series_equal(
         gcs_df["int64_col"].astype(pd.Int64Dtype()),
@@ -2406,7 +2406,7 @@ def test_to_json(gcs_folder, scalars_df_index, scalars_pandas_df_index):
 def test_to_csv(gcs_folder, scalars_df_index, scalars_pandas_df_index):
     path = gcs_folder + "test_series_to_csv*.csv"
     scalars_df_index["int64_col"].to_csv(path)
-    gcs_df = pd.read_csv(path, storage_options=dict(expand=True))
+    gcs_df = pd.read_csv(path.replace("*", "000000000000"))
 
     pd.testing.assert_series_equal(
         gcs_df["int64_col"].astype(pd.Int64Dtype()),

From 48c8038433de27e49670c9f2d8e955b3cbb1b771 Mon Sep 17 00:00:00 2001
From: Chelsea Lin
Date: Wed, 20 Mar 2024 18:03:44 +0000
Subject: [PATCH 3/4] add global FIRST_GCS_FILE_SUFFIX

---
 tests/system/small/test_dataframe_io.py | 14 +++++++++-----
 tests/system/small/test_encryption.py   |  3 ++-
 tests/system/small/test_series.py       |  5 +++--
 tests/system/small/test_session.py      | 22 ++++++++++------------
 tests/system/utils.py                   |  2 ++
 5 files changed, 26 insertions(+), 20 deletions(-)

diff --git a/tests/system/small/test_dataframe_io.py b/tests/system/small/test_dataframe_io.py
index 401798d8aa..a5fd29ab92 100644
--- a/tests/system/small/test_dataframe_io.py
+++ b/tests/system/small/test_dataframe_io.py
@@ -19,7 +19,11 @@
 import pyarrow as pa
 import pytest
 
-from tests.system.utils import assert_pandas_df_equal, convert_pandas_dtypes
+from tests.system.utils import (
+    assert_pandas_df_equal,
+    convert_pandas_dtypes,
+    FIRST_GCS_FILE_SUFFIX,
+)
 
 try:
     import pandas_gbq  # type: ignore
@@ -149,7 +153,7 @@ def test_to_csv_index(
     # read_csv will decode into bytes inproperly, convert_pandas_dtypes will encode properly from string
     dtype.pop("bytes_col")
     gcs_df = pd.read_csv(
-        path.replace("*", "000000000000"),
+        path.replace("*", FIRST_GCS_FILE_SUFFIX),
         dtype=dtype,
         date_format={"timestamp_col": "YYYY-MM-DD HH:MM:SS Z"},
         index_col=index_col,
@@ -187,7 +191,7 @@ def test_to_csv_tabs(
     # read_csv will decode into bytes inproperly, convert_pandas_dtypes will encode properly from string
     dtype.pop("bytes_col")
     gcs_df = pd.read_csv(
-        path.replace("*", "000000000000"),
+        path.replace("*", FIRST_GCS_FILE_SUFFIX),
         sep="\t",
         dtype=dtype,
         date_format={"timestamp_col": "YYYY-MM-DD HH:MM:SS Z"},
@@ -433,7 +437,7 @@
     scalars_df.to_json(path, index=index, orient="records", lines=True)
 
     gcs_df = pd.read_json(
-        path.replace("*", "000000000000"),
+        path.replace("*", FIRST_GCS_FILE_SUFFIX),
         lines=True,
         convert_dates=["datetime_col"],
     )
@@ -475,7 +479,7 @@ def test_to_parquet_index(scalars_dfs, gcs_folder, index):
     # table.
     scalars_df.to_parquet(path, index=index)
 
-    gcs_df = pd.read_parquet(path.replace("*", "000000000000"))
+    gcs_df = pd.read_parquet(path.replace("*", FIRST_GCS_FILE_SUFFIX))
     convert_pandas_dtypes(gcs_df, bytes_col=False)
     if index and scalars_df.index.name is not None:
         gcs_df = gcs_df.set_index(scalars_df.index.name)
diff --git a/tests/system/small/test_encryption.py b/tests/system/small/test_encryption.py
index f13d2b9e1a..568a236b22 100644
--- a/tests/system/small/test_encryption.py
+++ b/tests/system/small/test_encryption.py
@@ -19,6 +19,7 @@
 
 import bigframes
 import bigframes.ml.linear_model
+from tests.system.utils import FIRST_GCS_FILE_SUFFIX
 
 
 @pytest.fixture(scope="module")
@@ -160,7 +161,7 @@ def test_read_csv_gcs(
     # Create a csv in gcs
     write_path = gcs_folder + "test_read_csv_gcs_bigquery_engine*.csv"
     read_path = (
-        write_path.replace("*", "000000000000") if engine is None else write_path
+        write_path.replace("*", FIRST_GCS_FILE_SUFFIX) if engine is None else write_path
     )
 
     scalars_df_index.to_csv(write_path)
diff --git a/tests/system/small/test_series.py b/tests/system/small/test_series.py
index 06db72700c..04f986407e 100644
--- a/tests/system/small/test_series.py
+++ b/tests/system/small/test_series.py
@@ -27,6 +27,7 @@
 from tests.system.utils import (
     assert_pandas_df_equal,
     assert_series_equal,
+    FIRST_GCS_FILE_SUFFIX,
     skip_legacy_pandas,
 )
 
@@ -2394,7 +2394,7 @@ def test_to_frame(scalars_dfs):
 def test_to_json(gcs_folder, scalars_df_index, scalars_pandas_df_index):
     path = gcs_folder + "test_series_to_json*.jsonl"
     scalars_df_index["int64_col"].to_json(path, lines=True, orient="records")
-    gcs_df = pd.read_json(path.replace("*", "000000000000"), lines=True)
+    gcs_df = pd.read_json(path.replace("*", FIRST_GCS_FILE_SUFFIX), lines=True)
 
     pd.testing.assert_series_equal(
         gcs_df["int64_col"].astype(pd.Int64Dtype()),
@@ -2407,7 +2407,7 @@ def test_to_json(gcs_folder, scalars_df_index, scalars_pandas_df_index):
 def test_to_csv(gcs_folder, scalars_df_index, scalars_pandas_df_index):
     path = gcs_folder + "test_series_to_csv*.csv"
     scalars_df_index["int64_col"].to_csv(path)
-    gcs_df = pd.read_csv(path.replace("*", "000000000000"))
+    gcs_df = pd.read_csv(path.replace("*", FIRST_GCS_FILE_SUFFIX))
 
     pd.testing.assert_series_equal(
         gcs_df["int64_col"].astype(pd.Int64Dtype()),
diff --git a/tests/system/small/test_session.py b/tests/system/small/test_session.py
index d0cd24e2be..b6634fc295 100644
--- a/tests/system/small/test_session.py
+++ b/tests/system/small/test_session.py
@@ -30,9 +30,7 @@
 import bigframes.dataframe
 import bigframes.dtypes
 import bigframes.ml.linear_model
-from tests.system.utils import skip_legacy_pandas
-
-FIRST_FILE = "000000000000"
+from tests.system.utils import FIRST_GCS_FILE_SUFFIX, skip_legacy_pandas
 
 
 def test_read_gbq_tokyo(
@@ -442,7 +440,7 @@ def test_read_csv_gcs_default_engine(session, scalars_dfs, gcs_folder):
         path = gcs_folder + "test_read_csv_gcs_default_engine_w_index*.csv"
     else:
         path = gcs_folder + "test_read_csv_gcs_default_engine_wo_index*.csv"
-    read_path = path.replace("*", FIRST_FILE)
+    read_path = path.replace("*", FIRST_GCS_FILE_SUFFIX)
     scalars_df.to_csv(path, index=False)
     dtype = scalars_df.dtypes.to_dict()
     dtype.pop("geography_col")
@@ -641,7 +639,7 @@ def test_read_csv_default_engine_throws_not_implemented_error(
         gcs_folder
         + "test_read_csv_gcs_default_engine_throws_not_implemented_error*.csv"
     )
-    read_path = path.replace("*", FIRST_FILE)
+    read_path = path.replace("*", FIRST_GCS_FILE_SUFFIX)
     scalars_df_index.to_csv(path)
     with pytest.raises(NotImplementedError, match=match):
         session.read_csv(read_path, **kwargs)
@@ -649,7 +647,7 @@
 
 def test_read_csv_gcs_default_engine_w_header(session, scalars_df_index, gcs_folder):
     path = gcs_folder + "test_read_csv_gcs_default_engine_w_header*.csv"
-    read_path = path.replace("*", FIRST_FILE)
+    read_path = path.replace("*", FIRST_GCS_FILE_SUFFIX)
     scalars_df_index.to_csv(path)
 
     # Skips header=N rows, normally considers the N+1th row as the header, but overridden by
@@ -716,7 +714,7 @@ def test_read_csv_gcs_default_engine_w_index_col_name(
     session, scalars_df_default_index, gcs_folder
 ):
     path = gcs_folder + "test_read_csv_gcs_default_engine_w_index_col_name*.csv"
-    read_path = path.replace("*", FIRST_FILE)
+    read_path = path.replace("*", FIRST_GCS_FILE_SUFFIX)
     scalars_df_default_index.to_csv(path)
 
     df = session.read_csv(read_path, index_col="rowindex")
@@ -731,7 +729,7 @@ def test_read_csv_gcs_default_engine_w_index_col_index(
     session, scalars_df_default_index, gcs_folder
 ):
     path = gcs_folder + "test_read_csv_gcs_default_engine_w_index_col_index*.csv"
-    read_path = path.replace("*", FIRST_FILE)
+    read_path = path.replace("*", FIRST_GCS_FILE_SUFFIX)
     scalars_df_default_index.to_csv(path)
 
     index_col = scalars_df_default_index.columns.to_list().index("rowindex")
@@ -790,7 +788,7 @@ def test_read_csv_local_default_engine_w_index_col_index(
 def test_read_csv_gcs_w_usecols(session, scalars_df_index, gcs_folder, engine):
     path = gcs_folder + "test_read_csv_gcs_w_usecols"
     path = path + "_default_engine*.csv" if engine is None else path + "_bq_engine*.csv"
-    read_path = path.replace("*", FIRST_FILE) if engine is None else path
+    read_path = path.replace("*", FIRST_GCS_FILE_SUFFIX) if engine is None else path
     scalars_df_index.to_csv(path)
 
     # df should only have 1 column which is bool_col.
@@ -902,7 +900,7 @@ def test_read_parquet_gcs(session: bigframes.Session, scalars_dfs, gcs_folder, e
 
     # Only bigquery engine for reads supports wildcards in path name.
     if engine != "bigquery":
-        path = path.replace("*", "000000000000")
+        path = path.replace("*", FIRST_GCS_FILE_SUFFIX)
 
     df_out = (
         session.read_parquet(path, engine=engine)
@@ -1012,7 +1010,7 @@ def test_read_parquet_gcs_compression_not_supported(
 def test_read_json_gcs_bq_engine(session, scalars_dfs, gcs_folder):
     scalars_df, _ = scalars_dfs
     path = gcs_folder + "test_read_json_gcs_bq_engine_w_index*.json"
-    read_path = path.replace("*", FIRST_FILE)
+    read_path = path.replace("*", FIRST_GCS_FILE_SUFFIX)
     scalars_df.to_json(path, index=False, lines=True, orient="records")
 
     df = session.read_json(read_path, lines=True, orient="records", engine="bigquery")
@@ -1036,7 +1034,7 @@ def test_read_json_gcs_bq_engine(session, scalars_dfs, gcs_folder):
 def test_read_json_gcs_default_engine(session, scalars_dfs, gcs_folder):
     scalars_df, _ = scalars_dfs
     path = gcs_folder + "test_read_json_gcs_default_engine_w_index*.json"
-    read_path = path.replace("*", FIRST_FILE)
+    read_path = path.replace("*", FIRST_GCS_FILE_SUFFIX)
     scalars_df.to_json(
         path,
         index=False,
diff --git a/tests/system/utils.py b/tests/system/utils.py
index 8ea49ed7e2..a6444fff8f 100644
--- a/tests/system/utils.py
+++ b/tests/system/utils.py
@@ -28,6 +28,8 @@
 
 from bigframes.functions import remote_function
 
+FIRST_GCS_FILE_SUFFIX = "000000000000"
+
 
 def skip_legacy_pandas(test):
     @functools.wraps(test)

From 708e23400740463de991fda1ccd2540bfa62edd5 Mon Sep 17 00:00:00 2001
From: Chelsea Lin
Date: Wed, 20 Mar 2024 19:11:20 +0000
Subject: [PATCH 4/4] avoid importing individual functions from tests.system.utils

---
 tests/system/small/test_dataframe_io.py | 32 +++++++++++--------------
 tests/system/small/test_encryption.py   |  4 ++--
 tests/system/small/test_series.py       |  6 ++---
 tests/system/small/test_session.py      | 24 +++++++++----------
 tests/system/utils.py                   |  6 +++--
 5 files changed, 35 insertions(+), 37 deletions(-)

diff --git a/tests/system/small/test_dataframe_io.py b/tests/system/small/test_dataframe_io.py
index a5fd29ab92..10d7408790 100644
--- a/tests/system/small/test_dataframe_io.py
+++ b/tests/system/small/test_dataframe_io.py
@@ -19,11 +19,7 @@
 import pyarrow as pa
 import pytest
 
-from tests.system.utils import (
-    assert_pandas_df_equal,
-    convert_pandas_dtypes,
-    FIRST_GCS_FILE_SUFFIX,
-)
+from tests.system import utils
 
 try:
     import pandas_gbq  # type: ignore
@@ -153,12 +149,12 @@ def test_to_csv_index(
     # read_csv will decode into bytes inproperly, convert_pandas_dtypes will encode properly from string
     dtype.pop("bytes_col")
     gcs_df = pd.read_csv(
-        path.replace("*", FIRST_GCS_FILE_SUFFIX),
+        utils.get_first_file_from_wildcard(path),
         dtype=dtype,
         date_format={"timestamp_col": "YYYY-MM-DD HH:MM:SS Z"},
         index_col=index_col,
     )
-    convert_pandas_dtypes(gcs_df, bytes_col=True)
+    utils.convert_pandas_dtypes(gcs_df, bytes_col=True)
     gcs_df.index.name = scalars_df.index.name
 
     scalars_pandas_df = scalars_pandas_df.copy()
@@ -191,13 +187,13 @@ def test_to_csv_tabs(
     # read_csv will decode into bytes inproperly, convert_pandas_dtypes will encode properly from string
     dtype.pop("bytes_col")
     gcs_df = pd.read_csv(
-        path.replace("*", FIRST_GCS_FILE_SUFFIX),
+        utils.get_first_file_from_wildcard(path),
         sep="\t",
         dtype=dtype,
         date_format={"timestamp_col": "YYYY-MM-DD HH:MM:SS Z"},
         index_col=index_col,
     )
-    convert_pandas_dtypes(gcs_df, bytes_col=True)
+    utils.convert_pandas_dtypes(gcs_df, bytes_col=True)
     gcs_df.index.name = scalars_df.index.name
 
     scalars_pandas_df = scalars_pandas_df.copy()
@@ -231,7 +227,7 @@ def test_to_gbq_index(scalars_dfs, dataset_id, index):
     else:
         df_out = df_out.sort_values("rowindex_2").reset_index(drop=True)
 
-    convert_pandas_dtypes(df_out, bytes_col=False)
+    utils.convert_pandas_dtypes(df_out, bytes_col=False)
     # pd.read_gbq interpets bytes_col as object, reconvert to pyarrow binary
     df_out["bytes_col"] = df_out["bytes_col"].astype(pd.ArrowDtype(pa.binary()))
     expected = scalars_pandas_df.copy()
@@ -437,11 +433,11 @@ def test_to_json_index_records_orient(
     scalars_df.to_json(path, index=index, orient="records", lines=True)
 
     gcs_df = pd.read_json(
-        path.replace("*", FIRST_GCS_FILE_SUFFIX),
+        utils.get_first_file_from_wildcard(path),
         lines=True,
         convert_dates=["datetime_col"],
     )
-    convert_pandas_dtypes(gcs_df, bytes_col=True)
+    utils.convert_pandas_dtypes(gcs_df, bytes_col=True)
     if index and scalars_df.index.name is not None:
         gcs_df = gcs_df.set_index(scalars_df.index.name)
 
@@ -479,7 +475,7 @@ def test_to_parquet_index(scalars_dfs, gcs_folder, index):
     # table.
     scalars_df.to_parquet(path, index=index)
 
-    gcs_df = pd.read_parquet(path.replace("*", FIRST_GCS_FILE_SUFFIX))
-    convert_pandas_dtypes(gcs_df, bytes_col=False)
+    gcs_df = pd.read_parquet(utils.get_first_file_from_wildcard(path))
+    utils.convert_pandas_dtypes(gcs_df, bytes_col=False)
     if index and scalars_df.index.name is not None:
         gcs_df = gcs_df.set_index(scalars_df.index.name)
@@ -512,7 +508,7 @@ def test_to_sql_query_unnamed_index_included(
     pd_df = scalars_pandas_df_default_index.reset_index(drop=True)
     roundtrip = session.read_gbq(sql, index_col=idx_ids)
     roundtrip.index.names = [None]
-    assert_pandas_df_equal(roundtrip.to_pandas(), pd_df, check_index_type=False)
+    utils.assert_pandas_df_equal(roundtrip.to_pandas(), pd_df, check_index_type=False)
 
 
 def test_to_sql_query_named_index_included(
@@ -529,7 +525,7 @@ def test_to_sql_query_named_index_included(
     pd_df = scalars_pandas_df_default_index.set_index("rowindex_2", drop=True)
 
     roundtrip = session.read_gbq(sql, index_col=idx_ids)
-    assert_pandas_df_equal(roundtrip.to_pandas(), pd_df)
+    utils.assert_pandas_df_equal(roundtrip.to_pandas(), pd_df)
 
 
 def test_to_sql_query_unnamed_index_excluded(
@@ -544,7 +540,7 @@ def test_to_sql_query_unnamed_index_excluded(
     pd_df = scalars_pandas_df_default_index.reset_index(drop=True)
 
     roundtrip = session.read_gbq(sql)
-    assert_pandas_df_equal(
+    utils.assert_pandas_df_equal(
         roundtrip.to_pandas(), pd_df, check_index_type=False, ignore_order=True
     )
@@ -563,6 +559,6 @@ def test_to_sql_query_named_index_excluded(
         "rowindex_2", drop=True
     ).reset_index(drop=True)
     roundtrip = session.read_gbq(sql)
-    assert_pandas_df_equal(
+    utils.assert_pandas_df_equal(
         roundtrip.to_pandas(), pd_df, check_index_type=False, ignore_order=True
     )
diff --git a/tests/system/small/test_encryption.py b/tests/system/small/test_encryption.py
index 568a236b22..70d2ce381f 100644
--- a/tests/system/small/test_encryption.py
+++ b/tests/system/small/test_encryption.py
@@ -19,7 +19,7 @@
 
 import bigframes
 import bigframes.ml.linear_model
-from tests.system.utils import FIRST_GCS_FILE_SUFFIX
+from tests.system import utils
 
 
 @pytest.fixture(scope="module")
@@ -161,7 +161,7 @@ def test_read_csv_gcs(
     # Create a csv in gcs
     write_path = gcs_folder + "test_read_csv_gcs_bigquery_engine*.csv"
     read_path = (
-        write_path.replace("*", FIRST_GCS_FILE_SUFFIX) if engine is None else write_path
+        utils.get_first_file_from_wildcard(write_path) if engine is None else write_path
     )
 
     scalars_df_index.to_csv(write_path)
diff --git a/tests/system/small/test_series.py b/tests/system/small/test_series.py
index 04f986407e..dcb47d8c60 100644
--- a/tests/system/small/test_series.py
+++ b/tests/system/small/test_series.py
@@ -27,7 +27,7 @@
 from tests.system.utils import (
     assert_pandas_df_equal,
     assert_series_equal,
-    FIRST_GCS_FILE_SUFFIX,
+    get_first_file_from_wildcard,
     skip_legacy_pandas,
 )
 
@@ -2394,7 +2394,7 @@ def test_to_frame(scalars_dfs):
 def test_to_json(gcs_folder, scalars_df_index, scalars_pandas_df_index):
     path = gcs_folder + "test_series_to_json*.jsonl"
     scalars_df_index["int64_col"].to_json(path, lines=True, orient="records")
-    gcs_df = pd.read_json(path.replace("*", FIRST_GCS_FILE_SUFFIX), lines=True)
+    gcs_df = pd.read_json(get_first_file_from_wildcard(path), lines=True)
 
     pd.testing.assert_series_equal(
         gcs_df["int64_col"].astype(pd.Int64Dtype()),
@@ -2407,7 +2407,7 @@ def test_to_json(gcs_folder, scalars_df_index, scalars_pandas_df_index):
 def test_to_csv(gcs_folder, scalars_df_index, scalars_pandas_df_index):
     path = gcs_folder + "test_series_to_csv*.csv"
     scalars_df_index["int64_col"].to_csv(path)
-    gcs_df = pd.read_csv(path.replace("*", FIRST_GCS_FILE_SUFFIX))
+    gcs_df = pd.read_csv(get_first_file_from_wildcard(path))
 
     pd.testing.assert_series_equal(
         gcs_df["int64_col"].astype(pd.Int64Dtype()),
diff --git a/tests/system/small/test_session.py b/tests/system/small/test_session.py
index b6634fc295..c6702aa032 100644
--- a/tests/system/small/test_session.py
+++ b/tests/system/small/test_session.py
@@ -30,7 +30,7 @@
 import bigframes.dataframe
 import bigframes.dtypes
 import bigframes.ml.linear_model
-from tests.system.utils import FIRST_GCS_FILE_SUFFIX, skip_legacy_pandas
+from tests.system import utils
 
 
 def test_read_gbq_tokyo(
@@ -433,14 +433,14 @@ def test_read_pandas_tokyo(
     pd.testing.assert_frame_equal(result, expected)
 
 
-@skip_legacy_pandas
+@utils.skip_legacy_pandas
 def test_read_csv_gcs_default_engine(session, scalars_dfs, gcs_folder):
     scalars_df, _ = scalars_dfs
     if scalars_df.index.name is not None:
         path = gcs_folder + "test_read_csv_gcs_default_engine_w_index*.csv"
     else:
         path = gcs_folder + "test_read_csv_gcs_default_engine_wo_index*.csv"
-    read_path = path.replace("*", FIRST_GCS_FILE_SUFFIX)
+    read_path = utils.get_first_file_from_wildcard(path)
     scalars_df.to_csv(path, index=False)
     dtype = scalars_df.dtypes.to_dict()
     dtype.pop("geography_col")
@@ -490,7 +490,7 @@ def test_read_csv_gcs_bq_engine(session, scalars_dfs, gcs_folder):
         pytest.param("\t", id="custom_sep"),
     ],
 )
-@skip_legacy_pandas
+@utils.skip_legacy_pandas
 def test_read_csv_local_default_engine(session, scalars_dfs, sep):
     scalars_df, scalars_pandas_df = scalars_dfs
     with tempfile.TemporaryDirectory() as dir:
@@ -639,7 +639,7 @@ def test_read_csv_default_engine_throws_not_implemented_error(
         gcs_folder
         + "test_read_csv_gcs_default_engine_throws_not_implemented_error*.csv"
     )
-    read_path = path.replace("*", FIRST_GCS_FILE_SUFFIX)
+    read_path = utils.get_first_file_from_wildcard(path)
     scalars_df_index.to_csv(path)
     with pytest.raises(NotImplementedError, match=match):
         session.read_csv(read_path, **kwargs)
@@ -647,7 +647,7 @@
 
 def test_read_csv_gcs_default_engine_w_header(session, scalars_df_index, gcs_folder):
     path = gcs_folder + "test_read_csv_gcs_default_engine_w_header*.csv"
-    read_path = path.replace("*", FIRST_GCS_FILE_SUFFIX)
+    read_path = utils.get_first_file_from_wildcard(path)
     scalars_df_index.to_csv(path)
 
     # Skips header=N rows, normally considers the N+1th row as the header, but overridden by
@@ -714,7 +714,7 @@ def test_read_csv_gcs_default_engine_w_index_col_name(
     session, scalars_df_default_index, gcs_folder
 ):
     path = gcs_folder + "test_read_csv_gcs_default_engine_w_index_col_name*.csv"
-    read_path = path.replace("*", FIRST_GCS_FILE_SUFFIX)
+    read_path = utils.get_first_file_from_wildcard(path)
     scalars_df_default_index.to_csv(path)
 
     df = session.read_csv(read_path, index_col="rowindex")
@@ -729,7 +729,7 @@ def test_read_csv_gcs_default_engine_w_index_col_index(
     session, scalars_df_default_index, gcs_folder
 ):
     path = gcs_folder + "test_read_csv_gcs_default_engine_w_index_col_index*.csv"
-    read_path = path.replace("*", FIRST_GCS_FILE_SUFFIX)
+    read_path = utils.get_first_file_from_wildcard(path)
     scalars_df_default_index.to_csv(path)
 
     index_col = scalars_df_default_index.columns.to_list().index("rowindex")
@@ -788,7 +788,7 @@ def test_read_csv_local_default_engine_w_index_col_index(
 def test_read_csv_gcs_w_usecols(session, scalars_df_index, gcs_folder, engine):
     path = gcs_folder + "test_read_csv_gcs_w_usecols"
     path = path + "_default_engine*.csv" if engine is None else path + "_bq_engine*.csv"
-    read_path = path.replace("*", FIRST_GCS_FILE_SUFFIX) if engine is None else path
+    read_path = utils.get_first_file_from_wildcard(path) if engine is None else path
     scalars_df_index.to_csv(path)
 
     # df should only have 1 column which is bool_col.
@@ -900,7 +900,7 @@ def test_read_parquet_gcs(session: bigframes.Session, scalars_dfs, gcs_folder, e
 
     # Only bigquery engine for reads supports wildcards in path name.
     if engine != "bigquery":
-        path = path.replace("*", FIRST_GCS_FILE_SUFFIX)
+        path = utils.get_first_file_from_wildcard(path)
 
     df_out = (
         session.read_parquet(path, engine=engine)
@@ -1010,7 +1010,7 @@ def test_read_parquet_gcs_compression_not_supported(
 def test_read_json_gcs_bq_engine(session, scalars_dfs, gcs_folder):
     scalars_df, _ = scalars_dfs
     path = gcs_folder + "test_read_json_gcs_bq_engine_w_index*.json"
-    read_path = path.replace("*", FIRST_GCS_FILE_SUFFIX)
+    read_path = utils.get_first_file_from_wildcard(path)
     scalars_df.to_json(path, index=False, lines=True, orient="records")
 
     df = session.read_json(read_path, lines=True, orient="records", engine="bigquery")
@@ -1034,7 +1034,7 @@ def test_read_json_gcs_bq_engine(session, scalars_dfs, gcs_folder):
 def test_read_json_gcs_default_engine(session, scalars_dfs, gcs_folder):
     scalars_df, _ = scalars_dfs
     path = gcs_folder + "test_read_json_gcs_default_engine_w_index*.json"
-    read_path = path.replace("*", FIRST_GCS_FILE_SUFFIX)
+    read_path = utils.get_first_file_from_wildcard(path)
     scalars_df.to_json(
         path,
         index=False,
diff --git a/tests/system/utils.py b/tests/system/utils.py
index a6444fff8f..e40502e6f2 100644
--- a/tests/system/utils.py
+++ b/tests/system/utils.py
@@ -28,8 +28,6 @@
 
 from bigframes.functions import remote_function
 
-FIRST_GCS_FILE_SUFFIX = "000000000000"
-
 
 def skip_legacy_pandas(test):
     @functools.wraps(test)
@@ -306,3 +304,7 @@ def delete_cloud_function(
     request = functions_v2.DeleteFunctionRequest(name=full_name)
     operation = functions_client.delete_function(request=request)
     return operation
+
+
+def get_first_file_from_wildcard(path):
+    return path.replace("*", "000000000000")
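
For context on the pattern this series converges on: BigQuery export jobs that target a GCS URI containing a "*" wildcard shard their output and substitute a zero-padded twelve-digit file number for the wildcard, so the first shard always ends in 000000000000. A minimal standalone sketch of the new helper and the write-then-read-back step it supports follows; the bucket and object names here are hypothetical, not part of the patches.

    def get_first_file_from_wildcard(path: str) -> str:
        # BigQuery replaces "*" with a zero-padded shard number starting at
        # 000000000000, so this resolves a wildcard export path to the first
        # file the export job wrote.
        return path.replace("*", "000000000000")

    # Hypothetical usage mirroring the tests: export through the wildcard
    # URI, then read back the first shard that the export produced.
    wildcard_path = "gs://example-bucket/test_series_to_csv*.csv"
    assert get_first_file_from_wildcard(wildcard_path) == (
        "gs://example-bucket/test_series_to_csv000000000000.csv"
    )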