diff --git a/.kokoro/load/benchmark.cfg b/.kokoro/load/benchmark.cfg index bc2d7a2655..45e2527a0c 100644 --- a/.kokoro/load/benchmark.cfg +++ b/.kokoro/load/benchmark.cfg @@ -13,7 +13,7 @@ env_vars: { env_vars: { key: "GOOGLE_CLOUD_PROJECT" - value: "bigframes-load-testing" + value: "bigframes-benchmarking" } env_vars: { diff --git a/noxfile.py b/noxfile.py index f4142af21b..bf4c6f9641 100644 --- a/noxfile.py +++ b/noxfile.py @@ -16,6 +16,7 @@ from __future__ import absolute_import +import argparse import multiprocessing import os import pathlib @@ -804,7 +805,7 @@ def notebook(session: nox.Session): processes = [] for notebook, regions in notebooks_reg.items(): for region in regions: - args = ( + region_args = ( "python", "scripts/run_and_publish_benchmark.py", "--notebook", @@ -814,7 +815,7 @@ def notebook(session: nox.Session): if multi_process_mode: process = multiprocessing.Process( target=_run_process, - args=(session, args, error_flag), + args=(session, region_args, error_flag), ) process.start() processes.append(process) @@ -822,7 +823,7 @@ def notebook(session: nox.Session): # process to avoid potential race conditions。 time.sleep(1) else: - session.run(*args) + session.run(*region_args) for process in processes: process.join() @@ -861,7 +862,51 @@ def benchmark(session: nox.Session): session.install("-e", ".[all]") base_path = os.path.join("tests", "benchmark") - benchmark_script_list = list(pathlib.Path(base_path).rglob("*.py")) + parser = argparse.ArgumentParser() + parser.add_argument( + "-i", + "--iterations", + type=int, + default=1, + help="Number of iterations to run each benchmark.", + ) + parser.add_argument( + "-o", + "--output-csv", + nargs="?", + const=True, + default=False, + help=( + "Determines whether to output results to a CSV file. If no location is provided, " + "a temporary location is automatically generated." 
+ ), + ) + parser.add_argument( + "-b", + "--benchmark-filter", + nargs="+", + help=( + "List of file or directory names to include in the benchmarks. If not provided, " + "all benchmarks are run." + ), + ) + + args = parser.parse_args(session.posargs) + + benchmark_script_list: List[pathlib.Path] = [] + if args.benchmark_filter: + for filter_item in args.benchmark_filter: + full_path = os.path.join(base_path, filter_item) + if os.path.isdir(full_path): + benchmark_script_list.extend(pathlib.Path(full_path).rglob("*.py")) + elif os.path.isfile(full_path) and full_path.endswith(".py"): + benchmark_script_list.append(pathlib.Path(full_path)) + else: + raise ValueError( + f"Item {filter_item} does not match any valid file or directory" + ) + else: + benchmark_script_list = list(pathlib.Path(base_path).rglob("*.py")) try: for benchmark in benchmark_script_list: @@ -871,12 +916,15 @@ def benchmark(session: nox.Session): "python", "scripts/run_and_publish_benchmark.py", f"--benchmark-path={benchmark}", + f"--iterations={args.iterations}", ) finally: session.run( "python", "scripts/run_and_publish_benchmark.py", f"--publish-benchmarks={base_path}", + f"--iterations={args.iterations}", + f"--output-csv={args.output_csv}", ) diff --git a/scripts/run_and_publish_benchmark.py b/scripts/run_and_publish_benchmark.py index 4b3d4f4d96..a42301cb13 100644 --- a/scripts/run_and_publish_benchmark.py +++ b/scripts/run_and_publish_benchmark.py @@ -19,6 +19,7 @@ import pathlib import subprocess import sys +import tempfile from typing import Dict, List, Union import numpy as np @@ -50,7 +51,7 @@ def run_benchmark_subprocess(args, log_env_name_var, filename=None, region=None) subprocess.run(args, env=env, check=True) -def collect_benchmark_result(benchmark_path: str) -> pd.DataFrame: +def collect_benchmark_result(benchmark_path: str, iterations: int) -> pd.DataFrame: """Generate a DataFrame report on HTTP queries, bytes processed, slot time and execution time from log files.""" path = 
pathlib.Path(benchmark_path) try: @@ -100,28 +101,23 @@ def collect_benchmark_result(benchmark_path: str) -> pd.DataFrame: with open(bytes_file, "r") as file: lines = file.read().splitlines() - query_count = len(lines) - total_bytes = sum(int(line) for line in lines) + query_count = len(lines) / iterations + total_bytes = sum(int(line) for line in lines) / iterations with open(millis_file, "r") as file: lines = file.read().splitlines() - total_slot_millis = sum(int(line) for line in lines) + total_slot_millis = sum(int(line) for line in lines) / iterations if has_local_seconds: - # 'local_seconds' captures the total execution time for a benchmark as it - # starts timing immediately before the benchmark code begins and stops - # immediately after it ends. Unlike other metrics that might accumulate - # values proportional to the number of queries executed, 'local_seconds' is - # a singular measure of the time taken for the complete execution of the - # benchmark, from start to finish. with open(local_seconds_file, "r") as file: - local_seconds = float(file.readline().strip()) + lines = file.read().splitlines() + local_seconds = sum(float(line) for line in lines) / iterations else: local_seconds = None with open(bq_seconds_file, "r") as file: lines = file.read().splitlines() - bq_seconds = sum(float(line) for line in lines) + bq_seconds = sum(float(line) for line in lines) / iterations results_dict[str(filename)] = [ query_count, @@ -154,7 +150,12 @@ def collect_benchmark_result(benchmark_path: str) -> pd.DataFrame: columns=columns, ) - print("---BIGQUERY USAGE REPORT---") + report_title = ( + "---BIGQUERY USAGE REPORT---" + if iterations == 1 + else f"---BIGQUERY USAGE REPORT (Averages over {iterations} Iterations)---" + ) + print(report_title) for index, row in benchmark_metrics.iterrows(): formatted_local_exec_time = ( f"{round(row['Local_Execution_Time_Sec'], 1)} seconds" @@ -259,7 +260,26 @@ def find_config(start_path): return None -def 
run_benchmark_from_config(benchmark: str): +def publish_to_bigquery(dataframe, notebook, project_name="bigframes-metrics"): + bigquery_table = ( + f"{project_name}.benchmark_report.notebook_benchmark" + if notebook + else f"{project_name}.benchmark_report.benchmark" + ) + + repo_status = get_repository_status() + for idx, col in enumerate(repo_status.keys()): + dataframe.insert(idx, col, repo_status[col]) + + pandas_gbq.to_gbq( + dataframe=dataframe, + destination_table=bigquery_table, + if_exists="append", + ) + print(f"Results have been successfully uploaded to {bigquery_table}.") + + +def run_benchmark_from_config(benchmark: str, iterations: int): print(benchmark) config_path = find_config(benchmark) @@ -267,24 +287,26 @@ def run_benchmark_from_config(benchmark: str): benchmark_configs = [] with open(config_path, "r") as f: for line in f: - config = json.loads(line) - python_args = [f"--{key}={value}" for key, value in config.items()] - suffix = ( - config["benchmark_suffix"] - if "benchmark_suffix" in config - else "_".join(f"{key}_{value}" for key, value in config.items()) - ) - benchmark_configs.append((suffix, python_args)) + if line.strip(): + config = json.loads(line) + python_args = [f"--{key}={value}" for key, value in config.items()] + suffix = ( + config["benchmark_suffix"] + if "benchmark_suffix" in config + else "_".join(f"{key}_{value}" for key, value in config.items()) + ) + benchmark_configs.append((suffix, python_args)) else: benchmark_configs = [(None, [])] - for benchmark_config in benchmark_configs: - args = ["python", str(benchmark)] - args.extend(benchmark_config[1]) - log_env_name_var = str(benchmark) - if benchmark_config[0] is not None: - log_env_name_var += f"_{benchmark_config[0]}" - run_benchmark_subprocess(args=args, log_env_name_var=log_env_name_var) + for _ in range(iterations): + for benchmark_config in benchmark_configs: + args = ["python", str(benchmark)] + args.extend(benchmark_config[1]) + log_env_name_var = str(benchmark) + if 
benchmark_config[0] is not None: + log_env_name_var += f"_{benchmark_config[0]}" + run_benchmark_subprocess(args=args, log_env_name_var=log_env_name_var) def run_notebook_benchmark(benchmark_file: str, region: str): @@ -341,6 +363,19 @@ def parse_arguments(): help="Set the benchmarks to be published to BigQuery.", ) + parser.add_argument( + "--iterations", + type=int, + default=1, + help="Number of iterations to run each benchmark.", + ) + parser.add_argument( + "--output-csv", + type=str, + default=None, + help="Determines whether to output results to a CSV file. If no location is provided, a temporary location is automatically generated.", + ) + return parser.parse_args() @@ -348,28 +383,39 @@ def main(): args = parse_arguments() if args.publish_benchmarks: - bigquery_table = ( - "bigframes-metrics.benchmark_report.notebook_benchmark" - if args.notebook - else "bigframes-metrics.benchmark_report.benchmark" + benchmark_metrics = collect_benchmark_result( + args.publish_benchmarks, args.iterations ) - benchmark_metrics = collect_benchmark_result(args.publish_benchmarks) - - if os.getenv("BENCHMARK_AND_PUBLISH", "false") == "true": - repo_status = get_repository_status() - for idx, col in enumerate(repo_status.keys()): - benchmark_metrics.insert(idx, col, repo_status[col]) - - pandas_gbq.to_gbq( - dataframe=benchmark_metrics, - destination_table=bigquery_table, - if_exists="append", + # Output results to CSV without specifying a location + if args.output_csv == "True": + current_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S") + temp_file = tempfile.NamedTemporaryFile( + prefix=f"benchmark_{current_time}_", delete=False, suffix=".csv" ) - print("Results have been successfully uploaded to BigQuery.") + benchmark_metrics.to_csv(temp_file.name, index=False) + print( + f"Benchmark result is saved to a temporary location: {temp_file.name}" + ) + temp_file.close() + # Output results to CSV with specified a custom location + elif args.output_csv != "False": + 
benchmark_metrics.to_csv(args.output_csv, index=False) + print(f"Benchmark result is saved to: {args.output_csv}") + + # Publish the benchmark metrics to BigQuery under the 'bigframes-metrics' project. + # The 'BENCHMARK_AND_PUBLISH' environment variable should be set to 'true' only + # in specific Kokoro sessions. + if os.getenv("BENCHMARK_AND_PUBLISH", "false") == "true": + publish_to_bigquery(benchmark_metrics, args.notebook) + # If the 'GCLOUD_BENCH_PUBLISH_PROJECT' environment variable is set, publish the + # benchmark metrics to a specified BigQuery table in the provided project. This is + # intended for local testing where the default behavior is not to publish results. + elif project := os.getenv("GCLOUD_BENCH_PUBLISH_PROJECT", ""): + publish_to_bigquery(benchmark_metrics, args.notebook, project) elif args.notebook: run_notebook_benchmark(args.benchmark_path, args.region) else: - run_benchmark_from_config(args.benchmark_path) + run_benchmark_from_config(args.benchmark_path, args.iterations) if __name__ == "__main__": diff --git a/tests/benchmark/tpch/config.jsonl b/tests/benchmark/tpch/config.jsonl index 1c8b4dd19a..e6f7a444f6 100644 --- a/tests/benchmark/tpch/config.jsonl +++ b/tests/benchmark/tpch/config.jsonl @@ -1,8 +1,10 @@ -{"benchmark_suffix": "1g_ordered", "dataset_id": "tpch_0001g", "ordered": true} -{"benchmark_suffix": "1g_unordered", "dataset_id": "tpch_0001g", "ordered": false} -{"benchmark_suffix": "10g_ordered", "dataset_id": "tpch_0010g", "ordered": true} -{"benchmark_suffix": "10g_unordered", "dataset_id": "tpch_0010g", "ordered": false} -{"benchmark_suffix": "100g_ordered", "dataset_id": "tpch_0100g", "ordered": true} -{"benchmark_suffix": "100g_unordered", "dataset_id": "tpch_0100g", "ordered": false} -{"benchmark_suffix": "1t_ordered", "dataset_id": "tpch_0001t", "ordered": true} -{"benchmark_suffix": "1t_unordered", "dataset_id": "tpch_0001t", "ordered": false} +{"benchmark_suffix": "1g_ordered", "project_id": "bigframes-dev-perf", 
"dataset_id": "tpch_0001g", "ordered": true} +{"benchmark_suffix": "1g_unordered", "project_id": "bigframes-dev-perf", "dataset_id": "tpch_0001g", "ordered": false} +{"benchmark_suffix": "10g_ordered", "project_id": "bigframes-dev-perf", "dataset_id": "tpch_0010g", "ordered": true} +{"benchmark_suffix": "10g_unordered", "project_id": "bigframes-dev-perf", "dataset_id": "tpch_0010g", "ordered": false} +{"benchmark_suffix": "100g_ordered", "project_id": "bigframes-dev-perf", "dataset_id": "tpch_0100g", "ordered": true} +{"benchmark_suffix": "100g_unordered", "project_id": "bigframes-dev-perf", "dataset_id": "tpch_0100g", "ordered": false} +{"benchmark_suffix": "1t_ordered", "project_id": "bigframes-dev-perf", "dataset_id": "tpch_0001t", "ordered": true} +{"benchmark_suffix": "1t_unordered", "project_id": "bigframes-dev-perf", "dataset_id": "tpch_0001t", "ordered": false} +{"benchmark_suffix": "10t_ordered", "project_id": "bigframes-dev-perf", "dataset_id": "tpch_0010t", "ordered": true} +{"benchmark_suffix": "10t_unordered", "project_id": "bigframes-dev-perf", "dataset_id": "tpch_0010t", "ordered": false} diff --git a/tests/benchmark/tpch/q1.py b/tests/benchmark/tpch/q1.py index 398e9bf9b2..3f1c63967e 100644 --- a/tests/benchmark/tpch/q1.py +++ b/tests/benchmark/tpch/q1.py @@ -17,9 +17,9 @@ import bigframes_vendored.tpch.queries.q1 as vendored_tpch_q1 if __name__ == "__main__": - dataset_id, session, suffix = utils.get_tpch_configuration() + project_id, dataset_id, session, suffix = utils.get_tpch_configuration() current_path = pathlib.Path(__file__).absolute() utils.get_execution_time( - vendored_tpch_q1.q, current_path, suffix, dataset_id, session + vendored_tpch_q1.q, current_path, suffix, project_id, dataset_id, session ) diff --git a/tests/benchmark/tpch/q10.py b/tests/benchmark/tpch/q10.py index e500a657b6..bea18975ca 100644 --- a/tests/benchmark/tpch/q10.py +++ b/tests/benchmark/tpch/q10.py @@ -17,9 +17,9 @@ import bigframes_vendored.tpch.queries.q10 as 
vendored_tpch_q10 if __name__ == "__main__": - dataset_id, session, suffix = utils.get_tpch_configuration() + project_id, dataset_id, session, suffix = utils.get_tpch_configuration() current_path = pathlib.Path(__file__).absolute() utils.get_execution_time( - vendored_tpch_q10.q, current_path, suffix, dataset_id, session + vendored_tpch_q10.q, current_path, suffix, project_id, dataset_id, session ) diff --git a/tests/benchmark/tpch/q11.py b/tests/benchmark/tpch/q11.py index 46bd2f022c..538d8d3e5f 100644 --- a/tests/benchmark/tpch/q11.py +++ b/tests/benchmark/tpch/q11.py @@ -17,9 +17,9 @@ import bigframes_vendored.tpch.queries.q11 as vendored_tpch_q11 if __name__ == "__main__": - dataset_id, session, suffix = utils.get_tpch_configuration() + project_id, dataset_id, session, suffix = utils.get_tpch_configuration() current_path = pathlib.Path(__file__).absolute() utils.get_execution_time( - vendored_tpch_q11.q, current_path, suffix, dataset_id, session + vendored_tpch_q11.q, current_path, suffix, project_id, dataset_id, session ) diff --git a/tests/benchmark/tpch/q12.py b/tests/benchmark/tpch/q12.py index d2fab97699..6503b543f4 100644 --- a/tests/benchmark/tpch/q12.py +++ b/tests/benchmark/tpch/q12.py @@ -17,9 +17,9 @@ import bigframes_vendored.tpch.queries.q12 as vendored_tpch_q12 if __name__ == "__main__": - dataset_id, session, suffix = utils.get_tpch_configuration() + project_id, dataset_id, session, suffix = utils.get_tpch_configuration() current_path = pathlib.Path(__file__).absolute() utils.get_execution_time( - vendored_tpch_q12.q, current_path, suffix, dataset_id, session + vendored_tpch_q12.q, current_path, suffix, project_id, dataset_id, session ) diff --git a/tests/benchmark/tpch/q13.py b/tests/benchmark/tpch/q13.py index 24eec7090c..60c2101f6f 100644 --- a/tests/benchmark/tpch/q13.py +++ b/tests/benchmark/tpch/q13.py @@ -17,9 +17,9 @@ import bigframes_vendored.tpch.queries.q13 as vendored_tpch_q13 if __name__ == "__main__": - dataset_id, session, suffix = 
utils.get_tpch_configuration() + project_id, dataset_id, session, suffix = utils.get_tpch_configuration() current_path = pathlib.Path(__file__).absolute() utils.get_execution_time( - vendored_tpch_q13.q, current_path, suffix, dataset_id, session + vendored_tpch_q13.q, current_path, suffix, project_id, dataset_id, session ) diff --git a/tests/benchmark/tpch/q14.py b/tests/benchmark/tpch/q14.py index 8aa7ed4d2e..1698a01628 100644 --- a/tests/benchmark/tpch/q14.py +++ b/tests/benchmark/tpch/q14.py @@ -17,9 +17,9 @@ import bigframes_vendored.tpch.queries.q14 as vendored_tpch_q14 if __name__ == "__main__": - dataset_id, session, suffix = utils.get_tpch_configuration() + project_id, dataset_id, session, suffix = utils.get_tpch_configuration() current_path = pathlib.Path(__file__).absolute() utils.get_execution_time( - vendored_tpch_q14.q, current_path, suffix, dataset_id, session + vendored_tpch_q14.q, current_path, suffix, project_id, dataset_id, session ) diff --git a/tests/benchmark/tpch/q15.py b/tests/benchmark/tpch/q15.py index 511cfbc683..49e2ce4e92 100644 --- a/tests/benchmark/tpch/q15.py +++ b/tests/benchmark/tpch/q15.py @@ -17,9 +17,9 @@ import bigframes_vendored.tpch.queries.q15 as vendored_tpch_q15 if __name__ == "__main__": - dataset_id, session, suffix = utils.get_tpch_configuration() + project_id, dataset_id, session, suffix = utils.get_tpch_configuration() current_path = pathlib.Path(__file__).absolute() utils.get_execution_time( - vendored_tpch_q15.q, current_path, suffix, dataset_id, session + vendored_tpch_q15.q, current_path, suffix, project_id, dataset_id, session ) diff --git a/tests/benchmark/tpch/q16.py b/tests/benchmark/tpch/q16.py index 1d1f4b5f30..ef6edf6b12 100644 --- a/tests/benchmark/tpch/q16.py +++ b/tests/benchmark/tpch/q16.py @@ -17,9 +17,9 @@ import bigframes_vendored.tpch.queries.q16 as vendored_tpch_q16 if __name__ == "__main__": - dataset_id, session, suffix = utils.get_tpch_configuration() + project_id, dataset_id, session, suffix = 
utils.get_tpch_configuration() current_path = pathlib.Path(__file__).absolute() utils.get_execution_time( - vendored_tpch_q16.q, current_path, suffix, dataset_id, session + vendored_tpch_q16.q, current_path, suffix, project_id, dataset_id, session ) diff --git a/tests/benchmark/tpch/q17.py b/tests/benchmark/tpch/q17.py index e285cc9fca..2f680d206e 100644 --- a/tests/benchmark/tpch/q17.py +++ b/tests/benchmark/tpch/q17.py @@ -17,9 +17,9 @@ import bigframes_vendored.tpch.queries.q17 as vendored_tpch_q17 if __name__ == "__main__": - dataset_id, session, suffix = utils.get_tpch_configuration() + project_id, dataset_id, session, suffix = utils.get_tpch_configuration() current_path = pathlib.Path(__file__).absolute() utils.get_execution_time( - vendored_tpch_q17.q, current_path, suffix, dataset_id, session + vendored_tpch_q17.q, current_path, suffix, project_id, dataset_id, session ) diff --git a/tests/benchmark/tpch/q18.py b/tests/benchmark/tpch/q18.py index de9e4f2f94..7336246f1b 100644 --- a/tests/benchmark/tpch/q18.py +++ b/tests/benchmark/tpch/q18.py @@ -17,9 +17,9 @@ import bigframes_vendored.tpch.queries.q18 as vendored_tpch_q18 if __name__ == "__main__": - dataset_id, session, suffix = utils.get_tpch_configuration() + project_id, dataset_id, session, suffix = utils.get_tpch_configuration() current_path = pathlib.Path(__file__).absolute() utils.get_execution_time( - vendored_tpch_q18.q, current_path, suffix, dataset_id, session + vendored_tpch_q18.q, current_path, suffix, project_id, dataset_id, session ) diff --git a/tests/benchmark/tpch/q19.py b/tests/benchmark/tpch/q19.py index 7b13b0d250..3bf34794bf 100644 --- a/tests/benchmark/tpch/q19.py +++ b/tests/benchmark/tpch/q19.py @@ -17,9 +17,9 @@ import bigframes_vendored.tpch.queries.q19 as vendored_tpch_q19 if __name__ == "__main__": - dataset_id, session, suffix = utils.get_tpch_configuration() + project_id, dataset_id, session, suffix = utils.get_tpch_configuration() current_path = 
pathlib.Path(__file__).absolute() utils.get_execution_time( - vendored_tpch_q19.q, current_path, suffix, dataset_id, session + vendored_tpch_q19.q, current_path, suffix, project_id, dataset_id, session ) diff --git a/tests/benchmark/tpch/q2.py b/tests/benchmark/tpch/q2.py index 57151e36bd..c738aae124 100644 --- a/tests/benchmark/tpch/q2.py +++ b/tests/benchmark/tpch/q2.py @@ -17,9 +17,9 @@ import bigframes_vendored.tpch.queries.q2 as vendored_tpch_q2 if __name__ == "__main__": - dataset_id, session, suffix = utils.get_tpch_configuration() + project_id, dataset_id, session, suffix = utils.get_tpch_configuration() current_path = pathlib.Path(__file__).absolute() utils.get_execution_time( - vendored_tpch_q2.q, current_path, suffix, dataset_id, session + vendored_tpch_q2.q, current_path, suffix, project_id, dataset_id, session ) diff --git a/tests/benchmark/tpch/q20.py b/tests/benchmark/tpch/q20.py index e02e9306f8..1b254c6a78 100644 --- a/tests/benchmark/tpch/q20.py +++ b/tests/benchmark/tpch/q20.py @@ -17,9 +17,9 @@ import bigframes_vendored.tpch.queries.q20 as vendored_tpch_q20 if __name__ == "__main__": - dataset_id, session, suffix = utils.get_tpch_configuration() + project_id, dataset_id, session, suffix = utils.get_tpch_configuration() current_path = pathlib.Path(__file__).absolute() utils.get_execution_time( - vendored_tpch_q20.q, current_path, suffix, dataset_id, session + vendored_tpch_q20.q, current_path, suffix, project_id, dataset_id, session ) diff --git a/tests/benchmark/tpch/q21.py b/tests/benchmark/tpch/q21.py index d123286c3e..18e8df87fe 100644 --- a/tests/benchmark/tpch/q21.py +++ b/tests/benchmark/tpch/q21.py @@ -17,9 +17,9 @@ import bigframes_vendored.tpch.queries.q21 as vendored_tpch_q21 if __name__ == "__main__": - dataset_id, session, suffix = utils.get_tpch_configuration() + project_id, dataset_id, session, suffix = utils.get_tpch_configuration() current_path = pathlib.Path(__file__).absolute() utils.get_execution_time( - vendored_tpch_q21.q, 
current_path, suffix, dataset_id, session + vendored_tpch_q21.q, current_path, suffix, project_id, dataset_id, session ) diff --git a/tests/benchmark/tpch/q22.py b/tests/benchmark/tpch/q22.py index 841740da0e..6c10021c2b 100644 --- a/tests/benchmark/tpch/q22.py +++ b/tests/benchmark/tpch/q22.py @@ -17,9 +17,9 @@ import bigframes_vendored.tpch.queries.q22 as vendored_tpch_q22 if __name__ == "__main__": - dataset_id, session, suffix = utils.get_tpch_configuration() + project_id, dataset_id, session, suffix = utils.get_tpch_configuration() current_path = pathlib.Path(__file__).absolute() utils.get_execution_time( - vendored_tpch_q22.q, current_path, suffix, dataset_id, session + vendored_tpch_q22.q, current_path, suffix, project_id, dataset_id, session ) diff --git a/tests/benchmark/tpch/q3.py b/tests/benchmark/tpch/q3.py index 28cdfed49c..5bcaaa0d5d 100644 --- a/tests/benchmark/tpch/q3.py +++ b/tests/benchmark/tpch/q3.py @@ -17,9 +17,9 @@ import bigframes_vendored.tpch.queries.q3 as vendored_tpch_q3 if __name__ == "__main__": - dataset_id, session, suffix = utils.get_tpch_configuration() + project_id, dataset_id, session, suffix = utils.get_tpch_configuration() current_path = pathlib.Path(__file__).absolute() utils.get_execution_time( - vendored_tpch_q3.q, current_path, suffix, dataset_id, session + vendored_tpch_q3.q, current_path, suffix, project_id, dataset_id, session ) diff --git a/tests/benchmark/tpch/q4.py b/tests/benchmark/tpch/q4.py index aa67cc77a0..462c6336d1 100644 --- a/tests/benchmark/tpch/q4.py +++ b/tests/benchmark/tpch/q4.py @@ -17,9 +17,9 @@ import bigframes_vendored.tpch.queries.q4 as vendored_tpch_q4 if __name__ == "__main__": - dataset_id, session, suffix = utils.get_tpch_configuration() + project_id, dataset_id, session, suffix = utils.get_tpch_configuration() current_path = pathlib.Path(__file__).absolute() utils.get_execution_time( - vendored_tpch_q4.q, current_path, suffix, dataset_id, session + vendored_tpch_q4.q, current_path, suffix, 
project_id, dataset_id, session ) diff --git a/tests/benchmark/tpch/q5.py b/tests/benchmark/tpch/q5.py index bc39b76e82..108cde58cc 100644 --- a/tests/benchmark/tpch/q5.py +++ b/tests/benchmark/tpch/q5.py @@ -17,9 +17,9 @@ import bigframes_vendored.tpch.queries.q5 as vendored_tpch_q5 if __name__ == "__main__": - dataset_id, session, suffix = utils.get_tpch_configuration() + project_id, dataset_id, session, suffix = utils.get_tpch_configuration() current_path = pathlib.Path(__file__).absolute() utils.get_execution_time( - vendored_tpch_q5.q, current_path, suffix, dataset_id, session + vendored_tpch_q5.q, current_path, suffix, project_id, dataset_id, session ) diff --git a/tests/benchmark/tpch/q6.py b/tests/benchmark/tpch/q6.py index a193333045..ccefc1b0bf 100644 --- a/tests/benchmark/tpch/q6.py +++ b/tests/benchmark/tpch/q6.py @@ -17,9 +17,9 @@ import bigframes_vendored.tpch.queries.q6 as vendored_tpch_q6 if __name__ == "__main__": - dataset_id, session, suffix = utils.get_tpch_configuration() + project_id, dataset_id, session, suffix = utils.get_tpch_configuration() current_path = pathlib.Path(__file__).absolute() utils.get_execution_time( - vendored_tpch_q6.q, current_path, suffix, dataset_id, session + vendored_tpch_q6.q, current_path, suffix, project_id, dataset_id, session ) diff --git a/tests/benchmark/tpch/q7.py b/tests/benchmark/tpch/q7.py index 8a17eb91ea..0cad599a60 100644 --- a/tests/benchmark/tpch/q7.py +++ b/tests/benchmark/tpch/q7.py @@ -17,9 +17,9 @@ import bigframes_vendored.tpch.queries.q7 as vendored_tpch_q7 if __name__ == "__main__": - dataset_id, session, suffix = utils.get_tpch_configuration() + project_id, dataset_id, session, suffix = utils.get_tpch_configuration() current_path = pathlib.Path(__file__).absolute() utils.get_execution_time( - vendored_tpch_q7.q, current_path, suffix, dataset_id, session + vendored_tpch_q7.q, current_path, suffix, project_id, dataset_id, session ) diff --git a/tests/benchmark/tpch/q8.py 
b/tests/benchmark/tpch/q8.py index b5e7d7aa37..6c6ac23b9b 100644 --- a/tests/benchmark/tpch/q8.py +++ b/tests/benchmark/tpch/q8.py @@ -17,9 +17,9 @@ import bigframes_vendored.tpch.queries.q8 as vendored_tpch_q8 if __name__ == "__main__": - dataset_id, session, suffix = utils.get_tpch_configuration() + project_id, dataset_id, session, suffix = utils.get_tpch_configuration() current_path = pathlib.Path(__file__).absolute() utils.get_execution_time( - vendored_tpch_q8.q, current_path, suffix, dataset_id, session + vendored_tpch_q8.q, current_path, suffix, project_id, dataset_id, session ) diff --git a/tests/benchmark/tpch/q9.py b/tests/benchmark/tpch/q9.py index c098e9a8da..05c82fb66e 100644 --- a/tests/benchmark/tpch/q9.py +++ b/tests/benchmark/tpch/q9.py @@ -17,9 +17,9 @@ import bigframes_vendored.tpch.queries.q9 as vendored_tpch_q9 if __name__ == "__main__": - dataset_id, session, suffix = utils.get_tpch_configuration() + project_id, dataset_id, session, suffix = utils.get_tpch_configuration() current_path = pathlib.Path(__file__).absolute() utils.get_execution_time( - vendored_tpch_q9.q, current_path, suffix, dataset_id, session + vendored_tpch_q9.q, current_path, suffix, project_id, dataset_id, session ) diff --git a/tests/benchmark/utils.py b/tests/benchmark/utils.py index c286d4e229..32be33fc74 100644 --- a/tests/benchmark/utils.py +++ b/tests/benchmark/utils.py @@ -43,6 +43,12 @@ def get_dbbenchmark_configuration(): def get_tpch_configuration(): parser = argparse.ArgumentParser(description="Process TPC-H Query using BigFrames.") + parser.add_argument( + "--project_id", + type=str, + required=True, + help="The BigQuery project ID to query.", + ) parser.add_argument( "--dataset_id", type=str, @@ -62,7 +68,7 @@ def get_tpch_configuration(): args = parser.parse_args() session = _initialize_session(_str_to_bool(args.ordered)) - return args.dataset_id, session, args.benchmark_suffix + return args.project_id, args.dataset_id, session, args.benchmark_suffix def 
get_execution_time(func, current_path, suffix, *args, **kwargs): @@ -73,7 +79,7 @@ def get_execution_time(func, current_path, suffix, *args, **kwargs): clock_time_file_path = f"{current_path}_{suffix}.local_exec_time_seconds" - with open(clock_time_file_path, "w") as log_file: + with open(clock_time_file_path, "a") as log_file: log_file.write(f"{runtime}\n") diff --git a/third_party/bigframes_vendored/tpch/queries/q1.py b/third_party/bigframes_vendored/tpch/queries/q1.py index 3f155ec1c9..e1fdf85f58 100644 --- a/third_party/bigframes_vendored/tpch/queries/q1.py +++ b/third_party/bigframes_vendored/tpch/queries/q1.py @@ -7,9 +7,9 @@ import bigframes.pandas as bpd -def q(dataset_id: str, session: bigframes.Session): +def q(project_id: str, dataset_id: str, session: bigframes.Session): lineitem = session.read_gbq( - f"bigframes-dev-perf.{dataset_id}.LINEITEM", + f"{project_id}.{dataset_id}.LINEITEM", index_col=bigframes.enums.DefaultIndexKind.NULL, ) diff --git a/third_party/bigframes_vendored/tpch/queries/q10.py b/third_party/bigframes_vendored/tpch/queries/q10.py index 95b7b87b0e..75a8f2de7f 100644 --- a/third_party/bigframes_vendored/tpch/queries/q10.py +++ b/third_party/bigframes_vendored/tpch/queries/q10.py @@ -7,21 +7,21 @@ import bigframes.pandas as bpd -def q(dataset_id: str, session: bigframes.Session): +def q(project_id: str, dataset_id: str, session: bigframes.Session): customer = session.read_gbq( - f"bigframes-dev-perf.{dataset_id}.CUSTOMER", + f"{project_id}.{dataset_id}.CUSTOMER", index_col=bigframes.enums.DefaultIndexKind.NULL, ) lineitem = session.read_gbq( - f"bigframes-dev-perf.{dataset_id}.LINEITEM", + f"{project_id}.{dataset_id}.LINEITEM", index_col=bigframes.enums.DefaultIndexKind.NULL, ) nation = session.read_gbq( - f"bigframes-dev-perf.{dataset_id}.NATION", + f"{project_id}.{dataset_id}.NATION", index_col=bigframes.enums.DefaultIndexKind.NULL, ) orders = session.read_gbq( - f"bigframes-dev-perf.{dataset_id}.ORDERS", + 
f"{project_id}.{dataset_id}.ORDERS", index_col=bigframes.enums.DefaultIndexKind.NULL, ) diff --git a/third_party/bigframes_vendored/tpch/queries/q11.py b/third_party/bigframes_vendored/tpch/queries/q11.py index 269728f6f1..484a7c0001 100644 --- a/third_party/bigframes_vendored/tpch/queries/q11.py +++ b/third_party/bigframes_vendored/tpch/queries/q11.py @@ -4,17 +4,17 @@ import bigframes.pandas as bpd -def q(dataset_id: str, session: bigframes.Session): +def q(project_id: str, dataset_id: str, session: bigframes.Session): supplier = session.read_gbq( - f"bigframes-dev-perf.{dataset_id}.SUPPLIER", + f"{project_id}.{dataset_id}.SUPPLIER", index_col=bigframes.enums.DefaultIndexKind.NULL, ) partsupp = session.read_gbq( - f"bigframes-dev-perf.{dataset_id}.PARTSUPP", + f"{project_id}.{dataset_id}.PARTSUPP", index_col=bigframes.enums.DefaultIndexKind.NULL, ) nation = session.read_gbq( - f"bigframes-dev-perf.{dataset_id}.NATION", + f"{project_id}.{dataset_id}.NATION", index_col=bigframes.enums.DefaultIndexKind.NULL, ) diff --git a/third_party/bigframes_vendored/tpch/queries/q12.py b/third_party/bigframes_vendored/tpch/queries/q12.py index 1c39275932..e2b7aaf9f2 100644 --- a/third_party/bigframes_vendored/tpch/queries/q12.py +++ b/third_party/bigframes_vendored/tpch/queries/q12.py @@ -7,13 +7,13 @@ import bigframes.pandas as bpd -def q(dataset_id: str, session: bigframes.Session): +def q(project_id: str, dataset_id: str, session: bigframes.Session): lineitem = session.read_gbq( - f"bigframes-dev-perf.{dataset_id}.LINEITEM", + f"{project_id}.{dataset_id}.LINEITEM", index_col=bigframes.enums.DefaultIndexKind.NULL, ) orders = session.read_gbq( - f"bigframes-dev-perf.{dataset_id}.ORDERS", + f"{project_id}.{dataset_id}.ORDERS", index_col=bigframes.enums.DefaultIndexKind.NULL, ) diff --git a/third_party/bigframes_vendored/tpch/queries/q13.py b/third_party/bigframes_vendored/tpch/queries/q13.py index dd69cf509f..ea2f0da284 100644 --- 
a/third_party/bigframes_vendored/tpch/queries/q13.py +++ b/third_party/bigframes_vendored/tpch/queries/q13.py @@ -6,13 +6,13 @@ import bigframes.pandas as bpd -def q(dataset_id: str, session: bigframes.Session): +def q(project_id: str, dataset_id: str, session: bigframes.Session): customer = session.read_gbq( - f"bigframes-dev-perf.{dataset_id}.CUSTOMER", + f"{project_id}.{dataset_id}.CUSTOMER", index_col=bigframes.enums.DefaultIndexKind.NULL, ) orders = session.read_gbq( - f"bigframes-dev-perf.{dataset_id}.ORDERS", + f"{project_id}.{dataset_id}.ORDERS", index_col=bigframes.enums.DefaultIndexKind.NULL, ) diff --git a/third_party/bigframes_vendored/tpch/queries/q14.py b/third_party/bigframes_vendored/tpch/queries/q14.py index 8c25a5897d..27f3d9e224 100644 --- a/third_party/bigframes_vendored/tpch/queries/q14.py +++ b/third_party/bigframes_vendored/tpch/queries/q14.py @@ -5,13 +5,13 @@ import bigframes -def q(dataset_id: str, session: bigframes.Session): +def q(project_id: str, dataset_id: str, session: bigframes.Session): lineitem = session.read_gbq( - f"bigframes-dev-perf.{dataset_id}.LINEITEM", + f"{project_id}.{dataset_id}.LINEITEM", index_col=bigframes.enums.DefaultIndexKind.NULL, ) part = session.read_gbq( - f"bigframes-dev-perf.{dataset_id}.PART", + f"{project_id}.{dataset_id}.PART", index_col=bigframes.enums.DefaultIndexKind.NULL, ) diff --git a/third_party/bigframes_vendored/tpch/queries/q15.py b/third_party/bigframes_vendored/tpch/queries/q15.py index fae3010e36..042adbda8b 100644 --- a/third_party/bigframes_vendored/tpch/queries/q15.py +++ b/third_party/bigframes_vendored/tpch/queries/q15.py @@ -6,13 +6,13 @@ import bigframes.pandas as bpd -def q(dataset_id: str, session: bigframes.Session): +def q(project_id: str, dataset_id: str, session: bigframes.Session): lineitem = session.read_gbq( - f"bigframes-dev-perf.{dataset_id}.LINEITEM", + f"{project_id}.{dataset_id}.LINEITEM", index_col=bigframes.enums.DefaultIndexKind.NULL, ) supplier = session.read_gbq( - 
f"bigframes-dev-perf.{dataset_id}.SUPPLIER", + f"{project_id}.{dataset_id}.SUPPLIER", index_col=bigframes.enums.DefaultIndexKind.NULL, ) diff --git a/third_party/bigframes_vendored/tpch/queries/q16.py b/third_party/bigframes_vendored/tpch/queries/q16.py index b5eb62547f..1bd2795c42 100644 --- a/third_party/bigframes_vendored/tpch/queries/q16.py +++ b/third_party/bigframes_vendored/tpch/queries/q16.py @@ -4,17 +4,17 @@ import bigframes.pandas as bpd -def q(dataset_id: str, session: bigframes.Session): +def q(project_id: str, dataset_id: str, session: bigframes.Session): part = session.read_gbq( - f"bigframes-dev-perf.{dataset_id}.PART", + f"{project_id}.{dataset_id}.PART", index_col=bigframes.enums.DefaultIndexKind.NULL, ) partsupp = session.read_gbq( - f"bigframes-dev-perf.{dataset_id}.PARTSUPP", + f"{project_id}.{dataset_id}.PARTSUPP", index_col=bigframes.enums.DefaultIndexKind.NULL, ) supplier = session.read_gbq( - f"bigframes-dev-perf.{dataset_id}.SUPPLIER", + f"{project_id}.{dataset_id}.SUPPLIER", index_col=bigframes.enums.DefaultIndexKind.NULL, ) diff --git a/third_party/bigframes_vendored/tpch/queries/q17.py b/third_party/bigframes_vendored/tpch/queries/q17.py index a95d128b5c..0bd1c44315 100644 --- a/third_party/bigframes_vendored/tpch/queries/q17.py +++ b/third_party/bigframes_vendored/tpch/queries/q17.py @@ -4,13 +4,13 @@ import bigframes.pandas as bpd -def q(dataset_id: str, session: bigframes.Session): +def q(project_id: str, dataset_id: str, session: bigframes.Session): lineitem = session.read_gbq( - f"bigframes-dev-perf.{dataset_id}.LINEITEM", + f"{project_id}.{dataset_id}.LINEITEM", index_col=bigframes.enums.DefaultIndexKind.NULL, ) part = session.read_gbq( - f"bigframes-dev-perf.{dataset_id}.PART", + f"{project_id}.{dataset_id}.PART", index_col=bigframes.enums.DefaultIndexKind.NULL, ) diff --git a/third_party/bigframes_vendored/tpch/queries/q18.py b/third_party/bigframes_vendored/tpch/queries/q18.py index dac9f5c438..f645a08681 100644 --- 
a/third_party/bigframes_vendored/tpch/queries/q18.py +++ b/third_party/bigframes_vendored/tpch/queries/q18.py @@ -6,17 +6,17 @@ import bigframes.pandas as bpd -def q(dataset_id: str, session: bigframes.Session): +def q(project_id: str, dataset_id: str, session: bigframes.Session): customer = session.read_gbq( - f"bigframes-dev-perf.{dataset_id}.CUSTOMER", + f"{project_id}.{dataset_id}.CUSTOMER", index_col=bigframes.enums.DefaultIndexKind.NULL, ) lineitem = session.read_gbq( - f"bigframes-dev-perf.{dataset_id}.LINEITEM", + f"{project_id}.{dataset_id}.LINEITEM", index_col=bigframes.enums.DefaultIndexKind.NULL, ) orders = session.read_gbq( - f"bigframes-dev-perf.{dataset_id}.ORDERS", + f"{project_id}.{dataset_id}.ORDERS", index_col=bigframes.enums.DefaultIndexKind.NULL, ) diff --git a/third_party/bigframes_vendored/tpch/queries/q19.py b/third_party/bigframes_vendored/tpch/queries/q19.py index 526d0aa1a6..1371af53fc 100644 --- a/third_party/bigframes_vendored/tpch/queries/q19.py +++ b/third_party/bigframes_vendored/tpch/queries/q19.py @@ -4,13 +4,13 @@ import bigframes.pandas as bpd -def q(dataset_id: str, session: bigframes.Session): +def q(project_id: str, dataset_id: str, session: bigframes.Session): lineitem = session.read_gbq( - f"bigframes-dev-perf.{dataset_id}.LINEITEM", + f"{project_id}.{dataset_id}.LINEITEM", index_col=bigframes.enums.DefaultIndexKind.NULL, ) part = session.read_gbq( - f"bigframes-dev-perf.{dataset_id}.PART", + f"{project_id}.{dataset_id}.PART", index_col=bigframes.enums.DefaultIndexKind.NULL, ) diff --git a/third_party/bigframes_vendored/tpch/queries/q2.py b/third_party/bigframes_vendored/tpch/queries/q2.py index 0586a5d1a5..f388252993 100644 --- a/third_party/bigframes_vendored/tpch/queries/q2.py +++ b/third_party/bigframes_vendored/tpch/queries/q2.py @@ -3,25 +3,25 @@ import bigframes -def q(dataset_id: str, session: bigframes.Session): +def q(project_id: str, dataset_id: str, session: bigframes.Session): region = session.read_gbq( - 
f"bigframes-dev-perf.{dataset_id}.REGION", + f"{project_id}.{dataset_id}.REGION", index_col=bigframes.enums.DefaultIndexKind.NULL, ) nation = session.read_gbq( - f"bigframes-dev-perf.{dataset_id}.NATION", + f"{project_id}.{dataset_id}.NATION", index_col=bigframes.enums.DefaultIndexKind.NULL, ) supplier = session.read_gbq( - f"bigframes-dev-perf.{dataset_id}.SUPPLIER", + f"{project_id}.{dataset_id}.SUPPLIER", index_col=bigframes.enums.DefaultIndexKind.NULL, ) part = session.read_gbq( - f"bigframes-dev-perf.{dataset_id}.PART", + f"{project_id}.{dataset_id}.PART", index_col=bigframes.enums.DefaultIndexKind.NULL, ) partsupp = session.read_gbq( - f"bigframes-dev-perf.{dataset_id}.PARTSUPP", + f"{project_id}.{dataset_id}.PARTSUPP", index_col=bigframes.enums.DefaultIndexKind.NULL, ) diff --git a/third_party/bigframes_vendored/tpch/queries/q20.py b/third_party/bigframes_vendored/tpch/queries/q20.py index 671d7e06fb..26651a31c4 100644 --- a/third_party/bigframes_vendored/tpch/queries/q20.py +++ b/third_party/bigframes_vendored/tpch/queries/q20.py @@ -6,25 +6,25 @@ import bigframes.pandas as bpd -def q(dataset_id: str, session: bigframes.Session): +def q(project_id: str, dataset_id: str, session: bigframes.Session): lineitem = session.read_gbq( - f"bigframes-dev-perf.{dataset_id}.LINEITEM", + f"{project_id}.{dataset_id}.LINEITEM", index_col=bigframes.enums.DefaultIndexKind.NULL, ) nation = session.read_gbq( - f"bigframes-dev-perf.{dataset_id}.NATION", + f"{project_id}.{dataset_id}.NATION", index_col=bigframes.enums.DefaultIndexKind.NULL, ) part = session.read_gbq( - f"bigframes-dev-perf.{dataset_id}.PART", + f"{project_id}.{dataset_id}.PART", index_col=bigframes.enums.DefaultIndexKind.NULL, ) partsupp = session.read_gbq( - f"bigframes-dev-perf.{dataset_id}.PARTSUPP", + f"{project_id}.{dataset_id}.PARTSUPP", index_col=bigframes.enums.DefaultIndexKind.NULL, ) supplier = session.read_gbq( - f"bigframes-dev-perf.{dataset_id}.SUPPLIER", + f"{project_id}.{dataset_id}.SUPPLIER", 
index_col=bigframes.enums.DefaultIndexKind.NULL, ) diff --git a/third_party/bigframes_vendored/tpch/queries/q21.py b/third_party/bigframes_vendored/tpch/queries/q21.py index 3a4ea495c9..b844cd6f6b 100644 --- a/third_party/bigframes_vendored/tpch/queries/q21.py +++ b/third_party/bigframes_vendored/tpch/queries/q21.py @@ -6,21 +6,21 @@ import bigframes.pandas as bpd -def q(dataset_id: str, session: bigframes.Session): +def q(project_id: str, dataset_id: str, session: bigframes.Session): lineitem = session.read_gbq( - f"bigframes-dev-perf.{dataset_id}.LINEITEM", + f"{project_id}.{dataset_id}.LINEITEM", index_col=bigframes.enums.DefaultIndexKind.NULL, ) nation = session.read_gbq( - f"bigframes-dev-perf.{dataset_id}.NATION", + f"{project_id}.{dataset_id}.NATION", index_col=bigframes.enums.DefaultIndexKind.NULL, ) orders = session.read_gbq( - f"bigframes-dev-perf.{dataset_id}.ORDERS", + f"{project_id}.{dataset_id}.ORDERS", index_col=bigframes.enums.DefaultIndexKind.NULL, ) supplier = session.read_gbq( - f"bigframes-dev-perf.{dataset_id}.SUPPLIER", + f"{project_id}.{dataset_id}.SUPPLIER", index_col=bigframes.enums.DefaultIndexKind.NULL, ) diff --git a/third_party/bigframes_vendored/tpch/queries/q22.py b/third_party/bigframes_vendored/tpch/queries/q22.py index 97180cd11a..137a7d5c36 100644 --- a/third_party/bigframes_vendored/tpch/queries/q22.py +++ b/third_party/bigframes_vendored/tpch/queries/q22.py @@ -4,13 +4,13 @@ import bigframes.pandas as bpd -def q(dataset_id: str, session: bigframes.Session): +def q(project_id: str, dataset_id: str, session: bigframes.Session): customer = session.read_gbq( - f"bigframes-dev-perf.{dataset_id}.CUSTOMER", + f"{project_id}.{dataset_id}.CUSTOMER", index_col=bigframes.enums.DefaultIndexKind.NULL, ) orders = session.read_gbq( - f"bigframes-dev-perf.{dataset_id}.ORDERS", + f"{project_id}.{dataset_id}.ORDERS", index_col=bigframes.enums.DefaultIndexKind.NULL, ) diff --git a/third_party/bigframes_vendored/tpch/queries/q3.py 
b/third_party/bigframes_vendored/tpch/queries/q3.py index 94b85f2054..fb09abe159 100644 --- a/third_party/bigframes_vendored/tpch/queries/q3.py +++ b/third_party/bigframes_vendored/tpch/queries/q3.py @@ -5,17 +5,17 @@ import bigframes -def q(dataset_id: str, session: bigframes.Session): +def q(project_id: str, dataset_id: str, session: bigframes.Session): customer = session.read_gbq( - f"bigframes-dev-perf.{dataset_id}.CUSTOMER", + f"{project_id}.{dataset_id}.CUSTOMER", index_col=bigframes.enums.DefaultIndexKind.NULL, ) lineitem = session.read_gbq( - f"bigframes-dev-perf.{dataset_id}.LINEITEM", + f"{project_id}.{dataset_id}.LINEITEM", index_col=bigframes.enums.DefaultIndexKind.NULL, ) orders = session.read_gbq( - f"bigframes-dev-perf.{dataset_id}.ORDERS", + f"{project_id}.{dataset_id}.ORDERS", index_col=bigframes.enums.DefaultIndexKind.NULL, ) diff --git a/third_party/bigframes_vendored/tpch/queries/q4.py b/third_party/bigframes_vendored/tpch/queries/q4.py index 9cc6f73c98..b89f70845f 100644 --- a/third_party/bigframes_vendored/tpch/queries/q4.py +++ b/third_party/bigframes_vendored/tpch/queries/q4.py @@ -8,13 +8,13 @@ import bigframes.pandas as bpd -def q(dataset_id: str, session: bigframes.Session): +def q(project_id: str, dataset_id: str, session: bigframes.Session): lineitem = session.read_gbq( - f"bigframes-dev-perf.{dataset_id}.LINEITEM", + f"{project_id}.{dataset_id}.LINEITEM", index_col=bigframes.enums.DefaultIndexKind.NULL, ) orders = session.read_gbq( - f"bigframes-dev-perf.{dataset_id}.ORDERS", + f"{project_id}.{dataset_id}.ORDERS", index_col=bigframes.enums.DefaultIndexKind.NULL, ) diff --git a/third_party/bigframes_vendored/tpch/queries/q5.py b/third_party/bigframes_vendored/tpch/queries/q5.py index b3a9b5e614..9839c025a5 100644 --- a/third_party/bigframes_vendored/tpch/queries/q5.py +++ b/third_party/bigframes_vendored/tpch/queries/q5.py @@ -5,29 +5,29 @@ import bigframes -def q(dataset_id: str, session: bigframes.Session): +def q(project_id: str, 
dataset_id: str, session: bigframes.Session): region = session.read_gbq( - f"bigframes-dev-perf.{dataset_id}.REGION", + f"{project_id}.{dataset_id}.REGION", index_col=bigframes.enums.DefaultIndexKind.NULL, ) nation = session.read_gbq( - f"bigframes-dev-perf.{dataset_id}.NATION", + f"{project_id}.{dataset_id}.NATION", index_col=bigframes.enums.DefaultIndexKind.NULL, ) customer = session.read_gbq( - f"bigframes-dev-perf.{dataset_id}.CUSTOMER", + f"{project_id}.{dataset_id}.CUSTOMER", index_col=bigframes.enums.DefaultIndexKind.NULL, ) lineitem = session.read_gbq( - f"bigframes-dev-perf.{dataset_id}.LINEITEM", + f"{project_id}.{dataset_id}.LINEITEM", index_col=bigframes.enums.DefaultIndexKind.NULL, ) orders = session.read_gbq( - f"bigframes-dev-perf.{dataset_id}.ORDERS", + f"{project_id}.{dataset_id}.ORDERS", index_col=bigframes.enums.DefaultIndexKind.NULL, ) supplier = session.read_gbq( - f"bigframes-dev-perf.{dataset_id}.SUPPLIER", + f"{project_id}.{dataset_id}.SUPPLIER", index_col=bigframes.enums.DefaultIndexKind.NULL, ) diff --git a/third_party/bigframes_vendored/tpch/queries/q6.py b/third_party/bigframes_vendored/tpch/queries/q6.py index 13341c4f4e..b883837fe2 100644 --- a/third_party/bigframes_vendored/tpch/queries/q6.py +++ b/third_party/bigframes_vendored/tpch/queries/q6.py @@ -5,9 +5,9 @@ import bigframes -def q(dataset_id: str, session: bigframes.Session): +def q(project_id: str, dataset_id: str, session: bigframes.Session): lineitem = session.read_gbq( - f"bigframes-dev-perf.{dataset_id}.LINEITEM", + f"{project_id}.{dataset_id}.LINEITEM", index_col=bigframes.enums.DefaultIndexKind.NULL, ) diff --git a/third_party/bigframes_vendored/tpch/queries/q7.py b/third_party/bigframes_vendored/tpch/queries/q7.py index d922efd1e2..a4dfe3f12e 100644 --- a/third_party/bigframes_vendored/tpch/queries/q7.py +++ b/third_party/bigframes_vendored/tpch/queries/q7.py @@ -8,25 +8,25 @@ import bigframes.pandas as bpd -def q(dataset_id: str, session: bigframes.Session): +def 
q(project_id: str, dataset_id: str, session: bigframes.Session): nation = session.read_gbq( - f"bigframes-dev-perf.{dataset_id}.NATION", + f"{project_id}.{dataset_id}.NATION", index_col=bigframes.enums.DefaultIndexKind.NULL, ) customer = session.read_gbq( - f"bigframes-dev-perf.{dataset_id}.CUSTOMER", + f"{project_id}.{dataset_id}.CUSTOMER", index_col=bigframes.enums.DefaultIndexKind.NULL, ) lineitem = session.read_gbq( - f"bigframes-dev-perf.{dataset_id}.LINEITEM", + f"{project_id}.{dataset_id}.LINEITEM", index_col=bigframes.enums.DefaultIndexKind.NULL, ) orders = session.read_gbq( - f"bigframes-dev-perf.{dataset_id}.ORDERS", + f"{project_id}.{dataset_id}.ORDERS", index_col=bigframes.enums.DefaultIndexKind.NULL, ) supplier = session.read_gbq( - f"bigframes-dev-perf.{dataset_id}.SUPPLIER", + f"{project_id}.{dataset_id}.SUPPLIER", index_col=bigframes.enums.DefaultIndexKind.NULL, ) diff --git a/third_party/bigframes_vendored/tpch/queries/q8.py b/third_party/bigframes_vendored/tpch/queries/q8.py index 4520fce14a..1676ec6349 100644 --- a/third_party/bigframes_vendored/tpch/queries/q8.py +++ b/third_party/bigframes_vendored/tpch/queries/q8.py @@ -5,33 +5,33 @@ import bigframes -def q(dataset_id: str, session: bigframes.Session): +def q(project_id: str, dataset_id: str, session: bigframes.Session): customer = session.read_gbq( - f"bigframes-dev-perf.{dataset_id}.CUSTOMER", + f"{project_id}.{dataset_id}.CUSTOMER", index_col=bigframes.enums.DefaultIndexKind.NULL, ) lineitem = session.read_gbq( - f"bigframes-dev-perf.{dataset_id}.LINEITEM", + f"{project_id}.{dataset_id}.LINEITEM", index_col=bigframes.enums.DefaultIndexKind.NULL, ) nation = session.read_gbq( - f"bigframes-dev-perf.{dataset_id}.NATION", + f"{project_id}.{dataset_id}.NATION", index_col=bigframes.enums.DefaultIndexKind.NULL, ) orders = session.read_gbq( - f"bigframes-dev-perf.{dataset_id}.ORDERS", + f"{project_id}.{dataset_id}.ORDERS", index_col=bigframes.enums.DefaultIndexKind.NULL, ) part = session.read_gbq( 
- f"bigframes-dev-perf.{dataset_id}.PART", + f"{project_id}.{dataset_id}.PART", index_col=bigframes.enums.DefaultIndexKind.NULL, ) region = session.read_gbq( - f"bigframes-dev-perf.{dataset_id}.REGION", + f"{project_id}.{dataset_id}.REGION", index_col=bigframes.enums.DefaultIndexKind.NULL, ) supplier = session.read_gbq( - f"bigframes-dev-perf.{dataset_id}.SUPPLIER", + f"{project_id}.{dataset_id}.SUPPLIER", index_col=bigframes.enums.DefaultIndexKind.NULL, ) diff --git a/third_party/bigframes_vendored/tpch/queries/q9.py b/third_party/bigframes_vendored/tpch/queries/q9.py index d04117f0f1..c2b52789bd 100644 --- a/third_party/bigframes_vendored/tpch/queries/q9.py +++ b/third_party/bigframes_vendored/tpch/queries/q9.py @@ -6,29 +6,29 @@ import bigframes.pandas as bpd -def q(dataset_id: str, session: bigframes.Session): +def q(project_id: str, dataset_id: str, session: bigframes.Session): lineitem = session.read_gbq( - f"bigframes-dev-perf.{dataset_id}.LINEITEM", + f"{project_id}.{dataset_id}.LINEITEM", index_col=bigframes.enums.DefaultIndexKind.NULL, ) nation = session.read_gbq( - f"bigframes-dev-perf.{dataset_id}.NATION", + f"{project_id}.{dataset_id}.NATION", index_col=bigframes.enums.DefaultIndexKind.NULL, ) orders = session.read_gbq( - f"bigframes-dev-perf.{dataset_id}.ORDERS", + f"{project_id}.{dataset_id}.ORDERS", index_col=bigframes.enums.DefaultIndexKind.NULL, ) part = session.read_gbq( - f"bigframes-dev-perf.{dataset_id}.PART", + f"{project_id}.{dataset_id}.PART", index_col=bigframes.enums.DefaultIndexKind.NULL, ) partsupp = session.read_gbq( - f"bigframes-dev-perf.{dataset_id}.PARTSUPP", + f"{project_id}.{dataset_id}.PARTSUPP", index_col=bigframes.enums.DefaultIndexKind.NULL, ) supplier = session.read_gbq( - f"bigframes-dev-perf.{dataset_id}.SUPPLIER", + f"{project_id}.{dataset_id}.SUPPLIER", index_col=bigframes.enums.DefaultIndexKind.NULL, )