8000 chore: add exec_seconds attribute to ExecutionMetrics by Genesis929 · Pull Request #1104 · googleapis/python-bigquery-dataframes · GitHub
[go: up one dir, main page]

Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 15 additions & 16 deletions bigframes/session/metrics.py
8000
Original file line number Diff line number Diff line change
Expand Up @@ -29,48 +29,47 @@ class ExecutionMetrics:
execution_count: int = 0
slot_millis: int = 0
bytes_processed: int = 0
execution_secs: float = 0

def count_job_stats(self, query_job: bq_job.QueryJob):
stats = get_performance_stats(query_job)
if stats is not None:
bytes_processed, slot_millis, exec_seconds = stats
bytes_processed, slot_millis, execution_secs = stats
self.execution_count += 1
self.bytes_processed += bytes_processed
self.slot_millis += slot_millis
self.execution_secs += execution_secs
if LOGGING_NAME_ENV_VAR in os.environ:
# when running notebooks via pytest nbmake
write_stats_to_disk(bytes_processed, slot_millis, exec_seconds)
write_stats_to_disk(bytes_processed, slot_millis, execution_secs)


def get_performance_stats(
query_job: bigquery.QueryJob,
) -> Optional[Tuple[int, int, Optional[float]]]:
) -> Optional[Tuple[int, int, float]]:
"""Parse the query job for performance stats.

Return None if the stats do not reflect real work done in bigquery.
"""

if (
query_job.configuration.dry_run
or query_job.created is None
or query_job.ended is None
):
return None

bytes_processed = query_job.total_bytes_processed
if not isinstance(bytes_processed, int):
return None # filter out mocks
if query_job.configuration.dry_run:
# dry run stats are just predictions of the real run
bytes_processed = 0

slot_millis = query_job.slot_millis
if not isinstance(slot_millis, int):
return None # filter out mocks

if query_job.configuration.dry_run:
# dry run stats are just predictions of the real run
slot_millis = 0

exec_seconds = (
(query_job.ended - query_job.created).total_seconds()
if query_job.created is not None and query_job.ended is not None
else None
)
execution_secs = (query_job.ended - query_job.created).total_seconds()

return bytes_processed, slot_millis, exec_seconds
return bytes_processed, slot_millis, execution_secs


def write_stats_to_disk(
Expand Down
0