10000 feat: Implement GcsEvalSetResultsManager to handle storage of eval se… · sudix/adk-python@0a5cf45 · GitHub
[go: up one dir, main page]

Skip to content

Commit 0a5cf45

Browse files
google-genai-bot authored and copybara-github committed
feat: Implement GcsEvalSetResultsManager to handle storage of eval sets on GCS, and refactor eval set results manager
Eval results will be stored as JSON files under `gs://{bucket_name}/{app_name}/evals/eval_history/`.
PiperOrigin-RevId: 770499242
1 parent 1551bd4 commit 0a5cf45

9 files changed

+503
-155
lines changed
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
# Copyright 2025 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
from __future__ import annotations
16+
17+
import time
18+
19+
from .eval_result import EvalCaseResult
20+
from .eval_result import EvalSetResult
21+
22+
23+
def _sanitize_eval_set_result_name(eval_set_result_name: str) -> str:
24+
"""Sanitizes the eval set result name."""
25+
return eval_set_result_name.replace("/", "_")
26+
27+
28+
def create_eval_set_result(
    app_name: str,
    eval_set_id: str,
    eval_case_results: list[EvalCaseResult],
) -> EvalSetResult:
  """Builds a new EvalSetResult stamped with the current time.

  The result id has the form `{app_name}_{eval_set_id}_{timestamp}`, and the
  result name is that id passed through `_sanitize_eval_set_result_name`.

  Args:
    app_name: The app name, used as the first component of the result id.
    eval_set_id: The id of the eval set the results belong to.
    eval_case_results: The per-case results to store on the EvalSetResult.

  Returns:
    The newly constructed EvalSetResult.
  """
  created_at = time.time()
  # The same timestamp value feeds both the id and `creation_timestamp`.
  result_id = "{}_{}_{}".format(app_name, eval_set_id, created_at)
  return EvalSetResult(
      eval_set_result_id=result_id,
      eval_set_result_name=_sanitize_eval_set_result_name(result_id),
      eval_set_id=eval_set_id,
      eval_case_results=eval_case_results,
      creation_timestamp=created_at,
  )

src/google/adk/evaluation/eval_result.py

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -36,8 +36,9 @@ class EvalCaseResult(BaseModel):
3636
populate_by_name=True,
3737
)
3838

39-
eval_set_file: str = Field(
39+
eval_set_file: Optional[str] = Field(
4040
deprecated=True,
41+
default=None,
4142
description="This field is deprecated, use eval_set_id instead.",
4243
)
4344
eval_set_id: str = ""
@@ -49,11 +50,15 @@ class EvalCaseResult(BaseModel):
4950
final_eval_status: EvalStatus
5051
"""Final eval status for this eval case."""
5152

52-
eval_metric_results: list[tuple[EvalMetric, EvalMetricResult]] = Field(
53-
deprecated=True,
54-
description=(
55-
"This field is deprecated, use overall_eval_metric_results instead."
56-
),
53+
eval_metric_results: Optional[list[tuple[EvalMetric, EvalMetricResult]]] = (
54+
Field(
55+
deprecated=True,
56+
default=None,
57+
description=(
58+
"This field is deprecated, use overall_eval_metric_results"
59+
" instead."
60+
),
61+
)
5762
)
5863

5964
overall_eval_metric_results: list[EvalMetricResult]
@@ -80,7 +85,7 @@ class EvalSetResult(BaseModel):
8085
populate_by_name=True,
8186
)
8287
eval_set_result_id: str
83-
eval_set_result_name: str
88+
eval_set_result_name: Optional[str] = None
8489
eval_set_id: str
8590
eval_case_results: list[EvalCaseResult] = Field(default_factory=list)
8691
creation_timestamp: float = 0.0

src/google/adk/evaluation/eval_set_results_manager.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616

1717
from abc import ABC
1818
from abc import abstractmethod
19+
from typing import Optional
1920

2021
from .eval_result import EvalCaseResult
2122
from .eval_result import EvalSetResult
@@ -38,7 +39,11 @@ def save_eval_set_result(
3839
def get_eval_set_result(
3940
self, app_name: str, eval_set_result_id: str
4041
) -> EvalSetResult:
41-
"""Returns an EvalSetResult identified by app_name and eval_set_result_id."""
42+
"""Returns the EvalSetResult from app_name and eval_set_result_id.
43+
44+
Raises:
45+
NotFoundError: If the EvalSetResult is not found.
46+
"""
4247
raise NotImplementedError()
4348

4449
@abstractmethod
Lines changed: 121 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,121 @@
1+
# Copyright 2025 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
from __future__ import annotations
16+
17+
import logging
18+
19+
from google.cloud import exceptions as cloud_exceptions
20+
from google.cloud import storage
21+
from typing_extensions import override
22+
23+
from ..errors.not_found_error import NotFoundError
24+
from ._eval_set_results_manager_utils import create_eval_set_result
25+
from .eval_result import EvalCaseResult
26+
from .eval_result import EvalSetResult
27+
from .eval_set_results_manager import EvalSetResultsManager
28+
29+
logger = logging.getLogger("google_adk." + __name__)
30+
31+
_EVAL_HISTORY_DIR = "evals/eval_history"
32+
_EVAL_SET_RESULT_FILE_EXTENSION = ".evalset_result.json"
33+
34+
35+
class GcsEvalSetResultsManager(EvalSetResultsManager):
  """An EvalSetResultsManager that stores eval results in a GCS bucket.

  Each eval set result is written as a JSON blob named
  `{app_name}/evals/eval_history/{eval_set_result_id}.evalset_result.json`.
  """

  def __init__(self, bucket_name: str, **kwargs):
    """Initializes the GcsEvalSetResultsManager.

    Args:
      bucket_name: The name of the bucket to use.
      **kwargs: Keyword arguments to pass to the Google Cloud Storage client.

    Raises:
      ValueError: If the bucket does not exist.
    """
    self.bucket_name = bucket_name
    self.storage_client = storage.Client(**kwargs)
    self.bucket = self.storage_client.bucket(self.bucket_name)
    # Fail fast at construction time rather than on the first read or write.
    if not self.bucket.exists():
      raise ValueError(
          f"Bucket `{self.bucket_name}` does not exist. Please create it before"
          " using the GcsEvalSetResultsManager."
      )

  def _get_eval_history_dir(self, app_name: str) -> str:
    """Returns the blob-name prefix (no trailing slash) for an app's results."""
    return f"{app_name}/{_EVAL_HISTORY_DIR}"

  def _get_eval_set_result_blob_name(
      self, app_name: str, eval_set_result_id: str
  ) -> str:
    """Returns the full blob name for the given app and eval set result id."""
    eval_history_dir = self._get_eval_history_dir(app_name)
    return (
        f"{eval_history_dir}/{eval_set_result_id}"
        f"{_EVAL_SET_RESULT_FILE_EXTENSION}"
    )

  def _write_eval_set_result(
      self, blob_name: str, eval_set_result: EvalSetResult
  ) -> None:
    """Writes an EvalSetResult to GCS as indented JSON."""
    blob = self.bucket.blob(blob_name)
    blob.upload_from_string(
        eval_set_result.model_dump_json(indent=2),
        content_type="application/json",
    )

  @override
  def save_eval_set_result(
      self,
      app_name: str,
      eval_set_id: str,
      eval_case_results: list[EvalCaseResult],
  ) -> None:
    """Creates and saves a new EvalSetResult given eval_case_results."""
    eval_set_result = create_eval_set_result(
        app_name, eval_set_id, eval_case_results
    )

    eval_set_result_blob_name = self._get_eval_set_result_blob_name(
        app_name, eval_set_result.eval_set_result_id
    )
    logger.info("Writing eval result to blob: %s", eval_set_result_blob_name)
    self._write_eval_set_result(eval_set_result_blob_name, eval_set_result)

  @override
  def get_eval_set_result(
      self, app_name: str, eval_set_result_id: str
  ) -> EvalSetResult:
    """Returns an EvalSetResult from app_name and eval_set_result_id.

    Raises:
      NotFoundError: If the EvalSetResult is not found.
    """
    eval_set_result_blob_name = self._get_eval_set_result_blob_name(
        app_name, eval_set_result_id
    )
    blob = self.bucket.blob(eval_set_result_blob_name)
    # Download directly instead of exists()-then-download: one round trip, and
    # no window in which the blob can disappear between the two calls.
    try:
      eval_set_result_data = blob.download_as_text()
    except cloud_exceptions.NotFound as e:
      raise NotFoundError(
          f"Eval set result `{eval_set_result_id}` not found."
      ) from e
    return EvalSetResult.model_validate_json(eval_set_result_data)

  @override
  def list_eval_set_results(self, app_name: str) -> list[str]:
    """Returns the eval result ids that belong to the given app_name.

    Only blobs under the app's eval history prefix whose names end with the
    eval set result file extension are reported; the returned ids are sorted.

    Raises:
      ValueError: If listing the blobs raises a GCS NotFound error.
    """
    # The trailing slash keeps the prefix from matching sibling paths such as
    # `{app_name}/evals/eval_history_old/...`.
    prefix = self._get_eval_history_dir(app_name) + "/"
    try:
      # list_blobs is lazy, so iteration must stay inside the try block.
      eval_set_result_ids = [
          # Strip the prefix (rather than keeping only the last path segment)
          # so every returned id maps back to the blob name that
          # get_eval_set_result builds, even when the id contains "/".
          blob.name.removeprefix(prefix).removesuffix(
              _EVAL_SET_RESULT_FILE_EXTENSION
          )
          for blob in self.bucket.list_blobs(prefix=prefix)
          # Skip folder placeholders and unrelated files; their names would
          # not be loadable through get_eval_set_result.
          if blob.name.endswith(_EVAL_SET_RESULT_FILE_EXTENSION)
      ]
    except cloud_exceptions.NotFound as e:
      raise ValueError(
          f"App `{app_name}` not found in GCS bucket `{self.bucket_name}`."
      ) from e
    return sorted(eval_set_result_ids)

src/google/adk/evaluation/local_eval_set_results_manager.py

Lines changed: 6 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -17,10 +17,11 @@
1717
import json
1818
import logging
1919
import os
20-
import time
2120

2221
from typing_extensions import override
2322

23+
from ..errors.not_found_error import NotFoundError
24+
from ._eval_set_results_manager_utils import create_eval_set_result
2425
from .eval_result import EvalCaseResult
2526
from .eval_result import EvalSetResult
2627
from .eval_set_results_manager import EvalSetResultsManager
@@ -31,10 +32,6 @@
3132
_EVAL_SET_RESULT_FILE_EXTENSION = ".evalset_result.json"
3233

3334

34-
def _sanitize_eval_set_result_name(eval_set_result_name: str) -> str:
35-
return eval_set_result_name.replace("/", "_")
36-
37-
3835
class LocalEvalSetResultsManager(EvalSetResultsManager):
3936
"""An EvalSetResult manager that stores eval set results locally on disk."""
4037

@@ -49,15 +46,8 @@ def save_eval_set_result(
4946
eval_case_results: list[EvalCaseResult],
5047
) -> None:
5148
"""Creates and saves a new EvalSetResult given eval_case_results."""
52-
timestamp = time.time()
53-
eval_set_result_id = app_name + "_" + eval_set_id + "_" + str(timestamp)
54-
eval_set_result_name = _sanitize_eval_set_result_name(eval_set_result_id)
55-
eval_set_result = EvalSetResult(
56-
eval_set_result_id=eval_set_result_id,
57-
eval_set_result_name=eval_set_result_name,
58-
eval_set_id=eval_set_id,
59-
eval_case_results=eval_case_results,
60-
creation_timestamp=timestamp,
49+
eval_set_result = create_eval_set_result(
50+
app_name, eval_set_id, eval_case_results
6151
)
6252
# Write eval result file, with eval_set_result_name.
6353
app_eval_history_dir = self._get_eval_history_dir(app_name)
@@ -67,7 +57,7 @@ def save_eval_set_result(
6757
eval_set_result_json = eval_set_result.model_dump_json()
6858
eval_set_result_file_path = os.path.join(
6959
app_eval_history_dir,
70-
eval_set_result_name + _EVAL_SET_RESULT_FILE_EXTENSION,
60+
eval_set_result.eval_set_result_name + _EVAL_SET_RESULT_FILE_EXTENSION,
7161
)
7262
logger.info("Writing eval result to file: %s", eval_set_result_file_path)
7363
with open(eval_set_result_file_path, "w") as f:
@@ -87,9 +77,7 @@ def get_eval_set_result(
8777
+ _EVAL_SET_RESULT_FILE_EXTENSION
8878
)
8979
if not os.path.exists(maybe_eval_result_file_path):
90-
raise ValueError(
91-
f"Eval set result `{eval_set_result_id}` does not exist."
92-
)
80+
raise NotFoundError(f"Eval set result `{eval_set_result_id}` not found.")
9381
with open(maybe_eval_result_file_path, "r") as file:
9482
eval_result_data = json.load(file)
9583
return EvalSetResult.model_validate_json(eval_result_data)

0 commit comments

Comments
 (0)
0