Refactor eval results reporting with Eval Set Results manager. · rondweb/adk-python@9928caf · GitHub
[go: up one dir, main page]

Skip to content

Commit 9928caf

Browse files
google-genai-bot authored and copybara-github committed
Refactor eval results reporting with Eval Set Results manager.
PiperOrigin-RevId: 761601525
1 parent c5a0437 commit 9928caf

File tree

4 files changed

+161
-56
lines changed

4 files changed

+161
-56
lines changed

src/google/adk/cli/cli_eval.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,7 @@ class EvalCaseResult(common.BaseModel):
8282
"""The eval case id."""
8383

8484
final_eval_status: EvalStatus
85-
"""Final evalu status for this eval case."""
85+
"""Final eval status for this eval case."""
8686

8787
eval_metric_results: list[tuple[EvalMetric, EvalMetricResult]] = Field(
8888
deprecated=True,

src/google/adk/cli/fast_api.py

Lines changed: 12 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,7 @@
6464
from ..artifacts.in_memory_artifact_service import InMemoryArtifactService
6565
from ..evaluation.eval_case import EvalCase
6666
from ..evaluation.eval_case import SessionInput
67+
from ..evaluation.local_eval_set_results_manager import LocalEvalSetResultsManager
6768
from ..evaluation.local_eval_sets_manager import LocalEvalSetsManager
6869
from ..events.event import Event
6970
from ..memory.in_memory_memory_service import InMemoryMemoryService
@@ -322,6 +323,7 @@ async def close_toolset_safely(toolset):
322323
memory_service = InMemoryMemoryService()
323324

324325
eval_sets_manager = LocalEvalSetsManager(agent_dir=agent_dir)
326+
eval_set_results_manager = LocalEvalSetResultsManager(agent_dir=agent_dir)
325327

326328
# Build the Session service
327329
agent_engine_id = ""
@@ -594,32 +596,10 @@ async def run_eval(
594596
)
595597
eval_case_results.append(eval_case_result)
596598

597-
timestamp = time.time()
598-
eval_set_result_name = app_name + "_" + eval_set_id + "_" + str(timestamp)
599-
eval_set_result = EvalSetResult(
600-
eval_set_result_id=eval_set_result_name,
601-
eval_set_result_name=eval_set_result_name,
602-
eval_set_id=eval_set_id,
603-
eval_case_results=eval_case_results,
604-
creation_timestamp=timestamp,
599+
eval_set_results_manager.save_eval_set_result(
600+
app_name, eval_set_id, eval_case_results
605601
)
606602

607-
# Write eval result file, with eval_set_result_name.
608-
app_eval_history_dir = os.path.join(
609-
agent_dir, app_name, ".adk", "eval_history"
610-
)
611-
if not os.path.exists(app_eval_history_dir):
612-
os.makedirs(app_eval_history_dir)
613-
# Convert to json and write to file.
614-
eval_set_result_json = eval_set_result.model_dump_json()
615-
eval_set_result_file_path = os.path.join(
616-
app_eval_history_dir,
617-
eval_set_result_name + _EVAL_SET_RESULT_FILE_EXTENSION,
618-
)
619-
logger.info("Writing eval result to file: %s", eval_set_result_file_path)
620-
with open(eval_set_result_file_path, "w") as f:
621-
f.write(json.dumps(eval_set_result_json, indent=2))
622-
623603
return run_eval_results
624604

625605
@app.get(
@@ -631,45 +611,22 @@ def get_eval_result(
631611
eval_result_id: str,
632612
) -> EvalSetResult:
633613
"""Gets the eval result for the given eval id."""
634-
# Load the eval set file data
635-
maybe_eval_result_file_path = (
636-
os.path.join(
637-
agent_dir, app_name, ".adk", "eval_history", eval_result_id
638-
)
639-
+ _EVAL_SET_RESULT_FILE_EXTENSION
640-
)
641-
if not os.path.exists(maybe_eval_result_file_path):
642-
raise HTTPException(
643-
status_code=404,
644-
detail=f"Eval result `{eval_result_id}` not found.",
645-
)
646-
with open(maybe_eval_result_file_path, "r") as file:
647-
eval_result_data = json.load(file) # Load JSON into a list
648614
try:
649-
eval_result = EvalSetResult.model_validate_json(eval_result_data)
650-
return eval_result
651-
except ValidationError as e:
652-
logger.exception("get_eval_result validation error: %s", e)
615+
return eval_set_results_manager.get_eval_set_result(
616+
app_name, eval_result_id
617+
)
618+
except ValueError as ve:
619+
raise HTTPException(status_code=404, detail=str(ve)) from ve
620+
except ValidationError as ve:
621+
raise HTTPException(status_code=500, detail=str(ve)) from ve
653622

654623
@app.get(
655624
"/apps/{app_name}/eval_results",
656625
response_model_exclude_none=True,
657626
)
658627
def list_eval_results(app_name: str) -> list[str]:
659628
"""Lists all eval results for the given app."""
660-
app_eval_history_directory = os.path.join(
661-
agent_dir, app_name, ".adk", "eval_history"
662-
)
663-
664-
if not os.path.exists(app_eval_history_directory):
665-
return []
666-
667-
eval_result_files = [
668-
file.removesuffix(_EVAL_SET_RESULT_FILE_EXTENSION)
669-
for file in os.listdir(app_eval_history_directory)
670-
if file.endswith(_EVAL_SET_RESULT_FILE_EXTENSION)
671-
]
672-
return eval_result_files
629+
return eval_set_results_manager.list_eval_set_results(app_name)
673630

674631
@app.delete("/apps/{app_name}/users/{user_id}/sessions/{session_id}")
675632
async def delete_session(app_name: str, user_id: str, session_id: str):
src/google/adk/evaluation/eval_set_results_manager.py

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
# Copyright 2025 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
from abc import ABC, abstractmethod
16+
17+
from ..cli.cli_eval import EvalCaseResult
18+
from ..cli.cli_eval import EvalSetResult
19+
20+
21+
class EvalSetResultsManager(ABC):
22+
"""An interface to manage Eval Set Results."""
23+
24+
@abstractmethod
25+
def save_eval_set_result(
26+
self,
27+
app_name: str,
28+
eval_set_id: str,
29+
eval_case_results: list[EvalCaseResult],
30+
) -> None:
31+
"""Creates and saves a new EvalSetResult given eval_case_results."""
32+
raise NotImplementedError()
33+
34+
@abstractmethod
35+
def get_eval_set_result(
36+
self, app_name: str, eval_set_result_id: str
37+
) -> EvalSetResult:
38+
"""Returns an EvalSetResult identified by app_name and eval_set_result_id."""
39+
raise NotImplementedError()
40+
41+
@abstractmethod
42+
def list_eval_set_results(self, app_name: str) -> list[str]:
43+
"""Returns the eval result ids that belong to the given app_name."""
44+
raise NotImplementedError()
src/google/adk/evaluation/local_eval_set_results_manager.py

Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
1+
# Copyright 2025 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
import json
16+
import logging
17+
import os
18+
import time
19+
from typing_extensions import override
20+
from ..cli.cli_eval import EvalCaseResult
21+
from ..cli.cli_eval import EvalSetResult
22+
from .eval_set_results_manager import EvalSetResultsManager
23+
24+
logger = logging.getLogger("google_adk." + __name__)
25+
26+
_ADK_EVAL_HISTORY_DIR = ".adk/eval_history"
27+
_EVAL_SET_RESULT_FILE_EXTENSION = ".evalset_result.json"
28+
29+
30+
class LocalEvalSetResultsManager(EvalSetResultsManager):
  """An EvalSetResult manager that stores eval set results locally on disk.

  Each result is written to its own file at
  `<agent_dir>/<app_name>/.adk/eval_history/<id>.evalset_result.json`.
  """

  def __init__(self, agent_dir: str):
    """Initializes the manager.

    Args:
      agent_dir: Directory under which each app has its own sub-directory.
    """
    self._agent_dir = agent_dir

  @override
  def save_eval_set_result(
      self,
      app_name: str,
      eval_set_id: str,
      eval_case_results: list[EvalCaseResult],
  ) -> None:
    """Creates and saves a new EvalSetResult given eval_case_results.

    The result id (also used as its name and file stem) is
    `<app_name>_<eval_set_id>_<creation timestamp>`.

    Args:
      app_name: Name of the app the results belong to.
      eval_set_id: Id of the eval set that produced the results.
      eval_case_results: Per-case results to store in the EvalSetResult.
    """
    timestamp = time.time()
    eval_set_result_name = f"{app_name}_{eval_set_id}_{timestamp}"
    eval_set_result = EvalSetResult(
        eval_set_result_id=eval_set_result_name,
        eval_set_result_name=eval_set_result_name,
        eval_set_id=eval_set_id,
        eval_case_results=eval_case_results,
        creation_timestamp=timestamp,
    )

    # exist_ok avoids the check-then-create race when two evals finish at
    # about the same time.
    app_eval_history_dir = self._get_eval_history_dir(app_name)
    os.makedirs(app_eval_history_dir, exist_ok=True)

    eval_set_result_file_path = os.path.join(
        app_eval_history_dir,
        eval_set_result_name + _EVAL_SET_RESULT_FILE_EXTENSION,
    )
    logger.info("Writing eval result to file: %s", eval_set_result_file_path)

    # NOTE: model_dump_json() already returns a JSON document as a str, so
    # json.dumps() stores it as a single JSON *string*. get_eval_set_result()
    # reverses exactly that (json.load -> str -> model_validate_json), so the
    # on-disk format is kept as is to stay readable alongside existing files.
    # `indent` has no effect on a string payload and is therefore omitted.
    eval_set_result_json = eval_set_result.model_dump_json()
    with open(eval_set_result_file_path, "w", encoding="utf-8") as f:
      f.write(json.dumps(eval_set_result_json))

  @override
  def get_eval_set_result(
      self, app_name: str, eval_set_result_id: str
  ) -> EvalSetResult:
    """Returns an EvalSetResult identified by app_name and eval_set_result_id.

    Args:
      app_name: Name of the app the result belongs to.
      eval_set_result_id: Id of the stored result, i.e. the file name without
        the `.evalset_result.json` extension.

    Returns:
      The EvalSetResult parsed from the stored file.

    Raises:
      ValueError: If no result file exists for the given id, or the id is not
        a plain file name.
    """
    not_found_message = (
        f"Eval set result `{eval_set_result_id}` does not exist."
    )

    # The id is used as a file name. Reject anything carrying a directory
    # component so a caller-supplied id cannot escape the eval history
    # directory. Such ids are never produced by list_eval_set_results().
    if os.path.basename(eval_set_result_id) != eval_set_result_id:
      raise ValueError(not_found_message)

    eval_result_file_path = os.path.join(
        self._get_eval_history_dir(app_name),
        eval_set_result_id + _EVAL_SET_RESULT_FILE_EXTENSION,
    )
    # Open directly instead of checking for existence first: the file could
    # disappear between the check and the open.
    try:
      with open(eval_result_file_path, "r", encoding="utf-8") as file:
        # The file holds a JSON string whose content is the serialized model
        # (see save_eval_set_result), so this yields a str, not a dict.
        eval_result_data = json.load(file)
    except FileNotFoundError as err:
      raise ValueError(not_found_message) from err
    return EvalSetResult.model_validate_json(eval_result_data)

  @override
  def list_eval_set_results(self, app_name: str) -> list[str]:
    """Returns the eval result ids that belong to the given app_name.

    Args:
      app_name: Name of the app whose results should be listed.

    Returns:
      The sorted ids of all stored results, or an empty list when the app has
      no eval history directory yet.
    """
    try:
      file_names = os.listdir(self._get_eval_history_dir(app_name))
    except FileNotFoundError:
      return []

    # os.listdir() returns entries in arbitrary order; sort so that callers
    # get a stable listing.
    return sorted(
        file_name.removesuffix(_EVAL_SET_RESULT_FILE_EXTENSION)
        for file_name in file_names
        if file_name.endswith(_EVAL_SET_RESULT_FILE_EXTENSION)
    )

  def _get_eval_history_dir(self, app_name: str) -> str:
    """Returns the directory that holds the eval history of app_name."""
    return os.path.join(self._agent_dir, app_name, _ADK_EVAL_HISTORY_DIR)

0 commit comments

Comments
 (0)
0