Add 'get_eval_report' and 'list_eval_reports' endpoints. · codefromthecrypt/adk-python@05a0c6b · GitHub

Commit 05a0c6b

google-genai-bot authored and copybara-github committed
Add 'get_eval_report' and 'list_eval_reports' endpoints.
PiperOrigin-RevId: 757936497
1 parent df0892a commit 05a0c6b
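
For orientation: /run_eval now persists each run as an EvalSetResult file under the app's .adk/eval_history directory, and the two new GET endpoints read those files back. A sketch of the resulting file path, following the naming scheme in run_eval in the diff below (all concrete values here are hypothetical, not taken from this commit):

import os

# Hypothetical values; the naming scheme comes from run_eval in the diff below.
agent_dir = "/path/to/agents"  # directory the API server serves agents from
app_name = "my_app"
eval_set_id = "my_evalset"
timestamp = 1714000000.0  # time.time() captured when the run finishes

eval_set_result_name = app_name + "_" + eval_set_id + "_" + str(timestamp)
path = os.path.join(
    agent_dir, app_name, ".adk", "eval_history",
    eval_set_result_name + ".evalset_result.json",
)
print(path)
# /path/to/agents/my_app/.adk/eval_history/my_app_my_evalset_1714000000.0.evalset_result.json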

File tree: 2 files changed, +118 −17 lines changed

src/google/adk/cli/cli_eval.py

Lines changed: 3 additions & 0 deletions

@@ -57,6 +57,7 @@ class EvalCaseResult(BaseModel):
   eval_metric_results: list[tuple[EvalMetric, EvalMetricResult]]
   session_id: str
   session_details: Optional[Session] = None
+  user_id: Optional[str] = None
 
 
 class EvalSetResult(BaseModel):
@@ -185,6 +186,7 @@ async def run_evals(
     eval_name = eval_item["name"]
     eval_data = eval_item["data"]
     initial_session = eval_item.get("initial_session", {})
+    user_id = initial_session.get("user_id", "test_user_id")
 
     if evals_to_run and eval_name not in evals_to_run:
       continue
@@ -267,6 +269,7 @@ async def run_evals(
           final_eval_status=final_eval_status,
           eval_metric_results=eval_metric_results,
           session_id=session_id,
+          user_id=user_id,
       )
 
       if final_eval_status == EvalStatus.PASSED:
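
The user_id added above comes from the eval case's initial_session and falls back to "test_user_id" when the case does not specify one. A standalone sketch of that lookup, with a hypothetical eval item:

# Hypothetical eval item mirroring the shape run_evals reads above.
eval_item = {
    "name": "greeting_case",
    "data": [],
    "initial_session": {"user_id": "alice"},
}

initial_session = eval_item.get("initial_session", {})
user_id = initial_session.get("user_id", "test_user_id")
print(user_id)  # "alice"; prints "test_user_id" if initial_session omits a user_id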

src/google/adk/cli/fast_api.py

Lines changed: 115 additions & 17 deletions

@@ -22,13 +22,13 @@
 from pathlib import Path
 import re
 import sys
+import time
 import traceback
 import typing
 from typing import Any
 from typing import List
 from typing import Literal
 from typing import Optional
-from typing import Union
 
 import click
 from fastapi import FastAPI
@@ -71,8 +71,10 @@
 from ..sessions.vertex_ai_session_service import VertexAiSessionService
 from ..tools.base_toolset import BaseToolset
 from .cli_eval import EVAL_SESSION_ID_PREFIX
+from .cli_eval import EvalCaseResult
 from .cli_eval import EvalMetric
 from .cli_eval import EvalMetricResult
+from .cli_eval import EvalSetResult
 from .cli_eval import EvalStatus
 from .utils import create_empty_state
 from .utils import envs
@@ -81,6 +83,7 @@
 logger = logging.getLogger(__name__)
 
 _EVAL_SET_FILE_EXTENSION = ".evalset.json"
+_EVAL_SET_RESULT_FILE_EXTENSION = ".evalset_result.json"
 
 
 class ApiServerSpanExporter(export.SpanExporter):
@@ -137,10 +140,12 @@ class RunEvalResult(BaseModel):
       populate_by_name=True,
   )
 
+  eval_set_file: str
   eval_set_id: str
   eval_id: str
   final_eval_status: EvalStatus
   eval_metric_results: list[tuple[EvalMetric, EvalMetricResult]]
+  user_id: str
   session_id: str
 
@@ -484,24 +489,117 @@ async def run_eval(
           "Eval ids to run list is empty. We will all evals in the eval set."
       )
     root_agent = await _get_root_agent_async(app_name)
-    return [
-        RunEvalResult(
-            app_name=app_name,
-            eval_set_id=eval_set_id,
-            eval_id=eval_result.eval_id,
-            final_eval_status=eval_result.final_eval_status,
-            eval_metric_results=eval_result.eval_metric_results,
-            session_id=eval_result.session_id,
-        )
-        async for eval_result in run_evals(
-            eval_set_to_evals,
-            root_agent,
-            getattr(root_agent, "reset_data", None),
-            req.eval_metrics,
-            session_service=session_service,
-            artifact_service=artifact_service,
-        )
-    ]
+    run_eval_results = []
+    eval_case_results = []
+    async for eval_result in run_evals(
+        eval_set_to_evals,
+        root_agent,
+        getattr(root_agent, "reset_data", None),
+        req.eval_metrics,
+        session_service=session_service,
+        artifact_service=artifact_service,
+    ):
+      run_eval_results.append(
+          RunEvalResult(
+              app_name=app_name,
+              eval_set_file=eval_result.eval_set_file,
+              eval_set_id=eval_set_id,
+              eval_id=eval_result.eval_id,
+              final_eval_status=eval_result.final_eval_status,
+              eval_metric_results=eval_result.eval_metric_results,
+              user_id=eval_result.user_id,
+              session_id=eval_result.session_id,
+          )
+      )
+      session = session_service.get_session(
+          app_name=app_name,
+          user_id=eval_result.user_id,
+          session_id=eval_result.session_id,
+      )
+      eval_case_results.append(
+          EvalCaseResult(
+              eval_set_file=eval_result.eval_set_file,
+              eval_id=eval_result.eval_id,
+              final_eval_status=eval_result.final_eval_status,
+              eval_metric_results=eval_result.eval_metric_results,
+              session_id=eval_result.session_id,
+              session_details=session,
+              user_id=eval_result.user_id,
+          )
+      )
+
+    timestamp = time.time()
+    eval_set_result_name = app_name + "_" + eval_set_id + "_" + str(timestamp)
+    eval_set_result = EvalSetResult(
+        eval_set_result_id=eval_set_result_name,
+        eval_set_result_name=eval_set_result_name,
+        eval_set_id=eval_set_id,
+        eval_case_results=eval_case_results,
+        creation_timestamp=timestamp,
+    )
+
+    # Write eval result file, with eval_set_result_name.
+    app_eval_history_dir = os.path.join(
+        agent_dir, app_name, ".adk", "eval_history"
+    )
+    if not os.path.exists(app_eval_history_dir):
+      os.makedirs(app_eval_history_dir)
+    # Convert to json and write to file.
+    eval_set_result_json = eval_set_result.model_dump_json()
+    eval_set_result_file_path = os.path.join(
+        app_eval_history_dir,
+        eval_set_result_name + _EVAL_SET_RESULT_FILE_EXTENSION,
+    )
+    logger.info("Writing eval result to file: %s", eval_set_result_file_path)
+    with open(eval_set_result_file_path, "w") as f:
+      f.write(json.dumps(eval_set_result_json, indent=2))
+
+    return run_eval_results
+
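One subtlety in the write path above: model_dump_json() already returns a JSON string, and json.dumps() then encodes that string a second time, so the file contains a quoted JSON string literal. The get_eval_result handler below mirrors this by calling json.load() first and model_validate_json() second. A self-contained sketch of the round-trip, using a simplified stand-in model rather than the real EvalSetResult:

import json
import time

from pydantic import BaseModel


class MiniEvalSetResult(BaseModel):
  # Simplified stand-in; the real EvalSetResult above has more fields.
  eval_set_result_id: str
  eval_set_id: str
  creation_timestamp: float


timestamp = time.time()
name = "my_app" + "_" + "my_evalset" + "_" + str(timestamp)
result = MiniEvalSetResult(
    eval_set_result_id=name,
    eval_set_id="my_evalset",
    creation_timestamp=timestamp,
)

payload = result.model_dump_json()  # a JSON string
on_disk = json.dumps(payload, indent=2)  # encodes that string again, as run_eval does

# Read side, as get_eval_result does: decode the outer string, then parse the model.
restored = MiniEvalSetResult.model_validate_json(json.loads(on_disk))
assert restored == result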
+  @app.get(
+      "/apps/{app_name}/eval_results/{eval_result_id}",
+      response_model_exclude_none=True,
+  )
+  def get_eval_result(
+      app_name: str,
+      eval_result_id: str,
+  ) -> EvalSetResult:
+    """Gets the eval result for the given eval id."""
+    # Load the eval set file data
+    maybe_eval_result_file_path = (
+        os.path.join(
+            agent_dir, app_name, ".adk", "eval_history", eval_result_id
+        )
+        + _EVAL_SET_RESULT_FILE_EXTENSION
+    )
+    if not os.path.exists(maybe_eval_result_file_path):
+      raise HTTPException(
+          status_code=404,
+          detail=f"Eval result `{eval_result_id}` not found.",
+      )
+    with open(maybe_eval_result_file_path, "r") as file:
+      eval_result_data = json.load(file)  # Load JSON into a list
+    try:
+      eval_result = EvalSetResult.model_validate_json(eval_result_data)
+      return eval_result
+    except ValidationError as e:
+      logger.exception("get_eval_result validation error: %s", e)
+
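A client-side sketch of fetching one stored result, including the 404 the handler above raises for an unknown id; the base URL and ids are assumptions for illustration, not values from this commit:

import requests  # third-party HTTP client, used here for illustration

BASE = "http://127.0.0.1:8000"  # assumed local ADK API server address

resp = requests.get(f"{BASE}/apps/my_app/eval_results/does_not_exist")
if resp.status_code == 404:
  print(resp.json()["detail"])  # Eval result `does_not_exist` not found.
else:
  # snake_case keys assumed; actual casing depends on the model's serialization config.
  eval_set_result = resp.json()
  for case in eval_set_result["eval_case_results"]:
    print(case["eval_id"], case["final_eval_status"])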
+  @app.get(
+      "/apps/{app_name}/eval_results",
+      response_model_exclude_none=True,
+  )
+  def list_eval_results(app_name: str) -> list[str]:
+    """Lists all eval results for the given app."""
+    app_eval_history_directory = os.path.join(
+        agent_dir, app_name, ".adk", "eval_history"
+    )
+    eval_result_files = [
+        file.removesuffix(_EVAL_SET_RESULT_FILE_EXTENSION)
+        for file in os.listdir(app_eval_history_directory)
+        if file.endswith(_EVAL_SET_RESULT_FILE_EXTENSION)
+    ]
+    return eval_result_files
 
   @app.delete("/apps/{app_name}/users/{user_id}/sessions/{session_id}")
   def delete_session(app_name: str, user_id: str, session_id: str):
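
The listing endpoint above derives result ids purely from filenames: anything under eval_history ending in .evalset_result.json has the suffix stripped and is returned as an id. A standalone sketch of that filter over a hypothetical directory listing:

# Hypothetical directory contents; only the matching file becomes a result id.
filenames = [
    "my_app_my_evalset_1714000000.0.evalset_result.json",
    "my_evalset.evalset.json",  # an eval set, not a result; filtered out
    "notes.txt",
]

_EVAL_SET_RESULT_FILE_EXTENSION = ".evalset_result.json"
eval_result_ids = [
    f.removesuffix(_EVAL_SET_RESULT_FILE_EXTENSION)
    for f in filenames
    if f.endswith(_EVAL_SET_RESULT_FILE_EXTENSION)
]
print(eval_result_ids)  # ['my_app_my_evalset_1714000000.0']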
