Define EvalReport data model. · codefromthecrypt/adk-python@660c2ef · GitHub

Commit 660c2ef

google-genai-bot authored and copybara-github committed
Define EvalReport data model.
PiperOrigin-RevId: 757933585
1 parent 1237d53 commit 660c2ef

File tree

3 files changed · +23 -11 lines changed


src/google/adk/cli/cli_eval.py

Lines changed: 16 additions & 4 deletions
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import datetime
 from enum import Enum
 import importlib.util
 import json
@@ -25,8 +26,10 @@
 import uuid
 
 from pydantic import BaseModel
+from pydantic import Field
 
 from ..agents import Agent
+from ..sessions.session import Session
 
 logger = logging.getLogger(__name__)
 
@@ -43,16 +46,25 @@ class EvalMetric(BaseModel):
 
 
 class EvalMetricResult(BaseModel):
-  score: Optional[float]
+  score: Optional[float] = None
   eval_status: EvalStatus
 
 
-class EvalResult(BaseModel):
+class EvalCaseResult(BaseModel):
   eval_set_file: str
   eval_id: str
   final_eval_status: EvalStatus
   eval_metric_results: list[tuple[EvalMetric, EvalMetricResult]]
   session_id: str
+  session_details: Optional[Session] = None
+
+
+class EvalSetResult(BaseModel):
+  eval_set_result_id: str
+  eval_set_result_name: str
+  eval_set_id: str
+  eval_case_results: list[EvalCaseResult] = Field(default_factory=list)
+  creation_timestamp: float = 0.0
 
 
 MISSING_EVAL_DEPENDENCIES_MESSAGE = (
@@ -154,7 +166,7 @@ async def run_evals(
     session_service=None,
     artifact_service=None,
     print_detailed_results=False,
-) -> AsyncGenerator[EvalResult, None]:
+) -> AsyncGenerator[EvalCaseResult, None]:
   try:
     from ..evaluation.agent_evaluator import EvaluationGenerator
     from ..evaluation.response_evaluator import ResponseEvaluator
@@ -249,7 +261,7 @@ async def run_evals(
       else:
         raise ValueError("Unknown eval status.")
 
-      yield EvalResult(
+      yield EvalCaseResult(
          eval_set_file=eval_set_file,
          eval_id=eval_name,
          final_eval_status=final_eval_status,
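Note on the new models: this commit only defines EvalCaseResult and EvalSetResult; none of the changed files shows how an EvalSetResult gets populated or where creation_timestamp comes from (datetime is imported but not yet used here). The following is a minimal, hypothetical sketch of how per-case results could be folded into a set-level report. The helper name make_eval_set_result and the use of time.time() are assumptions; only the field names come from the diff above.

import time
import uuid

# EvalCaseResult and EvalSetResult are the Pydantic models added above in
# src/google/adk/cli/cli_eval.py.
from google.adk.cli.cli_eval import EvalCaseResult, EvalSetResult


def make_eval_set_result(
    eval_set_id: str,
    case_results: list[EvalCaseResult],
) -> EvalSetResult:
  """Hypothetical helper: wraps per-case results into one EvalSetResult."""
  result_id = str(uuid.uuid4())
  return EvalSetResult(
      eval_set_result_id=result_id,
      eval_set_result_name=f"{eval_set_id}_{result_id}",
      eval_set_id=eval_set_id,
      eval_case_results=case_results,
      # An epoch float satisfies the declared type; whether the real code
      # uses time.time() or datetime is not shown in this commit.
      creation_timestamp=time.time(),
  )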

src/google/adk/cli/cli_tools_click.py

Lines changed: 3 additions & 3 deletions
@@ -245,7 +245,7 @@ def cli_eval(
 
   try:
     from .cli_eval import EvalMetric
-    from .cli_eval import EvalResult
+    from .cli_eval import EvalCaseResult
    from .cli_eval import EvalStatus
    from .cli_eval import get_evaluation_criteria_or_default
    from .cli_eval import get_root_agent
@@ -269,7 +269,7 @@ def cli_eval(
 
   eval_set_to_evals = parse_and_get_evals_to_run(eval_set_file_path)
 
-  async def _collect_eval_results() -> list[EvalResult]:
+  async def _collect_eval_results() -> list[EvalCaseResult]:
     return [
        result
        async for result in run_evals(
@@ -290,7 +290,7 @@ async def _collect_eval_results() -> list[EvalResult]:
   eval_run_summary = {}
 
   for eval_result in eval_results:
-    eval_result: EvalResult
+    eval_result: EvalCaseResult
 
    if eval_result.eval_set_file not in eval_run_summary:
      eval_run_summary[eval_result.eval_set_file] = [0, 0]
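The last hunk only shows the start of the summary loop; the two-element list it initialises suggests a per-file [passed, failed] tally. Below is a self-contained sketch of that pattern. The function name summarize, the exact counting rule, and the assumption that EvalStatus exposes a PASSED member (suggested by the test stub's "PASSED"/"FAILED" values) are illustrative, not the CLI's actual code.

from google.adk.cli.cli_eval import EvalCaseResult, EvalStatus


def summarize(eval_results: list[EvalCaseResult]) -> dict[str, list[int]]:
  """Tally results per eval set file as [passed, failed]."""
  eval_run_summary: dict[str, list[int]] = {}
  for eval_result in eval_results:
    if eval_result.eval_set_file not in eval_run_summary:
      eval_run_summary[eval_result.eval_set_file] = [0, 0]
    if eval_result.final_eval_status == EvalStatus.PASSED:
      eval_run_summary[eval_result.eval_set_file][0] += 1
    else:
      eval_run_summary[eval_result.eval_set_file][1] += 1
  return eval_run_summary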

tests/unittests/cli/utils/test_cli_tools_click.py

Lines changed: 4 additions & 4 deletions
@@ -250,7 +250,7 @@ class _EvalMetric:
     def __init__(self, metric_name: str, threshold: float) -> None:
       ...
 
-  class _EvalResult:
+  class _EvalCaseResult:
 
    def __init__(self, eval_set_file: str, final_eval_status: str) -> None:
      self.eval_set_file = eval_set_file
@@ -261,7 +261,7 @@ def __init__(self, eval_set_file: str, final_eval_status: str) -> None:
 
   # helper funcs
   stub.EvalMetric = _EvalMetric
-  stub.EvalResult = _EvalResult
+  stub.EvalCaseResult = _EvalCaseResult
  stub.EvalStatus = _EvalStatus
  stub.MISSING_EVAL_DEPENDENCIES_MESSAGE = "stub msg"
 
@@ -272,8 +272,8 @@ def __init__(self, eval_set_file: str, final_eval_status: str) -> None:
 
   # Create an async generator function for run_evals
   async def mock_run_evals(*_a, **_k):
-    yield _EvalResult("set1.json", "PASSED")
-    yield _EvalResult("set1.json", "FAILED")
+    yield _EvalCaseResult("set1.json", "PASSED")
+    yield _EvalCaseResult("set1.json", "FAILED")
 
   stub.run_evals = mock_run_evals
 
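For readers unfamiliar with the fixture, here is a small, self-contained illustration of how the stubbed async generator above behaves when its results are collected; the trailing assertion is illustrative and not part of the actual test.

import asyncio


class _EvalCaseResult:
  # Same shape as the stub above: only the fields the CLI summary reads.
  def __init__(self, eval_set_file: str, final_eval_status: str) -> None:
    self.eval_set_file = eval_set_file
    self.final_eval_status = final_eval_status


async def mock_run_evals(*_a, **_k):
  # One passing and one failing case, as in the diff.
  yield _EvalCaseResult("set1.json", "PASSED")
  yield _EvalCaseResult("set1.json", "FAILED")


async def _collect():
  return [result async for result in mock_run_evals()]


results = asyncio.run(_collect())
assert [r.final_eval_status for r in results] == ["PASSED", "FAILED"]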

0 commit comments
