28
28
import click
29
29
from click .testing import CliRunner
30
30
from google .adk .cli import cli_tools_click
31
+ from google .adk .evaluation import local_eval_set_results_manager
31
32
import pytest
32
33
33
34
@@ -255,9 +256,17 @@ def __init__(self, metric_name: str, threshold: float) -> None:
255
256
256
257
class _EvalCaseResult :
257
258
258
- def __init__ (self , eval_set_id : str , final_eval_status : str ) -> None :
259
+ def __init__ (
260
+ self ,
261
+ eval_set_id : str ,
262
+ final_eval_status : str ,
263
+ user_id : str ,
264
+ session_id : str ,
265
+ ) -> None :
259
266
self .eval_set_id = eval_set_id
260
267
self .final_eval_status = final_eval_status
268
+ self .user_id = user_id
269
+ self .session_id = session_id
261
270
262
271
class EvalCase :
263
272
@@ -266,9 +275,19 @@ def __init__(self, eval_id: str):
266
275
267
276
class EvalSet:
  """Test double for an eval set: an id plus the eval cases it contains."""

  def __init__(self, eval_set_id: str, eval_cases: list[EvalCase]):
    """Keep the id and the case list exactly as handed in (no copying)."""
    self.eval_set_id, self.eval_cases = eval_set_id, eval_cases
271
281
282
def mock_save_eval_set_result(cls, *args, **kwargs):
  """No-op replacement so the test never writes eval results to disk."""
  return None

# Patch the results manager class so saving eval results is a silent no-op
# for the duration of this test; pytest's monkeypatch undoes it afterwards.
target_cls = local_eval_set_results_manager.LocalEvalSetResultsManager
monkeypatch.setattr(target_cls, "save_eval_set_result", mock_save_eval_set_result)
290
+
272
291
# minimal enum-like namespace
273
292
_EvalStatus = types .SimpleNamespace (PASSED = "PASSED" , FAILED = "FAILED" )
274
293
@@ -283,13 +302,14 @@ def __init__(self, eval_cases: list[EvalCase]):
283
302
# Wire the stub module so the CLI sees a single eval set file ("set1.json")
# holding two eval cases, with no reset hook configured.
stub.try_get_reset_func = lambda _p: None
stub.parse_and_get_evals_to_run = lambda _paths: {"set1.json": ["e1", "e2"]}

def _load_eval_set(x, y):
  """Return a fixed two-case eval set regardless of the requested file."""
  return EvalSet("test_eval_set_id", [EvalCase("e1"), EvalCase("e2")])

eval_sets_manager_stub.load_eval_set_from_file = _load_eval_set

async def mock_run_evals(*_a, **_k):
  """Async generator yielding one passing and one failing case result."""
  outcomes = (("PASSED", "session1"), ("FAILED", "session2"))
  for status, session in outcomes:
    yield _EvalCaseResult("set1.json", status, "user", session)

stub.run_evals = mock_run_evals
295
315
0 commit comments