 from fastapi.testclient import TestClient
 from google.adk.agents.base_agent import BaseAgent
 from google.adk.agents.run_config import RunConfig
+from google.adk.evaluation.eval_case import EvalCase
+from google.adk.evaluation.eval_case import Invocation
+from google.adk.evaluation.eval_set import EvalSet
+from google.adk.evaluation.eval_result import EvalSetResult
 from google.adk.cli.fast_api import get_fast_api_app
 from google.adk.events import Event
 from google.adk.runners import Runner
@@ -281,12 +285,107 @@ def mock_memory_service():
   return MagicMock()


+@pytest.fixture
+def mock_eval_sets_manager():
+  """Create a mock eval sets manager."""
+
+  # Storage for eval sets.
+  eval_sets = {}
+
+  class MockEvalSetsManager:
+    """Mock eval sets manager."""
+
+    def create_eval_set(self, app_name, eval_set_id):
+      """Create an eval set."""
+      if app_name not in eval_sets:
+        eval_sets[app_name] = {}
+
+      if eval_set_id in eval_sets[app_name]:
+        raise ValueError(f"Eval set {eval_set_id} already exists.")
+
+      eval_sets[app_name][eval_set_id] = EvalSet(
+          eval_set_id=eval_set_id, eval_cases=[]
+      )
+      return eval_set_id
+
+    def get_eval_set(self, app_name, eval_set_id):
+      """Get an eval set."""
+      if app_name not in eval_sets:
+        raise ValueError(f"App {app_name} not found.")
+      if eval_set_id not in eval_sets[app_name]:
+        raise ValueError(f"Eval set {eval_set_id} not found in app {app_name}.")
+      return eval_sets[app_name][eval_set_id]
+
+    def list_eval_sets(self, app_name):
+      """List eval sets."""
+      if app_name not in eval_sets:
+        raise ValueError(f"App {app_name} not found.")
+      return list(eval_sets[app_name].keys())
+
+    def add_eval_case(self, app_name, eval_set_id, eval_case):
+      """Add an eval case to an eval set."""
+      if app_name not in eval_sets:
+        raise ValueError(f"App {app_name} not found.")
+      if eval_set_id not in eval_sets[app_name]:
+        raise ValueError(f"Eval set {eval_set_id} not found in app {app_name}.")
+      eval_sets[app_name][eval_set_id].eval_cases.append(eval_case)
+
+  return MockEvalSetsManager()
+
+
+@pytest.fixture
+def mock_eval_set_results_manager():
+  """Create a mock eval set results manager."""
+
+  # Storage for eval set results.
+  eval_set_results = {}
+
+  class MockEvalSetResultsManager:
+    """Mock eval set results manager."""
+
+    def save_eval_set_result(
+        self, app_name, eval_set_id, eval_case_results
+    ):
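+      """Save an eval set result."""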
+      if app_name not in eval_set_results:
+        eval_set_results[app_name] = {}
+      eval_set_result_id = f"{app_name}_{eval_set_id}_eval_result"
+      eval_set_result = EvalSetResult(
+          eval_set_result_id=eval_set_result_id,
+          eval_set_result_name=eval_set_result_id,
+          eval_set_id=eval_set_id,
+          eval_case_results=eval_case_results,
+      )
+      if eval_set_result_id not in eval_set_results[app_name]:
+        eval_set_results[app_name][eval_set_result_id] = eval_set_result
+      else:
+        eval_set_results[app_name][eval_set_result_id].append(eval_set_result)
+
+    def get_eval_set_result(self, app_name, eval_set_result_id):
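+      """Get an eval set result."""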
+      if app_name not in eval_set_results:
+        raise ValueError(f"App {app_name} not found.")
+      if eval_set_result_id not in eval_set_results[app_name]:
+        raise ValueError(
+            f"Eval set result {eval_set_result_id} not found in app {app_name}."
+        )
+      return eval_set_results[app_name][eval_set_result_id]
+
+    def list_eval_set_results(self, app_name):
+      """List eval set results."""
+      if app_name not in eval_set_results:
+        raise ValueError(f"App {app_name} not found.")
+      return list(eval_set_results[app_name].keys())
+
+  return MockEvalSetResultsManager()
+
+
 @pytest.fixture
 def test_app(
     mock_session_service,
     mock_artifact_service,
     mock_memory_service,
     mock_agent_loader,
+    mock_eval_sets_manager,
+    mock_eval_set_results_manager,
 ):
   """Create a TestClient for the FastAPI app without starting a server."""

@@ -309,6 +408,14 @@ def test_app(
           "google.adk.cli.fast_api.AgentLoader",
           return_value=mock_agent_loader,
       ),
+      patch(
+          "google.adk.cli.fast_api.LocalEvalSetsManager",
+          return_value=mock_eval_sets_manager,
+      ),
+      patch(
+          "google.adk.cli.fast_api.LocalEvalSetResultsManager",
+          return_value=mock_eval_set_results_manager,
+      ),
   ):
     # Get the FastAPI app, but don't actually run it
     app = get_fast_api_app(
@@ -339,6 +446,35 @@ async def create_test_session(
   return test_session_info


+@pytest.fixture
+async def create_test_eval_set(
+    test_app, test_session_info, mock_eval_sets_manager
+):
+  """Create a test eval set using the mocked eval sets manager."""
+  _ = mock_eval_sets_manager.create_eval_set(
+      app_name=test_session_info["app_name"],
+      eval_set_id="test_eval_set_id",
+  )
+  test_eval_case = EvalCase(
+      eval_id="test_eval_case_id",
+      conversation=[
+          Invocation(
+              invocation_id="test_invocation_id",
+              user_content=types.Content(
+                  parts=[types.Part(text="test_user_content")],
+                  role="user",
+              ),
+          )
+      ],
+  )
+  _ = mock_eval_sets_manager.add_eval_case(
+      app_name=test_session_info["app_name"],
+      eval_set_id="test_eval_set_id",
+      eval_case=test_eval_case,
+  )
+  return test_session_info
+
+
 #################################################
 # Test Cases
 #################################################
@@ -479,6 +615,98 @@ def test_list_artifact_names(test_app, create_test_session):
   logger.info(f"Listed {len(data)} artifacts")


+def test_get_eval_set_not_found(test_app):
+  """Test getting an eval set that doesn't exist."""
+  url = "/apps/test_app_name/eval_sets/test_eval_set_id_not_found"
+  response = test_app.get(url)
+  assert response.status_code == 404
+
+
+def test_create_eval_set(test_app, test_session_info):
+  """Test creating an eval set."""
+  url = f"/apps/{test_session_info['app_name']}/eval_sets/test_eval_set_id"
+  response = test_app.post(url)
+
+  # Verify the response
+  assert response.status_code == 200
+
+
+def test_list_eval_sets(test_app, create_test_eval_set):
+  """Test listing eval sets."""
+  info = create_test_eval_set
+  url = f"/apps/{info['app_name']}/eval_sets"
+  response = test_app.get(url)
+
+  # Verify the response
+  assert response.status_code == 200
+  data = response.json()
+  assert isinstance(data, list)
+  assert len(data) == 1
+  assert data[0] == "test_eval_set_id"
+
+
+def test_get_eval_set_result_not_found(test_app):
+  """Test getting an eval set result that doesn't exist."""
+  url = "/apps/test_app_name/eval_results/test_eval_result_id_not_found"
+  response = test_app.get(url)
+  assert response.status_code == 404
+
+
+def test_run_eval(test_app, create_test_eval_set):
+  """Test running an eval."""
+
+  # Helper function to verify eval case result.
+  def verify_eval_case_result(actual_eval_case_result):
+    expected_eval_case_result = {
+        "evalSetId": "test_eval_set_id",
+        "evalId": "test_eval_case_id",
+        "finalEvalStatus": 1,
+        "overallEvalMetricResults": [{
+            "metricName": "tool_trajectory_avg_score",
+            "threshold": 0.5,
+            "score": 1.0,
+            "evalStatus": 1,
+        }],
+    }
+    for k, v in expected_eval_case_result.items():
+      assert actual_eval_case_result[k] == v
+
+  info = create_test_eval_set
+  url = f"/apps/{info['app_name']}/eval_sets/test_eval_set_id/run_eval"
+  payload = {
+      "eval_ids": ["test_eval_case_id"],
+      "eval_metrics": [
+          {"metric_name": "tool_trajectory_avg_score", "threshold": 0.5}
+      ],
+  }
+  response = test_app.post(url, json=payload)
+
+  # Verify the response
+  assert response.status_code == 200
+
+  data = response.json()
+  assert len(data) == 1
+  verify_eval_case_result(data[0])
+
+  # Verify the eval set result is saved via get_eval_result endpoint.
+  url = f"/apps/{info['app_name']}/eval_results/{info['app_name']}_test_eval_set_id_eval_result"
+  response = test_app.get(url)
+  assert response.status_code == 200
+  data = response.json()
+  assert isinstance(data, dict)
+  assert data["evalSetId"] == "test_eval_set_id"
+  assert data["evalSetResultId"] == f"{info['app_name']}_test_eval_set_id_eval_result"
+  assert len(data["evalCaseResults"]) == 1
+  verify_eval_case_result(data["evalCaseResults"][0])
+
+  # Verify the eval set result is saved via list_eval_results endpoint.
+  url = f"/apps/{info['app_name']}/eval_results"
+  response = test_app.get(url)
+  assert response.status_code == 200
+  data = response.json()
+  assert data == [f"{info['app_name']}_test_eval_set_id_eval_result"]
+
+
 def test_debug_trace(test_app):
   """Test the debug trace endpoint."""
   # This test will likely return 404 since we haven't set up trace data,