63
63
from ..agents .llm_agent import LlmAgent
64
64
from ..agents .run_config import StreamingMode
65
65
from ..artifacts import InMemoryArtifactService
66
+ from ..evaluation .local_eval_sets_manager import LocalEvalSetsManager
66
67
from ..events .event import Event
67
68
from ..memory .in_memory_memory_service import InMemoryMemoryService
68
69
from ..runners import Runner
@@ -252,6 +253,8 @@ async def internal_lifespan(app: FastAPI):
252
253
artifact_service = InMemoryArtifactService ()
253
254
memory_service = InMemoryMemoryService ()
254
255
256
+ eval_sets_manager = LocalEvalSetsManager (agent_dir = agent_dir )
257
+
255
258
# Build the Session service
256
259
agent_engine_id = ""
257
260
if session_db_url :
@@ -401,44 +404,21 @@ def create_eval_set(
401
404
eval_set_id : str ,
402
405
):
403
406
"""Creates an eval set, given the id."""
404
- pattern = r"^[a-zA-Z0-9_]+$"
405
- if not bool (re .fullmatch (pattern , eval_set_id )):
407
+ try :
408
+ eval_sets_manager .create_eval_set (app_name , eval_set_id )
409
+ except ValueError as ve :
406
410
raise HTTPException (
407
411
status_code = 400 ,
408
- detail = (
409
- f"Invalid eval set id. Eval set id should have the `{ pattern } `"
410
- " format"
411
- ),
412
- )
413
- # Define the file path
414
- new_eval_set_path = _get_eval_set_file_path (
415
- app_name , agent_dir , eval_set_id
416
- )
417
-
418
- logger .info ("Creating eval set file `%s`" , new_eval_set_path )
419
-
420
- if not os .path .exists (new_eval_set_path ):
421
- # Write the JSON string to the file
422
- logger .info ("Eval set file doesn't exist, we will create a new one." )
423
- with open (new_eval_set_path , "w" ) as f :
424
- empty_content = json .dumps ([], indent = 2 )
D7AE
425
- f .write (empty_content )
412
+ detail = str (ve ),
413
+ ) from ve
426
414
427
415
@app .get (
428
416
"/apps/{app_name}/eval_sets" ,
429
417
response_model_exclude_none = True ,
430
418
)
431
419
def list_eval_sets (app_name : str ) -> list [str ]:
432
420
"""Lists all eval sets for the given app."""
433
- eval_set_file_path = os .path .join (agent_dir , app_name )
434
- eval_sets = []
435
- for file in os .listdir (eval_set_file_path ):
436
- if file .endswith (_EVAL_SET_FILE_EXTENSION ):
437
- eval_sets .append (
438
- os .path .basename (file ).removesuffix (_EVAL_SET_FILE_EXTENSION )
439
- )
440
-
441
- return sorted (eval_sets )
421
+ return eval_sets_manager .list_eval_sets (app_name )
442
422
443
423
@app .post (
444
424
"/apps/{app_name}/eval_sets/{eval_set_id}/add_session" ,
@@ -447,33 +427,11 @@ def list_eval_sets(app_name: str) -> list[str]:
447
427
async def add_session_to_eval_set (
448
428
app_name : str , eval_set_id : str , req : AddSessionToEvalSetRequest
449
429
):
450
- pattern = r"^[a-zA-Z0-9_]+$"
451
- if not bool (re .fullmatch (pattern , req .eval_id )):
452
- raise HTTPException (
453
- status_code = 400 ,
454
- detail = f"Invalid eval id. Eval id should have the `{ pattern } ` format" ,
455
- )
456
-
457
430
# Get the session
458
431
session = session_service .get_session (
459
432
app_name = app_name , user_id = req .user_id , session_id = req .session_id
460
433
)
461
434
assert session , "Session not found."
462
- # Load the eval set file data
463
- eval_set_file_path = _get_eval_set_file_path (
464
- app_name , agent_dir , eval_set_id
465
- )
466
- with open (eval_set_file_path , "r" ) as file :
467
- eval_set_data = json .load (file ) # Load JSON into a list
468
-
469
- if [x for x in eval_set_data if x ["name" ] == req .eval_id ]:
470
- raise HTTPException (
471
- status_code = 400 ,
472
- detail = (
473
- f"Eval id `{ req .eval_id } ` already exists in `{ eval_set_id } `"
474
- " eval set."
475
- ),
476
- )
477
435
478
436
# Convert the session data to evaluation format
479
437
test_data = evals .convert_session_to_eval_format (session )
@@ -483,18 +441,19 @@ async def add_session_to_eval_set(
483
441
await _get_root_agent_async (app_name )
484
442
)
485
443
486
- eval_set_data . append ( {
444
+ eval_case = {
487
445
"name" : req .eval_id ,
488
446
"data" : test_data ,
489
447
"initial_session" : {
490
448
"state" : initial_session_state ,
491
449
"app_name" : app_name ,
492
450
"user_id" : req .user_id ,
493
451
},
494
- })
495
- # Serialize the test data to JSON and write to the eval set file.
496
- with open (eval_set_file_path , "w" ) as f :
497
- f .write (json .dumps (eval_set_data , indent = 2 ))
452
+ }
453
+ try :
454
+ eval_sets_manager .add_eval_case (app_name , eval_set_id , eval_case )
455
+ except ValueError as ve :
456
+ raise HTTPException (status_code = 400 , detail = str (ve )) from ve
498
457
499
458
@app .get (
500
459
"/apps/{app_name}/eval_sets/{eval_set_id}/evals" ,
@@ -505,12 +464,7 @@ def list_evals_in_eval_set(
505
464
eval_set_id : str ,
506
465
) -> list [str ]:
507
466
"""Lists all evals in an eval set."""
508
- # Load the eval set file data
509
- eval_set_file_path = _get_eval_set_file_path (
510
- app_name , agent_dir , eval_set_id
511
- )
512
- with open (eval_set_file_path , "r" ) as file :
513
- eval_set_data = json .load (file ) # Load JSON into a list
467
+ eval_set_data = eval_sets_manager .get_eval_set (app_name , eval_set_id )
514
468
515
469
return sorted ([x ["name" ] for x in eval_set_data ])
516
470
0 commit comments