@@ -339,16 +339,7 @@ def chat_completion_handler(
339339 stop = stop + rstop
340340
341341 if response_format is not None and response_format ["type" ] == "json_object" :
342- try :
343- # create grammar from json schema
344- if "schema" in response_format :
345- grammar = llama_grammar .LlamaGrammar .from_json_schema (
346- json .dumps (response_format ["schema" ]), verbose = llama .verbose
347- )
348- except Exception as e :
349- grammar = llama_grammar .LlamaGrammar .from_string (
350- llama_grammar .JSON_GBNF , verbose = llama .verbose
351- )
342+ grammar = _grammar_for_response_format (response_format , verbose = llama .verbose )
352343
353344 completion_or_chunks = llama .create_completion (
354345 prompt = prompt ,
@@ -606,6 +597,35 @@ def _format_chatglm3(
606597 ret += role
607598 return ret
608599
def _grammar_for_json(verbose: bool = False):
    """Build the generic JSON grammar.

    Args:
        verbose: Forwarded unchanged to ``LlamaGrammar.from_string``.

    Returns:
        The result of ``LlamaGrammar.from_string`` applied to
        ``llama_grammar.JSON_GBNF``.
    """
    build_from_string = llama_grammar.LlamaGrammar.from_string
    return build_from_string(llama_grammar.JSON_GBNF, verbose=verbose)
602+
def _grammar_for_json_schema(
    schema: str,
    verbose: bool = False,
    fallback_to_json: bool = True,
):
    """Build a grammar from a JSON schema, optionally falling back to plain JSON.

    Args:
        schema: The JSON schema serialized as a string.
        verbose: Forwarded to the grammar constructors. When true, a failed
            schema conversion is also reported on stdout before falling back.
        fallback_to_json: If true, any error raised while converting the
            schema results in the generic JSON grammar being returned
            instead. If false, the error propagates to the caller.

    Returns:
        The result of ``LlamaGrammar.from_json_schema`` for ``schema``, or the
        result of ``_grammar_for_json`` when conversion failed and
        ``fallback_to_json`` is true.

    Raises:
        Exception: Whatever ``LlamaGrammar.from_json_schema`` raised, when
            ``fallback_to_json`` is false.
    """
    try:
        return llama_grammar.LlamaGrammar.from_json_schema(schema, verbose=verbose)
    except Exception as e:
        # Deliberately broad: the fallback is best-effort and must cover any
        # failure mode of the schema converter.
        if not fallback_to_json:
            # Bare ``raise`` re-raises the active exception as-is.
            raise
        if verbose:
            # Do not swallow the failure silently; tell the user that the
            # schema was ignored and why.
            print(
                "Failed to parse response format as JSON schema, falling back to default grammar"
            )
            print(e)
        return _grammar_for_json(verbose=verbose)
615+
def _grammar_for_response_format(
    response_format: llama_types.ChatCompletionRequestResponseFormat,
    verbose: bool = False
):
    """Select the grammar that matches a chat-completion ``response_format``.

    Args:
        response_format: Mapping with a ``"type"`` entry and, optionally, a
            ``"schema"`` entry.
        verbose: Forwarded to the grammar helper that gets called.

    Returns:
        ``None`` when ``response_format["type"]`` is not ``"json_object"``.
        Otherwise the result of ``_grammar_for_json_schema`` when a
        ``"schema"`` key is present, or of ``_grammar_for_json`` when it is
        not.
    """
    wants_json = response_format["type"] == "json_object"
    if not wants_json:
        return None

    # No schema supplied: constrain output to generic JSON only.
    if "schema" not in response_format:
        return _grammar_for_json(verbose=verbose)

    # The schema helper takes the schema as a serialized string.
    schema_text = json.dumps(response_format["schema"])
    return _grammar_for_json_schema(schema_text, verbose=verbose)
609629
610630### Chat Formats ###
611631
@@ -1994,16 +2014,7 @@ def __call__(
19942014 prompt = llama .input_ids [: llama .n_tokens ].tolist ()
19952015
19962016 if response_format is not None and response_format ["type" ] == "json_object" :
1997- try :
1998- # create grammar from json schema
1999- if "schema" in response_format :
2000- grammar = llama_grammar .LlamaGrammar .from_json_schema (
2001- json .dumps (response_format ["schema" ])
2002- )
2003- except Exception as e :
2004- grammar = llama_grammar .LlamaGrammar .from_string (
2005- llama_grammar .JSON_GBNF
2006- )
2017+ grammar = _grammar_for_response_format (response_format )
20072018
20082019 return _convert_completion_to_chat (
20092020 llama .create_completion (
@@ -2159,26 +2170,10 @@ def chatml_function_calling(
21592170 tool_calls = None ,
21602171 add_generation_prompt = True ,
21612172 )
2173+
21622174 if response_format is not None and response_format ["type" ] == "json_object" :
2163- try :
2164- grammar = (
2165- llama_grammar .LlamaGrammar .from_json_schema (
2166- json .dumps (response_format ["schema" ])
2167- )
2168- if "schema" in response_format
2169- else None
2170- )
2171- except Exception as e :
2172- if llama .verbose :
2173- print (
2174- "Failed to parse response format as JSON schema, falling back to default grammar"
2175- )
2176- print (e )
2177- grammar = (
2178- llama_grammar .LlamaGrammar .from_string (llama_grammar .JSON_GBNF )
2179- if grammar is None
2180- else grammar
2181- )
2175+ grammar = _grammar_for_response_format (response_format )
2176+
21822177 return _convert_completion_to_chat (
21832178 llama .create_completion (
21842179 prompt = prompt ,
0 commit comments