Pass-Through grammar parameter in web server. (#855) Closes #778 · maociao/llama-cpp-python@5f8f369 · GitHub
[go: up one dir, main page]

Skip to content

Commit 5f8f369

Browse files
authored
Pass-Through grammar parameter in web server. (abetlen#855) Closes abetlen#778
1 parent 25cb710 commit 5f8f369

File tree

1 file changed

+12
-0
lines changed

1 file changed

+12
-0
lines changed

llama_cpp/server/app.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -518,6 +518,10 @@ async def get_event_publisher(
518518
default=0.1, ge=0.001, le=1.0, description="Mirostat learning rate"
519519
)
520520

521+
grammar = Field(
522+
default=None,
523+
description="A CBNF grammar (as string) to be used for formatting the model's output."
524+
)
521525

522526
class CreateCompletionRequest(BaseModel):
523527
prompt: Union[str, List[str]] = Field(
@@ -533,6 +537,7 @@ class CreateCompletionRequest(BaseModel):
533537
mirostat_mode: int = mirostat_mode_field
534538
mirostat_tau: float = mirostat_tau_field
535539
mirostat_eta: float = mirostat_eta_field
540+
grammar: Optional[str] = None
536541
echo: bool = Field(
537542
default=False,
538543
description="Whether to echo the prompt in the generated text. Useful for chatbots.",
@@ -634,6 +639,9 @@ async def create_completion(
634639
]
635640
)
636641

642+
if body.grammar is not None:
643+
kwargs["grammar"] = llama_cpp.LlamaGrammar.from_string(body.grammar)
644+
637645
iterator_or_completion: Union[
638646
llama_cpp.Completion, Iterator[llama_cpp.CompletionChunk]
639647
] = await run_in_threadpool(llama, **kwargs)
@@ -714,6 +722,7 @@ class CreateChatCompletionRequest(BaseModel):
714722
mirostat_mode: int = mirostat_mode_field
715723
mirostat_tau: float = mirostat_tau_field
716724
mirostat_eta: float = mirostat_eta_field
725+
grammar: Optional[str] = None
717726
stop: Optional[List[str]] = stop_field
718727
stream: bool = stream_field
719728
presence_penalty: Optional[float] = presence_penalty_field
@@ -772,6 +781,9 @@ async def create_chat_completion(
772781
]
773782
)
774783

784+
if body.grammar is not None:
785+
kwargs["grammar"] = llama_cpp.LlamaGrammar.from_string(body.grammar)
786+
775787
iterator_or_completion: Union[
776788
llama_cpp.ChatCompletion, Iterator[llama_cpp.ChatCompletionChunk]
777789
] = await run_in_threadpool(llama.create_chat_completion, **kwargs)

0 commit comments

Comments (0)