diff --git a/llama_cpp/llama.py b/llama_cpp/llama.py
index a37a62aef..0970509a7 100644
--- a/llama_cpp/llama.py
+++ b/llama_cpp/llama.py
@@ -1034,6 +1034,7 @@ def _create_completion(
         logits_processor: Optional[LogitsProcessorList] = None,
         grammar: Optional[LlamaGrammar] = None,
         logit_bias: Optional[Dict[str, float]] = None,
+        reset: bool = True,
     ) -> Union[
         Iterator[CreateCompletionResponse], Iterator[CreateCompletionStreamResponse]
     ]:
@@ -1222,6 +1223,7 @@ def logit_bias_processor(
             stopping_criteria=stopping_criteria,
             logits_processor=logits_processor,
             grammar=grammar,
+            reset=reset,
         ):
             assert self._model.model is not None
             if llama_cpp.llama_token_is_eog(self._model.model, token):
@@ -1653,6 +1655,7 @@ def create_completion(
         logits_processor: Optional[LogitsProcessorList] = None,
         grammar: Optional[LlamaGrammar] = None,
         logit_bias: Optional[Dict[str, float]] = None,
+        reset: bool = True,
     ) -> Union[CreateCompletionResponse, Iterator[CreateCompletionStreamResponse]]:
         """Generate text from a prompt.
 
@@ -1716,6 +1719,7 @@ def create_completion(
             logits_processor=logits_processor,
             grammar=grammar,
             logit_bias=logit_bias,
+            reset=reset,
         )
         if stream:
             chunks: Iterator[CreateCompletionStreamResponse] = completion_or_chunks
@@ -1746,6 +1750,7 @@ def __call__(
         mirostat_tau: float = 5.0,
         mirostat_eta: float = 0.1,
         model: Optional[str] = None,
+        reset: bool = True,
         stopping_criteria: Optional[StoppingCriteriaList] = None,
         logits_processor: Optional[LogitsProcessorList] = None,
         grammar: Optional[LlamaGrammar] = None,
@@ -1813,6 +1818,7 @@ def __call__(
             logits_processor=logits_processor,
             grammar=grammar,
             logit_bias=logit_bias,
+            reset=reset,
         )
 
     def create_chat_completion(
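
A minimal usage sketch of the new parameter, assuming the public Llama class from llama-cpp-python; the model path and prompts below are placeholders. The patch exposes the existing reset flag of generate() through __call__ and create_completion, so a caller can (presumably) skip the internal state reset between consecutive completions:

from llama_cpp import Llama

# Hypothetical model path; replace with a local GGUF file.
llm = Llama(model_path="./models/example.gguf")

# Default behaviour (reset=True): state is reset before evaluating the prompt.
first = llm("Q: Name the planets in the solar system. A:", max_tokens=64)

# With the new flag, reset=False is forwarded through create_completion()
# down to generate(), so the previously evaluated state is not cleared first.
followup = llm("Q: Which of them is largest? A:", max_tokens=64, reset=False)

print(first["choices"][0]["text"])
print(followup["choices"][0]["text"])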