Merge branch 'main' of github.com:abetlen/llama_cpp_python into main · qeleb/llama-cpp-python@15ee210

Commit 15ee210

Merge branch 'main' of github.com:abetlen/llama_cpp_python into main
2 parents 99ff175 + 4b01a87 commit 15ee210

File tree: 2 files changed, +5 -3 lines changed

llama_cpp/llama.py
llama_cpp/server/types.py

llama_cpp/llama.py

Lines changed: 2 additions & 2 deletions

@@ -1917,7 +1917,7 @@ def create_completion(
         completion_or_chunks = self._create_completion(
             prompt=prompt,
             suffix=suffix,
-            max_tokens=max_tokens,
+            max_tokens=-1 if max_tokens is None else max_tokens,
             temperature=temperature,
             top_p=top_p,
             min_p=min_p,
@@ -1951,7 +1951,7 @@ def __call__(
         self,
         prompt: str,
         suffix: Optional[str] = None,
-        max_tokens: int = 128,
+        max_tokens: Optional[int] = 16,
         temperature: float = 0.8,
         top_p: float = 0.95,
         min_p: float = 0.05,
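Taken together, the two hunks let callers pass max_tokens=None through Llama.__call__: the public default becomes 16, and None is rewritten to -1 before it reaches _create_completion, which the library's convention treats as "no fixed token cap". A minimal usage sketch follows; the model path and prompt are placeholders, and the unlimited-generation meaning of -1 is assumed from that convention rather than shown in this diff:

# Sketch only: model path and prompt are hypothetical, not part of this commit.
from llama_cpp import Llama

llm = Llama(model_path="./model.gguf")

# Default after this change: the completion is capped at 16 tokens.
capped = llm("Q: List three prime numbers. A:")

# Passing None is now valid; create_completion rewrites it to -1
# (assumed here to mean "generate without a fixed token cap").
uncapped = llm("Q: List three prime numbers. A:", max_tokens=None)

print(capped["choices"][0]["text"])
print(uncapped["choices"][0]["text"])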

llama_cpp/server/types.py

Lines changed: 3 additions & 1 deletion

@@ -110,7 +110,9 @@ class CreateCompletionRequest(BaseModel):
         default=None,
         description="A suffix to append to the generated text. If None, no suffix is appended. Useful for chatbots.",
     )
-    max_tokens: int = max_tokens_field
+    max_tokens: Optional[int] = Field(
+        default=16, ge=0, description="The maximum number of tokens to generate."
+    )
     temperature: float = temperature_field
     top_p: float = top_p_field
     min_p: float = min_p_field
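On the server side, the request model now declares max_tokens as Optional[int] with a lower bound of zero instead of reusing the shared max_tokens_field. A self-contained Pydantic sketch of just this field is below; the class name is hypothetical, and the real model is CreateCompletionRequest with many more fields:

# Standalone sketch of the new field's validation, not the real request model.
from typing import Optional

from pydantic import BaseModel, Field, ValidationError


class CompletionRequestSketch(BaseModel):
    max_tokens: Optional[int] = Field(
        default=16, ge=0, description="The maximum number of tokens to generate."
    )


print(CompletionRequestSketch().max_tokens)                 # 16 (new default)
print(CompletionRequestSketch(max_tokens=None).max_tokens)  # None is now accepted
print(CompletionRequestSketch(max_tokens=0).max_tokens)     # 0 passes the ge=0 bound

try:
    CompletionRequestSketch(max_tokens=-5)                  # negative values are rejected
except ValidationError as exc:
    print(exc.errors()[0]["type"])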
