You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
Copy file name to clipboardExpand all lines: llama_cpp/llama.py
+6-6Lines changed: 6 additions & 6 deletions
Original file line number
Diff line number
Diff line change
@@ -1296,7 +1296,7 @@ def _create_completion(
1296
1296
self,
1297
1297
prompt: Union[str, List[int]],
1298
1298
suffix: Optional[str] = None,
1299
-
max_tokens: int = 16,
1299
+
max_tokens: Optional[int] = 16,
1300
1300
temperature: float = 0.8,
1301
1301
top_p: float = 0.95,
1302
1302
logprobs: Optional[int] = None,
@@ -1350,7 +1350,7 @@ def _create_completion(
1350
1350
f"Requested tokens ({len(prompt_tokens)}) exceed context window of {llama_cpp.llama_n_ctx(self.ctx)}"
1351
1351
)
1352
1352
1353
-
if max_tokens <= 0:
1353
+
if max_tokens is None or max_tokens <= 0:
1354
1354
# Unlimited, depending on n_ctx.
1355
1355
max_tokens = self._n_ctx - len(prompt_tokens)
1356
1356
@@ -1762,7 +1762,7 @@ def create_completion(
1762
1762
self,
1763
1763
prompt: Union[str, List[int]],
1764
1764
suffix: Optional[str] = None,
1765
-
max_tokens: int = 128,
1765
+
max_tokens: Optional[int] = 16,
1766
1766
temperature: float = 0.8,
1767
1767
top_p: float = 0.95,
1768
1768
logprobs: Optional[int] = None,
@@ -1788,7 +1788,7 @@ def create_completion(
1788
1788
Args:
1789
1789
prompt: The prompt to generate text from.
1790
1790
suffix: A suffix to append to the generated text. If None, no suffix is appended.
1791
-
max_tokens: The maximum number of tokens to generate. If max_tokens <= 0, the maximum number of tokens to generate is unlimited and depends on n_ctx.
1791
+
max_tokens: The maximum number of tokens to generate. If max_tokens <= 0 or None, the maximum number of tokens to generate is unlimited and depends on n_ctx.
1792
1792
temperature: The temperature to use for sampling.
1793
1793
top_p: The top-p value to use for sampling.
1794
1794
logprobs: The number of logprobs to return. If None, no logprobs are returned.
stop: A list of strings to stop generation when encountered.
1947
-
max_tokens: The maximum number of tokens to generate. If max_tokens <= 0, the maximum number of tokens to generate is unlimited and depends on n_ctx.
1947
+
max_tokens: The maximum number of tokens to generate. If max_tokens <= 0 or None, the maximum number of tokens to generate is unlimited and depends on n_ctx.
1948
1948
repeat_penalty: The penalty to apply to repeated tokens.
0 commit comments