8000
We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 153a004, commit cb79171. Copy full SHA for cb79171
llama_cpp/llama.py
@@ -281,7 +281,7 @@ def __init__(
281
)
282
self.context_params.yarn_orig_ctx = yarn_orig_ctx if yarn_orig_ctx != 0 else 0
283
self.context_params.mul_mat_q = mul_mat_q
284
- self.context_params.logits_all = logits_all
+ self.context_params.logits_all = logits_all if draft_model is None else True # Must be set to True for speculative decoding
285
self.context_params.embedding = embedding
286
self.context_params.offload_kqv = offload_kqv
287
0 commit comments