8000
We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent a4a1bbe commit 353e18a (Copy full SHA for 353e18a)
llama_cpp/llama.py
@@ -169,6 +169,11 @@ def sample(
169
The sampled token.
170
"""
171
assert self.ctx is not None
172
+ # Temporary workaround for https://github.com/ggerganov/llama.cpp/issues/684
173
+ if temp == 0.0:
174
+ temp = 1.0
175
+ top_p = 0.0
176
+ top_k = 1
177
return llama_cpp.llama_sample_top_p_top_k(
178
ctx=self.ctx,
179
last_n_tokens_data=(llama_cpp.llama_token * self.last_n_tokens_size)(
@@ -209,11 +214,6 @@ def generate(
209
214
Yields:
210
215
The generated tokens.
211
216
212
- # Temporary workaround for https://github.com/ggerganov/llama.cpp/issues/684
213
- if temp == 0.0:
- temp = 1.0
- top_p = 0.0
- top_k = 1
217
218
self.reset()
219
while True:
0 commit comments