diff --git a/llama_cpp/llama.py b/llama_cpp/llama.py index fdde7ea01..5dae9dde0 100644 --- a/llama_cpp/llama.py +++ b/llama_cpp/llama.py @@ -790,8 +790,9 @@ def generate( else: break if longest_prefix > 0: - if self.verbose: - print("Llama.generate: prefix-match hit", file=sys.stderr) + # disable this + # if self.verbose: + # print("Llama.generate: prefix-match hit", file=sys.stderr) reset = False tokens = tokens[longest_prefix:] self.n_tokens = longest_prefix