log prompt + nits · ochafik/llama.cpp@8205246 · GitHub

Commit 8205246

Author: Olivier Chafik

log prompt + nits

1 parent f223df0

File tree: 3 files changed, +8 −6 lines

examples/server/server.cpp
examples/server/tests/unit/test_chat_completion.py
src/llama-grammar.h

examples/server/server.cpp

Lines changed: 3 additions & 1 deletion

@@ -3823,7 +3823,9 @@ int main(int argc, char ** argv) {
             std::vector<server_task> tasks;

             try {
-                std::vector<llama_tokens> tokenized_prompts = tokenize_input_prompts(ctx_server.vocab, data.at("prompt"), true, true);
+                const auto & prompt = data.at("prompt");
+                LOG_DBG("Prompt: %s\n", prompt.is_string() ? prompt.get<std::string>().c_str() : prompt.dump(2).c_str());
+                std::vector<llama_tokens> tokenized_prompts = tokenize_input_prompts(ctx_server.vocab, prompt, true, true);
                 tasks.reserve(tokenized_prompts.size());
                 for (size_t i = 0; i < tokenized_prompts.size(); i++) {
                     server_task task = server_task(type);
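The server.cpp change hoists data.at("prompt") into a local reference (avoiding a second map lookup) and logs the raw prompt before tokenization: a string prompt is printed verbatim, while anything else (a token array, or a mixed string/token prompt) is pretty-printed as JSON via dump(2). Below is a minimal standalone sketch of that branching, assuming nlohmann::json (the library behind the server's `data` object) and with plain printf standing in for llama.cpp's LOG_DBG macro:

#include <cstdio>
#include <string>
#include <nlohmann/json.hpp>

using json = nlohmann::json;

// Mirrors the commit's logging pattern: a string prompt is printed as-is;
// any other JSON value is pretty-printed with a 2-space indent via dump(2).
static void log_prompt(const json & prompt) {
    printf("Prompt: %s\n", prompt.is_string()
        ? prompt.get<std::string>().c_str()
        : prompt.dump(2).c_str());
}

int main() {
    log_prompt("What is the best book");          // plain string prompt
    log_prompt(json::parse("[1, 2, 3]"));         // pre-tokenized prompt
    log_prompt(json::parse(R"(["a", [4, 5]])"));  // mixed string/token prompt
    return 0;
}

Note why the ternary matters: calling get<std::string>() on a non-string JSON value would throw, so the is_string() check is what lets pre-tokenized prompts pass through the debug log safely.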

examples/server/tests/unit/test_chat_completion.py

Lines changed: 1 addition & 1 deletion

@@ -15,7 +15,7 @@ def create_server():
     [
         (None, "Book", "What is the best book", 8, "(Suddenly)+|\\{ \" Sarax.", 77, 8, "length", False, None),
         (None, "Book", "What is the best book", 8, "(Suddenly)+|\\{ \" Sarax.", 77, 8, "length", True, None),
-        (None, "Book", "What is the best book", 8, "^ blue|I want to play with", 23, 8, "length", True, "This is not a chat template, it is"),
+        (None, "Book", "What is the best book", 8, "I want to play with", 23, 8, "length", True, "This is not a chat template, it is"),
         ("codellama70b", "You are a coding assistant.", "Write the fibonacci function in c++.", 128, "(Aside|she|felter|alonger)+", 104, 64, "length", False, None),
         ("codellama70b", "You are a coding assistant.", "Write the fibonacci function in c++.", 128, "(Aside|she|felter|alonger)+", 104, 64, "length", True, None),
     ]

src/llama-grammar.h

Lines changed: 4 additions & 4 deletions

@@ -118,10 +118,10 @@ struct llama_grammar {
     // lazy grammars wait for trigger words or tokens before constraining the sampling.
     // we still have trigger_tokens for non-lazy grammars to force printing of special trigger tokens.
     // (useful e.g. for tool_choice=required)
-    bool                     lazy;
-    bool                     awaiting_trigger; // Initialized to true for lazy grammars only
-    std::string              trigger_buffer;   // Output buffered by lazy grammar. Will be cleared once trigger is found.
-    std::vector<llama_token> trigger_tokens;   // Tokens that trigger a lazy grammar, or tokens to force printing of (even if special).
+    bool                     lazy = false;
+    bool                     awaiting_trigger = false; // Initialized to true for lazy grammars only
+    std::string              trigger_buffer;   // Output buffered by lazy grammar. Will be cleared once trigger is found.
+    std::vector<llama_token> trigger_tokens;   // Tokens that trigger a lazy grammar, or tokens to force printing of (even if special).
     std::vector<std::string> trigger_words;
 };
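The llama-grammar.h nit adds in-class default member initializers to the two flags, so a llama_grammar constructed without explicitly setting them no longer starts with indeterminate bool values (reading an uninitialized bool is undefined behavior in C++). A tiny sketch of the effect, using hypothetical stand-in structs rather than the real header:

#include <cstdio>

struct grammar_before {
    bool lazy;              // indeterminate after `grammar_before g;`
    bool awaiting_trigger;  // reading these uninitialized is undefined behavior
};

struct grammar_after {
    bool lazy             = false;  // always well-defined,
    bool awaiting_trigger = false;  // even when default-initialized
};

int main() {
    grammar_after g;  // members are guaranteed false
    printf("lazy=%d awaiting_trigger=%d\n", g.lazy, g.awaiting_trigger);
    return 0;
}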

0 commit comments