File tree Expand file tree Collapse file tree 2 files changed +24
-0
lines changed Expand file tree Collapse file tree 2 files changed +24
-0
lines changed Original file line number Diff line number Diff line change @@ -283,6 +283,15 @@ Navigate to [http://localhost:8000/docs](http://localhost:8000/docs) to see the
283
283
To bind to ` 0.0.0.0 ` to enable remote connections, use ` python3 -m llama_cpp.server --host 0.0.0.0 ` .
284
284
Similarly, to change the port (default is 8000), use ` --port ` .
285
285
286
+ You probably also want to set the prompt format. For chatml, use
287
+
288
+ ``` bash
289
+ python3 -m llama_cpp.server --model models/7B/llama-model.gguf --chat_format chatml
290
+ ```
291
+
292
+ That will format the prompt according to how the model expects it. You can find the prompt format in the model card.
293
+ For possible options, see [ llama_cpp/llama_chat_format.py] ( llama_cpp/llama_chat_format.py ) and look for lines starting with "@register_chat_format".
294
+
286
295
## Docker image
287
296
288
297
A Docker image is available on [ GHCR] ( https://ghcr.io/abetlen/llama-cpp-python ) . To run the server:
Original file line number Diff line number Diff line change @@ -456,6 +456,21 @@ def format_oasst_llama(
456
456
return ChatFormatterResponse (prompt = _prompt )
457
457
458
458
459
@register_chat_format("baichuan-2")
def format_baichuan2(
    messages: List[llama_types.ChatCompletionRequestMessage],
    **kwargs: Any,
) -> ChatFormatterResponse:
    """Build a Baichuan-2 style prompt from a list of chat messages.

    Baichuan-2 marks speaker turns with the reserved special tokens
    ``<reserved_106>`` (user) and ``<reserved_107>`` (assistant) and uses
    no separator between turns. A trailing assistant marker with no
    content is appended so the model continues from there.

    :param messages: Chat messages to render into a single prompt string.
    :param kwargs: Ignored; accepted for registry-call compatibility.
    :return: A ``ChatFormatterResponse`` wrapping the rendered prompt.
    """
    roles = {"user": "<reserved_106>", "assistant": "<reserved_107>"}
    # The system template is the identity format; kept to mirror the
    # structure of the other registered formatters in this module.
    sys_msg = "{system_message}".format(
        system_message=_get_system_message(messages)
    )
    turns = _map_roles(messages, roles)
    # Open an assistant turn for the model to complete.
    turns.append((roles["assistant"], None))
    return ChatFormatterResponse(
        prompt=_format_no_colon_single(sys_msg, turns, "")
    )
459
474
@register_chat_format ("openbuddy" )
460
475
def format_openbuddy (
461
476
messages : List [llama_types .ChatCompletionRequestMessage ],
You can’t perform that action at this time.
0 commit comments