File tree Expand file tree Collapse file tree 2 files changed +24
-0
lines changed Expand file tree Collapse file tree 2 files changed +24
-0
lines changed Original file line number Diff line number Diff line change @@ -283,6 +283,15 @@ Navigate to [http://localhost:8000/docs](http://localhost:8000/docs) to see the
283
283
To bind to ` 0.0.0.0 ` to enable remote connections, use ` python3 -m llama_cpp.server --host 0.0.0.0 ` .
284
284
Similarly, to change the port (default is 8000), use ` --port ` .
285
285
286
+ You probably also want to set the prompt format. For chatml, use
287
+
288
+ ``` bash
289
+ python3 -m llama_cpp.server --model models/7B/llama-model.gguf --chat_format chatml
290
+ ```
291
+
292
+ That will format the prompt according to how the model expects it. You can find the prompt format in the model card.
293
+ For possible options, see [ llama_cpp/llama_chat_format.py] ( llama_cpp/llama_chat_format.py ) and look for lines starting with "@register_chat_format".
294
+
286
295
## Docker image
287
296
288
297
A Docker image is available on [ GHCR] ( https://ghcr.io/abetlen/llama-cpp-python ) . To run the server:
Original file line number Diff line number Diff line change @@ -456,6 +456,21 @@ def format_oasst_llama(
456
456
return ChatFormatterResponse (prompt = _prompt )
457
457
458
458
459
@register_chat_format("baichuan-2")
def format_baichuan2(
    messages: List[llama_types.ChatCompletionRequestMessage],
    **kwargs: Any,
) -> ChatFormatterResponse:
    """Build a Baichuan-2 style prompt from a list of chat messages.

    Baichuan-2 marks speaker turns with the reserved special tokens
    ``<reserved_106>`` (user) and ``<reserved_107>`` (assistant) and uses
    no separator between turns. A trailing assistant marker with no
    content is appended so the model continues from there.

    :param messages: Chat messages to render into a single prompt string.
    :param kwargs: Ignored; accepted for registry-call compatibility.
    :return: A ``ChatFormatterResponse`` wrapping the rendered prompt.
    """
    roles = {"user": "<reserved_106>", "assistant": "<reserved_107>"}
    # The system template is the identity format; kept to mirror the
    # structure of the other registered formatters in this module.
    sys_msg = "{system_message}".format(
        system_message=_get_system_message(messages)
    )
    turns = _map_roles(messages, roles)
    # Open an assistant turn for the model to complete.
    turns.append((roles["assistant"], None))
    return ChatFormatterResponse(
        prompt=_format_no_colon_single(sys_msg, turns, "")
    )
459
474
@register_chat_format ("openbuddy" )
460
475
def format_openbuddy (
461
476
messages : List [llama_types .ChatCompletionRequestMessage ],
You can’t perform that action at this time.
0 commit comments