From 3bef2c2c1393726e855573f83876fc20858c7179 Mon Sep 17 00:00:00 2001 From: Alvaro Bartolome Date: Thu, 22 Feb 2024 12:04:23 +0100 Subject: [PATCH 1/2] Add Google's Gemma formatting via `chat_format="gemma"` --- llama_cpp/llama_chat_format.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/llama_cpp/llama_chat_format.py b/llama_cpp/llama_chat_format.py index 8dd0ddfd2..ea9009938 100644 --- a/llama_cpp/llama_chat_format.py +++ b/llama_cpp/llama_chat_format.py @@ -993,6 +993,26 @@ def format_saiga( return ChatFormatterResponse(prompt=_prompt.strip()) +# Chat format for Google's Gemma models, see more details and available models: +# https://huggingface.co/collections/google/gemma-release-65d5efbccdbb8c4202ec078b +@register_chat_format("gemma") +def format_gemma( + messages: List[llama_types.ChatCompletionRequestMessage], + **kwargs: Any, +) -> ChatFormatterResponse: + system_message = _get_system_message(messages) + if system_message is not None and system_message != "": + raise ValueError( + "`role='system'` messages are not allowed on Google's Gemma models." + ) + _roles = dict(user="user\n", assistant="model\n") + _sep = "\n" + _messages = _map_roles(messages, _roles) + _messages.append((_roles["assistant"], None)) + _prompt = _format_no_colon_single(system_message="", messages=_messages, sep=_sep) + return ChatFormatterResponse(prompt=_prompt, stop=_sep) + + # Tricky chat formats that require custom chat handlers From afd286967112c4a722815304ada39860def12857 Mon Sep 17 00:00:00 2001 From: Alvaro Bartolome Date: Fri, 23 Feb 2024 08:37:39 +0100 Subject: [PATCH 2/2] Replace `raise ValueError` with `logger.debug` Co-authored-by: Andrei --- llama_cpp/llama_chat_format.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/llama_cpp/llama_chat_format.py b/llama_cpp/llama_chat_format.py index ea9009938..16bccb942 100644 --- a/llama_cpp/llama_chat_format.py +++ b/llama_cpp/llama_chat_format.py @@ -14,6 +14,7 @@ import llama_cpp.llama_types as llama_types import llama_cpp.llama_grammar as llama_grammar +from ._logger import logger from ._utils import suppress_stdout_stderr, Singleton ### Common Chat Templates and Special Tokens ### @@ -1002,7 +1003,7 @@ def format_gemma( ) -> ChatFormatterResponse: system_message = _get_system_message(messages) if system_message is not None and system_message != "": - raise ValueError( + logger.debug( "`role='system'` messages are not allowed on Google's Gemma models." ) _roles = dict(user="user\n", assistant="model\n")