feat: Update llama.cpp · ducky777/llama-cpp-python@6225f02 · GitHub

Commit 6225f02

feat: Update llama.cpp
1 parent 748c0ce commit 6225f02

File tree

2 files changed: 54 additions & 1 deletion


llama_cpp/llama_cpp.py

Lines changed: 53 additions & 0 deletions

@@ -664,6 +664,18 @@ class llama_timings(Structure):
     ]
 
 
+# // used in chat template
+# typedef struct llama_chat_message {
+#     const char * role;
+#     const char * content;
+# } llama_chat_message;
+class llama_chat_message(Structure):
+    _fields_ = [
+        ("role", c_char_p),
+        ("content", c_char_p),
+    ]
+
+
 # // Helpers for getting default parameters
 # LLAMA_API struct llama_model_params llama_model_default_params(void);
 def llama_model_default_params() -> llama_model_params:
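
Note: the llama_chat_message structure added above maps the C struct onto two c_char_p fields, so role and content are expected as byte strings. A minimal sketch of packing a chat into a contiguous ctypes array of this structure, which is what a `const llama_chat_message *` parameter expects on the C side (the message list and variable names are illustrative, not part of the commit):

import ctypes

from llama_cpp.llama_cpp import llama_chat_message

# Both fields are c_char_p, so roles and contents must be bytes.
_messages = [
    (b"system", b"You are a helpful assistant."),
    (b"user", b"Hello!"),
]

# Pack the messages into a contiguous ctypes array of llama_chat_message.
chat_array = (llama_chat_message * len(_messages))(
    *(llama_chat_message(role=r, content=c) for r, c in _messages)
)
n_msg = len(_messages)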
@@ -1956,6 +1968,47 @@ def llama_token_to_piece(
 _lib.llama_token_to_piece.restype = c_int32
 
 
+# /// Apply chat template. Inspired by hf apply_chat_template() on python.
+# /// Both "model" and "custom_template" are optional, but at least one is required. "custom_template" has higher precedence than "model"
+# /// NOTE: This function only support some known jinja templates. It is not a jinja parser.
+# /// @param tmpl A Jinja template to use for this chat. If this is nullptr, the model’s default chat template will be used instead.
+# /// @param chat Pointer to a list of multiple llama_chat_message
+# /// @param n_msg Number of llama_chat_message in this chat
+# /// @param add_ass Whether to end the prompt with the token(s) that indicate the start of an assistant message.
+# /// @param buf A buffer to hold the output formatted prompt. The recommended alloc size is 2 * (total number of characters of all messages)
+# /// @param length The size of the allocated buffer
+# /// @return The total number of bytes of the formatted prompt. If is it larger than the size of buffer, you may need to re-alloc it and then re-apply the template.
+# LLAMA_API int32_t llama_chat_apply_template(
+#           const struct llama_model * model,
+#                         const char * tmpl,
+#    const struct llama_chat_message * chat,
+#                               size_t n_msg,
+#                                 bool add_ass,
+#                               char * buf,
+#                              int32_t length);
+def llama_chat_apply_template(
+    model: llama_model_p,
+    tmpl: bytes,
+    chat: "ctypes._Pointer[llama_chat_message]",
+    n_msg: int,
+) -> int:
+    return _lib.llama_chat_apply_template(
+        model,
+        tmpl,
+        chat,
+        n_msg
+    )
+
+_lib.llama_chat_apply_template.argtypes = [
+    ctypes.c_void_p,
+    ctypes.c_char_p,
+    ctypes.POINTER(llama_chat_message),
+    ctypes.c_size_t
+]
+_lib.llama_chat_apply_template.restype = ctypes.c_int32
+
+
+
 # //
 # // Grammar
 # //
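
Note: the Python wrapper above forwards only model, tmpl, chat, and n_msg, while the C declaration quoted in its comments also takes add_ass, buf, and length. Below is a sketch of driving the C entry point with its full argument list, following the buffer-sizing advice from the header comment; it widens the argtypes registered by this commit, and the helper name apply_chat_template_str is illustrative rather than part of the library.

import ctypes

from llama_cpp.llama_cpp import _lib, llama_chat_message, llama_model_p

# Register the full seven-parameter prototype from the C declaration.
_lib.llama_chat_apply_template.argtypes = [
    ctypes.c_void_p,                     # const struct llama_model * model
    ctypes.c_char_p,                     # const char * tmpl (None -> model's default template)
    ctypes.POINTER(llama_chat_message),  # const struct llama_chat_message * chat
    ctypes.c_size_t,                     # size_t n_msg
    ctypes.c_bool,                       # bool add_ass
    ctypes.c_char_p,                     # char * buf
    ctypes.c_int32,                      # int32_t length
]
_lib.llama_chat_apply_template.restype = ctypes.c_int32


def apply_chat_template_str(model: llama_model_p, tmpl, chat, n_msg: int) -> str:
    # Recommended allocation: 2 * total number of characters of all messages.
    length = 2 * sum(len(chat[i].content or b"") for i in range(n_msg))
    buf = ctypes.create_string_buffer(length)
    n = _lib.llama_chat_apply_template(model, tmpl, chat, n_msg, True, buf, length)
    if n > length:
        # Buffer was too small: re-alloc to the reported size and re-apply.
        buf = ctypes.create_string_buffer(n)
        n = _lib.llama_chat_apply_template(model, tmpl, chat, n_msg, True, buf, n)
    return buf.raw[:n].decode("utf-8")

With the array from the earlier sketch, apply_chat_template_str(model, None, chat_array, n_msg) would format the chat using the model's built-in template (tmpl = nullptr) and end the prompt with the assistant prefix (add_ass = True), per the parameter descriptions above.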

vendor/llama.cpp

Lines changed: 1 addition & 1 deletion (submodule commit bump)
