Update llama.cpp · KoolSameer/llama-cpp-python@98ae4e5

Commit 98ae4e5

Update llama.cpp
1 parent a1b2d5c commit 98ae4e5

File tree

3 files changed: +43 −1 lines changed

3 files changed

+43
-1
lines changed

Makefile

Lines changed: 3 additions & 0 deletions
@@ -33,6 +33,9 @@ deploy.gh-docs:
 	mkdocs build
 	mkdocs gh-deploy

+test:
+	python3 -m pytest
+
 clean:
 	- cd vendor/llama.cpp && make clean
 	- cd vendor/llama.cpp && rm libllama.so
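The new test target simply invokes python3 -m pytest, so the package's test suite can now be run with "make test" from the repository root, alongside the existing docs and clean targets.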

llama_cpp/llama_cpp.py

Lines changed: 39 additions & 0 deletions
@@ -2,6 +2,7 @@
 import os
 import ctypes
 from ctypes import (
+    c_double,
     c_int,
     c_float,
     c_char_p,

@@ -169,6 +170,7 @@ class llama_token_data_array(Structure):
 # // context pointer passed to the progress callback
 # void * progress_callback_user_data;

+
 # // Keep the booleans together to avoid misalignment during copy-by-value.
 # bool low_vram; // if true, reduce VRAM usage at the cost of performance
 # bool f16_kv; // use fp16 for KV cache

@@ -256,6 +258,34 @@ class llama_model_quantize_params(Structure):
 ]


+# // performance timing information
+# struct llama_timings {
+#     double t_start_ms;
+#     double t_end_ms;
+#     double t_load_ms;
+#     double t_sample_ms;
+#     double t_p_eval_ms;
+#     double t_eval_ms;
+
+
+#     int32_t n_sample;
+#     int32_t n_p_eval;
+#     int32_t n_eval;
+# };
+class llama_timings(Structure):
+    _fields_ = [
+        ("t_start_ms", c_double),
+        ("t_end_ms", c_double),
+        ("t_load_ms", c_double),
+        ("t_sample_ms", c_double),
+        ("t_p_eval_ms", c_double),
+        ("t_eval_ms", c_double),
+        ("n_sample", c_int32),
+        ("n_p_eval", c_int32),
+        ("n_eval", c_int32),
+    ]
+
+
 # LLAMA_API struct llama_context_params llama_context_default_params();
 def llama_context_default_params() -> llama_context_params:
     return _lib.llama_context_default_params()

@@ -991,6 +1021,15 @@ def llama_sample_token(
 # Performance information


+# LLAMA_API struct llama_timings llama_get_timings(struct llama_context * ctx);
+def llama_get_timings(ctx: llama_context_p) -> llama_timings:
+    return _lib.llama_get_timings(ctx)
+
+
+_lib.llama_get_timings.argtypes = [llama_context_p]
+_lib.llama_get_timings.restype = llama_timings
+
+
 # LLAMA_API void llama_print_timings(struct llama_context * ctx);
 def llama_print_timings(ctx: llama_context_p):
     _lib.llama_print_timings(ctx)
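Taken together, these additions expose llama.cpp's performance counters as plain struct fields instead of requiring callers to parse the stderr output of llama_print_timings. A minimal usage sketch, assuming ctx is an already-initialized llama_context_p on which some tokens have already been evaluated (model loading and evaluation are elided here):

    from llama_cpp.llama_cpp import llama_get_timings

    # The struct is returned by value; ctypes exposes its
    # fields as ordinary Python attributes.
    timings = llama_get_timings(ctx)

    load_s = timings.t_load_ms / 1000.0   # model load time, seconds
    eval_s = timings.t_eval_ms / 1000.0   # total eval time, seconds

    print(f"model loaded in {load_s:.2f} s")
    if timings.n_eval > 0 and eval_s > 0:
        # average generation speed over all eval calls so far
        print(f"eval speed: {timings.n_eval / eval_s:.2f} tokens/s")

Note that setting restype to llama_timings is what makes the by-value return work: ctypes defaults to treating a foreign function's return value as a C int, which would garble the struct fields.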

vendor/llama.cpp

Lines changed: 1 addition & 1 deletion (the submodule commit pointer is updated, presumably to pull in the upstream llama_get_timings API)
