Update llama.cpp · matthoffner/llama-cpp-python@b994296 · GitHub

Commit b994296

Update llama.cpp
1 parent 058b134 commit b994296

2 files changed: +17 -13 lines changed

llama_cpp/llama_cpp.py

Lines changed: 16 additions & 12 deletions
@@ -5,6 +5,8 @@
     c_int,
     c_float,
     c_char_p,
+    c_int32,
+    c_uint32,
     c_void_p,
     c_bool,
     POINTER,
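
Context for the new imports: ctypes.c_int maps to the platform's C int, while c_int32 and c_uint32 are fixed 32-bit types, matching the int32_t/uint32_t fields that llama.cpp now declares. A small standalone sketch (plain ctypes, nothing from this repo) showing the distinction:

# Standalone ctypes sketch; independent of llama.cpp.
import ctypes

print(ctypes.sizeof(ctypes.c_int))     # follows the platform's C int (commonly 4)
print(ctypes.sizeof(ctypes.c_int32))   # always 4 bytes
print(ctypes.sizeof(ctypes.c_uint32))  # always 4 bytes, unsigned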
@@ -105,6 +107,9 @@ def _load_shared_library(lib_base_name: str):
 LLAMA_SESSION_MAGIC = LLAMA_FILE_MAGIC_GGSN
 LLAMA_SESSION_VERSION = c_int(1)
 
+# #define LLAMA_DEFAULT_SEED 0xFFFFFFFF
+LLAMA_DEFAULT_SEED = c_int(0xFFFFFFFF)
+
 # struct llama_model;
 llama_model_p = c_void_p
 
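The new constant is just the old "-1 for random" sentinel reinterpreted as an unsigned 32-bit value. A minimal sketch, standard library only, checking that equivalence:

from ctypes import c_uint32

LLAMA_DEFAULT_SEED = 0xFFFFFFFF  # mirrors the #define added upstream

# -1 viewed as an unsigned 32-bit integer is 0xFFFFFFFF, so code that
# passed -1 to request a random seed still hits the new default.
assert c_uint32(-1).value == LLAMA_DEFAULT_SEED
assert (-1) & 0xFFFFFFFF == LLAMA_DEFAULT_SEED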
@@ -153,18 +158,17 @@ class llama_token_data_array(Structure):
 
 
 # struct llama_context_params {
-#     int seed; // RNG seed, -1 for random
-#     int n_ctx; // text context
-#     int n_batch; // prompt processing batch size
-#     int n_gpu_layers; // number of layers to store in VRAM
-#     int main_gpu; // the GPU that is used for scratch and small tensors
+#     uint32_t seed; // RNG seed, -1 for random
+#     int32_t n_ctx; // text context
+#     int32_t n_batch; // prompt processing batch size
+#     int32_t n_gpu_layers; // number of layers to store in VRAM
+#     int32_t main_gpu; // the GPU that is used for scratch and small tensors
 #     float tensor_split[LLAMA_MAX_DEVICES]; // how to split layers across multiple GPUs
 #     // called with a progress value between 0 and 1, pass NULL to disable
 #     llama_progress_callback progress_callback;
 #     // context pointer passed to the progress callback
 #     void * progress_callback_user_data;
 
-
 #     // Keep the booleans together to avoid misalignment during copy-by-value.
 #     bool low_vram; // if true, reduce VRAM usage at the cost of performance
 #     bool f16_kv; // use fp16 for KV cache
@@ -176,11 +180,11 @@ class llama_token_data_array(Structure):
 # };
 class llama_context_params(Structure):
     _fields_ = [
-        ("seed", c_int),
-        ("n_ctx", c_int),
-        ("n_batch", c_int),
-        ("n_gpu_layers", c_int),
-        ("main_gpu", c_int),
+        ("seed", c_uint32),
+        ("n_ctx", c_int32),
+        ("n_batch", c_int32),
+        ("n_gpu_layers", c_int32),
+        ("main_gpu", c_int32),
         ("tensor_split", c_float * LLAMA_MAX_DEVICES.value),
         ("progress_callback", llama_progress_callback),
         ("progress_callback_user_data", c_void_p),
@@ -453,7 +457,7 @@ def llama_get_kv_cache_token_count(ctx: llama_context_p) -> int:
 
 # Sets the current rng seed.
 # LLAMA_API void llama_set_rng_seed(struct llama_context * ctx, int seed);
-def llama_set_rng_seed(ctx: llama_context_p, seed: c_int):
+def llama_set_rng_seed(ctx: llama_context_p, seed: c_uint32):
     return _lib.llama_set_rng_seed(ctx, seed)
 
 
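With the wrapper now annotated as taking a c_uint32, callers that still treat the seed as a signed int may want to normalize it before handing it over. A hypothetical usage sketch; reseed is not part of the library, and it assumes the llama_cpp package imports successfully (compiled libllama available) and that ctx is a context created through the existing API:

from ctypes import c_uint32
import llama_cpp  # assumed importable with its shared library built

def reseed(ctx, seed: int) -> None:
    # Hypothetical helper: fold any Python int (including the legacy -1
    # sentinel) into the unsigned 32-bit range the binding now expects.
    llama_cpp.llama_set_rng_seed(ctx, c_uint32(seed & 0xFFFFFFFF))

# Usage (assuming ctx is an existing llama_context_p):
#   reseed(ctx, -1)   # 0xFFFFFFFF, i.e. LLAMA_DEFAULT_SEED / "random"
#   reseed(ctx, 42)   # deterministic sampling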
vendor/llama.cpp

Lines changed: 1 addition & 1 deletion (git submodule commit updated)
