@@ -5,6 +5,8 @@
     c_int,
     c_float,
     c_char_p,
+    c_int32,
+    c_uint32,
     c_void_p,
     c_bool,
     POINTER,
@@ -105,6 +107,9 @@ def _load_shared_library(lib_base_name: str):
 LLAMA_SESSION_MAGIC = LLAMA_FILE_MAGIC_GGSN
 LLAMA_SESSION_VERSION = c_int(1)
 
+# #define LLAMA_DEFAULT_SEED 0xFFFFFFFF
+LLAMA_DEFAULT_SEED = c_int(0xFFFFFFFF)
+
 # struct llama_model;
 llama_model_p = c_void_p
 
@@ -153,18 +158,17 @@ class llama_token_data_array(Structure):
 
 
 # struct llama_context_params {
-#     int seed;          // RNG seed, -1 for random
-#     int n_ctx;         // text context
-#     int n_batch;       // prompt processing batch size
-#     int n_gpu_layers;  // number of layers to store in VRAM
-#     int main_gpu;      // the GPU that is used for scratch and small tensors
+#     uint32_t seed;         // RNG seed, -1 for random
+#     int32_t n_ctx;         // text context
+#     int32_t n_batch;       // prompt processing batch size
+#     int32_t n_gpu_layers;  // number of layers to store in VRAM
+#     int32_t main_gpu;      // the GPU that is used for scratch and small tensors
 #     float tensor_split[LLAMA_MAX_DEVICES]; // how to split layers across multiple GPUs
 #     // called with a progress value between 0 and 1, pass NULL to disable
 #     llama_progress_callback progress_callback;
 #     // context pointer passed to the progress callback
 #     void * progress_callback_user_data;
 
-
 #     // Keep the booleans together to avoid misalignment during copy-by-value.
 #     bool low_vram;  // if true, reduce VRAM usage at the cost of performance
 #     bool f16_kv;    // use fp16 for KV cache
@@ -176,11 +180,11 @@ class llama_token_data_array(Structure):
 # };
 class llama_context_params(Structure):
     _fields_ = [
-        ("seed", c_int),
-        ("n_ctx", c_int),
-        ("n_batch", c_int),
-        ("n_gpu_layers", c_int),
-        ("main_gpu", c_int),
+        ("seed", c_uint32),
+        ("n_ctx", c_int32),
+        ("n_batch", c_int32),
+        ("n_gpu_layers", c_int32),
+        ("main_gpu", c_int32),
         ("tensor_split", c_float * LLAMA_MAX_DEVICES.value),
         ("progress_callback", llama_progress_callback),
         ("progress_callback_user_data", c_void_p),
@@ -453,7 +457,7 @@ def llama_get_kv_cache_token_count(ctx: llama_context_p) -> int:
 
 # Sets the current rng seed.
 # LLAMA_API void llama_set_rng_seed(struct llama_context * ctx, int seed);
-def llama_set_rng_seed(ctx: llama_context_p, seed: c_int):
+def llama_set_rng_seed(ctx: llama_context_p, seed: c_uint32):
     return _lib.llama_set_rng_seed(ctx, seed)
 
 
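A minimal standalone ctypes sketch (not part of the commit) of why the signed-to-unsigned change matters for the seed: the 0xFFFFFFFF sentinel from the diff above wraps to -1 through a signed 32-bit field, which is exactly the historical "RNG seed, -1 for random" value, while the new c_uint32 field preserves it unchanged, matching the uint32_t seed in the C struct. ctypes integer types do no overflow checking, so the truncation is silent.

import ctypes

# Mirrors the LLAMA_DEFAULT_SEED value introduced in the diff above.
LLAMA_DEFAULT_SEED = 0xFFFFFFFF

# Through the old signed c_int32 view, the sentinel silently wraps to -1,
# the historical "random seed" value from the struct comment.
assert ctypes.c_int32(LLAMA_DEFAULT_SEED).value == -1

# Through the new c_uint32 field type, it round-trips unchanged.
assert ctypes.c_uint32(LLAMA_DEFAULT_SEED).value == 0xFFFFFFFF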