@@ -77,6 +77,7 @@ class llama_context_params(Structure):
             c_bool,
         ),  # the llama_eval() call computes all logits, not just the last one
         ("vocab_only", c_bool),  # only load the vocabulary, no weights
+        ("use_mmap", c_bool),  # use mmap if possible
         ("use_mlock", c_bool),  # force system to keep model in RAM
         ("embedding", c_bool),  # embedding mode only
         # called with a progress value between 0 and 1, pass NULL to disable
@@ -99,6 +100,17 @@ def llama_context_default_params() -> llama_context_params:
 _lib.llama_context_default_params.argtypes = []
 _lib.llama_context_default_params.restype = llama_context_params
 
+def llama_mmap_supported() -> c_bool:
+    return _lib.llama_mmap_supported()
+
+_lib.llama_mmap_supported.argtypes = []
+_lib.llama_mmap_supported.restype = c_bool
+
+def llama_mlock_supported() -> c_bool:
+    return _lib.llama_mlock_supported()
+
+_lib.llama_mlock_supported.argtypes = []
+_lib.llama_mlock_supported.restype = c_bool
 
 # Various functions for loading a ggml llama model.
 # Allocate (almost) all memory needed for the model.
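For context, a minimal sketch of how the new flag and helpers might be used from Python. The module name llama_cpp is an assumption; the functions and fields are the ones added in the diff above:

# Minimal usage sketch (assumes these bindings are importable as llama_cpp).
import llama_cpp

params = llama_cpp.llama_context_default_params()
# Only request mmap/mlock when the underlying library reports support.
params.use_mmap = bool(llama_cpp.llama_mmap_supported())
params.use_mlock = bool(llama_cpp.llama_mlock_supported())

Note that because restype is set to c_bool, ctypes already converts the return value to a Python bool, so the bool() wrappers above are purely defensive.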