@@ -93,9 +93,7 @@ def _load_shared_library(lib_base_name: str):
93
93
94
94
# from ggml-backend.h
# typedef bool (*ggml_backend_sched_eval_callback)(struct ggml_tensor * t, bool ask, void * user_data);
#
# ctypes function-pointer type mirroring the C callback above:
# returns bool; takes (struct ggml_tensor * t, bool ask, void * user_data).
ggml_backend_sched_eval_callback = ctypes.CFUNCTYPE(c_bool, c_void_p, c_bool, c_void_p)
99
97
100
98
# llama.h bindings
101
99
@@ -2174,6 +2172,34 @@ def llama_sample_typical(
2174
2172
_lib .llama_sample_typical .restype = None
2175
2173
2176
2174
2175
# /// @details Dynamic temperature implementation described in the paper https://arxiv.org/abs/2309.02772.
# LLAMA_API void llama_sample_entropy(
#             struct llama_context * ctx,
#            llama_token_data_array * candidates_p,
#                              float   min_temp,
#                              float   max_temp,
#                              float   exponent_val);
def llama_sample_entropy(
    ctx: llama_context_p,
    candidates,  # type: _Pointer[llama_token_data_array]
    min_temp: Union[c_float, float],
    max_temp: Union[c_float, float],
    exponent_val: Union[c_float, float],
):
    """Dynamic temperature implementation described in the paper https://arxiv.org/abs/2309.02772.

    Thin ctypes wrapper over the C function `llama_sample_entropy`; the C side
    returns void, so the wrapped call's return value is None.
    NOTE(review): presumably mutates *candidates* in place like the sibling
    llama_sample_* bindings — confirm against llama.h.
    """
    return _lib.llama_sample_entropy(ctx, candidates, min_temp, max_temp, exponent_val)
2191
+
2192
+
2193
# Register the C signature for llama_sample_entropy so ctypes marshals the
# arguments correctly (matches the prototype documented above the wrapper);
# the C function returns void.
_lib.llama_sample_entropy.argtypes = [
    llama_context_p,
    llama_token_data_array_p,
    c_float,
    c_float,
    c_float,
]
_lib.llama_sample_entropy.restype = None
2201
+
2202
+
2177
2203
# LLAMA_API void llama_sample_temp(
2178
2204
# struct llama_context * ctx,
2179
2205
# llama_token_data_array * candidates,
0 commit comments