@@ -1444,6 +1444,24 @@ def llama_get_model_tensor(
     ...
 
 
+# // Returns true if the model contains an encoder that requires llama_encode() call
+# LLAMA_API bool llama_model_has_encoder(const struct llama_model * model);
+@ctypes_function("llama_model_has_encoder", [llama_model_p_ctypes], ctypes.c_bool)
+def llama_model_has_encoder(model: llama_model_p, /) -> bool:
+    """Returns true if the model contains an encoder that requires llama_encode() call"""
+    ...
+
+
+# // For encoder-decoder models, this function returns id of the token that must be provided
+# // to the decoder to start generating output sequence. For other models, it returns -1.
+# LLAMA_API llama_token llama_model_decoder_start_token(const struct llama_model * model);
+@ctypes_function("llama_model_decoder_start_token", [llama_model_p_ctypes], ctypes.c_int32)
+def llama_model_decoder_start_token(model: llama_model_p, /) -> int:
+    """For encoder-decoder models, this function returns id of the token that must be provided
+    to the decoder to start generating output sequence. For other models, it returns -1."""
+    ...
+
+
 # // Returns 0 on success
 # LLAMA_API uint32_t llama_model_quantize(
 #         const char * fname_inp,
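For context, a minimal usage sketch of the two new bindings above. This is not part of the diff: the module alias and llama_token_bos are assumed to already exist in the low-level API and are shown only to make the snippet self-contained.

import llama_cpp.llama_cpp as llama_cpp

def pick_decoder_start_token(model) -> int:
    # Hypothetical helper, not part of this change.
    # Encoder-decoder models (e.g. T5) must be primed with a dedicated
    # decoder start token; other models report -1 here, in which case
    # falling back to BOS is a reasonable choice.
    if llama_cpp.llama_model_has_encoder(model):
        start = llama_cpp.llama_model_decoder_start_token(model)
        if start != -1:
            return start
    return llama_cpp.llama_token_bos(model)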
@@ -2271,6 +2289,22 @@ def llama_batch_free(batch: llama_batch, /):
     ...
 
 
+# // Processes a batch of tokens with the encoder part of the encoder-decoder model.
+# // Stores the encoder output internally for later use by the decoder cross-attention layers.
+# //   0 - success
+# // < 0 - error
+# LLAMA_API int32_t llama_encode(
+#         struct llama_context * ctx,
+#         struct llama_batch   batch);
+@ctypes_function("llama_encode", [llama_context_p_ctypes, llama_batch], ctypes.c_int32)
+def llama_encode(ctx: llama_context_p, batch: llama_batch, /) -> int:
+    """Processes a batch of tokens with the encoder part of the encoder-decoder model.
+    Stores the encoder output internally for later use by the decoder cross-attention layers.
+    0 - success
+    < 0 - error"""
+    ...
+
+
 # // Positive return values do not mean a fatal error, but rather a warning.
 # //   0 - success
 # //   1 - could not find a KV slot for the batch (try reducing the size of the batch or increase the context)
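And a sketch of how llama_encode might slot in ahead of the usual llama_decode loop. Again not part of the diff: batch construction follows the existing low-level llama_batch pattern, and the context is assumed to have been created elsewhere with llama_new_context_with_model.

import llama_cpp.llama_cpp as llama_cpp

def encode_prompt(ctx, tokens: list[int]) -> None:
    # Hypothetical helper, not part of this change.
    # Build a single-sequence batch over the prompt tokens.
    batch = llama_cpp.llama_batch_init(len(tokens), 0, 1)
    try:
        batch.n_tokens = len(tokens)
        for i, tok in enumerate(tokens):
            batch.token[i] = tok
            batch.pos[i] = i
            batch.n_seq_id[i] = 1
            batch.seq_id[i][0] = 0
            batch.logits[i] = False  # the encoder pass produces no logits
        # Run the encoder half; its output stays cached inside ctx for the
        # decoder's cross-attention, so there is nothing to read back here.
        if llama_cpp.llama_encode(ctx, batch) != 0:
            raise RuntimeError("llama_encode failed")
    finally:
        llama_cpp.llama_batch_free(batch)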