@@ -2380,6 +2380,18 @@ def llama_token_get_type(
2380
2380
...
2381
2381
2382
2382
2383
# // Check if the token is supposed to end generation (end-of-generation, eg. EOS, EOT, etc.)
# LLAMA_API bool llama_token_is_eog(const struct llama_model * model, llama_token token);
@ctypes_function(
    "llama_token_is_eog", [llama_model_p_ctypes, llama_token], ctypes.c_bool
)
def llama_token_is_eog(
    model: llama_model_p, token: Union[llama_token, int], /
) -> bool:
    """Check if the token is supposed to end generation (end-of-generation, eg. EOS, EOT, etc.)"""
    ...


2383
2395
# // Special tokens
2384
2396
2385
2397
@@ -2434,7 +2446,7 @@ def llama_add_eos_token(model: llama_model_p, /) -> int:
2434
2446
...
2435
2447
2436
2448
2437
- # // codellama infill tokens
2449
+ # // Codellama infill tokens
2438
2450
# LLAMA_API llama_token llama_token_prefix(const struct llama_model * model); // Beginning of infill prefix
2439
2451
@ctypes_function ("llama_token_prefix" , [llama_model_p_ctypes ], llama_token )
2440
2452
def llama_token_prefix (model : llama_model_p ) -> int :
@@ -2524,18 +2536,21 @@ def llama_tokenize(
2524
2536
# // Uses the vocabulary in the provided context.
# // Does not write null terminator to the buffer.
# // User code is responsible to remove the leading whitespace of the first non-BOS token when decoding multiple tokens.
# // @param special If true, special tokens are rendered in the output.
# LLAMA_API int32_t llama_token_to_piece(
#           const struct llama_model * model,
#                          llama_token   token,
#                                 char * buf,
#                              int32_t   length,
#                                 bool   special);
@ctypes_function(
    "llama_token_to_piece",
    [
        llama_model_p_ctypes,
        llama_token,
        ctypes.c_char_p,
        ctypes.c_int32,
        ctypes.c_bool,
    ],
    ctypes.c_int32,
)
def llama_token_to_piece(
    model: llama_model_p,
    token: Union[llama_token, int],
    buf: Union[ctypes.c_char_p, bytes, CtypesArray[ctypes.c_char]],
    length: Union[ctypes.c_int, int],
    special: Union[ctypes.c_bool, bool],
    /,
) -> int:
    """Token Id -> Piece.
    Uses the vocabulary in the provided context.
    Does not write null terminator to the buffer.
    User code is responsible to remove the leading whitespace of the first non-BOS token when decoding multiple tokens.

    Args:
        model: The model to use for tokenization.
        token: The token to convert.
        buf: The buffer to write the token to.
        length: The length of the buffer.
        special: If true, special tokens are rendered in the output."""
    ...
2555
2577
2556
2578
0 commit comments