8
8
c_void_p ,
9
9
c_bool ,
10
10
POINTER ,
11
+ _Pointer , # type: ignore
11
12
Structure ,
12
13
Array ,
13
14
c_uint8 ,
@@ -252,9 +253,7 @@ def llama_get_state_size(ctx: llama_context_p) -> c_size_t:
252
253
# Copies the state to the specified destination address.
253
254
# Destination needs to have allocated enough memory.
254
255
# Returns the number of bytes copied
255
- def llama_copy_state_data (
256
- ctx : llama_context_p , dest # type: Array[c_uint8]
257
- ) -> c_size_t :
256
+ def llama_copy_state_data (ctx : llama_context_p , dest : Array [c_uint8 ]) -> c_size_t :
258
257
return _lib .llama_copy_state_data (ctx , dest )
259
258
260
259
@@ -278,9 +277,9 @@ def llama_set_state_data(
278
277
def llama_load_session_file (
279
278
ctx : llama_context_p ,
280
279
path_session : bytes ,
281
- tokens_out , # type : Array[llama_token]
280
+ tokens_out : Array [llama_token ],
282
281
n_token_capacity : c_size_t ,
283
- n_token_count_out , # type: Array [c_size_t]
282
+ n_token_count_out : _Pointer [c_size_t ],
284
283
) -> c_size_t :
285
284
return _lib .llama_load_session_file (
286
285
ctx , path_session , tokens_out , n_token_capacity , n_token_count_out
@@ -300,7 +299,7 @@ def llama_load_session_file(
300
299
def llama_save_session_file (
301
300
ctx : llama_context_p ,
302
301
path_session : bytes ,
303
- tokens , # type : Array[llama_token]
302
+ tokens : Array [llama_token ],
304
303
n_token_count : c_size_t ,
305
304
) -> c_size_t :
306
305
return _lib .llama_save_session_file (ctx , path_session , tokens , n_token_count )
@@ -321,7 +320,7 @@ def llama_save_session_file(
321
320
# Returns 0 on success
322
321
def llama_eval (
323
322
ctx : llama_context_p ,
324
- tokens , # type : Array[llama_token]
323
+ tokens : Array [llama_token ],
325
324
n_tokens : c_int ,
326
325
n_past : c_int ,
327
326
n_threads : c_int ,
@@ -440,8 +439,8 @@ def llama_token_nl() -> llama_token:
440
439
# @details Repetition penalty described in CTRL academic paper https://arxiv.org/abs/1909.05858, with negative logit fix.
441
440
def llama_sample_repetition_penalty (
442
441
ctx : llama_context_p ,
443
- candidates , # type: Array [llama_token_data]
444
- last_tokens_data , # type : Array[llama_token]
442
+ candidates : _Pointer [llama_token_data ],
443
+ last_tokens_data : Array [llama_token ],
445
444
last_tokens_size : c_int ,
446
445
penalty : c_float ,
447
446
):
@@ -463,8 +462,8 @@ def llama_sample_repetition_penalty(
463
462
# @details Frequency and presence penalties described in OpenAI API https://platform.openai.com/docs/api-reference/parameter-details.
464
463
def llama_sample_frequency_and_presence_penalties (
465
464
ctx : llama_context_p ,
466
- candidates , # type: Array [llama_token_data]
467
- last_tokens_data , # type : Array[llama_token]
465
+ candidates : _Pointer [llama_token_data ],
466
+ last_tokens_data : Array [llama_token ],
468
467
last_tokens_size : c_int ,
469
468
alpha_frequency : c_float ,
470
469
alpha_presence : c_float ,
@@ -491,10 +490,7 @@ def llama_sample_frequency_and_presence_penalties(
491
490
492
491
493
492
# @details Sorts candidate tokens by their logits in descending order and calculate probabilities based on logits.
494
- def llama_sample_softmax (
495
- ctx : llama_context_p ,
496
- candidates # type: Array[llama_token_data]
497
- ):
493
+ def llama_sample_softmax (ctx : llama_context_p , candidates : _Pointer [llama_token_data ]):
498
494
return _lib .llama_sample_softmax (ctx , candidates )
499
495
500
496
@@ -507,10 +503,10 @@ def llama_sample_softmax(
507
503
508
504
# @details Top-K sampling described in academic paper "The Curious Case of Neural Text Degeneration" https://arxiv.org/abs/1904.09751
509
505
def llama_sample_top_k (
510
- ctx : llama_context_p ,
511
- candidates , # type: Array [llama_token_data]
512
- k : c_int ,
513
- min_keep : c_size_t = c_size_t (1 )
506
+ ctx : llama_context_p ,
507
+ candidates : _Pointer [llama_token_data ],
508
+ k : c_int ,
509
+ min_keep : c_size_t = c_size_t (1 ),
514
510
):
515
511
return _lib .llama_sample_top_k (ctx , candidates , k , min_keep )
516
512
@@ -526,10 +522,10 @@ def llama_sample_top_k(
526
522
527
523
# @details Nucleus sampling described in academic paper "The Curious Case of Neural Text Degeneration" https://arxiv.org/abs/1904.09751
528
524
def llama_sample_top_p (
529
- ctx : llama_context_p ,
530
- candidates , # type: Array [llama_token_data]
531
- p : c_float ,
532
- min_keep : c_size_t = c_size_t (1 )
525
+ ctx : llama_context_p ,
526
+ candidates : _Pointer [llama_token_data ],
527
+ p : c_float ,
528
+ min_keep : c_size_t = c_size_t (1 ),
533
529
):
534
530
return _lib .llama_sample_top_p (ctx , candidates , p , min_keep )
535
531
@@ -546,9 +542,9 @@ def llama_sample_top_p(
546
542
# @details Tail Free Sampling described in https://www.trentonbricken.com/Tail-Free-Sampling/.
547
543
def llama_sample_tail_free (
548
544
ctx : llama_context_p ,
549
- candidates , # type: Array [llama_token_data]
545
+ candidates : _Pointer [llama_token_data ],
550
546
z : c_float ,
551
- min_keep : c_size_t = c_size_t (1 )
547
+ min_keep : c_size_t = c_size_t (1 ),
552
548
):
553
549
return _lib .llama_sample_tail_free (ctx , candidates , z , min_keep )
554
550
@@ -565,9 +561,9 @@ def llama_sample_tail_free(
565
561
# @details Locally Typical Sampling implementation described in the paper https://arxiv.org/abs/2202.00666.
566
562
def llama_sample_typical (
567
563
ctx : llama_context_p ,
568
- candidates , # type: Array [llama_token_data]
569
- p : c_float ,
570
- min_keep : c_size_t = c_size_t (1 )
564
+ candidates : _Pointer [llama_token_data ],
565
+ p : c_float ,
566
+ min_keep : c_size_t = c_size_t (1 ),
571
567
):
572
568
return _lib .llama_sample_typical (ctx , candidates , p , min_keep )
573
569
@@ -582,9 +578,7 @@ def llama_sample_typical(
582
578
583
579
584
580
def llama_sample_temperature (
585
- ctx : llama_context_p ,
586
- candidates , # type: Array[llama_token_data]
587
- temp : c_float
581
+ ctx : llama_context_p , candidates : _Pointer [llama_token_data ], temp : c_float
588
582
):
589
583
return _lib .llama_sample_temperature (ctx , candidates , temp )
590
584
@@ -605,11 +599,11 @@ def llama_sample_temperature(
605
599
# @param mu Maximum cross-entropy. This value is initialized to be twice the target cross-entropy (`2 * tau`) and is updated in the algorithm based on the error between the target and observed surprisal.
606
600
def llama_sample_token_mirostat (
607
601
ctx : llama_context_p ,
608
- candidates , # type: Array [llama_token_data]
602
+ candidates : _Pointer [llama_token_data ],
609
603
tau : c_float ,
610
- eta : c_float ,
604
+ eta : c_float ,
611
605
m : c_int ,
612
- mu # type: Array [c_float]
606
+ mu : _Pointer [c_float ],
613
607
) -> llama_token :
614
608
return _lib .llama_sample_token_mirostat (ctx , candidates , tau , eta , m , mu )
615
609
@@ -632,10 +626,10 @@ def llama_sample_token_mirostat(
632
626
# @param mu Maximum cross-entropy. This value is initialized to be twice the target cross-entropy (`2 * tau`) and is updated in the algorithm based on the error between the target and observed surprisal.
633
627
def llama_sample_token_mirostat_v2 (
634
628
ctx : llama_context_p ,
635
- candidates , # type: Array [llama_token_data]
636
- tau : c_float ,
629
+ candidates : _Pointer [llama_token_data ],
630
+ tau : c_float ,
637
631
eta : c_float ,
638
- mu # type: Array [c_float]
632
+ mu : _Pointer [c_float ],
639
633
) -> llama_token :
640
634
return _lib .llama_sample_token_mirostat_v2 (ctx , candidates , tau , eta , mu )
641
635
@@ -652,8 +646,7 @@ def llama_sample_token_mirostat_v2(
652
646
653
647
# @details Selects the token with the highest probability.
654
648
def llama_sample_token_greedy (
655
- ctx : llama_context_p ,
656
- candidates # type: Array[llama_token_data]
649
+ ctx : llama_context_p , candidates : _Pointer [llama_token_data ]
657
650
) -> llama_token :
658
651
return _lib .llama_sample_token_greedy (ctx , candidates )
659
652
@@ -667,8 +660,7 @@ def llama_sample_token_greedy(
667
660
668
661
# @details Randomly selects a token from the candidates based on their probabilities.
669
662
def llama_sample_token (
670
- ctx : llama_context_p ,
671
- candidates # type: Array[llama_token_data]
663
+ ctx : llama_context_p , candidates : _Pointer [llama_token_data ]
672
664
) -> llama_token :
673
665
return _lib .llama_sample_token (ctx , candidates )
674
666
0 commit comments