@@ -536,6 +536,81 @@ def copy_logits(self, logits: npt.NDArray[np.single]):
         self.candidates.size = llama_cpp.c_size_t(self.n_vocab)
 
 
+# Python wrappers over common/common
+def _tokenize(model: _LlamaModel, text: str, add_bos: bool, special: bool) -> list[int]:
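+    # First pass: guess an upper bound of one token per character, plus an
+    # optional slot for BOS; llama_tokenize reports the real count.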
+    n_tokens = len(text) + 1 if add_bos else len(text)
+    result = (llama_cpp.llama_token * n_tokens)()
+    n_tokens = llama_cpp.llama_tokenize(
+        model.model,
+        text.encode("utf-8"),
+        len(text),
+        result,
+        n_tokens,
+        add_bos,
+        special,
+    )
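+    # A negative return value means the buffer was too small; its magnitude is
+    # the required token count, so retry once with an exactly sized buffer.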
+    if n_tokens < 0:
+        result = (llama_cpp.llama_token * -n_tokens)()
+        check = llama_cpp.llama_tokenize(
+            model.model,
+            text.encode("utf-8"),
+            len(text),
+            result,
+            len(result),
+            add_bos,
+            special,
+        )
+        if check != -n_tokens:
+            raise RuntimeError(f'Failed to tokenize: text="{text}" n_tokens={n_tokens}')
+    else:
+        result = result[:n_tokens]
+    return list(result)
+
+
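+# llama_token_to_piece uses the same convention: a negative return value is the
+# required buffer size. Start with a small 8-byte buffer and grow on demand.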
+def _token_to_piece(model: _LlamaModel, token: int) -> str:
+    assert model.model is not None
+    result = (ctypes.c_char * 8)(0)
+    n_tokens = llama_cpp.llama_token_to_piece(model.model, token, result, len(result))
+    if n_tokens < 0:
+        result = (ctypes.c_char * -n_tokens)(0)
+        check = llama_cpp.llama_token_to_piece(model.model, token, result, len(result))
+        if check != -n_tokens:
+            raise RuntimeError(f"Failed to get piece: token={token}")
+    else:
+        result = result[:n_tokens]
+    return bytes(result).decode("utf-8")
+
+
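+# SentencePiece (SPM) pieces mark word boundaries with a leading space, so the
+# space on the first content piece (the one after any leading BOS) is stripped.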
+def _detokenize_spm(model: _LlamaModel, tokens: List[int]) -> str:
+    bos_id = model.token_bos()
+    result = ""
+    for i, token in enumerate(tokens):
+        piece = _token_to_piece(model, token)
+        if (
+            (tokens[0] == bos_id and i == 1) or (tokens[0] != bos_id and i == 0)
+        ) and piece[0] == " ":
+            piece = piece[1:]
+        result += piece
+    return result
+
+
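+# BPE pieces already carry their own spacing, so they concatenate directly.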
+def _detokenize_bpe(model: _LlamaModel, tokens: List[int]) -> str:
+    result = ""
+    for token in tokens:
+        piece = _token_to_piece(model, token)
+        result += piece
+    return result
+
+
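+# Prefer the model's own add-BOS metadata; llama_add_bos_token returns -1 when
+# the GGUF metadata does not specify it, in which case fall back to the vocab
+# type (SentencePiece models conventionally expect a leading BOS).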
+def _should_add_bos(model: _LlamaModel) -> bool:
+    assert model.model is not None
+    add_bos = llama_cpp.llama_add_bos_token(model.model)
+    if add_bos != -1:
+        return add_bos != 0
+    else:
+        return llama_cpp.llama_vocab_type(model.model) == llama_cpp.LLAMA_VOCAB_TYPE_SPM
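+
+# A rough usage sketch (hypothetical, assuming `model` is a loaded _LlamaModel):
+#   add_bos = _should_add_bos(model)
+#   tokens = _tokenize(model, "Hello, world!", add_bos, special=False)
+#   text = _detokenize_spm(model, tokens)  # or _detokenize_bpe for BPE vocabs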
+
+
 # Python wrappers over common/sampling structs
 
 