@@ -2,7 +2,6 @@
 
 import os
 import sys
-import abc
 import uuid
 import time
 import multiprocessing
@@ -15,7 +14,6 @@
     Iterator,
     Deque,
     Callable,
-    Any,
 )
 from collections import deque
 
@@ -31,6 +29,10 @@
     LlamaDiskCache, # type: ignore
     LlamaRAMCache, # type: ignore
 )
+from .llama_tokenizer import (
+    BaseLlamaTokenizer,
+    LlamaTokenizer
+)
 import llama_cpp.llama_cpp as llama_cpp
 import llama_cpp.llama_chat_format as llama_chat_format
 
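The tokenizer classes deleted in the hunk below now live in `llama_cpp/llama_tokenizer.py`; note that this diff only re-imports `BaseLlamaTokenizer` and `LlamaTokenizer` back into `llama.py`. As a minimal usage sketch, assuming `LlamaHFTokenizer` moved to the same module with its pre-move behaviour intact (the model id is a placeholder, not part of this commit, and `from_pretrained` requires `transformers`):

# Sketch only: assumes LlamaHFTokenizer also moved to llama_cpp/llama_tokenizer.py;
# the Hugging Face model id below is a placeholder.
from llama_cpp.llama_tokenizer import LlamaHFTokenizer

tokenizer = LlamaHFTokenizer.from_pretrained("hf-internal-testing/llama-tokenizer")

ids = tokenizer.tokenize(b"Hello, world!")  # bytes in, token ids out
text = tokenizer.detokenize(ids)            # full byte string back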
@@ -1747,69 +1749,6 @@ def longest_token_prefix(a: Sequence[int], b: Sequence[int]):
     return longest_prefix
 
 
-class BaseLlamaTokenizer(abc.ABC):
-    @abc.abstractmethod
-    def tokenize(self, text: bytes, add_bos: bool = True, special: bool = True) -> List[int]:
-        raise NotImplementedError
-
-    @abc.abstractmethod
-    def detokenize(self, tokens: List[int], prev_tokens: Optional[List[int]] = None) -> bytes:
-        raise NotImplementedError
-
-
-class LlamaTokenizer(BaseLlamaTokenizer):
-    def __init__(self, llama: Llama):
-        self.llama = llama
-        self._model = llama._model # type: ignore
-
-    def tokenize(self, text: bytes, add_bos: bool = True, special: bool = True) -> List[int]:
-        return self._model.tokenize(text, add_bos=add_bos, special=special)
-
-    def detokenize(self, tokens: List[int], prev_tokens: Optional[List[int]] = None) -> bytes:
-        if prev_tokens is not None:
-            return self._model.detokenize(tokens[len(prev_tokens):])
-        else:
-            return self._model.detokenize(tokens)
-
-    def encode(self, text: str, add_bos: bool = True, special: bool = True) -> List[int]:
-        return self.tokenize(
-            text.encode("utf-8", errors="ignore"), add_bos=add_bos, special=special
-        )
-
-    def decode(self, tokens: List[int]) -> str:
-        return self.detokenize(tokens).decode("utf-8", errors="ignore")
-
-    @classmethod
-    def from_ggml_file(cls, path: str) -> "LlamaTokenizer":
-        return cls(Llama(model_path=path, vocab_only=True))
-
-
-class LlamaHFTokenizer(BaseLlamaTokenizer):
-    def __init__(self, hf_tokenizer: Any):
-        self.hf_tokenizer = hf_tokenizer
-
-    def tokenize(self, text: bytes, add_bos: bool = True, special: bool = True) -> List[int]:
-        return self.hf_tokenizer.encode(text.decode("utf-8", errors="ignore"), add_special_tokens=special)
-
-    def detokenize(self, tokens: List[int], prev_tokens: Optional[List[int]] = None) -> bytes:
-        if prev_tokens is not None:
-            text = self.hf_tokenizer.decode(tokens).encode("utf-8", errors="ignore")
-            prev_text = self.hf_tokenizer.decode(prev_tokens).encode("utf-8", errors="ignore")
-            return text[len(prev_text):]
-        else:
-            return self.hf_tokenizer.decode(tokens).encode("utf-8", errors="ignore")
-
-    @classmethod
-    def from_pretrained(cls, pretrained_model_name_or_path: str) -> "LlamaHFTokenizer":
-        try:
-            from transformers import AutoTokenizer
-        except ImportError:
-            raise ImportError(
-                "The `transformers` library is required to use the `HFTokenizer`."
-                "You can install it with `pip install transformers`."
-            )
-        hf_tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path=pretrained_model_name_or_path)
-        return cls(hf_tokenizer)
 
 
 class LlamaState:
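Both removed `detokenize` implementations share the same incremental contract: when `prev_tokens` is given, only the bytes contributed by the tokens after that prefix are returned, which is what streaming callers need. A self-contained sketch of that contract, using a toy vocabulary that is purely hypothetical:

from typing import Dict, List, Optional

# Toy vocabulary, for illustration only.
VOCAB: Dict[int, bytes] = {1: b"Hello", 2: b",", 3: b" world", 4: b"!"}

def detokenize(tokens: List[int], prev_tokens: Optional[List[int]] = None) -> bytes:
    # Mirrors LlamaTokenizer.detokenize above: decode only the tokens
    # that come after the already-seen prefix.
    if prev_tokens is not None:
        tokens = tokens[len(prev_tokens):]
    return b"".join(VOCAB[t] for t in tokens)

assert detokenize([1, 2, 3, 4]) == b"Hello, world!"
assert detokenize([1, 2, 3, 4], prev_tokens=[1, 2]) == b" world!"

The HF variant instead decodes both token lists and slices the resulting bytes, presumably because a Hugging Face tokenizer's decode is not guaranteed to be prefix-stable at the token level.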