feat: fill-in-middle support (#1386) · coderonion/llama-cpp-python@4a7122d

Commit 4a7122d

CISC and abetlen authored
feat: fill-in-middle support (abetlen#1386)
* Proper fill-in-middle support

  Use prefix/middle/suffix tokens when metadata is present in the GGUF, as in e.g. [this](https://huggingface.co/CISCai/CodeQwen1.5-7B-Chat-SOTA-GGUF) one.

* Fall back to internal prefix/middle/suffix id

  In some cases llama.cpp will make a guess at FIM tokens; use them if there is no metadata.

* Fix typo

* Don't insert special tokens that are not there in suffix

  Note: add_bos is misnamed; it's actually add_special and can cause several special tokens to be added to the token list (the special parameter is actually parse_special).

* Don't add/parse any special tokens when using FIM

  I've left the original behavior when no FIM tokens are found, but this should perhaps be re-evaluated.

* Don't append suffix to prompt_tokens unless FIM tokens are detected

* Make sure we only do this for FIM

---------

Co-authored-by: Andrei <abetlen@gmail.com>
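For context, the prompt this commit assembles follows the common prefix-suffix-middle (PSM) layout used by FIM-trained models. A minimal sketch of that layout, with hypothetical token ids standing in for the real ones read from GGUF metadata:

    # Sketch of the PSM token layout the commit builds. The ids below are
    # placeholders; the real ones come from tokenizer.ggml.{prefix,suffix,
    # middle}_token_id metadata or from llama.cpp's internal guesses.
    from typing import List

    PREFIX_ID, SUFFIX_ID, MIDDLE_ID = 2, 3, 4  # hypothetical ids

    def build_fim_tokens(prompt_ids: List[int], suffix_ids: List[int]) -> List[int]:
        # <PRE> prompt <SUF> suffix <MID> -- the model then generates the "middle".
        return [PREFIX_ID] + prompt_ids + [SUFFIX_ID] + suffix_ids + [MIDDLE_ID]

    print(build_fim_tokens([10, 11], [20, 21]))  # [2, 10, 11, 3, 20, 21, 4]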
1 parent 228949c commit 4a7122d

File tree

1 file changed: +41 −6 lines changed

llama_cpp/llama.py

Lines changed: 41 additions & 6 deletions
@@ -955,18 +955,53 @@ def _create_completion(
 
         completion_id: str = f"cmpl-{str(uuid.uuid4())}"
         created: int = int(time.time())
+        prefix_token_id: int = int(self.metadata.get("tokenizer.ggml.prefix_token_id", self._model.token_prefix()))
+        middle_token_id: int = int(self.metadata.get("tokenizer.ggml.middle_token_id", self._model.token_middle()))
+        suffix_token_id: int = int(self.metadata.get("tokenizer.ggml.suffix_token_id", self._model.token_suffix()))
         # If prompt is empty, initialize completion with BOS token to avoid
         # detokenization including a space at the beginning of the completion
         completion_tokens: List[int] = [] if len(prompt) > 0 else [self.token_bos()]
         # Add blank space to start of prompt to match OG llama tokenizer
         prompt_tokens: List[int] = (
             (
-                self.tokenize(prompt.encode("utf-8"), special=True)
-                if prompt != ""
-                else [self.token_bos()]
+                [prefix_token_id]
+                if prefix_token_id >= 0 and suffix is not None
+                else []
+            )
+            +
+            (
+                (
+                    self.tokenize(prompt.encode("utf-8"), add_bos=(prefix_token_id < 0 or suffix is None), special=(prefix_token_id < 0 or suffix is None))
+                    if prompt != ""
+                    else (
+                        []
+                        if prefix_token_id >= 0 and suffix is not None
+                        else [self.token_bos()]
+                    )
+                )
+                if isinstance(prompt, str)
+                else prompt
+            )
+            +
+            (
+                (
+                    [suffix_token_id]
+                    +
+                    (
+                        self.tokenize(suffix.encode("utf-8"), add_bos=False, special=False)
+                        if suffix
+                        else []
+                    )
+                )
+                if suffix_token_id >= 0 and suffix is not None
+                else []
+            )
+            +
+            (
+                [middle_token_id]
+                if middle_token_id >= 0 and suffix is not None
+                else []
             )
-            if isinstance(prompt, str)
-            else prompt
         )
         text: bytes = b""
         returned_tokens: int = 0
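The tokenize flags in the hunk above encode one rule: FIM mode (a known prefix token plus a caller-supplied suffix) disables both BOS insertion and special-token parsing, so literal text like "<fim_prefix>" inside user code is tokenized verbatim rather than as a control token. A hedged sketch of just that predicate, assuming -1 marks an unknown token id as in the diff:

    # FIM mode <=> prefix token known AND a suffix was passed; both flags flip together.
    from typing import Optional

    def tokenize_flags(prefix_token_id: int, suffix: Optional[str]) -> dict:
        fim = prefix_token_id >= 0 and suffix is not None
        return {"add_bos": not fim, "special": not fim}

    assert tokenize_flags(-1, None) == {"add_bos": True, "special": True}          # plain completion
    assert tokenize_flags(2, "return x") == {"add_bos": False, "special": False}   # FIM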
@@ -1346,7 +1381,7 @@ def logit_bias_processor(
         if echo:
             text_str = prompt + text_str
 
-        if suffix is not None:
+        if suffix_token_id < 0 and suffix is not None:
             text_str = text_str + suffix
 
         logprobs_or_none: Optional[CompletionLogprobs] = None
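The second hunk stops appending the suffix to the returned text when a suffix token exists, since in that case the suffix is already part of the prompt. A usage sketch, assuming a FIM-capable GGUF such as the CodeQwen quant linked in the commit message (the model path and parameters here are illustrative):

    from llama_cpp import Llama

    # Any GGUF whose metadata carries tokenizer.ggml.prefix/middle/suffix_token_id
    # (or for which llama.cpp can guess the ids) takes the new FIM path.
    llm = Llama(model_path="codeqwen-1_5-7b-chat.Q4_K_M.gguf")  # illustrative path

    out = llm.create_completion(
        prompt="def add(a, b):\n    ",    # code before the gap
        suffix="\n    return result\n",   # code after the gap
        max_tokens=32,
    )
    print(out["choices"][0]["text"])  # the generated middle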
