Merge unordered_map/vector changes with trunk updates

ggml-org · ggerganov · Mar 21, 2023 · Mar 19, 2023 · Mar 19, 2023 · Mar 19, 2023
commit ef792ae8bdae3d1f7e2dc04b691676b8bbd099eb
diff --git a/main.cpp b/main.cpp
@@ -173,8 +173,10 @@ bool llama_model_load(const std::string & fname, llama_model & model, gpt_vocab
             fin.read((char *) &score, sizeof(score));
 
             vocab.token_to_id[word] = i;
-            vocab.id_to_token[i] = word;
-            vocab.score[i] = score;
+
+            auto &tok_score = vocab.id_to_token[i];
+            tok_score.token = word;
+            tok_score.score = score;
 
             //if (i < 30000) {
             //    fprintf(stderr, "%s: vocab[%d] = '%s'\n", __func__, i, word.c_str());
@@ -894,7 +896,7 @@ int main(int argc, char ** argv) {
     fprintf(stderr, "%s: prompt: '%s'\n", __func__, params.prompt.c_str());
     fprintf(stderr, "%s: number of tokens in prompt = %zu\n", __func__, embd_inp.size());
     for (int i = 0; i < (int) embd_inp.size(); i++) {
-        fprintf(stderr, "%6d -> '%s'\n", embd_inp[i], vocab.id_to_token.at(embd_inp[i]).c_str());
+        fprintf(stderr, "%6d -> '%s'\n", embd_inp[i], vocab.id_to_token.at(embd_inp[i]).token.c_str());
     }
     fprintf(stderr, "\n");
     if (params.interactive) {
@@ -916,7 +918,7 @@ int main(int argc, char ** argv) {
                 fprintf(stderr, "%s: reverse prompt: '%s'\n", __func__, params.antiprompt.at(apindex).c_str());
                 fprintf(stderr, "%s: number of tokens in reverse prompt = %zu\n", __func__, antiprompt_inp.size());
                 for (int i = 0; i < (int) antiprompt_inp.size(); i++) {
-                    fprintf(stderr, "%6d -> '%s'\n", antiprompt_inp[i], vocab.id_to_token.at(antiprompt_inp[i]).c_str());
+                    fprintf(stderr, "%6d -> '%s'\n", antiprompt_inp[i], vocab.id_to_token.at(antiprompt_inp[i]).token.c_str());
                 }
                 fprintf(stderr, "\n");
             }
@@ -1022,7 +1024,7 @@ int main(int argc, char ** argv) {
         // display text
         if (!input_noecho) {
             for (auto id : embd) {
-                printf("%s", vocab.id_to_token[id].c_str());
+                printf("%s", vocab.id_to_token[id].token.c_str());
             }
             fflush(stdout);
         }

diff --git a/quantize.cpp b/quantize.cpp
@@ -144,8 +144,10 @@ bool llama_model_quantize(const std::string & fname_inp, const std::string & fna
             fout.write((char *) &score, sizeof(score));
 
             vocab.token_to_id[word] = i;
-            vocab.id_to_token[i] = word;
-            vocab.score[i] = score;
+
+            auto &tok_score = vocab.id_to_token[i];
+            tok_score.token = word;
+            tok_score.score = score;
         }
     }
 

diff --git a/utils.cpp b/utils.cpp
@@ -394,9 +394,39 @@ struct llama_tokenizer {
         if (left == -1 || right == -1) {
             return;
         }
-        res.push_back(token_id);
-        const auto &token = vocab.id_to_token.at(token_id);
-        i -= token.length();
+
+        std::string_view text(symbols_[left].text.data(), symbols_[left].text.size() + symbols_[right].text.size());
+        auto token = vocab_.token_to_id.find(std::string(text));
+
+        if (token == vocab_.token_to_id.end()) {
+            return;
+        }
+
+        if (static_cast<size_t>((*token).second) >= vocab_.id_to_token.size()) {
+            return;
+        }
+
+        const auto &tok_score = vocab_.id_to_token[(*token).second];
+
+        llama_sp_bigram bigram;
+        bigram.left = left;
+        bigram.right = right;
+        bigram.score = tok_score.score;
+        bigram.size = text.size();
+        work_queue_.push(bigram);
+    }
+
+    const gpt_vocab & vocab_;
+    std::vector<llama_sp_symbol> symbols_;
+    llama_sp_bigram::queue work_queue_;
+};
+
+std::vector<gpt_vocab::id> llama_tokenize(const gpt_vocab & vocab, std::string_view text, bool bos) {
+    llama_tokenizer tokenizer(vocab);
+    std::vector<gpt_vocab::id> output;
+
+    if (text.size() == 0) {
+        return output;
     }
 
     if (bos) {
@@ -414,7 +444,7 @@ bool gpt_vocab_init(const std::string & fname, gpt_vocab & vocab) {
 
     vocab.id_to_token.resize(vocab.token_to_id.size());
     for (const auto & kv : vocab.token_to_id) {
-        vocab.id_to_token[kv.second] = kv.first;
+        vocab.id_to_token[kv.second].token = kv.first;
     }
 
     printf("%s: vocab size = %d\n", __func__, (int) vocab.token_to_id.size());

diff --git a/utils.h b/utils.h
@@ -52,12 +52,18 @@ std::string gpt_random_prompt(std::mt19937 & rng);
 // Vocab utils
 //
 
+struct token_score { // one vocabulary entry: a token's text together with its score
+    using token_t = std::string;
+    token_t token; // token text; the model loader assigns the word read for this id
+    float score;   // score loaded alongside the token; the tokenizer copies it into llama_sp_bigram::score
+};
+
 struct gpt_vocab {
     using id    = int32_t;
     using token = std::string;
 
     std::unordered_map<token, id> token_to_id;
-    std::vector<token> id_to_token;
+    std::vector<token_score> id_to_token; // indexed by token id; each entry holds that token's text and score
 };
 
 void replace(std::string & str, const std::string & needle, const std::string & replacement);