From 25ef27c6bcd121c34bf4194d876393b1919daf9a Mon Sep 17 00:00:00 2001
From: Fabio Rossini Sluzala <fabio3rs@gmail.com>
Date: Sun, 19 Mar 2023 18:38:42 -0300
Subject: [PATCH 1/5] Improve performance by replacing std::map with
 std::unordered_map and changing id_to_token from std::map<id, token> to
 std::vector<token>

---
 main.cpp     | 5 +++--
 quantize.cpp | 1 +
 utils.cpp    | 6 +++---
 utils.h      | 8 ++++----
 4 files changed, 11 insertions(+), 9 deletions(-)
diff --git a/main.cpp b/main.cpp
index c7186e0dff273..fd3ba58a33b5a 100644
--- a/main.cpp
+++ b/main.cpp
@@ -31,7 +31,7 @@
 static const int EOS_TOKEN_ID = 2;
 
 // determine number of model parts based on the dimension
-static const std::map<int, int> LLAMA_N_PARTS = {
+static const std::unordered_map<int, int> LLAMA_N_PARTS = {
     { 4096, 1 },
     { 5120, 2 },
     { 6656, 4 },
@@ -85,7 +85,7 @@ struct llama_model {
 
     //
     struct ggml_context * ctx;
-    std::map<std::string, struct ggml_tensor *> tensors;
+    std::unordered_map<std::string, struct ggml_tensor *> tensors;
 };
 
 // load the model's weights from a file
@@ -147,6 +147,7 @@ bool llama_model_load(const std::string & fname, llama_model & model, gpt_vocab
     // load vocab
     {
         std::string word;
+        vocab.id_to_token.resize(model.hparams.n_vocab);
         for (int i = 0; i < model.hparams.n_vocab; i++) {
             uint32_t len;
             fin.read((char *) &len, sizeof(len));
diff --git a/quantize.cpp b/quantize.cpp
index 14c7b277a4024..63ff750b31276 100644
--- a/quantize.cpp
+++ b/quantize.cpp
@@ -113,6 +113,7 @@ bool llama_model_quantize(const std::string & fname_inp, const std::string & fna
         }
 
         std::string word;
+        vocab.id_to_token.resize(n_vocab);
         for (int i = 0; i < n_vocab; i++) {
             uint32_t len;
             finp.read ((char *) &len, sizeof(len));
diff --git a/utils.cpp b/utils.cpp
index 08d5c6ba625f2..762edf547934b 100644
--- a/utils.cpp
+++ b/utils.cpp
@@ -148,8 +148,8 @@ void replace(std::string & str, const std::string & needle, const std::string &
     }
 }
 
-std::map<std::string, int32_t> json_parse(const std::string & fname) {
-    std::map<std::string, int32_t> result;
+std::unordered_map<std::string, int32_t> json_parse(const std::string & fname) {
+    std::unordered_map<std::string, int32_t> result;
 
     // read file into string
     std::string json;
@@ -334,7 +334,7 @@ std::vector<gpt_vocab::id> llama_tokenize(const gpt_vocab & vocab, const std::st
 	    break;
         }
         res.push_back(token_id);
-        auto token = (*vocab.id_to_token.find(token_id)).second;
+        const auto &token = vocab.id_to_token.at(token_id);
         i -= token.length();
     }
 
diff --git a/utils.h b/utils.h
index 49658f7d9441e..d69462a576f25 100644
--- a/utils.h
+++ b/utils.h
@@ -3,7 +3,7 @@
 #pragma once
 
 #include <string>
-#include <map>
+#include <unordered_map>
 #include <vector>
 #include <random>
 #include <thread>
@@ -56,14 +56,14 @@ struct gpt_vocab {
     using id    = int32_t;
     using token = std::string;
 
-    std::map<token, id> token_to_id;
-    std::map<id, token> id_to_token;
+    std::unordered_map<token, id> token_to_id;
+    std::vector<token> id_to_token;
 };
 
 void replace(std::string & str, const std::string & needle, const std::string & replacement);
 
 // poor-man's JSON parsing
-std::map<std::string, int32_t> json_parse(const std::string & fname);
+std::unordered_map<std::string, int32_t> json_parse(const std::string & fname);
 
 // split text into tokens
 //

From 78b964eb1ef41cb837e001bdf4fd7be92518642b Mon Sep 17 00:00:00 2001
From: Fabio Rossini Sluzala <fabio3rs@gmail.com>
Date: Sun, 19 Mar 2023 18:55:09 -0300
Subject: [PATCH 2/5] Fix previous commit: in gpt_vocab_init, add
 vocab.id_to_token.resize(vocab.token_to_id.size()) before filling it

---
 utils.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/utils.cpp b/utils.cpp
index 762edf547934b..9975463268f86 100644
--- a/utils.cpp
+++ b/utils.cpp
@@ -353,6 +353,7 @@ bool gpt_vocab_init(const std::string & fname, gpt_vocab & vocab) {
 
     vocab.token_to_id = ::json_parse(fname);
 
+    vocab.id_to_token.resize(vocab.token_to_id.size());
     for (const auto & kv : vocab.token_to_id) {
         vocab.id_to_token[kv.second] = kv.first;
     }

From 40ab4861ccfd52f32387105b429de19939ffeb63 Mon Sep 17 00:00:00 2001
From: Fabio Rossini Sluzala <fabio3rs@gmail.com>
Date: Sun, 19 Mar 2023 19:32:51 -0300
Subject: [PATCH 3/5] Removed include <map>

---
 main.cpp     | 1 -
 quantize.cpp | 1 -
 2 files changed, 2 deletions(-)

diff --git a/main.cpp b/main.cpp
index fd3ba58a33b5a..bf1970a98766e 100644
--- a/main.cpp
+++ b/main.cpp
@@ -8,7 +8,6 @@
 #include <cstring>
 #include <fstream>
 #include <iostream>
-#include <map>
 #include <string>
 #include <vector>
 
diff --git a/quantize.cpp b/quantize.cpp
index 63ff750b31276..d24e64e1a4840 100644
--- a/quantize.cpp
+++ b/quantize.cpp
@@ -7,7 +7,6 @@
 #include <cstdio>
 #include <cstring>
 #include <fstream>
-#include <map>
 #include <string>
 #include <vector>
 #include <regex>

From 34596530b2f922a387bd787903ee290c5f048a4d Mon Sep 17 00:00:00 2001
From: Fabio Rossini Sluzala <fabio3rs@gmail.com>
Date: Mon, 20 Mar 2023 21:28:14 -0300
Subject: [PATCH 4/5] Nest struct token_score inside gpt_vocab

---
 utils.h | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/utils.h b/utils.h
index a3c2955c97e1d..a6600dae1614b 100644
--- a/utils.h
+++ b/utils.h
@@ -52,16 +52,15 @@ std::string gpt_random_prompt(std::mt19937 & rng);
 // Vocab utils
 //
 
-struct token_score {
-    using token_t = std::string;
-    token_t token;
-    float score;
-};
-
 struct gpt_vocab {
     using id    = int32_t;
     using token = std::string;
 
+    struct token_score {
+        token token;
+        float score;
+    };
+
     std::unordered_map<token, id> token_to_id;
     std::vector<token_score> id_to_token;
 };

From a19aa63ba29cc1c2b8370ad1d3993939afda13e8 Mon Sep 17 00:00:00 2001
From: Fabio Rossini Sluzala <fabio3rs@gmail.com>
Date: Tue, 21 Mar 2023 11:54:04 -0300
Subject: [PATCH 5/5] Rename token_score member token to tok

---
 main.cpp     | 8 ++++----
 quantize.cpp | 2 +-
 utils.cpp    | 2 +-
 utils.h      | 2 +-
 4 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/main.cpp b/main.cpp
index 6d1c14b743d54..dc8876dc0b7a4 100644
--- a/main.cpp
+++ b/main.cpp
@@ -175,7 +175,7 @@ bool llama_model_load(const std::string & fname, llama_model & model, gpt_vocab
             vocab.token_to_id[word] = i;
 
             auto &tok_score = vocab.id_to_token[i];
-            tok_score.token = word;
+            tok_score.tok = word;
             tok_score.score = score;
 
             //if (i < 30000) {
@@ -896,7 +896,7 @@ int main(int argc, char ** argv) {
     fprintf(stderr, "%s: prompt: '%s'\n", __func__, params.prompt.c_str());
     fprintf(stderr, "%s: number of tokens in prompt = %zu\n", __func__, embd_inp.size());
     for (int i = 0; i < (int) embd_inp.size(); i++) {
-        fprintf(stderr, "%6d -> '%s'\n", embd_inp[i], vocab.id_to_token.at(embd_inp[i]).token.c_str());
+        fprintf(stderr, "%6d -> '%s'\n", embd_inp[i], vocab.id_to_token.at(embd_inp[i]).tok.c_str());
     }
     fprintf(stderr, "\n");
     if (params.interactive) {
@@ -918,7 +918,7 @@ int main(int argc, char ** argv) {
                 fprintf(stderr, "%s: reverse prompt: '%s'\n", __func__, params.antiprompt.at(apindex).c_str());
                 fprintf(stderr, "%s: number of tokens in reverse prompt = %zu\n", __func__, antiprompt_inp.size());
                 for (int i = 0; i < (int) antiprompt_inp.size(); i++) {
-                    fprintf(stderr, "%6d -> '%s'\n", antiprompt_inp[i], vocab.id_to_token.at(antiprompt_inp[i]).token.c_str());
+                    fprintf(stderr, "%6d -> '%s'\n", antiprompt_inp[i], vocab.id_to_token.at(antiprompt_inp[i]).tok.c_str());
                 }
                 fprintf(stderr, "\n");
             }
@@ -1024,7 +1024,7 @@ int main(int argc, char ** argv) {
         // display text
         if (!input_noecho) {
             for (auto id : embd) {
-                printf("%s", vocab.id_to_token[id].token.c_str());
+                printf("%s", vocab.id_to_token[id].tok.c_str());
             }
             fflush(stdout);
         }
diff --git a/quantize.cpp b/quantize.cpp
index bc7b975239a7c..129518e2b526f 100644
--- a/quantize.cpp
+++ b/quantize.cpp
@@ -146,7 +146,7 @@ bool llama_model_quantize(const std::string & fname_inp, const std::string & fna
             vocab.token_to_id[word] = i;
 
             auto &tok_score = vocab.id_to_token[i];
-            tok_score.token = word;
+            tok_score.tok = word;
             tok_score.score = score;
         }
     }
diff --git a/utils.cpp b/utils.cpp
index 509fc92122ee0..5481b63808be0 100644
--- a/utils.cpp
+++ b/utils.cpp
@@ -444,7 +444,7 @@ bool gpt_vocab_init(const std::string & fname, gpt_vocab & vocab) {
 
     vocab.id_to_token.resize(vocab.token_to_id.size());
     for (const auto & kv : vocab.token_to_id) {
-        vocab.id_to_token[kv.second].token = kv.first;
+        vocab.id_to_token[kv.second].tok = kv.first;
     }
 
     printf("%s: vocab size = %d\n", __func__, (int) vocab.token_to_id.size());
diff --git a/utils.h b/utils.h
index a6600dae1614b..f91688bea38da 100644
--- a/utils.h
+++ b/utils.h
@@ -57,7 +57,7 @@ struct gpt_vocab {
     using token = std::string;
 
     struct token_score {
-        token token;
+        token tok;
         float score;
     };