From 25ef27c6bcd121c34bf4194d876393b1919daf9a Mon Sep 17 00:00:00 2001 From: Fabio Rossini Sluzala Date: Sun, 19 Mar 2023 18:38:42 -0300 Subject: [PATCH 1/5] Improve performance by changing std::map to std::unordered_map and std::map id_to_token; to std::vector id_to_token; --- main.cpp | 5 +++-- quantize.cpp | 1 + utils.cpp | 6 +++--- utils.h | 8 ++++---- 4 files changed, 11 insertions(+), 9 deletions(-) diff --git a/main.cpp b/main.cpp index c7186e0dff273..fd3ba58a33b5a 100644 --- a/main.cpp +++ b/main.cpp @@ -31,7 +31,7 @@ static const int EOS_TOKEN_ID = 2; // determine number of model parts based on the dimension -static const std::map LLAMA_N_PARTS = { +static const std::unordered_map LLAMA_N_PARTS = { { 4096, 1 }, { 5120, 2 }, { 6656, 4 }, @@ -85,7 +85,7 @@ struct llama_model { // struct ggml_context * ctx; - std::map tensors; + std::unordered_map tensors; }; // load the model's weights from a file @@ -147,6 +147,7 @@ bool llama_model_load(const std::string & fname, llama_model & model, gpt_vocab // load vocab { std::string word; + vocab.id_to_token.resize(model.hparams.n_vocab); for (int i = 0; i < model.hparams.n_vocab; i++) { uint32_t len; fin.read((char *) &len, sizeof(len)); diff --git a/quantize.cpp b/quantize.cpp index 14c7b277a4024..63ff750b31276 100644 --- a/quantize.cpp +++ b/quantize.cpp @@ -113,6 +113,7 @@ bool llama_model_quantize(const std::string & fname_inp, const std::string & fna } std::string word; + vocab.id_to_token.resize(n_vocab); for (int i = 0; i < n_vocab; i++) { uint32_t len; finp.read ((char *) &len, sizeof(len)); diff --git a/utils.cpp b/utils.cpp index 08d5c6ba625f2..762edf547934b 100644 --- a/utils.cpp +++ b/utils.cpp @@ -148,8 +148,8 @@ void replace(std::string & str, const std::string & needle, const std::string & } } -std::map json_parse(const std::string & fname) { - std::map result; +std::unordered_map json_parse(const std::string & fname) { + std::unordered_map result; // read file into string std::string json; @@ -334,7 +334,7 @@ std::vector llama_tokenize(const gpt_vocab & vocab, const std::st break; } res.push_back(token_id); - auto token = (*vocab.id_to_token.find(token_id)).second; + const auto &token = vocab.id_to_token.at(token_id); i -= token.length(); } diff --git a/utils.h b/utils.h index 49658f7d9441e..d69462a576f25 100644 --- a/utils.h +++ b/utils.h @@ -3,7 +3,7 @@ #pragma once #include -#include +#include #include #include #include @@ -56,14 +56,14 @@ struct gpt_vocab { using id = int32_t; using token = std::string; - std::map token_to_id; - std::map id_to_token; + std::unordered_map token_to_id; + std::vector id_to_token; }; void replace(std::string & str, const std::string & needle, const std::string & replacement); // poor-man's JSON parsing -std::map json_parse(const std::string & fname); +std::unordered_map json_parse(const std::string & fname); // split text into tokens // From 78b964eb1ef41cb837e001bdf4fd7be92518642b Mon Sep 17 00:00:00 2001 From: Fabio Rossini Sluzala Date: Sun, 19 Mar 2023 18:55:09 -0300 Subject: [PATCH 2/5] fix last commit on gpt_vocab_init add vocab.id_to_token.resize(vocab.token_to_id.size()); --- utils.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/utils.cpp b/utils.cpp index 762edf547934b..9975463268f86 100644 --- a/utils.cpp +++ b/utils.cpp @@ -353,6 +353,7 @@ bool gpt_vocab_init(const std::string & fname, gpt_vocab & vocab) { vocab.token_to_id = ::json_parse(fname); + vocab.id_to_token.resize(vocab.token_to_id.size()); for (const auto & kv : vocab.token_to_id) { vocab.id_to_token[kv.second] = kv.first; } From 40ab4861ccfd52f32387105b429de19939ffeb63 Mon Sep 17 00:00:00 2001 From: Fabio Rossini Sluzala Date: Sun, 19 Mar 2023 19:32:51 -0300 Subject: [PATCH 3/5] Removed include --- main.cpp | 1 - quantize.cpp | 1 - 2 files changed, 2 deletions(-) diff --git a/main.cpp b/main.cpp index fd3ba58a33b5a..bf1970a98766e 100644 --- a/main.cpp +++ b/main.cpp @@ -8,7 +8,6 @@ #include #include #include -#include #include #include diff --git a/quantize.cpp b/quantize.cpp index 63ff750b31276..d24e64e1a4840 100644 --- a/quantize.cpp +++ b/quantize.cpp @@ -7,7 +7,6 @@ #include #include #include -#include #include #include #include From 34596530b2f922a387bd787903ee290c5f048a4d Mon Sep 17 00:00:00 2001 From: Fabio Rossini Sluzala Date: Mon, 20 Mar 2023 21:28:14 -0300 Subject: [PATCH 4/5] Nest struct token score inside gpt_vocab --- utils.h | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/utils.h b/utils.h index a3c2955c97e1d..a6600dae1614b 100644 --- a/utils.h +++ b/utils.h @@ -52,16 +52,15 @@ std::string gpt_random_prompt(std::mt19937 & rng); // Vocab utils // -struct token_score { - using token_t = std::string; - token_t token; - float score; -}; - struct gpt_vocab { using id = int32_t; using token = std::string; + struct token_score { + token token; + float score; + }; + std::unordered_map token_to_id; std::vector id_to_token; }; From a19aa63ba29cc1c2b8370ad1d3993939afda13e8 Mon Sep 17 00:00:00 2001 From: Fabio Rossini Sluzala Date: Tue, 21 Mar 2023 11:54:04 -0300 Subject: [PATCH 5/5] renamed token to tok --- main.cpp | 8 ++++---- quantize.cpp | 2 +- utils.cpp | 2 +- utils.h | 2 +- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/main.cpp b/main.cpp index 6d1c14b743d54..dc8876dc0b7a4 100644 --- a/main.cpp +++ b/main.cpp @@ -175,7 +175,7 @@ bool llama_model_load(const std::string & fname, llama_model & model, gpt_vocab vocab.token_to_id[word] = i; auto &tok_score = vocab.id_to_token[i]; - tok_score.token = word; + tok_score.tok = word; tok_score.score = score; //if (i < 30000) { @@ -896,7 +896,7 @@ int main(int argc, char ** argv) { fprintf(stderr, "%s: prompt: '%s'\n", __func__, params.prompt.c_str()); fprintf(stderr, "%s: number of tokens in prompt = %zu\n", __func__, embd_inp.size()); for (int i = 0; i < (int) embd_inp.size(); i++) { - fprintf(stderr, "%6d -> '%s'\n", embd_inp[i], vocab.id_to_token.at(embd_inp[i]).token.c_str()); + fprintf(stderr, "%6d -> '%s'\n", embd_inp[i], vocab.id_to_token.at(embd_inp[i]).tok.c_str()); } fprintf(stderr, "\n"); if (params.interactive) { @@ -918,7 +918,7 @@ int main(int argc, char ** argv) { fprintf(stderr, "%s: reverse prompt: '%s'\n", __func__, params.antiprompt.at(apindex).c_str()); fprintf(stderr, "%s: number of tokens in reverse prompt = %zu\n", __func__, antiprompt_inp.size()); for (int i = 0; i < (int) antiprompt_inp.size(); i++) { - fprintf(stderr, "%6d -> '%s'\n", antiprompt_inp[i], vocab.id_to_token.at(antiprompt_inp[i]).token.c_str()); + fprintf(stderr, "%6d -> '%s'\n", antiprompt_inp[i], vocab.id_to_token.at(antiprompt_inp[i]).tok.c_str()); } fprintf(stderr, "\n"); } @@ -1024,7 +1024,7 @@ int main(int argc, char ** argv) { // display text if (!input_noecho) { for (auto id : embd) { - printf("%s", vocab.id_to_token[id].token.c_str()); + printf("%s", vocab.id_to_token[id].tok.c_str()); } fflush(stdout); } diff --git a/quantize.cpp b/quantize.cpp index bc7b975239a7c..129518e2b526f 100644 --- a/quantize.cpp +++ b/quantize.cpp @@ -146,7 +146,7 @@ bool llama_model_quantize(const std::string & fname_inp, const std::string & fna vocab.token_to_id[word] = i; auto &tok_score = vocab.id_to_token[i]; - tok_score.token = word; + tok_score.tok = word; tok_score.score = score; } } diff --git a/utils.cpp b/utils.cpp index 509fc92122ee0..5481b63808be0 100644 --- a/utils.cpp +++ b/utils.cpp @@ -444,7 +444,7 @@ bool gpt_vocab_init(const std::string & fname, gpt_vocab & vocab) { vocab.id_to_token.resize(vocab.token_to_id.size()); for (const auto & kv : vocab.token_to_id) { - vocab.id_to_token[kv.second].token = kv.first; + vocab.id_to_token[kv.second].tok = kv.first; } printf("%s: vocab size = %d\n", __func__, (int) vocab.token_to_id.size()); diff --git a/utils.h b/utils.h index a6600dae1614b..f91688bea38da 100644 --- a/utils.h +++ b/utils.h @@ -57,7 +57,7 @@ struct gpt_vocab { using token = std::string; struct token_score { - token token; + token tok; float score; };