`quantize`: add imatrix and dataset metadata in GGUF by phymbert · Pull Request #6658 · ggml-org/llama.cpp · GitHub
[go: up one dir, main page]

Skip to content

quantize: add imatrix and dataset metadata in GGUF #6658

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 16 commits into from
Apr 26, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
quantize: add imatrix m_last_call as quantize.imatrix.chunks_count
  • Loading branch information
phymbert committed Apr 13, 2024
commit 851de160dd3b4b3f48eac94e914aaa861eaa2ddd
3 changes: 3 additions & 0 deletions examples/imatrix/imatrix.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -210,6 +210,9 @@ void IMatrixCollector::save_imatrix(const char * fname, const char * dataset) co
if (nval > 0) out.write((const char *) p.second.values.data(), nval * sizeof(float));
}

// Write the number of calls the matrix was computed with
out.write((const char *) &m_last_call, sizeof(m_last_call));

// Write the dataset name at the end of the file to later on specify it in quantize
int n_dataset = strlen(dataset);
out.write((const char *) &n_dataset, sizeof(n_dataset));
Expand Down
28 changes: 21 additions & 7 deletions examples/quantize/quantize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,8 @@ static const std::vector<struct quant_option> QUANT_OPTIONS = {
};

static const char * const LLM_KV_QUANTIZE_IMATRIX_DATASET = "quantize.imatrix.dataset";
static const char * const LLM_KV_QUANTIZE_IMATRIX_N_ENTRIES = "quantize.imatrix.n_entries";
static const char * const LLM_KV_QUANTIZE_IMATRIX_N_ENTRIES = "quantize.imatrix.entries_count";
static const char * const LLM_KV_QUANTIZE_IMATRIX_N_CHUNKS = "quantize.imatrix.chunks_count";

static bool try_parse_ftype(const std::string & ftype_str_in, llama_ftype & ftype, std::string & ftype_str_out) {
std::string ftype_str;
Expand Down Expand Up @@ -113,7 +114,7 @@ static void usage(const char * executable) {
exit(1);
}

static void load_imatrix(const std::string & imatrix_file, std::string & imatrix_dataset, std::unordered_map<std::string, std::vector<float>> & imatrix_data) {
static int load_imatrix(const std::string & imatrix_file, std::string & imatrix_dataset, std::unordered_map<std::string, std::vector<float>> & imatrix_data) {
std::ifstream in(imatrix_file.c_str(), std::ios::binary);
if (!in) {
printf("%s: failed to open %s\n",__func__, imatrix_file.c_str());
Expand Down Expand Up @@ -162,7 +163,9 @@ static void load_imatrix(const std::string & imatrix_file, std::string & imatrix
}

// latest imatrix version contains the dataset filename at the end of the file
int m_last_call = 0;
if (in.peek() != EOF) {
in.read((char *)&m_last_call, sizeof(m_last_call));
int dataset_len;
in.read((char *)&dataset_len, sizeof(dataset_len));
std::vector<char> dataset_as_vec(dataset_len+1);
Expand All @@ -171,19 +174,21 @@ static void load_imatrix(const std::string & imatrix_file, std::string & imatrix
imatrix_dataset = std::string{dataset_as_vec.data()};
printf("%s: imatrix dataset='%s'\n", __func__, imatrix_dataset.c_str());
}
printf("%s: loaded %d importance matrix entries from %s\n", __func__, int(imatrix_data.size()), imatrix_file.c_str());
printf("%s: loaded %d importance matrix entries from %s computed on %d chunks\n", __func__, int(imatrix_data.size()), imatrix_file.c_str(), m_last_call);
return m_last_call;
}

static void prepare_imatrix(const std::string & imatrix_file,
static int prepare_imatrix(const std::string & imatrix_file,
std::string & imatrix_dataset,
const std::vector<std::string> & included_weights,
const std::vector<std::string> & excluded_weights,
std::unordered_map<std::string, std::vector<float>> & imatrix_data) {
int m_last_call = -1;
if (!imatrix_file.empty()) {
load_imatrix(imatrix_file, imatrix_dataset, imatrix_data);
m_last_call = load_imatrix(imatrix_file, imatrix_dataset, imatrix_data);
}
if (imatrix_data.empty()) {
return;
return m_last_call;
}
if (!excluded_weights.empty()) {
for (auto& name : excluded_weights) {
Expand All @@ -209,6 +214,7 @@ static void prepare_imatrix(const std::string & imatrix_file,
if (!imatrix_data.empty()) {
printf("%s: have %d importance matrix entries\n", __func__, int(imatrix_data.size()));
}
return m_last_call;
}

static ggml_type parse_ggml_type(const char * arg) {
Expand Down Expand Up @@ -291,7 +297,7 @@ int main(int argc, char ** argv) {

std::string imatrix_dataset;
std::unordered_map<std::string, std::vector<float>> imatrix_data;
prepare_imatrix(imatrix_file, imatrix_dataset, included_weights, excluded_weights, imatrix_data);
int m_last_call = prepare_imatrix(imatrix_file, imatrix_dataset, included_weights, excluded_weights, imatrix_data);
if (!imatrix_data.empty()) {
params.imatrix = &imatrix_data;
if (!imatrix_dataset.empty()) {
Expand All @@ -309,6 +315,14 @@ int main(int argc, char ** argv) {
kvo.int_value = imatrix_data.size();
kv_overrides.emplace_back(std::move(kvo));
}

if (m_last_call > 0) {
llama_model_kv_override kvo;
std::strcpy(kvo.key, LLM_KV_QUANTIZE_IMATRIX_N_CHUNKS);
kvo.tag = LLAMA_KV_OVERRIDE_TYPE_INT;
kvo.int_value = m_last_call;
kv_overrides.emplace_back(std::move(kvo));
}
}
if (!kv_overrides.empty()) {
kv_overrides.emplace_back();
Expand Down
0