10000 llama : load tensors using pre-computed model hash · ggml-org/llama.cpp@64edd68 · GitHub
[go: up one dir, main page]

Skip to content

Commit 64edd68

Browse files
committed
llama : load tensors using pre-computed model hash
1 parent 141a908 commit 64edd68

File tree

8 files changed

+276
-21
lines changed

8 files changed

+276
-21
lines changed

examples/gguf-hash/gguf-hash.cpp

Lines changed: 60 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@ typedef enum {
5555

5656
struct hash_params {
5757
std::string input;
58+
bool fnv = false;
5859
bool xxh64 = false;
5960
bool sha1 = false;
6061
bool sha256 = false;
@@ -103,6 +104,7 @@ static void hash_print_usage(const char * executable) {
103104
printf("\n");
104105
printf("options:\n");
105106
printf(" -h, --help show this help message and exit\n");
107+
printf(" --fnv use FNV-1a hash\n");
106108
printf(" --xxh64 use xxh64 hash\n");
107109
printf(" --sha1 use sha1 hash\n");
108110
printf(" --sha256 use sha256 hash\n");
@@ -131,6 +133,11 @@ static void hash_params_parse_ex(int argc, const char ** argv, hash_params & par
131133
exit(0);
132134
}
133135

136+
if (arg == "--fnv") {
137+
arg_found = true;
138+
params.fnv = true;
139+
}
140+
134141
if (arg == "--xxh64") {
135142
arg_found = true;
136143
params.xxh64 = true;
@@ -188,6 +195,26 @@ static void hash_params_parse_ex(int argc, const char ** argv, hash_params & par
188195
params.input = argv[arg_idx++];
189196
}
190197

198+
// Streaming state for the 64-bit FNV-1a hash.
//
// `hash` carries the running digest. It is default-initialized to the FNV-1a
// offset basis so that a context which is declared but never passed through
// fnv_init() still holds a well-defined value instead of indeterminate bits.
struct fnv_ctx {
    // 64-bit FNV-1a parameters.
    static constexpr uint64_t offset_basis = 0xcbf29ce484222325ULL;
    static constexpr uint64_t prime        = 0x100000001b3ULL;

    uint64_t hash = offset_basis;
};

// Resets `ctx` to the empty-input state (the FNV-1a offset basis).
static void fnv_init(fnv_ctx * ctx) {
    ctx->hash = fnv_ctx::offset_basis;
}

// Folds `len` bytes starting at `data` into the running hash held in `ctx`.
// May be called repeatedly; the result equals hashing the concatenation of
// all chunks. `len == 0` leaves the state unchanged.
static void fnv_update(fnv_ctx * ctx, const uint8_t * data, size_t len) {
    // Accumulate in a local: `data` is a byte pointer and may alias ctx->hash
    // as far as the compiler knows, which would otherwise force a store and
    // reload of ctx->hash on every byte.
    uint64_t acc = ctx->hash;
    for (size_t i = 0; i < len; ++i) {
        acc ^= data[i];          // FNV-1a: xor the byte first ...
        acc *= fnv_ctx::prime;   // ... then multiply (wraps modulo 2^64)
    }
    ctx->hash = acc;
}

// Writes the current digest to `*digest`. The context is not modified, so
// further fnv_update() calls may follow.
static void fnv_final(fnv_ctx * ctx, uint64_t * digest) {
    *digest = ctx->hash;
}
217+
191218
static bool hash_params_parse(int argc, const char ** argv, hash_params & params) {
192219
bool result = true;
193220
try {
@@ -306,6 +333,12 @@ static hash_exit_code_t gguf_hash(const hash_params & hash_params) {
306333
}
307334
}
308335

336+
// FNV init
337+
fnv_ctx fnv_model_hash_ctx;
338+
if (hash_params.fnv) {
339+
fnv_init(&fnv_model_hash_ctx);
340+
}
341+
309342
// sha1 init
310343
SHA1_CTX sha1_model_hash_ctx;
311344
if (hash_params.sha1) {
@@ -326,7 +359,11 @@ static hash_exit_code_t gguf_hash(const hash_params & hash_params) {
326359
SHA1Update( &sha1_for_uuid_ctx, (unsigned char const *)uuidv5_namespace, sizeof(uuidv5_namespace));
327360
}
328361

362+
struct gguf_context * ctx_out = gguf_init_empty();
329363
struct gguf_context * ctx = gguf_init_from_file(fname.c_str(), params);
364+
365+
gguf_set_kv(ctx_out, ctx);
366+
330367
const int n_tensors = gguf_get_n_tensors(ctx);
331368
bool tensor_layer_in_manifest = false;
332369
bool model_in_manifest = false;
@@ -335,10 +372,16 @@ static hash_exit_code_t gguf_hash(const hash_params & hash_params) {
335372
for (int i = 0; i < n_tensors; ++i) {
336373
const char * name = gguf_get_tensor_name(ctx, i);
337374
struct ggml_tensor * cur = ggml_get_tensor(ctx_data, name);
375+
gguf_add_tensor(ctx_out, cur);
338376
auto n_bytes = ggml_nbytes(cur);
339377
auto *raw_data = cur->data;
340378
const std::string tensor_layer_name = fname + ":" + name;
341379

380+
if (hash_params.fnv) {
381+
// Overall Model Hash
382+
fnv_update(&fnv_model_hash_ctx, (const uint8_t *)raw_data, n_bytes);
383+
}
384+
342385
if (hash_params.xxh64) {
343386

344387
if (!hash_params.no_layer) {
@@ -455,6 +498,18 @@ static hash_exit_code_t gguf_hash(const hash_params & hash_params) {
455498
}
456499
}
457500

501+
if (hash_params.fnv) {
502+
uint64_t hash;
503+
fnv_final(&fnv_model_hash_ctx, &hash);
504+
char hex_result[17];
505+
for (int offset = 0; offset < 8; offset++) {
506+
unsigned int shift_bits_by = (8 * (8 - offset - 1));
507+
snprintf( ( hex_result + (2*offset)), sizeof(hex_result) - (2*offset), "%02x", (unsigned char) (hash >> shift_bits_by)&0xff);
508+
}
509+
printf("%-8s %-s %s\n", "fnv", hex_result, fname.c_str());
510+
gguf_set_val_u64(ctx_out, "model_hash", hash);
511+
}
512+
458513
if (hash_params.xxh64) {
459514
XXH64_hash_t const hash = XXH64_digest(xxh64_model_hash_state);
460515

@@ -580,6 +635,9 @@ static hash_exit_code_t gguf_hash(const hash_params & hash_params) {
580635
}
581636
}
582637

638+
auto fname_out = fname + ".rpc";
639+
gguf_write_to_file(ctx_out, fname_out.c_str(), false);
640+
gguf_free(ctx_out);
583641

584642
ggml_free(ctx_data);
585643
gguf_free(ctx);
@@ -663,7 +721,7 @@ int main(int argc, const char ** argv) {
663721

664722
// Autoselect the highest security hash if manifest is provided but
665723
// the user has not specifically defined the hash they care about
666-
if (!params.xxh64 && !params.sha1 && !params.uuid && !params.sha256) {
724+
if (!params.fnv && !params.xxh64 && !params.sha1 && !params.uuid && !params.sha256) {
667725
// User has not selected a specific value, pick most secure hash
668726
if (manifest_check.sha256) {
669727
params.sha256 = true;
@@ -680,7 +738,7 @@ int main(int argc, const char ** argv) {
680738
}
681739

682740
// By default if no switch argument is provided, assume xxh64
683-
if (!params.xxh64 && !params.sha1 && !params.uuid && !params.sha256) {
741+
if (!params.fnv && !params.xxh64 && !params.sha1 && !params.uuid && !params.sha256) {
684742
params.xxh64 = true;
685743
}
686744

ggml/include/ggml-rpc.h

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ extern "C" {
88
#endif
99

1010
#define RPC_PROTO_MAJOR_VERSION 2
11-
#define RPC_PROTO_MINOR_VERSION 0
11+
#define RPC_PROTO_MINOR_VERSION 1
1212
#define RPC_PROTO_PATCH_VERSION 0
1313
#define GGML_RPC_MAX_SERVERS 16
1414

@@ -21,12 +21,18 @@ GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_rpc_buffer_type(const c
2121
GGML_BACKEND_API void ggml_backend_rpc_get_device_memory(const char * endpoint, size_t * free, size_t * total);
2222

2323
GGML_BACKEND_API void ggml_backend_rpc_start_server(ggml_backend_t backend, const char * endpoint,
24-
const char * cache_dir,
24+
const char * model_file, const char * cache_dir,
2525
size_t free_mem, size_t total_mem);
2626

2727
GGML_BACKEND_API ggml_backend_reg_t ggml_backend_rpc_reg(void);
2828

2929
GGML_BACKEND_API ggml_backend_dev_t ggml_backend_rpc_add_device(const char * endpoint);
30+
// Load tensor data for `tensor` in `buffer` from the file at `path`.
// Returns a bool status.
// NOTE(review): presumably `file_offset` is the byte offset into the file,
// `tensor_offset` the byte offset into the tensor's data, and `size` the
// number of bytes to load; the implementation is not visible here - confirm.
//
// The parameter is spelled `struct ggml_tensor *` because this declaration
// sits inside an extern "C" block: a bare `ggml_tensor *` only compiles as C
// if a typedef of that name exists.
GGML_BACKEND_API bool ggml_backend_rpc_buffer_load_tensor(ggml_backend_buffer_t buffer,
                                                          struct ggml_tensor * tensor,
                                                          const char * path,
                                                          size_t file_offset,
                                                          size_t tensor_offset,
                                                          size_t size);
3036

3137
#ifdef __cplusplus
3238
}

0 commit comments

Comments
 (0)
0