Enable llama.cpp on s390x big endian platform by chenqiny · Pull Request #3552 · ggml-org/llama.cpp · GitHub

Enable llama.cpp on s390x big endian platform #3552


Merged: 10 commits, Oct 20, 2023
Compare "GGUF" with file header char by char
1. Set GGUF_MAGIC to the string "GGUF" instead of an int value
2. Compare "GGUF" with the file header char by char so the check is independent of host byte order
3. Move the byte-swap code from convert.py to write_tensor_data in gguf.py
chenqiny committed Oct 20, 2023
commit eb5b8327f688d1815f61786020468ccd8dfc53f7
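For context on points 1 and 2: the four bytes "GGUF", read as a uint32, come out as 0x46554747 on a little-endian host and 0x47475546 on a big-endian one, which is exactly the pair of constants the old ggml.h had to special-case. A minimal Python sketch of the difference (illustrative only, not part of this commit):

    import struct

    magic = b"GGUF"

    # The same four bytes, widened to a uint32, depend on host byte order:
    assert struct.unpack("<I", magic)[0] == 0x46554747  # little endian
    assert struct.unpack(">I", magic)[0] == 0x47475546  # big endian

    # Comparing the raw bytes directly needs no per-platform constant:
    assert magic == b"GGUF"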
2 changes: 0 additions & 2 deletions convert.py
@@ -932,8 +932,6 @@ def write_all(fname_out: Path, ftype: GGMLFileType, params: Params, model: LazyM
         elapsed = time.time() - start
         size = ' x '.join(f"{dim:6d}" for dim in lazy_tensor.shape)
         padi = len(str(len(model)))
-        if endianess==gguf.GGUFEndian.BIG:
-            ndarray.byteswap(inplace=True)
         print(f"[{i+1:{padi}d}/{len(model)}] Writing tensor {name:38s} | size {size:16} | type {lazy_tensor.data_type.name:4} | T+{int(elapsed):4}")
         of.gguf.write_tensor_data(ndarray)

@@ -536,7 +536,7 @@ static bool is_ggml_file(const char * filename) {
     if (file.size < 4) {
         return false;
     }
-    uint32_t magic = file.read_u32();
+    std::string magic = file.read_string(4);
     return magic == GGUF_MAGIC;
 }

19 changes: 11 additions & 8 deletions ggml.c
@@ -20813,7 +20813,7 @@ struct gguf_kv {
 };

 struct gguf_header {
-    uint32_t magic;
+    char magic[4];
     uint32_t version;
     uint64_t n_tensors; // GGUFv2
     uint64_t n_kv;      // GGUFv2
@@ -20883,7 +20883,7 @@ static bool gguf_fread_str_v1(FILE * file, struct gguf_str * p, size_t * offset)
 struct gguf_context * gguf_init_empty(void) {
     struct gguf_context * ctx = GGML_ALIGNED_MALLOC(sizeof(struct gguf_context));

-    ctx->header.magic = GGUF_MAGIC;
+    memcpy(ctx->header.magic, GGUF_MAGIC, sizeof(ctx->header.magic));
     ctx->header.version = GGUF_VERSION;
     ctx->header.n_tensors = 0;
     ctx->header.n_kv = 0;
@@ -20909,16 +20909,18 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
     // offset from start of file
     size_t offset = 0;

-    uint32_t magic = 0;
+    char magic[4];

     // check the magic before making allocations
     {
         gguf_fread_el(file, &magic, sizeof(magic), &offset);

-        if (magic != GGUF_MAGIC) {
-            fprintf(stderr, "%s: invalid magic number %08x\n", __func__, magic);
-            fclose(file);
-            return NULL;
+        for (uint32_t i = 0; i < sizeof(magic); i++) {
+            if (magic[i] != GGUF_MAGIC[i]) {
+                fprintf(stderr, "%s: invalid magic characters %s.\n", __func__, magic);
+                fclose(file);
+                return NULL;
+            }
         }
     }
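The loop above is what makes the check endian-safe: the magic is compared byte by byte and never widened into an integer. A rough Python equivalent of the same idea (illustrative only, not code from this repo):

    def check_magic(buf: bytes) -> bool:
        # Compare the first four file bytes one by one against b"GGUF";
        # no integer conversion, so host byte order never enters the picture.
        return len(buf) >= 4 and all(buf[i] == b"GGUF"[i] for i in range(4))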

@@ -20928,7 +20930,8 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p

     // read the header
     {
-        ctx->header.magic = magic;
+        strncpy(ctx->header.magic, magic, 4);
+

         ctx->kv = NULL;
         ctx->infos = NULL;
12 changes: 1 addition & 11 deletions ggml.h
@@ -231,17 +231,7 @@
 #define GGML_EXIT_SUCCESS 0
 #define GGML_EXIT_ABORTED 1

-#if defined(__linux__)
-#include <endian.h>
-#if BYTE_ORDER == LITTLE_ENDIAN
-#define GGUF_MAGIC 0x46554747
-#elif BYTE_ORDER == BIG_ENDIAN
-#define GGUF_MAGIC 0x47475546
-#endif
-#else
-// Use little endian magic uint_32 value
-#define GGUF_MAGIC 0x46554747
-#endif
+#define GGUF_MAGIC "GGUF"

 #define GGUF_VERSION 3
2 changes: 2 additions & 0 deletions gguf-py/gguf/gguf.py
@@ -804,6 +804,8 @@ def write_padding(self, fp: BinaryIO, n: int, align: int | None = None):
         fp.write(bytes([0] * pad))

     def write_tensor_data(self, tensor: np.ndarray[Any, Any]):
+        if self.endianess==GGUFEndian.BIG:
+            tensor.byteswap(inplace=True)
         self.write_padding(self.fout, self.fout.tell())
         tensor.tofile(self.fout)
         self.write_padding(self.fout, tensor.nbytes)
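Point 3 in action: write_tensor_data now flips each element's bytes before tofile() when the target is big endian, so convert.py no longer needs to know about byte order. A small sketch of what numpy's byteswap does here (illustrative, assumes a little-endian host):

    import numpy as np

    t = np.array([1.0], dtype=np.float32)
    le = t.astype("<f4").tobytes()   # little-endian bytes: 00 00 80 3f
    be = t.astype(">f4").tobytes()   # big-endian bytes:    3f 80 00 00
    assert le == be[::-1]

    # byteswap(inplace=True) performs the same per-element reversal in place,
    # which is what write_tensor_data does before writing big-endian output.
    t.byteswap(inplace=True)
    assert t.tobytes() == be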