From 289637a6a32db14bb7c39b08892aa9ff854eac8b Mon Sep 17 00:00:00 2001
From: Dmitry Wolf
Date: Wed, 15 Mar 2023 21:25:01 +0300
Subject: [PATCH 1/5] streaming conversion without pytorch

---
 convert-pth-to-ggml.py | 92 ++++++++++++++++++++++++++++++++++++------
 1 file changed, 79 insertions(+), 13 deletions(-)

diff --git a/convert-pth-to-ggml.py b/convert-pth-to-ggml.py
index d2557500af094..75e182cc1d34d 100644
--- a/convert-pth-to-ggml.py
+++ b/convert-pth-to-ggml.py
@@ -17,11 +17,16 @@
 # and vocabulary.
 #
 
+from collections import defaultdict
 import sys
 import json
 import struct
 import numpy as np
-import torch
+from tqdm import tqdm
+import zipfile
+import pickle
+import concurrent.futures
+
 from sentencepiece import SentencePieceProcessor
 
 if len(sys.argv) < 3:
@@ -73,19 +78,22 @@ def get_n_parts(dim):
 
 n_parts = get_n_parts(hparams["dim"])
 
-print(hparams)
-print('n_parts = ', n_parts)
+print(f'Model params.json: {hparams}')
+print(f'Parts to process: {n_parts}')
+
 
-for p in range(n_parts):
-    print('Processing part ', p)
+def get_fname(p):
+    fname = "/consolidated.0" + str(p) + ".pth"
+    return fname
 
-    #fname_model = sys.argv[1] + "/consolidated.00.pth"
-    fname_model = sys.argv[1] + "/consolidated.0" + str(p) + ".pth"
+def process_part(p):
+    fname = get_fname(p)
+    fname_model = sys.argv[1] + fname
     fname_out = sys.argv[1] + "/ggml-model-" + ftype_str[ftype] + ".bin"
     if (p > 0):
         fname_out = sys.argv[1] + "/ggml-model-" + ftype_str[ftype] + ".bin" + "." + str(p)
 
-    model = torch.load(fname_model, map_location="cpu")
+    print(f"Processing part {fname}")
 
     fout = open(fname_out, "wb")
 
@@ -123,7 +131,58 @@ def get_n_parts(dim):
         fout.write(struct.pack("i", len(text)))
         fout.write(text)
 
-    for k, v in model.items():
+
+    def load_model(fname):
+        class Tensor():
+            def __init__(self, shape, dtype, loadinfo):
+                self.shape = shape
+                self.dtype = dtype
+                self.loadinfo = loadinfo
+                # print(shape, dtype)
+
+            def numpy(self):
+                fname_model, base_name, storage_offset, k, shape, dtype = self.loadinfo
+                with zipfile.ZipFile(fname_model, 'r') as myzip:
+                    with myzip.open(f'{base_name}/data/{k}') as myfile:
+                        bytes_size = np.dtype(self.dtype).itemsize
+                        myfile.seek(storage_offset * bytes_size, 1)
+                        ret = np.empty(shape, dtype=dtype)
+                        myfile.readinto(ret.data)
+                        return ret
+
+        def my_unpickle(datapkl, fname_model, base_name):
+            def my_rebuild_tensor(storage, storage_offset, size, stride, requires_grad, backward_hooks, metadata=None):
+                storage_type = storage[1]
+                obj_key = storage[2]
+                return Tensor(shape=size, dtype=storage_type, loadinfo=(
+                    fname_model, base_name, storage_offset,
+                    obj_key, size, storage_type
+                ))
+
+            class MyUnpickler(pickle.Unpickler):
+                def find_class(self, *p):
+                    if p == ('torch', 'HalfStorage'): return np.float16
+                    if p == ('torch', 'FloatStorage'): return np.float32
+                    if p == ('torch._utils', '_rebuild_tensor_v2'): return my_rebuild_tensor
+                    if p == ('collections', 'OrderedDict'): return dict
+                    raise ValueError(f'Unrecognized pickle {p}')
+
+                def persistent_load(self, pid):
+                    return pid
+
+            return MyUnpickler(datapkl).load()
+
+        with zipfile.ZipFile(fname, 'r') as myzip:
+            base_name = myzip.namelist()[0].split('/', 1)[0]
+            # print(myzip.namelist())
+            with myzip.open(f'{base_name}/data.pkl') as myfile:
+                model = my_unpickle(myfile, fname, base_name)
+        return model
+
+    model = load_model(fname_model)
+
+    for k, v in (t := tqdm(model.items())):
+        t.set_description(f"Processing {k} with shape {tuple(v.shape)} and type {np.dtype(v.dtype)}")
         name = k
         shape = v.shape
 
@@ -131,11 +190,11 @@ def get_n_parts(dim):
         if name[-5:] == "freqs":
             continue
 
-        print("Processing variable: " + name + " with shape: ", shape, " and type: ", v.dtype)
+        # print("Processing variable: " + name + " with shape: ", shape, " and type: ", v.dtype)
 
         #data = tf.train.load_variable(dir_model, name).squeeze()
         data = v.numpy().squeeze()
-        n_dims = len(data.shape);
+        n_dims = len(data.shape)
 
         # for efficiency - transpose some matrices
         # "model/h.*/attn/c_attn/w"
@@ -154,7 +213,7 @@ def get_n_parts(dim):
         # default type is fp16
         ftype_cur = 1
         if ftype == 0 or n_dims == 1:
-            print("  Converting to float32")
+            # print("  Converting to float32")
             data = data.astype(np.float32)
             ftype_cur = 0
 
@@ -163,7 +222,7 @@ def get_n_parts(dim):
         fout.write(struct.pack("iii", n_dims, len(sname), ftype_cur))
         for i in range(n_dims):
             fout.write(struct.pack("i", dshape[n_dims - 1 - i]))
-        fout.write(sname)
+        fout.write(sname)
 
         # data
         data.tofile(fout)
@@ -175,3 +234,10 @@ def get_n_parts(dim):
 
     print("Done. Output file: " + fname_out + ", (part ", p, ")")
     print("")
+
+with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor:
+    futures = {executor.submit(process_part, p) for p in range(n_parts)}
+    for f in (concurrent.futures.as_completed(futures)):
+        if f.exception() is not None: raise f.exception()
+
+print("All done.")
\ No newline at end of file
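
A note on the technique in patch 1: a PyTorch .pth checkpoint is an ordinary zip archive holding a pickled metadata object (<base>/data.pkl) plus one raw binary storage file per tensor (<base>/data/<key>), which is why the tensors can be decoded with numpy alone. Below is a minimal standalone sketch of the same trick that only lists the tensors in a checkpoint. The checkpoint path is a hypothetical example, the restricted unpickler mirrors the one the patch adds, and it assumes (as the patch does) that every checkpoint entry is a tensor and that sizes are pickled as plain tuples, which holds for the LLaMA consolidated files.

import pickle
import zipfile

import numpy as np

STORAGE_DTYPES = {'HalfStorage': np.float16, 'FloatStorage': np.float32}

def describe_checkpoint(path):
    with zipfile.ZipFile(path) as z:
        base = z.namelist()[0].split('/', 1)[0]

        class StubUnpickler(pickle.Unpickler):
            def find_class(self, module, name):
                if module == 'torch' and name in STORAGE_DTYPES:
                    return STORAGE_DTYPES[name]
                if (module, name) == ('torch._utils', '_rebuild_tensor_v2'):
                    # 'storage' arrives as the persistent-load tuple
                    # ('storage', dtype, key, location, numel); keep only
                    # what is needed to find the raw bytes inside the zip
                    return lambda storage, offset, size, stride, *rest: (
                        storage[2], np.dtype(storage[1]), tuple(size))
                if (module, name) == ('collections', 'OrderedDict'):
                    return dict
                raise ValueError(f'refusing to unpickle {module}.{name}')

            def persistent_load(self, pid):
                return pid  # hand the raw tuple to the rebuild hook above

        with z.open(f'{base}/data.pkl') as f:
            tensors = StubUnpickler(f).load()

    for name, (key, dtype, shape) in tensors.items():
        print(f'{name}: data/{key} dtype={dtype} shape={shape}')

describe_checkpoint('models/7B/consolidated.00.pth')  # hypothetical path
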
From bb997650be33b49519a23927d5e354f7defb4943 Mon Sep 17 00:00:00 2001
From: Dmitry Wolf
Date: Wed, 15 Mar 2023 22:38:28 +0300
Subject: [PATCH 2/5] separate threads for r/w ops

---
 convert-pth-to-ggml.py | 62 ++++++++++++++++++++++++------------------
 1 file changed, 36 insertions(+), 26 deletions(-)

diff --git a/convert-pth-to-ggml.py b/convert-pth-to-ggml.py
index 75e182cc1d34d..0556149582885 100644
--- a/convert-pth-to-ggml.py
+++ b/convert-pth-to-ggml.py
@@ -1,6 +1,5 @@
 # Convert a LLaMA model checkpoint to a ggml compatible file
 #
-# Load the model using Torch
 # Iterate over all variables and write them to a binary file.
 #
 # For each variable, write the following:
@@ -26,6 +25,9 @@
 import zipfile
 import pickle
 import concurrent.futures
+import io
+import threading
+import queue
 
 from sentencepiece import SentencePieceProcessor
 
@@ -138,24 +140,22 @@ def __init__(self, shape, dtype, loadinfo):
                 self.shape = shape
                 self.dtype = dtype
                 self.loadinfo = loadinfo
-                # print(shape, dtype)
 
             def numpy(self):
-                fname_model, base_name, storage_offset, k, shape, dtype = self.loadinfo
-                with zipfile.ZipFile(fname_model, 'r') as myzip:
-                    with myzip.open(f'{base_name}/data/{k}') as myfile:
-                        bytes_size = np.dtype(self.dtype).itemsize
-                        myfile.seek(storage_offset * bytes_size, 1)
-                        ret = np.empty(shape, dtype=dtype)
-                        myfile.readinto(ret.data)
-                        return ret
-
-        def my_unpickle(datapkl, fname_model, base_name):
+                myzip, base_name, storage_offset, k, shape, dtype = self.loadinfo
+                with myzip.open(f'{base_name}/data/{k}') as myfile:
+                    bytes_size = np.dtype(self.dtype).itemsize
+                    myfile.seek(storage_offset * bytes_size, 1)
+                    ret = np.empty(shape, dtype=dtype)
+                    myfile.readinto(ret.data)
+                    return ret
+
+        def my_unpickle(datapkl, myzip, base_name):
             def my_rebuild_tensor(storage, storage_offset, size, stride, requires_grad, backward_hooks, metadata=None):
                 storage_type = storage[1]
                 obj_key = storage[2]
                 return Tensor(shape=size, dtype=storage_type, loadinfo=(
-                    fname_model, base_name, storage_offset,
+                    myzip, base_name, storage_offset,
                     obj_key, size, storage_type
                 ))
 
@@ -172,15 +172,24 @@ def persistent_load(self, pid):
 
             return MyUnpickler(datapkl).load()
 
-        with zipfile.ZipFile(fname, 'r') as myzip:
-            base_name = myzip.namelist()[0].split('/', 1)[0]
-            # print(myzip.namelist())
-            with myzip.open(f'{base_name}/data.pkl') as myfile:
-                model = my_unpickle(myfile, fname, base_name)
+        myzip = zipfile.ZipFile(fname, 'r')
+        base_name = myzip.namelist()[0].split('/', 1)[0]
+        with myzip.open(f'{base_name}/data.pkl') as myfile:
+            model = my_unpickle(myfile, myzip, base_name)
         return model
 
     model = load_model(fname_model)
 
+    q = queue.Queue(maxsize=2)
+
+    def writer():
+        while True:
+            item = q.get()
+            fout.write(item.getvalue())
+            q.task_done()
+
+    threading.Thread(target=writer, daemon=True).start()
+
     for k, v in (t := tqdm(model.items())):
         t.set_description(f"Processing {k} with shape {tuple(v.shape)} and type {np.dtype(v.dtype)}")
         name = k
@@ -190,8 +199,6 @@ def persistent_load(self, pid):
         if name[-5:] == "freqs":
             continue
 
-        # print("Processing variable: " + name + " with shape: ", shape, " and type: ", v.dtype)
 
-        #data = tf.train.load_variable(dir_model, name).squeeze()
         data = v.numpy().squeeze()
         n_dims = len(data.shape)
 
@@ -217,23 +224,26 @@ def persistent_load(self, pid):
             data = data.astype(np.float32)
             ftype_cur = 0
 
+        memout = io.BytesIO()
         # header
         sname = name.encode('utf-8')
-        fout.write(struct.pack("iii", n_dims, len(sname), ftype_cur))
+        memout.write(struct.pack("iii", n_dims, len(sname), ftype_cur))
         for i in range(n_dims):
-            fout.write(struct.pack("i", dshape[n_dims - 1 - i]))
-        fout.write(sname)
+            memout.write(struct.pack("i", dshape[n_dims - 1 - i]))
+        memout.write(sname)
 
         # data
-        data.tofile(fout)
+        memout.write(data.tobytes())
+        # data.tofile(memout)
+        q.put(memout)
+
+    q.join()
 
-    # I hope this deallocates the memory ..
     model = None
     fout.close()
 
     print("Done. Output file: " + fname_out + ", (part ", p, ")")
-    print("")
 
 with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor:
     futures = {executor.submit(process_part, p) for p in range(n_parts)}
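
The structure patch 2 introduces: the main thread does the slow zip reads and serializes each tensor into an in-memory buffer, while a dedicated writer thread drains a bounded queue into the output file, so read and write I/O overlap. queue.Queue(maxsize=2) provides backpressure — q.put() blocks once two buffers are pending, capping memory use. A self-contained sketch of that producer/consumer pattern, with an io.BytesIO standing in for the output file:

import io
import queue
import threading

q = queue.Queue(maxsize=2)   # small bound => producer blocks when writer lags
out = io.BytesIO()           # stands in for the real output file

def writer():
    while True:
        chunk = q.get()      # blocks until the producer enqueues a buffer
        out.write(chunk)
        q.task_done()

threading.Thread(target=writer, daemon=True).start()

for i in range(10):                  # producer: "read and convert" work
    q.put(f'chunk {i}\n'.encode())   # blocks while two chunks are in flight

q.join()                     # wait until every enqueued buffer is written
print(out.getvalue().decode())
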
From 0deb075a3f8bf5445f4148a2d75c8a179d42da5b Mon Sep 17 00:00:00 2001
From: Dmitry Wolf
Date: Wed, 15 Mar 2023 22:57:59 +0300
Subject: [PATCH 3/5] clean

---
 convert-pth-to-ggml.py | 90 +++++++++++++++++++++---------------------
 1 file changed, 44 insertions(+), 46 deletions(-)

diff --git a/convert-pth-to-ggml.py b/convert-pth-to-ggml.py
index 0556149582885..e239b85496fe4 100644
--- a/convert-pth-to-ggml.py
+++ b/convert-pth-to-ggml.py
@@ -84,6 +84,50 @@ def get_n_parts(dim):
 print(f'Parts to process: {n_parts}')
 
 
+def load_model(fname):
+    class Tensor():
+        def __init__(self, shape, dtype, loadinfo):
+            self.shape = shape
+            self.dtype = dtype
+            self.loadinfo = loadinfo
+
+        def numpy(self):
+            myzip, base_name, storage_offset, k, shape, dtype = self.loadinfo
+            with myzip.open(f'{base_name}/data/{k}') as myfile:
+                bytes_size = np.dtype(self.dtype).itemsize
+                myfile.seek(storage_offset * bytes_size, 1)
+                ret = np.empty(shape, dtype=dtype)
+                myfile.readinto(ret.data)
+                return ret
+
+    def my_unpickle(datapkl, myzip, base_name):
+        def my_rebuild_tensor(storage, storage_offset, size, stride, requires_grad, backward_hooks, metadata=None):
+            storage_type = storage[1]
+            obj_key = storage[2]
+            return Tensor(shape=size, dtype=storage_type, loadinfo=(
+                myzip, base_name, storage_offset,
+                obj_key, size, storage_type
+            ))
+
+        class MyUnpickler(pickle.Unpickler):
+            def find_class(self, *p):
+                if p == ('torch', 'HalfStorage'): return np.float16
+                if p == ('torch', 'FloatStorage'): return np.float32
+                if p == ('torch._utils', '_rebuild_tensor_v2'): return my_rebuild_tensor
+                if p == ('collections', 'OrderedDict'): return dict
+                raise ValueError(f'Unrecognized pickle {p}')
+
+            def persistent_load(self, pid):
+                return pid
+
+        return MyUnpickler(datapkl).load()
+
+    myzip = zipfile.ZipFile(fname, 'r')
+    base_name = myzip.namelist()[0].split('/', 1)[0]
+    with myzip.open(f'{base_name}/data.pkl') as myfile:
+        model = my_unpickle(myfile, myzip, base_name)
+    return model
+
 def get_fname(p):
     fname = "/consolidated.0" + str(p) + ".pth"
     return fname
@@ -133,51 +177,6 @@ def process_part(p):
         fout.write(struct.pack("i", len(text)))
         fout.write(text)
 
-
-    def load_model(fname):
-        class Tensor():
-            def __init__(self, shape, dtype, loadinfo):
-                self.shape = shape
-                self.dtype = dtype
-                self.loadinfo = loadinfo
-
-            def numpy(self):
-                myzip, base_name, storage_offset, k, shape, dtype = self.loadinfo
-                with myzip.open(f'{base_name}/data/{k}') as myfile:
-                    bytes_size = np.dtype(self.dtype).itemsize
-                    myfile.seek(storage_offset * bytes_size, 1)
-                    ret = np.empty(shape, dtype=dtype)
-                    myfile.readinto(ret.data)
-                    return ret
-
-        def my_unpickle(datapkl, myzip, base_name):
-            def my_rebuild_tensor(storage, storage_offset, size, stride, requires_grad, backward_hooks, metadata=None):
-                storage_type = storage[1]
-                obj_key = storage[2]
-                return Tensor(shape=size, dtype=storage_type, loadinfo=(
-                    myzip, base_name, storage_offset,
-                    obj_key, size, storage_type
-                ))
-
-            class MyUnpickler(pickle.Unpickler):
-                def find_class(self, *p):
-                    if p == ('torch', 'HalfStorage'): return np.float16
-                    if p == ('torch', 'FloatStorage'): return np.float32
-                    if p == ('torch._utils', '_rebuild_tensor_v2'): return my_rebuild_tensor
-                    if p == ('collections', 'OrderedDict'): return dict
-                    raise ValueError(f'Unrecognized pickle {p}')
-
-                def persistent_load(self, pid):
-                    return pid
-
-            return MyUnpickler(datapkl).load()
-
-        myzip = zipfile.ZipFile(fname, 'r')
-        base_name = myzip.namelist()[0].split('/', 1)[0]
-        with myzip.open(f'{base_name}/data.pkl') as myfile:
-            model = my_unpickle(myfile, myzip, base_name)
-        return model
-
     model = load_model(fname_model)
 
     q = queue.Queue(maxsize=2)
@@ -234,7 +233,6 @@ def writer():
 
         # data
         memout.write(data.tobytes())
-        # data.tofile(memout)
         q.put(memout)
 
     q.join()

Output file: " + fname_out + ", (part ", p, ")") - print("") with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor: futures = {executor.submit(process_part, p) for p in range(n_parts)} From 0deb075a3f8bf5445f4148a2d75c8a179d42da5b Mon Sep 17 00:00:00 2001 From: Dmitry Wolf Date: Wed, 15 Mar 2023 22:57:59 +0300 Subject: [PATCH 3/5] clean --- convert-pth-to-ggml.py | 90 +++++++++++++++++++++--------------------- 1 file changed, 44 insertions(+), 46 deletions(-) diff --git a/convert-pth-to-ggml.py b/convert-pth-to-ggml.py index 0556149582885..e239b85496fe4 100644 --- a/convert-pth-to-ggml.py +++ b/convert-pth-to-ggml.py @@ -84,6 +84,50 @@ def get_n_parts(dim): print(f'Parts to process: {n_parts}') +def load_model(fname): + class Tensor(): + def __init__(self, shape, dtype, loadinfo): + self.shape = shape + self.dtype = dtype + self.loadinfo = loadinfo + + def numpy(self): + myzip, base_name, storage_offset, k, shape, dtype = self.loadinfo + with myzip.open(f'{base_name}/data/{k}') as myfile: + bytes_size = np.dtype(self.dtype).itemsize + myfile.seek(storage_offset * bytes_size, 1) + ret = np.empty(shape, dtype=dtype) + myfile.readinto(ret.data) + return ret + + def my_unpickle(datapkl, myzip, base_name): + def my_rebuild_tensor(storage, storage_offset, size, stride, requires_grad, backward_hooks, metadata=None): + storage_type = storage[1] + obj_key = storage[2] + return Tensor(shape=size, dtype=storage_type, loadinfo=( + myzip, base_name, storage_offset, + obj_key, size, storage_type + )) + + class MyUnpickler(pickle.Unpickler): + def find_class(self, *p): + if p == ('torch', 'HalfStorage'): return np.float16 + if p == ('torch', 'FloatStorage'): return np.float32 + if p == ('torch._utils', '_rebuild_tensor_v2'): return my_rebuild_tensor + if p == ('collections', 'OrderedDict'): return dict + raise ValueError(f'Unrecognized pickle {p}') + + def persistent_load(self, pid): + return pid + + return MyUnpickler(datapkl).load() + + myzip = zipfile.ZipFile(fname, 'r') + base_name = myzip.namelist()[0].split('/', 1)[0] + with myzip.open(f'{base_name}/data.pkl') as myfile: + model = my_unpickle(myfile, myzip, base_name) + return model + def get_fname(p): fname = "/consolidated.0" + str(p) + ".pth" return fname @@ -133,51 +177,6 @@ def process_part(p): fout.write(struct.pack("i", len(text))) fout.write(text) - - def load_model(fname): - class Tensor(): - def __init__(self, shape, dtype, loadinfo): - self.shape = shape - self.dtype = dtype - self.loadinfo = loadinfo - - def numpy(self): - myzip, base_name, storage_offset, k, shape, dtype = self.loadinfo - with myzip.open(f'{base_name}/data/{k}') as myfile: - bytes_size = np.dtype(self.dtype).itemsize - myfile.seek(storage_offset * bytes_size, 1) - ret = np.empty(shape, dtype=dtype) - myfile.readinto(ret.data) - return ret - - def my_unpickle(datapkl, myzip, base_name): - def my_rebuild_tensor(storage, storage_offset, size, stride, requires_grad, backward_hooks, metadata=None): - storage_type = storage[1] - obj_key = storage[2] - return Tensor(shape=size, dtype=storage_type, loadinfo=( - myzip, base_name, storage_offset, - obj_key, size, storage_type - )) - - class MyUnpickler(pickle.Unpickler): - def find_class(self, *p): - if p == ('torch', 'HalfStorage'): return np.float16 - if p == ('torch', 'FloatStorage'): return np.float32 - if p == ('torch._utils', '_rebuild_tensor_v2'): return my_rebuild_tensor - if p == ('collections', 'OrderedDict'): return dict - raise ValueError(f'Unrecognized pickle {p}') - - def persistent_load(self, pid): 
From 5f0ada21bf8ac8a69231f28cc40b1aa59bfcaeb4 Mon Sep 17 00:00:00 2001
From: Dmitry Wolf
Date: Thu, 16 Mar 2023 00:30:07 +0300
Subject: [PATCH 5/5] fix

---
 convert-pth-to-ggml.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/convert-pth-to-ggml.py b/convert-pth-to-ggml.py
index db30a7e1ffca3..bef3618cb7281 100644
--- a/convert-pth-to-ggml.py
+++ b/convert-pth-to-ggml.py
@@ -184,12 +184,15 @@ def process_part(p):
     def writer():
         while True:
             item = q.get()
+            if item is None:
+                q.task_done()
+                break
             fout.write(item.getvalue())
             q.task_done()
 
     threading.Thread(target=writer, daemon=True).start()
 
-    for k, v in (t := tqdm(model.items())):
+    for k, v in (t := tqdm(model.items(), bar_format="{r_bar} {percentage:3.0f}% |{bar:50} | {desc}")):
         t.set_description(f"Processing {k} with shape {tuple(v.shape)} and type {np.dtype(v.dtype)}")
         name = k
         shape = v.shape
@@ -235,6 +238,7 @@ def writer():
         memout.write(data.tobytes())
         q.put(memout)
 
+    q.put(None)
     q.join()
 
     model = None
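
Patch 5 closes the loop on writer-thread shutdown: without a sentinel, the daemon writer from patch 2 blocks on q.get() forever after its part finishes. Enqueueing None after the last buffer lets the thread acknowledge the sentinel and exit cleanly before the output file is closed. A self-contained sketch of the sentinel pattern, with integers standing in for the serialized buffers:

import queue
import threading

q = queue.Queue(maxsize=2)
written = []

def writer():
    while True:
        item = q.get()
        if item is None:      # sentinel: producer is finished
            q.task_done()     # account for it so q.join() can return
            break
        written.append(item)  # stands in for fout.write(...)
        q.task_done()

t = threading.Thread(target=writer)
t.start()

for i in range(5):
    q.put(i)
q.put(None)   # signal shutdown after the last real item

q.join()      # every item, including the sentinel, has been processed
t.join()      # the writer thread has actually exited
print(written)  # [0, 1, 2, 3, 4]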