Add quantize example · coderonion/llama-cpp-python@b1babcf · GitHub

Commit b1babcf

Add quantize example
1 parent c8e13a7 commit b1babcf

File tree

1 file changed: +25 -0 lines changed

examples/low_level_api/quantize.py

Lines changed: 25 additions & 0 deletions
@@ -0,0 +1,25 @@
import os
import argparse

import llama_cpp


def main(args):
    # llama_cpp expects byte strings for the model paths.
    fname_inp = args.fname_inp.encode("utf-8")
    fname_out = args.fname_out.encode("utf-8")
    if not os.path.exists(fname_inp):
        raise RuntimeError(f"Input file does not exist ({fname_inp})")
    if os.path.exists(fname_out):
        raise RuntimeError(f"Output file already exists ({fname_out})")
    itype = args.itype
    return_code = llama_cpp.llama_model_quantize(fname_inp, fname_out, itype)
    if return_code != 0:
        raise RuntimeError("Failed to quantize model")


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("fname_inp", type=str, help="Path to input model")
    parser.add_argument("fname_out", type=str, help="Path to output model")
    parser.add_argument("itype", type=int, help="Type of quantization (2: q4_0, 3: q4_1)")
    args = parser.parse_args()
    main(args)
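
For context, a minimal invocation sketch of the example: the script takes an input model path, an output path, and the integer quantization type (2 for q4_0, 3 for q4_1). The model file names below are hypothetical and not part of this commit:

python3 examples/low_level_api/quantize.py models/7B/ggml-model-f16.bin models/7B/ggml-model-q4_0.bin 2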

0 commit comments