From 06ccb41b4a81a880895f3e52ef439865ef1e75a7 Mon Sep 17 00:00:00 2001
From: Ivan Bondyrev
Date: Fri, 26 Apr 2024 15:02:05 +0200
Subject: [PATCH] fix quantize example

---
 examples/low_level_api/quantize.py | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/examples/low_level_api/quantize.py b/examples/low_level_api/quantize.py
index 8bd03f88a..cdb038a71 100644
--- a/examples/low_level_api/quantize.py
+++ b/examples/low_level_api/quantize.py
@@ -4,14 +4,16 @@
 
 
 def main(args):
+    fname_inp = args.fname_inp.encode("utf-8")
+    fname_out = args.fname_out.encode("utf-8")
     if not os.path.exists(fname_inp):
         raise RuntimeError(f"Input file does not exist ({fname_inp})")
     if os.path.exists(fname_out):
         raise RuntimeError(f"Output file already exists ({fname_out})")
-    fname_inp = args.fname_inp.encode("utf-8")
-    fname_out = args.fname_out.encode("utf-8")
-    itype = args.itype
-    return_code = llama_cpp.llama_model_quantize(fname_inp, fname_out, itype)
+    ftype = args.type
+    args = llama_cpp.llama_model_quantize_default_params()
+    args.ftype = ftype
+    return_code = llama_cpp.llama_model_quantize(fname_inp, fname_out, args)
     if return_code != 0:
         raise RuntimeError("Failed to quantize model")
 
@@ -20,6 +22,7 @@ def main(args):
     parser = argparse.ArgumentParser()
     parser.add_argument("fname_inp", type=str, help="Path to input model")
     parser.add_argument("fname_out", type=str, help="Path to output model")
-    parser.add_argument("type", type=int, help="Type of quantization (2: q4_0, 3: q4_1)")
+    parser.add_argument("type", type=int, help="Type of quantization (2: q4_0, 3: q4_1), see llama_cpp.py for enum")
     args = parser.parse_args()
     main(args)
+