File tree Expand file tree Collapse file tree 1 file changed +25
-0
lines changed Expand file tree Collapse file tree 1 file changed +25
-0
lines changed Original file line number Diff line number Diff line change
1
+ import os
2
+ import argparse
3
+ import llama_cpp
4
+
5
+
6
def main(args):
    """Quantize a model file via ``llama_cpp.llama_model_quantize``.

    Args:
        args: Namespace-like object providing ``fname_inp`` (input model
            path), ``fname_out`` (output model path) and ``itype``
            (integer quantization type, passed through unchanged).

    Raises:
        RuntimeError: If the input file does not exist, if the output file
            already exists, or if the quantize call returns a non-zero code.
    """
    # Validate the paths straight from ``args``: the local names must not be
    # read before they are assigned, otherwise Python raises
    # UnboundLocalError instead of the intended RuntimeError.
    if not os.path.exists(args.fname_inp):
        raise RuntimeError(f"Input file does not exist ({args.fname_inp})")
    if os.path.exists(args.fname_out):
        raise RuntimeError(f"Output file already exists ({args.fname_out})")

    # The paths are handed to the quantize call as UTF-8 encoded bytes.
    fname_inp = args.fname_inp.encode("utf-8")
    fname_out = args.fname_out.encode("utf-8")
    itype = args.itype

    return_code = llama_cpp.llama_model_quantize(fname_inp, fname_out, itype)
    if return_code != 0:
        raise RuntimeError("Failed to quantize model")
17
+
18
+
19
if __name__ == "__main__":
    # Command-line entry point: parse the three positional arguments and
    # hand the resulting namespace to main().
    parser = argparse.ArgumentParser()
    parser.add_argument("fname_inp", type=str, help="Path to input model")
    parser.add_argument("fname_out", type=str, help="Path to output model")
    # main() reads ``args.itype``, so the destination must be ``itype``;
    # ``metavar`` keeps the name shown in usage/help as ``type``.
    parser.add_argument(
        "itype",
        metavar="type",
        type=int,
        help="Type of quantization (2: q4_0, 3: q4_1)",
    )
    args = parser.parse_args()
    main(args)
You can’t perform that action at this time.
0 commit comments