1 file changed: +39 −0 lines changed
1
+ import llama_cpp
2
+ import llama_cpp .llama_tokenizer
3
+
4
+
5
+ llama = llama_cpp .Llama .from_pretrained (
6
+ repo_id = "Qwen/Qwen1.5-0.5B-Chat-GGUF" ,
7
+ filename = "*q8_0.gguf" ,
8
+ tokenizer = llama_cpp .llama_tokenizer .LlamaHFTokenizer .from_pretrained ("Qwen/Qwen1.5-0.5B" ),
9
+ verbose = False
10
+ )
11
+
12
+ response = llama .create_chat_completion (
13
+ messages = [
14
+ {
15
+ "role" : "user" ,
16
+ "content" : "What is the capital of France?"
17
+ }
18
+ ],
19
+ response_format = {
20
+ "type" : "json_object" ,
21
+ "schema" : {
22
+ "type" : "object" ,
23
+ "properties" : {
24
+ "country" : {"type" : "string" },
25
+ "capital" : {"type" : "string" }
26
+ },
27
+ "required" : ["country" , "capital" ],
28
+ }
29
+ },
30
+ stream = True
31
+ )
32
+
33
+ for chunk in response :
34
+ delta = chunk ["choices" ][0 ]["delta" ]
35
+ if "content" not in delta :
36
+ continue
37
+ print (delta ["content" ], end = "" , flush = True )
38
+
39
+ print ()
You can’t perform that action at this time.
0 commit comments