docs(examples): Add huggingface pull example · davidvonthenen/llama-cpp-python@252e1ff

Commit 252e1ff

docs(examples): Add huggingface pull example
1 parent bd4ec2e commit 252e1ff

File tree

1 file changed (+39 −0 lines changed)

examples/hf_pull/main.py

Lines changed: 39 additions & 0 deletions
import llama_cpp
import llama_cpp.llama_tokenizer


# Pull the GGUF weights from the Hugging Face Hub and pair them with the
# original Hugging Face tokenizer so the chat template is applied correctly.
llama = llama_cpp.Llama.from_pretrained(
    repo_id="Qwen/Qwen1.5-0.5B-Chat-GGUF",
    filename="*q8_0.gguf",  # glob pattern selecting the q8_0 quantization
    tokenizer=llama_cpp.llama_tokenizer.LlamaHFTokenizer.from_pretrained(
        "Qwen/Qwen1.5-0.5B"
    ),
    verbose=False,
)

# Ask a question and constrain the reply to a JSON object matching the schema.
response = llama.create_chat_completion(
    messages=[
        {
            "role": "user",
            "content": "What is the capital of France?",
        }
    ],
    response_format={
        "type": "json_object",
        "schema": {
            "type": "object",
            "properties": {
                "country": {"type": "string"},
                "capital": {"type": "string"},
            },
            "required": ["country", "capital"],
        },
    },
    stream=True,
)

# Stream the response, printing each content delta as it arrives.
for chunk in response:
    delta = chunk["choices"][0]["delta"]
    if "content" not in delta:
        continue  # skip chunks that only carry role or finish metadata
    print(delta["content"], end="", flush=True)

print()
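
For reference, the same request can also be made without streaming. This minimal sketch is not part of the commit; it reuses the `llama` object built above and relies on the fact that `create_chat_completion` with the default `stream=False` returns a single completion dict rather than an iterator of chunks:

# Non-streaming variant (sketch, not part of this commit): the full JSON
# object produced under the schema constraint arrives in one response dict.
result = llama.create_chat_completion(
    messages=[{"role": "user", "content": "What is the capital of France?"}],
    response_format={"type": "json_object"},
)
print(result["choices"][0]["message"]["content"])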
