8000 docs(examples): Add gradio chat example · ducky777/llama-cpp-python@bd4ec2e · GitHub
[go: up one dir, main page]

Skip to content

Commit bd4ec2e

Browse files
committed
docs(examples): Add gradio chat example
1 parent dcf38f6 commit bd4ec2e

File tree

2 files changed

+115
-0
lines changed

2 files changed

+115
-0
lines changed

examples/gradio_chat/local.py

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
"""Local Gradio chat example backed by an in-process llama.cpp model."""
import llama_cpp
import llama_cpp.llama_tokenizer

import gradio as gr

# Quantized Qwen 1.5 chat model pulled from the Hugging Face Hub.
_REPO_ID = "Qwen/Qwen1.5-0.5B-Chat-GGUF"
_FILENAME = "*q8_0.gguf"

# Use the base model's HF tokenizer so chat templating matches the original
# model rather than the GGUF-embedded tokenizer.
_tokenizer = llama_cpp.llama_tokenizer.LlamaHFTokenizer.from_pretrained(
    "Qwen/Qwen1.5-0.5B"
)

# Loads (and, on first run, downloads) the model; this is a module-level
# side effect so the single instance is shared by every chat request.
llama = llama_cpp.Llama.from_pretrained(
    repo_id=_REPO_ID,
    filename=_FILENAME,
    tokenizer=_tokenizer,
    verbose=False,
)

# Model name passed through to the OpenAI-compatible completion call below.
model = "gpt-3.5-turbo"
14+
15+
def predict(message, history):
    """Stream a chat reply for *message* given prior (user, assistant) turns.

    Yields the accumulated response text after each received chunk so that
    Gradio's ChatInterface renders a live, token-by-token stream.
    """
    # Flatten the (user, assistant) history pairs into OpenAI-style messages.
    messages = [
        {"role": role, "content": content}
        for user_msg, assistant_msg in history
        for role, content in (("user", user_msg), ("assistant", assistant_msg))
    ]
    messages.append({"role": "user", "content": message})

    stream = llama.create_chat_completion_openai_v1(
        model=model,
        messages=messages,
        stream=True,
    )

    text = ""
    for chunk in stream:
        delta = chunk.choices[0].delta.content
        # Some chunks (e.g. role-only or final deltas) carry no content.
        if delta:
            text += delta
            yield text
36+
37+
38+
# Client-side JS run on page load: force Gradio's dark theme by appending
# the ?__theme=dark query parameter and reloading once.
js = """function () {
  gradioURL = window.location.href
  if (!gradioURL.endsWith('?__theme=dark')) {
    window.location.replace(gradioURL + '?__theme=dark');
  }
}"""

# Hide the Gradio footer and let the chat area fill the viewport.
css = """
footer {
    visibility: hidden;
}
full-height {
    height: 100%;
}
"""

with gr.Blocks(theme=gr.themes.Soft(), js=js, css=css, fill_height=True) as demo:
    gr.ChatInterface(
        predict,
        fill_height=True,
        examples=[
            "What is the capital of France?",
            "Who was the first person on the moon?",
        ],
    )


if __name__ == "__main__":
    demo.launch()

examples/gradio_chat/server.py

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
"""Gradio chat client for an OpenAI-compatible llama.cpp server."""
import gradio as gr

from openai import OpenAI

# Point the OpenAI SDK at a locally running llama-cpp-python server.
# NOTE(review): the key is a placeholder — the SDK requires one to be set;
# presumably the local server does not validate it.
client = OpenAI(
    base_url="http://localhost:8000/v1",
    api_key="llama.cpp",
)

# Model name forwarded with each chat completion request.
model = "gpt-3.5-turbo"
11+
12+
def predict(message, history):
    """Stream a chat reply for *message* given prior (user, assistant) turns.

    Yields the accumulated response text after each received chunk so that
    Gradio's ChatInterface renders a live, token-by-token stream.
    """
    # Flatten the (user, assistant) history pairs into OpenAI-style messages.
    messages = [
        {"role": role, "content": content}
        for user_msg, assistant_msg in history
        for role, content in (("user", user_msg), ("assistant", assistant_msg))
    ]
    messages.append({"role": "user", "content": message})

    stream = client.chat.completions.create(
        model=model,
        messages=messages,
        stream=True,
    )

    text = ""
    for chunk in stream:
        delta = chunk.choices[0].delta.content
        # Some chunks (e.g. role-only or final deltas) carry no content.
        if delta:
            text += delta
            yield text
33+
34+
35+
# Client-side JS run on page load: force Gradio's dark theme by appending
# the ?__theme=dark query parameter and reloading once.
js = """function () {
  gradioURL = window.location.href
  if (!gradioURL.endsWith('?__theme=dark')) {
    window.location.replace(gradioURL + '?__theme=dark');
  }
}"""

# Hide the Gradio footer and let the chat area fill the viewport.
css = """
footer {
    visibility: hidden;
}
full-height {
    height: 100%;
}
"""

with gr.Blocks(theme=gr.themes.Soft(), js=js, css=css, fill_height=True) as demo:
    gr.ChatInterface(
        predict,
        fill_height=True,
        examples=[
            "What is the capital of France?",
            "Who was the first person on the moon?",
        ],
    )


if __name__ == "__main__":
    demo.launch()

0 commit comments

Comments
 (0)
0