Add functionary support (#784) · LOGp/llama-cpp-python@3af7b21 · GitHub
Add functionary support (abetlen#784)
* Add common grammars and json-schema-to-grammar utility function from llama.cpp
* Pass functions to format function
* Add basic functionary formatting
* Add LlamaChatHandler for more complex chat use cases
* Add function calling example notebook
* Add support for regular chat completions alongside function calling
1 parent df31303 commit 3af7b21
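As context for the diffs below, here is a minimal sketch of how the new function-calling path might be exercised through Llama.create_chat_completion. The chat_format constructor argument and the functions/function_call parameters come from the llama_cpp/llama.py changes in this commit; the model path, the chat format name "functionary", and the accepted values for function_call are illustrative assumptions.

import json

from llama_cpp import Llama

# Model path and chat format name are assumptions for illustration only.
llm = Llama(
    model_path="./models/functionary-7b.Q4_0.gguf",  # hypothetical local GGUF file
    chat_format="functionary",                       # selects the new chat completion handler
    n_ctx=4096,
)

# Same function schema as in the notebook added by this commit.
functions = [
    {
        "name": "get_current_weather",
        "description": "Get the current weather in a given location",
        "parameters": {
            "type": "object",
            "properties": {
                "location": {"type": "string", "description": "The city and state, e.g. San Francisco, CA"},
                "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
            },
            "required": ["location"],
        },
    }
]

response = llm.create_chat_completion(
    messages=[{"role": "user", "content": "What's the weather like in Boston?"}],
    functions=functions,
    function_call="auto",  # mirrors the OpenAI convention; accepted values here are an assumption
)
print(json.dumps(response, indent=2))

The example notebook added below drives the same feature through the openai client pointed at a locally running server instead.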

File tree: 5 files changed, +936 -99 lines changed


examples/notebooks/Functions.ipynb

Lines changed: 225 additions & 0 deletions
@@ -0,0 +1,225 @@
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{\n",
      "  \"id\": \"chatcmpl-a6db1bbb-a128-4c28-88fe-30717ec806b2\",\n",
      "  \"object\": \"chat.completion\",\n",
      "  \"created\": 1698989577,\n",
      "  \"model\": \"gpt-3.5-turbo-0613\",\n",
      "  \"choices\": [\n",
      "    {\n",
      "      \"index\": 0,\n",
      "      \"message\": {\n",
      "        \"role\": \"assistant\",\n",
      "        \"content\": \"The current weather in Boston is sunny with a temperature of 72 degrees\"\n",
      "      },\n",
      "      \"finish_reason\": \"length\"\n",
      "    }\n",
      "  ],\n",
      "  \"usage\": {\n",
      "    \"prompt_tokens\": 135,\n",
      "    \"completion_tokens\": 16,\n",
      "    \"total_tokens\": 151\n",
      "  }\n",
      "}\n"
     ]
    }
   ],
   "source": [
    "import openai\n",
    "import json\n",
    "\n",
    "openai.api_key = \"sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx\" # can be anything\n",
    "openai.api_base = \"http://100.64.159.73:8000/v1\"\n",
    "\n",
    "# Example dummy function hard coded to return the same weather\n",
    "# In production, this could be your backend API or an external API\n",
    "def get_current_weather(location, unit=\"fahrenheit\"):\n",
    "    \"\"\"Get the current weather in a given location\"\"\"\n",
    "    weather_info = {\n",
    "        \"location\": location,\n",
    "        \"temperature\": \"72\",\n",
    "        \"unit\": unit,\n",
    "        \"forecast\": [\"sunny\", \"windy\"],\n",
    "    }\n",
    "    return json.dumps(weather_info)\n",
    "\n",
    "def run_conversation():\n",
    "    # Step 1: send the conversation and available functions to GPT\n",
    "    messages = [{\"role\": \"user\", \"content\": \"What's the weather like in Boston?\"}]\n",
    "    functions = [\n",
    "        {\n",
    "            \"name\": \"get_current_weather\",\n",
    "            \"description\": \"Get the current weather in a given location\",\n",
    "            \"parameters\": {\n",
    "                \"type\": \"object\",\n",
    "                \"properties\": {\n",
    "                    \"location\": {\n",
    "                        \"type\": \"string\",\n",
    "                        \"description\": \"The city and state, e.g. San Francisco, CA\",\n",
    "                    },\n",
    "                    \"unit\": {\"type\": \"string\", \"enum\": [\"celsius\", \"fahrenheit\"]},\n",
    "                },\n",
    "                \"required\": [\"location\"],\n",
    "            },\n",
    "        }\n",
    "    ]\n",
    "    response = openai.ChatCompletion.create(\n",
    "        model=\"gpt-3.5-turbo-0613\",\n",
    "        messages=messages,\n",
    "        functions=functions,\n",
    "        function_call=\"auto\", # auto is default, but we'll be explicit\n",
    "    )\n",
    "    response_message = response[\"choices\"][0][\"message\"]\n",
    "\n",
    "    # Step 2: check if GPT wanted to call a function\n",
    "    if response_message.get(\"function_call\"):\n",
    "        # Step 3: call the function\n",
    "        # Note: the JSON response may not always be valid; be sure to handle errors\n",
    "        available_functions = {\n",
    "            \"get_current_weather\": get_current_weather,\n",
    "        } # only one function in this example, but you can have multiple\n",
    "        function_name = response_message[\"function_call\"][\"name\"]\n",
    "        fuction_to_call = available_functions[function_name]\n",
    "        function_args = json.loads(response_message[\"function_call\"][\"arguments\"])\n",
    "        function_response = fuction_to_call(\n",
    "            location=function_args.get(\"location\"),\n",
    "            unit=function_args.get(\"unit\"),\n",
    "        )\n",
    "\n",
    "        # Step 4: send the info on the function call and function response to GPT\n",
    "        messages.append(response_message) # extend conversation with assistant's reply\n",
    "        messages.append(\n",
    "            {\n",
    "                \"role\": \"function\",\n",
    "                \"name\": function_name,\n",
    "                \"content\": function_response,\n",
    "            }\n",
    "        ) # extend conversation with function response\n",
    "        second_response = openai.ChatCompletion.create(\n",
    "            model=\"gpt-3.5-turbo-0613\",\n",
    "            messages=messages,\n",
    "        ) # get a new response from GPT where it can see the function response\n",
    "        return second_response\n",
    "    else:\n",
    "        print(response)\n",
    "        print(\"No function\")\n",
    "\n",
    "print(run_conversation())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "name='Jason' age=25\n"
     ]
    }
   ],
   "source": [
    "from pydantic import BaseModel\n",
    "from instructor import patch\n",
    "\n",
    "patch()\n",
    "\n",
    "class UserDetail(BaseModel):\n",
    "    name: str\n",
    "    age: int\n",
    "\n",
    "user: UserDetail = openai.ChatCompletion.create(\n",
    "    model=\"gpt-3.5-turbo\",\n",
    "    response_model=UserDetail,\n",
    "    messages=[\n",
    "        {\"role\": \"user\", \"content\": \"Extract Jason is 25 years old\"},\n",
    "    ]\n",
    ")\n",
    "print(user)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{\n",
      "  \"id\": \"chatcmpl-59bcefad-9df5-4d6b-802c-5537b3e9044e\",\n",
      "  \"object\": \"chat.completion\",\n",
      "  \"created\": 1698989585,\n",
      "  \"model\": \"gpt-3.5-turbo-0613\",\n",
      "  \"choices\": [\n",
      "    {\n",
      "      \"index\": 0,\n",
      "      \"message\": {\n",
      "        \"role\": \"assistant\",\n",
      "        \"content\": \"I don't have up-to-date information on the current weather conditions\"\n",
      "      },\n",
      "      \"finish_reason\": \"length\"\n",
      "    }\n",
      "  ],\n",
      "  \"usage\": {\n",
      "    \"prompt_tokens\": 62,\n",
      "    \"completion_tokens\": 16,\n",
      "    \"total_tokens\": 78\n",
      "  }\n",
      "}\n"
     ]
    }
   ],
   "source": [
    "response = openai.ChatCompletion.create(\n",
    "    model=\"gpt-3.5-turbo-0613\",\n",
    "    messages=[\n",
    "        {\"role\": \"user\", \"content\": \"What's the weather like in Boston?\"}\n",
    "    ]\n",
    ")\n",
    "print(response)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "python-3.8.10",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.5+"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
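The commit message also mentions bringing over llama.cpp's common grammars and a json-schema-to-grammar utility. As a rough sketch of how grammar-constrained output relates to the structured-output cell above, the snippet below builds a LlamaGrammar and passes it through the grammar parameter that create_chat_completion accepts in the diff that follows. The GBNF text, model path, and prompt are illustrative assumptions, and the json-schema-to-grammar helper itself is not shown because its name and signature do not appear in this section.

from llama_cpp import Llama
from llama_cpp.llama_grammar import LlamaGrammar

# Tiny GBNF grammar that only admits a flat JSON object with string values.
# The grammar text is an illustrative assumption, not part of this commit.
GBNF = r'''
root   ::= "{" ws pair ("," ws pair)* ws "}"
pair   ::= string ws ":" ws string
string ::= "\"" [^"]* "\""
ws     ::= [ \t\n]*
'''

llm = Llama(model_path="./models/model.Q4_0.gguf")  # hypothetical model path
grammar = LlamaGrammar.from_string(GBNF)

# The grammar is forwarded along with the other sampling parameters
# (see the llama.py diff below), so generation is constrained to match it.
result = llm.create_chat_completion(
    messages=[{"role": "user", "content": "Describe Jason, age 25, as a JSON object."}],
    grammar=grammar,
    max_tokens=128,
)
print(result["choices"][0]["message"]["content"])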

llama_cpp/llama.py

Lines changed: 7 additions & 87 deletions
@@ -24,7 +24,7 @@
 from . import llama_cpp
 from .llama_types import *
 from .llama_grammar import LlamaGrammar
-from . import llama_chat_format
+import llama_cpp.llama_chat_format as llama_chat_format
 
 import numpy as np
 import numpy.typing as npt
@@ -428,7 +428,7 @@ def __init__(
 
         if self.verbose:
             print(llama_cpp.llama_print_system_info().decode("utf-8"), file=sys.stderr)
-
+
         self.chat_format = chat_format
 
         self._n_vocab = self.n_vocab()
@@ -1539,78 +1539,6 @@ def __call__(
             grammar=grammar,
         )
 
-    def _convert_text_completion_to_chat(
-        self, completion: Completion
-    ) -> ChatCompletion:
-        return {
-            "id": "chat" + completion["id"],
-            "object": "chat.completion",
-            "created": completion["created"],
-            "model": completion["model"],
-            "choices": [
-                {
-                    "index": 0,
-                    "message": {
-                        "role": "assistant",
-                        "content": completion["choices"][0]["text"],
-                    },
-                    "finish_reason": completion["choices"][0]["finish_reason"],
-                }
-            ],
-            "usage": completion["usage"],
-        }
-
-    def _convert_text_completion_chunks_to_chat(
-        self,
-        chunks: Iterator[CompletionChunk],
-    ) -> Iterator[ChatCompletionChunk]:
-        for i, chunk in enumerate(chunks):
-            if i == 0:
-                yield {
-                    "id": "chat" + chunk["id"],
-                    "model": chunk["model"],
-                    "created": chunk["created"],
-                    "object": "chat.completion.chunk",
-                    "choices": [
-                        {
-                            "index": 0,
-                            "delta": {
-                                "role": "assistant",
-                            },
-                            "finish_reason": None,
-                        }
-                    ],
-                }
-            yield {
-                "id": "chat" + chunk["id"],
-                "model": chunk["model"],
-                "created": chunk["created"],
-                "object": "chat.completion.chunk",
-                "choices": [
-                    {
-                        "index": 0,
-                        "delta": {
-                            "content": chunk["choices"][0]["text"],
-                        }
-                        if chunk["choices"][0]["finish_reason"] is None
-                        else {},
-                        "finish_reason": chunk["choices"][0]["finish_reason"],
-                    }
-                ],
-            }
-
-    def _convert_completion_to_chat(
-        self,
-        completion_or_chunks: Union[Completion, Iterator[CompletionChunk]],
-        stream: bool = False,
-    ) -> Union[ChatCompletion, Iterator[ChatCompletionChunk]]:
-        if stream:
-            chunks: Iterator[CompletionChunk] = completion_or_chunks  # type: ignore
-            return self._convert_text_completion_chunks_to_chat(chunks)
-        else:
-            completion: Completion = completion_or_chunks  # type: ignore
-            return self._convert_text_completion_to_chat(completion)
-
     def create_chat_completion(
         self,
         messages: List[ChatCompletionRequestMessage],
@@ -1648,19 +1576,12 @@ def create_chat_completion(
         Returns:
             Generated chat completion or a stream of chat completion chunks.
         """
-
-        format = llama_chat_format.get_chat_format(self.chat_format)
-        result = format(
+        handler = llama_chat_format.get_chat_completion_handler(self.chat_format)
+        return handler(
+            self,
             messages=messages,
-        )
-        prompt = result.prompt
-        if result.stop is not None:
-            stop = [] if stop is None else [stop] if isinstance(stop, str) else stop
-            rstop = result.stop if isinstance(result.stop, list) else [result.stop]
-            stop = stop + rstop
-
-        completion_or_chunks = self.create_completion(
-            prompt=prompt,
+            functions=functions,
+            function_call=function_call,
             temperature=temperature,
             top_p=top_p,
             top_k=top_k,
@@ -1678,7 +1599,6 @@ def create_chat_completion(
             logits_processor=logits_processor,
             grammar=grammar,
         )
-        return self._convert_completion_to_chat(completion_or_chunks, stream=stream)  # type: ignore
 
     def _free_model(self, *, _lbatch_free=llama_cpp._lib.llama_batch_free, _lfree_model=llama_cpp._lib.llama_free_model, _free=llama_cpp._lib.llama_free):
         batch = getattr(self, 'batch', None)
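The refactor above replaces inline prompt formatting and completion-to-chat conversion with a chat completion handler looked up by chat_format. Below is a rough sketch of the shape such a handler might take, inferred only from the call site visible in this diff; how handlers are registered inside llama_cpp.llama_chat_format is not shown here, any keyword arguments beyond those in the call above are assumptions, and the prompt format is deliberately trivial rather than functionary's.

from typing import Any, Iterator, List, Optional, Union

import llama_cpp
from llama_cpp.llama_types import (
    ChatCompletion,
    ChatCompletionChunk,
    ChatCompletionRequestMessage,
)


def example_chat_handler(
    llama: llama_cpp.Llama,
    messages: List[ChatCompletionRequestMessage],
    functions: Optional[List[dict]] = None,
    function_call: Optional[Union[str, dict]] = None,
    **kwargs: Any,  # temperature, top_p, stop, grammar, ... forwarded by create_chat_completion
) -> Union[ChatCompletion, Iterator[ChatCompletionChunk]]:
    # Render the chat as a flat prompt (illustrative only; a real handler such as
    # functionary's would emit its model-specific template and function schemas).
    prompt = "".join(f"{m['role']}: {m['content']}\n" for m in messages) + "assistant:"
    kwargs.pop("stream", None)  # streaming omitted to keep the sketch short
    completion = llama.create_completion(prompt=prompt, **kwargs)
    # Convert the text completion into a chat completion, mirroring the helpers
    # that this commit removes from llama.py.
    return {
        "id": "chat" + completion["id"],
        "object": "chat.completion",
        "created": completion["created"],
        "model": completion["model"],
        "choices": [
            {
                "index": 0,
                "message": {
                    "role": "assistant",
                    "content": completion["choices"][0]["text"],
                },
                "finish_reason": completion["choices"][0]["finish_reason"],
            }
        ],
        "usage": completion["usage"],
    }

A real functionary handler would also render the function schemas into the prompt and parse any function call the model emits; only get_chat_completion_handler(self.chat_format) is visible in this section, so those details are left out.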
