8000 add nonce · hariag/llama-cpp-python@ea86797 · GitHub
[go: up one dir, main page]

Skip to content

Commit ea86797

Browse files
committed
add nonce
1 parent 5039c4f commit ea86797

File tree

4 files changed

+43
-1
lines changed

4 files changed

+43
-1
lines changed

llama_cpp/llama_types.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,8 @@ class CreateChatCompletionResponse(TypedDict):
9696
model: str
9797
choices: List["ChatCompletionResponseChoice"]
9898
usage: CompletionUsage
99+
nonce: Optional[str]
100+
s1: Optional[str]
99101

100102

101103
class ChatCompletionMessageToolCallChunkFunction(TypedDict):
@@ -143,7 +145,8 @@ class CreateChatCompletionStreamResponse(TypedDict):
143145
object: Literal["chat.completion.chunk"]
144146
created: int
145147
choices: List[ChatCompletionStreamResponseChoice]
146-
148+
nonce: Optional[str]
149+
s1: Optional[str]
147150

148151
class ChatCompletionFunctions(TypedDict):
149152
name: str

llama_cpp/server/app.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
from sse_starlette.sse import EventSourceResponse
2323
from starlette_context.plugins import RequestIdPlugin # type: ignore
2424
from starlette_context.middleware import RawContextMiddleware
25+
from llama_cpp.server.util import get_device_info
2526

2627
from llama_cpp.server.model import (
2728
LlamaProxy,
@@ -510,6 +511,7 @@ async def create_chat_completion(
510511
"logit_bias_type",
511512
"user",
512513
"min_tokens",
514+
"challenge"
513515
}
514516
kwargs = body.model_dump(exclude=exclude)
515517
llama = llama_proxy(body.model)
@@ -532,6 +534,7 @@ async def create_chat_completion(
532534
else:
533535
kwargs["logits_processor"].extend(_min_tokens_logits_processor)
534536

537+
nonce, s1 = get_device_info(body.challenge)
535538
iterator_or_completion: Union[
536539
llama_cpp.ChatCompletion, Iterator[llama_cpp.ChatCompletionChunk]
537540
] = await run_in_threadpool(llama.create_chat_completion, **kwargs)
@@ -544,6 +547,8 @@ async def create_chat_completion(
544547
# the iterator is valid and we can use it to stream the response.
545548
def iterator() -> Iterator[llama_cpp.ChatCompletionChunk]:
546549
yield first_response
550+
iterator_or_completion["nonce"] = nonce
551+
iterator_or_completion["s1"] = str(s1)
547552
yield from iterator_or_completion
548553
exit_stack.close()
549554

@@ -562,6 +567,8 @@ def iterator() -> Iterator[llama_cpp.ChatCompletionChunk]:
562567
)
563568
else:
564569
exit_stack.close()
570+
iterator_or_completion["nonce"] = nonce
571+
iterator_or_completion["s1"] = str(s1)
565572
return iterator_or_completion
566573

567574

llama_cpp/server/types.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
232232
frequency_penalty: Optional[float] = frequency_penalty_field
233233
logit_bias: Optional[Dict[str, float]] = Field(None)
234234
seed: Optional[int] = Field(None)
235+
challenge: Optional[Union[str, List[str]]] = Field(None)
235236
response_format: Optional[llama_cpp.ChatCompletionRequestResponseFormat] = Field(
236237
default=None,
237238
)

llama_cpp/server/util.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
import ctypes
import os

# Absolute path of the native hashing library that exports
# version() and device_info().
_HASH_LIB_PATH = "/usr/lib/x86_64-linux-gnu/libhash.so"

# Load the shared library once at import time and declare the C signature
# of device_info: it takes a NUL-terminated byte string and returns one.
rust_lib = ctypes.CDLL(_HASH_LIB_PATH)
rust_lib.device_info.argtypes = [ctypes.c_char_p]
rust_lib.device_info.restype = ctypes.c_char_p
7+
8+
def version():
    """Return whatever the native library's version() call yields.

    NOTE(review): no restype is declared for rust_lib.version, so ctypes
    applies its default return conversion — confirm against the C API.
    """
    lib_version = rust_lib.version()
    return lib_version
10+
11+
def device_info(key):
    """Query the native library for device information.

    The key is UTF-8 encoded before being handed to the C call; the raw
    byte string returned by the library is passed through unchanged.
    """
    return rust_lib.device_info(key.encode('utf-8'))
14+
15+
def get_device_info(key="123"):
    """Return the ``(nonce, s1)`` pair for the first device reported by
    the native hash library.

    Parameters:
        key: Challenge string forwarded to the library (defaults to the
            placeholder ``"123"``).

    Returns:
        A 2-tuple ``(nonce, s1)`` taken from the first entry of the
        ``"devices"`` list in the library's JSON response.

    Raises:
        Whatever ``json.loads`` raises if the library's payload is not
        valid JSON, and KeyError/IndexError if the expected structure
        (``{"devices": [{"nonce": ..., "s1": ...}, ...]}``) is absent.
    """
    # Local import kept deliberately so this edit does not touch the
    # module-level import block.
    import json

    payload = json.loads(device_info(key))
    # The library reports a list of devices; only the first one is used.
    first_device = payload["devices"][0]
    return (first_device["nonce"], first_device["s1"])
22+
23+
24+
if __name__ == "__main__":
    # Ad-hoc smoke test: query the native library with a sample key and
    # dump the raw response. device_index/device_uuid are kept from the
    # original script although nothing below reads them.
    key = "AMSMgPqkDGFPANDuJ1MpUiG3N7fcoVyABakcfQixnLa3"
    device_index = 0
    device_uuid = "30dcf980f95b736939b3da28170dc6f824a8901d456fc60da4c9156b4e4f8c20550081155abdbfeed01546846f0735731f565cddf5f7dc6d7804777fd9a796eb"
    print(device_info(key))
30+
31+

0 commit comments

Comments
 (0)
0