3 files changed: +24 −2

@@ -577,6 +577,12 @@ python3 -m llama_cpp.server --model models/7B/llama-model.gguf --chat_format chatml
That will format the prompt according to how the model expects it. You can find the prompt format in the model card.
For possible options, see [llama_cpp/llama_chat_format.py](llama_cpp/llama_chat_format.py) and look for lines starting with "@register_chat_format".
+ If you have `huggingface-hub` installed, you can also use the `--hf_model_repo_id` flag to load a model from the Hugging Face Hub.
+
+ ```bash
+ python3 -m llama_cpp.server --hf_model_repo_id Qwen/Qwen1.5-0.5B-Chat-GGUF --model '*q8_0.gguf'
+ ```
+
### Web Server Features

- [Local Copilot replacement](https://llama-cpp-python.readthedocs.io/en/latest/server/#code-completion)
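For reference, the new flag is a thin wrapper around `Llama.from_pretrained` (see the server change below), which can also be called directly from Python once `huggingface-hub` is installed. A minimal sketch mirroring the CLI example above, with the same repo id and filename glob:

```python
from llama_cpp import Llama

# Downloads the first repo file matching the glob from the Hugging Face Hub
# (skipping the download if it is already cached), then loads it.
llm = Llama.from_pretrained(
    repo_id="Qwen/Qwen1.5-0.5B-Chat-GGUF",
    filename="*q8_0.gguf",
)
```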
@@ -120,9 +120,20 @@ def load_llama_from_model_settings(settings: ModelSettings) -> llama_cpp.Llama:
                kv_overrides[key] = float(value)
            else:
                raise ValueError(f"Unknown value type {value_type}")
+
+    import functools

-    _model = llama_cpp.Llama(
-        model_path=settings.model,
+    kwargs = {}
+
+    if settings.hf_model_repo_id is not None:
+        create_fn = functools.partial(llama_cpp.Llama.from_pretrained, repo_id=settings.hf_model_repo_id, filename=settings.model)
+    else:
+        create_fn = llama_cpp.Llama
+        kwargs["model_path"] = settings.model
+
+
+    _model = create_fn(
+        **kwargs,
        # Model Params
        n_gpu_layers=settings.n_gpu_layers,
        main_gpu=settings.main_gpu,
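Using `functools.partial` here keeps the call site unified: the Hub-specific arguments are bound up front, so both `llama_cpp.Llama` and `Llama.from_pretrained` receive the same shared model parameters (`n_gpu_layers`, `main_gpu`, and so on) through a single `create_fn(**kwargs, ...)` call. A hedged usage sketch of the new code path, assuming the module layout shown in this diff:

```python
from llama_cpp.server.model import load_llama_from_model_settings
from llama_cpp.server.settings import ModelSettings

# When hf_model_repo_id is set, `model` is treated as a filename (or glob)
# inside the Hub repo rather than as a local path.
settings = ModelSettings(
    hf_model_repo_id="Qwen/Qwen1.5-0.5B-Chat-GGUF",
    model="*q8_0.gguf",
)
llm = load_llama_from_model_settings(settings)
```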
@@ -143,6 +143,11 @@ class ModelSettings(BaseSettings):
        default=None,
        description="The model name or path to a pretrained HuggingFace tokenizer model. Same as you would pass to AutoTokenizer.from_pretrained().",
    )
+    # Loading from HuggingFace Model Hub
+    hf_model_repo_id: Optional[str] = Field(
+        default=None,
+        description="The model repo id to download the model from the HuggingFace Hub.",
+    )
    # Speculative Decoding
    draft_model: Optional[str] = Field(
        default=None,
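Because the new field defaults to `None`, existing configurations that point `model` at a local path are unaffected. A quick sanity check, assuming `model` stays the only required field:

```python
from llama_cpp.server.settings import ModelSettings

# Existing local-path configs keep working unchanged.
local = ModelSettings(model="models/7B/llama-model.gguf")
assert local.hf_model_repo_id is None

# With the new field set, `model` becomes the filename/glob within the repo.
hub = ModelSettings(model="*q8_0.gguf", hf_model_repo_id="Qwen/Qwen1.5-0.5B-Chat-GGUF")
assert hub.hf_model_repo_id == "Qwen/Qwen1.5-0.5B-Chat-GGUF"
```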