new version · abetlen/llama-cpp-python@0d34e84 · GitHub

Commit 0d34e84

committed
new version
1 parent 5a57d7f commit 0d34e84

File tree

4 files changed: +20 −16 lines changed

llama_cpp/llama.py
llama_cpp/managers/cache.py
llama_cpp/server/model.py
scripts/start.sh

llama_cpp/llama.py

Lines changed: 10 additions & 11 deletions
@@ -355,16 +355,15 @@ def __init__(
             verbose=self.verbose,
         )
 
-        if self.lora_path:
-            if self._model.apply_lora_from_file(
-                self.lora_path,
-                self.lora_scale,
-                self.lora_base,
-                self.n_threads,
-            ):
-                raise RuntimeError(
-                    f"Failed to apply LoRA from lora path: {self.lora_path} to base path: {self.lora_base}"
-                )
+        if self.lora_path and self._model.apply_lora_from_file(
+            self.lora_path,
+            self.lora_scale,
+            self.lora_base,
+            self.n_threads,
+        ):
+            raise RuntimeError(
+                f"Failed to apply LoRA from lora path: {self.lora_path} to base path: {self.lora_base}"
+            )
 
         if self.verbose:
             print(llama_cpp.llama_print_system_info().decode("utf-8"), file=sys.stderr)
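The refactor above only flattens the nested check: apply_lora_from_file is still called only when lora_path is set, and a non-zero return still raises. For context, a minimal sketch of how these constructor arguments reach that code path (the model and adapter paths below are placeholders, not files from this commit):

    from llama_cpp import Llama

    # Placeholder paths; when lora_path is set, __init__ calls apply_lora_from_file
    # with lora_path, lora_scale, lora_base and n_threads, as in the diff above.
    llm = Llama(
        model_path="./base-model.gguf",
        lora_path="./adapter.bin",
        lora_base="./base-model-f16.gguf",  # optional base model used while applying the LoRA
        lora_scale=1.0,
    )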
@@ -450,7 +449,7 @@ def __init__(
         if self.verbose:
             print(f"Using fallback chat format: {chat_format}", file=sys.stderr)
 
-    def _load_control_vector(self, filepath: str, strength: float = 1.9):
+    def _load_control_vector(self, filepath: str, strength: float = 1.7):
         if not os.path.exists(filepath):
             raise ValueError(f"Control vector file does not exist: {filepath}")
         if not filepath.endswith(".json"):
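The only change here is the default strength dropping from 1.9 to 1.7; the loader's body beyond the validation shown is fork-specific and not part of this diff. A rough sketch of what such a JSON-based loader might look like, where everything past the two checks is an assumption rather than the fork's actual implementation:

    import json
    import os

    def _load_control_vector(self, filepath: str, strength: float = 1.7):
        # The two validation checks mirror the diff above.
        if not os.path.exists(filepath):
            raise ValueError(f"Control vector file does not exist: {filepath}")
        if not filepath.endswith(".json"):
            raise ValueError(f"Control vector file must be a .json file: {filepath}")
        # Assumed completion: read per-layer directions and scale them by `strength`.
        with open(filepath) as f:
            vector = json.load(f)
        return {layer: [v * strength for v in values] for layer, values in vector.items()}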

llama_cpp/managers/cache.py

Lines changed: 4 additions & 4 deletions
@@ -77,13 +77,13 @@ def create_completion_with_cache(
         **kwargs,
     ):
         """Predict the given prompt with the given max tokens and cache the result."""
-
+
         if not stop_tokens:
             stop_tokens = ["</s>"]
 
         print(prompt)
         prompt = prompt.strip()
-
+
         if self.current_state == prompt:
             print("Prompt is the same as previous. Assuming new turn")
             # self.reset()
@@ -96,7 +96,7 @@ def create_completion_with_cache(
             print("Not using cached state")
             partial_prompt = prompt
             self.reset()
-
+
         if to_eval:
             print("Evaluating partial prompt")
             prompt_tokens = self.get_prompt_tokens(partial_prompt)
@@ -119,7 +119,7 @@ def create_completion_with_cache(
         for output in outputs:
             yield output
             results += output["choices"][0]["text"]
-
+
         self.current_state = prompt + results
 
         if self.flush_cache:
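The edits in this file are whitespace-only (blank lines with trailing whitespace stripped), but the surrounding context shows the cache manager's flow: compare the incoming prompt with self.current_state, evaluate only what has not been seen, stream the outputs, then record prompt + results as the new state. A self-contained toy sketch of that prefix-reuse idea, not the manager's exact branching (which also special-cases an identical repeated prompt), since LlamaCacheManager's full implementation is not part of this diff:

    from typing import Callable

    class PrefixReuse:
        """Toy model of the state-reuse pattern used by the cache manager."""

        def __init__(self) -> None:
            self.current_state = ""

        def complete(self, prompt: str, generate: Callable[[str], str]) -> str:
            prompt = prompt.strip()
            if self.current_state and prompt.startswith(self.current_state):
                partial_prompt = prompt[len(self.current_state):]  # only the unseen suffix
            else:
                partial_prompt = prompt  # cache miss: start from scratch
                self.current_state = ""
            results = generate(partial_prompt)  # stand-in for the actual model call
            self.current_state = prompt + results  # the next call can resume from here
            return results

    # Usage with a dummy generator:
    cache = PrefixReuse()
    print(cache.complete("Hello", lambda s: " world"))
    print(cache.complete("Hello world", lambda s: "!"))  # reuses the cached prefix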

llama_cpp/server/model.py

Lines changed: 5 additions & 0 deletions
@@ -11,6 +11,7 @@
 from llama_cpp.server.settings import ModelSettings
 from llama_cpp.managers.cache import LlamaCacheManager
 
+
 class LlamaProxy:
     def __init__(self, models: List[ModelSettings]) -> None:
         assert models, "No models provided!"
@@ -134,6 +135,10 @@ def load_llama_from_model_settings(settings: ModelSettings) -> llama_cpp.Llama:
     else:
         create_fn = LlamaCacheManager
     kwargs["model_path"] = settings.model
+    # kwargs["control_vectors"] = [
+    #     "/Users/sengwee.ngui/Library/CloudStorage/OneDrive-TemusPte.Ltd/Documents/projects/SuperAdapters/hearts_system/hearts_system.json",
+    #     "/Users/sengwee.ngui/Library/CloudStorage/OneDrive-TemusPte.Ltd/Documents/projects/SuperAdapters/hearts_system/hearts_system_critical.json"
+    # ]
 
     _model = create_fn(
         **kwargs,
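The new lines only add a commented-out control_vectors kwarg, so nothing changes at runtime; the context shows the loader building a kwargs dict and dispatching to either llama_cpp.Llama or the fork's LlamaCacheManager. A condensed sketch of that dispatch shape (the use_cache_manager selector is invented for illustration; the real condition sits above the hunk and is not shown in this diff):

    import llama_cpp
    from llama_cpp.managers.cache import LlamaCacheManager  # fork-specific manager
    from llama_cpp.server.settings import ModelSettings

    def load_llama_from_model_settings(settings: ModelSettings) -> llama_cpp.Llama:
        kwargs = {}  # populated from settings fields before the hunk shown above
        create_fn = llama_cpp.Llama
        if getattr(settings, "use_cache_manager", False):  # hypothetical selector
            create_fn = LlamaCacheManager
        kwargs["model_path"] = settings.model
        # control_vectors stays disabled in this commit; if re-enabled it would be
        # forwarded here like any other keyword argument.
        return create_fn(**kwargs)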

scripts/start.sh

Lines changed: 1 addition & 1 deletion
@@ -1,5 +1,5 @@
 python llama_cpp/server --model "/Users/sengwee.ngui/Library/CloudStorage/OneDrive-TemusPte.Ltd/Documents/projects/SuperAdapters/data/llms/mistral-fwd-john-doe-ckpt-158-200.gguf" --n_gpu_layers 64 --n_ctx 8192 --n_batch 2048 --last_n_tokens_size 4000
 
-python llama_cpp/server --model "/Users/sengwee.ngui/Library/CloudStorage/OneDrive-TemusPte.Ltd/Documents/projects/SuperAdapters/data/llms/mistral-fwd-instruct-v0.2-v0.0.1.gguf" --n_gpu_layers 64 --n_ctx 8192 --n_batch 2048 --last_n_tokens_size 4000
+python llama_cpp/server --model "/Users/sengwee.ngui/Library/CloudStorage/OneDrive-TemusPte.Ltd/Documents/projects/SuperAdapters/data/llms/mistral-fwd-instruct-v0.2-v0.0.1.gguf" --n_gpu_layers 64 --n_ctx 8192 --n_batch 2048 --last_n_tokens_size 4000 --host 0.0.0.0
 
 python3 llama_cpp/server --model "../data/mistral-fwd-john-doe-ckpt-158-200.gguf" --n_gpu_layers 64 --n_ctx 8192 --n_batch 2048 --last_n_tokens_size 4000
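The only functional change here is --host 0.0.0.0, which binds the server to all interfaces instead of localhost only. Once it is running, the OpenAI-compatible completions endpoint can be reached from other machines; a minimal sketch, assuming the default port 8000 and a placeholder address:

    import requests

    server = "http://192.0.2.10:8000"  # placeholder address; 8000 is llama_cpp.server's default port
    resp = requests.post(
        f"{server}/v1/completions",
        json={"prompt": "Hello", "max_tokens": 32},
        timeout=60,
    )
    resp.raise_for_status()
    print(resp.json()["choices"][0]["text"])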
