Add low_vram parameter · colosieve/llama-cpp-python@44b83ca · GitHub
Commit 44b83ca

Add low_vram parameter

1 parent f7c5cfa commit 44b83ca
File tree

1 file changed: +4 −0 lines changed

llama_cpp/llama.py

Lines changed: 4 additions & 0 deletions
@@ -219,6 +219,7 @@ def __init__(
         last_n_tokens_size: int = 64,
         lora_base: Optional[str] = None,
         lora_path: Optional[str] = None,
+        low_vram: bool = False,
         verbose: bool = True,
     ):
         """Load a llama.cpp model from `model_path`.
@@ -260,6 +261,7 @@ def __init__(
         self.params.use_mmap = use_mmap if lora_path is None else False
         self.params.use_mlock = use_mlock
         self.params.embedding = embedding
+        self.params.low_vram = low_vram

         self.last_n_tokens_size = last_n_tokens_size
         self.n_batch = min(n_ctx, n_batch)
@@ -1447,6 +1449,7 @@ def __getstate__(self):
             use_mmap=self.params.use_mmap,
             use_mlock=self.params.use_mlock,
             embedding=self.params.embedding,
+            low_vram=self.params.low_vram,
             last_n_tokens_size=self.last_n_tokens_size,
             n_batch=self.n_batch,
             n_threads=self.n_threads,
@@ -1470,6 +1473,7 @@ def __setstate__(self, state):
             use_mmap=state["use_mmap"],
             use_mlock=state["use_mlock"],
             embedding=state["embedding"],
+            low_vram=state["low_vram"],
             n_threads=state["n_threads"],
             n_batch=state["n_batch"],
             last_n_tokens_size=state["last_n_tokens_size"],
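A minimal usage sketch of the new keyword, assuming only what the diff above shows: `low_vram` defaults to False and is forwarded to the underlying llama.cpp context params. The model path below is a placeholder, not a file from this repository; the note about reduced VRAM usage at some performance cost reflects the upstream llama.cpp flag this maps to and should be checked against the llama.cpp version in use.

    from llama_cpp import Llama

    # "models/7B/ggml-model.bin" is a placeholder path; point it at a real model file.
    # low_vram=True is passed through to self.params.low_vram per this commit.
    llm = Llama(model_path="models/7B/ggml-model.bin", low_vram=True)

    # Because __getstate__/__setstate__ now carry the flag, pickled copies of the
    # object should reconstruct with the same low_vram setting.
    state = llm.__getstate__()
    assert state["low_vram"] is True

Since the parameter defaults to False and is only read from the new keyword, existing callers of `Llama(...)` are unaffected by this change.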

0 commit comments