@@ -219,6 +219,7 @@ def __init__(
         last_n_tokens_size: int = 64,
         lora_base: Optional[str] = None,
         lora_path: Optional[str] = None,
+        low_vram: bool = False,
         verbose: bool = True,
     ):
         """Load a llama.cpp model from `model_path`.
@@ -260,6 +261,7 @@ def __init__(
         self.params.use_mmap = use_mmap if lora_path is None else False
         self.params.use_mlock = use_mlock
         self.params.embedding = embedding
+        self.params.low_vram = low_vram
 
         self.last_n_tokens_size = last_n_tokens_size
         self.n_batch = min(n_ctx, n_batch)
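
The new keyword is forwarded straight into the underlying llama_context_params struct, so GPU-enabled builds of llama.cpp can keep scratch buffers and the KV cache in system RAM rather than VRAM. A minimal usage sketch, assuming a local GGML model file (the path and the n_gpu_layers value are placeholders, not part of this commit):

    from llama_cpp import Llama

    llm = Llama(
        model_path="./models/7B/ggml-model.bin",  # placeholder path
        n_gpu_layers=32,    # offload layers to the GPU as before
        low_vram=True,      # new flag: trade some speed for less VRAM
    )
    out = llm("Q: Name the planets in the solar system. A:", max_tokens=32)
    print(out["choices"][0]["text"])

The flag is only expected to change behavior in CUDA-enabled builds where layers are actually offloaded; elsewhere it should be a no-op.
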
@@ -1447,6 +1449,7 @@ def __getstate__(self):
             use_mmap=self.params.use_mmap,
             use_mlock=self.params.use_mlock,
             embedding=self.params.embedding,
+            low_vram=self.params.low_vram,
             last_n_tokens_size=self.last_n_tokens_size,
             n_batch=self.n_batch,
             n_threads=self.n_threads,
@@ -1470,6 +1473,7 @@ def __setstate__(self, state):
             use_mmap=state["use_mmap"],
             use_mlock=state["use_mlock"],
             embedding=state["embedding"],
+            low_vram=state["low_vram"],
             n_threads=state["n_threads"],
             n_batch=state["n_batch"],
             last_n_tokens_size=state["last_n_tokens_size"],
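
The __getstate__/__setstate__ hunks make the flag survive pickling; without them a pickled-and-restored Llama would silently revert to the default low_vram=False. A sketch of the round trip these hunks preserve, reusing the hypothetical llm from above (note that unpickling re-invokes __init__ and reloads the model, so it is not cheap):

    import pickle

    blob = pickle.dumps(llm)       # __getstate__ now records low_vram
    restored = pickle.loads(blob)  # __setstate__ feeds it back to __init__
    assert restored.params.low_vram == llm.params.low_vram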