File tree Expand file tree Collapse file tree 2 files changed +2
-2
lines changed Expand file tree Collapse file tree 2 files changed +2
-2
lines changed Original file line number Diff line number Diff line change @@ -77,7 +77,7 @@ def __init__(
77
77
mul_mat_q : bool = True ,
78
78
logits_all : bool = False ,
79
79
embedding : bool = False ,
80
- offload_kqv : bool = False ,
80
+ offload_kqv : bool = True ,
81
81
# Sampling Params
82
82
last_n_tokens_size : int = 64 ,
83
83
# LoRA Params
Original file line number Diff line number Diff line change @@ -90,7 +90,7 @@ class ModelSettings(BaseSettings):
90
90
logits_all : bool = Field (default = True , description = "Whether to return logits." )
91
91
embedding : bool = Field (default = True , description = "Whether to use embeddings." )
92
92
offload_kqv : bool = Field (
93
- default = False , description = "Whether to offload kqv to the GPU."
93
+ default = True , description = "Whether to offload kqv to the GPU."
94
94
)
95
95
# Sampling Params
96
96
last_n_tokens_size : int = Field (
You can’t perform that action at this time.
0 commit comments