Add low_vram to server settings · GavinChen1314/llama-cpp-python@1e20be6 · GitHub

Commit 1e20be6
Add low_vram to server settings
1 parent 44b83ca commit 1e20be6

1 file changed: 4 additions (+), 0 deletions (-)

llama_cpp/server/app.py

Lines changed: 4 additions & 0 deletions
@@ -48,6 +48,10 @@ class Settings(BaseSettings):
         description="Use mmap.",
     )
     embedding: bool = Field(default=True, description="Whether to use embeddings.")
+    low_vram: bool = Field(
+        default=False,
+        description="Whether to use less VRAM. This will reduce performance.",
+    )
     last_n_tokens_size: int = Field(
         default=64,
         ge=0,
