Update n_batch for server

abetlen · abetlen · commit 3cab3ef4cb1a · 2023-04-25T09:11:32.000-04:00
diff --git a/llama_cpp/server/__main__.py b/llama_cpp/server/__main__.py
@@ -28,7 +28,7 @@
 class Settings(BaseSettings):
     model: str
     n_ctx: int = 2048
-    n_batch: int = 8
+    n_batch: int = 512
     n_threads: int = max((os.cpu_count() or 2) // 2, 1)
     f16_kv: bool = True
     use_mlock: bool = False  # This causes a silent failure on platforms that don't support mlock (e.g. Windows) took forever to figure out...