llama: Add configuration presets for chat and reranking servers by heyyymonth · Pull Request #13462 · ggml-org/llama.cpp · GitHub

llama: Add configuration presets for chat and reranking servers #13462

Open · heyyymonth wants to merge 2 commits into master

Changes from 1 commit
common: add configuration presets for chat and reranking servers
Added two new configuration presets to simplify command-line usage:

1. --chat-llama3-8b-default: run a chat server with the Llama 3 8B model.
2. --rerank-bge-default: run a reranking server with the BGE reranker model.

These presets configure appropriate model paths, server ports, GPU settings, and other parameters.

Refs: #10932
heyyymonth committed May 12, 2025
commit 8d23bfc10f0783551994a43918386e114a6b3ade
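For context on the mechanism: each preset in the diff below is registered as an ordinary common_arg whose handler mutates the shared common_params struct, so a preset is just a bundle of defaults applied when its flag is parsed. Here is a minimal standalone sketch of that pattern; the server_params struct and hand-rolled loop are hypothetical simplifications, not llama.cpp's actual parser, and it assumes (as with the repo's existing presets) that handlers fire in command-line order, so a flag placed after the preset can still override an individual field.

    // Minimal sketch of the preset-as-handler pattern (hypothetical types,
    // not llama.cpp's real parser): a preset flag fills in a bundle of
    // fields; a later flag can still override any one of them.
    #include <cstdio>
    #include <functional>
    #include <map>
    #include <string>
    #include <vector>

    struct server_params {                 // trimmed-down stand-in for common_params
        std::string hf_repo;
        int         port         = 8080;
        int         n_gpu_layers = 0;
    };

    int main() {
        server_params params;

        // each preset flag maps to a handler that mutates the shared params
        std::map<std::string, std::function<void(server_params &)>> handlers = {
            { "--rerank-bge-default", [](server_params & p) {
                p.hf_repo      = "ggml-org/bge-reranker-base-Q8_0-GGUF";
                p.port         = 8090;
                p.n_gpu_layers = 99;
            } },
        };

        // simulated command line: preset first, then an explicit override
        std::vector<std::string> args = { "--rerank-bge-default", "--port", "9000" };
        for (size_t i = 0; i < args.size(); i++) {
            if (args[i] == "--port") {
                params.port = std::stoi(args[++i]);   // runs after the preset, so it wins
            } else {
                handlers.at(args[i])(params);
            }
        }

        // prints: repo=ggml-org/bge-reranker-base-Q8_0-GGUF port=9000 ngl=99
        std::printf("repo=%s port=%d ngl=%d\n",
                    params.hf_repo.c_str(), params.port, params.n_gpu_layers);
        return 0;
    }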
30 changes: 30 additions & 0 deletions common/arg.cpp
@@ -3325,5 +3325,35 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
         }
     ).set_examples({LLAMA_EXAMPLE_SERVER}));
 
+    add_opt(common_arg(
+        {"--chat-llama3-8b-default"},
+        string_format("use default Llama3 8B model for chat server (note: can download weights from the internet)"),
+        [](common_params & params) {
+            params.model.hf_repo = "ggml-org/Llama-3-8B-Q8_0-GGUF";
+            params.model.hf_file = "llama-3-8b-q8_0.gguf";
+            params.port = 8080;
+            params.n_gpu_layers = 99;
+            params.flash_attn = true;
+            params.n_ubatch = 512;
+            params.n_batch = 512;
+            params.n_ctx = 4096;
+            params.n_cache_reuse = 256;
+        }
+    ).set_examples({LLAMA_EXAMPLE_SERVER}));
+
+    add_opt(common_arg(
+        {"--rerank-bge-default"},
+        string_format("use default BGE reranker model for reranking server (note: can download weights from the internet)"),
+        [](common_params & params) {
+            params.model.hf_repo = "ggml-org/bge-reranker-base-Q8_0-GGUF";
+            params.model.hf_file = "bge-reranker-base-q8_0.gguf";
+            params.port = 8090;
+            params.n_gpu_layers = 99;
+            params.flash_attn = true;
+            params.n_ctx = 512;
+            params.reranking = true;
+        }
+    ).set_examples({LLAMA_EXAMPLE_SERVER}));
+
     return ctx_arg;
 }
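If merged, spinning up either server reduces to a single flag, e.g. llama-server --rerank-bge-default (binary name assumed from the repo's existing *-default presets); on first use the weights are fetched from the Hugging Face repos configured above, as the help text notes.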