Remove accidentally kept debug prints and add min_keep support

ggml-org · Green-Sky · Oct 31, 2023 · Oct 25, 2023 · Oct 25, 2023 · Oct 25, 2023
commit 62fc77153b968f12d1da1e67193d94f4c591f873
diff --git a/llama.cpp b/llama.cpp
@@ -7372,17 +7372,30 @@ void llama_sample_min_p(struct llama_context * ctx, llama_token_data_array * can
 
     // Calculate the minimum percentage requirement.
     multiplied_min_p = base_min_p * multiplication_factor;
-    printf("Base min_p value: %f\n", base_min_p);
-    printf("Calculated multiplied_min_p (threshold) value: %f\n", multiplied_min_p);
 
     // Store the tokens that meet the threshold in a new list.
     std::vector<llama_token_data> filtered_candidates;
     filtered_candidates.reserve(candidates->size);  // Reserve to avoid multiple reallocations
 
+    size_t kept_count = 0;  // Counter for how many tokens are kept
+
     for (size_t i = 0; i < candidates->size; ++i) {
         // If a token's probability is above the threshold, we keep it.
         if (candidates->data[i].p >= multiplied_min_p) {
             filtered_candidates.push_back(candidates->data[i]);
+            kept_count++;  // Increment the counter
+        }
+    }
+
+    // If not enough candidates meet the threshold, take the top 'min_keep' ones
+    if (kept_count < min_keep) {
+        std::sort(candidates->data, candidates->data + candidates->size, 
+                  [](const llama_token_data & a, const llama_token_data & b) {
+                      return a.p > b.p;  // Sort by probability in descending order
+                  });
+        filtered_candidates.clear();  // Clear the previously filtered candidates
+        for (size_t i = 0; i < min_keep; ++i) {
+            filtered_candidates.push_back(candidates->data[i]);
         }
     }