quantize-stats: add option to test against reference quantization

unbounded · unbounded · commit 63cfa4320086 · 2023-04-05T03:30:23.000+02:00
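Expose the reference quantization implementations through a new
quantize_row_q_reference entry in quantize_fns_t, and add a
-r/--reference option to quantize-stats that uses them instead of the
optimized quantize_row_q in the round-trip test.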
diff --git a/examples/quantize-stats/quantize-stats.cpp b/examples/quantize-stats/quantize-stats.cpp
@@ -24,6 +24,7 @@ struct quantize_stats_params {
     bool verbose = false;
     bool per_layer_stats = false;
     bool print_histogram = false;
+    bool reference = false;
     std::vector<std::string> include_layers;
     std::vector<std::string> exclude_layers;
     std::vector<enum ggml_type> include_types;
@@ -49,6 +50,8 @@ void quantize_stats_print_usage(int /*argc*/, char ** argv) {
     fprintf(stderr, "  -h, --help            show this help message and exit\n");
     fprintf(stderr, "  -m FNAME, --model FNAME\n");
     fprintf(stderr, "                        model path (default: %s)\n", params.model.c_str());
+    fprintf(stderr, "  -r, --reference\n");
+    fprintf(stderr, "                        use reference implementation (default: false)\n");
     fprintf(stderr, "  -v, --verbose\n");
     fprintf(stderr, "                        verbose output (default: false)\n");
     fprintf(stderr, "  -p, --per-layer-stats\n");
@@ -135,6 +138,7 @@ void test_roundtrip_on_layer(
         std::string & name,
         bool print_layer_stats,
         const quantize_fns_t & qfns,
+        bool use_reference,
         const ggml_tensor * layer,
         float * input_scratch,
         char *quantized_scratch,
@@ -156,7 +160,11 @@ void test_roundtrip_on_layer(
             input_scratch = ggml_get_data_f32(layer) + offset;
         }
 
-        qfns.quantize_row_q(input_scratch, quantized_scratch, chunk_size);
+        if (use_reference) {
+            qfns.quantize_row_q_reference(input_scratch, quantized_scratch, chunk_size);
+        } else {
+            qfns.quantize_row_q(input_scratch, quantized_scratch, chunk_size);
+        }
         qfns.dequantize_row_q(quantized_scratch, output_scratch, chunk_size);
 
         update_error_stats(chunk_size, input_scratch, output_scratch, total_error);
@@ -184,6 +192,8 @@ int main(int argc, char ** argv) {
         if (arg == "-h" || arg == "--help") {
             quantize_stats_print_usage(argc, argv);
             exit(0);
+        } else if (arg == "-r" || arg == "--reference") {
+            params.reference = true;
         } else if (arg == "-v") {
             params.verbose = true;
         } else if (arg == "-p" || arg == "--per-layer-stats") {
@@ -320,6 +330,7 @@ int main(int argc, char ** argv) {
                         layer_name,
                         params.per_layer_stats,
                         qfns,
+                        params.reference,
                         kv_tensor.second,
                         input_scratch.data(),
                         quantized_scratch.data(),
diff --git a/ggml.c b/ggml.c
@@ -6499,14 +6499,16 @@ static void ggml_compute_forward_mul_mat_f16_f32(
 
 static const quantize_fns_t quantize_fns[GGML_TYPE_COUNT] = {
     [GGML_TYPE_Q4_0] = {
-        .dequantize_row_q = dequantize_row_q4_0,
-        .quantize_row_q   = quantize_row_q4_0,
-        .vec_dot_q        = ggml_vec_dot_q4_0,
+        .dequantize_row_q         = dequantize_row_q4_0,
+        .quantize_row_q           = quantize_row_q4_0,
+        .quantize_row_q_reference = (quantize_row_q_t) quantize_row_q4_0_reference,
+        .vec_dot_q                = ggml_vec_dot_q4_0,
     },
     [GGML_TYPE_Q4_1] = {
-        .dequantize_row_q = dequantize_row_q4_1,
-        .quantize_row_q   = quantize_row_q4_1,
-        .vec_dot_q        = ggml_vec_dot_q4_1,
+        .dequantize_row_q         = dequantize_row_q4_1,
+        .quantize_row_q           = quantize_row_q4_1,
+        .quantize_row_q_reference = (quantize_row_q_t) quantize_row_q4_1_reference,
+        .vec_dot_q                = ggml_vec_dot_q4_1,
     },
 };
 
diff --git a/ggml_internal.h b/ggml_internal.h
@@ -15,6 +15,7 @@ typedef void (*vec_dot_q_t)(const int n, float * restrict s, const void * restri
 typedef struct {
     dequantize_row_q_t dequantize_row_q;
     quantize_row_q_t   quantize_row_q;
+    quantize_row_q_t   quantize_row_q_reference;
     vec_dot_q_t        vec_dot_q;
 } quantize_fns_t;