ggml : add GGML_QNT_VERSION to track quantization format changes

https://github.com/ggerganov/ggml/issues/150#issuecomment-1546625668
cuda : fix convert function (#1412)
2026-04-16 16:27:32 +03:00 · 2023-05-14 10:20:19 +03:00 · 2023-05-13 17:40:58 +03:00
2 changed files with 4 additions and 1 deletions
--- a/ggml-cuda.cu
+++ b/ggml-cuda.cu
@@ -414,7 +414,7 @@ static dequantize_mul_mat_vec_cuda_t ggml_get_dequantize_mul_mat_vec_cuda(ggml_t
        case GGML_TYPE_Q8_0:
            return dequantize_mul_mat_vec_q8_0_cuda;
        case GGML_TYPE_F16:
-            return dequantize_mul_mat_vec_q8_0_cuda;
+            return convert_mul_mat_vec_f16_cuda;
        default:
            return nullptr;
    }
--- a/ggml.h
+++ b/ggml.h
@@ -190,6 +190,9 @@
 #define GGML_FILE_MAGIC   0x67676d6c // "ggml"
 #define GGML_FILE_VERSION 1

+#define GGML_QNT_VERSION        1    // bump this on quantization format changes
+#define GGML_QNT_VERSION_FACTOR 1000 // do not change this
+
 #define GGML_MAX_DIMS          4
 #define GGML_MAX_NODES         4096
 #define GGML_MAX_PARAMS        256
Author	SHA1	Message	Date
Georgi Gerganov	601a033475	ggml : add GGML_QNT_VERSION to track quantization format changes https://github.com/ggerganov/ggml/issues/150#issuecomment-1546625668	2023-05-14 10:20:19 +03:00
Georgi Gerganov	08737ef720	cuda : fix convert function (#1412)	2023-05-13 17:40:58 +03:00