ggml-cpu: add RVV vec dot kernels for quantization types (#18859)

* ggml-cpu: add rvv quantize_row_q8_K kernel
Co-authored-by: Rehan Qasim <rehan.qasim@10xengineers.ai>
* ggml-cpu: add rvv vec_dot for iq4_nl, mxfp4, iq2_xxs
Co-authored-by: Rehan Qasim <rehan.qasim@10xengineers.ai>
* ggml-cpu: add rvv vec_dot for iq4_xs, refactor
* ggml-cpu: remove ifunc for rvv vec dot
* ggml-cpu: add vec_dot for iq2_xs, iq3_xxs
Co-authored-by: Rehan Qasim <rehan.qasim@10xengineers.ai>
* ggml-cpu: refactor quants.c
---------
Co-authored-by: taimur-10x <taimur.ahmad@10xengineers.ai>
Co-authored-by: Rehan Qasim <rehan.qasim@10xengineers.ai>
Co-authored-by: Rehan Qasim <rehanbhatti0317@gmail.com>
ggml : fix typo gmml (#20512)
2026-04-30 16:47:31 +03:00 · 2026-03-13 17:36:04 +02:00 · 2026-03-13 14:36:13 +01:00
4 changed files with 1423 additions and 547 deletions
--- a/ggml/CMakeLists.txt
+++ b/ggml/CMakeLists.txt
@@ -253,7 +253,7 @@ option(GGML_OPENCL_PROFILING                "ggml: use OpenCL profiling (increas
 option(GGML_OPENCL_EMBED_KERNELS            "ggml: embed kernels"                             ON)
 option(GGML_OPENCL_USE_ADRENO_KERNELS       "ggml: use optimized kernels for Adreno"          ON)
 set   (GGML_OPENCL_TARGET_VERSION "300" CACHE STRING
-                                            "gmml: OpenCL API version to target")
+                                            "ggml: OpenCL API version to target")

 option(GGML_HEXAGON                         "ggml: enable Hexagon backend"                    OFF)
 set(GGML_HEXAGON_FP32_QUANTIZE_GROUP_SIZE 128 CACHE STRING "ggml: quantize group size (32, 64, or 128)")
--- a/ggml/src/ggml-cpu/arch-fallback.h
+++ b/ggml/src/ggml-cpu/arch-fallback.h
@@ -199,13 +199,6 @@
 #define ggml_gemm_q8_0_4x8_q8_0_generic ggml_gemm_q8_0_4x8_q8_0
 #elif defined(__riscv)
 // quants.c
-#define quantize_row_q8_K_generic quantize_row_q8_K
-#define ggml_vec_dot_iq2_xxs_q8_K_generic ggml_vec_dot_iq2_xxs_q8_K
-#define ggml_vec_dot_iq2_xs_q8_K_generic ggml_vec_dot_iq2_xs_q8_K
-#define ggml_vec_dot_iq3_xxs_q8_K_generic ggml_vec_dot_iq3_xxs_q8_K
-#define ggml_vec_dot_iq4_nl_q8_0_generic ggml_vec_dot_iq4_nl_q8_0
-#define ggml_vec_dot_iq4_xs_q8_K_generic ggml_vec_dot_iq4_xs_q8_K
-#define ggml_vec_dot_mxfp4_q8_0_generic ggml_vec_dot_mxfp4_q8_0
 #define ggml_vec_dot_nvfp4_q8_0_generic ggml_vec_dot_nvfp4_q8_0
 // repack.cpp
 #define ggml_quantize_mat_q8_0_4x1_generic ggml_quantize_mat_q8_0_4x1
--- a/ggml/src/ggml-cpu/arch/riscv/quants.c
+++ b/ggml/src/ggml-cpu/arch/riscv/quants.c
--- a/ggml/src/ggml-cpu/ops.cpp
+++ b/ggml/src/ggml-cpu/ops.cpp
@@ -9624,7 +9624,7 @@ void ggml_compute_forward_win_unpart(
    }
 }

-//gmml_compute_forward_unary
+//ggml_compute_forward_unary

 void ggml_compute_forward_unary(
        const ggml_compute_params * params,