Compare commits

..

8 Commits

Author SHA1 Message Date
Borislav Stanimirov
44d28ddd5c cmake : fix use of external ggml (#8787) 2024-07-31 15:40:08 +02:00
Someone
268c566006 nix: cuda: rely on propagatedBuildInputs (#8772)
Some checks failed
Nix CI / nix-eval (macos-latest) (push) Waiting to run
Nix CI / nix-eval (ubuntu-latest) (push) Waiting to run
Nix CI / nix-build (macos-latest) (push) Waiting to run
Nix CI / nix-build (ubuntu-latest) (push) Waiting to run
flake8 Lint / Lint (push) Waiting to run
Nix aarch64 builds / nix-build-aarch64 (push) Has been cancelled
Listing individual outputs no longer necessary to reduce the runtime closure size after https://github.com/NixOS/nixpkgs/pull/323056.
2024-07-30 13:35:30 -07:00
Brian
7e72aa74fd py: add_array() will not add to kv store if value is an empty array (#8774)
Some checks failed
Publish Docker image / Push Docker image to Docker Hub (map[dockerfile:.devops/full-cuda.Dockerfile platforms:linux/amd64 tag:full-cuda]) (push) Waiting to run
Publish Docker image / Push Docker image to Docker Hub (map[dockerfile:.devops/full.Dockerfile platforms:linux/amd64,linux/arm64 tag:full]) (push) Waiting to run
Publish Docker image / Push Docker image to Docker Hub (map[dockerfile:.devops/llama-cli-cuda.Dockerfile platforms:linux/amd64 tag:light-cuda]) (push) Waiting to run
Publish Docker image / Push Docker image to Docker Hub (map[dockerfile:.devops/llama-cli-intel.Dockerfile platforms:linux/amd64 tag:light-intel]) (push) Waiting to run
Publish Docker image / Push Docker image to Docker Hub (map[dockerfile:.devops/llama-cli-rocm.Dockerfile platforms:linux/amd64,linux/arm64 tag:light-rocm]) (push) Waiting to run
Publish Docker image / Push Docker image to Docker Hub (map[dockerfile:.devops/llama-cli.Dockerfile platforms:linux/amd64,linux/arm64 tag:light]) (push) Waiting to run
Publish Docker image / Push Docker image to Docker Hub (map[dockerfile:.devops/llama-server-cuda.Dockerfile platforms:linux/amd64 tag:server-cuda]) (push) Waiting to run
Publish Docker image / Push Docker image to Docker Hub (map[dockerfile:.devops/llama-server-intel.Dockerfile platforms:linux/amd64 tag:server-intel]) (push) Waiting to run
Publish Docker image / Push Docker image to Docker Hub (map[dockerfile:.devops/llama-server-rocm.Dockerfile platforms:linux/amd64,linux/arm64 tag:server-rocm]) (push) Waiting to run
Publish Docker image / Push Docker image to Docker Hub (map[dockerfile:.devops/llama-server.Dockerfile platforms:linux/amd64,linux/arm64 tag:server]) (push) Waiting to run
Nix aarch64 builds / nix-build-aarch64 (push) Waiting to run
Nix CI / nix-eval (macos-latest) (push) Waiting to run
Nix CI / nix-eval (ubuntu-latest) (push) Waiting to run
Nix CI / nix-build (macos-latest) (push) Waiting to run
Nix CI / nix-build (ubuntu-latest) (push) Waiting to run
flake8 Lint / Lint (push) Waiting to run
Python Type-Check / pyright type-check (push) Has been cancelled
* gguf_writer.py: add_array() should not add to kv store if empty

* Apply suggestions from code review

I was wondering if there was a specific reason for `if val`, but good to hear we can safely use `len(val) == 0`

Co-authored-by: compilade <git@compilade.net>

---------

Co-authored-by: compilade <git@compilade.net>
2024-07-31 00:57:03 +10:00
l3utterfly
7c27a19b2e added android implementation of ggml_print_backtrace_symbols (#8751)
* added android implementation of ggml_print_backtrace_symbols

* Update ggml/src/ggml.c

Co-authored-by: slaren <slarengh@gmail.com>

* Update ggml/src/ggml.c

Co-authored-by: slaren <slarengh@gmail.com>

* Update ggml/src/ggml.c

Co-authored-by: slaren <slarengh@gmail.com>

* Update ggml/src/ggml.c

Co-authored-by: slaren <slarengh@gmail.com>

* Update ggml/src/ggml.c

Co-authored-by: slaren <slarengh@gmail.com>

---------

Co-authored-by: slaren <slarengh@gmail.com>
2024-07-30 16:40:18 +02:00
Georgi Gerganov
140074bb86 flake.lock: Update (#8729) 2024-07-30 05:58:57 -07:00
wangshuai09
6e2b6000e5 cann: update cmake (#8765)
Some checks are pending
Publish Docker image / Push Docker image to Docker Hub (map[dockerfile:.devops/full-cuda.Dockerfile platforms:linux/amd64 tag:full-cuda]) (push) Waiting to run
Publish Docker image / Push Docker image to Docker Hub (map[dockerfile:.devops/full.Dockerfile platforms:linux/amd64,linux/arm64 tag:full]) (push) Waiting to run
Publish Docker image / Push Docker image to Docker Hub (map[dockerfile:.devops/llama-cli-cuda.Dockerfile platforms:linux/amd64 tag:light-cuda]) (push) Waiting to run
Publish Docker image / Push Docker image to Docker Hub (map[dockerfile:.devops/llama-cli-intel.Dockerfile platforms:linux/amd64 tag:light-intel]) (push) Waiting to run
Publish Docker image / Push Docker image to Docker Hub (map[dockerfile:.devops/llama-cli-rocm.Dockerfile platforms:linux/amd64,linux/arm64 tag:light-rocm]) (push) Waiting to run
Publish Docker image / Push Docker image to Docker Hub (map[dockerfile:.devops/llama-cli.Dockerfile platforms:linux/amd64,linux/arm64 tag:light]) (push) Waiting to run
Publish Docker image / Push Docker image to Docker Hub (map[dockerfile:.devops/llama-server-cuda.Dockerfile platforms:linux/amd64 tag:server-cuda]) (push) Waiting to run
Publish Docker image / Push Docker image to Docker Hub (map[dockerfile:.devops/llama-server-intel.Dockerfile platforms:linux/amd64 tag:server-intel]) (push) Waiting to run
Publish Docker image / Push Docker image to Docker Hub (map[dockerfile:.devops/llama-server-rocm.Dockerfile platforms:linux/amd64,linux/arm64 tag:server-rocm]) (push) Waiting to run
Publish Docker image / Push Docker image to Docker Hub (map[dockerfile:.devops/llama-server.Dockerfile platforms:linux/amd64,linux/arm64 tag:server]) (push) Waiting to run
Nix CI / nix-eval (macos-latest) (push) Waiting to run
Nix CI / nix-eval (ubuntu-latest) (push) Waiting to run
Nix CI / nix-build (macos-latest) (push) Waiting to run
Nix CI / nix-build (ubuntu-latest) (push) Waiting to run
flake8 Lint / Lint (push) Waiting to run
2024-07-30 12:37:35 +02:00
zhentaoyu
c887d8b017 [SYCL] Add TIMESTEP_EMBEDDING OP (#8707)
Signed-off-by: zhentaoyu <zhentao.yu@intel.com>
2024-07-30 14:56:51 +08:00
CarterLi999
75af08c475 ggml: bugfix: fix the inactive elements is agnostic for risc-v vector (#8748)
Some checks are pending
Publish Docker image / Push Docker image to Docker Hub (map[dockerfile:.devops/full-cuda.Dockerfile platforms:linux/amd64 tag:full-cuda]) (push) Waiting to run
Publish Docker image / Push Docker image to Docker Hub (map[dockerfile:.devops/full.Dockerfile platforms:linux/amd64,linux/arm64 tag:full]) (push) Waiting to run
Publish Docker image / Push Docker image to Docker Hub (map[dockerfile:.devops/llama-cli-cuda.Dockerfile platforms:linux/amd64 tag:light-cuda]) (push) Waiting to run
Publish Docker image / Push Docker image to Docker Hub (map[dockerfile:.devops/llama-cli-intel.Dockerfile platforms:linux/amd64 tag:light-intel]) (push) Waiting to run
Publish Docker image / Push Docker image to Docker Hub (map[dockerfile:.devops/llama-cli-rocm.Dockerfile platforms:linux/amd64,linux/arm64 tag:light-rocm]) (push) Waiting to run
Publish Docker image / Push Docker image to Docker Hub (map[dockerfile:.devops/llama-cli.Dockerfile platforms:linux/amd64,linux/arm64 tag:light]) (push) Waiting to run
Publish Docker image / Push Docker image to Docker Hub (map[dockerfile:.devops/llama-server-cuda.Dockerfile platforms:linux/amd64 tag:server-cuda]) (push) Waiting to run
Publish Docker image / Push Docker image to Docker Hub (map[dockerfile:.devops/llama-server-intel.Dockerfile platforms:linux/amd64 tag:server-intel]) (push) Waiting to run
Publish Docker image / Push Docker image to Docker Hub (map[dockerfile:.devops/llama-server-rocm.Dockerfile platforms:linux/amd64,linux/arm64 tag:server-rocm]) (push) Waiting to run
Publish Docker image / Push Docker image to Docker Hub (map[dockerfile:.devops/llama-server.Dockerfile platforms:linux/amd64,linux/arm64 tag:server]) (push) Waiting to run
Nix CI / nix-eval (macos-latest) (push) Waiting to run
Nix CI / nix-eval (ubuntu-latest) (push) Waiting to run
Nix CI / nix-build (macos-latest) (push) Waiting to run
Nix CI / nix-build (ubuntu-latest) (push) Waiting to run
flake8 Lint / Lint (push) Waiting to run
In this code, we want the inactive elements (those where mask[i] is false)
to retain the value they previously held, so we should use the
mask-undisturbed policy. With the default mask-agnostic policy of the RVV
intrinsics, these elements may either keep their old value or be overwritten with all 1s.

Co-authored-by: carter.li <carter.li@starfivetech.com>
2024-07-29 18:38:34 +02:00
14 changed files with 169 additions and 42 deletions

View File

@@ -126,16 +126,9 @@ let
++ optionals useMetalKit [ MetalKit ];
cudaBuildInputs = with cudaPackages; [
cuda_cccl.dev # <nv/target>
# A temporary hack for reducing the closure size, remove once cudaPackages
# have stopped using lndir: https://github.com/NixOS/nixpkgs/issues/271792
cuda_cudart.dev
cuda_cudart.lib
cuda_cudart.static
libcublas.dev
libcublas.lib
libcublas.static
cuda_cudart
cuda_cccl # <nv/target>
libcublas
];
rocmBuildInputs = with rocmPackages; [

View File

@@ -139,7 +139,8 @@ set(LLAMA_BIN_INSTALL_DIR ${CMAKE_INSTALL_BINDIR} CACHE PATH "Location o
# determining _precisely_ which defines are necessary for the llama-config
# package.
#
get_directory_property(GGML_DIR_DEFINES DIRECTORY ggml/src COMPILE_DEFINITIONS)
get_target_property(GGML_DIRECTORY ggml SOURCE_DIR)
get_directory_property(GGML_DIR_DEFINES DIRECTORY ${GGML_DIRECTORY} COMPILE_DEFINITIONS)
get_target_property(GGML_TARGET_DEFINES ggml COMPILE_DEFINITIONS)
set(GGML_TRANSIENT_DEFINES ${GGML_TARGET_DEFINES} ${GGML_DIR_DEFINES})
get_target_property(GGML_LINK_LIBRARIES ggml LINK_LIBRARIES)

6
flake.lock generated
View File

@@ -20,11 +20,11 @@
},
"nixpkgs": {
"locked": {
"lastModified": 1721379653,
"narHash": "sha256-8MUgifkJ7lkZs3u99UDZMB4kbOxvMEXQZ31FO3SopZ0=",
"lastModified": 1722062969,
"narHash": "sha256-QOS0ykELUmPbrrUGmegAUlpmUFznDQeR4q7rFhl8eQg=",
"owner": "NixOS",
"repo": "nixpkgs",
"rev": "1d9c2c9b3e71b9ee663d11c5d298727dace8d374",
"rev": "b73c2221a46c13557b1b3be9c2070cc42cf01eb3",
"type": "github"
},
"original": {

View File

@@ -207,6 +207,7 @@ set(GGML_PUBLIC_HEADERS
include/ggml-alloc.h
include/ggml-backend.h
include/ggml-blas.h
include/ggml-cann.h
include/ggml-cuda.h
include/ggml.h
include/ggml-kompute.h

View File

@@ -849,11 +849,6 @@ if (GGML_CANN)
${CANN_INSTALL_DIR}/acllib/include
)
# TODO: find libs
link_directories(
${CANN_INSTALL_DIR}/lib64
)
add_subdirectory(ggml-cann/kernels)
list(APPEND CANN_LIBRARIES
ascendcl
@@ -872,6 +867,7 @@ if (GGML_CANN)
set(GGML_EXTRA_LIBS ${GGML_EXTRA_LIBS} ${CANN_LIBRARIES} )
set(GGML_EXTRA_INCLUDES ${GGML_EXTRA_INCLUDES} ${CANN_INCLUDE_DIRS})
set(GGML_EXTRA_LIBDIRS ${GGML_EXTRA_LIBDIRS} ${CANN_INSTALL_DIR}/lib64)
list(APPEND GGML_CDEF_PUBLIC GGML_USE_CANN)
endif()
else()

View File

@@ -6449,22 +6449,22 @@ void ggml_vec_dot_q3_K_q8_K(int n, float * restrict s, size_t bs, const void * r
// compute mask for subtraction
vuint8m1_t qh_m0 = __riscv_vand_vx_u8m1(vqh, m, vl);
vbool8_t vmask_0 = __riscv_vmseq_vx_u8m1_b8(qh_m0, 0, vl);
vint8m1_t q3_m0 = __riscv_vsub_vx_i8m1_m(vmask_0, q3_0, 0x4, vl);
vint8m1_t q3_m0 = __riscv_vsub_vx_i8m1_mu(vmask_0, q3_0, q3_0, 0x4, vl);
m <<= 1;
vuint8m1_t qh_m1 = __riscv_vand_vx_u8m1(vqh, m, vl);
vbool8_t vmask_1 = __riscv_vmseq_vx_u8m1_b8(qh_m1, 0, vl);
vint8m1_t q3_m1 = __riscv_vsub_vx_i8m1_m(vmask_1, q3_1, 0x4, vl);
vint8m1_t q3_m1 = __riscv_vsub_vx_i8m1_mu(vmask_1, q3_1, q3_1, 0x4, vl);
m <<= 1;
vuint8m1_t qh_m2 = __riscv_vand_vx_u8m1(vqh, m, vl);
vbool8_t vmask_2 = __riscv_vmseq_vx_u8m1_b8(qh_m2, 0, vl);
vint8m1_t q3_m2 = __riscv_vsub_vx_i8m1_m(vmask_2, q3_2, 0x4, vl);
vint8m1_t q3_m2 = __riscv_vsub_vx_i8m1_mu(vmask_2, q3_2, q3_2, 0x4, vl);
m <<= 1;
vuint8m1_t qh_m3 = __riscv_vand_vx_u8m1(vqh, m, vl);
vbool8_t vmask_3 = __riscv_vmseq_vx_u8m1_b8(qh_m3, 0, vl);
vint8m1_t q3_m3 = __riscv_vsub_vx_i8m1_m(vmask_3, q3_3, 0x4, vl);
vint8m1_t q3_m3 = __riscv_vsub_vx_i8m1_mu(vmask_3, q3_3, q3_3, 0x4, vl);
m <<= 1;
// load Q8 and take product with Q3
@@ -7720,13 +7720,13 @@ void ggml_vec_dot_q5_K_q8_K(int n, float * restrict s, size_t bs, const void * r
vint8m1_t q5_a = __riscv_vreinterpret_v_u8m1_i8m1(__riscv_vand_vx_u8m1(q5_x, 0x0F, vl));
vuint8m1_t qh_m1 = __riscv_vand_vx_u8m1(vqh, m, vl);
vbool8_t vmask_1 = __riscv_vmsne_vx_u8m1_b8(qh_m1, 0, vl);
vint8m1_t q5_m1 = __riscv_vadd_vx_i8m1_m(vmask_1, q5_a, 16, vl);
vint8m1_t q5_m1 = __riscv_vadd_vx_i8m1_mu(vmask_1, q5_a, q5_a, 16, vl);
m <<= 1;
vint8m1_t q5_l = __riscv_vreinterpret_v_u8m1_i8m1(__riscv_vsrl_vx_u8m1(q5_x, 0x04, vl));
vuint8m1_t qh_m2 = __riscv_vand_vx_u8m1(vqh, m, vl);
vbool8_t vmask_2 = __riscv_vmsne_vx_u8m1_b8(qh_m2, 0, vl);
vint8m1_t q5_m2 = __riscv_vadd_vx_i8m1_m(vmask_2, q5_l, 16, vl);
vint8m1_t q5_m2 = __riscv_vadd_vx_i8m1_mu(vmask_2, q5_l, q5_l, 16, vl);
m <<= 1;
vint16m2_t v0 = __riscv_vwmul_vv_i16m2(q5_m1, q8_y1, vl);

View File

@@ -4108,6 +4108,9 @@ bool ggml_sycl_compute_forward(ggml_backend_sycl_context & ctx, struct ggml_tens
case GGML_OP_ARGSORT:
func = ggml_sycl_argsort;
break;
case GGML_OP_TIMESTEP_EMBEDDING:
func = ggml_sycl_op_timestep_embedding;
break;
default:
return false;
}
@@ -5225,6 +5228,7 @@ GGML_CALL static bool ggml_backend_sycl_supports_op(ggml_backend_t backend, cons
case GGML_OP_UPSCALE:
case GGML_OP_PAD:
case GGML_OP_LEAKY_RELU:
case GGML_OP_TIMESTEP_EMBEDDING:
return true;
default:
return false;

View File

@@ -24,5 +24,6 @@
#include "rope.hpp"
#include "norm.hpp"
#include "softmax.hpp"
#include "tsembd.hpp"
#endif // GGML_SYCL_BACKEND_HPP

View File

@@ -1834,20 +1834,6 @@ namespace dpct
template <typename T1, typename T2, typename T3>
inline auto dp4a(T1 a, T2 b, T3 c)
{
#if defined(__SYCL_DEVICE_ONLY__) && defined(__NVPTX__) && \
defined(__SYCL_CUDA_ARCH__) && __SYCL_CUDA_ARCH__ >= 610
dot_product_acc_t<T1, T2> res;
if constexpr (std::is_same_v<dot_product_acc_t<T1, T2>, uint32_t>) {
asm volatile("dp4a.u32.u32 %0, %1, %2, %3;"
: "=r"(res)
: "r"(a), "r"(b), "r"(c));
} else {
asm volatile("dp4a.s32.s32 %0, %1, %2, %3;"
: "=r"(res)
: "r"(a), "r"(b), "r"(c));
}
return res;
#else
dot_product_acc_t<T1, T2> res = c;
auto va = extract_and_sign_or_zero_extend4(a);
auto vb = extract_and_sign_or_zero_extend4(b);
@@ -1856,7 +1842,6 @@ namespace dpct
res += va[2] * vb[2];
res += va[3] * vb[3];
return res;
#endif
}
struct sub_sat

View File

@@ -42,6 +42,7 @@
#define SYCL_IM2COL_BLOCK_SIZE 256
#define SYCL_POOL2D_BLOCK_SIZE 256
#define SYCL_CONV_TRANPOSE_1D_BLOCK_SIZE 256
#define SYCL_TIMESTEP_EMBEDDING_BLOCK_SIZE 256
// dmmv = dequantize_mul_mat_vec
#ifndef GGML_SYCL_DMMV_X

View File

@@ -0,0 +1,71 @@
//
// MIT license
// Copyright (C) 2024 Intel Corporation
// SPDX-License-Identifier: MIT
//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
#include "tsembd.hpp"
// Kernel: fills one row of the timestep embedding.
//
// Each work-item handles one frequency slot `j` of one timestep `i` and writes
// cos(arg) to embed_data[j] and sin(arg) to embed_data[j + dim / 2], where
// arg = timesteps[i] * exp(-log(max_period) * j / (dim / 2)).
//
//   timesteps  - input timestep values, indexed by `i`
//   dst        - output buffer; row `i` starts `i * nb1` bytes after `dst`
//   nb1        - byte stride between consecutive output rows
//   dim        - requested embedding dimension
//   max_period - period used to derive the frequencies
//   item_ct1   - SYCL work-item descriptor
static void timestep_embedding_f32(
        const float * timesteps, float * dst, const int nb1,
        const int dim, const int max_period, const sycl::nd_item<3> &item_ct1) {
    // item_ct1.get_group(1) (blockIdx.y): index of the timestep
    // item_ct1.get_group(2) (blockIdx.x): index of ((dim + 1) / 2) / BLOCK_SIZE
    int i = item_ct1.get_group(1);
    int j = item_ct1.get_local_id(2) + item_ct1.get_group(2) * item_ct1.get_local_range(2);
    float * embed_data = (float *)((char *)dst + i*nb1);

    int half = dim / 2;

    // Odd dim: zero the padding element at index `dim`. Exactly one work-item
    // per row (j == half) performs the store; the launcher sizes the grid for
    // (dim + 1) / 2 work-items so that this work-item always exists.
    // NOTE(review): assumes the dst row holds at least dim + 1 floats when dim
    // is odd - confirm against the op's tensor allocation.
    if (dim % 2 != 0 && j == half) {
        embed_data[dim] = 0.f;
    }

    // Work-items beyond the last frequency slot have nothing else to do.
    if (j >= half) {
        return;
    }

    float timestep = timesteps[i];
    float freq = (float)sycl::native::exp(-(sycl::log((float)max_period)) * j / half);
    float arg = timestep * freq;
    embed_data[j] = sycl::cos(arg);
    embed_data[j + half] = sycl::sin(arg);
}

// Launches timestep_embedding_f32 on `stream`.
//
//   x          - input timesteps (ne00 values)
//   dst        - output buffer
//   ne00       - number of timesteps, i.e. number of output rows
//   nb1        - byte stride between output rows
//   dim        - requested embedding dimension
//   max_period - period used to derive the frequencies
//   stream     - queue the kernel is submitted to
static void timestep_embedding_f32_sycl(
        const float * x, float * dst, const int ne00, const int nb1,
        const int dim, const int max_period, const queue_ptr& stream) {
    // Round up: for odd dim the kernel needs work-item j == dim / 2 to write
    // the padding element, so the grid must cover dim / 2 + 1 work-items.
    int half_ceil = (dim + 1) / 2;
    int num_blocks = (half_ceil + SYCL_TIMESTEP_EMBEDDING_BLOCK_SIZE - 1) / SYCL_TIMESTEP_EMBEDDING_BLOCK_SIZE;
    sycl::range<3> block_dims(1, 1, SYCL_TIMESTEP_EMBEDDING_BLOCK_SIZE);
    sycl::range<3> gridDim(1, ne00, num_blocks);
    stream->parallel_for(
        sycl::nd_range<3>(
            gridDim * block_dims, block_dims),
        [=](sycl::nd_item<3> item_ct1) {
            timestep_embedding_f32(
                x, dst, nb1, dim, max_period, item_ct1
            );
        });
}
// SYCL handler for the timestep-embedding op.
//
// Reads F32 timesteps from src0 and writes the F32 embedding into dst, using
// dst->op_params[0] as the embedding dimension and dst->op_params[1] as the
// max period. src1 is accepted for signature compatibility and is not read.
void ggml_sycl_op_timestep_embedding(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
                                     const ggml_tensor *src1, ggml_tensor * dst) {
    (void) src1;

    const float * timesteps_d = (const float *)src0->data;
    float *       embedding_d = (float *)dst->data;
    dpct::queue_ptr queue     = ctx.stream();

    GGML_ASSERT(src0->type == GGML_TYPE_F32);
    GGML_ASSERT(dst->type == GGML_TYPE_F32);

    const int embed_dim = dst->op_params[0];
    const int period    = dst->op_params[1];

    timestep_embedding_f32_sycl(
        timesteps_d, embedding_d,
        src0->ne[0], dst->nb[1],
        embed_dim, period, queue);
}

View File

@@ -0,0 +1,21 @@
//
// MIT license
// Copyright (C) 2024 Intel Corporation
// SPDX-License-Identifier: MIT
//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
#ifndef GGML_SYCL_TSEMBD_HPP
#define GGML_SYCL_TSEMBD_HPP
#include "common.hpp"
// SYCL handler for GGML_OP_TIMESTEP_EMBEDDING.
// src0 supplies the F32 timesteps and dst receives the F32 embedding; the
// embedding dimension and max period are read from dst->op_params[0] and [1].
// src1 is part of the signature but is not read by the implementation.
void ggml_sycl_op_timestep_embedding(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
const ggml_tensor *src1, ggml_tensor * dst);
#endif // GGML_SYCL_TSEMBD_HPP

View File

@@ -141,7 +141,51 @@ typedef pthread_t ggml_thread_t;
#include <sys/wait.h>
#if defined(__linux__)
#if defined(__ANDROID__)
#include <unwind.h>
#include <dlfcn.h>
#include <stdio.h>
// Write cursor over a caller-provided array of return addresses; it is passed
// to unwind_callback as the opaque argument of _Unwind_Backtrace.
struct backtrace_state {
void ** current; // next free slot; advanced each time a program counter is stored
void ** end; // one past the last slot; unwinding stops once current reaches it
};
// _Unwind_Backtrace callback: appends the current frame's instruction pointer
// to the buffer described by `arg` (a struct backtrace_state *).
// Returns _URC_END_OF_STACK once the buffer is full, _URC_NO_REASON otherwise.
static _Unwind_Reason_Code unwind_callback(struct _Unwind_Context* context, void* arg) {
    struct backtrace_state * bt = (struct backtrace_state *)arg;
    const uintptr_t ip = _Unwind_GetIP(context);

    // A frame with a zero instruction pointer is skipped, not recorded.
    if (ip == 0) {
        return _URC_NO_REASON;
    }

    // No room left: ask the unwinder to stop walking the stack.
    if (bt->current == bt->end) {
        return _URC_END_OF_STACK;
    }

    *bt->current = (void *)ip;
    bt->current++;
    return _URC_NO_REASON;
}
// Prints the current call stack to stderr, one line per frame in the form
// "<index>: <address> <symbol>". The symbol is left empty when dladdr cannot
// resolve a name for the address. At most 100 frames are captured.
static void ggml_print_backtrace_symbols(void) {
    // An enum constant is an integer constant expression, so `buffer` is a
    // plain fixed-size array. A `const int` bound would make it a VLA in C.
    enum { max_frames = 100 };
    void * buffer[max_frames];

    struct backtrace_state state = {buffer, buffer + max_frames};
    _Unwind_Backtrace(unwind_callback, &state);

    // The difference is bounded by max_frames, so narrowing to int is safe.
    const int count = (int)(state.current - buffer);

    for (int idx = 0; idx < count; ++idx) {
        const void * addr = buffer[idx];
        const char * symbol = "";
        Dl_info info;
        if (dladdr(addr, &info) && info.dli_sname) {
            symbol = info.dli_sname;
        }
        fprintf(stderr, "%d: %p %s\n", idx, addr, symbol);
    }
}
#elif defined(__linux__)
#include <execinfo.h>
static void ggml_print_backtrace_symbols(void) {
void * trace[100];

View File

@@ -312,6 +312,8 @@ class GGUFWriter:
self.add_key_value(key, val, GGUFValueType.STRING)
def add_array(self, key: str, val: Sequence[Any]) -> None:
    """Store `val` under `key` as a GGUF ARRAY value.

    An empty sequence is ignored: nothing is added to the key-value store.
    """
    if len(val) > 0:
        self.add_key_value(key, val, GGUFValueType.ARRAY)
@staticmethod
@@ -845,7 +847,14 @@ class GGUFWriter:
encoded_val = val.encode("utf-8") if isinstance(val, str) else val
kv_data += self._pack("Q", len(encoded_val))
kv_data += encoded_val
elif vtype == GGUFValueType.ARRAY and isinstance(val, Sequence) and val:
elif vtype == GGUFValueType.ARRAY:
if not isinstance(val, Sequence):
raise ValueError("Invalid GGUF metadata array, expecting sequence")
if len(val) == 0:
raise ValueError("Invalid GGUF metadata array. Empty array")
if isinstance(val, bytes):
ltype = GGUFValueType.UINT8
else: