mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2026-05-07 16:57:34 +03:00
Compare commits
28 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
803627f121 | ||
|
|
68380ae11b | ||
|
|
cc97e45a14 | ||
|
|
8e52631d55 | ||
|
|
f4b5a2ee91 | ||
|
|
97f06e9eed | ||
|
|
e358d75adb | ||
|
|
cfff1fc300 | ||
|
|
3980e04d5a | ||
|
|
2496f9c149 | ||
|
|
5207d120ea | ||
|
|
a0101225bc | ||
|
|
a290ce6266 | ||
|
|
a00e47e422 | ||
|
|
750141969c | ||
|
|
a736e6c0ac | ||
|
|
e3e3f8e46a | ||
|
|
f08f20a0e3 | ||
|
|
07eaf919ed | ||
|
|
74d6248f71 | ||
|
|
2ca1161bd7 | ||
|
|
bbeb89d76c | ||
|
|
ff806a110d | ||
|
|
d5003b6e4d | ||
|
|
2635ac76e8 | ||
|
|
70a8309114 | ||
|
|
c91faf997f | ||
|
|
bf76ac77be |
2
.github/workflows/gguf-publish.yml
vendored
2
.github/workflows/gguf-publish.yml
vendored
@@ -29,10 +29,10 @@ jobs:
|
||||
uses: actions/setup-python@v6
|
||||
with:
|
||||
python-version: '3.11'
|
||||
pip-install: poetry==2.4.0
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
cd gguf-py
|
||||
python -m pip install poetry==2.3.2
|
||||
poetry install
|
||||
|
||||
- name: Build package
|
||||
|
||||
2
.gitignore
vendored
2
.gitignore
vendored
@@ -105,6 +105,8 @@
|
||||
__pycache__/
|
||||
*/poetry.lock
|
||||
poetry.toml
|
||||
poetry.lock
|
||||
uv.lock
|
||||
|
||||
# Nix
|
||||
|
||||
|
||||
@@ -76,6 +76,7 @@
|
||||
/ggml/src/ggml-vulkan/ @ggml-org/ggml-vulkan
|
||||
/ggml/src/ggml-webgpu/ @ggml-org/ggml-webgpu
|
||||
/ggml/src/ggml-zdnn/ @ggml-org/ggml-zdnn @Andreas-Krebbel @AlekseiNikiforovIBM
|
||||
/ggml/src/ggml-zendnn/ @avinashcpandey @Jiten1parmar @z-vishal
|
||||
/ggml/src/ggml.c @ggerganov
|
||||
/ggml/src/ggml.cpp @ggerganov
|
||||
/ggml/src/gguf.cpp @JohannesGaessler @Green-Sky
|
||||
|
||||
@@ -248,6 +248,8 @@ std::vector<std::string> common_arg::get_env() const {
|
||||
|
||||
// Helper function to parse tensor buffer override strings
|
||||
static void parse_tensor_buffer_overrides(const std::string & value, std::vector<llama_model_tensor_buft_override> & overrides) {
|
||||
ggml_backend_load_all();
|
||||
|
||||
std::map<std::string, ggml_backend_buffer_type_t> buft_list;
|
||||
for (size_t i = 0; i < ggml_backend_dev_count(); ++i) {
|
||||
auto * dev = ggml_backend_dev_get(i);
|
||||
@@ -425,6 +427,10 @@ static bool parse_bool_value(const std::string & value) {
|
||||
}
|
||||
}
|
||||
|
||||
// Reports that a CLI argument no longer exists.
// Always throws std::invalid_argument and therefore never returns. The exception
// message is the fixed prefix "the argument has been removed. " followed by `msg`;
// callers pass a hint naming the replacement option(s).
[[noreturn]] static void arg_removed(const std::string & msg) {
|
||||
throw std::invalid_argument("the argument has been removed. " + msg);
|
||||
}
|
||||
|
||||
//
|
||||
// CLI argument parsing functions
|
||||
//
|
||||
@@ -803,6 +809,7 @@ static std::vector<ggml_backend_dev_t> parse_device_list(const std::string & val
|
||||
if (dev_names.size() == 1 && dev_names[0] == "none") {
|
||||
devices.push_back(nullptr);
|
||||
} else {
|
||||
ggml_backend_load_all();
|
||||
for (const auto & device : dev_names) {
|
||||
auto * dev = ggml_backend_dev_by_name(device.c_str());
|
||||
if (!dev || ggml_backend_dev_type(dev) == GGML_BACKEND_DEVICE_TYPE_CPU) {
|
||||
@@ -820,6 +827,7 @@ static void add_rpc_devices(const std::string & servers) {
|
||||
if (rpc_servers.empty()) {
|
||||
throw std::invalid_argument("no RPC servers specified");
|
||||
}
|
||||
ggml_backend_load_all();
|
||||
ggml_backend_reg_t rpc_reg = ggml_backend_reg_by_name("RPC");
|
||||
if (!rpc_reg) {
|
||||
throw std::invalid_argument("failed to find RPC backend");
|
||||
@@ -1016,9 +1024,6 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
|
||||
|
||||
params.use_color = tty_can_use_colors();
|
||||
|
||||
// load dynamic backends
|
||||
ggml_backend_load_all();
|
||||
|
||||
common_params_context ctx_arg(params);
|
||||
ctx_arg.print_usage = print_usage;
|
||||
ctx_arg.ex = ex;
|
||||
@@ -2275,6 +2280,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
|
||||
{"--list-devices"},
|
||||
"print list of available devices and exit",
|
||||
[](common_params &) {
|
||||
ggml_backend_load_all();
|
||||
std::vector<ggml_backend_dev_t> devices;
|
||||
for (size_t i = 0; i < ggml_backend_dev_count(); ++i) {
|
||||
auto * dev = ggml_backend_dev_get(i);
|
||||
@@ -3715,35 +3721,35 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
|
||||
{"--draft", "--draft-n", "--draft-max"}, "N",
|
||||
"the argument has been removed. use --spec-draft-n-max or --spec-ngram-mod-n-max",
|
||||
[](common_params & /*params*/, int /*value*/) {
|
||||
throw std::invalid_argument("the argument has been removed. use --spec-draft-n-max or --spec-ngram-mod-n-max");
|
||||
arg_removed("use --spec-draft-n-max or --spec-ngram-mod-n-max");
|
||||
}
|
||||
).set_spec().set_examples({LLAMA_EXAMPLE_SPECULATIVE, LLAMA_EXAMPLE_LOOKUP, LLAMA_EXAMPLE_SERVER, LLAMA_EXAMPLE_CLI}).set_env("LLAMA_ARG_DRAFT_MAX"));
|
||||
add_opt(common_arg(
|
||||
{"--draft-min", "--draft-n-min"}, "N",
|
||||
"the argument has been removed. use --spec-draft-n-min or --spec-ngram-mod-n-min",
|
||||
[](common_params & /*params*/, int /*value*/) {
|
||||
throw std::invalid_argument("the argument has been removed. use --spec-draft-n-min or --spec-ngram-mod-n-min");
|
||||
arg_removed("use --spec-draft-n-min or --spec-ngram-mod-n-min");
|
||||
}
|
||||
).set_spec().set_examples({LLAMA_EXAMPLE_SPECULATIVE, LLAMA_EXAMPLE_LOOKUP, LLAMA_EXAMPLE_SERVER, LLAMA_EXAMPLE_CLI}).set_env("LLAMA_ARG_DRAFT_MIN"));
|
||||
add_opt(common_arg(
|
||||
{"--spec-ngram-size-n"}, "N",
|
||||
"the argument has been removed. use the respective --spec-ngram-*-size-n or --spec-ngram-mod-n-match",
|
||||
[](common_params & /*params*/, int /*value*/) {
|
||||
throw std::invalid_argument("the argument has been removed. use the respective --spec-ngram-*-size-n");
|
||||
arg_removed("use the respective --spec-ngram-*-size-n");
|
||||
}
|
||||
).set_spec().set_examples({LLAMA_EXAMPLE_SERVER}));
|
||||
add_opt(common_arg(
|
||||
{"--spec-ngram-size-m"}, "N",
|
||||
"the argument has been removed. use the respective --spec-ngram-*-size-m",
|
||||
[](common_params & /*params*/, int /*value*/) {
|
||||
throw std::invalid_argument("the argument has been removed. use the respective --spec-ngram-*-size-m");
|
||||
arg_removed("use the respective --spec-ngram-*-size-m");
|
||||
}
|
||||
).set_spec().set_examples({LLAMA_EXAMPLE_SERVER}));
|
||||
add_opt(common_arg(
|
||||
{"--spec-ngram-min-hits"}, "N",
|
||||
"the argument has been removed. use the respective --spec-ngram-*-min-hits",
|
||||
[](common_params & /*params*/, int /*value*/) {
|
||||
throw std::invalid_argument("the argument has been removed. use the respective --spec-ngram-*-min-hits");
|
||||
arg_removed("use the respective --spec-ngram-*-min-hits");
|
||||
}
|
||||
).set_spec().set_examples({LLAMA_EXAMPLE_SERVER}));
|
||||
|
||||
|
||||
@@ -109,16 +109,24 @@ static std::vector<llama_device_memory_data> common_get_device_memory_data(
|
||||
ret.back().total = total;
|
||||
}
|
||||
for (size_t i = 0; i < nd; i++) {
|
||||
ggml_backend_dev_t dev = llama_model_get_device(model, i);
|
||||
|
||||
size_t free;
|
||||
size_t total;
|
||||
ggml_backend_dev_memory(llama_model_get_device(model, i), &free, &total);
|
||||
ggml_backend_dev_memory(dev, &free, &total);
|
||||
|
||||
// devices can return 0 bytes for free and total memory if they do not
|
||||
// have any to report. in this case, we will use the host memory as a fallback
|
||||
// fixes: https://github.com/ggml-org/llama.cpp/issues/18577
|
||||
// Some non-GPU accelerator backends, such as BLAS, report 0/0 and rely on
|
||||
// the host-memory fallback. For GPU-like backends, keep 0/0 so --fit does
|
||||
// not assign anything to a device with an unknown memory budget.
|
||||
if (free == 0 && total == 0) {
|
||||
free = ret.back().free;
|
||||
total = ret.back().total;
|
||||
const enum ggml_backend_dev_type type = ggml_backend_dev_type(dev);
|
||||
if (type == GGML_BACKEND_DEVICE_TYPE_GPU || type == GGML_BACKEND_DEVICE_TYPE_IGPU) {
|
||||
LOG_WRN("%s: device %s did not report memory; --fit will not use it\n",
|
||||
__func__, ggml_backend_dev_name(dev));
|
||||
} else {
|
||||
free = ret.back().free;
|
||||
total = ret.back().total;
|
||||
}
|
||||
}
|
||||
ret[i].free = free;
|
||||
ret[i].total = total;
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -175,6 +175,7 @@ pre_computed_hashes = [
|
||||
{"name": "falcon-h1", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/tiiuae/Falcon-H1-34B-Base", "chkhsh": "48f8e02c0359c0bbdd82f26909171fac1c18a457bb47573ed1fe3bbb2c1cfd4b"},
|
||||
{"name": "kimi-k2", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/moonshotai/Kimi-K2-Base", "chkhsh": "81212dc7cdb7e0c1074ca62c5aeab0d43c9f52b8a737be7b12a777c953027890"},
|
||||
{"name": "qwen2", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/Qwen/Qwen3-Embedding-0.6B", "chkhsh": "d4540891389ea895b53b399da6ac824becc30f2fba0e9ddbb98f92e55ca0e97c"},
|
||||
{"name": "qwen35", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/openbmb/MiniCPM-V-4_6", "chkhsh": "1444df51289cfa8063b96f0e62b1125440111bc79a52003ea14b6eac7016fd5f"},
|
||||
{"name": "grok-2", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/alvarobartt/grok-2-tokenizer", "chkhsh": "66b8d4e19ab16c3bfd89bce5d785fb7e0155e8648708a1f42077cb9fe002c273"},
|
||||
# jina-v2-de variants
|
||||
{"name": "jina-v2-de", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/aari1995/German_Semantic_V3", "chkhsh": "b3d1dd861f1d4c5c0d2569ce36baf3f90fe8a102db3de50dd71ff860d91be3df"},
|
||||
|
||||
49
docs/multimodal/minicpmv4.6.md
Normal file
49
docs/multimodal/minicpmv4.6.md
Normal file
@@ -0,0 +1,49 @@
|
||||
## MiniCPM-V 4.6
|
||||
|
||||
### Prepare models and code
|
||||
|
||||
Download the [MiniCPM-V-4_6](https://huggingface.co/openbmb/MiniCPM-V-4_6) PyTorch model from Hugging Face into a folder named `MiniCPM-V-4_6`.
|
||||
|
||||
The model must be the standard `transformers` v5.7.0+ checkpoint (no `trust_remote_code`); the architecture in `config.json` is `MiniCPMV4_6ForConditionalGeneration` with a `qwen3_5_text` text model and a SigLIP-based vision tower plus a window-attention `vit_merger`.
|
||||
|
||||
### Build llama.cpp
|
||||
|
||||
If the build steps below do not match your setup, refer to the official build [documentation](https://github.com/ggml-org/llama.cpp/blob/master/docs/build.md).
|
||||
|
||||
Clone llama.cpp:
|
||||
```bash
|
||||
git clone https://github.com/ggml-org/llama.cpp
|
||||
cd llama.cpp
|
||||
```
|
||||
|
||||
Build llama.cpp using `CMake`:
|
||||
```bash
|
||||
cmake -B build
|
||||
cmake --build build --config Release
|
||||
```
|
||||
|
||||
|
||||
### Usage of MiniCPM-V 4.6
|
||||
|
||||
Unlike older MiniCPM-V variants, MiniCPM-V 4.6 is converted directly through `convert_hf_to_gguf.py`. The same script is invoked twice on the original Hugging Face directory: once to produce the language-model GGUF and once with `--mmproj` to produce the multimodal projector GGUF.
|
||||
|
||||
```bash
|
||||
# language model
|
||||
python ./convert_hf_to_gguf.py ../MiniCPM-V-4_6 --outfile ../MiniCPM-V-4_6/ggml-model-f16.gguf
|
||||
|
||||
# multimodal projector (vision tower + window-attention vit_merger + DownsampleMLP merger)
|
||||
python ./convert_hf_to_gguf.py ../MiniCPM-V-4_6 --mmproj --outfile ../MiniCPM-V-4_6/mmproj-model-f16.gguf
|
||||
|
||||
# optional: quantize to Q4_K_M
|
||||
./build/bin/llama-quantize ../MiniCPM-V-4_6/ggml-model-f16.gguf ../MiniCPM-V-4_6/ggml-model-Q4_K_M.gguf Q4_K_M
|
||||
```
|
||||
|
||||
|
||||
Run inference on Linux or macOS:
|
||||
```bash
|
||||
# run in single-turn mode
|
||||
./build/bin/llama-mtmd-cli -m ../MiniCPM-V-4_6/ggml-model-f16.gguf --mmproj ../MiniCPM-V-4_6/mmproj-model-f16.gguf -c 4096 --temp 0.7 --top-p 0.8 --top-k 100 --repeat-penalty 1.05 --image xx.jpg -p "What is in the image?"
|
||||
|
||||
# run in conversation mode
|
||||
./build/bin/llama-mtmd-cli -m ../MiniCPM-V-4_6/ggml-model-Q4_K_M.gguf --mmproj ../MiniCPM-V-4_6/mmproj-model-f16.gguf
|
||||
```
|
||||
@@ -41,6 +41,9 @@ int main(int argc, char ** argv) {
|
||||
std::string result3;
|
||||
|
||||
// init
|
||||
|
||||
ggml_backend_load_all();
|
||||
|
||||
auto llama_init = common_init_from_params(params);
|
||||
|
||||
auto * model = llama_init->model();
|
||||
|
||||
@@ -119,7 +119,7 @@ if [ $GGML_SYCL_DEVICE -ne -1 ]; then
|
||||
echo "Use $GGML_SYCL_DEVICE as main GPU"
|
||||
#use single GPU only
|
||||
GPUS_SETTING="-mg $GGML_SYCL_DEVICE -sm ${SPLIT_MODE}"
|
||||
export ONEAPI_DEVICE_SELECTOR="level_zero:${$GGML_SYCL_DEVICE}"
|
||||
export ONEAPI_DEVICE_SELECTOR="level_zero:${GGML_SYCL_DEVICE}"
|
||||
echo "ONEAPI_DEVICE_SELECTOR=${ONEAPI_DEVICE_SELECTOR}"
|
||||
else
|
||||
echo "Use all Intel GPUs, including iGPU & dGPU"
|
||||
|
||||
@@ -4,8 +4,8 @@ project("ggml" C CXX ASM)
|
||||
|
||||
### GGML Version
|
||||
set(GGML_VERSION_MAJOR 0)
|
||||
set(GGML_VERSION_MINOR 10)
|
||||
set(GGML_VERSION_PATCH 2)
|
||||
set(GGML_VERSION_MINOR 11)
|
||||
set(GGML_VERSION_PATCH 0)
|
||||
set(GGML_VERSION_BASE "${GGML_VERSION_MAJOR}.${GGML_VERSION_MINOR}.${GGML_VERSION_PATCH}")
|
||||
|
||||
list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake/")
|
||||
|
||||
@@ -203,7 +203,6 @@
|
||||
#elif defined(__riscv)
|
||||
// quants.c
|
||||
#define ggml_vec_dot_nvfp4_q8_0_generic ggml_vec_dot_nvfp4_q8_0
|
||||
#define ggml_vec_dot_q1_0_q8_0_generic ggml_vec_dot_q1_0_q8_0
|
||||
// repack.cpp
|
||||
#define ggml_quantize_mat_q8_0_4x1_generic ggml_quantize_mat_q8_0_4x1
|
||||
#define ggml_quantize_mat_q8_0_4x4_generic ggml_quantize_mat_q8_0_4x4
|
||||
|
||||
@@ -480,6 +480,104 @@ void ggml_vec_dot_q8_0_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const voi
|
||||
#endif
|
||||
}
|
||||
|
||||
#if defined(__riscv_v)
|
||||
// q1_0 x q8_0 dot product using RVV with LMUL = 1 (e8m1); selected by the caller
// when the vector register length is at least 256 bits.
//   n  - number of elements; must be a multiple of QK1_0
//   s  - receives the single float result
//   vx - array of n / QK1_0 block_q1_0
//   vy - array of block_q8_0; four consecutive blocks are consumed per q1_0 block
static NOINLINE void ggml_vec_dot_q1_0_q8_0_vl256(const int n, float * GGML_RESTRICT s, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy) {
|
||||
const int qk = QK1_0;
|
||||
const int nb = n / qk;
|
||||
assert(n % qk == 0);
|
||||
|
||||
const block_q1_0 * GGML_RESTRICT x = vx;
|
||||
const block_q8_0 * GGML_RESTRICT y = vy;
|
||||
|
||||
//LMUL = 1, VLMAX = 32
|
||||
// request 32 8-bit lanes; the kernel relies on getting all 32 (checked by the assert)
const size_t vl32 = __riscv_vsetvl_e8m1(32);
|
||||
assert(vl32 == 32);
|
||||
|
||||
// zero in element 0 (vl = 1); passed as the scalar operand of the widening reduction below
const vint16m1_t zero = __riscv_vmv_v_x_i16m1(0, 1);
|
||||
|
||||
float sumf = 0;
|
||||
|
||||
for (int ib = 0; ib < nb; ++ib) {
|
||||
const float d0 = GGML_CPU_FP16_TO_FP32(x[ib].d);
|
||||
|
||||
// sum over the four q8_0 sub-blocks, each already scaled by its own q8_0 scale
float acc = 0;
|
||||
|
||||
for (int k = 0; k < 4; ++k) {
|
||||
const block_q8_0 * GGML_RESTRICT yb = &y[ib * 4 + k];
|
||||
// 4 bytes of q1_0 bits per sub-block, loaded as a 32-lane mask
const vbool8_t is_not_zero = __riscv_vlm_v_b8(x[ib].qs + 4 * k, vl32);
|
||||
|
||||
const vint8m1_t qy = __riscv_vle8_v_i8m1(yb->qs, vl32);
|
||||
const vint8m1_t neg_qy = __riscv_vneg_v_i8m1(qy, vl32);
|
||||
// per RVV vmerge semantics: lanes with the mask bit set take qy, the others take -qy
const vint8m1_t sy = __riscv_vmerge_vvm_i8m1(neg_qy, qy, is_not_zero, vl32);
|
||||
|
||||
// widening (int8 -> int16) sum of all 32 signed lanes
const vint16m1_t red = __riscv_vwredsum_vs_i8m1_i16m1(sy, zero, vl32);
|
||||
acc += GGML_CPU_FP16_TO_FP32(yb->d) * (float)__riscv_vmv_x_s_i16m1_i16(red);
|
||||
}
|
||||
|
||||
// apply the q1_0 block scale once per block
sumf += d0 * acc;
|
||||
}
|
||||
|
||||
*s = sumf;
|
||||
}
|
||||
|
||||
// q1_0 x q8_0 dot product using RVV with LMUL = 2 (e8m2); selected by the caller
// when the vector register length is at least 128 bits but below 256 bits.
//   n  - number of elements; must be a multiple of QK1_0
//   s  - receives the single float result
//   vx - array of n / QK1_0 block_q1_0
//   vy - array of block_q8_0; four consecutive blocks are consumed per q1_0 block
static NOINLINE void ggml_vec_dot_q1_0_q8_0_vl128(const int n, float * GGML_RESTRICT s, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy) {
|
||||
const int qk = QK1_0;
|
||||
const int nb = n / qk;
|
||||
assert(n % qk == 0);
|
||||
|
||||
const block_q1_0 * GGML_RESTRICT x = vx;
|
||||
const block_q8_0 * GGML_RESTRICT y = vy;
|
||||
|
||||
//LMUL = 2, VLMAX = 32
|
||||
// request 32 8-bit lanes across a register group of two; all 32 are required (checked by the assert)
const size_t vl32 = __riscv_vsetvl_e8m2(32);
|
||||
assert(vl32 == 32);
|
||||
|
||||
// zero in element 0 (vl = 1); passed as the scalar operand of the widening reduction below
const vint16m1_t zero = __riscv_vmv_v_x_i16m1(0, 1);
|
||||
|
||||
float sumf = 0;
|
||||
|
||||
for (int ib = 0; ib < nb; ++ib) {
|
||||
const float d0 = GGML_CPU_FP16_TO_FP32(x[ib].d);
|
||||
|
||||
// sum over the four q8_0 sub-blocks, each already scaled by its own q8_0 scale
float acc = 0;
|
||||
|
||||
for (int k = 0; k < 4; ++k) {
|
||||
const block_q8_0 * GGML_RESTRICT yb = &y[ib * 4 + k];
|
||||
// 4 bytes of q1_0 bits per sub-block, loaded as a 32-lane mask
const vbool4_t is_not_zero = __riscv_vlm_v_b4(x[ib].qs + 4 * k, vl32);
|
||||
|
||||
const vint8m2_t qy = __riscv_vle8_v_i8m2(yb->qs, vl32);
|
||||
const vint8m2_t neg_qy =__riscv_vneg_v_i8m2(qy, vl32);
|
||||
// per RVV vmerge semantics: lanes with the mask bit set take qy, the others take -qy
const vint8m2_t sy = __riscv_vmerge_vvm_i8m2(neg_qy, qy, is_not_zero, vl32);
|
||||
|
||||
// widening (int8 -> int16) sum of all 32 signed lanes
const vint16m1_t red = __riscv_vwredsum_vs_i8m2_i16m1(sy, zero, vl32);
|
||||
acc += GGML_CPU_FP16_TO_FP32(yb->d) * (float)__riscv_vmv_x_s_i16m1_i16(red);
|
||||
}
|
||||
|
||||
// apply the q1_0 block scale once per block
sumf += d0 * acc;
|
||||
}
|
||||
|
||||
*s = sumf;
|
||||
}
|
||||
#endif
|
||||
|
||||
// Entry point for the q1_0 x q8_0 dot product.
// When built with the RISC-V vector extension (__riscv_v) it picks a kernel from the
// vector register length queried at run time; otherwise it always forwards to the
// generic implementation. The vector kernels receive only n, s, vx and vy: bs, bx
// and by are used solely by the generic fallback.
void ggml_vec_dot_q1_0_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) {
|
||||
#if defined(__riscv_v)
|
||||
assert(nrc == 1);
|
||||
|
||||
// __riscv_vlenb() is scaled by 8 to express the register length in bits
const size_t vlen_bits = __riscv_vlenb() * 8;
|
||||
|
||||
if (vlen_bits >= 256) {
|
||||
ggml_vec_dot_q1_0_q8_0_vl256(n, s, vx, vy);
|
||||
} else if (vlen_bits >= 128) {
|
||||
ggml_vec_dot_q1_0_q8_0_vl128(n, s, vx, vy);
|
||||
} else {
|
||||
// registers shorter than 128 bits: no vector kernel, use the generic path
ggml_vec_dot_q1_0_q8_0_generic(n, s, bs, vx, bx, vy, by, nrc);
|
||||
}
|
||||
#else
|
||||
ggml_vec_dot_q1_0_q8_0_generic(n, s, bs, vx, bx, vy, by, nrc);
|
||||
#endif
|
||||
}
|
||||
|
||||
void ggml_vec_dot_q2_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) {
|
||||
assert(nrc == 1);
|
||||
UNUSED(nrc);
|
||||
|
||||
@@ -2965,6 +2965,45 @@ struct ggml_cplan ggml_graph_plan(
|
||||
return cplan;
|
||||
}
|
||||
|
||||
|
||||
// When true, ggml_cpu_try_fuse_ops() never fuses and every node takes the regular compute path.
|
||||
// Derived from the GGML_CPU_DISABLE_FUSION environment variable (true only when it parses to 1).
|
||||
static bool ggml_cpu_disable_fusion = false; // initialized once in ggml_cpu_init(), read-only afterwards
|
||||
|
||||
// Try to fuse the current node with subsequent nodes for better performance.
// Returns the number of nodes skipped by fusion (>=1), or 0 if no fusion was applied.
// When a fusion is applied the fused kernel has already been executed by this call,
// so the caller must not compute the current node again and must advance past the
// skipped nodes. The only pattern handled here is RMS_NORM followed by MUL.
//   cgraph - graph being computed
//   node_n - index of the current node in cgraph->nodes
//   params - compute parameters forwarded to the fused kernel
//   cplan  - compute plan; fusion is skipped when cplan->use_ref is set
static int ggml_cpu_try_fuse_ops(
|
||||
const struct ggml_cgraph * cgraph,
|
||||
const int node_n,
|
||||
const struct ggml_compute_params * params,
|
||||
const struct ggml_cplan * cplan) {
|
||||
|
||||
if (ggml_cpu_disable_fusion || cplan->use_ref) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
struct ggml_tensor * node = cgraph->nodes[node_n];
|
||||
|
||||
if (node->op == GGML_OP_RMS_NORM) {
|
||||
// RMS_NORM + MUL fusion
|
||||
const enum ggml_op fuse_ops[] = { GGML_OP_RMS_NORM, GGML_OP_MUL };
|
||||
if (ggml_can_fuse(cgraph, node_n, fuse_ops, 2)) {
|
||||
struct ggml_tensor * mul_node = cgraph->nodes[node_n + 1];
|
||||
// the MUL operand that is not the RMS_NORM output
const struct ggml_tensor * mul_w = (mul_node->src[0] == node)
|
||||
? mul_node->src[1] : mul_node->src[0];
|
||||
// fuse only for F32 input/output/multiplier whose first dimension matches
// the normalized row and whose elements are packed along that dimension
if (node->src[0]->type == GGML_TYPE_F32 &&
|
||||
mul_node->type == GGML_TYPE_F32 &&
|
||||
mul_w->type == GGML_TYPE_F32 &&
|
||||
mul_w->ne[0] == node->ne[0] &&
|
||||
mul_w->nb[0] == sizeof(float)) {
|
||||
|
||||
ggml_compute_forward_rms_norm_mul_fused(params, node, mul_node);
|
||||
// one node (the MUL) is skipped by the caller
return 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static thread_ret_t ggml_graph_compute_thread(void * data) {
|
||||
struct ggml_compute_state * state = (struct ggml_compute_state *) data;
|
||||
struct ggml_threadpool * tp = state->threadpool;
|
||||
@@ -3001,7 +3040,14 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
|
||||
continue;
|
||||
}
|
||||
|
||||
ggml_compute_forward(¶ms, node);
|
||||
// TODO: move fused-op detection into ggml_graph_plan so fusion decisions are made once at planning time
|
||||
// Try fused ops, fall back to normal compute
|
||||
const int n_fused = ggml_cpu_try_fuse_ops(cgraph, node_n, ¶ms, cplan);
|
||||
if (n_fused > 0) {
|
||||
node_n += n_fused;
|
||||
} else {
|
||||
ggml_compute_forward(¶ms, node);
|
||||
}
|
||||
|
||||
if (state->ith == 0 && cplan->abort_callback &&
|
||||
cplan->abort_callback(cplan->abort_callback_data)) {
|
||||
@@ -3763,6 +3809,11 @@ void ggml_cpu_init(void) {
|
||||
ggml_init_riscv_arch_features();
|
||||
#endif
|
||||
|
||||
{
|
||||
const char * env = getenv("GGML_CPU_DISABLE_FUSION");
|
||||
ggml_cpu_disable_fusion = (env != NULL && atoi(env) == 1);
|
||||
}
|
||||
|
||||
is_first_call = false;
|
||||
}
|
||||
|
||||
|
||||
@@ -3713,11 +3713,27 @@ void ggml_compute_forward_norm(
|
||||
|
||||
// ggml_compute_forward_group_rms_norm
|
||||
|
||||
// fusion kinds that can be combined with the rms_norm computation in a single pass.
|
||||
// extend this enum when adding new fused variants (e.g. FUSE_ADD, FUSE_MUL_ADD, ...).
|
||||
enum ggml_rms_norm_fuse_op {
|
||||
// plain rms_norm: the scaled row is written as-is
GGML_RMS_NORM_FUSE_OP_NONE,
|
||||
// rms_norm followed by an element-wise multiply with the other operand of the fused MUL node
GGML_RMS_NORM_FUSE_OP_MUL,
|
||||
};
|
||||
|
||||
template <ggml_rms_norm_fuse_op FUSE_OP>
|
||||
static void ggml_compute_forward_rms_norm_f32(
|
||||
const ggml_compute_params * params,
|
||||
ggml_tensor * dst) {
|
||||
ggml_tensor * dst_rms_norm,
|
||||
ggml_tensor * dst_fused = nullptr) {
|
||||
|
||||
const ggml_tensor * src0 = dst->src[0];
|
||||
const ggml_tensor * src0 = dst_rms_norm->src[0];
|
||||
const ggml_tensor * src1 = nullptr;
|
||||
ggml_tensor * dst = dst_rms_norm;
|
||||
|
||||
if constexpr (FUSE_OP == GGML_RMS_NORM_FUSE_OP_MUL) {
|
||||
src1 = (dst_fused->src[0] == dst_rms_norm) ? dst_fused->src[1] : dst_fused->src[0];
|
||||
dst = dst_fused;
|
||||
}
|
||||
|
||||
GGML_ASSERT(ggml_are_same_shape(src0, dst));
|
||||
|
||||
@@ -3726,11 +3742,10 @@ static void ggml_compute_forward_rms_norm_f32(
|
||||
const int ith = params->ith;
|
||||
const int nth = params->nth;
|
||||
|
||||
GGML_TENSOR_UNARY_OP_LOCALS
|
||||
GGML_TENSOR_BINARY_OP_LOCALS
|
||||
|
||||
float eps;
|
||||
memcpy(&eps, dst->op_params, sizeof(float));
|
||||
|
||||
memcpy(&eps, dst_rms_norm->op_params, sizeof(float));
|
||||
GGML_ASSERT(eps >= 0.0f);
|
||||
|
||||
// TODO: optimize
|
||||
@@ -3740,25 +3755,32 @@ static void ggml_compute_forward_rms_norm_f32(
|
||||
const float * x = (float *) ((char *) src0->data + i01*nb01 + i02*nb02 + i03*nb03);
|
||||
|
||||
ggml_float sum = 0.0;
|
||||
// worth switching to explicit SIMD?
|
||||
for (int64_t i00 = 0; i00 < ne00; i00++) {
|
||||
sum += (ggml_float)(x[i00] * x[i00]);
|
||||
}
|
||||
|
||||
const float mean = sum/ne00;
|
||||
|
||||
float * y = (float *) ((char *) dst->data + i01*nb1 + i02*nb2 + i03*nb3);
|
||||
|
||||
memcpy(y, x, ne00 * sizeof(float));
|
||||
// for (int i00 = 0; i00 < ne00; i00++) {
|
||||
// y[i00] = x[i00];
|
||||
// }
|
||||
|
||||
const float mean = sum/ne00;
|
||||
const float scale = 1.0f/sqrtf(mean + eps);
|
||||
|
||||
// if you hit this, likely you got an inf somewhere earlier
|
||||
assert(scale > 0.0f);
|
||||
|
||||
ggml_vec_scale_f32(ne00, y, scale);
|
||||
float * y = (float *) ((char *) dst->data + i01*nb1 + i02*nb2 + i03*nb3);
|
||||
|
||||
if constexpr (FUSE_OP == GGML_RMS_NORM_FUSE_OP_MUL) {
|
||||
const int64_t i11 = i01 % ne11;
|
||||
const int64_t i12 = i02 % ne12;
|
||||
const int64_t i13 = i03 % ne13;
|
||||
const float * w = (float *) ((char *) src1->data + i11*nb11 + i12*nb12 + i13*nb13);
|
||||
|
||||
for (int64_t i00 = 0; i00 < ne00; i00++) {
|
||||
y[i00] = x[i00] * scale * w[i00];
|
||||
}
|
||||
} else {
|
||||
memcpy(y, x, ne00 * sizeof(float));
|
||||
ggml_vec_scale_f32(ne00, y, scale);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -3773,7 +3795,31 @@ void ggml_compute_forward_rms_norm(
|
||||
switch (src0->type) {
|
||||
case GGML_TYPE_F32:
|
||||
{
|
||||
ggml_compute_forward_rms_norm_f32(params, dst);
|
||||
ggml_compute_forward_rms_norm_f32<GGML_RMS_NORM_FUSE_OP_NONE>(params, dst);
|
||||
} break;
|
||||
default:
|
||||
{
|
||||
GGML_ABORT("fatal error");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Fused RMS_NORM + MUL: computes dst = rms_norm(src0) * src1 in a single pass.
|
||||
// This avoids materializing the intermediate rms_norm result in memory.
|
||||
void ggml_compute_forward_rms_norm_mul_fused(
|
||||
const ggml_compute_params * params,
|
||||
ggml_tensor * dst_rms_norm,
|
||||
ggml_tensor * dst_mul) {
|
||||
|
||||
GGML_ASSERT(dst_mul != nullptr);
|
||||
GGML_ASSERT(dst_mul->src[0] == dst_rms_norm || dst_mul->src[1] == dst_rms_norm);
|
||||
|
||||
const ggml_tensor * src0 = dst_rms_norm->src[0];
|
||||
|
||||
switch (src0->type) {
|
||||
case GGML_TYPE_F32:
|
||||
{
|
||||
ggml_compute_forward_rms_norm_f32<GGML_RMS_NORM_FUSE_OP_MUL>(params, dst_rms_norm, dst_mul);
|
||||
} break;
|
||||
default:
|
||||
{
|
||||
|
||||
@@ -44,6 +44,7 @@ void ggml_compute_forward_concat(const struct ggml_compute_params * params, stru
|
||||
void ggml_compute_forward_silu_back(const struct ggml_compute_params * params, struct ggml_tensor * dst);
|
||||
void ggml_compute_forward_norm(const struct ggml_compute_params * params, struct ggml_tensor * dst);
|
||||
void ggml_compute_forward_rms_norm(const struct ggml_compute_params * params, struct ggml_tensor * dst);
|
||||
void ggml_compute_forward_rms_norm_mul_fused(const struct ggml_compute_params * params, struct ggml_tensor * dst_rms_norm, struct ggml_tensor * dst_mul);
|
||||
void ggml_compute_forward_rms_norm_back(const struct ggml_compute_params * params, struct ggml_tensor * dst);
|
||||
void ggml_compute_forward_group_norm(const struct ggml_compute_params * params, struct ggml_tensor * dst);
|
||||
void ggml_compute_forward_l2_norm(const struct ggml_compute_params * params, struct ggml_tensor * dst);
|
||||
|
||||
@@ -742,17 +742,45 @@ static void transfer_output_chunk_threaded(struct htp_context *ctx, float *dst,
|
||||
// activations : fp32 -> fp16
|
||||
|
||||
static void transfer_activation_chunk_fp32_to_fp16(__fp16 *restrict vtcm_dst, const float *restrict src, int n_rows, int k_block, int k_stride) {
|
||||
for (int r = 0; r < n_rows; r += 2) {
|
||||
const int n_rows_padded = hex_align_up(n_rows, HMX_FP16_TILE_N_ROWS);
|
||||
const int n_rows_tiled = (n_rows / HMX_FP16_TILE_N_ROWS) * HMX_FP16_TILE_N_ROWS;
|
||||
|
||||
int r = 0;
|
||||
|
||||
#pragma unroll(2)
|
||||
for (r = 0; r < n_rows_tiled; r += 2) {
|
||||
int r0 = r / HMX_FP16_TILE_N_ROWS; // tile row index
|
||||
int r1 = r % HMX_FP16_TILE_N_ROWS; // intra-tile row idx
|
||||
|
||||
const bool next_row_valid = (r + 1) < n_rows;
|
||||
|
||||
const HVX_Vector *pv_in0 = (const HVX_Vector *) (src + (r + 0) * k_stride);
|
||||
const HVX_Vector *pv_in1 = (const HVX_Vector *) (src + (r + 1) * k_stride);
|
||||
for (int c = 0; c < k_block; c += 32) {
|
||||
HVX_Vector v0 = *pv_in0++;
|
||||
HVX_Vector v1 = next_row_valid ? *pv_in1++ : Q6_V_vzero();
|
||||
HVX_Vector v1 = *pv_in1++;
|
||||
|
||||
HVX_Vector v_out = hvx_vec_f32_to_f16_shuff(v0, v1);
|
||||
|
||||
// compute output position
|
||||
int c0 = c / HMX_FP16_TILE_N_COLS; // tile column index
|
||||
int tile_idx = r0 * (k_block / HMX_FP16_TILE_N_COLS) + c0;
|
||||
|
||||
HVX_Vector *tile = (HVX_Vector *) (vtcm_dst + tile_idx * HMX_FP16_TILE_N_ELMS);
|
||||
tile[r1 / 2] = v_out;
|
||||
}
|
||||
}
|
||||
|
||||
for (; r < n_rows_padded; r += 2) {
|
||||
int r0 = r / HMX_FP16_TILE_N_ROWS; // tile row index
|
||||
int r1 = r % HMX_FP16_TILE_N_ROWS; // intra-tile row idx
|
||||
|
||||
const bool row0_valid = r < n_rows;
|
||||
const bool row1_valid = (r + 1) < n_rows;
|
||||
|
||||
const HVX_Vector *pv_in0 = row0_valid ? (const HVX_Vector *) (src + (r + 0) * k_stride) : NULL;
|
||||
const HVX_Vector *pv_in1 = row1_valid ? (const HVX_Vector *) (src + (r + 1) * k_stride) : NULL;
|
||||
for (int c = 0; c < k_block; c += 32) {
|
||||
HVX_Vector v0 = row0_valid ? *pv_in0++ : Q6_V_vzero();
|
||||
HVX_Vector v1 = row1_valid ? *pv_in1++ : Q6_V_vzero();
|
||||
|
||||
HVX_Vector v_out = hvx_vec_f32_to_f16_shuff(v0, v1);
|
||||
|
||||
@@ -889,7 +917,9 @@ static __attribute__((noinline)) int mat_mul_qk_0_d16a32_out_stationary(struct h
|
||||
// n_block_cost = m*2: each extra N-block re-loads all M×K activation (cheaper).
|
||||
const size_t m_block_cost = (size_t) n * 3;
|
||||
const size_t n_block_cost = (size_t) m * 2;
|
||||
if (hmx_compute_chunks(vtcm_budget, overhead, per_n, per_m, per_mn, m, n, m_block_cost, n_block_cost, &M_BLOCK_SIZE,
|
||||
if (hmx_compute_chunks(vtcm_budget, overhead, per_n, per_m, per_mn,
|
||||
hex_align_up(m, HMX_FP16_TILE_N_ROWS), n,
|
||||
m_block_cost, n_block_cost, &M_BLOCK_SIZE,
|
||||
&N_BLOCK_SIZE, &vtcm_used) != 0) {
|
||||
FARF(HIGH, "%s: VTCM too small (m=%d k=%d n=%d budget=%zu)", __func__, m, k, n, vtcm_budget);
|
||||
return -1;
|
||||
@@ -1084,7 +1114,8 @@ int hmx_mat_mul_permuted_qk_0_d16a32(struct htp_context *ctx, float *restrict ds
|
||||
|
||||
if (m >= 128) {
|
||||
size_t mc = 0, nc = 0, used = 0;
|
||||
if (hmx_compute_chunks(vtcm_budget, /*overhead=*/256, pipe_per_n, /*per_m=*/vec_dot_size, pipe_per_mn, m, n,
|
||||
if (hmx_compute_chunks(vtcm_budget, /*overhead=*/256, pipe_per_n, /*per_m=*/vec_dot_size, pipe_per_mn,
|
||||
hex_align_up(m, HMX_FP16_TILE_N_ROWS), n,
|
||||
/*m_block_cost=*/(size_t) n * 3,
|
||||
/*n_block_cost=*/(size_t) m * 2, &mc, &nc, &used) == 0 &&
|
||||
hmx_ceil_div((size_t) n, nc) >= 2) {
|
||||
@@ -1096,7 +1127,8 @@ int hmx_mat_mul_permuted_qk_0_d16a32(struct htp_context *ctx, float *restrict ds
|
||||
}
|
||||
|
||||
if (!use_pipeline) {
|
||||
if (hmx_compute_chunks(vtcm_budget, /*overhead=*/256, seq_per_n, /*per_m=*/vec_dot_size, seq_per_mn, m, n,
|
||||
if (hmx_compute_chunks(vtcm_budget, /*overhead=*/256, seq_per_n, /*per_m=*/vec_dot_size, seq_per_mn,
|
||||
hex_align_up(m, HMX_FP16_TILE_N_ROWS), n,
|
||||
/*m_block_cost=*/(size_t) n * 3,
|
||||
/*n_block_cost=*/(size_t) m * 2, &m_chunk_n_rows, &n_chunk_n_cols, &vtcm_used) != 0) {
|
||||
FARF(HIGH, "%s: VTCM too small (m=%d k=%d n=%d budget=%zu)", __func__, m, k, n, vtcm_budget);
|
||||
@@ -1432,7 +1464,8 @@ int hmx_mat_mul_permuted_w16a32_batched(struct htp_context *ctx, const hmx_matmu
|
||||
if (hmx_compute_chunks(vtcm_budget, /*overhead=*/256,
|
||||
/*per_n=*/3 * vec_dot_size,
|
||||
/*per_m=*/group_size * vec_dot_size + f32_scratch_per_m,
|
||||
/*per_mn=*/sizeof(__fp16), params->m, params->n,
|
||||
/*per_mn=*/sizeof(__fp16),
|
||||
hex_align_up(params->m, HMX_FP16_TILE_N_ROWS), params->n,
|
||||
/*m_block_cost=*/(size_t) params->n,
|
||||
/*n_block_cost=*/(size_t) params->m, &m_chunk_n_rows, &n_chunk_n_cols, &vtcm_used) != 0) {
|
||||
FARF(HIGH, "%s: grouped path does not fit VTCM, falling back to legacy batched loop", __func__);
|
||||
@@ -1612,7 +1645,7 @@ int hmx_mat_mul_permuted_w16a32(struct htp_context *ctx, float *restrict dst, co
|
||||
/*per_n=*/3 * vec_dot_size, // W + S0 + S1
|
||||
/*per_m=*/vec_dot_size + f32_scratch_per_m, // A + optional F32 scratch
|
||||
/*per_mn=*/sizeof(__fp16), // O
|
||||
m, n,
|
||||
hex_align_up(m, HMX_FP16_TILE_N_ROWS), n,
|
||||
/*m_block_cost=*/(size_t) n,
|
||||
/*n_block_cost=*/(size_t) m, &m_chunk_n_rows, &n_chunk_n_cols, &vtcm_used) != 0) {
|
||||
FARF(HIGH, "%s: VTCM too small (m=%d k=%d n=%d budget=%zu)", __func__, m, k, n, vtcm_budget);
|
||||
|
||||
@@ -2991,12 +2991,10 @@ int op_matmul(struct htp_ops_context * octx) {
|
||||
return op_matmul_hvx(octx);
|
||||
}
|
||||
|
||||
// M alignment: when M > 32 but not 32-aligned, we split into
|
||||
// HMX (first m_hmx = M & ~31 rows) + HVX (remaining m_tail rows).
|
||||
// When M <= 32 and not 32-aligned, fall back entirely to HVX.
|
||||
// M alignment: Use HMX when M >= 32, the last partial tile (m_total % 32 rows)
|
||||
// is handled by HMX itself; when M < 32 fall back to HVX.
|
||||
const int m_total = (int) src1->ne[1];
|
||||
const int m_tail = m_total % 32;
|
||||
const int m_hmx = m_total - m_tail;
|
||||
const int m_hmx = m_total & ~31; // 0 when M < 32
|
||||
|
||||
if (m_hmx == 0) {
|
||||
return op_matmul_hvx(octx);
|
||||
@@ -3009,7 +3007,6 @@ int op_matmul(struct htp_ops_context * octx) {
|
||||
int k = (int) src0->ne[0]; // inner dimension
|
||||
int n = (int) src0->ne[1]; // weight columns
|
||||
|
||||
// --- Phase 1: HMX on the first m_hmx (32-aligned) rows ---
|
||||
int ret = -1;
|
||||
|
||||
// Row strides in elements. For compact tensors these equal k; for
|
||||
@@ -3027,7 +3024,7 @@ int op_matmul(struct htp_ops_context * octx) {
|
||||
.dst = (float *) dst->data,
|
||||
.activation = (float *) src1->data,
|
||||
.permuted_weight = (const __fp16 *) src0->data,
|
||||
.m = m_hmx,
|
||||
.m = m_total,
|
||||
.k = k,
|
||||
.n = n,
|
||||
.act_stride = act_stride,
|
||||
@@ -3048,12 +3045,12 @@ int op_matmul(struct htp_ops_context * octx) {
|
||||
} else {
|
||||
ret = hmx_mat_mul_permuted_w16a32(octx->ctx,
|
||||
(float*) dst->data, (float*) src1->data, (const __fp16 *) src0->data,
|
||||
m_hmx, k, n, act_stride, wgt_stride);
|
||||
m_total, k, n, act_stride, wgt_stride);
|
||||
}
|
||||
} else {
|
||||
ret = hmx_mat_mul_permuted_qk_0_d16a32(octx->ctx,
|
||||
(float*) dst->data, (float*) src1->data, (const uint8_t *) src0->data,
|
||||
m_hmx, k, n, (int) src0->type);
|
||||
m_total, k, n, (int) src0->type);
|
||||
}
|
||||
|
||||
if (ret != 0) {
|
||||
@@ -3061,27 +3058,6 @@ int op_matmul(struct htp_ops_context * octx) {
|
||||
return op_matmul(octx);
|
||||
}
|
||||
|
||||
// --- Phase 2: HVX on the remaining m_tail rows ---
|
||||
if (m_tail > 0) {
|
||||
// copy of src1 and dst
|
||||
struct htp_tensor src1_tail = *src1;
|
||||
struct htp_tensor dst_tail = *dst;
|
||||
|
||||
src1_tail.ne[1] = m_tail; // only tail rows
|
||||
dst_tail.ne[1] = m_tail; // only tail rows
|
||||
|
||||
// Offset activation and dst pointers past the HMX-processed rows.
|
||||
// Use nb[1] (row stride in bytes) to compute the byte offset.
|
||||
src1_tail.data += (uint32_t) m_hmx * src1->nb[1];
|
||||
dst_tail.data += (uint32_t) m_hmx * dst->nb[1];
|
||||
|
||||
octx->src[1] = &src1_tail;
|
||||
octx->dst = &dst_tail;
|
||||
|
||||
FARF(HIGH, "hmx-matmul: HVX tail m_tail %d src1 %p dst %p", m_tail, (void *) src1_tail.data, (void *) dst_tail.data);
|
||||
return op_matmul_hvx(octx);
|
||||
}
|
||||
|
||||
return 0;
|
||||
#endif // HTP_HAS_HMX
|
||||
}
|
||||
|
||||
@@ -66,8 +66,6 @@ set(GGML_OPENCL_KERNELS
|
||||
diag
|
||||
div
|
||||
gelu
|
||||
gemv_noshuffle_general
|
||||
gemv_noshuffle
|
||||
get_rows
|
||||
glu
|
||||
group_norm
|
||||
@@ -75,7 +73,6 @@ set(GGML_OPENCL_KERNELS
|
||||
im2col_f32
|
||||
im2col_f16
|
||||
mean
|
||||
mul_mat_Ab_Bi_8x4
|
||||
mul_mv_f16_f16
|
||||
mul_mv_f16_f32_1row
|
||||
mul_mv_f16_f32_l4
|
||||
@@ -120,12 +117,15 @@ set(GGML_OPENCL_KERNELS
|
||||
mul_mm_q4_k_f32_l4_lm
|
||||
mul_mm_q5_k_f32_l4_lm
|
||||
mul_mm_q6_k_f32_l4_lm
|
||||
mul_mm_q8_0_f32_8x4
|
||||
gemv_noshuffle_q4_0_f32
|
||||
gemv_noshuffle_q4_0_f32_spec
|
||||
gemm_noshuffle_q4_0_f32
|
||||
gemv_noshuffle_q4_1_f32
|
||||
gemm_noshuffle_q4_1_f32
|
||||
gemv_noshuffle_iq4_nl_f32
|
||||
gemm_noshuffle_iq4_nl_f32
|
||||
gemv_noshuffle_general_q8_0_f32
|
||||
gemv_noshuffle_q8_0_f32
|
||||
gemm_noshuffle_q8_0_f32
|
||||
gemv_noshuffle_q4_k_f32
|
||||
gemm_noshuffle_q4_k_f32
|
||||
gemv_noshuffle_q6_k_f32
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -17,7 +17,7 @@
|
||||
REQD_SUBGROUP_SIZE_128
|
||||
#endif
|
||||
|
||||
kernel void kernel_mul_mat_Ab_Bi_8x4(
|
||||
kernel void kernel_gemm_noshuffle_q4_0_f32(
|
||||
global const ushort * src0_q, // quantized A
|
||||
global const half * src0_d, // A scales
|
||||
__read_only image1d_buffer_t src1, // B (1d image)
|
||||
@@ -11,7 +11,7 @@
|
||||
REQD_SUBGROUP_SIZE_128
|
||||
#endif
|
||||
|
||||
kernel void kernel_mul_mm_q8_0_f32_8x4(
|
||||
kernel void kernel_gemm_noshuffle_q8_0_f32(
|
||||
global const uint * src0_q,
|
||||
global const half * src0_d,
|
||||
__read_only image1d_buffer_t src1,
|
||||
@@ -191,7 +191,7 @@
|
||||
#ifdef ADRENO_GPU
|
||||
REQD_SUBGROUP_SIZE_64
|
||||
#endif
|
||||
__kernel void kernel_gemv_noshuffle(
|
||||
__kernel void kernel_gemv_noshuffle_q4_0_f32(
|
||||
__read_only image1d_buffer_t src0_q, // quantized A
|
||||
global half2 * src0_d, // A scales
|
||||
__read_only image1d_buffer_t src1, // B
|
||||
@@ -238,21 +238,21 @@ __kernel void kernel_gemv_noshuffle(
|
||||
regA.s1 = read_imageui(src0_q, (gid + k * BLOCK_STRIDE_A + LINE_STRIDE_A * 1)).x;
|
||||
regA.s2 = read_imageui(src0_q, (gid + k * BLOCK_STRIDE_A + LINE_STRIDE_A * 2)).x;
|
||||
regA.s3 = read_imageui(src0_q, (gid + k * BLOCK_STRIDE_A + LINE_STRIDE_A * 3)).x;
|
||||
#ifdef VECTOR_SUB_GROUP_BROADCAT
|
||||
#ifdef VECTOR_SUB_GROUP_BROADCAST
|
||||
dequantizeBlockAccum_ns_sgbroadcast_8_hi(totalSum, as_ushort8(regA), regS, regB);
|
||||
#else
|
||||
dequantizeBlockAccum_ns_sgbroadcast_1_hi(totalSum, as_ushort8(regA), regS, regB);
|
||||
#endif // VECTOR_SUB_GROUP_BROADCAT
|
||||
#endif // VECTOR_SUB_GROUP_BROADCAST
|
||||
|
||||
regA.s0 = read_imageui(src0_q, (gid + k * BLOCK_STRIDE_A + LINE_STRIDE_A * 4)).x;
|
||||
regA.s1 = read_imageui(src0_q, (gid + k * BLOCK_STRIDE_A + LINE_STRIDE_A * 5)).x;
|
||||
regA.s2 = read_imageui(src0_q, (gid + k * BLOCK_STRIDE_A + LINE_STRIDE_A * 6)).x;
|
||||
regA.s3 = read_imageui(src0_q, (gid + k * BLOCK_STRIDE_A + LINE_STRIDE_A * 7)).x;
|
||||
#ifdef VECTOR_SUB_GROUP_BROADCAT
|
||||
#ifdef VECTOR_SUB_GROUP_BROADCAST
|
||||
dequantizeBlockAccum_ns_sgbroadcast_8_lo(totalSum, as_ushort8(regA), regS, regB);
|
||||
#else
|
||||
dequantizeBlockAccum_ns_sgbroadcast_1_lo(totalSum, as_ushort8(regA), regS, regB);
|
||||
#endif // VECTOR_SUB_GROUP_BROADCAT
|
||||
#endif // VECTOR_SUB_GROUP_BROADCAST
|
||||
}
|
||||
|
||||
// reduction in local memory, assumes #wave=4
|
||||
@@ -191,7 +191,7 @@
|
||||
#ifdef ADRENO_GPU
|
||||
REQD_SUBGROUP_SIZE_64
|
||||
#endif
|
||||
__kernel void kernel_gemv_noshuffle(
|
||||
__kernel void kernel_gemv_noshuffle_q4_0_f32(
|
||||
__read_only image1d_buffer_t src0_q, // quantized A
|
||||
global half2 * src0_d, // A scales
|
||||
__read_only image1d_buffer_t src1, // B
|
||||
@@ -232,21 +232,21 @@ __kernel void kernel_gemv_noshuffle(
|
||||
regA.s1 = read_imageui(src0_q, (gid + k * BLOCK_STRIDE_A + LINE_STRIDE_A * 1)).x;
|
||||
regA.s2 = read_imageui(src0_q, (gid + k * BLOCK_STRIDE_A + LINE_STRIDE_A * 2)).x;
|
||||
regA.s3 = read_imageui(src0_q, (gid + k * BLOCK_STRIDE_A + LINE_STRIDE_A * 3)).x;
|
||||
#ifdef VECTOR_SUB_GROUP_BROADCAT
|
||||
#ifdef VECTOR_SUB_GROUP_BROADCAST
|
||||
dequantizeBlockAccum_ns_sgbroadcast_8_hi(totalSum, as_ushort8(regA), regS, regB);
|
||||
#else
|
||||
dequantizeBlockAccum_ns_sgbroadcast_1_hi(totalSum, as_ushort8(regA), regS, regB);
|
||||
#endif // VECTOR_SUB_GROUP_BROADCAT
|
||||
#endif // VECTOR_SUB_GROUP_BROADCAST
|
||||
|
||||
regA.s0 = read_imageui(src0_q, (gid + k * BLOCK_STRIDE_A + LINE_STRIDE_A * 4)).x;
|
||||
regA.s1 = read_imageui(src0_q, (gid + k * BLOCK_STRIDE_A + LINE_STRIDE_A * 5)).x;
|
||||
regA.s2 = read_imageui(src0_q, (gid + k * BLOCK_STRIDE_A + LINE_STRIDE_A * 6)).x;
|
||||
regA.s3 = read_imageui(src0_q, (gid + k * BLOCK_STRIDE_A + LINE_STRIDE_A * 7)).x;
|
||||
#ifdef VECTOR_SUB_GROUP_BROADCAT
|
||||
#ifdef VECTOR_SUB_GROUP_BROADCAST
|
||||
dequantizeBlockAccum_ns_sgbroadcast_8_lo(totalSum, as_ushort8(regA), regS, regB);
|
||||
#else
|
||||
dequantizeBlockAccum_ns_sgbroadcast_1_lo(totalSum, as_ushort8(regA), regS, regB);
|
||||
#endif // VECTOR_SUB_GROUP_BROADCAT
|
||||
#endif // VECTOR_SUB_GROUP_BROADCAST
|
||||
}
|
||||
|
||||
// reduction in local memory, assumes #wave=4
|
||||
@@ -207,35 +207,11 @@ struct ggml_backend_rpc_buffer_type_context {
|
||||
size_t max_size;
|
||||
};
|
||||
|
||||
struct graph_cache {
|
||||
|
||||
bool is_cached(const ggml_cgraph * cgraph) {
|
||||
if ((int)last_graph.size() != cgraph->n_nodes) {
|
||||
return false;
|
||||
}
|
||||
for (int i = 0; i < cgraph->n_nodes; i++) {
|
||||
if (memcmp(&last_graph[i], cgraph->nodes[i], sizeof(ggml_tensor)) != 0) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
void add(const ggml_cgraph * cgraph) {
|
||||
last_graph.resize(cgraph->n_nodes);
|
||||
for (int i = 0; i < cgraph->n_nodes; i++) {
|
||||
memcpy(&last_graph[i], cgraph->nodes[i], sizeof(ggml_tensor));
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<ggml_tensor> last_graph;
|
||||
};
|
||||
|
||||
struct ggml_backend_rpc_context {
|
||||
std::string endpoint;
|
||||
uint32_t device;
|
||||
std::string name;
|
||||
graph_cache gc;
|
||||
uint64_t last_graph_uid;
|
||||
};
|
||||
|
||||
struct ggml_backend_rpc_buffer_context {
|
||||
@@ -717,7 +693,7 @@ static enum ggml_status ggml_backend_rpc_graph_compute(ggml_backend_t backend, g
|
||||
ggml_backend_rpc_context * rpc_ctx = (ggml_backend_rpc_context *)backend->context;
|
||||
|
||||
GGML_ASSERT(cgraph->n_nodes > 0);
|
||||
bool reuse = rpc_ctx->gc.is_cached(cgraph);
|
||||
bool reuse = cgraph->uid != 0 && rpc_ctx->last_graph_uid == cgraph->uid;
|
||||
if (reuse) {
|
||||
rpc_msg_graph_recompute_req request;
|
||||
request.device = rpc_ctx->device;
|
||||
@@ -725,7 +701,7 @@ static enum ggml_status ggml_backend_rpc_graph_compute(ggml_backend_t backend, g
|
||||
bool status = send_rpc_cmd(sock, RPC_CMD_GRAPH_RECOMPUTE, &request, sizeof(request));
|
||||
RPC_STATUS_ASSERT(status);
|
||||
} else {
|
||||
rpc_ctx->gc.add(cgraph);
|
||||
rpc_ctx->last_graph_uid = cgraph->uid;
|
||||
std::vector<uint8_t> input;
|
||||
serialize_graph(rpc_ctx->device, cgraph, input);
|
||||
auto sock = get_socket(rpc_ctx->endpoint);
|
||||
@@ -791,10 +767,10 @@ ggml_backend_buffer_type_t ggml_backend_rpc_buffer_type(const char * endpoint, u
|
||||
ggml_backend_t ggml_backend_rpc_init(const char * endpoint, uint32_t device) {
|
||||
std::string dev_name = "RPC" + std::to_string(device) + "[" + std::string(endpoint) + "]";
|
||||
ggml_backend_rpc_context * ctx = new ggml_backend_rpc_context {
|
||||
/* .endpoint = */ endpoint,
|
||||
/* .device = */ device,
|
||||
/* .name = */ dev_name,
|
||||
/* .gc = */ {},
|
||||
/* .endpoint = */ endpoint,
|
||||
/* .device = */ device,
|
||||
/* .name = */ dev_name,
|
||||
/* .last_graph_uid = */ 0,
|
||||
};
|
||||
auto reg = ggml_backend_rpc_add_server(endpoint);
|
||||
ggml_backend_t backend = new ggml_backend {
|
||||
|
||||
@@ -175,6 +175,7 @@ class Keys:
|
||||
SLIDING_WINDOW = "{arch}.attention.sliding_window"
|
||||
SCALE = "{arch}.attention.scale"
|
||||
OUTPUT_SCALE = "{arch}.attention.output_scale"
|
||||
VALUE_SCALE = "{arch}.attention.value_scale"
|
||||
TEMPERATURE_LENGTH = "{arch}.attention.temperature_length"
|
||||
KEY_LENGTH_MLA = "{arch}.attention.key_length_mla"
|
||||
VALUE_LENGTH_MLA = "{arch}.attention.value_length_mla"
|
||||
@@ -339,6 +340,9 @@ class Keys:
|
||||
FEED_FORWARD_LENGTH = "clip.audio.feed_forward_length"
|
||||
PROJECTION_DIM = "clip.audio.projection_dim"
|
||||
BLOCK_COUNT = "clip.audio.block_count"
|
||||
CHUNK_SIZE = "clip.audio.chunk_size"
|
||||
CONV_KERNEL_SIZE = "clip.audio.conv_kernel_size"
|
||||
MAX_POS_EMB = "clip.audio.max_pos_emb"
|
||||
|
||||
class Attention:
|
||||
HEAD_COUNT = "clip.audio.attention.head_count"
|
||||
@@ -346,6 +350,9 @@ class Keys:
|
||||
|
||||
class Projector:
|
||||
STACK_FACTOR = "clip.audio.projector.stack_factor"
|
||||
WINDOW_SIZE = "clip.audio.projector.window_size"
|
||||
DOWNSAMPLE_RATE = "clip.audio.projector.downsample_rate"
|
||||
HEAD_COUNT = "clip.audio.projector.head_count"
|
||||
|
||||
class Diffusion:
|
||||
SHIFT_LOGITS = "diffusion.shift_logits"
|
||||
@@ -767,6 +774,14 @@ class MODEL_TENSOR(IntEnum):
|
||||
V_DS_NORM = auto() # qwen3vl
|
||||
V_DS_FC1 = auto() # qwen3vl
|
||||
V_DS_FC2 = auto() # qwen3vl
|
||||
V_MERGER_LN1 = auto() # minicpmv4_6
|
||||
V_MERGER_ATTN_Q = auto() # minicpmv4_6
|
||||
V_MERGER_ATTN_K = auto() # minicpmv4_6
|
||||
V_MERGER_ATTN_V = auto() # minicpmv4_6
|
||||
V_MERGER_ATTN_O = auto() # minicpmv4_6
|
||||
V_MERGER_DS_LN = auto() # minicpmv4_6
|
||||
V_MERGER_DS_UP = auto() # minicpmv4_6
|
||||
V_MERGER_DS_DOWN = auto() # minicpmv4_6
|
||||
V_MM_POST_FC_NORM = auto() # cogvlm
|
||||
V_MM_UP = auto() # cogvlm
|
||||
V_MM_DOWN = auto() # cogvlm
|
||||
@@ -854,6 +869,26 @@ class MODEL_TENSOR(IntEnum):
|
||||
A_ENC_CONV_NORM = auto() # SSM conv
|
||||
A_ENC_CONV_PW1 = auto()
|
||||
A_ENC_CONV_PW2 = auto()
|
||||
A_CTC_OUT = auto()
|
||||
A_CTC_OUT_MID = auto()
|
||||
A_ENC_ATTN_REL_POS_EMB = auto()
|
||||
# qformer projector
|
||||
A_QF_PROJ_QUERY = auto()
|
||||
A_QF_PROJ_NORM = auto()
|
||||
A_QF_PROJ_LINEAR = auto()
|
||||
A_QF_SELF_ATTN_Q = auto()
|
||||
A_QF_SELF_ATTN_K = auto()
|
||||
A_QF_SELF_ATTN_V = auto()
|
||||
A_QF_SELF_ATTN_O = auto()
|
||||
A_QF_SELF_ATTN_NORM = auto()
|
||||
A_QF_CROSS_ATTN_Q = auto()
|
||||
A_QF_CROSS_ATTN_K = auto()
|
||||
A_QF_CROSS_ATTN_V = auto()
|
||||
A_QF_CROSS_ATTN_O = auto()
|
||||
A_QF_CROSS_ATTN_NORM = auto()
|
||||
A_QF_FFN_UP = auto()
|
||||
A_QF_FFN_DOWN = auto()
|
||||
A_QF_FFN_NORM = auto()
|
||||
|
||||
|
||||
MODEL_ARCH_NAMES: dict[MODEL_ARCH, str] = {
|
||||
@@ -1251,6 +1286,14 @@ TENSOR_NAMES: dict[MODEL_TENSOR, str] = {
|
||||
MODEL_TENSOR.V_DS_NORM: "v.deepstack.{bid}.norm",
|
||||
MODEL_TENSOR.V_DS_FC1: "v.deepstack.{bid}.fc1",
|
||||
MODEL_TENSOR.V_DS_FC2: "v.deepstack.{bid}.fc2",
|
||||
MODEL_TENSOR.V_MERGER_LN1: "v.vit_merger.ln1",
|
||||
MODEL_TENSOR.V_MERGER_ATTN_Q: "v.vit_merger.attn_q",
|
||||
MODEL_TENSOR.V_MERGER_ATTN_K: "v.vit_merger.attn_k",
|
||||
MODEL_TENSOR.V_MERGER_ATTN_V: "v.vit_merger.attn_v",
|
||||
MODEL_TENSOR.V_MERGER_ATTN_O: "v.vit_merger.attn_out",
|
||||
MODEL_TENSOR.V_MERGER_DS_LN: "v.vit_merger.ds_ln",
|
||||
MODEL_TENSOR.V_MERGER_DS_UP: "v.vit_merger.ds_ffn_up",
|
||||
MODEL_TENSOR.V_MERGER_DS_DOWN: "v.vit_merger.ds_ffn_down",
|
||||
MODEL_TENSOR.V_MM_POST_FC_NORM: "mm.post_fc_norm", # cogvlm
|
||||
MODEL_TENSOR.V_MM_UP: "mm.up",
|
||||
MODEL_TENSOR.V_MM_DOWN: "mm.down",
|
||||
@@ -1333,6 +1376,26 @@ TENSOR_NAMES: dict[MODEL_TENSOR, str] = {
|
||||
MODEL_TENSOR.A_ENC_CONV_NORM: "a.blk.{bid}.conv_norm",
|
||||
MODEL_TENSOR.A_ENC_CONV_PW1: "a.blk.{bid}.conv_pw1",
|
||||
MODEL_TENSOR.A_ENC_CONV_PW2: "a.blk.{bid}.conv_pw2",
|
||||
MODEL_TENSOR.A_CTC_OUT: "a.enc_ctc_out",
|
||||
MODEL_TENSOR.A_CTC_OUT_MID: "a.enc_ctc_out_mid",
|
||||
MODEL_TENSOR.A_ENC_ATTN_REL_POS_EMB: "a.blk.{bid}.attn_rel_pos_emb",
|
||||
# qformer projector
|
||||
MODEL_TENSOR.A_QF_PROJ_QUERY: "a.proj_query",
|
||||
MODEL_TENSOR.A_QF_PROJ_NORM: "a.proj_norm",
|
||||
MODEL_TENSOR.A_QF_PROJ_LINEAR: "a.proj_linear",
|
||||
MODEL_TENSOR.A_QF_SELF_ATTN_Q: "a.proj_blk.{bid}.self_attn_q",
|
||||
MODEL_TENSOR.A_QF_SELF_ATTN_K: "a.proj_blk.{bid}.self_attn_k",
|
||||
MODEL_TENSOR.A_QF_SELF_ATTN_V: "a.proj_blk.{bid}.self_attn_v",
|
||||
MODEL_TENSOR.A_QF_SELF_ATTN_O: "a.proj_blk.{bid}.self_attn_out",
|
||||
MODEL_TENSOR.A_QF_SELF_ATTN_NORM: "a.proj_blk.{bid}.self_attn_norm",
|
||||
MODEL_TENSOR.A_QF_CROSS_ATTN_Q: "a.proj_blk.{bid}.cross_attn_q",
|
||||
MODEL_TENSOR.A_QF_CROSS_ATTN_K: "a.proj_blk.{bid}.cross_attn_k",
|
||||
MODEL_TENSOR.A_QF_CROSS_ATTN_V: "a.proj_blk.{bid}.cross_attn_v",
|
||||
MODEL_TENSOR.A_QF_CROSS_ATTN_O: "a.proj_blk.{bid}.cross_attn_out",
|
||||
MODEL_TENSOR.A_QF_CROSS_ATTN_NORM: "a.proj_blk.{bid}.cross_attn_norm",
|
||||
MODEL_TENSOR.A_QF_FFN_UP: "a.proj_blk.{bid}.ffn_up",
|
||||
MODEL_TENSOR.A_QF_FFN_DOWN: "a.proj_blk.{bid}.ffn_down",
|
||||
MODEL_TENSOR.A_QF_FFN_NORM: "a.proj_blk.{bid}.ffn_norm",
|
||||
# NextN/MTP
|
||||
MODEL_TENSOR.NEXTN_EH_PROJ: "blk.{bid}.nextn.eh_proj",
|
||||
MODEL_TENSOR.NEXTN_EMBED_TOKENS: "blk.{bid}.nextn.embed_tokens",
|
||||
@@ -1403,6 +1466,14 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
|
||||
MODEL_TENSOR.V_DS_NORM,
|
||||
MODEL_TENSOR.V_DS_FC1,
|
||||
MODEL_TENSOR.V_DS_FC2,
|
||||
MODEL_TENSOR.V_MERGER_LN1,
|
||||
MODEL_TENSOR.V_MERGER_ATTN_Q,
|
||||
MODEL_TENSOR.V_MERGER_ATTN_K,
|
||||
MODEL_TENSOR.V_MERGER_ATTN_V,
|
||||
MODEL_TENSOR.V_MERGER_ATTN_O,
|
||||
MODEL_TENSOR.V_MERGER_DS_LN,
|
||||
MODEL_TENSOR.V_MERGER_DS_UP,
|
||||
MODEL_TENSOR.V_MERGER_DS_DOWN,
|
||||
MODEL_TENSOR.V_MM_POST_FC_NORM,
|
||||
MODEL_TENSOR.V_MM_UP,
|
||||
MODEL_TENSOR.V_MM_DOWN,
|
||||
@@ -1480,6 +1551,26 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
|
||||
MODEL_TENSOR.A_MM_HARD_EMB_NORM,
|
||||
MODEL_TENSOR.A_PER_DIM_K_SCALE,
|
||||
MODEL_TENSOR.A_PER_DIM_SCALE,
|
||||
MODEL_TENSOR.A_CTC_OUT,
|
||||
MODEL_TENSOR.A_CTC_OUT_MID,
|
||||
MODEL_TENSOR.A_ENC_ATTN_REL_POS_EMB,
|
||||
# qformer projector
|
||||
MODEL_TENSOR.A_QF_PROJ_QUERY,
|
||||
MODEL_TENSOR.A_QF_PROJ_NORM,
|
||||
MODEL_TENSOR.A_QF_PROJ_LINEAR,
|
||||
MODEL_TENSOR.A_QF_SELF_ATTN_Q,
|
||||
MODEL_TENSOR.A_QF_SELF_ATTN_K,
|
||||
MODEL_TENSOR.A_QF_SELF_ATTN_V,
|
||||
MODEL_TENSOR.A_QF_SELF_ATTN_O,
|
||||
MODEL_TENSOR.A_QF_SELF_ATTN_NORM,
|
||||
MODEL_TENSOR.A_QF_CROSS_ATTN_Q,
|
||||
MODEL_TENSOR.A_QF_CROSS_ATTN_K,
|
||||
MODEL_TENSOR.A_QF_CROSS_ATTN_V,
|
||||
MODEL_TENSOR.A_QF_CROSS_ATTN_O,
|
||||
MODEL_TENSOR.A_QF_CROSS_ATTN_NORM,
|
||||
MODEL_TENSOR.A_QF_FFN_UP,
|
||||
MODEL_TENSOR.A_QF_FFN_DOWN,
|
||||
MODEL_TENSOR.A_QF_FFN_NORM,
|
||||
],
|
||||
MODEL_ARCH.LLAMA: [
|
||||
MODEL_TENSOR.TOKEN_EMBD,
|
||||
@@ -3778,6 +3869,7 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
|
||||
MODEL_TENSOR.OUTPUT_NORM,
|
||||
MODEL_TENSOR.OUTPUT,
|
||||
MODEL_TENSOR.ATTN_NORM,
|
||||
MODEL_TENSOR.ATTN_QKV,
|
||||
MODEL_TENSOR.ATTN_Q,
|
||||
MODEL_TENSOR.ATTN_K,
|
||||
MODEL_TENSOR.ATTN_V,
|
||||
@@ -3792,6 +3884,10 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
|
||||
MODEL_TENSOR.FFN_DOWN_EXP,
|
||||
MODEL_TENSOR.FFN_UP_EXP,
|
||||
MODEL_TENSOR.FFN_EXP_PROBS_B,
|
||||
MODEL_TENSOR.LAYER_OUT_NORM,
|
||||
MODEL_TENSOR.NEXTN_EH_PROJ,
|
||||
MODEL_TENSOR.NEXTN_ENORM,
|
||||
MODEL_TENSOR.NEXTN_HNORM,
|
||||
],
|
||||
MODEL_ARCH.STEP35: [
|
||||
MODEL_TENSOR.TOKEN_EMBD,
|
||||
@@ -4158,6 +4254,8 @@ class VisionProjectorType:
|
||||
NEMOTRON_V2_VL = "nemotron_v2_vl"
|
||||
HUNYUANOCR = "hunyuanocr"
|
||||
HUNYUANVL = "hunyuanvl"
|
||||
MINICPMV4_6 = "minicpmv4_6"
|
||||
GRANITE_SPEECH = "granite_speech" # audio
|
||||
|
||||
|
||||
# Items here are (block size, type size)
|
||||
|
||||
@@ -943,6 +943,9 @@ class GGUFWriter:
|
||||
def add_attn_output_scale(self, value: float) -> None:
|
||||
self.add_float32(Keys.Attention.OUTPUT_SCALE.format(arch=self.arch), value)
|
||||
|
||||
def add_attn_value_scale(self, value: float) -> None:
|
||||
self.add_float32(Keys.Attention.VALUE_SCALE.format(arch=self.arch), value)
|
||||
|
||||
def add_attn_temperature_length(self, value: int) -> None:
|
||||
self.add_uint32(Keys.Attention.TEMPERATURE_LENGTH.format(arch=self.arch), value)
|
||||
|
||||
@@ -1260,6 +1263,24 @@ class GGUFWriter:
|
||||
def add_audio_stack_factor(self, value: int) -> None:
|
||||
self.add_uint32(Keys.ClipAudio.Projector.STACK_FACTOR, value)
|
||||
|
||||
def add_audio_chunk_size(self, value: int) -> None:
|
||||
self.add_uint32(Keys.ClipAudio.CHUNK_SIZE, value)
|
||||
|
||||
def add_audio_conv_kernel_size(self, value: int) -> None:
|
||||
self.add_uint32(Keys.ClipAudio.CONV_KERNEL_SIZE, value)
|
||||
|
||||
def add_audio_max_pos_emb(self, value: int) -> None:
|
||||
self.add_uint32(Keys.ClipAudio.MAX_POS_EMB, value)
|
||||
|
||||
def add_audio_projector_window_size(self, value: int) -> None:
|
||||
self.add_uint32(Keys.ClipAudio.Projector.WINDOW_SIZE, value)
|
||||
|
||||
def add_audio_projector_downsample_rate(self, value: int) -> None:
|
||||
self.add_uint32(Keys.ClipAudio.Projector.DOWNSAMPLE_RATE, value)
|
||||
|
||||
def add_audio_projector_head_count(self, value: int) -> None:
|
||||
self.add_uint32(Keys.ClipAudio.Projector.HEAD_COUNT, value)
|
||||
|
||||
def add_xielu_alpha_p(self, values: Sequence[float]):
|
||||
self.add_array(Keys.xIELU.ALPHA_P, values)
|
||||
|
||||
|
||||
@@ -18,7 +18,6 @@ class TensorNameMap:
|
||||
"tok_embeddings", # llama-pth
|
||||
"embeddings.word_embeddings", # bert nomic-bert
|
||||
"embeddings.tok_embeddings", # modern-bert
|
||||
"language_model.embedding.word_embeddings", # persimmon
|
||||
"wte", # gpt2
|
||||
"transformer.embd.wte", # phi2
|
||||
"model.tok_embeddings", # internlm2
|
||||
@@ -32,7 +31,6 @@ class TensorNameMap:
|
||||
"rwkv.embeddings", # rwkv6
|
||||
"model.embeddings", # rwkv7
|
||||
"model.word_embeddings", # bailingmoe
|
||||
"language_model.model.embed_tokens", # llama4
|
||||
"encoder", # neobert
|
||||
"model.transformer.wte", # llada
|
||||
"embed_tokens", # qwen3-embedding
|
||||
@@ -94,7 +92,6 @@ class TensorNameMap:
|
||||
"norm", # llama-pth
|
||||
"transformer.norm_f", # mpt dbrx
|
||||
"ln_f", # refact bloom qwen gpt2
|
||||
"language_model.encoder.final_layernorm", # persimmon
|
||||
"model.final_layernorm", # persimmon
|
||||
"lm_head.ln", # phi2
|
||||
"model.norm_f", # mamba-qbert
|
||||
@@ -158,6 +155,21 @@ class TensorNameMap:
|
||||
MODEL_TENSOR.V_ENC_MSFA_NORM: (
|
||||
"model.vision_tower.timm_model.msfa.norm", # gemma3n
|
||||
),
|
||||
MODEL_TENSOR.A_CTC_OUT: (
|
||||
"encoder.out",
|
||||
),
|
||||
MODEL_TENSOR.A_CTC_OUT_MID: (
|
||||
"encoder.out_mid",
|
||||
),
|
||||
MODEL_TENSOR.A_QF_PROJ_QUERY: (
|
||||
"projector.query",
|
||||
),
|
||||
MODEL_TENSOR.A_QF_PROJ_NORM: (
|
||||
"projector.qformer.layernorm",
|
||||
),
|
||||
MODEL_TENSOR.A_QF_PROJ_LINEAR: (
|
||||
"projector.linear",
|
||||
),
|
||||
}
|
||||
|
||||
block_mappings_cfg: dict[MODEL_TENSOR, tuple[str, ...]] = {
|
||||
@@ -171,7 +183,6 @@ class TensorNameMap:
|
||||
"transformer.h.{bid}.ln_mlp", # falcon40b
|
||||
"model.layers.{bid}.input_layernorm", # llama-hf nemotron olmoe phimoe granite-hybrid
|
||||
"layers.{bid}.attention_norm", # llama-pth
|
||||
"language_model.encoder.layers.{bid}.input_layernorm", # persimmon
|
||||
"model.layers.{bid}.ln1", # yi
|
||||
"h.{bid}.ln_1", # gpt2
|
||||
"transformer.h.{bid}.ln", # phi2
|
||||
@@ -215,7 +226,6 @@ class TensorNameMap:
|
||||
"transformer.blocks.{bid}.norm_attn_norm.attn.Wqkv", # dbrx
|
||||
"transformer.h.{bid}.self_attention.query_key_value", # falcon
|
||||
"h.{bid}.self_attention.query_key_value", # bloom
|
||||
"language_model.encoder.layers.{bid}.self_attention.query_key_value", # persimmon
|
||||
"model.layers.{bid}.self_attn.query_key_value", # persimmon
|
||||
"model.layers.{bid}.attention.query_key_value", # bailingmoe2
|
||||
"h.{bid}.attn.c_attn", # gpt2
|
||||
@@ -306,7 +316,6 @@ class TensorNameMap:
|
||||
"layers.{bid}.attn.Wo", # modern-bert
|
||||
"transformer.layer.{bid}.attention.out_lin", # distillbert
|
||||
"transformer.h.{bid}.attn.out_proj", # gpt-j
|
||||
"language_model.encoder.layers.{bid}.self_attention.dense", # persimmon
|
||||
"model.layers.{bid}.self_attn.dense", # persimmon
|
||||
"model.layers.{bid}.attention.dense", # bailingmoe2
|
||||
"h.{bid}.attn.c_proj", # gpt2
|
||||
@@ -373,7 +382,6 @@ class TensorNameMap:
|
||||
"transformer.blocks.{bid}.norm_2", # mpt
|
||||
"model.layers.{bid}.post_attention_layernorm", # llama-hf nemotron olmoe phimoe
|
||||
"layers.{bid}.ffn_norm", # llama-pth
|
||||
"language_model.encoder.layers.{bid}.post_attention_layernorm", # persimmon
|
||||
"model.layers.{bid}.ln2", # yi
|
||||
"h.{bid}.ln_2", # gpt2
|
||||
"model.layers.{bid}.ffn_norm", # internlm2
|
||||
@@ -475,7 +483,6 @@ class TensorNameMap:
|
||||
"transformer.layer.{bid}.ffn.lin1", # distillbert
|
||||
"transformer.h.{bid}.mlp.fc_in", # gpt-j
|
||||
"transformer.h.{bid}.mlp.linear_3", # refact
|
||||
"language_model.encoder.layers.{bid}.mlp.dense_h_to_4h", # persimmon
|
||||
"model.layers.{bid}.mlp.dense_h_to_4h", # persimmon
|
||||
"transformer.h.{bid}.mlp.w1", # qwen
|
||||
"h.{bid}.mlp.c_fc", # gpt2
|
||||
@@ -608,7 +615,6 @@ class TensorNameMap:
|
||||
"layers.{bid}.mlp.Wo", # modern-bert
|
||||
"transformer.layer.{bid}.ffn.lin2", # distillbert
|
||||
"transformer.h.{bid}.mlp.fc_out", # gpt-j
|
||||
"language_model.encoder.layers.{bid}.mlp.dense_4h_to_h", # persimmon
|
||||
"model.layers.{bid}.mlp.dense_4h_to_h", # persimmon
|
||||
"h.{bid}.mlp.c_proj", # gpt2
|
||||
"transformer.h.{bid}.mlp.fc2", # phi2
|
||||
@@ -663,7 +669,7 @@ class TensorNameMap:
|
||||
),
|
||||
|
||||
MODEL_TENSOR.ATTN_Q_NORM: (
|
||||
"language_model.encoder.layers.{bid}.self_attention.q_layernorm",
|
||||
"encoder.layers.{bid}.self_attention.q_layernorm",
|
||||
"model.layers.{bid}.self_attn.q_layernorm", # persimmon
|
||||
"model.layers.{bid}.self_attn.query_layernorm", # hunyuan
|
||||
"model.layers.{bid}.attention.query_layernorm", # bailingmoe2
|
||||
@@ -679,7 +685,7 @@ class TensorNameMap:
|
||||
),
|
||||
|
||||
MODEL_TENSOR.ATTN_K_NORM: (
|
||||
"language_model.encoder.layers.{bid}.self_attention.k_layernorm",
|
||||
"encoder.layers.{bid}.self_attention.k_layernorm",
|
||||
"model.layers.{bid}.self_attn.k_layernorm", # persimmon
|
||||
"model.layers.{bid}.self_attn.key_layernorm", # hunyuan
|
||||
"model.layers.{bid}.attention.key_layernorm", # bailingmoe2
|
||||
@@ -695,7 +701,7 @@ class TensorNameMap:
|
||||
),
|
||||
|
||||
MODEL_TENSOR.ROPE_FREQS: (
|
||||
"language_model.encoder.layers.{bid}.self_attention.rotary_emb.inv_freq", # persimmon
|
||||
"encoder.layers.{bid}.self_attention.rotary_emb.inv_freq", # persimmon
|
||||
),
|
||||
|
||||
MODEL_TENSOR.LAYER_OUT_NORM: (
|
||||
@@ -1393,6 +1399,7 @@ class TensorNameMap:
|
||||
|
||||
MODEL_TENSOR.V_ENC_EMBD_PATCH: (
|
||||
"vision_tower.vision_model.embeddings.patch_embedding",
|
||||
"model.vision_tower.embeddings.patch_embedding", # minicpmv4_6
|
||||
"model.vision_tower.embeddings.patch_embeddings.projection", # Intern-S1
|
||||
"vpm.embeddings.patch_embedding",
|
||||
"model.vision_model.embeddings.patch_embedding", # SmolVLM
|
||||
@@ -1418,6 +1425,7 @@ class TensorNameMap:
|
||||
|
||||
MODEL_TENSOR.V_ENC_EMBD_POS: (
|
||||
"vision_tower.vision_model.embeddings.position_embedding",
|
||||
"model.vision_tower.embeddings.position_embedding", # minicpmv4_6
|
||||
"model.vision_tower.embeddings.position_embeddings", # Intern-S1
|
||||
"vpm.embeddings.position_embedding",
|
||||
"model.vision_model.embeddings.position_embedding", # SmolVLM
|
||||
@@ -1454,6 +1462,7 @@ class TensorNameMap:
|
||||
|
||||
MODEL_TENSOR.V_ENC_ATTN_Q: (
|
||||
"vision_tower.vision_model.encoder.layers.{bid}.self_attn.q_proj",
|
||||
"model.vision_tower.encoder.layers.{bid}.self_attn.q_proj", # minicpmv4_6
|
||||
"model.vision_tower.encoder.layer.{bid}.attention.q_proj", # Intern-S1
|
||||
"vpm.encoder.layers.{bid}.self_attn.q_proj",
|
||||
"model.vision_model.encoder.layers.{bid}.self_attn.q_proj", # SmolVLM
|
||||
@@ -1477,6 +1486,7 @@ class TensorNameMap:
|
||||
|
||||
MODEL_TENSOR.V_ENC_ATTN_K: (
|
||||
"vision_tower.vision_model.encoder.layers.{bid}.self_attn.k_proj",
|
||||
"model.vision_tower.encoder.layers.{bid}.self_attn.k_proj", # minicpmv4_6
|
||||
"model.vision_tower.encoder.layer.{bid}.attention.k_proj", # Intern-S1
|
||||
"vpm.encoder.layers.{bid}.self_attn.k_proj",
|
||||
"model.vision_model.encoder.layers.{bid}.self_attn.k_proj", # SmolVLM
|
||||
@@ -1500,6 +1510,7 @@ class TensorNameMap:
|
||||
|
||||
MODEL_TENSOR.V_ENC_ATTN_V: (
|
||||
"vision_tower.vision_model.encoder.layers.{bid}.self_attn.v_proj",
|
||||
"model.vision_tower.encoder.layers.{bid}.self_attn.v_proj", # minicpmv4_6
|
||||
"model.vision_tower.encoder.layer.{bid}.attention.v_proj", # Intern-S1
|
||||
"vpm.encoder.layers.{bid}.self_attn.v_proj",
|
||||
"model.vision_model.encoder.layers.{bid}.self_attn.v_proj", # SmolVLM
|
||||
@@ -1516,6 +1527,7 @@ class TensorNameMap:
|
||||
|
||||
MODEL_TENSOR.V_ENC_INPUT_NORM: (
|
||||
"vision_tower.vision_model.encoder.layers.{bid}.layer_norm1",
|
||||
"model.vision_tower.encoder.layers.{bid}.layer_norm1", # minicpmv4_6
|
||||
"vision_tower.vision_model.encoder.layers.{bid}.norm1", # InternVL
|
||||
"model.vision_tower.encoder.layer.{bid}.layernorm_before", # Intern-S1
|
||||
"vpm.encoder.layers.{bid}.layer_norm1",
|
||||
@@ -1536,6 +1548,7 @@ class TensorNameMap:
|
||||
|
||||
MODEL_TENSOR.V_ENC_ATTN_O: (
|
||||
"vision_tower.vision_model.encoder.layers.{bid}.self_attn.out_proj",
|
||||
"model.vision_tower.encoder.layers.{bid}.self_attn.out_proj", # minicpmv4_6
|
||||
"vision_tower.vision_model.encoder.layers.{bid}.attn.proj", # InternVL
|
||||
"model.vision_tower.encoder.layer.{bid}.attention.projection_layer", # Intern-S1
|
||||
"vpm.encoder.layers.{bid}.self_attn.out_proj",
|
||||
@@ -1558,6 +1571,7 @@ class TensorNameMap:
|
||||
|
||||
MODEL_TENSOR.V_ENC_POST_ATTN_NORM: (
|
||||
"vision_tower.vision_model.encoder.layers.{bid}.layer_norm2",
|
||||
"model.vision_tower.encoder.layers.{bid}.layer_norm2", # minicpmv4_6
|
||||
"vision_tower.vision_model.encoder.layers.{bid}.norm2", # InternVL
|
||||
"model.vision_tower.encoder.layer.{bid}.layernorm_after", # Intern-S1
|
||||
"vpm.encoder.layers.{bid}.layer_norm2",
|
||||
@@ -1579,6 +1593,7 @@ class TensorNameMap:
|
||||
|
||||
MODEL_TENSOR.V_ENC_FFN_UP: (
|
||||
"vision_tower.vision_model.encoder.layers.{bid}.mlp.fc1",
|
||||
"model.vision_tower.encoder.layers.{bid}.mlp.fc1", # minicpmv4_6
|
||||
"model.vision_tower.encoder.layer.{bid}.mlp.fc1", # Intern-S1
|
||||
"vpm.encoder.layers.{bid}.mlp.fc1",
|
||||
"model.vision_model.encoder.layers.{bid}.mlp.fc1", # SmolVLM, gemma3
|
||||
@@ -1607,6 +1622,7 @@ class TensorNameMap:
|
||||
|
||||
MODEL_TENSOR.V_ENC_FFN_DOWN: (
|
||||
"vision_tower.vision_model.encoder.layers.{bid}.mlp.fc2",
|
||||
"model.vision_tower.encoder.layers.{bid}.mlp.fc2", # minicpmv4_6
|
||||
"model.vision_tower.encoder.layer.{bid}.mlp.fc2", # Intern-S1
|
||||
"vpm.encoder.layers.{bid}.mlp.fc2",
|
||||
"model.vision_model.encoder.layers.{bid}.mlp.fc2", # SmolVLM, gemma3
|
||||
@@ -1662,6 +1678,7 @@ class TensorNameMap:
|
||||
|
||||
MODEL_TENSOR.V_POST_NORM: (
|
||||
"vision_tower.vision_model.post_layernorm",
|
||||
"model.vision_tower.post_layernorm", # minicpmv4_6
|
||||
"model.vision_model.post_layernorm", # SmolVLM
|
||||
"vision_model.layernorm_post", # llama4
|
||||
"visual.merger.ln_q", # qwen2vl
|
||||
@@ -1690,6 +1707,7 @@ class TensorNameMap:
|
||||
"mlp_AR.pre_norm", # PaddleOCR-VL
|
||||
"merger.ln_q",
|
||||
"vision_tower.merger.ln_q", # dots.ocr
|
||||
"model.merger.mlp.0.pre_norm", # minicpmv4_6
|
||||
),
|
||||
|
||||
MODEL_TENSOR.V_MM_SOFT_EMB_NORM: (
|
||||
@@ -1763,6 +1781,38 @@ class TensorNameMap:
|
||||
"model.visual.deepstack_merger_list.{bid}.linear_fc2", # deepstack in qwen3vl
|
||||
),
|
||||
|
||||
MODEL_TENSOR.V_MERGER_LN1: (
|
||||
"model.vision_tower.vit_merger.layer_norm1", # minicpmv4_6
|
||||
),
|
||||
|
||||
MODEL_TENSOR.V_MERGER_ATTN_Q: (
|
||||
"model.vision_tower.vit_merger.self_attn.q_proj", # minicpmv4_6
|
||||
),
|
||||
|
||||
MODEL_TENSOR.V_MERGER_ATTN_K: (
|
||||
"model.vision_tower.vit_merger.self_attn.k_proj", # minicpmv4_6
|
||||
),
|
||||
|
||||
MODEL_TENSOR.V_MERGER_ATTN_V: (
|
||||
"model.vision_tower.vit_merger.self_attn.v_proj", # minicpmv4_6
|
||||
),
|
||||
|
||||
MODEL_TENSOR.V_MERGER_ATTN_O: (
|
||||
"model.vision_tower.vit_merger.self_attn.out_proj", # minicpmv4_6
|
||||
),
|
||||
|
||||
MODEL_TENSOR.V_MERGER_DS_LN: (
|
||||
"model.vision_tower.vit_merger.pre_norm", # minicpmv4_6
|
||||
),
|
||||
|
||||
MODEL_TENSOR.V_MERGER_DS_UP: (
|
||||
"model.vision_tower.vit_merger.linear_1", # minicpmv4_6
|
||||
),
|
||||
|
||||
MODEL_TENSOR.V_MERGER_DS_DOWN: (
|
||||
"model.vision_tower.vit_merger.linear_2", # minicpmv4_6
|
||||
),
|
||||
|
||||
MODEL_TENSOR.V_SAM_POS_EMBD: (
|
||||
"model.sam_model.pos_embed",
|
||||
),
|
||||
@@ -1822,11 +1872,13 @@ class TensorNameMap:
|
||||
MODEL_TENSOR.V_MM_UP: (
|
||||
"model.vision.linear_proj.dense_h_to_4h", # cogvlm
|
||||
"visual.merger.up_proj", # glm4v
|
||||
"model.merger.mlp.0.linear_1", # minicpmv4_6
|
||||
),
|
||||
|
||||
MODEL_TENSOR.V_MM_DOWN: (
|
||||
"model.vision.linear_proj.dense_4h_to_h", # cogvlm
|
||||
"visual.merger.down_proj", # glm4v
|
||||
"model.merger.mlp.0.linear_2", # minicpmv4_6
|
||||
),
|
||||
|
||||
MODEL_TENSOR.V_MM_GATE: (
|
||||
@@ -1890,6 +1942,7 @@ class TensorNameMap:
|
||||
|
||||
MODEL_TENSOR.A_ENC_INP_PROJ: (
|
||||
"conformer.subsample_conv_projection.input_proj_linear", # gemma4
|
||||
"encoder.input_linear",
|
||||
),
|
||||
|
||||
MODEL_TENSOR.A_ENC_CONV2D: (
|
||||
@@ -1912,6 +1965,7 @@ class TensorNameMap:
|
||||
"conformer.layers.{bid}.self_attn.linear_q", # lfm2
|
||||
"conformer.layers.{bid}.attention.attn.q_proj", # gemma3n
|
||||
"conformer.layers.{bid}.self_attn.q_proj", # gemma4
|
||||
"encoder.layers.{bid}.attn.to_q", # granite_speech
|
||||
),
|
||||
|
||||
MODEL_TENSOR.A_ENC_ATTN_K: (
|
||||
@@ -1919,6 +1973,7 @@ class TensorNameMap:
|
||||
"conformer.layers.{bid}.self_attn.linear_k", # lfm2
|
||||
"conformer.layers.{bid}.attention.attn.k_proj", # gemma3n
|
||||
"conformer.layers.{bid}.self_attn.k_proj", # gemma4
|
||||
"encoder.layers.{bid}.attn.to_k", # granite_speech (split from to_kv)
|
||||
),
|
||||
|
||||
MODEL_TENSOR.A_ENC_ATTN_V: (
|
||||
@@ -1926,6 +1981,7 @@ class TensorNameMap:
|
||||
"conformer.layers.{bid}.self_attn.linear_v", # lfm2
|
||||
"conformer.layers.{bid}.attention.attn.v_proj", # gemma3n
|
||||
"conformer.layers.{bid}.self_attn.v_proj", # gemma4
|
||||
"encoder.layers.{bid}.attn.to_v", # granite_speech (split from to_kv)
|
||||
),
|
||||
|
||||
MODEL_TENSOR.A_ENC_ATTN_K_REL: (
|
||||
@@ -1953,6 +2009,7 @@ class TensorNameMap:
|
||||
"audio_tower.layers.{bid}.self_attn_layer_norm", # ultravox
|
||||
"conformer.layers.{bid}.norm_self_att", # lfm2
|
||||
"conformer.layers.{bid}.attention.pre_attn_norm", # gemma3n
|
||||
"encoder.layers.{bid}.attn.pre_norm", # granite_speech
|
||||
),
|
||||
|
||||
MODEL_TENSOR.A_ENC_OUTPUT: (
|
||||
@@ -1960,18 +2017,21 @@ class TensorNameMap:
|
||||
"conformer.layers.{bid}.self_attn.linear_out", # lfm2
|
||||
"conformer.layers.{bid}.attention.post", # gemma3n
|
||||
"conformer.layers.{bid}.self_attn.post", # gemma4
|
||||
"encoder.layers.{bid}.attn.to_out", # granite_speech
|
||||
),
|
||||
|
||||
MODEL_TENSOR.A_ENC_OUTPUT_NORM: (
|
||||
"audio_tower.layers.{bid}.final_layer_norm", # ultravox
|
||||
"conformer.layers.{bid}.norm_out", # lfm2
|
||||
"conformer.layers.{bid}.attention.post_norm", # gemma3n
|
||||
"encoder.layers.{bid}.post_norm", # granite_speech
|
||||
),
|
||||
|
||||
MODEL_TENSOR.A_ENC_FFN_NORM: (
|
||||
"conformer.layers.{bid}.norm_feed_forward1", # lfm2
|
||||
"conformer.layers.{bid}.ffw_layer_start.pre_layer_norm", # gemma3n
|
||||
"conformer.layers.{bid}.feed_forward1.pre_layer_norm", # gemma4
|
||||
"encoder.layers.{bid}.ff1.pre_norm", # granite_speech
|
||||
),
|
||||
|
||||
MODEL_TENSOR.A_ENC_FFN_POST_NORM: (
|
||||
@@ -1988,6 +2048,7 @@ class TensorNameMap:
|
||||
"conformer.layers.{bid}.feed_forward1.linear1", # lfm2
|
||||
"conformer.layers.{bid}.ffw_layer_start.ffw_layer_1", # gemma3n
|
||||
"conformer.layers.{bid}.feed_forward1.ffw_layer_1", # gemma4
|
||||
"encoder.layers.{bid}.ff1.up_proj", # granite_speech
|
||||
),
|
||||
|
||||
MODEL_TENSOR.A_ENC_FFN_GATE: (),
|
||||
@@ -1997,24 +2058,28 @@ class TensorNameMap:
|
||||
"conformer.layers.{bid}.feed_forward1.linear2", # lfm2
|
||||
"conformer.layers.{bid}.ffw_layer_start.ffw_layer_2", # gemma3n
|
||||
"conformer.layers.{bid}.feed_forward1.ffw_layer_2", # gemma4
|
||||
"encoder.layers.{bid}.ff1.down_proj", # granite_speech
|
||||
),
|
||||
|
||||
MODEL_TENSOR.A_ENC_FFN_UP_1: (
|
||||
"conformer.layers.{bid}.feed_forward2.linear1", # lfm2
|
||||
"conformer.layers.{bid}.ffw_layer_end.ffw_layer_1", # gemma3n
|
||||
"conformer.layers.{bid}.feed_forward2.ffw_layer_1", # gemma4
|
||||
"encoder.layers.{bid}.ff2.up_proj", # granite_speech
|
||||
),
|
||||
|
||||
MODEL_TENSOR.A_ENC_FFN_DOWN_1: (
|
||||
"conformer.layers.{bid}.feed_forward2.linear2", # lfm2
|
||||
"conformer.layers.{bid}.ffw_layer_end.ffw_layer_2", # gemma3n
|
||||
"conformer.layers.{bid}.feed_forward2.ffw_layer_2", # gemma4
|
||||
"encoder.layers.{bid}.ff2.down_proj", # granite_speech
|
||||
),
|
||||
|
||||
MODEL_TENSOR.A_ENC_FFN_NORM_1: (
|
||||
"conformer.layers.{bid}.norm_feed_forward2", # lfm2
|
||||
"conformer.layers.{bid}.ffw_layer_end.pre_layer_norm", # gemma3n
|
||||
"conformer.layers.{bid}.feed_forward2.pre_layer_norm", # gemma4
|
||||
"encoder.layers.{bid}.ff2.pre_norm", # granite_speech
|
||||
),
|
||||
|
||||
MODEL_TENSOR.A_ENC_FFN_POST_NORM_1: (
|
||||
@@ -2071,26 +2136,31 @@ class TensorNameMap:
|
||||
MODEL_TENSOR.A_ENC_CONV_DW: (
|
||||
"conformer.layers.{bid}.conv.depthwise_conv", # lfm2
|
||||
"conformer.layers.{bid}.lconv1d.depthwise_conv1d", # gemma3n
|
||||
"encoder.layers.{bid}.conv.depth_conv.conv", # granite_speech
|
||||
),
|
||||
|
||||
MODEL_TENSOR.A_ENC_CONV_NORM: (
|
||||
"conformer.layers.{bid}.conv.batch_norm", # lfm2
|
||||
"conformer.layers.{bid}.lconv1d.pre_layer_norm", # gemma3n
|
||||
"encoder.layers.{bid}.conv.batch_norm", # granite_speech
|
||||
),
|
||||
|
||||
MODEL_TENSOR.A_ENC_CONV_PW1: (
|
||||
"conformer.layers.{bid}.conv.pointwise_conv1", # lfm2
|
||||
"conformer.layers.{bid}.lconv1d.linear_start", # gemma3n
|
||||
"encoder.layers.{bid}.conv.up_conv", # granite_speech
|
||||
),
|
||||
|
||||
MODEL_TENSOR.A_ENC_CONV_PW2: (
|
||||
"conformer.layers.{bid}.conv.pointwise_conv2", # lfm2
|
||||
"conformer.layers.{bid}.lconv1d.linear_end", # gemma3n
|
||||
"encoder.layers.{bid}.conv.down_conv", # granite_speech
|
||||
),
|
||||
|
||||
MODEL_TENSOR.A_ENC_NORM_CONV: (
|
||||
"conformer.layers.{bid}.norm_conv", # lfm2
|
||||
"conformer.layers.{bid}.lconv1d.conv_norm", # gemma3n
|
||||
"encoder.layers.{bid}.conv.norm", # granite_speech
|
||||
),
|
||||
|
||||
MODEL_TENSOR.A_PER_DIM_K_SCALE: (
|
||||
@@ -2114,6 +2184,62 @@ class TensorNameMap:
|
||||
"model.embed_audio.soft_embedding_norm", # gemma3n
|
||||
),
|
||||
|
||||
MODEL_TENSOR.A_ENC_ATTN_REL_POS_EMB: (
|
||||
"encoder.layers.{bid}.attn.rel_pos_emb.weight",
|
||||
),
|
||||
|
||||
MODEL_TENSOR.A_QF_SELF_ATTN_Q: (
|
||||
"projector.qformer.encoder.layer.{bid}.attention.attention.query",
|
||||
),
|
||||
|
||||
MODEL_TENSOR.A_QF_SELF_ATTN_K: (
|
||||
"projector.qformer.encoder.layer.{bid}.attention.attention.key",
|
||||
),
|
||||
|
||||
MODEL_TENSOR.A_QF_SELF_ATTN_V: (
|
||||
"projector.qformer.encoder.layer.{bid}.attention.attention.value",
|
||||
),
|
||||
|
||||
MODEL_TENSOR.A_QF_SELF_ATTN_O: (
|
||||
"projector.qformer.encoder.layer.{bid}.attention.output.dense",
|
||||
),
|
||||
|
||||
MODEL_TENSOR.A_QF_SELF_ATTN_NORM: (
|
||||
"projector.qformer.encoder.layer.{bid}.attention.output.LayerNorm",
|
||||
),
|
||||
|
||||
MODEL_TENSOR.A_QF_CROSS_ATTN_Q: (
|
||||
"projector.qformer.encoder.layer.{bid}.crossattention.attention.query",
|
||||
),
|
||||
|
||||
MODEL_TENSOR.A_QF_CROSS_ATTN_K: (
|
||||
"projector.qformer.encoder.layer.{bid}.crossattention.attention.key",
|
||||
),
|
||||
|
||||
MODEL_TENSOR.A_QF_CROSS_ATTN_V: (
|
||||
"projector.qformer.encoder.layer.{bid}.crossattention.attention.value",
|
||||
),
|
||||
|
||||
MODEL_TENSOR.A_QF_CROSS_ATTN_O: (
|
||||
"projector.qformer.encoder.layer.{bid}.crossattention.output.dense",
|
||||
),
|
||||
|
||||
MODEL_TENSOR.A_QF_CROSS_ATTN_NORM: (
|
||||
"projector.qformer.encoder.layer.{bid}.crossattention.output.LayerNorm",
|
||||
),
|
||||
|
||||
MODEL_TENSOR.A_QF_FFN_UP: (
|
||||
"projector.qformer.encoder.layer.{bid}.intermediate_query.dense",
|
||||
),
|
||||
|
||||
MODEL_TENSOR.A_QF_FFN_DOWN: (
|
||||
"projector.qformer.encoder.layer.{bid}.output_query.dense",
|
||||
),
|
||||
|
||||
MODEL_TENSOR.A_QF_FFN_NORM: (
|
||||
"projector.qformer.encoder.layer.{bid}.output_query.LayerNorm",
|
||||
),
|
||||
|
||||
# NextN/MTP tensors
|
||||
MODEL_TENSOR.NEXTN_EH_PROJ: (
|
||||
"model.layers.{bid}.eh_proj",
|
||||
|
||||
@@ -1,44 +1,45 @@
|
||||
[tool.poetry]
|
||||
[project]
|
||||
name = "gguf"
|
||||
version = "0.18.0"
|
||||
version = "0.19.0"
|
||||
description = "Read and write ML models in GGUF for GGML"
|
||||
authors = ["GGML <ggml@ggml.ai>"]
|
||||
packages = [
|
||||
{include = "gguf"},
|
||||
{include = "gguf/py.typed"},
|
||||
]
|
||||
readme = "README.md"
|
||||
homepage = "https://ggml.ai"
|
||||
repository = "https://github.com/ggml-org/llama.cpp"
|
||||
keywords = ["ggml", "gguf", "llama.cpp"]
|
||||
dynamic = ["classifiers"]
|
||||
readme = "README.md"
|
||||
authors = [{name = "GGML", email = "ggml@ggml.ai"}]
|
||||
requires-python = '>=3.10'
|
||||
dependencies = ['numpy (>=1.17)', 'tqdm (>=4.27)', 'pyyaml (>=5.1)', 'requests (>=2.25)']
|
||||
classifiers = [
|
||||
"Programming Language :: Python :: 3",
|
||||
"License :: OSI Approved :: MIT License",
|
||||
"Operating System :: OS Independent",
|
||||
]
|
||||
|
||||
[tool.poetry.dependencies]
|
||||
python = ">=3.8"
|
||||
numpy = ">=1.17"
|
||||
tqdm = ">=4.27"
|
||||
pyyaml = ">=5.1"
|
||||
requests = ">=2.25"
|
||||
sentencepiece = { version = ">=0.1.98,<0.3.0", optional = true }
|
||||
PySide6 = { version = "^6.9", python = ">=3.9,<3.14", optional = true }
|
||||
[project.urls]
|
||||
homepage = "https://ggml.ai"
|
||||
repository = "https://github.com/ggml-org/llama.cpp"
|
||||
|
||||
[tool.poetry.dev-dependencies]
|
||||
pytest = "^5.2"
|
||||
|
||||
[tool.poetry.extras]
|
||||
gui = ["PySide6"]
|
||||
|
||||
[build-system]
|
||||
requires = ["poetry-core>=1.0.0"]
|
||||
build-backend = "poetry.core.masonry.api"
|
||||
|
||||
[tool.poetry.scripts]
|
||||
[project.scripts]
|
||||
gguf-convert-endian = "gguf.scripts.gguf_convert_endian:main"
|
||||
gguf-dump = "gguf.scripts.gguf_dump:main"
|
||||
gguf-set-metadata = "gguf.scripts.gguf_set_metadata:main"
|
||||
gguf-new-metadata = "gguf.scripts.gguf_new_metadata:main"
|
||||
gguf-editor-gui = "gguf.scripts.gguf_editor_gui:main"
|
||||
|
||||
[project.optional-dependencies]
|
||||
gui = ['PySide6 (>=6.9,<7.0) ; python_version >= "3.9" and python_version < "3.14"']
|
||||
|
||||
[tool.poetry]
|
||||
packages = [
|
||||
{include = "gguf"},
|
||||
{include = "gguf/py.typed"},
|
||||
]
|
||||
|
||||
[tool.poetry.dependencies]
|
||||
python = ">=3.10"
|
||||
|
||||
[tool.poetry.dev-dependencies]
|
||||
pytest = "^5.2"
|
||||
|
||||
[build-system]
|
||||
requires = ["poetry-core>=1.0.0"]
|
||||
build-backend = "poetry.core.masonry.api"
|
||||
|
||||
1197
poetry.lock
generated
1197
poetry.lock
generated
File diff suppressed because it is too large
Load Diff
@@ -1,32 +1,49 @@
|
||||
[tool.poetry]
|
||||
[project]
|
||||
name = "llama-cpp-scripts"
|
||||
version = "0.0.0"
|
||||
description = "Scripts that ship with llama.cpp"
|
||||
authors = ["GGML <ggml@ggml.ai>"]
|
||||
readme = "README.md"
|
||||
homepage = "https://ggml.ai"
|
||||
repository = "https://github.com/ggml-org/llama.cpp"
|
||||
keywords = ["ggml", "gguf", "llama.cpp"]
|
||||
packages = [{ include = "*.py", from = "." }]
|
||||
version = "0.0.0"
|
||||
dynamic = ["classifiers"]
|
||||
readme = "README.md"
|
||||
authors = [{name = "GGML", email = "ggml@ggml.ai"}]
|
||||
requires-python = '>=3.10'
|
||||
dependencies = [
|
||||
'numpy (>=1.25.0,<2.0.0)',
|
||||
'sentencepiece (>=0.1.98,<0.3.0)',
|
||||
'transformers (==5.5.1)',
|
||||
'protobuf (>=4.21.0)',
|
||||
'torch (>=2.2.0,<3.0.0)',
|
||||
'gguf @ ./gguf-py',
|
||||
]
|
||||
classifiers = [
|
||||
"Programming Language :: Python :: 3",
|
||||
"License :: OSI Approved :: MIT License",
|
||||
"Operating System :: OS Independent",
|
||||
]
|
||||
|
||||
[project.urls]
|
||||
homepage = "https://ggml.ai"
|
||||
repository = "https://github.com/ggml-org/llama.cpp"
|
||||
|
||||
[project.scripts]
|
||||
llama-convert-hf-to-gguf = "convert_hf_to_gguf:main"
|
||||
llama-convert-lora-to-gguf = "convert_lora_to_gguf:main"
|
||||
llama-convert-llama-ggml-to-gguf = "convert_llama_ggml_to_gguf:main"
|
||||
llama-ggml-vk-generate-shaders = "ggml_vk_generate_shaders:main"
|
||||
|
||||
[tool.poetry]
|
||||
packages = [{ include = "*.py", from = "." }]
|
||||
|
||||
[tool.poetry.dependencies]
|
||||
python = ">=3.9"
|
||||
numpy = "^1.25.0"
|
||||
sentencepiece = ">=0.1.98,<0.3.0"
|
||||
transformers = "==5.5.1"
|
||||
protobuf = ">=4.21.0,<5.0.0"
|
||||
gguf = { path = "./gguf-py" }
|
||||
torch = { version = "^2.2.0", source = "pytorch" }
|
||||
torch = [
|
||||
{ version = "~=2.6.0", source = "pypi", markers = "sys_platform == 'darwin'" },
|
||||
{ version = "~=2.6.0+cpu", source = "pytorch", markers = "sys_platform == 'linux'" },
|
||||
{ version = "~=2.6.0", source = "pypi", markers = "sys_platform == 'win32'" }
|
||||
]
|
||||
|
||||
[tool.poetry.dev-dependencies]
|
||||
[tool.poetry.group.dev.dependencies]
|
||||
pytest = "^5.2"
|
||||
|
||||
|
||||
# Force wheel + cpu
|
||||
# For discussion and context see https://github.com/python-poetry/poetry#6409
|
||||
[[tool.poetry.source]]
|
||||
@@ -34,12 +51,14 @@ name = "pytorch"
|
||||
url = "https://download.pytorch.org/whl/cpu"
|
||||
priority = "explicit"
|
||||
|
||||
[tool.uv.sources]
|
||||
torch = { index = "pytorch" }
|
||||
|
||||
[[tool.uv.index]]
|
||||
name = "pytorch"
|
||||
url = "https://download.pytorch.org/whl/cpu"
|
||||
explicit = true
|
||||
|
||||
[build-system]
|
||||
requires = ["poetry-core>=1.0.0"]
|
||||
build-backend = "poetry.core.masonry.api"
|
||||
|
||||
[tool.poetry.scripts]
|
||||
llama-convert-hf-to-gguf = "convert_hf_to_gguf:main"
|
||||
llama-convert-lora-to-gguf = "convert_lora_to_gguf:main"
|
||||
llama-convert-llama-ggml-to-gguf = "convert_llama_ggml_to_gguf:main"
|
||||
llama-ggml-vk-generate-shaders = "ggml_vk_generate_shaders:main"
|
||||
|
||||
@@ -1 +1 @@
|
||||
19eac6f0edaf285506eb6228d31bb9caeda9aba1
|
||||
ac6f7b44f60fde0091f0b3d99afde48f8c99b13a
|
||||
|
||||
@@ -232,6 +232,7 @@ static const std::map<llm_kv, const char *> LLM_KV_NAMES = {
|
||||
{ LLM_KV_ATTENTION_SLIDING_WINDOW_PATTERN, "%s.attention.sliding_window_pattern" },
|
||||
{ LLM_KV_ATTENTION_SCALE, "%s.attention.scale" },
|
||||
{ LLM_KV_ATTENTION_OUTPUT_SCALE, "%s.attention.output_scale" },
|
||||
{ LLM_KV_ATTENTION_VALUE_SCALE, "%s.attention.value_scale" },
|
||||
{ LLM_KV_ATTENTION_TEMPERATURE_LENGTH, "%s.attention.temperature_length" },
|
||||
{ LLM_KV_ATTENTION_TEMPERATURE_SCALE, "%s.attention.temperature_scale" },
|
||||
{ LLM_KV_ATTENTION_KEY_LENGTH_MLA, "%s.attention.key_length_mla" },
|
||||
|
||||
@@ -236,6 +236,7 @@ enum llm_kv {
|
||||
LLM_KV_ATTENTION_SLIDING_WINDOW_PATTERN,
|
||||
LLM_KV_ATTENTION_SCALE,
|
||||
LLM_KV_ATTENTION_OUTPUT_SCALE,
|
||||
LLM_KV_ATTENTION_VALUE_SCALE,
|
||||
LLM_KV_ATTENTION_TEMPERATURE_LENGTH,
|
||||
LLM_KV_ATTENTION_TEMPERATURE_SCALE,
|
||||
LLM_KV_ATTENTION_KEY_LENGTH_MLA,
|
||||
|
||||
@@ -2656,13 +2656,8 @@ size_t llama_context::state_seq_set_data(llama_seq_id seq_id, const uint8_t * sr
|
||||
throw std::runtime_error("wrong sequence state magic");
|
||||
}
|
||||
|
||||
const bool need_seq_match = (flags & LLAMA_STATE_SEQ_FLAGS_PARTIAL_ONLY);
|
||||
|
||||
llama_seq_id seq_id_read;
|
||||
io->read(&seq_id_read, sizeof(seq_id_read));
|
||||
if (need_seq_match && seq_id != seq_id_read) {
|
||||
throw std::runtime_error("wrong sequence id");
|
||||
}
|
||||
|
||||
return state_seq_read_data(*io, seq_id, flags);
|
||||
} catch (const std::exception & err) {
|
||||
|
||||
@@ -166,6 +166,8 @@ struct llama_hparams {
|
||||
float f_attn_out_scale = 0.0f;
|
||||
uint32_t attn_temp_length = 0;
|
||||
|
||||
float f_attn_value_scale = 0.0f;
|
||||
|
||||
bool causal_attn = true;
|
||||
bool use_alibi = false;
|
||||
bool attn_soft_cap = false;
|
||||
|
||||
@@ -268,6 +268,7 @@ void llama_model_saver::add_kv_from_model() {
|
||||
// add_kv(LLM_KV_ATTENTION_SLIDING_WINDOW_PATTERN, ???);
|
||||
add_kv(LLM_KV_ATTENTION_SCALE, hparams.f_attention_scale);
|
||||
add_kv(LLM_KV_ATTENTION_OUTPUT_SCALE, hparams.f_attn_out_scale);
|
||||
add_kv(LLM_KV_ATTENTION_VALUE_SCALE, hparams.f_attn_value_scale);
|
||||
add_kv(LLM_KV_ATTENTION_TEMPERATURE_LENGTH, hparams.attn_temp_length);
|
||||
add_kv(LLM_KV_ATTENTION_TEMPERATURE_SCALE, hparams.f_attn_temp_scale);
|
||||
add_kv(LLM_KV_ATTENTION_KEY_LENGTH_MLA, hparams.n_embd_head_k_mla_impl);
|
||||
|
||||
@@ -285,7 +285,7 @@ static llama_model * llama_model_mapping(llm_arch arch, const llama_model_params
|
||||
case LLM_ARCH_STEP35:
|
||||
return new llama_model_step35(params);
|
||||
default:
|
||||
GGML_ABORT("unimplemented model class");
|
||||
throw std::runtime_error(std::string("unsupported model architecture: '") + llm_arch_name(arch) + "'");
|
||||
}
|
||||
|
||||
}
|
||||
@@ -1671,6 +1671,7 @@ void llama_model::print_info() const {
|
||||
LLAMA_LOG_INFO("%s: f_max_alibi_bias = %.1e\n", __func__, hparams.f_max_alibi_bias);
|
||||
LLAMA_LOG_INFO("%s: f_logit_scale = %.1e\n", __func__, hparams.f_logit_scale);
|
||||
LLAMA_LOG_INFO("%s: f_attn_scale = %.1e\n", __func__, hparams.f_attention_scale);
|
||||
LLAMA_LOG_INFO("%s: f_attn_value_scale = %.4f\n", __func__, hparams.f_attn_value_scale);
|
||||
LLAMA_LOG_INFO("%s: n_ff = %s\n", __func__, print_f([&](uint32_t il) { return hparams.n_ff(il); }, hparams.n_layer).c_str());
|
||||
LLAMA_LOG_INFO("%s: n_expert = %u\n", __func__, hparams.n_expert);
|
||||
LLAMA_LOG_INFO("%s: n_expert_used = %u\n", __func__, hparams.n_expert_used);
|
||||
|
||||
@@ -71,12 +71,18 @@ bool llama_supports_mlock(void) {
|
||||
}
|
||||
|
||||
// Reports whether GPU offload is possible: returns true when a device of type
// GGML_BACKEND_DEVICE_TYPE_GPU or GGML_BACKEND_DEVICE_TYPE_IGPU can be found,
// or when llama_supports_rpc() returns true.
bool llama_supports_gpu_offload(void) {
|
||||
// No backend registered yet: call ggml_backend_load_all() before querying devices
// (presumably so the lookup below is not run against an empty registry - confirm).
if (!ggml_backend_reg_count()) {
|
||||
ggml_backend_load_all();
|
||||
}
|
||||
return ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_GPU) != nullptr ||
|
||||
ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_IGPU) != nullptr ||
|
||||
llama_supports_rpc();
|
||||
}
|
||||
|
||||
// Reports whether a backend registered under the name "RPC" is available.
bool llama_supports_rpc(void) {
|
||||
// No backend registered yet: call ggml_backend_load_all() before the lookup
// (presumably so the name lookup below is not run against an empty registry - confirm).
if (!ggml_backend_reg_count()) {
|
||||
ggml_backend_load_all();
|
||||
}
|
||||
return ggml_backend_reg_by_name("RPC") != nullptr;
|
||||
}
|
||||
|
||||
@@ -89,6 +95,10 @@ void llama_backend_init(void) {
|
||||
struct ggml_context * ctx = ggml_init(params);
|
||||
ggml_free(ctx);
|
||||
}
|
||||
|
||||
if (!ggml_backend_reg_count()) {
|
||||
ggml_backend_load_all();
|
||||
}
|
||||
}
|
||||
|
||||
void llama_numa_init(enum ggml_numa_strategy numa) {
|
||||
|
||||
@@ -10,7 +10,16 @@ void llama_model_mimo2::load_arch_hparams(llama_model_loader & ml) {
|
||||
ml.get_key(LLM_KV_ROPE_FREQ_BASE_SWA, hparams.rope_freq_base_train_swa, false);
|
||||
ml.get_key_or_arr(LLM_KV_ATTENTION_SLIDING_WINDOW_PATTERN, hparams.swa_layers, hparams.n_layer);
|
||||
|
||||
switch (hparams.n_layer) {
|
||||
float value_scale = 0.0f;
|
||||
if (ml.get_key(LLM_KV_ATTENTION_VALUE_SCALE, value_scale, false) && value_scale != 1.0f) {
|
||||
hparams.f_attn_value_scale = value_scale;
|
||||
}
|
||||
|
||||
ml.get_key(LLM_KV_NEXTN_PREDICT_LAYERS, hparams.nextn_predict_layers, false);
|
||||
GGML_ASSERT(hparams.nextn_predict_layers < hparams.n_layer && "nextn_predict_layers must be < n_layer");
|
||||
hparams.n_layer_kv_from_start = hparams.n_layer - hparams.nextn_predict_layers;
|
||||
|
||||
switch (hparams.n_layer - hparams.nextn_predict_layers) {
|
||||
case 48: type = LLM_TYPE_310B_A15B; break;
|
||||
default: type = LLM_TYPE_UNKNOWN;
|
||||
}
|
||||
@@ -25,32 +34,45 @@ void llama_model_mimo2::load_arch_tensors(llama_model_loader &) {
|
||||
output_norm = create_tensor(tn(LLM_TENSOR_OUTPUT_NORM, "weight"), {n_embd}, 0);
|
||||
output = create_tensor(tn(LLM_TENSOR_OUTPUT, "weight"), {n_embd, n_vocab}, 0);
|
||||
|
||||
const uint32_t n_nextn = hparams.nextn_predict_layers;
|
||||
|
||||
for (int i = 0; i < n_layer; ++i) {
|
||||
auto & layer = layers[i];
|
||||
uint32_t n_embd_k_gqa = hparams.n_embd_k_gqa(i);
|
||||
uint32_t n_embd_v_gqa = hparams.n_embd_v_gqa(i);
|
||||
uint32_t n_head = hparams.n_head(i);
|
||||
|
||||
create_tensor_qkv(layer, i, n_embd, n_embd_head_k * n_head, n_embd_k_gqa, n_embd_v_gqa, 0);
|
||||
layer.wo = create_tensor(tn(LLM_TENSOR_ATTN_OUT, "weight", i), { n_embd_head_v * n_head, n_embd }, 0);
|
||||
// NextN/MTP layers (the last n_nextn blocks) are preserved but disabled pending support
|
||||
const bool is_nextn = (n_nextn > 0) && (static_cast<uint32_t>(i) >= n_layer - n_nextn);
|
||||
const int skip = is_nextn ? TENSOR_SKIP : 0;
|
||||
|
||||
layer.attn_norm = create_tensor(tn(LLM_TENSOR_ATTN_NORM, "weight", i), {n_embd}, 0);
|
||||
layer.attn_sinks = create_tensor(tn(LLM_TENSOR_ATTN_SINKS, "weight", i), {n_head}, TENSOR_NOT_REQUIRED);
|
||||
create_tensor_qkv(layer, i, n_embd, n_embd_head_k * n_head, n_embd_k_gqa, n_embd_v_gqa, skip);
|
||||
layer.wo = create_tensor(tn(LLM_TENSOR_ATTN_OUT, "weight", i), { n_embd_head_v * n_head, n_embd }, skip);
|
||||
|
||||
layer.ffn_norm = create_tensor(tn(LLM_TENSOR_FFN_NORM, "weight", i), {n_embd}, 0);
|
||||
layer.attn_norm = create_tensor(tn(LLM_TENSOR_ATTN_NORM, "weight", i), {n_embd}, skip);
|
||||
layer.attn_sinks = create_tensor(tn(LLM_TENSOR_ATTN_SINKS, "weight", i), {n_head}, TENSOR_NOT_REQUIRED | skip);
|
||||
|
||||
layer.ffn_norm = create_tensor(tn(LLM_TENSOR_FFN_NORM, "weight", i), {n_embd}, skip);
|
||||
|
||||
// non-MoE branch
|
||||
layer.ffn_gate = create_tensor(tn(LLM_TENSOR_FFN_GATE, "weight", i), {n_embd, n_ff}, TENSOR_NOT_REQUIRED);
|
||||
layer.ffn_down = create_tensor(tn(LLM_TENSOR_FFN_DOWN, "weight", i), { n_ff, n_embd}, TENSOR_NOT_REQUIRED);
|
||||
layer.ffn_up = create_tensor(tn(LLM_TENSOR_FFN_UP, "weight", i), {n_embd, n_ff}, TENSOR_NOT_REQUIRED);
|
||||
layer.ffn_gate = create_tensor(tn(LLM_TENSOR_FFN_GATE, "weight", i), {n_embd, n_ff}, TENSOR_NOT_REQUIRED | skip);
|
||||
layer.ffn_down = create_tensor(tn(LLM_TENSOR_FFN_DOWN, "weight", i), { n_ff, n_embd}, TENSOR_NOT_REQUIRED | skip);
|
||||
layer.ffn_up = create_tensor(tn(LLM_TENSOR_FFN_UP, "weight", i), {n_embd, n_ff}, TENSOR_NOT_REQUIRED | skip);
|
||||
|
||||
// MoE branch
|
||||
int64_t n_ff_exp = hparams.n_ff_exp;
|
||||
layer.ffn_gate_inp = create_tensor(tn(LLM_TENSOR_FFN_GATE_INP, "weight", i), {n_embd, n_expert}, TENSOR_NOT_REQUIRED);
|
||||
layer.ffn_gate_exps = create_tensor(tn(LLM_TENSOR_FFN_GATE_EXPS, "weight", i), {n_embd, n_ff_exp, n_expert}, TENSOR_NOT_REQUIRED);
|
||||
layer.ffn_down_exps = create_tensor(tn(LLM_TENSOR_FFN_DOWN_EXPS, "weight", i), {n_ff_exp, n_embd, n_expert}, TENSOR_NOT_REQUIRED);
|
||||
layer.ffn_up_exps = create_tensor(tn(LLM_TENSOR_FFN_UP_EXPS, "weight", i), {n_embd, n_ff_exp, n_expert}, TENSOR_NOT_REQUIRED);
|
||||
layer.ffn_exp_probs_b = create_tensor(tn(LLM_TENSOR_FFN_EXP_PROBS_B, "bias", i), {n_expert}, TENSOR_NOT_REQUIRED);
|
||||
layer.ffn_gate_inp = create_tensor(tn(LLM_TENSOR_FFN_GATE_INP, "weight", i), {n_embd, n_expert}, TENSOR_NOT_REQUIRED | skip);
|
||||
layer.ffn_gate_exps = create_tensor(tn(LLM_TENSOR_FFN_GATE_EXPS, "weight", i), {n_embd, n_ff_exp, n_expert}, TENSOR_NOT_REQUIRED | skip);
|
||||
layer.ffn_down_exps = create_tensor(tn(LLM_TENSOR_FFN_DOWN_EXPS, "weight", i), {n_ff_exp, n_embd, n_expert}, TENSOR_NOT_REQUIRED | skip);
|
||||
layer.ffn_up_exps = create_tensor(tn(LLM_TENSOR_FFN_UP_EXPS, "weight", i), {n_embd, n_ff_exp, n_expert}, TENSOR_NOT_REQUIRED | skip);
|
||||
layer.ffn_exp_probs_b = create_tensor(tn(LLM_TENSOR_FFN_EXP_PROBS_B, "bias", i), {n_expert}, TENSOR_NOT_REQUIRED | skip);
|
||||
|
||||
if (is_nextn) {
|
||||
layer.nextn.eh_proj = create_tensor(tn(LLM_TENSOR_NEXTN_EH_PROJ, "weight", i), {2 * n_embd, n_embd}, skip);
|
||||
layer.nextn.enorm = create_tensor(tn(LLM_TENSOR_NEXTN_ENORM, "weight", i), {n_embd}, skip);
|
||||
layer.nextn.hnorm = create_tensor(tn(LLM_TENSOR_NEXTN_HNORM, "weight", i), {n_embd}, skip);
|
||||
layer.layer_out_norm = create_tensor(tn(LLM_TENSOR_LAYER_OUT_NORM, "weight", i), {n_embd}, skip);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -68,7 +90,12 @@ llama_model_mimo2::graph::graph(const llama_model & model, const llm_graph_param
|
||||
auto * inp_attn = build_attn_inp_kv_iswa();
|
||||
ggml_tensor * inp_out_ids = build_inp_out_ids();
|
||||
|
||||
for (int il = 0; il < n_layer; ++il) {
|
||||
const float v_scale = hparams.f_attn_value_scale;
|
||||
|
||||
// The last hparams.nextn_predict_layers blocks are MTP heads, currently inactive
|
||||
const int n_transformer_layers = n_layer - hparams.nextn_predict_layers;
|
||||
|
||||
for (int il = 0; il < n_transformer_layers; ++il) {
|
||||
ggml_tensor * inpSA = inpL;
|
||||
|
||||
uint32_t n_head_l = hparams.n_head(il);
|
||||
@@ -83,19 +110,39 @@ llama_model_mimo2::graph::graph(const llama_model & model, const llm_graph_param
|
||||
cur = build_norm(inpL, model.layers[il].attn_norm, NULL, LLM_NORM_RMS, il);
|
||||
cb(cur, "attn_norm", il);
|
||||
|
||||
// compute Q and K and RoPE them
|
||||
ggml_tensor * Qcur = build_lora_mm(model.layers[il].wq, cur);
|
||||
cb(Qcur, "Qcur", il);
|
||||
ggml_tensor * Qcur;
|
||||
ggml_tensor * Kcur;
|
||||
ggml_tensor * Vcur;
|
||||
|
||||
ggml_tensor * Kcur = build_lora_mm(model.layers[il].wk, cur);
|
||||
cb(Kcur, "Kcur", il);
|
||||
if (model.layers[il].wqkv) {
|
||||
// Fused qkv_proj - Q/K share head_dim_k, V uses head_dim_v
|
||||
ggml_tensor * qkv = build_lora_mm(model.layers[il].wqkv, cur);
|
||||
cb(qkv, "wqkv", il);
|
||||
|
||||
ggml_tensor * Vcur = build_lora_mm(model.layers[il].wv, cur);
|
||||
cb(Vcur, "Vcur", il);
|
||||
const size_t row_k = ggml_row_size(qkv->type, n_embd_head_k);
|
||||
const size_t row_v = ggml_row_size(qkv->type, n_embd_head_v);
|
||||
const size_t row_full = qkv->nb[1];
|
||||
const size_t k_off = row_k * n_head_l;
|
||||
const size_t v_off = k_off + row_k * n_head_kv_l;
|
||||
|
||||
Qcur = ggml_reshape_3d(ctx0, Qcur, n_embd_head_k, n_head_l, n_tokens);
|
||||
Kcur = ggml_reshape_3d(ctx0, Kcur, n_embd_head_k, n_head_kv_l, n_tokens);
|
||||
Vcur = ggml_reshape_3d(ctx0, Vcur, n_embd_head_v, n_head_kv_l, n_tokens);
|
||||
Qcur = ggml_view_3d(ctx0, qkv, n_embd_head_k, n_head_l, n_tokens, row_k, row_full, 0);
|
||||
Kcur = ggml_view_3d(ctx0, qkv, n_embd_head_k, n_head_kv_l, n_tokens, row_k, row_full, k_off);
|
||||
Vcur = ggml_view_3d(ctx0, qkv, n_embd_head_v, n_head_kv_l, n_tokens, row_v, row_full, v_off);
|
||||
} else {
|
||||
// Split path
|
||||
Qcur = build_lora_mm(model.layers[il].wq, cur);
|
||||
cb(Qcur, "Qcur", il);
|
||||
|
||||
Kcur = build_lora_mm(model.layers[il].wk, cur);
|
||||
cb(Kcur, "Kcur", il);
|
||||
|
||||
Vcur = build_lora_mm(model.layers[il].wv, cur);
|
||||
cb(Vcur, "Vcur", il);
|
||||
|
||||
Qcur = ggml_reshape_3d(ctx0, Qcur, n_embd_head_k, n_head_l, n_tokens);
|
||||
Kcur = ggml_reshape_3d(ctx0, Kcur, n_embd_head_k, n_head_kv_l, n_tokens);
|
||||
Vcur = ggml_reshape_3d(ctx0, Vcur, n_embd_head_v, n_head_kv_l, n_tokens);
|
||||
}
|
||||
|
||||
Qcur = ggml_rope_ext(
|
||||
ctx0, Qcur, inp_pos, nullptr,
|
||||
@@ -118,9 +165,15 @@ llama_model_mimo2::graph::graph(const llama_model & model, const llm_graph_param
|
||||
cur = build_attn(inp_attn,
|
||||
model.layers[il].wo, NULL, model.layers[il].wo_s,
|
||||
Qcur, Kcur, Vcur, nullptr, sinks, nullptr, 1.0f/sqrtf(float(n_embd_head_k)), il);
|
||||
cb(cur, "attn_out", il);
|
||||
|
||||
if (v_scale) {
|
||||
cur = ggml_scale(ctx0, cur, v_scale);
|
||||
cb(cur, "attn_out_scaled", il);
|
||||
}
|
||||
}
|
||||
|
||||
if (il == n_layer - 1 && inp_out_ids) {
|
||||
if (il == n_transformer_layers - 1 && inp_out_ids) {
|
||||
cur = ggml_get_rows(ctx0, cur, inp_out_ids);
|
||||
inpSA = ggml_get_rows(ctx0, inpSA, inp_out_ids);
|
||||
}
|
||||
|
||||
@@ -29,6 +29,9 @@ int main(int argc, char ** argv) {
|
||||
}
|
||||
|
||||
// init
|
||||
|
||||
ggml_backend_load_all();
|
||||
|
||||
common_init_result_ptr llama_init = common_init_from_params(params);
|
||||
|
||||
llama_model * model = llama_init->model();
|
||||
|
||||
@@ -21,6 +21,7 @@ add_library(mtmd
|
||||
models/gemma4a.cpp
|
||||
models/gemma4v.cpp
|
||||
models/glm4v.cpp
|
||||
models/granite-speech.cpp
|
||||
models/hunyuanocr.cpp
|
||||
models/internvl.cpp
|
||||
models/kimivl.cpp
|
||||
|
||||
@@ -49,6 +49,7 @@ For the following models, you can use `convert_hf_to_gguf.py` with `--mmproj` fl
|
||||
- Qwen 2 VL and Qwen 2.5 VL (from [Qwen](https://huggingface.co/Qwen))
|
||||
- [Mistral Small 3.1 24B](https://huggingface.co/mistralai/Mistral-Small-3.1-24B-Instruct-2503)
|
||||
- InternVL 2.5 and InternVL 3 from [OpenGVLab](https://huggingface.co/OpenGVLab) (note: conversion of `InternVL3-*-hf` models is not supported, only the non-HF versions are; the `InternLM2Model` **text** model is not supported either)
|
||||
- [MiniCPM-V 4.6](https://huggingface.co/openbmb/MiniCPM-V-4_6); see the guide [here](../../docs/multimodal/minicpmv4.6.md). Requires the standard `transformers` v5.7.0+ checkpoint.
|
||||
|
||||
For older models, please refer to the relevant guide for instructions on how to obtain or create them:
|
||||
|
||||
@@ -60,4 +61,7 @@ NOTE: conversion scripts are located under `tools/mtmd/legacy-models`
|
||||
- [MiniCPM-V 2.5](../../docs/multimodal/minicpmv2.5.md)
|
||||
- [MiniCPM-V 2.6](../../docs/multimodal/minicpmv2.6.md)
|
||||
- [MiniCPM-o 2.6](../../docs/multimodal/minicpmo2.6.md)
|
||||
- [MiniCPM-V 4.0](../../docs/multimodal/minicpmv4.0.md)
|
||||
- [MiniCPM-o 4.0](../../docs/multimodal/minicpmo4.0.md)
|
||||
- [MiniCPM-V 4.5](../../docs/multimodal/minicpmv4.5.md)
|
||||
- [IBM Granite Vision](../../docs/multimodal/granitevision.md)
|
||||
|
||||
@@ -60,9 +60,15 @@
|
||||
#define KEY_SAM_N_BLOCK "clip.vision.sam.block_count"
|
||||
#define KEY_SAM_N_EMBD "clip.vision.sam.embedding_length"
|
||||
// audio-specific
|
||||
#define KEY_AUDIO_PROJ_TYPE "clip.audio.projector_type" // for models with mixed modalities
|
||||
#define KEY_A_NUM_MEL_BINS "clip.audio.num_mel_bins"
|
||||
#define KEY_A_PROJ_STACK_FACTOR "clip.audio.projector.stack_factor"
|
||||
#define KEY_AUDIO_PROJ_TYPE "clip.audio.projector_type" // for models with mixed modalities
|
||||
#define KEY_A_NUM_MEL_BINS "clip.audio.num_mel_bins"
|
||||
#define KEY_A_PROJ_STACK_FACTOR "clip.audio.projector.stack_factor"
|
||||
#define KEY_A_CHUNK_SIZE "clip.audio.chunk_size"
|
||||
#define KEY_A_CONV_KERNEL_SIZE "clip.audio.conv_kernel_size"
|
||||
#define KEY_A_MAX_POS_EMB "clip.audio.max_pos_emb"
|
||||
#define KEY_A_PROJ_WINDOW_SIZE "clip.audio.projector.window_size"
|
||||
#define KEY_A_PROJ_DOWNSAMPLE_RATE "clip.audio.projector.downsample_rate"
|
||||
#define KEY_A_PROJ_HEAD_COUNT "clip.audio.projector.head_count"
|
||||
|
||||
|
||||
//
|
||||
@@ -126,6 +132,17 @@
|
||||
#define TN_MINICPMV_ATTN "resampler.attn.%s.%s"
|
||||
#define TN_MINICPMV_LN "resampler.ln_%s.%s"
|
||||
|
||||
// MiniCPM-V 4.6 ViT merger (window attention + MLP downsample),
|
||||
// matching the upstream `vit_merger` module name in transformers.
|
||||
#define TN_VIT_MERGER_LN1 "v.vit_merger.ln1.%s"
|
||||
#define TN_VIT_MERGER_ATTN_Q "v.vit_merger.attn_q.%s"
|
||||
#define TN_VIT_MERGER_ATTN_K "v.vit_merger.attn_k.%s"
|
||||
#define TN_VIT_MERGER_ATTN_V "v.vit_merger.attn_v.%s"
|
||||
#define TN_VIT_MERGER_ATTN_O "v.vit_merger.attn_out.%s"
|
||||
#define TN_VIT_MERGER_DS_LN "v.vit_merger.ds_ln.%s"
|
||||
#define TN_VIT_MERGER_DS_UP "v.vit_merger.ds_ffn_up.%s"
|
||||
#define TN_VIT_MERGER_DS_DOWN "v.vit_merger.ds_ffn_down.%s"
|
||||
|
||||
#define TN_GLM_ADAPER_CONV "adapter.conv.%s"
|
||||
#define TN_GLM_ADAPTER_LINEAR "adapter.linear.linear.%s"
|
||||
#define TN_GLM_ADAPTER_NORM_1 "adapter.linear.norm1.%s"
|
||||
@@ -182,6 +199,27 @@
|
||||
#define TN_CONV_NORM "%s.blk.%d.conv_norm.%s"
|
||||
#define TN_CONV_PW1 "%s.blk.%d.conv_pw1.%s"
|
||||
#define TN_CONV_PW2 "%s.blk.%d.conv_pw2.%s"
|
||||
#define TN_INP_PROJ "a.input_projection.%s"
|
||||
#define TN_CTC_OUT "a.enc_ctc_out.%s"
|
||||
#define TN_CTC_OUT_MID "a.enc_ctc_out_mid.%s"
|
||||
#define TN_ATTN_REL_POS_EMB "%s.blk.%d.attn_rel_pos_emb"
|
||||
// qformer projector
|
||||
#define TN_QF_PROJ_QUERY "a.proj_query"
|
||||
#define TN_QF_PROJ_NORM "a.proj_norm.%s"
|
||||
#define TN_QF_PROJ_LINEAR "a.proj_linear.%s"
|
||||
#define TN_QF_SELF_ATTN_Q "a.proj_blk.%d.self_attn_q.%s"
|
||||
#define TN_QF_SELF_ATTN_K "a.proj_blk.%d.self_attn_k.%s"
|
||||
#define TN_QF_SELF_ATTN_V "a.proj_blk.%d.self_attn_v.%s"
|
||||
#define TN_QF_SELF_ATTN_O "a.proj_blk.%d.self_attn_out.%s"
|
||||
#define TN_QF_SELF_ATTN_N "a.proj_blk.%d.self_attn_norm.%s"
|
||||
#define TN_QF_CROSS_ATTN_Q "a.proj_blk.%d.cross_attn_q.%s"
|
||||
#define TN_QF_CROSS_ATTN_K "a.proj_blk.%d.cross_attn_k.%s"
|
||||
#define TN_QF_CROSS_ATTN_V "a.proj_blk.%d.cross_attn_v.%s"
|
||||
#define TN_QF_CROSS_ATTN_O "a.proj_blk.%d.cross_attn_out.%s"
|
||||
#define TN_QF_CROSS_ATTN_N "a.proj_blk.%d.cross_attn_norm.%s"
|
||||
#define TN_QF_FFN_UP "a.proj_blk.%d.ffn_up.%s"
|
||||
#define TN_QF_FFN_DOWN "a.proj_blk.%d.ffn_down.%s"
|
||||
#define TN_QF_FFN_NORM "a.proj_blk.%d.ffn_norm.%s"
|
||||
|
||||
// gemma4 audio conformer
|
||||
#define TN_A_MM_INP_PROJ "mm.a.input_projection.%s"
|
||||
@@ -304,6 +342,8 @@ enum projector_type {
|
||||
PROJECTOR_TYPE_NEMOTRON_V2_VL,
|
||||
PROJECTOR_TYPE_HUNYUANOCR,
|
||||
PROJECTOR_TYPE_HUNYUANVL,
|
||||
PROJECTOR_TYPE_MINICPMV4_6,
|
||||
PROJECTOR_TYPE_GRANITE_SPEECH,
|
||||
PROJECTOR_TYPE_UNKNOWN,
|
||||
};
|
||||
|
||||
@@ -351,6 +391,8 @@ static std::map<projector_type, std::string> PROJECTOR_TYPE_NAMES = {
|
||||
{ PROJECTOR_TYPE_NEMOTRON_V2_VL, "nemotron_v2_vl"},
|
||||
{ PROJECTOR_TYPE_HUNYUANOCR, "hunyuanocr"},
|
||||
{ PROJECTOR_TYPE_HUNYUANVL, "hunyuanvl"},
|
||||
{ PROJECTOR_TYPE_MINICPMV4_6, "minicpmv4_6"},
|
||||
{ PROJECTOR_TYPE_GRANITE_SPEECH, "granite_speech"},
|
||||
};
|
||||
|
||||
static projector_type clip_projector_type_from_string(const std::string & str) {
|
||||
|
||||
@@ -92,6 +92,12 @@ struct clip_hparams {
|
||||
// audio
|
||||
int32_t n_mel_bins = 0; // whisper preprocessor
|
||||
int32_t proj_stack_factor = 0; // ultravox
|
||||
int32_t audio_chunk_size = 0;
|
||||
int32_t audio_conv_kernel_size = 0;
|
||||
int32_t audio_max_pos_emb = 0;
|
||||
int32_t audio_proj_window_size = 0;
|
||||
int32_t audio_proj_downsample_rate = 0;
|
||||
int32_t audio_proj_head_count = 0;
|
||||
|
||||
// audio-to-mel preprocessor params
|
||||
int32_t audio_chunk_len = -1; // in seconds
|
||||
@@ -104,6 +110,7 @@ struct clip_hparams {
|
||||
bool has_llava_projector = false;
|
||||
int minicpmv_version = 0;
|
||||
int32_t minicpmv_query_num = 0; // MiniCPM-V query number
|
||||
int32_t insert_layer_id = 0; // MiniCPM-V 4.6 ViT merger insertion layer
|
||||
|
||||
// custom value provided by user, can be undefined if not set
|
||||
int32_t custom_image_min_tokens = -1;
|
||||
@@ -224,6 +231,21 @@ struct clip_layer {
|
||||
ggml_tensor * per_dim_k_scale_w = nullptr;
|
||||
ggml_tensor * ff_post_norm_1_w = nullptr;
|
||||
|
||||
// granite_speech conformer per-layer
|
||||
ggml_tensor * attn_rel_pos_emb = nullptr;
|
||||
|
||||
// granite_speech qformer cross-attention
|
||||
ggml_tensor * cross_attn_q_w = nullptr;
|
||||
ggml_tensor * cross_attn_q_b = nullptr;
|
||||
ggml_tensor * cross_attn_k_w = nullptr;
|
||||
ggml_tensor * cross_attn_k_b = nullptr;
|
||||
ggml_tensor * cross_attn_v_w = nullptr;
|
||||
ggml_tensor * cross_attn_v_b = nullptr;
|
||||
ggml_tensor * cross_attn_o_w = nullptr;
|
||||
ggml_tensor * cross_attn_o_b = nullptr;
|
||||
ggml_tensor * cross_attn_norm_w = nullptr;
|
||||
ggml_tensor * cross_attn_norm_b = nullptr;
|
||||
|
||||
bool has_deepstack() const {
|
||||
return deepstack_fc1_w != nullptr;
|
||||
}
|
||||
@@ -403,6 +425,24 @@ struct clip_model {
|
||||
ggml_tensor * mm_model_ln_post_w = nullptr;
|
||||
ggml_tensor * mm_model_ln_post_b = nullptr;
|
||||
|
||||
// MiniCPM-V 4.6 ViT merger (window self-attention + ViT MLP downsample)
|
||||
ggml_tensor * vit_merger_ln1_w = nullptr;
|
||||
ggml_tensor * vit_merger_ln1_b = nullptr;
|
||||
ggml_tensor * vit_merger_attn_q_w = nullptr;
|
||||
ggml_tensor * vit_merger_attn_q_b = nullptr;
|
||||
ggml_tensor * vit_merger_attn_k_w = nullptr;
|
||||
ggml_tensor * vit_merger_attn_k_b = nullptr;
|
||||
ggml_tensor * vit_merger_attn_v_w = nullptr;
|
||||
ggml_tensor * vit_merger_attn_v_b = nullptr;
|
||||
ggml_tensor * vit_merger_attn_o_w = nullptr;
|
||||
ggml_tensor * vit_merger_attn_o_b = nullptr;
|
||||
ggml_tensor * vit_merger_ds_ln_w = nullptr;
|
||||
ggml_tensor * vit_merger_ds_ln_b = nullptr;
|
||||
ggml_tensor * vit_merger_ds_up_w = nullptr;
|
||||
ggml_tensor * vit_merger_ds_up_b = nullptr;
|
||||
ggml_tensor * vit_merger_ds_down_w = nullptr;
|
||||
ggml_tensor * vit_merger_ds_down_b = nullptr;
|
||||
|
||||
// gemma3
|
||||
ggml_tensor * mm_input_proj_w = nullptr;
|
||||
ggml_tensor * mm_soft_emb_norm_w = nullptr;
|
||||
@@ -515,6 +555,21 @@ struct clip_model {
|
||||
ggml_tensor * audio_out_proj_w = nullptr;
|
||||
ggml_tensor * audio_out_proj_b = nullptr;
|
||||
|
||||
// granite_speech encoder
|
||||
ggml_tensor * inp_proj_w = nullptr;
|
||||
ggml_tensor * inp_proj_b = nullptr;
|
||||
ggml_tensor * ctc_out_w = nullptr;
|
||||
ggml_tensor * ctc_out_b = nullptr;
|
||||
ggml_tensor * ctc_out_mid_w = nullptr;
|
||||
ggml_tensor * ctc_out_mid_b = nullptr;
|
||||
// qformer projector
|
||||
ggml_tensor * qf_proj_query = nullptr;
|
||||
ggml_tensor * qf_proj_norm_w = nullptr;
|
||||
ggml_tensor * qf_proj_norm_b = nullptr;
|
||||
ggml_tensor * qf_proj_linear_w = nullptr;
|
||||
ggml_tensor * qf_proj_linear_b = nullptr;
|
||||
std::vector<clip_layer> qf_proj_layers;
|
||||
|
||||
bool audio_has_avgpool() const {
|
||||
return proj_type == PROJECTOR_TYPE_QWEN2A
|
||||
|| proj_type == PROJECTOR_TYPE_VOXTRAL
|
||||
|
||||
@@ -874,6 +874,10 @@ static ggml_cgraph * clip_image_build_graph(clip_ctx * ctx, const clip_image_f32
|
||||
{
|
||||
builder = std::make_unique<clip_graph_minicpmv>(ctx, img);
|
||||
} break;
|
||||
case PROJECTOR_TYPE_MINICPMV4_6:
|
||||
{
|
||||
builder = std::make_unique<clip_graph_minicpmv4_6>(ctx, img);
|
||||
} break;
|
||||
case PROJECTOR_TYPE_INTERNVL:
|
||||
{
|
||||
builder = std::make_unique<clip_graph_internvl>(ctx, img);
|
||||
@@ -936,6 +940,10 @@ static ggml_cgraph * clip_image_build_graph(clip_ctx * ctx, const clip_image_f32
|
||||
{
|
||||
builder = std::make_unique<clip_graph_gemma4a>(ctx, img);
|
||||
} break;
|
||||
case PROJECTOR_TYPE_GRANITE_SPEECH:
|
||||
{
|
||||
builder = std::make_unique<clip_graph_granite_speech>(ctx, img);
|
||||
} break;
|
||||
case PROJECTOR_TYPE_GLM4V:
|
||||
{
|
||||
builder = std::make_unique<clip_graph_glm4v>(ctx, img);
|
||||
@@ -1227,6 +1235,20 @@ struct clip_model_loader {
|
||||
hparams.minicpmv_version = 2; // default to 2 if not set
|
||||
}
|
||||
} break;
|
||||
// Hyper-parameters for the MiniCPM-V 4.6 projector: spatial merge factor and
// the encoder layer at which the ViT merger is inserted.
case PROJECTOR_TYPE_MINICPMV4_6:
|
||||
{
|
||||
// MiniCPM-V 4.6 unified merger projector
|
||||
// ViT merger 2x2 + final merger 2x2 = 4x spatial merge per dimension
|
||||
// default merge factor; the read below may overwrite it (the trailing
// `false` presumably marks the key as optional -- confirm against get_u32)
// NOTE(review): clip_n_output_tokens() divides the patch count by a fixed 16
// for this projector, so a value other than 4 would not change the token
// count -- confirm this is intended.
hparams.n_merge = 4;
|
||||
get_u32(KEY_PROJ_SCALE_FACTOR, hparams.n_merge, false);
|
||||
|
||||
// borrow wa_layer_indexes for vit_merger insertion point
|
||||
std::vector<int> wa_layer_indexes_vec;
|
||||
get_arr_int(KEY_WIN_ATTN_LAYER_INDEXES, wa_layer_indexes_vec, false);
|
||||
// only the first entry is used; when the array is empty insert_layer_id
// keeps its in-struct default of 0
if (!wa_layer_indexes_vec.empty()) {
|
||||
hparams.insert_layer_id = wa_layer_indexes_vec[0];
|
||||
}
|
||||
} break;
|
||||
case PROJECTOR_TYPE_INTERNVL:
|
||||
{
|
||||
// use default llava-uhd preprocessing params
|
||||
@@ -1503,6 +1525,20 @@ struct clip_model_loader {
|
||||
hparams.audio_window_len = 320; // 20ms frame (NOT 25ms/400)
|
||||
hparams.audio_hop_len = 160;
|
||||
} break;
|
||||
case PROJECTOR_TYPE_GRANITE_SPEECH:
|
||||
{
|
||||
hparams.audio_chunk_len = 0;
|
||||
hparams.audio_sample_rate = 16000;
|
||||
hparams.audio_n_fft = 512;
|
||||
hparams.audio_window_len = 400;
|
||||
hparams.audio_hop_len = 160;
|
||||
get_u32(KEY_A_CHUNK_SIZE, hparams.audio_chunk_size);
|
||||
get_u32(KEY_A_CONV_KERNEL_SIZE, hparams.audio_conv_kernel_size);
|
||||
get_u32(KEY_A_MAX_POS_EMB, hparams.audio_max_pos_emb);
|
||||
get_u32(KEY_A_PROJ_WINDOW_SIZE, hparams.audio_proj_window_size);
|
||||
get_u32(KEY_A_PROJ_DOWNSAMPLE_RATE, hparams.audio_proj_downsample_rate);
|
||||
get_u32(KEY_A_PROJ_HEAD_COUNT, hparams.audio_proj_head_count);
|
||||
} break;
|
||||
case PROJECTOR_TYPE_JANUS_PRO:
|
||||
{
|
||||
hparams.image_pad_color = {127, 127, 127};
|
||||
@@ -1654,13 +1690,13 @@ struct clip_model_loader {
|
||||
|
||||
model.position_embeddings = get_tensor(string_format(TN_POS_EMBD, prefix), false);
|
||||
|
||||
if (model.proj_type == PROJECTOR_TYPE_GEMMA3NV) {
|
||||
hparams.n_layer = 0; // gemma3n does not use normal layer structure
|
||||
}
|
||||
const bool has_standard_layers = (
|
||||
model.proj_type != PROJECTOR_TYPE_GEMMA3NV);
|
||||
|
||||
// layers
|
||||
model.layers.resize(hparams.n_layer);
|
||||
for (int il = 0; il < hparams.n_layer; ++il) {
|
||||
const int n_layers_to_load = has_standard_layers ? hparams.n_layer : 0;
|
||||
model.layers.resize(n_layers_to_load);
|
||||
for (int il = 0; il < n_layers_to_load; ++il) {
|
||||
auto & layer = model.layers[il];
|
||||
layer.k_w = get_tensor(string_format(TN_ATTN_K, prefix, il, "weight"), false);
|
||||
layer.q_w = get_tensor(string_format(TN_ATTN_Q, prefix, il, "weight"), false);
|
||||
@@ -1719,6 +1755,7 @@ struct clip_model_loader {
|
||||
|| model.proj_type == PROJECTOR_TYPE_GEMMA3
|
||||
|| model.proj_type == PROJECTOR_TYPE_IDEFICS3
|
||||
|| model.proj_type == PROJECTOR_TYPE_MINICPMV
|
||||
|| model.proj_type == PROJECTOR_TYPE_MINICPMV4_6
|
||||
) && layer.ff_up_w && layer.ff_down_w && layer.ff_down_w->ne[0] == hparams.n_embd;
|
||||
if (is_ffn_swapped) {
|
||||
// swap up and down weights
|
||||
@@ -1820,6 +1857,34 @@ struct clip_model_loader {
|
||||
model.mm_model_ln_post_w = get_tensor(string_format(TN_MINICPMV_LN, "post", "weight"));
|
||||
model.mm_model_ln_post_b = get_tensor(string_format(TN_MINICPMV_LN, "post", "bias"));
|
||||
} break;
|
||||
case PROJECTOR_TYPE_MINICPMV4_6:
|
||||
{
|
||||
// ViT merger: window self-attention
|
||||
model.vit_merger_ln1_w = get_tensor(string_format(TN_VIT_MERGER_LN1, "weight"));
|
||||
model.vit_merger_ln1_b = get_tensor(string_format(TN_VIT_MERGER_LN1, "bias"));
|
||||
model.vit_merger_attn_q_w = get_tensor(string_format(TN_VIT_MERGER_ATTN_Q, "weight"));
|
||||
model.vit_merger_attn_q_b = get_tensor(string_format(TN_VIT_MERGER_ATTN_Q, "bias"), false);
|
||||
model.vit_merger_attn_k_w = get_tensor(string_format(TN_VIT_MERGER_ATTN_K, "weight"));
|
||||
model.vit_merger_attn_k_b = get_tensor(string_format(TN_VIT_MERGER_ATTN_K, "bias"), false);
|
||||
model.vit_merger_attn_v_w = get_tensor(string_format(TN_VIT_MERGER_ATTN_V, "weight"));
|
||||
model.vit_merger_attn_v_b = get_tensor(string_format(TN_VIT_MERGER_ATTN_V, "bias"), false);
|
||||
model.vit_merger_attn_o_w = get_tensor(string_format(TN_VIT_MERGER_ATTN_O, "weight"));
|
||||
model.vit_merger_attn_o_b = get_tensor(string_format(TN_VIT_MERGER_ATTN_O, "bias"), false);
|
||||
// ViT merger: MLP downsample
|
||||
model.vit_merger_ds_ln_w = get_tensor(string_format(TN_VIT_MERGER_DS_LN, "weight"));
|
||||
model.vit_merger_ds_ln_b = get_tensor(string_format(TN_VIT_MERGER_DS_LN, "bias"));
|
||||
model.vit_merger_ds_up_w = get_tensor(string_format(TN_VIT_MERGER_DS_UP, "weight"));
|
||||
model.vit_merger_ds_up_b = get_tensor(string_format(TN_VIT_MERGER_DS_UP, "bias"), false);
|
||||
model.vit_merger_ds_down_w = get_tensor(string_format(TN_VIT_MERGER_DS_DOWN, "weight"));
|
||||
model.vit_merger_ds_down_b = get_tensor(string_format(TN_VIT_MERGER_DS_DOWN, "bias"), false);
|
||||
// Final Merger (DownsampleMLP)
|
||||
model.mm_input_norm_w = get_tensor(TN_MM_INP_NORM);
|
||||
model.mm_input_norm_b = get_tensor(TN_MM_INP_NORM_B, false);
|
||||
model.mm_ffn_up_w = get_tensor(string_format(TN_MM_UP, "weight"));
|
||||
model.mm_ffn_up_b = get_tensor(string_format(TN_MM_UP, "bias"), false);
|
||||
model.mm_ffn_down_w = get_tensor(string_format(TN_MM_DOWN, "weight"));
|
||||
model.mm_ffn_down_b = get_tensor(string_format(TN_MM_DOWN, "bias"), false);
|
||||
} break;
|
||||
case PROJECTOR_TYPE_GLM_EDGE:
|
||||
{
|
||||
model.mm_model_adapter_conv_w = get_tensor(string_format(TN_GLM_ADAPER_CONV, "weight"));
|
||||
@@ -2415,6 +2480,83 @@ struct clip_model_loader {
|
||||
layer.conv_pw2_b = get_tensor(string_format(TN_CONV_PW2, prefix, il, "bias"));
|
||||
}
|
||||
} break;
|
||||
case PROJECTOR_TYPE_GRANITE_SPEECH:
|
||||
{
|
||||
model.inp_proj_w = get_tensor(string_format(TN_INP_PROJ, "weight"));
|
||||
model.inp_proj_b = get_tensor(string_format(TN_INP_PROJ, "bias"));
|
||||
model.ctc_out_w = get_tensor(string_format(TN_CTC_OUT, "weight"));
|
||||
model.ctc_out_b = get_tensor(string_format(TN_CTC_OUT, "bias"));
|
||||
model.ctc_out_mid_w = get_tensor(string_format(TN_CTC_OUT_MID, "weight"));
|
||||
model.ctc_out_mid_b = get_tensor(string_format(TN_CTC_OUT_MID, "bias"));
|
||||
|
||||
// per-layer tensors not loaded by the generic loop above
|
||||
for (int il = 0; il < hparams.n_layer; ++il) {
|
||||
auto & layer = model.layers[il];
|
||||
|
||||
layer.attn_rel_pos_emb = get_tensor(string_format(TN_ATTN_REL_POS_EMB, prefix, il));
|
||||
|
||||
layer.ff_norm_w = get_tensor(string_format(TN_FFN_NORM, prefix, il, "weight"));
|
||||
layer.ff_norm_b = get_tensor(string_format(TN_FFN_NORM, prefix, il, "bias"));
|
||||
|
||||
layer.ff_norm_1_w = get_tensor(string_format(TN_FFN_NORM_1, prefix, il, "weight"));
|
||||
layer.ff_norm_1_b = get_tensor(string_format(TN_FFN_NORM_1, prefix, il, "bias"));
|
||||
layer.ff_up_1_w = get_tensor(string_format(TN_FFN_UP_1, prefix, il, "weight"));
|
||||
layer.ff_up_1_b = get_tensor(string_format(TN_FFN_UP_1, prefix, il, "bias"));
|
||||
layer.ff_down_1_w = get_tensor(string_format(TN_FFN_DOWN_1, prefix, il, "weight"));
|
||||
layer.ff_down_1_b = get_tensor(string_format(TN_FFN_DOWN_1, prefix, il, "bias"));
|
||||
|
||||
layer.norm_conv_w = get_tensor(string_format(TN_NORM_CONV, prefix, il, "weight"));
|
||||
layer.norm_conv_b = get_tensor(string_format(TN_NORM_CONV, prefix, il, "bias"));
|
||||
layer.conv_norm_w = get_tensor(string_format(TN_CONV_NORM, prefix, il, "weight"));
|
||||
layer.conv_norm_b = get_tensor(string_format(TN_CONV_NORM, prefix, il, "bias"));
|
||||
layer.conv_dw_w = get_tensor(string_format(TN_CONV_DW, prefix, il, "weight"));
|
||||
layer.conv_pw1_w = get_tensor(string_format(TN_CONV_PW1, prefix, il, "weight"));
|
||||
layer.conv_pw1_b = get_tensor(string_format(TN_CONV_PW1, prefix, il, "bias"));
|
||||
layer.conv_pw2_w = get_tensor(string_format(TN_CONV_PW2, prefix, il, "weight"));
|
||||
layer.conv_pw2_b = get_tensor(string_format(TN_CONV_PW2, prefix, il, "bias"));
|
||||
}
|
||||
|
||||
model.qf_proj_query = get_tensor(TN_QF_PROJ_QUERY);
|
||||
model.qf_proj_norm_w = get_tensor(string_format(TN_QF_PROJ_NORM, "weight"));
|
||||
model.qf_proj_norm_b = get_tensor(string_format(TN_QF_PROJ_NORM, "bias"));
|
||||
model.qf_proj_linear_w = get_tensor(string_format(TN_QF_PROJ_LINEAR, "weight"));
|
||||
model.qf_proj_linear_b = get_tensor(string_format(TN_QF_PROJ_LINEAR, "bias"));
|
||||
|
||||
const int n_proj_layers = 2;
|
||||
model.qf_proj_layers.resize(n_proj_layers);
|
||||
for (int il = 0; il < n_proj_layers; ++il) {
|
||||
auto & pl = model.qf_proj_layers[il];
|
||||
|
||||
pl.q_w = get_tensor(string_format(TN_QF_SELF_ATTN_Q, il, "weight"));
|
||||
pl.q_b = get_tensor(string_format(TN_QF_SELF_ATTN_Q, il, "bias"));
|
||||
pl.k_w = get_tensor(string_format(TN_QF_SELF_ATTN_K, il, "weight"));
|
||||
pl.k_b = get_tensor(string_format(TN_QF_SELF_ATTN_K, il, "bias"));
|
||||
pl.v_w = get_tensor(string_format(TN_QF_SELF_ATTN_V, il, "weight"));
|
||||
pl.v_b = get_tensor(string_format(TN_QF_SELF_ATTN_V, il, "bias"));
|
||||
pl.o_w = get_tensor(string_format(TN_QF_SELF_ATTN_O, il, "weight"));
|
||||
pl.o_b = get_tensor(string_format(TN_QF_SELF_ATTN_O, il, "bias"));
|
||||
pl.ln_1_w = get_tensor(string_format(TN_QF_SELF_ATTN_N, il, "weight"));
|
||||
pl.ln_1_b = get_tensor(string_format(TN_QF_SELF_ATTN_N, il, "bias"));
|
||||
|
||||
pl.cross_attn_q_w = get_tensor(string_format(TN_QF_CROSS_ATTN_Q, il, "weight"));
|
||||
pl.cross_attn_q_b = get_tensor(string_format(TN_QF_CROSS_ATTN_Q, il, "bias"));
|
||||
pl.cross_attn_k_w = get_tensor(string_format(TN_QF_CROSS_ATTN_K, il, "weight"));
|
||||
pl.cross_attn_k_b = get_tensor(string_format(TN_QF_CROSS_ATTN_K, il, "bias"));
|
||||
pl.cross_attn_v_w = get_tensor(string_format(TN_QF_CROSS_ATTN_V, il, "weight"));
|
||||
pl.cross_attn_v_b = get_tensor(string_format(TN_QF_CROSS_ATTN_V, il, "bias"));
|
||||
pl.cross_attn_o_w = get_tensor(string_format(TN_QF_CROSS_ATTN_O, il, "weight"));
|
||||
pl.cross_attn_o_b = get_tensor(string_format(TN_QF_CROSS_ATTN_O, il, "bias"));
|
||||
pl.cross_attn_norm_w = get_tensor(string_format(TN_QF_CROSS_ATTN_N, il, "weight"));
|
||||
pl.cross_attn_norm_b = get_tensor(string_format(TN_QF_CROSS_ATTN_N, il, "bias"));
|
||||
|
||||
pl.ff_up_w = get_tensor(string_format(TN_QF_FFN_UP, il, "weight"));
|
||||
pl.ff_up_b = get_tensor(string_format(TN_QF_FFN_UP, il, "bias"));
|
||||
pl.ff_down_w = get_tensor(string_format(TN_QF_FFN_DOWN, il, "weight"));
|
||||
pl.ff_down_b = get_tensor(string_format(TN_QF_FFN_DOWN, il, "bias"));
|
||||
pl.ln_2_w = get_tensor(string_format(TN_QF_FFN_NORM, il, "weight"));
|
||||
pl.ln_2_b = get_tensor(string_format(TN_QF_FFN_NORM, il, "bias"));
|
||||
}
|
||||
} break;
|
||||
default:
|
||||
GGML_ASSERT(false && "unknown projector type");
|
||||
}
|
||||
@@ -2960,6 +3102,11 @@ int clip_n_output_tokens(const struct clip_ctx * ctx, struct clip_image_f32 * im
|
||||
}
|
||||
}
|
||||
} break;
|
||||
// Output token count for MiniCPM-V 4.6.
case PROJECTOR_TYPE_MINICPMV4_6:
|
||||
{
|
||||
// two successive 2x2 merges: the ViT merger keeps 1 token out of 4 and the
// final merger does the same again, i.e. 4 * 4 = 16x fewer tokens overall
// (integer division: any remainder of n_patches % 16 is dropped)
|
||||
n_patches = n_patches / 16;
|
||||
} break;
|
||||
case PROJECTOR_TYPE_QWEN2VL:
|
||||
case PROJECTOR_TYPE_QWEN25VL:
|
||||
case PROJECTOR_TYPE_QWEN3VL:
|
||||
@@ -3105,6 +3252,12 @@ int clip_n_output_tokens(const struct clip_ctx * ctx, struct clip_image_f32 * im
|
||||
}
|
||||
n_patches = n;
|
||||
} break;
|
||||
case PROJECTOR_TYPE_GRANITE_SPEECH:
    {
        // The projector works on fixed-size windows of encoder frames and emits
        // (window size / downsample rate) tokens for every window; a partially
        // filled trailing window still counts as a full one (ceiling division).
        const auto & hp = ctx->model.hparams;

        const int window_len         = hp.audio_proj_window_size;
        const int n_windows          = (img->nx + window_len - 1) / window_len;
        const int tokens_per_window  = window_len / hp.audio_proj_downsample_rate;

        n_patches = n_windows * tokens_per_window;
    } break;
|
||||
default:
|
||||
GGML_ABORT("unsupported projector type");
|
||||
}
|
||||
@@ -3276,6 +3429,92 @@ bool clip_image_batch_encode(clip_ctx * ctx, const int n_threads, const clip_ima
|
||||
}
|
||||
set_input_f32("omega", omega);
|
||||
} break;
|
||||
case PROJECTOR_TYPE_MINICPMV4_6:
    {
        // Fills every host-side graph input used by the MiniCPM-V 4.6 path:
        //   - "positions":                   bucketed position ids for the patch grid
        //   - "vit_merger_window_idx" / "vit_merger_inv_window_idx":
        //                                    permutation into 2x2-window-major order and its inverse
        //   - "vit_merger_window_mask":      block-diagonal mask over the reordered tokens
        //   - "vit_merger_ds_idx_{0..3}":    gather indices for the ViT merger 2x2 downsample
        //   - "merger_ds_idx_{0..3}":        gather indices for the final merger 2x2 downsample
        //
        // NOTE(review): the index tables only cover (pos_h/2) x (pos_w/2) complete
        // 2x2 windows. With an odd pos_h or pos_w the tail of window_idx stays 0 and
        // inv_window_idx is no longer a true inverse; the final merger additionally
        // halves the grid again. Presumably preprocessing guarantees suitable grid
        // sizes -- confirm.

        // SigLIP position buckets (same as resampler path)
        // The per-axis tables are sized from the actual grid instead of a fixed
        // 1024-entry stack array, so a large grid cannot write out of bounds.
        const int n_buckets = 70;
        std::vector<int> bucket_coords_h(pos_h);
        std::vector<int> bucket_coords_w(pos_w);
        for (int i = 0; i < pos_h; i++) {
            bucket_coords_h[i] = std::floor(static_cast<double>(n_buckets)*i/pos_h);
        }
        for (int i = 0; i < pos_w; i++) {
            bucket_coords_w[i] = std::floor(static_cast<double>(n_buckets)*i/pos_w);
        }

        std::vector<int32_t> positions(pos_h * pos_w);
        for (int i = 0, id = 0; i < pos_h; i++) {
            for (int j = 0; j < pos_w; j++) {
                positions[id++] = bucket_coords_h[i]*n_buckets + bucket_coords_w[j];
            }
        }
        set_input_i32("positions", positions);

        const int half_h = pos_h / 2;
        const int half_w = pos_w / 2;

        // window reorder indices for 2x2 windows: the four patches of each
        // window become consecutive (top-left, top-right, bottom-left, bottom-right)
        std::vector<int32_t> window_idx(n_pos);
        std::vector<int32_t> inv_window_idx(n_pos);
        {
            int k = 0;
            for (int wi = 0; wi < half_h; wi++) {
                for (int wj = 0; wj < half_w; wj++) {
                    window_idx[k++] = (2*wi    ) * pos_w + (2*wj    );
                    window_idx[k++] = (2*wi    ) * pos_w + (2*wj + 1);
                    window_idx[k++] = (2*wi + 1) * pos_w + (2*wj    );
                    window_idx[k++] = (2*wi + 1) * pos_w + (2*wj + 1);
                }
            }
            // inverse permutation: where each original patch ended up
            for (int i = 0; i < n_pos; i++) {
                inv_window_idx[window_idx[i]] = i;
            }
        }
        set_input_i32("vit_merger_window_idx", window_idx);
        set_input_i32("vit_merger_inv_window_idx", inv_window_idx);

        // block-diagonal attention mask: tokens in the same 4-token
        // window attend to each other (mask = 0), all other positions
        // are masked out with the most negative finite float.
        // matches the window-major reorder above.
        std::vector<float> window_mask_data(n_pos * n_pos, std::numeric_limits<float>::lowest());
        for (int wi = 0; wi < n_pos / 4; wi++) {
            for (int i = 0; i < 4; i++) {
                for (int j = 0; j < 4; j++) {
                    window_mask_data[(wi*4 + i) * n_pos + (wi*4 + j)] = 0.0f;
                }
            }
        }
        set_input_f32("vit_merger_window_mask", window_mask_data);

        // 2x2 downsample gather indices: for a ds_h x ds_w output grid, pick the
        // element at offset (off_r, off_c) inside every 2x2 cell of a row-major
        // source grid that is stride_w elements wide
        auto make_ds_idx = [](int off_r, int off_c, int ds_h, int ds_w, int stride_w) {
            std::vector<int32_t> idx(ds_h * ds_w);
            for (int i = 0; i < ds_h; i++) {
                for (int j = 0; j < ds_w; j++) {
                    idx[i * ds_w + j] = (2*i + off_r) * stride_w + (2*j + off_c);
                }
            }
            return idx;
        };

        // ViT merger 2x2 downsample indices (operates on the pos_h x pos_w grid)
        auto vit_merger_ds_0 = make_ds_idx(0, 0, half_h, half_w, pos_w);
        auto vit_merger_ds_1 = make_ds_idx(0, 1, half_h, half_w, pos_w);
        auto vit_merger_ds_2 = make_ds_idx(1, 0, half_h, half_w, pos_w);
        auto vit_merger_ds_3 = make_ds_idx(1, 1, half_h, half_w, pos_w);
        set_input_i32("vit_merger_ds_idx_0", vit_merger_ds_0);
        set_input_i32("vit_merger_ds_idx_1", vit_merger_ds_1);
        set_input_i32("vit_merger_ds_idx_2", vit_merger_ds_2);
        set_input_i32("vit_merger_ds_idx_3", vit_merger_ds_3);

        // final merger 2x2 downsample indices (operates on half_h x half_w grid)
        const int qh = half_h / 2;
        const int qw = half_w / 2;
        auto m_ds_0 = make_ds_idx(0, 0, qh, qw, half_w);
        auto m_ds_1 = make_ds_idx(0, 1, qh, qw, half_w);
        auto m_ds_2 = make_ds_idx(1, 0, qh, qw, half_w);
        auto m_ds_3 = make_ds_idx(1, 1, qh, qw, half_w);
        set_input_i32("merger_ds_idx_0", m_ds_0);
        set_input_i32("merger_ds_idx_1", m_ds_1);
        set_input_i32("merger_ds_idx_2", m_ds_2);
        set_input_i32("merger_ds_idx_3", m_ds_3);
    } break;
|
||||
case PROJECTOR_TYPE_QWEN2VL:
|
||||
case PROJECTOR_TYPE_QWEN3VL:
|
||||
case PROJECTOR_TYPE_GLM4V:
|
||||
@@ -3701,6 +3940,39 @@ bool clip_image_batch_encode(clip_ctx * ctx, const int n_threads, const clip_ima
|
||||
}
|
||||
set_input_f32("pos_emb", pos_emb);
|
||||
} break;
|
||||
case PROJECTOR_TYPE_GRANITE_SPEECH:
    {
        // Host-side inputs for the granite speech encoder graph:
        //   - "attn_dists": one row index per (query, key) pair of an attention block,
        //                   used by the graph to gather relative position embeddings
        //   - "attn_mask":  only present when the last block is partially filled
        const auto & hp = ctx->model.hparams;
        const int block_len = hp.audio_chunk_size;
        const int pos_bias  = hp.audio_max_pos_emb;

        // signed query-key distance, clamped to +/- block_len and shifted by
        // pos_bias so the result can serve as a non-negative table index
        std::vector<int32_t> rel_idx(block_len * block_len);
        {
            int out = 0;
            for (int row = 0; row < block_len; ++row) {
                for (int col = 0; col < block_len; ++col) {
                    const int delta   = row - col;
                    const int clamped = delta < -block_len ? -block_len
                                      : delta >  block_len ?  block_len
                                      : delta;
                    rel_idx[out++] = clamped + pos_bias;
                }
            }
        }
        set_input_i32("attn_dists", rel_idx);

        const int total_frames = image_size_width;
        const int tail_frames  = total_frames % block_len;
        if (tail_frames > 0) {
            // every block except the last is fully valid (all zeros); in the last
            // block any pair that touches a padded frame is disabled
            const int n_blocks = (total_frames + block_len - 1) / block_len;
            std::vector<float> block_mask(block_len * block_len * n_blocks, 0.0f);

            float * tail_block = block_mask.data() + (n_blocks - 1) * block_len * block_len;
            for (int q = 0; q < block_len; ++q) {
                for (int k = 0; k < block_len; ++k) {
                    const bool both_valid = q < tail_frames && k < tail_frames;
                    if (!both_valid) {
                        tail_block[q * block_len + k] = -INFINITY;
                    }
                }
            }
            set_input_f32("attn_mask", block_mask);
        }
    } break;
|
||||
default:
|
||||
GGML_ABORT("Unknown projector type");
|
||||
}
|
||||
@@ -3797,6 +4069,8 @@ int clip_n_mmproj_embd(const struct clip_ctx * ctx) {
|
||||
return ctx->model.mm_3_b->ne[0];
|
||||
case PROJECTOR_TYPE_MINICPMV:
|
||||
return ctx->model.mm_model_proj->ne[0];
|
||||
case PROJECTOR_TYPE_MINICPMV4_6:
|
||||
return ctx->model.mm_ffn_down_w->ne[1];
|
||||
case PROJECTOR_TYPE_GLM_EDGE:
|
||||
return ctx->model.mm_model_mlp_3_w->ne[1];
|
||||
case PROJECTOR_TYPE_QWEN2VL:
|
||||
@@ -3849,6 +4123,8 @@ int clip_n_mmproj_embd(const struct clip_ctx * ctx) {
|
||||
return ctx->model.position_embeddings->ne[0];
|
||||
case PROJECTOR_TYPE_GEMMA4A:
|
||||
return ctx->model.hparams.projection_dim;
|
||||
case PROJECTOR_TYPE_GRANITE_SPEECH:
|
||||
return ctx->model.qf_proj_linear_w->ne[1];
|
||||
case PROJECTOR_TYPE_GLM4V:
|
||||
return ctx->model.mm_ffn_down_w->ne[1];
|
||||
default:
|
||||
@@ -3861,6 +4137,9 @@ int clip_is_minicpmv(const struct clip_ctx * ctx) {
|
||||
if (ctx->proj_type() == PROJECTOR_TYPE_MINICPMV) {
|
||||
return ctx->model.hparams.minicpmv_version;
|
||||
}
|
||||
if (ctx->proj_type() == PROJECTOR_TYPE_MINICPMV4_6) {
|
||||
return 46;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
@@ -68,6 +68,8 @@ int main(int argc, char ** argv) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
ggml_backend_load_all();
|
||||
|
||||
LOG_INF("%s: loading model: %s\n", __func__, params.model.path.c_str());
|
||||
|
||||
mtmd::context_ptr ctx_mtmd;
|
||||
|
||||
275
tools/mtmd/models/granite-speech.cpp
Normal file
275
tools/mtmd/models/granite-speech.cpp
Normal file
@@ -0,0 +1,275 @@
|
||||
#include "models.h"
|
||||
|
||||
ggml_cgraph * clip_graph_granite_speech::build() {
|
||||
const int n_frames = img.nx;
|
||||
const int context_size = hparams.audio_chunk_size;
|
||||
const int ctc_layer = n_layer / 2;
|
||||
const int conv_kernel = hparams.audio_conv_kernel_size;
|
||||
const int conv_pad = conv_kernel / 2;
|
||||
|
||||
const int num_blocks = (n_frames + context_size - 1) / context_size;
|
||||
const int padded_len = num_blocks * context_size;
|
||||
const int remainder = n_frames % context_size;
|
||||
|
||||
ggml_tensor * attn_dists = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, context_size * context_size);
|
||||
ggml_set_name(attn_dists, "attn_dists");
|
||||
ggml_set_input(attn_dists);
|
||||
|
||||
ggml_tensor * attn_mask = nullptr;
|
||||
if (remainder > 0) {
|
||||
attn_mask = ggml_new_tensor_4d(ctx0, GGML_TYPE_F32,
|
||||
context_size, context_size, 1, num_blocks);
|
||||
ggml_set_name(attn_mask, "attn_mask");
|
||||
ggml_set_input(attn_mask);
|
||||
}
|
||||
|
||||
ggml_tensor * inp = build_inp_raw(1);
|
||||
auto * cur = ggml_cont(ctx0, ggml_transpose(ctx0, inp));
|
||||
cb(cur, "inp_transposed", -1);
|
||||
|
||||
cur = build_mm(model.inp_proj_w, cur);
|
||||
cur = ggml_add(ctx0, cur, model.inp_proj_b);
|
||||
cb(cur, "inp_linear", -1);
|
||||
|
||||
for (int il = 0; il < n_layer; il++) {
|
||||
const auto & layer = model.layers[il];
|
||||
auto * residual = cur;
|
||||
|
||||
// ffn1 (half-step)
|
||||
{
|
||||
auto * ffn1 = build_norm(cur, layer.ff_norm_w, layer.ff_norm_b,
|
||||
NORM_TYPE_NORMAL, eps, il);
|
||||
cb(ffn1, "ffn1_norm", il);
|
||||
|
||||
ffn1 = build_ffn(ffn1,
|
||||
layer.ff_up_w, layer.ff_up_b,
|
||||
nullptr, nullptr,
|
||||
layer.ff_down_w, layer.ff_down_b,
|
||||
FFN_SILU, il);
|
||||
cb(ffn1, "ffn1_out", il);
|
||||
|
||||
residual = ggml_add(ctx0, residual, ggml_scale(ctx0, ffn1, 0.5f));
|
||||
cb(residual, "ffn1_residual", il);
|
||||
}
|
||||
|
||||
// build_attn not used here: Shaw RPE needs pos_attn = mul_mat(pos_emb, Q)
|
||||
// injected between KQ product and softmax, which build_attn doesn't support
|
||||
{
|
||||
auto * normed = build_norm(residual, layer.ln_1_w, layer.ln_1_b,
|
||||
NORM_TYPE_NORMAL, eps, il);
|
||||
cb(normed, "attn_norm", il);
|
||||
|
||||
if (n_frames < padded_len) {
|
||||
normed = ggml_pad(ctx0, normed, 0, padded_len - n_frames, 0, 0);
|
||||
}
|
||||
|
||||
ggml_tensor * Q = build_mm(layer.q_w, normed);
|
||||
ggml_tensor * K = build_mm(layer.k_w, normed);
|
||||
ggml_tensor * V = build_mm(layer.v_w, normed);
|
||||
|
||||
Q = ggml_reshape_4d(ctx0, Q, d_head, n_head, context_size, num_blocks);
|
||||
K = ggml_reshape_4d(ctx0, K, d_head, n_head, context_size, num_blocks);
|
||||
V = ggml_reshape_4d(ctx0, V, d_head, n_head, context_size, num_blocks);
|
||||
|
||||
ggml_tensor * Q_perm = ggml_permute(ctx0, Q, 0, 2, 1, 3);
|
||||
ggml_tensor * K_perm = ggml_cont(ctx0, ggml_permute(ctx0, K, 0, 2, 1, 3));
|
||||
|
||||
ggml_tensor * kq = ggml_mul_mat(ctx0, K_perm, Q_perm);
|
||||
|
||||
// Shaw RPE: pos_emb ne[2]=1 broadcasts against Q ne[2]=num_blocks in mul_mat
|
||||
ggml_tensor * pos_emb = ggml_get_rows(ctx0, layer.attn_rel_pos_emb, attn_dists);
|
||||
pos_emb = ggml_reshape_3d(ctx0, pos_emb, d_head, context_size, context_size);
|
||||
pos_emb = ggml_reshape_4d(ctx0, pos_emb, d_head, context_size, 1, context_size);
|
||||
|
||||
ggml_tensor * Q_shaw = ggml_permute(ctx0, Q, 0, 1, 3, 2);
|
||||
ggml_tensor * pos_attn = ggml_mul_mat(ctx0, pos_emb, Q_shaw);
|
||||
pos_attn = ggml_cont(ctx0, ggml_permute(ctx0, pos_attn, 0, 2, 3, 1));
|
||||
|
||||
ggml_tensor * scores = ggml_add(ctx0, kq, pos_attn);
|
||||
ggml_tensor * attn_weights = ggml_soft_max_ext(ctx0, scores, attn_mask,
|
||||
kq_scale, 0.0f);
|
||||
|
||||
ggml_tensor * V_perm = ggml_cont(ctx0, ggml_permute(ctx0, V, 1, 2, 0, 3));
|
||||
ggml_tensor * attn_out = ggml_mul_mat(ctx0, V_perm, attn_weights);
|
||||
|
||||
attn_out = ggml_permute(ctx0, attn_out, 0, 2, 1, 3);
|
||||
attn_out = ggml_cont_2d(ctx0, attn_out, n_embd, padded_len);
|
||||
|
||||
if (n_frames < padded_len) {
|
||||
attn_out = ggml_view_2d(ctx0, attn_out,
|
||||
n_embd, n_frames, attn_out->nb[1], 0);
|
||||
}
|
||||
|
||||
cur = build_mm(layer.o_w, attn_out);
|
||||
cur = ggml_add(ctx0, cur, layer.o_b);
|
||||
cb(cur, "attn_out", il);
|
||||
}
|
||||
|
||||
residual = ggml_add(ctx0, residual, cur);
|
||||
|
||||
// conv module
|
||||
{
|
||||
cur = build_norm(residual, layer.norm_conv_w, layer.norm_conv_b,
|
||||
NORM_TYPE_NORMAL, eps, il);
|
||||
cb(cur, "conv_norm", il);
|
||||
|
||||
auto * x = build_mm(layer.conv_pw1_w, cur);
|
||||
x = ggml_add(ctx0, x, layer.conv_pw1_b);
|
||||
cb(x, "conv_pw1", il);
|
||||
|
||||
// GLU: ggml has no fused op, manual split + sigmoid gate
|
||||
{
|
||||
int64_t d = x->ne[0] / 2;
|
||||
ggml_tensor * gate = ggml_sigmoid(ctx0,
|
||||
ggml_view_2d(ctx0, x, d, x->ne[1], x->nb[1], d * x->nb[0]));
|
||||
x = ggml_mul(ctx0,
|
||||
ggml_view_2d(ctx0, x, d, x->ne[1], x->nb[1], 0), gate);
|
||||
x = ggml_cont(ctx0, ggml_transpose(ctx0, x));
|
||||
}
|
||||
cb(x, "conv_glu", il);
|
||||
|
||||
x = ggml_pad(ctx0, x, conv_pad, 0, 0, 0);
|
||||
x = ggml_roll(ctx0, x, conv_pad, 0, 0, 0);
|
||||
x = ggml_pad(ctx0, x, conv_pad, 0, 0, 0);
|
||||
x = ggml_ssm_conv(ctx0, x, layer.conv_dw_w);
|
||||
cb(x, "conv_dw", il);
|
||||
|
||||
// folded batch norm
|
||||
x = ggml_add(ctx0, ggml_mul(ctx0, x, layer.conv_norm_w), layer.conv_norm_b);
|
||||
x = ggml_silu(ctx0, x);
|
||||
cb(x, "conv_bn_silu", il);
|
||||
|
||||
x = build_mm(layer.conv_pw2_w, x);
|
||||
x = ggml_add(ctx0, x, layer.conv_pw2_b);
|
||||
cb(x, "conv_pw2", il);
|
||||
|
||||
cur = x;
|
||||
}
|
||||
|
||||
residual = ggml_add(ctx0, residual, cur);
|
||||
|
||||
// ffn2 (half-step)
|
||||
{
|
||||
auto * ffn2 = build_norm(residual, layer.ff_norm_1_w, layer.ff_norm_1_b,
|
||||
NORM_TYPE_NORMAL, eps, il);
|
||||
cb(ffn2, "ffn2_norm", il);
|
||||
|
||||
ffn2 = build_ffn(ffn2,
|
||||
layer.ff_up_1_w, layer.ff_up_1_b,
|
||||
nullptr, nullptr,
|
||||
layer.ff_down_1_w, layer.ff_down_1_b,
|
||||
FFN_SILU, il);
|
||||
cb(ffn2, "ffn2_out", il);
|
||||
|
||||
residual = ggml_add(ctx0, residual, ggml_scale(ctx0, ffn2, 0.5f));
|
||||
}
|
||||
|
||||
cur = build_norm(residual, layer.ln_2_w, layer.ln_2_b,
|
||||
NORM_TYPE_NORMAL, eps, il);
|
||||
cb(cur, "layer_out", il);
|
||||
|
||||
// CTC branch
|
||||
if (il + 1 == ctc_layer) {
|
||||
auto * mid = build_mm(model.ctc_out_w, cur);
|
||||
mid = ggml_add(ctx0, mid, model.ctc_out_b);
|
||||
mid = ggml_soft_max(ctx0, mid);
|
||||
mid = build_mm(model.ctc_out_mid_w, mid);
|
||||
mid = ggml_add(ctx0, mid, model.ctc_out_mid_b);
|
||||
cur = ggml_add(ctx0, cur, mid);
|
||||
cb(cur, "ctc_branch", il);
|
||||
}
|
||||
}
|
||||
|
||||
cb(cur, "encoder_out", -1);
|
||||
|
||||
// QFormer projector
|
||||
{
|
||||
const int window_size = hparams.audio_proj_window_size;
|
||||
const int num_queries = window_size / hparams.audio_proj_downsample_rate;
|
||||
const int proj_n_head = hparams.audio_proj_head_count;
|
||||
const int proj_d_head = n_embd / proj_n_head;
|
||||
const float proj_kq_scale = 1.0f / sqrtf((float)proj_d_head);
|
||||
const float proj_eps = 1e-12f;
|
||||
const int nblocks_proj = (n_frames + window_size - 1) / window_size;
|
||||
const int padded_proj = nblocks_proj * window_size;
|
||||
|
||||
if (n_frames < padded_proj) {
|
||||
cur = ggml_pad(ctx0, cur, 0, padded_proj - n_frames, 0, 0);
|
||||
}
|
||||
|
||||
ggml_tensor * enc_windows = ggml_reshape_3d(ctx0, cur, n_embd, window_size, nblocks_proj);
|
||||
|
||||
ggml_tensor * queries = build_norm(model.qf_proj_query,
|
||||
model.qf_proj_norm_w, model.qf_proj_norm_b,
|
||||
NORM_TYPE_NORMAL, proj_eps, -1);
|
||||
{
|
||||
ggml_tensor * q_3d = ggml_reshape_3d(ctx0, queries, n_embd, num_queries, 1);
|
||||
ggml_tensor * q_shape = ggml_new_tensor_3d(ctx0, GGML_TYPE_F32,
|
||||
n_embd, num_queries, nblocks_proj);
|
||||
queries = ggml_repeat(ctx0, q_3d, q_shape);
|
||||
}
|
||||
|
||||
for (int il = 0; il < (int)model.qf_proj_layers.size(); il++) {
|
||||
const auto & pl = model.qf_proj_layers[il];
|
||||
|
||||
// self-attention
|
||||
{
|
||||
ggml_tensor * Q = ggml_add(ctx0, build_mm(pl.q_w, queries), pl.q_b);
|
||||
ggml_tensor * K = ggml_add(ctx0, build_mm(pl.k_w, queries), pl.k_b);
|
||||
ggml_tensor * V = ggml_add(ctx0, build_mm(pl.v_w, queries), pl.v_b);
|
||||
|
||||
Q = ggml_reshape_4d(ctx0, Q, proj_d_head, proj_n_head, num_queries, nblocks_proj);
|
||||
K = ggml_reshape_4d(ctx0, K, proj_d_head, proj_n_head, num_queries, nblocks_proj);
|
||||
V = ggml_reshape_4d(ctx0, V, proj_d_head, proj_n_head, num_queries, nblocks_proj);
|
||||
|
||||
ggml_tensor * sa_out = build_attn(pl.o_w, pl.o_b,
|
||||
Q, K, V, nullptr, proj_kq_scale, il);
|
||||
sa_out = ggml_reshape_3d(ctx0, sa_out, n_embd, num_queries, nblocks_proj);
|
||||
|
||||
queries = build_norm(ggml_add(ctx0, sa_out, queries),
|
||||
pl.ln_1_w, pl.ln_1_b,
|
||||
NORM_TYPE_NORMAL, proj_eps, il);
|
||||
}
|
||||
|
||||
// cross-attention
|
||||
{
|
||||
ggml_tensor * Q = ggml_add(ctx0, build_mm(pl.cross_attn_q_w, queries), pl.cross_attn_q_b);
|
||||
ggml_tensor * K = ggml_add(ctx0, build_mm(pl.cross_attn_k_w, enc_windows), pl.cross_attn_k_b);
|
||||
ggml_tensor * V = ggml_add(ctx0, build_mm(pl.cross_attn_v_w, enc_windows), pl.cross_attn_v_b);
|
||||
|
||||
Q = ggml_reshape_4d(ctx0, Q, proj_d_head, proj_n_head, num_queries, nblocks_proj);
|
||||
K = ggml_reshape_4d(ctx0, K, proj_d_head, proj_n_head, window_size, nblocks_proj);
|
||||
V = ggml_reshape_4d(ctx0, V, proj_d_head, proj_n_head, window_size, nblocks_proj);
|
||||
|
||||
ggml_tensor * ca_out = build_attn(pl.cross_attn_o_w, pl.cross_attn_o_b,
|
||||
Q, K, V, nullptr, proj_kq_scale, il);
|
||||
ca_out = ggml_reshape_3d(ctx0, ca_out, n_embd, num_queries, nblocks_proj);
|
||||
|
||||
queries = build_norm(ggml_add(ctx0, ca_out, queries),
|
||||
pl.cross_attn_norm_w, pl.cross_attn_norm_b,
|
||||
NORM_TYPE_NORMAL, proj_eps, il);
|
||||
}
|
||||
|
||||
// ffn
|
||||
{
|
||||
ggml_tensor * ffn_out = build_ffn(queries,
|
||||
pl.ff_up_w, pl.ff_up_b,
|
||||
nullptr, nullptr,
|
||||
pl.ff_down_w, pl.ff_down_b,
|
||||
FFN_GELU, il);
|
||||
|
||||
queries = build_norm(ggml_add(ctx0, ffn_out, queries),
|
||||
pl.ln_2_w, pl.ln_2_b,
|
||||
NORM_TYPE_NORMAL, proj_eps, il);
|
||||
}
|
||||
}
|
||||
|
||||
cur = ggml_reshape_2d(ctx0, queries, n_embd, num_queries * nblocks_proj);
|
||||
cur = ggml_add(ctx0, build_mm(model.qf_proj_linear_w, cur), model.qf_proj_linear_b);
|
||||
cb(cur, "projector_out", -1);
|
||||
}
|
||||
|
||||
ggml_build_forward_expand(gf, cur);
|
||||
|
||||
return gf;
|
||||
}
|
||||
@@ -112,3 +112,294 @@ ggml_cgraph * clip_graph_minicpmv::build() {
|
||||
|
||||
return gf;
|
||||
}
|
||||
|
||||
// Builds the vision encoder graph for the MiniCPM-V 4.6 projector type.
//
// Pipeline, in order:
//   1. patch embedding + learned positional embedding (+ optional pre-LN)
//   2. ViT layers 0..insert_layer_id (inclusive) over all n_patches tokens
//   3. ViT merger: windowed self-attention (block-diagonal mask) followed by a
//      2x2 spatial downsample + MLP with a mean-pooled residual
//   4. ViT layers (insert_layer_id+1)..n_layer-1 over the downsampled tokens
//   5. optional post-LN
//   6. final merger: another 2x2 spatial merge + MLP producing the projector output
//
// All index/mask tensors are declared as graph inputs here by name; their contents
// are expected to be filled in by the caller before the graph is computed.
//
// Returns the graph `gf` with the final merger output expanded into it.
ggml_cgraph * clip_graph_minicpmv4_6::build() {
    const int insert_lid = hparams.insert_layer_id;
    const int n_pos      = n_patches;
    const int half_h     = n_patches_y / 2;
    const int half_w     = n_patches_x / 2;
    const int n_ds       = half_h * half_w; // after ViT merger 2x2 downsample
    const int qh         = half_h / 2;
    const int qw         = half_w / 2;
    const int n_ds2      = qh * qw;         // after final merger 2x2 downsample

    // declares a named 1D I32 graph input of length n
    auto add_i32_input = [&](const char * name, int n) {
        ggml_tensor * t = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, n);
        ggml_set_name(t, name);
        ggml_set_input(t);
        return t;
    };

    // linear projection with an optional bias: w * x (+ b when b is non-null)
    auto project = [&](ggml_tensor * w, ggml_tensor * b, ggml_tensor * x) {
        ggml_tensor * out = build_mm(w, x);
        if (b) {
            out = ggml_add(ctx0, out, b);
        }
        return out;
    };

    // position indices for ViT learned positional embeddings
    ggml_tensor * positions        = add_i32_input("positions", n_pos);
    ggml_tensor * learned_pos_embd = ggml_get_rows(ctx0, model.position_embeddings, positions);

    // ViT merger window reorder indices + block-diagonal mask
    // (mask layout follows qwen2vl: -inf except for 4x4 blocks on the diagonal,
    // so each window-major group of 4 tokens only attends to itself)
    ggml_tensor * vit_merger_window_idx     = add_i32_input("vit_merger_window_idx", n_pos);
    ggml_tensor * vit_merger_inv_window_idx = add_i32_input("vit_merger_inv_window_idx", n_pos);
    ggml_tensor * vit_merger_window_mask    = ggml_new_tensor_2d(ctx0, GGML_TYPE_F32, n_pos, n_pos);
    ggml_set_name(vit_merger_window_mask, "vit_merger_window_mask");
    ggml_set_input(vit_merger_window_mask);
    if (flash_attn_type == CLIP_FLASH_ATTN_TYPE_ENABLED) {
        // the mask is cast to F16 only when flash attention is enabled
        vit_merger_window_mask = ggml_cast(ctx0, vit_merger_window_mask, GGML_TYPE_F16);
    }

    // ViT merger 2x2 downsample gather indices
    ggml_tensor * vit_merger_ds_idx_0 = add_i32_input("vit_merger_ds_idx_0", n_ds);
    ggml_tensor * vit_merger_ds_idx_1 = add_i32_input("vit_merger_ds_idx_1", n_ds);
    ggml_tensor * vit_merger_ds_idx_2 = add_i32_input("vit_merger_ds_idx_2", n_ds);
    ggml_tensor * vit_merger_ds_idx_3 = add_i32_input("vit_merger_ds_idx_3", n_ds);

    // final merger 2x2 downsample gather indices
    ggml_tensor * merger_ds_idx_0 = add_i32_input("merger_ds_idx_0", n_ds2);
    ggml_tensor * merger_ds_idx_1 = add_i32_input("merger_ds_idx_1", n_ds2);
    ggml_tensor * merger_ds_idx_2 = add_i32_input("merger_ds_idx_2", n_ds2);
    ggml_tensor * merger_ds_idx_3 = add_i32_input("merger_ds_idx_3", n_ds2);

    // patch embedding + positional embedding
    ggml_tensor * inp = build_inp();
    inp = ggml_add(ctx0, inp, learned_pos_embd);
    cb(inp, "pos_embed", -1);

    ggml_tensor * inpL = inp;
    if (model.pre_ln_w) {
        inpL = build_norm(inpL, model.pre_ln_w, model.pre_ln_b, NORM_TYPE_NORMAL, eps, -1);
        cb(inpL, "pre_ln", -1);
    }

    // One pre-norm ViT block applied in place to inpL, which holds n_tok tokens.
    // Mirrors the separate-qkv path of clip_graph::build_vit; shared by the two
    // layer segments that surround the ViT merger so they cannot drift apart.
    auto build_vit_layer = [&](int il, int64_t n_tok) {
        const auto & layer = model.layers[il];

        ggml_tensor * cur = build_norm(inpL, layer.ln_1_w, layer.ln_1_b, NORM_TYPE_NORMAL, eps, il);
        cb(cur, "layer_inp_normed", il);

        // self-attention
        {
            ggml_tensor * Qcur = project(layer.q_w, layer.q_b, cur);
            ggml_tensor * Kcur = project(layer.k_w, layer.k_b, cur);
            ggml_tensor * Vcur = project(layer.v_w, layer.v_b, cur);

            Qcur = ggml_reshape_3d(ctx0, Qcur, d_head, n_head, n_tok);
            Kcur = ggml_reshape_3d(ctx0, Kcur, d_head, n_head, n_tok);
            Vcur = ggml_reshape_3d(ctx0, Vcur, d_head, n_head, n_tok);
            cb(Qcur, "Qcur", il);
            cb(Kcur, "Kcur", il);
            cb(Vcur, "Vcur", il);

            cur = build_attn(layer.o_w, layer.o_b, Qcur, Kcur, Vcur, nullptr, kq_scale, il);
            cb(cur, "attn_out", il);
        }

        // optional layer scale, then first residual connection
        if (layer.ls_1_w) {
            cur = ggml_mul(ctx0, cur, layer.ls_1_w);
            cb(cur, "attn_out_scaled", il);
        }
        cur  = ggml_add(ctx0, cur, inpL);
        inpL = cur;
        cb(cur, "ffn_inp", il);

        // feed-forward
        cur = build_norm(cur, layer.ln_2_w, layer.ln_2_b, NORM_TYPE_NORMAL, eps, il);
        cb(cur, "ffn_inp_normed", il);

        cur = build_ffn(cur, layer.ff_up_w, layer.ff_up_b, layer.ff_gate_w, layer.ff_gate_b,
                        layer.ff_down_w, layer.ff_down_b, hparams.ffn_op, il);
        cb(cur, "ffn_out", il);

        // optional layer scale, then second residual connection
        if (layer.ls_2_w) {
            cur = ggml_mul(ctx0, cur, layer.ls_2_w);
            cb(cur, "ffn_out_scaled", il);
        }
        cur = ggml_add(ctx0, inpL, cur);
        cb(cur, "layer_out", il);

        inpL = cur;
    };

    // ViT layers 0..insert_layer_id (inclusive), operating on all patches
    for (int il = 0; il <= insert_lid; il++) {
        build_vit_layer(il, n_pos);
    }

    // ViT merger: window self-attention
    // Tokens are reordered to window-major (4 tokens per window are contiguous),
    // and a block-diagonal mask restricts attention to within each window. This
    // mirrors the qwen2vl windowed-attention pattern so build_attn() can pick the
    // flash-attention path when available.
    {
        ggml_tensor * residual = inpL;
        ggml_tensor * cur = build_norm(inpL,
                                       model.vit_merger_ln1_w, model.vit_merger_ln1_b,
                                       NORM_TYPE_NORMAL, eps, -1);
        cb(cur, "vit_merger_attn_inp_normed", -1);

        cur = ggml_get_rows(ctx0, cur, vit_merger_window_idx);
        cb(cur, "vit_merger_window_reorder", -1);

        ggml_tensor * Qcur = project(model.vit_merger_attn_q_w, model.vit_merger_attn_q_b, cur);
        ggml_tensor * Kcur = project(model.vit_merger_attn_k_w, model.vit_merger_attn_k_b, cur);
        ggml_tensor * Vcur = project(model.vit_merger_attn_v_w, model.vit_merger_attn_v_b, cur);

        Qcur = ggml_reshape_3d(ctx0, Qcur, d_head, n_head, n_pos);
        Kcur = ggml_reshape_3d(ctx0, Kcur, d_head, n_head, n_pos);
        Vcur = ggml_reshape_3d(ctx0, Vcur, d_head, n_head, n_pos);
        cb(Qcur, "vit_merger_Qcur", -1);
        cb(Kcur, "vit_merger_Kcur", -1);
        cb(Vcur, "vit_merger_Vcur", -1);

        cur = build_attn(model.vit_merger_attn_o_w, model.vit_merger_attn_o_b,
                         Qcur, Kcur, Vcur, vit_merger_window_mask, kq_scale, -1);
        cb(cur, "vit_merger_attn_out", -1);

        // undo the window-major reorder before adding the (un-reordered) residual
        cur  = ggml_get_rows(ctx0, cur, vit_merger_inv_window_idx);
        inpL = ggml_add(ctx0, cur, residual);
        cb(inpL, "vit_merger_attn_residual", -1);
    }

    // ViT merger: 2x2 spatial downsample + MLP (4 tokens -> 1)
    {
        ggml_tensor * p0 = ggml_get_rows(ctx0, inpL, vit_merger_ds_idx_0);
        ggml_tensor * p1 = ggml_get_rows(ctx0, inpL, vit_merger_ds_idx_1);
        ggml_tensor * p2 = ggml_get_rows(ctx0, inpL, vit_merger_ds_idx_2);
        ggml_tensor * p3 = ggml_get_rows(ctx0, inpL, vit_merger_ds_idx_3);

        // residual branch: mean of the four gathered tokens
        ggml_tensor * mean_res = ggml_add(ctx0, p0, p1);
        mean_res = ggml_add(ctx0, mean_res, p2);
        mean_res = ggml_add(ctx0, mean_res, p3);
        mean_res = ggml_scale(ctx0, mean_res, 0.25f);
        cb(mean_res, "vit_merger_ds_mean_res", -1);

        // MLP branch: the four tokens concatenated along dim 0
        ggml_tensor * cat = ggml_concat(ctx0, p0, p1, 0);
        cat = ggml_concat(ctx0, cat, p2, 0);
        cat = ggml_concat(ctx0, cat, p3, 0);

        ggml_tensor * cur = build_norm(cat,
                                       model.vit_merger_ds_ln_w, model.vit_merger_ds_ln_b,
                                       NORM_TYPE_NORMAL, eps, -1);
        cb(cur, "vit_merger_ds_normed", -1);

        // ViTWindowAttentionMerger downsample MLP uses gelu_pytorch_tanh (FFN_GELU)
        cur = build_ffn(cur,
                        model.vit_merger_ds_up_w, model.vit_merger_ds_up_b,
                        nullptr, nullptr,
                        model.vit_merger_ds_down_w, model.vit_merger_ds_down_b,
                        FFN_GELU, -1);
        cb(cur, "vit_merger_ds_mlp_out", -1);

        inpL = ggml_add(ctx0, cur, mean_res);
        cb(inpL, "vit_merger_ds_out", -1);
    }

    // ViT layers (insert_layer_id+1)..n_layer-1, operating on the downsampled tokens
    for (int il = insert_lid + 1; il < n_layer; il++) {
        build_vit_layer(il, n_ds);
    }

    if (model.post_ln_w) {
        inpL = build_norm(inpL, model.post_ln_w, model.post_ln_b, NORM_TYPE_NORMAL, eps, -1);
        cb(inpL, "post_ln", -1);
    }

    // Final Merger (DownsampleMLP): another 2x2 spatial merge -> projector embedding
    {
        ggml_tensor * p0 = ggml_get_rows(ctx0, inpL, merger_ds_idx_0);
        ggml_tensor * p1 = ggml_get_rows(ctx0, inpL, merger_ds_idx_1);
        ggml_tensor * p2 = ggml_get_rows(ctx0, inpL, merger_ds_idx_2);
        ggml_tensor * p3 = ggml_get_rows(ctx0, inpL, merger_ds_idx_3);

        ggml_tensor * cat = ggml_concat(ctx0, p0, p1, 0);
        cat = ggml_concat(ctx0, cat, p2, 0);
        cat = ggml_concat(ctx0, cat, p3, 0);

        ggml_tensor * cur = build_norm(cat,
                                       model.mm_input_norm_w, model.mm_input_norm_b,
                                       NORM_TYPE_NORMAL, eps, -1);
        cb(cur, "merger_normed", -1);

        // MiniCPMV4_6DownsampleMLP uses nn.GELU() (erf-based, FFN_GELU_ERF)
        cur = build_ffn(cur,
                        model.mm_ffn_up_w, model.mm_ffn_up_b,
                        nullptr, nullptr,
                        model.mm_ffn_down_w, model.mm_ffn_down_b,
                        FFN_GELU_ERF, -1);
        cb(cur, "merger_out", -1);

        inpL = cur;
    }

    ggml_build_forward_expand(gf, inpL);
    return gf;
}
|
||||
|
||||
@@ -56,6 +56,11 @@ struct clip_graph_minicpmv : clip_graph {
|
||||
ggml_cgraph * build() override;
|
||||
};
|
||||
|
||||
// Graph builder specialization of clip_graph; its build() override constructs
// the ViT + window-attention merger + final merger graph (MiniCPM-V 4.6, going by the name).
struct clip_graph_minicpmv4_6 : clip_graph {
|
||||
// forwards the context and the preprocessed image to the clip_graph base
clip_graph_minicpmv4_6(clip_ctx * ctx, const clip_image_f32 & img) : clip_graph(ctx, img) {}
|
||||
// builds and returns the compute graph for this model variant
ggml_cgraph * build() override;
|
||||
};
|
||||
|
||||
struct clip_graph_internvl : clip_graph {
|
||||
clip_graph_internvl(clip_ctx * ctx, const clip_image_f32 & img) : clip_graph(ctx, img) {}
|
||||
ggml_cgraph * build() override;
|
||||
@@ -111,6 +116,11 @@ struct clip_graph_conformer : clip_graph {
|
||||
ggml_cgraph * build() override;
|
||||
};
|
||||
|
||||
// Graph builder specialization of clip_graph; build() is defined out of line.
// NOTE(review): presumably the Granite Speech audio encoder graph, going by the name - confirm.
struct clip_graph_granite_speech : clip_graph {
|
||||
// forwards the context and the input buffer to the clip_graph base
clip_graph_granite_speech(clip_ctx * ctx, const clip_image_f32 & img) : clip_graph(ctx, img) {}
|
||||
// builds and returns the compute graph for this model variant
ggml_cgraph * build() override;
|
||||
};
|
||||
|
||||
struct clip_graph_gemma4a : clip_graph {
|
||||
clip_graph_gemma4a(clip_ctx * ctx, const clip_image_f32 & img) : clip_graph(ctx, img) {}
|
||||
ggml_cgraph * build() override;
|
||||
|
||||
@@ -403,6 +403,11 @@ static bool log_mel_spectrogram(
|
||||
return false;
|
||||
}
|
||||
std::reverse_copy(samples + 1, samples + 1 + stage_2_pad, samples_padded.begin());
|
||||
|
||||
// expose the padded buffer to downstream FFT and to out.n_len computation
|
||||
// mirrors the no_padding and center_padding branches above
|
||||
samples = samples_padded.data();
|
||||
n_samples = samples_padded.size();
|
||||
}
|
||||
|
||||
// preemphasis
|
||||
@@ -650,6 +655,108 @@ bool mtmd_audio_preprocessor_conformer::preprocess(const float *
|
||||
return true;
|
||||
}
|
||||
|
||||
//
|
||||
// mtmd_audio_preprocessor_granite_speech
|
||||
//
|
||||
|
||||
// Populates the cached lookup tables (sin/cos table, Hann window, mel filterbank)
// that preprocess() asserts on before computing the spectrogram.
void mtmd_audio_preprocessor_granite_speech::initialize() {
    const auto fft_size = hparams.audio_n_fft;
    // preprocess() stacks two consecutive frames, so the filterbank covers
    // only half of the model's n_mel_bins
    const auto mel_bins = hparams.n_mel_bins / 2;

    cache.fill_sin_cos_table(fft_size);
    cache.fill_hann_window(hparams.audio_window_len, true);
    cache.fill_mel_filterbank_matrix(mel_bins, fft_size, hparams.audio_sample_rate,
                                     0.0f, -1.0f, false, 1.0f, true);
}
|
||||
|
||||
bool mtmd_audio_preprocessor_granite_speech::preprocess(const float * samples,
|
||||
size_t n_samples,
|
||||
std::vector<mtmd_audio_mel> & output) {
|
||||
if (n_samples == 0) {
|
||||
return false;
|
||||
}
|
||||
|
||||
GGML_ASSERT(!cache.sin_vals.empty());
|
||||
GGML_ASSERT(!cache.cos_vals.empty());
|
||||
GGML_ASSERT(!cache.filters.data.empty());
|
||||
|
||||
const int n_fft = hparams.audio_n_fft;
|
||||
const int pad = n_fft / 2;
|
||||
|
||||
// reflect padding
|
||||
const int n_padded = (int)n_samples + 2 * pad;
|
||||
std::vector<float> padded(n_padded, 0.0f);
|
||||
std::copy(samples, samples + n_samples, padded.data() + pad);
|
||||
for (int i = 0; i < pad; i++) {
|
||||
int src = i + 1;
|
||||
if (src >= (int)n_samples) {
|
||||
src = (int)n_samples - 1;
|
||||
}
|
||||
padded[pad - 1 - i] = samples[src];
|
||||
}
|
||||
for (int i = 0; i < pad; i++) {
|
||||
int src = (int)n_samples - 2 - i;
|
||||
if (src < 0) {
|
||||
src = 0;
|
||||
}
|
||||
padded[pad + (int)n_samples + i] = samples[src];
|
||||
}
|
||||
|
||||
filter_params params;
|
||||
params.n_mel = hparams.n_mel_bins / 2;
|
||||
params.n_fft_bins = 1 + (n_fft / 2);
|
||||
params.hann_window_size = hparams.audio_window_len;
|
||||
params.hop_length = hparams.audio_hop_len;
|
||||
params.sample_rate = hparams.audio_sample_rate;
|
||||
params.no_padding = true;
|
||||
params.center_padding = false;
|
||||
params.preemph = 0.0f;
|
||||
params.use_natural_log = false;
|
||||
params.norm_per_feature = false;
|
||||
params.mel_floor = 1e-10f;
|
||||
|
||||
mtmd_audio_mel mel;
|
||||
if (!log_mel_spectrogram(padded.data(), n_padded, 4, params, cache, mel)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
double mmax = -1e20;
|
||||
for (int i = 0; i < mel.n_mel * mel.n_len; i++) {
|
||||
if (mel.data[i] > mmax) {
|
||||
mmax = mel.data[i];
|
||||
}
|
||||
}
|
||||
mmax -= 8.0;
|
||||
|
||||
for (int i = 0; i < mel.n_mel * mel.n_len; i++) {
|
||||
if (mel.data[i] < mmax) {
|
||||
mel.data[i] = mmax;
|
||||
}
|
||||
mel.data[i] = (mel.data[i] + 4.0) / 4.0;
|
||||
}
|
||||
|
||||
int n_frames = mel.n_len;
|
||||
if (n_frames % 2 == 1) {
|
||||
n_frames--;
|
||||
}
|
||||
const int n_mel = mel.n_mel;
|
||||
const int n_stacked = n_frames / 2;
|
||||
|
||||
mtmd_audio_mel stacked;
|
||||
stacked.n_mel = 2 * n_mel;
|
||||
stacked.n_len = n_stacked;
|
||||
stacked.n_len_org = (int)n_samples;
|
||||
stacked.data.resize(2 * n_mel * n_stacked);
|
||||
|
||||
for (int t = 0; t < n_stacked; t++) {
|
||||
for (int m = 0; m < n_mel; m++) {
|
||||
stacked.data[m * n_stacked + t] = mel.data[m * mel.n_len + 2 * t];
|
||||
stacked.data[(m + n_mel) * n_stacked + t] = mel.data[m * mel.n_len + 2 * t + 1];
|
||||
}
|
||||
}
|
||||
|
||||
output.push_back(std::move(stacked));
|
||||
return true;
|
||||
}
|
||||
|
||||
//
|
||||
// mtmd_audio_preprocessor_gemma4a
|
||||
//
|
||||
|
||||
@@ -78,6 +78,15 @@ struct mtmd_audio_preprocessor_conformer : mtmd_audio_preprocessor {
|
||||
mtmd_audio_cache cache;
|
||||
};
|
||||
|
||||
// Audio preprocessor specialization of mtmd_audio_preprocessor; it is the one
// instantiated for PROJECTOR_TYPE_GRANITE_SPEECH.
struct mtmd_audio_preprocessor_granite_speech : mtmd_audio_preprocessor {
|
||||
// forwards the clip context to the mtmd_audio_preprocessor base
mtmd_audio_preprocessor_granite_speech(const clip_ctx * ctx) : mtmd_audio_preprocessor(ctx) {}
|
||||
// fills the cached tables used by preprocess(); must run before preprocess()
void initialize() override;
|
||||
// converts n_samples input samples into mel data appended to output; returns false on failure
bool preprocess(const float * samples, size_t n_samples, std::vector<mtmd_audio_mel> & output) override;
|
||||
|
||||
private:
|
||||
// precomputed tables (sin/cos, window, mel filterbank) populated by initialize()
mtmd_audio_cache cache;
|
||||
};
|
||||
|
||||
struct mtmd_audio_preprocessor_gemma4a : mtmd_audio_preprocessor {
|
||||
mtmd_audio_preprocessor_gemma4a(const clip_ctx * ctx) : mtmd_audio_preprocessor(ctx) {}
|
||||
void initialize() override;
|
||||
|
||||
@@ -295,6 +295,8 @@ int main(int argc, char ** argv) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
ggml_backend_load_all();
|
||||
|
||||
mtmd_cli_context ctx(params);
|
||||
LOG_INF("%s: loading model: %s\n", __func__, params.model.path.c_str());
|
||||
|
||||
|
||||
@@ -584,7 +584,9 @@ bool mtmd_image_preprocessor_llava_uhd::preprocess(const clip_image_u8 & img, cl
|
||||
|
||||
mtmd_image_preprocessor_llava_uhd::slice_instructions mtmd_image_preprocessor_llava_uhd::get_slice_instructions(const clip_image_size & original_size) {
|
||||
mtmd_image_preprocessor_llava_uhd::slice_instructions res;
|
||||
const int patch_size = hparams.patch_size;
|
||||
// align slices by patch_size * n_merge so an integer number of merger output tokens fits per slice
|
||||
const int n_merge = hparams.n_merge > 0 ? hparams.n_merge : 1;
|
||||
const int patch_size = hparams.patch_size * n_merge;
|
||||
const int slice_size = hparams.image_size;
|
||||
const int original_width = original_size.width;
|
||||
const int original_height = original_size.height;
|
||||
|
||||
@@ -310,6 +310,18 @@ struct mtmd_context {
|
||||
}
|
||||
image_preproc = std::make_unique<mtmd_image_preprocessor_llava_uhd>(ctx_v);
|
||||
} break;
|
||||
case PROJECTOR_TYPE_MINICPMV4_6:
|
||||
{
|
||||
slice_tmpl = MTMD_SLICE_TMPL_MINICPMV_2_6;
|
||||
tok_ov_img_start = {lookup_token("<image>")};
|
||||
tok_ov_img_end = {lookup_token("</image>")};
|
||||
tok_sli_img_start = {lookup_token("<slice>")};
|
||||
tok_sli_img_end = {lookup_token("</slice>")};
|
||||
tok_row_end = {lookup_token("\n")};
|
||||
tok_row_end_trail = false; // no trailing end-of-row token
|
||||
ov_img_first = true;
|
||||
image_preproc = std::make_unique<mtmd_image_preprocessor_llava_uhd>(ctx_v);
|
||||
} break;
|
||||
case PROJECTOR_TYPE_QWEN2VL:
|
||||
case PROJECTOR_TYPE_QWEN25VL:
|
||||
case PROJECTOR_TYPE_QWEN3VL:
|
||||
@@ -532,6 +544,10 @@ struct mtmd_context {
|
||||
{
|
||||
audio_preproc = std::make_unique<mtmd_audio_preprocessor_conformer>(ctx_a);
|
||||
} break;
|
||||
case PROJECTOR_TYPE_GRANITE_SPEECH:
|
||||
{
|
||||
audio_preproc = std::make_unique<mtmd_audio_preprocessor_granite_speech>(ctx_a);
|
||||
} break;
|
||||
case PROJECTOR_TYPE_GEMMA4A:
|
||||
{
|
||||
aud_beg = "<|audio>";
|
||||
|
||||
File diff suppressed because one or more lines are too long
File diff suppressed because it is too large
Load Diff
@@ -2,7 +2,7 @@
|
||||
import { Settings, Plus } from '@lucide/svelte';
|
||||
import { Switch } from '$lib/components/ui/switch';
|
||||
import * as DropdownMenu from '$lib/components/ui/dropdown-menu';
|
||||
import { McpLogo, DropdownMenuSearchable } from '$lib/components/app';
|
||||
import { McpLogo, DropdownMenuSearchable, McpServerIdentity } from '$lib/components/app';
|
||||
import { conversationsStore } from '$lib/stores/conversations.svelte';
|
||||
import { mcpStore } from '$lib/stores/mcp.svelte';
|
||||
import { HealthCheckStatus } from '$lib/enums';
|
||||
@@ -77,6 +77,8 @@
|
||||
{@const healthState = mcpStore.getHealthCheckState(server.id)}
|
||||
{@const hasError = healthState.status === HealthCheckStatus.ERROR}
|
||||
{@const isEnabledForChat = isServerEnabledForChat(server.id)}
|
||||
{@const displayName = getServerLabel(server)}
|
||||
{@const faviconUrl = mcpStore.getServerFavicon(server.id)}
|
||||
|
||||
<button
|
||||
type="button"
|
||||
@@ -85,18 +87,16 @@
|
||||
disabled={hasError}
|
||||
>
|
||||
<div class="flex min-w-0 flex-1 items-center gap-2">
|
||||
{#if mcpStore.getServerFavicon(server.id)}
|
||||
<img
|
||||
src={mcpStore.getServerFavicon(server.id)}
|
||||
alt=""
|
||||
class="h-4 w-4 shrink-0 rounded-sm"
|
||||
onerror={(e) => {
|
||||
(e.currentTarget as HTMLImageElement).style.display = 'none';
|
||||
}}
|
||||
<div class="min-w-0 flex-1">
|
||||
<McpServerIdentity
|
||||
{displayName}
|
||||
{faviconUrl}
|
||||
iconClass="h-4 w-4"
|
||||
iconRounded="rounded-sm"
|
||||
showVersion={false}
|
||||
nameClass="text-sm"
|
||||
/>
|
||||
{/if}
|
||||
|
||||
<span class="truncate text-sm">{getServerLabel(server)}</span>
|
||||
</div>
|
||||
|
||||
{#if hasError}
|
||||
<span
|
||||
|
||||
@@ -29,7 +29,11 @@
|
||||
}
|
||||
}}
|
||||
>
|
||||
<Popover.Trigger class="pointer-events-none absolute inset-0 opacity-0">
|
||||
<Popover.Trigger
|
||||
class="pointer-events-none absolute inset-0 opacity-0"
|
||||
tabindex={-1}
|
||||
aria-hidden="true"
|
||||
>
|
||||
<span class="sr-only">{srLabel}</span>
|
||||
</Popover.Trigger>
|
||||
|
||||
|
||||
@@ -12,6 +12,7 @@
|
||||
sortTreeChildren
|
||||
} from './mcp-resources-browser';
|
||||
import { getDisplayName, getResourceIcon } from '$lib/utils';
|
||||
import { McpServerIdentity } from '$lib/components/app/mcp';
|
||||
|
||||
interface Props {
|
||||
serverName: string;
|
||||
@@ -43,11 +44,12 @@
|
||||
searchQuery = ''
|
||||
}: Props = $props();
|
||||
|
||||
let serverDisplayName = $derived(mcpStore.getServerDisplayName(serverName));
|
||||
let serverFaviconUrl = $derived(mcpStore.getServerFavicon(serverName));
|
||||
|
||||
const hasResources = $derived(serverRes.resources.length > 0);
|
||||
const hasTemplates = $derived(serverRes.templates.length > 0);
|
||||
const hasContent = $derived(hasResources || hasTemplates);
|
||||
const displayName = $derived(mcpStore.getServerDisplayName(serverName));
|
||||
const favicon = $derived(mcpStore.getServerFavicon(serverName));
|
||||
const resourceTree = $derived(buildResourceTree(serverRes.resources, serverName, searchQuery));
|
||||
|
||||
const templateInfos = $derived<MCPResourceTemplateInfo[]>(
|
||||
@@ -153,21 +155,15 @@
|
||||
<ChevronRight class="h-3.5 w-3.5" />
|
||||
{/if}
|
||||
|
||||
<span class="inline-flex flex-col items-start text-left">
|
||||
<span class="inline-flex items-center justify-start gap-1.5 font-medium">
|
||||
{#if favicon}
|
||||
<img
|
||||
src={favicon}
|
||||
alt=""
|
||||
class="h-4 w-4 shrink-0 rounded-sm"
|
||||
onerror={(e) => {
|
||||
(e.currentTarget as HTMLImageElement).style.display = 'none';
|
||||
}}
|
||||
/>
|
||||
{/if}
|
||||
|
||||
{displayName}
|
||||
</span>
|
||||
<span class="inline-flex flex-col items-start gap-1 text-left">
|
||||
<div class="inline-flex min-w-0 items-center gap-1.5">
|
||||
<McpServerIdentity
|
||||
displayName={serverDisplayName}
|
||||
faviconUrl={serverFaviconUrl}
|
||||
iconClass="h-4 w-4"
|
||||
showVersion={false}
|
||||
/>
|
||||
</div>
|
||||
|
||||
<span class="text-xs text-muted-foreground">
|
||||
({serverRes.resources.length} resource{serverRes.resources.length !== 1
|
||||
|
||||
@@ -17,17 +17,17 @@
|
||||
|
||||
interface Props {
|
||||
server: MCPServerSettingsEntry;
|
||||
faviconUrl: string | null;
|
||||
enabled?: boolean;
|
||||
onToggle: (enabled: boolean) => void;
|
||||
onUpdate: (updates: Partial<MCPServerSettingsEntry>) => void;
|
||||
onDelete: () => void;
|
||||
}
|
||||
|
||||
let { server, faviconUrl, enabled, onToggle, onUpdate, onDelete }: Props = $props();
|
||||
let { server, enabled, onToggle, onUpdate, onDelete }: Props = $props();
|
||||
|
||||
let healthState = $derived<HealthCheckState>(mcpStore.getHealthCheckState(server.id));
|
||||
let displayName = $derived(mcpStore.getServerLabel(server));
|
||||
let faviconUrl = $derived(mcpStore.getServerFavicon(server.id));
|
||||
let isIdle = $derived(healthState.status === HealthCheckStatus.IDLE);
|
||||
let isHealthChecking = $derived(healthState.status === HealthCheckStatus.CONNECTING);
|
||||
let isConnected = $derived(healthState.status === HealthCheckStatus.SUCCESS);
|
||||
|
||||
@@ -1,15 +1,14 @@
|
||||
<script lang="ts">
|
||||
import { Cable, ExternalLink } from '@lucide/svelte';
|
||||
import { Switch } from '$lib/components/ui/switch';
|
||||
import { Badge } from '$lib/components/ui/badge';
|
||||
import { McpCapabilitiesBadges } from '$lib/components/app/mcp';
|
||||
import { McpCapabilitiesBadges, McpServerIdentity } from '$lib/components/app/mcp';
|
||||
import { MCP_TRANSPORT_LABELS, MCP_TRANSPORT_ICONS } from '$lib/constants';
|
||||
import { MCPTransportType } from '$lib/enums';
|
||||
import type { MCPServerInfo, MCPCapabilitiesInfo } from '$lib/types';
|
||||
|
||||
interface Props {
|
||||
displayName: string;
|
||||
faviconUrl: string | null;
|
||||
faviconUrl?: string | null;
|
||||
enabled: boolean;
|
||||
disabled?: boolean;
|
||||
onToggle: (enabled: boolean) => void;
|
||||
@@ -32,42 +31,16 @@
|
||||
|
||||
<div class="space-y-3">
|
||||
<div class="flex items-start justify-between gap-3">
|
||||
<div class="grid min-w-0 gap-3">
|
||||
<div class="flex items-center gap-2 overflow-hidden">
|
||||
{#if faviconUrl}
|
||||
<img
|
||||
src={faviconUrl}
|
||||
alt=""
|
||||
class="h-5 w-5 shrink-0 rounded"
|
||||
onerror={(e) => {
|
||||
(e.currentTarget as HTMLImageElement).style.display = 'none';
|
||||
}}
|
||||
/>
|
||||
{:else}
|
||||
<div class="flex h-5 w-5 shrink-0 items-center justify-center rounded bg-muted">
|
||||
<Cable class="h-3 w-3 text-muted-foreground" />
|
||||
</div>
|
||||
{/if}
|
||||
|
||||
<p class="min-w-0 shrink-0 truncate leading-none font-medium">{displayName}</p>
|
||||
|
||||
{#if serverInfo?.version}
|
||||
<Badge variant="secondary" class="h-4 min-w-0 truncate px-1 text-[10px]">
|
||||
v{serverInfo.version}
|
||||
</Badge>
|
||||
{/if}
|
||||
|
||||
{#if serverInfo?.websiteUrl}
|
||||
<a
|
||||
href={serverInfo.websiteUrl}
|
||||
target="_blank"
|
||||
rel="noopener noreferrer"
|
||||
class="shrink-0 text-muted-foreground hover:text-foreground"
|
||||
aria-label="Open website"
|
||||
>
|
||||
<ExternalLink class="h-3 w-3" />
|
||||
</a>
|
||||
{/if}
|
||||
<div class="flex min-w-0 flex-col gap-3">
|
||||
<div class="inline-flex items-center gap-2">
|
||||
<McpServerIdentity
|
||||
{displayName}
|
||||
{faviconUrl}
|
||||
{serverInfo}
|
||||
iconClass="h-5 w-5"
|
||||
iconRounded="rounded"
|
||||
nameClass="leading-6 font-medium"
|
||||
/>
|
||||
</div>
|
||||
|
||||
{#if capabilities || transportType}
|
||||
|
||||
@@ -0,0 +1,67 @@
|
||||
<script lang="ts">
|
||||
import { ExternalLink } from '@lucide/svelte';
|
||||
import { Badge } from '$lib/components/ui/badge';
|
||||
import { TruncatedText } from '$lib/components/app/misc';
|
||||
import { sanitizeExternalUrl } from '$lib/utils';
|
||||
import type { MCPServerInfo } from '$lib/types';
|
||||
|
||||
// Props accepted by the McpServerIdentity component; every prop is optional.
interface Props {
|
||||
// text rendered as the server name (an empty string is rendered when omitted)
displayName?: string;
|
||||
// image URL for the server icon; the <img> is rendered only when this is truthy
faviconUrl?: string | null;
|
||||
// source of the optional version badge and website link
serverInfo?: MCPServerInfo;
|
||||
// size classes applied to the icon image
iconClass?: string;
|
||||
// border-radius class applied to the icon image
iconRounded?: string;
|
||||
// when true, show the version badge if serverInfo.version is set
showVersion?: boolean;
|
||||
// when true, show the external link if a sanitized website URL is available
showWebsite?: boolean;
|
||||
// extra classes forwarded to the name text
nameClass?: string;
|
||||
}
|
||||
|
||||
// destructure the props, applying defaults for the ones omitted by the parent
let {
|
||||
displayName,
|
||||
faviconUrl = null,
|
||||
serverInfo,
|
||||
iconClass = 'h-5 w-5',
|
||||
iconRounded = 'rounded-sm',
|
||||
showVersion = true,
|
||||
showWebsite = true,
|
||||
nameClass
|
||||
}: Props = $props();
|
||||
|
||||
// website URL passed through sanitizeExternalUrl before it is used as an href;
// null when the server reports no website
let safeWebsiteUrl = $derived(
|
||||
serverInfo?.websiteUrl ? sanitizeExternalUrl(serverInfo.websiteUrl) : null
|
||||
);
|
||||
</script>
|
||||
|
||||
<span class="flex min-w-0 items-center gap-1.5">
|
||||
{#if faviconUrl}
|
||||
<img
|
||||
src={faviconUrl}
|
||||
alt=""
|
||||
class={['shrink-0', iconRounded, iconClass]}
|
||||
onerror={(e) => {
|
||||
(e.currentTarget as HTMLImageElement).style.display = 'none';
|
||||
}}
|
||||
/>
|
||||
{/if}
|
||||
|
||||
<TruncatedText text={displayName ?? ''} class={nameClass ?? ''} />
|
||||
|
||||
{#if showVersion && serverInfo?.version}
|
||||
<Badge variant="secondary" class="h-4 min-w-0 shrink px-1 text-[10px]">
|
||||
<TruncatedText text={`v${serverInfo.version}`} />
|
||||
</Badge>
|
||||
{/if}
|
||||
|
||||
{#if showWebsite && safeWebsiteUrl}
|
||||
<a
|
||||
href={safeWebsiteUrl}
|
||||
target="_blank"
|
||||
rel="noopener noreferrer"
|
||||
class="shrink-0 text-muted-foreground hover:text-foreground"
|
||||
aria-label="Open website"
|
||||
onclick={(e) => e.stopPropagation()}
|
||||
>
|
||||
<ExternalLink class="h-3 w-3" />
|
||||
</a>
|
||||
{/if}
|
||||
</span>
|
||||
@@ -180,6 +180,25 @@ export { default as McpServerCardDeleteDialog } from './McpServerCard/McpServerC
|
||||
/** Skeleton loading state for server card during health checks. */
|
||||
export { default as McpServerCardSkeleton } from './McpServerCardSkeleton.svelte';
|
||||
|
||||
/**
|
||||
* **McpServerIdentity** - Server identity display (icon, name, version)
|
||||
*
|
||||
* Reusable headless component for displaying server name, favicon/icon, and version badge.
|
||||
* Accepts all data via props with no store dependencies for predictable rendering.
|
||||
*
|
||||
* **Features:**
|
||||
* - Server favicon/icon with fallback
|
||||
* - Truncated display name with max-width
|
||||
* - Optional version badge (v1.2.3)
|
||||
* - Optional external link to server website
|
||||
*
|
||||
* @example
|
||||
* ```svelte
|
||||
* <McpServerIdentity displayName={name} faviconUrl={iconUrl} serverInfo={info} />
|
||||
* ```
|
||||
*/
|
||||
export { default as McpServerIdentity } from './McpServerIdentity.svelte';
|
||||
|
||||
/**
|
||||
* **McpServerInfo** - Server instructions display
|
||||
*
|
||||
|
||||
@@ -32,7 +32,7 @@
|
||||
|
||||
{#if isTruncated && showTooltip}
|
||||
<Tooltip.Root>
|
||||
<Tooltip.Trigger class={className}>
|
||||
<Tooltip.Trigger class="{className} min-w-0">
|
||||
<span bind:this={textElement} class="block truncate">
|
||||
{text}
|
||||
</span>
|
||||
@@ -43,7 +43,7 @@
|
||||
</Tooltip.Content>
|
||||
</Tooltip.Root>
|
||||
{:else}
|
||||
<span bind:this={textElement} class="{className} block truncate">
|
||||
<span bind:this={textElement} class="{className} block min-w-0 truncate">
|
||||
{text}
|
||||
</span>
|
||||
{/if}
|
||||
|
||||
@@ -170,7 +170,7 @@
|
||||
>
|
||||
<Package class="h-3.5 w-3.5" />
|
||||
|
||||
<TruncatedText text={selectedOption?.model || ''} class="min-w-0 font-medium" />
|
||||
<TruncatedText text={selectedOption?.model || ''} class="font-medium" />
|
||||
|
||||
{#if ms.updating}
|
||||
<Loader2 class="h-3 w-3.5 animate-spin" />
|
||||
|
||||
@@ -2,28 +2,15 @@
|
||||
import { ChevronDown, ChevronRight } from '@lucide/svelte';
|
||||
import { Checkbox } from '$lib/components/ui/checkbox';
|
||||
import * as Collapsible from '$lib/components/ui/collapsible';
|
||||
import { TruncatedText } from '$lib/components/app';
|
||||
import { TruncatedText, McpServerIdentity } from '$lib/components/app';
|
||||
import { toolsStore } from '$lib/stores/tools.svelte';
|
||||
import { permissionsStore } from '$lib/stores/permissions.svelte';
|
||||
import { mcpStore } from '$lib/stores/mcp.svelte';
|
||||
import { ToolSource } from '$lib/enums';
|
||||
import { SvelteSet } from 'svelte/reactivity';
|
||||
|
||||
let expandedGroups = new SvelteSet<string>();
|
||||
let groups = $derived(toolsStore.toolGroups);
|
||||
|
||||
function getFavicon(group: { source: ToolSource; label: string }): string | null {
|
||||
if (group.source !== ToolSource.MCP) return null;
|
||||
|
||||
for (const server of mcpStore.getServersSorted()) {
|
||||
if (mcpStore.getServerLabel(server) === group.label) {
|
||||
return mcpStore.getServerFavicon(server.id);
|
||||
}
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
function toggleExpanded(label: string) {
|
||||
if (expandedGroups.has(label)) {
|
||||
expandedGroups.delete(label);
|
||||
@@ -39,8 +26,6 @@
|
||||
<div class="space-y-2">
|
||||
{#each groups as group (group.label)}
|
||||
{@const isExpanded = expandedGroups.has(group.label)}
|
||||
{@const favicon = getFavicon(group)}
|
||||
|
||||
<Collapsible.Root open={isExpanded} onOpenChange={() => toggleExpanded(group.label)}>
|
||||
<Collapsible.Trigger
|
||||
class="flex w-full items-center gap-2 rounded-lg px-3 py-2 text-sm hover:bg-muted/50"
|
||||
@@ -51,19 +36,16 @@
|
||||
<ChevronRight class="h-3.5 w-3.5 shrink-0" />
|
||||
{/if}
|
||||
|
||||
<span class="inline-flex min-w-0 items-center gap-1.5 font-medium">
|
||||
{#if favicon}
|
||||
<img
|
||||
src={favicon}
|
||||
alt=""
|
||||
class="h-4 w-4 shrink-0 rounded-sm"
|
||||
onerror={(e) => {
|
||||
(e.currentTarget as HTMLImageElement).style.display = 'none';
|
||||
}}
|
||||
/>
|
||||
{/if}
|
||||
{@const faviconUrl = group.serverId ? mcpStore.getServerFavicon(group.serverId) : null}
|
||||
|
||||
<span class="truncate">{group.label}</span>
|
||||
<span class="inline-flex min-w-0 items-center gap-1.5 font-medium">
|
||||
<McpServerIdentity
|
||||
iconClass="h-4 w-4"
|
||||
iconRounded="rounded-sm"
|
||||
showVersion={false}
|
||||
displayName={group.label}
|
||||
{faviconUrl}
|
||||
/>
|
||||
</span>
|
||||
|
||||
<span class="ml-auto shrink-0 text-xs text-muted-foreground">
|
||||
@@ -89,7 +71,7 @@
|
||||
: false}
|
||||
|
||||
<div class="flex items-center gap-2 rounded px-2 py-1.5 text-sm hover:bg-muted/50">
|
||||
<TruncatedText text={toolName} class="min-w-0 flex-1 truncate" showTooltip={true} />
|
||||
<TruncatedText text={toolName} class="flex-1" showTooltip={true} />
|
||||
|
||||
<div class="flex w-16 shrink-0 justify-center">
|
||||
<Checkbox
|
||||
|
||||
@@ -54,14 +54,14 @@
|
||||
});
|
||||
</script>
|
||||
|
||||
<div in:fade={{ duration: 150 }} class="max-h-full overflow-auto">
|
||||
<div in:fade={{ duration: 150 }} class="h-full max-h-[100dvh] overflow-y-auto">
|
||||
<div class="flex items-center gap-2 p-4 md:absolute md:top-8 md:left-8 md:px-0 md:py-2">
|
||||
<McpLogo class="h-5 w-5 md:h-6 md:w-6" />
|
||||
|
||||
<h1 class="text-xl font-semibold md:text-2xl">MCP Servers</h1>
|
||||
</div>
|
||||
|
||||
<div class="sticky top-0 z-10 mt-4 flex items-start justify-end gap-4 px-8 py-4">
|
||||
<div class="sticky top-0 z-10 mt-4 flex items-start gap-4 p-4 md:justify-end md:px-8">
|
||||
<Button variant="outline" size="sm" class="shrink-0" onclick={() => (isAddingServer = true)}>
|
||||
<Plus class="h-4 w-4" />
|
||||
|
||||
@@ -89,7 +89,6 @@
|
||||
{:else}
|
||||
<McpServerCard
|
||||
{server}
|
||||
faviconUrl={mcpStore.getServerFavicon(server.id)}
|
||||
enabled={conversationsStore.isMcpServerEnabledForChat(server.id)}
|
||||
onToggle={async () => {
|
||||
const wasEnabled = conversationsStore.isMcpServerEnabledForChat(server.id);
|
||||
|
||||
@@ -22,7 +22,7 @@
|
||||
'fixed z-[999999] grid w-full gap-4 border bg-background p-6 shadow-lg duration-200',
|
||||
// Mobile: Bottom sheet behavior
|
||||
'right-0 bottom-0 left-0 max-h-[100dvh] translate-x-0 translate-y-0 overflow-y-auto rounded-t-lg',
|
||||
'data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=closed]:slide-out-to-bottom-full',
|
||||
'data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=closed]:fill-mode-forwards data-[state=closed]:slide-out-to-bottom-full',
|
||||
'data-[state=open]:animate-in data-[state=open]:fade-in-0 data-[state=open]:slide-in-from-bottom-full',
|
||||
// Desktop: Centered dialog behavior
|
||||
'sm:top-[50%] sm:right-auto sm:bottom-auto sm:left-[50%] sm:max-h-[100vh] sm:max-w-lg sm:translate-x-[-50%] sm:translate-y-[-50%] sm:rounded-lg',
|
||||
|
||||
@@ -13,7 +13,7 @@
|
||||
bind:ref
|
||||
data-slot="alert-dialog-overlay"
|
||||
class={cn(
|
||||
'fixed inset-0 z-50 bg-black/50 data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=open]:animate-in data-[state=open]:fade-in-0',
|
||||
'fixed inset-0 z-50 bg-black/50 data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=closed]:fill-mode-forwards data-[state=open]:animate-in data-[state=open]:fade-in-0',
|
||||
className
|
||||
)}
|
||||
{...restProps}
|
||||
|
||||
@@ -25,7 +25,7 @@
|
||||
bind:ref
|
||||
data-slot="dialog-content"
|
||||
class={cn(
|
||||
`fixed top-[50%] left-[50%] z-50 grid max-h-[100dvh] w-full max-w-[calc(100%-2rem)] translate-x-[-50%] translate-y-[-50%] gap-4 overflow-y-auto rounded-lg border border-border/30 bg-background p-6 shadow-lg duration-200 data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=closed]:zoom-out-95 data-[state=open]:animate-in data-[state=open]:fade-in-0 data-[state=open]:zoom-in-95 sm:max-w-lg md:max-h-[100vh]`,
|
||||
`fixed top-[50%] left-[50%] z-50 grid max-h-[100dvh] w-full max-w-[calc(100%-2rem)] translate-x-[-50%] translate-y-[-50%] gap-4 overflow-y-auto rounded-lg border border-border/30 bg-background p-6 shadow-lg duration-200 data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=closed]:fill-mode-forwards data-[state=closed]:zoom-out-95 data-[state=open]:animate-in data-[state=open]:fade-in-0 data-[state=open]:zoom-in-95 sm:max-w-lg md:max-h-[100vh]`,
|
||||
className
|
||||
)}
|
||||
{...restProps}
|
||||
|
||||
@@ -13,7 +13,7 @@
|
||||
bind:ref
|
||||
data-slot="dialog-overlay"
|
||||
class={cn(
|
||||
'fixed inset-0 z-50 bg-black/50 data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=open]:animate-in data-[state=open]:fade-in-0',
|
||||
'fixed inset-0 z-50 bg-black/50 data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=closed]:fill-mode-forwards data-[state=open]:animate-in data-[state=open]:fade-in-0',
|
||||
className
|
||||
)}
|
||||
{...restProps}
|
||||
|
||||
@@ -19,7 +19,7 @@
|
||||
data-slot="dropdown-menu-content"
|
||||
{sideOffset}
|
||||
class={cn(
|
||||
'z-50 max-h-(--bits-dropdown-menu-content-available-height) min-w-[8rem] origin-(--bits-dropdown-menu-content-transform-origin) overflow-x-hidden overflow-y-auto rounded-md border border-border bg-popover p-1.5 text-popover-foreground shadow-md outline-none data-[side=bottom]:slide-in-from-top-2 data-[side=left]:slide-in-from-right-2 data-[side=right]:slide-in-from-left-2 data-[side=top]:slide-in-from-bottom-2 data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=closed]:zoom-out-95 data-[state=open]:animate-in data-[state=open]:fade-in-0 data-[state=open]:zoom-in-95 dark:border-border/20',
|
||||
'z-50 max-h-(--bits-dropdown-menu-content-available-height) min-w-[8rem] origin-(--bits-dropdown-menu-content-transform-origin) overflow-x-hidden overflow-y-auto rounded-md border border-border bg-popover p-1.5 text-popover-foreground shadow-md outline-none data-[side=bottom]:slide-in-from-top-2 data-[side=left]:slide-in-from-right-2 data-[side=right]:slide-in-from-left-2 data-[side=top]:slide-in-from-bottom-2 data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=closed]:fill-mode-forwards data-[state=closed]:zoom-out-95 data-[state=open]:animate-in data-[state=open]:fade-in-0 data-[state=open]:zoom-in-95 dark:border-border/20',
|
||||
className
|
||||
)}
|
||||
{...restProps}
|
||||
|
||||
@@ -13,7 +13,7 @@
|
||||
bind:ref
|
||||
data-slot="dropdown-menu-sub-content"
|
||||
class={cn(
|
||||
'z-50 max-h-(--bits-dropdown-menu-content-available-height) min-w-[8rem] origin-(--bits-dropdown-menu-content-transform-origin) overflow-x-hidden overflow-y-auto rounded-md border border-border bg-popover p-1.5 text-popover-foreground shadow-md outline-none data-[side=bottom]:slide-in-from-top-2 data-[side=left]:slide-in-from-right-2 data-[side=right]:slide-in-from-left-2 data-[side=top]:slide-in-from-bottom-2 data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=closed]:zoom-out-95 data-[state=open]:animate-in data-[state=open]:fade-in-0 data-[state=open]:zoom-in-95 dark:border-border/20',
|
||||
'z-50 max-h-(--bits-dropdown-menu-content-available-height) min-w-[8rem] origin-(--bits-dropdown-menu-content-transform-origin) overflow-x-hidden overflow-y-auto rounded-md border border-border bg-popover p-1.5 text-popover-foreground shadow-md outline-none data-[side=bottom]:slide-in-from-top-2 data-[side=left]:slide-in-from-right-2 data-[side=right]:slide-in-from-left-2 data-[side=top]:slide-in-from-bottom-2 data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=closed]:fill-mode-forwards data-[state=closed]:zoom-out-95 data-[state=open]:animate-in data-[state=open]:fade-in-0 data-[state=open]:zoom-in-95 dark:border-border/20',
|
||||
className
|
||||
)}
|
||||
{...restProps}
|
||||
|
||||
@@ -29,7 +29,7 @@
|
||||
{collisionPadding}
|
||||
{avoidCollisions}
|
||||
class={cn(
|
||||
'z-50 w-72 origin-(--bits-popover-content-transform-origin) rounded-md border bg-popover p-4 text-popover-foreground shadow-md outline-hidden data-[side=bottom]:slide-in-from-top-2 data-[side=left]:slide-in-from-end-2 data-[side=right]:slide-in-from-start-2 data-[side=top]:slide-in-from-bottom-2 data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=closed]:zoom-out-95 data-[state=open]:animate-in data-[state=open]:fade-in-0 data-[state=open]:zoom-in-95',
|
||||
'z-50 w-72 origin-(--bits-popover-content-transform-origin) rounded-md border bg-popover p-4 text-popover-foreground shadow-md outline-hidden data-[side=bottom]:slide-in-from-top-2 data-[side=left]:slide-in-from-end-2 data-[side=right]:slide-in-from-start-2 data-[side=top]:slide-in-from-bottom-2 data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=closed]:fill-mode-forwards data-[state=closed]:zoom-out-95 data-[state=open]:animate-in data-[state=open]:fade-in-0 data-[state=open]:zoom-in-95',
|
||||
className
|
||||
)}
|
||||
{...restProps}
|
||||
|
||||
@@ -93,7 +93,7 @@
|
||||
{sideOffset}
|
||||
data-slot="select-content"
|
||||
class={cn(
|
||||
'relative z-[var(--layer-popover,1000000)] max-h-(--bits-select-content-available-height) min-w-[8rem] origin-(--bits-select-content-transform-origin) overflow-x-hidden overflow-y-auto rounded-md border bg-popover text-popover-foreground shadow-md data-[side=bottom]:translate-y-1 data-[side=bottom]:slide-in-from-top-2 data-[side=left]:-translate-x-1 data-[side=left]:slide-in-from-right-2 data-[side=right]:translate-x-1 data-[side=right]:slide-in-from-left-2 data-[side=top]:-translate-y-1 data-[side=top]:slide-in-from-bottom-2 data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=closed]:zoom-out-95 data-[state=open]:animate-in data-[state=open]:fade-in-0 data-[state=open]:zoom-in-95',
|
||||
'relative z-[var(--layer-popover,1000000)] max-h-(--bits-select-content-available-height) min-w-[8rem] origin-(--bits-select-content-transform-origin) overflow-x-hidden overflow-y-auto rounded-md border bg-popover text-popover-foreground shadow-md data-[side=bottom]:translate-y-1 data-[side=bottom]:slide-in-from-top-2 data-[side=left]:-translate-x-1 data-[side=left]:slide-in-from-right-2 data-[side=right]:translate-x-1 data-[side=right]:slide-in-from-left-2 data-[side=top]:-translate-y-1 data-[side=top]:slide-in-from-bottom-2 data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=closed]:fill-mode-forwards data-[state=closed]:zoom-out-95 data-[state=open]:animate-in data-[state=open]:fade-in-0 data-[state=open]:zoom-in-95',
|
||||
className
|
||||
)}
|
||||
{...restProps}
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
<script lang="ts" module>
|
||||
import { tv, type VariantProps } from 'tailwind-variants';
|
||||
export const sheetVariants = tv({
|
||||
base: `border-border/30 dark:border-border/20 data-[state=open]:animate-in data-[state=closed]:animate-out fixed z-50 flex flex-col gap-4 shadow-sm transition ease-in-out data-[state=closed]:duration-300 data-[state=open]:duration-500 ${PANEL_CLASSES}`,
|
||||
base: `border-border/30 dark:border-border/20 data-[state=open]:animate-in data-[state=closed]:animate-out data-[state=closed]:fill-mode-forwards fixed z-50 flex flex-col gap-4 shadow-sm transition ease-in-out data-[state=closed]:duration-300 data-[state=open]:duration-500 ${PANEL_CLASSES}`,
|
||||
variants: {
|
||||
side: {
|
||||
top: 'data-[state=closed]:slide-out-to-top data-[state=open]:slide-in-from-top inset-x-0 top-0 h-auto border-b',
|
||||
|
||||
@@ -13,7 +13,7 @@
|
||||
bind:ref
|
||||
data-slot="sheet-overlay"
|
||||
class={cn(
|
||||
'fixed inset-0 z-50 bg-black/50 data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=open]:animate-in data-[state=open]:fade-in-0',
|
||||
'fixed inset-0 z-50 bg-black/50 data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=closed]:fill-mode-forwards data-[state=open]:animate-in data-[state=open]:fade-in-0',
|
||||
className
|
||||
)}
|
||||
{...restProps}
|
||||
|
||||
@@ -18,7 +18,7 @@
|
||||
|
||||
const contentClass = $derived(
|
||||
cn(
|
||||
'z-50 w-fit origin-(--bits-tooltip-content-transform-origin) animate-in rounded-md bg-primary px-3 py-1.5 text-xs text-balance text-primary-foreground fade-in-0 zoom-in-95 data-[side=bottom]:slide-in-from-top-2 data-[side=left]:slide-in-from-right-2 data-[side=right]:slide-in-from-left-2 data-[side=top]:slide-in-from-bottom-2 data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=closed]:zoom-out-95',
|
||||
'z-50 w-fit origin-(--bits-tooltip-content-transform-origin) animate-in rounded-md bg-primary px-3 py-1.5 text-xs text-balance text-primary-foreground fade-in-0 zoom-in-95 data-[side=bottom]:slide-in-from-top-2 data-[side=left]:slide-in-from-right-2 data-[side=right]:slide-in-from-left-2 data-[side=top]:slide-in-from-bottom-2 data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=closed]:zoom-out-95 data-[state=closed]:fill-mode-forwards',
|
||||
className
|
||||
)
|
||||
);
|
||||
|
||||
@@ -1,4 +0,0 @@
|
||||
export const GOOGLE_FAVICON_BASE_URL = 'https://www.google.com/s2/favicons';
|
||||
export const DEFAULT_FAVICON_SIZE = 32;
|
||||
export const DOMAIN_SEPARATOR = '.';
|
||||
export const ROOT_DOMAIN_MIN_PARTS = 2;
|
||||
@@ -13,7 +13,6 @@ export * from './code-blocks';
|
||||
export * from './code';
|
||||
export * from './context-keys';
|
||||
export * from './css-classes';
|
||||
export * from './favicon';
|
||||
export * from './floating-ui-constraints';
|
||||
export * from './formatters';
|
||||
export * from './key-value-pairs';
|
||||
@@ -40,4 +39,5 @@ export * from './tools';
|
||||
export * from './tooltip-config';
|
||||
export * from './ui';
|
||||
export * from './uri-template';
|
||||
export * from './url';
|
||||
export * from './viewport';
|
||||
|
||||
@@ -13,7 +13,9 @@ export const MCP_ALLOWED_ICON_MIME_TYPES = new Set([
|
||||
MimeTypeImage.JPEG,
|
||||
MimeTypeImage.JPG,
|
||||
MimeTypeImage.SVG,
|
||||
MimeTypeImage.WEBP
|
||||
MimeTypeImage.WEBP,
|
||||
MimeTypeImage.ICO,
|
||||
MimeTypeImage.ICO_MICROSOFT
|
||||
]);
|
||||
|
||||
/**
|
||||
|
||||
186
tools/server/webui/src/lib/constants/url.ts
Normal file
186
tools/server/webui/src/lib/constants/url.ts
Normal file
@@ -0,0 +1,186 @@
|
||||
const STD = ['com', 'net', 'org', 'gov', 'edu'] as const;
|
||||
|
||||
const STD_MIL = [...STD, 'mil'] as const;
|
||||
|
||||
const ccTLD_PREFIXES: Record<string, readonly string[]> = {
|
||||
// --- Standard prefixes only (STD, or STD_MIL where 'mil' is also listed) ---
|
||||
ar: STD,
|
||||
bd: STD,
|
||||
bg: STD,
|
||||
cn: STD_MIL,
|
||||
eg: STD,
|
||||
gr: STD,
|
||||
hk: STD,
|
||||
hr: STD,
|
||||
lk: STD,
|
||||
mx: STD_MIL,
|
||||
my: STD_MIL,
|
||||
ng: STD,
|
||||
ph: STD,
|
||||
pk: STD,
|
||||
pl: STD,
|
||||
ro: STD,
|
||||
ru: STD,
|
||||
sa: STD,
|
||||
si: STD,
|
||||
tr: STD,
|
||||
tw: STD,
|
||||
ua: STD,
|
||||
ve: STD,
|
||||
|
||||
au: [...STD_MIL, 'id', 'asn', 'csiro'],
|
||||
br: [
|
||||
...STD_MIL,
|
||||
'art',
|
||||
'eco',
|
||||
'eng',
|
||||
'inf',
|
||||
'med',
|
||||
'psi',
|
||||
'tmp',
|
||||
'etc',
|
||||
'adm',
|
||||
'adv',
|
||||
'arq',
|
||||
'bio',
|
||||
'bmd',
|
||||
'cim',
|
||||
'cng',
|
||||
'cnt',
|
||||
'coop',
|
||||
'ecn',
|
||||
'esp',
|
||||
'far',
|
||||
'fm',
|
||||
'fnd',
|
||||
'fot',
|
||||
'fst',
|
||||
'g12',
|
||||
'ggf',
|
||||
'imb',
|
||||
'ind',
|
||||
'jor',
|
||||
'jus',
|
||||
'leg',
|
||||
'lel',
|
||||
'mat',
|
||||
'mp',
|
||||
'mus',
|
||||
'not',
|
||||
'ntr',
|
||||
'odo',
|
||||
'ppg',
|
||||
'pro',
|
||||
'psc',
|
||||
'qsl',
|
||||
'rec',
|
||||
'slg',
|
||||
'srv',
|
||||
'trd',
|
||||
'tur',
|
||||
'tv',
|
||||
'vet',
|
||||
'vlog',
|
||||
'wiki',
|
||||
'zlg'
|
||||
],
|
||||
id: [...STD_MIL, 'co', 'go', 'or', 'web', 'sch'],
|
||||
in: [...STD_MIL, 'co', 'gen', 'ind', 'firm', 'ernet', 'nic'],
|
||||
kr: [...STD_MIL, 'co', 'go', 'or', 'ac', 're'],
|
||||
nz: [
|
||||
...STD_MIL,
|
||||
'co',
|
||||
'gen',
|
||||
'geek',
|
||||
'kiwi',
|
||||
'maori',
|
||||
'school',
|
||||
'govt',
|
||||
'health',
|
||||
'iwi',
|
||||
'parliament'
|
||||
],
|
||||
sg: [...STD, 'per'],
|
||||
th: ['co', 'go', 'or', 'in', 'ac', 'mi', 'net'],
|
||||
|
||||
ae: ['co', 'net', 'org', 'gov', 'ac', 'sch'],
|
||||
hu: ['co', 'net', 'org', 'gov', 'edu'],
|
||||
il: ['co', 'net', 'org', 'gov', 'ac', 'muni'],
|
||||
jp: ['ac', 'ad', 'co', 'ed', 'go', 'gr', 'lg', 'ne', 'or'],
|
||||
ke: ['co', 'or', 'ne', 'go', 'ac', 'sc'],
|
||||
rs: ['co', 'net', 'org', 'gov', 'edu'],
|
||||
uk: ['co', 'org', 'net', 'ac', 'gov', 'mil', 'nhs', 'police', 'mod', 'ltd', 'plc', 'me', 'sch'],
|
||||
za: ['co', 'org', 'net', 'web', 'law', 'mil']
|
||||
};
|
||||
|
||||
const WILDCARD_BASES: Record<string, readonly string[]> = {
|
||||
br: ['nom', 'blog'],
|
||||
jp: [
|
||||
'kobe',
|
||||
'kyoto',
|
||||
'nagoya',
|
||||
'osaka',
|
||||
'sapporo',
|
||||
'sendai',
|
||||
'tokyo',
|
||||
'yokohama',
|
||||
'aichi',
|
||||
'akita',
|
||||
'aomori',
|
||||
'chiba',
|
||||
'ehime',
|
||||
'fukui',
|
||||
'fukuoka',
|
||||
'fukushima',
|
||||
'gifu',
|
||||
'gunma',
|
||||
'hiroshima',
|
||||
'hokkaido',
|
||||
'hyogo',
|
||||
'ibaraki',
|
||||
'ishikawa',
|
||||
'iwate',
|
||||
'kagawa',
|
||||
'kagoshima',
|
||||
'kanagawa',
|
||||
'kochi',
|
||||
'kumamoto',
|
||||
'mie',
|
||||
'miyagi',
|
||||
'miyazaki',
|
||||
'nagano',
|
||||
'nara',
|
||||
'niigata',
|
||||
'oita',
|
||||
'okayama',
|
||||
'okinawa',
|
||||
'saga',
|
||||
'saitama',
|
||||
'shiga',
|
||||
'shimane',
|
||||
'shizuoka',
|
||||
'tochigi',
|
||||
'tokushima',
|
||||
'tottori',
|
||||
'toyama',
|
||||
'wakayama',
|
||||
'yamagata',
|
||||
'yamaguchi',
|
||||
'yamanashi'
|
||||
]
|
||||
};
|
||||
|
||||
/**
 * Flatten a `tld -> second-level labels` table into a set of
 * `"<label>.<tld>"` suffix strings (e.g. `{ uk: ['co'] }` -> `"co.uk"`).
 *
 * @param suffixes - Map from a top-level domain to the labels registered under it
 * @returns Set containing one `label.tld` entry per (tld, label) pair
 */
function buildSuffixSet(suffixes: Record<string, readonly string[]>): Set<string> {
	const combined = Object.entries(suffixes).flatMap(([tld, labels]) =>
		labels.map((label) => `${label}.${tld}`)
	);

	return new Set<string>(combined);
}
|
||||
|
||||
export const TWO_PART_PUBLIC_SUFFIXES = buildSuffixSet(ccTLD_PREFIXES);
|
||||
export const WILDCARD_PUBLIC_SUFFIXES = buildSuffixSet(WILDCARD_BASES);
|
||||
@@ -182,7 +182,9 @@ export enum MimeTypeImage {
|
||||
PNG = 'image/png',
|
||||
GIF = 'image/gif',
|
||||
WEBP = 'image/webp',
|
||||
SVG = 'image/svg+xml'
|
||||
SVG = 'image/svg+xml',
|
||||
ICO = 'image/x-icon',
|
||||
ICO_MICROSOFT = 'image/vnd.microsoft.icon'
|
||||
}
|
||||
|
||||
export enum MimeTypeText {
|
||||
|
||||
@@ -24,10 +24,10 @@ export enum McpPromptVariant {
|
||||
*/
|
||||
export enum UrlProtocol {
|
||||
DATA = 'data:',
|
||||
HTTP = 'http://',
|
||||
HTTPS = 'https://',
|
||||
WEBSOCKET = 'ws://',
|
||||
WEBSOCKET_SECURE = 'wss://'
|
||||
HTTP = 'http:',
|
||||
HTTPS = 'https:',
|
||||
WEBSOCKET = 'ws:',
|
||||
WEBSOCKET_SECURE = 'wss:'
|
||||
}
|
||||
|
||||
export enum HtmlInputType {
|
||||
|
||||
@@ -27,6 +27,7 @@ import {
|
||||
} from '$lib/enums';
|
||||
import type {
|
||||
MCPServerConfig,
|
||||
MCPResourceIcon,
|
||||
ToolCallParams,
|
||||
ToolExecutionResult,
|
||||
Implementation,
|
||||
@@ -469,10 +470,11 @@ export class MCPService {
|
||||
title: impl.title,
|
||||
description: impl.description,
|
||||
websiteUrl: impl.websiteUrl,
|
||||
icons: impl.icons?.map((icon: { src: string; mimeType?: string; sizes?: string }) => ({
|
||||
icons: impl.icons?.map((icon: MCPResourceIcon) => ({
|
||||
src: icon.src,
|
||||
mimeType: icon.mimeType,
|
||||
sizes: icon.sizes
|
||||
sizes: icon.sizes,
|
||||
theme: icon.theme
|
||||
}))
|
||||
};
|
||||
}
|
||||
@@ -581,7 +583,6 @@ export class MCPService {
|
||||
this.createLog(MCPConnectionPhase.INITIALIZING, 'Sending initialize request...')
|
||||
);
|
||||
|
||||
console.log(`[MCPService][${serverName}] Connecting to server...`);
|
||||
try {
|
||||
await client.connect(transport);
|
||||
// Transport diagnostics are only for the initial handshake, not long-lived traffic.
|
||||
|
||||
@@ -26,11 +26,10 @@ import { config, settingsStore } from '$lib/stores/settings.svelte';
|
||||
import { mcpResourceStore } from '$lib/stores/mcp-resources.svelte';
|
||||
import { mode } from 'mode-watcher';
|
||||
import {
|
||||
getProxiedUrlString,
|
||||
parseMcpServerSettings,
|
||||
detectMcpTransportFromUrl,
|
||||
getFaviconUrl,
|
||||
uuid
|
||||
uuid,
|
||||
extractRootDomain
|
||||
} from '$lib/utils';
|
||||
import {
|
||||
MCPConnectionPhase,
|
||||
@@ -413,7 +412,9 @@ class MCPStore {
|
||||
#isValidIconUri(src: string): boolean {
|
||||
try {
|
||||
if (src.startsWith(UrlProtocol.DATA)) return true;
|
||||
|
||||
const url = new URL(src);
|
||||
|
||||
return url.protocol === UrlProtocol.HTTPS;
|
||||
} catch {
|
||||
return false;
|
||||
@@ -446,40 +447,29 @@ class MCPStore {
|
||||
|
||||
// 1. Prefer icon explicitly matching the current color scheme
|
||||
const themedIcon = validIcons.find((icon) => icon.theme === preferredTheme);
|
||||
if (themedIcon) return this.#proxyIconSrc(themedIcon.src);
|
||||
if (themedIcon) return themedIcon.src;
|
||||
|
||||
// 2. Handle universal icons (no theme specified)
|
||||
const universalIcons = validIcons.filter((icon) => !icon.theme);
|
||||
|
||||
if (universalIcons.length === EXPECTED_THEMED_ICON_PAIR_COUNT) {
|
||||
// Heuristic: two theme-less icons → assume [0] = light, [1] = dark
|
||||
return this.#proxyIconSrc(universalIcons[isDark ? 1 : 0].src);
|
||||
return universalIcons[isDark ? 1 : 0].src;
|
||||
}
|
||||
|
||||
if (universalIcons.length > 0) {
|
||||
return this.#proxyIconSrc(universalIcons[0].src);
|
||||
return universalIcons[0].src;
|
||||
}
|
||||
|
||||
// 3. Last resort: use opposite-theme icon
|
||||
return this.#proxyIconSrc(validIcons[0].src);
|
||||
}
|
||||
|
||||
/**
|
||||
* Route an icon src through the CORS proxy if it's an HTTPS URL.
|
||||
* Data URIs are returned as-is.
|
||||
*/
|
||||
#proxyIconSrc(src: string): string {
|
||||
if (src.startsWith('data:')) return src;
|
||||
if (!this._proxyAvailable) return src;
|
||||
|
||||
return getProxiedUrlString(src);
|
||||
return validIcons[0].src;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get icon URL for an MCP server by its ID.
|
||||
* Prefers the server's own icons (from MCP spec) and falls back
|
||||
* to Google's favicon service.
|
||||
* Returns null if server is not found.
|
||||
* Returns the best icon from the MCP server's `icons` array
|
||||
* (see MCP spec: spec.modelcontextprotocol.io).
|
||||
* Returns null if no icon is available.
|
||||
*/
|
||||
getServerFavicon(serverId: string): string | null {
|
||||
const server = this.getServerById(serverId);
|
||||
@@ -497,7 +487,39 @@ class MCPStore {
|
||||
}
|
||||
}
|
||||
|
||||
return getFaviconUrl(server.url, this._proxyAvailable);
|
||||
// Fallback: try favicon from root domain
|
||||
const fallbackUrl = this.#getServerFaviconFallback(server.url);
|
||||
if (fallbackUrl) {
|
||||
return fallbackUrl;
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Construct a fallback favicon URL from the MCP server URL.
|
||||
* e.g. https://mcp.exa.ai/mcp -> https://exa.ai/favicon.ico
|
||||
*/
|
||||
#getServerFaviconFallback(serverUrl: string): string | null {
	// Conventional favicon file names, in order of preference.
	const faviconFileNames = ['favicon.ico', 'favicon.svg', 'favicon.png'];

	try {
		const parsed = new URL(serverUrl);
		const rootDomain = extractRootDomain(parsed);

		if (rootDomain) {
			// Keep the server URL's scheme but target the root domain instead of the MCP host.
			const base = `${parsed.protocol}//${rootDomain}`;

			// NOTE(review): #isValidIconUri looks like a URL-shape/scheme check rather than
			// a network probe, so the first candidate would win whenever any does - confirm.
			const match = faviconFileNames
				.map((fileName) => `${base}/${fileName}`)
				.find((candidate) => this.#isValidIconUri(candidate));

			if (match) return match;
		}
	} catch {
		// serverUrl could not be parsed; fall through and report no fallback
	}

	return null;
}
|
||||
|
||||
isAnyServerLoading(): boolean {
|
||||
|
||||
@@ -55,6 +55,10 @@ class ModelsStore {
|
||||
selectedModelId = $state<string | null>(null);
|
||||
selectedModelName = $state<string | null>(null);
|
||||
|
||||
// dedup concurrent fetch() callers, all awaiters share the same inflight promise
|
||||
// without this, ?model=<name> URL handler raced an in-progress fetch and saw an empty list
|
||||
private inflightFetch: Promise<void> | null = null;
|
||||
|
||||
private modelUsage = $state<Map<string, SvelteSet<string>>>(new Map());
|
||||
private modelLoadingStates = new SvelteMap<string, boolean>();
|
||||
|
||||
@@ -258,9 +262,18 @@ class ModelsStore {
|
||||
* Also fetches modalities for MODEL mode (single model)
|
||||
*/
|
||||
async fetch(force = false): Promise<void> {
|
||||
if (this.loading) return;
|
||||
if (this.inflightFetch) return this.inflightFetch;
|
||||
if (this.models.length > 0 && !force) return;
|
||||
|
||||
this.inflightFetch = this.runFetch();
|
||||
try {
|
||||
await this.inflightFetch;
|
||||
} finally {
|
||||
this.inflightFetch = null;
|
||||
}
|
||||
}
|
||||
|
||||
private async runFetch(): Promise<void> {
|
||||
this.loading = true;
|
||||
this.error = null;
|
||||
|
||||
|
||||
@@ -33,12 +33,3 @@ export function buildProxiedHeaders(headers: Record<string, string>): Record<str
|
||||
|
||||
return proxiedHeaders;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get a proxied URL string for use in fetch requests.
|
||||
* @param targetUrl - The original URL to proxy
|
||||
* @returns Proxied URL as string
|
||||
*/
|
||||
export function getProxiedUrlString(targetUrl: string): string {
|
||||
return buildProxiedUrl(targetUrl).href;
|
||||
}
|
||||
|
||||
@@ -1,34 +0,0 @@
|
||||
/**
|
||||
* Favicon utility functions for extracting favicons from URLs.
|
||||
*/
|
||||
|
||||
import { getProxiedUrlString } from './cors-proxy';
|
||||
import {
|
||||
GOOGLE_FAVICON_BASE_URL,
|
||||
DEFAULT_FAVICON_SIZE,
|
||||
DOMAIN_SEPARATOR,
|
||||
ROOT_DOMAIN_MIN_PARTS
|
||||
} from '$lib/constants';
|
||||
|
||||
/**
 * Builds a favicon URL for the site behind `urlString` using Google's favicon service.
 *
 * The lookup domain is the last `ROOT_DOMAIN_MIN_PARTS` labels of the hostname,
 * or the full hostname when it has fewer labels than that.
 *
 * @param urlString - The URL to get the favicon for
 * @param useProxy - When true (the default), the result is passed through `getProxiedUrlString`
 * @returns The favicon URL, or null if anything throws while building it (e.g. an unparsable URL)
 */
export function getFaviconUrl(urlString: string, useProxy = true): string | null {
	try {
		const { hostname } = new URL(urlString);
		const labels = hostname.split(DOMAIN_SEPARATOR);

		let domain = hostname;
		if (labels.length >= ROOT_DOMAIN_MIN_PARTS) {
			domain = labels.slice(-ROOT_DOMAIN_MIN_PARTS).join(DOMAIN_SEPARATOR);
		}

		const faviconUrl = `${GOOGLE_FAVICON_BASE_URL}?domain=${domain}&sz=${DEFAULT_FAVICON_SIZE}`;

		if (!useProxy) {
			return faviconUrl;
		}

		return getProxiedUrlString(faviconUrl);
	} catch {
		return null;
	}
}
|
||||
@@ -39,7 +39,10 @@ export { highlightCode, detectIncompleteCodeBlock, type IncompleteCodeBlock } fr
|
||||
export { setConfigValue, getConfigValue, configToParameterRecord } from './config-helpers';
|
||||
|
||||
// CORS Proxy
|
||||
export { buildProxiedUrl, getProxiedUrlString, buildProxiedHeaders } from './cors-proxy';
|
||||
export { buildProxiedUrl, buildProxiedHeaders } from './cors-proxy';
|
||||
|
||||
// URL utilities
|
||||
export { extractRootDomain, sanitizeExternalUrl } from './url';
|
||||
|
||||
// Conversation utilities
|
||||
export { createMessageCountMap, getMessageCount } from './conversation-utils';
|
||||
@@ -146,9 +149,6 @@ export { createBase64DataUrl } from './data-url';
|
||||
// Header utilities
|
||||
export { parseHeadersToArray, serializeHeaders } from './headers';
|
||||
|
||||
// Favicon utilities
|
||||
export { getFaviconUrl } from './favicon';
|
||||
|
||||
// Agentic content utilities (structured section derivation)
|
||||
export {
|
||||
deriveAgenticSections,
|
||||
|
||||
72
tools/server/webui/src/lib/utils/url.ts
Normal file
72
tools/server/webui/src/lib/utils/url.ts
Normal file
@@ -0,0 +1,72 @@
|
||||
import { TWO_PART_PUBLIC_SUFFIXES, WILDCARD_PUBLIC_SUFFIXES } from '$lib/constants';
|
||||
import { UrlProtocol } from '$lib/enums';
|
||||
|
||||
/**
 * Heuristic check for whether a hostname is an IP literal rather than a DNS name.
 *
 * Any hostname containing a colon is treated as IPv6. Four dot-separated groups
 * of one to three digits are treated as IPv4; the groups are not range-checked.
 */
function isIpAddress(hostname: string): boolean {
	const dottedQuad = /^\d{1,3}(\.\d{1,3}){3}$/;

	return hostname.includes(':') || dottedQuad.test(hostname);
}
|
||||
|
||||
/**
 * Extract the registrable root domain from a URL.
 *
 * Resolution order:
 *  1. Empty hostnames, IP addresses and single-label hosts yield `null`.
 *  2. A hostname of three or more labels ending in a known two-part public
 *     suffix yields its last three labels.
 *  3. A hostname ending in a wildcard public suffix of `i` labels yields its
 *     last `i + 2` labels (or the whole hostname when it has fewer).
 *  4. Anything else yields its last two labels.
 *
 * A single trailing dot (fully-qualified form such as `example.com.`) is
 * ignored, so the result never carries it.
 *
 * @param url - Parsed URL whose hostname is inspected
 * @returns The registrable domain in lower case, or `null` when there is none
 *
 * @example
 * 'mcp.example.com'   -> 'example.com'
 * 'www.example.co.uk' -> 'example.co.uk'
 * 'bar.foo.nom.br'    -> 'bar.foo.nom.br'
 * 'www.example.com.'  -> 'example.com'
 * '192.168.1.1'       -> null
 * 'localhost'         -> null
 */
export function extractRootDomain(url: URL): string | null {
	// `URL.hostname` preserves the trailing dot of a fully-qualified name.
	// Without stripping it the last label would be an empty string and the
	// result would be something like 'com.'.
	const hostname = url.hostname.toLowerCase().replace(/\.$/, '');
	if (!hostname || isIpAddress(hostname)) return null;

	const parts = hostname.split('.');

	if (parts.length < 2) return null;

	if (parts.length >= 3) {
		const suffix2 = `${parts[parts.length - 2]}.${parts[parts.length - 1]}`;

		if (TWO_PART_PUBLIC_SUFFIXES.has(suffix2)) {
			return parts.slice(-3).join('.');
		}
	}

	// Try progressively longer suffixes; the first wildcard match wins.
	for (let i = 2; i <= parts.length; i++) {
		const candidate = parts.slice(-i).join('.');

		if (WILDCARD_PUBLIC_SUFFIXES.has(candidate)) {
			// Exactly one label below the wildcard suffix: the hostname is returned as-is.
			if (parts.length === i + 1) {
				return hostname;
			}

			return parts.slice(-(i + 2)).join('.');
		}
	}

	return parts.slice(-2).join('.');
}
|
||||
|
||||
/**
 * Validate an external URL string before it is used in an `<a href>`.
 *
 * @param raw - Candidate URL text
 * @returns The parsed URL's `href` when `raw` parses and its scheme is http: or https:;
 *          `null` for any other scheme or when parsing fails
 */
export function sanitizeExternalUrl(raw: string): string | null {
	try {
		const parsed = new URL(raw);
		const isWebScheme =
			parsed.protocol === UrlProtocol.HTTP || parsed.protocol === UrlProtocol.HTTPS;

		return isWebScheme ? parsed.href : null;
	} catch {
		return null;
	}
}
|
||||
Reference in New Issue
Block a user