mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2026-02-26 14:23:22 +02:00
Compare commits
13 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
ed4837891d | ||
|
|
cacc371f99 | ||
|
|
ae2368e74e | ||
|
|
9f0684f003 | ||
|
|
34ec1c3f18 | ||
|
|
e877ad8bd9 | ||
|
|
35715657cb | ||
|
|
f75c4e8bf5 | ||
|
|
99156f3a5f | ||
|
|
a0c91e8f9f | ||
|
|
07968d53e4 | ||
|
|
ba3b9c8844 | ||
|
|
94b0200a01 |
@@ -1,8 +1,8 @@
|
||||
ARG UBUNTU_VERSION=24.04
|
||||
|
||||
# This needs to generally match the container host's environment.
|
||||
ARG ROCM_VERSION=7.0
|
||||
ARG AMDGPU_VERSION=7.0
|
||||
ARG ROCM_VERSION=7.2
|
||||
ARG AMDGPU_VERSION=7.2
|
||||
|
||||
# Target the ROCm build image
|
||||
ARG BASE_ROCM_DEV_CONTAINER=rocm/dev-ubuntu-${UBUNTU_VERSION}:${ROCM_VERSION}-complete
|
||||
@@ -11,13 +11,12 @@ ARG BASE_ROCM_DEV_CONTAINER=rocm/dev-ubuntu-${UBUNTU_VERSION}:${ROCM_VERSION}-co
|
||||
FROM ${BASE_ROCM_DEV_CONTAINER} AS build
|
||||
|
||||
# Unless otherwise specified, we make a fat build.
|
||||
# List from https://github.com/ggml-org/llama.cpp/pull/1087#issuecomment-1682807878
|
||||
# This is mostly tied to rocBLAS supported archs.
|
||||
# gfx803, gfx900, gfx906, gfx1032, gfx1101, gfx1102,not officialy supported
|
||||
# check https://rocm.docs.amd.com/projects/install-on-linux/en/docs-6.4.1/reference/system-requirements.html
|
||||
# check https://rocm.docs.amd.com/projects/install-on-linux/en/docs-7.2.0/reference/system-requirements.html
|
||||
# check https://rocm.docs.amd.com/projects/radeon-ryzen/en/latest/docs/compatibility/compatibilityrad/native_linux/native_linux_compatibility.html
|
||||
# check https://rocm.docs.amd.com/projects/radeon-ryzen/en/latest/docs/compatibility/compatibilityryz/native_linux/native_linux_compatibility.html
|
||||
|
||||
ARG ROCM_DOCKER_ARCH='gfx803;gfx900;gfx906;gfx908;gfx90a;gfx942;gfx1010;gfx1030;gfx1032;gfx1100;gfx1101;gfx1102;gfx1200;gfx1201;gfx1151'
|
||||
#ARG ROCM_DOCKER_ARCH='gfx1151'
|
||||
ARG ROCM_DOCKER_ARCH='gfx908;gfx90a;gfx942;gfx1030;gfx1100;gfx1101;gfx1151;gfx1150;gfx1200;gfx1201'
|
||||
|
||||
# Set ROCm architectures
|
||||
ENV AMDGPU_TARGETS=${ROCM_DOCKER_ARCH}
|
||||
|
||||
98
.github/workflows/release.yml
vendored
98
.github/workflows/release.yml
vendored
@@ -516,6 +516,102 @@ jobs:
|
||||
path: llama-bin-win-sycl-x64.zip
|
||||
name: llama-bin-win-sycl-x64.zip
|
||||
|
||||
ubuntu-22-rocm:
|
||||
runs-on: ubuntu-22.04
|
||||
|
||||
strategy:
|
||||
matrix:
|
||||
include:
|
||||
- ROCM_VERSION: "7.2"
|
||||
gpu_targets: "gfx908;gfx90a;gfx942;gfx1030;gfx1100;gfx1101;gfx1151;gfx1150;gfx1200;gfx1201"
|
||||
build: 'x64'
|
||||
|
||||
steps:
|
||||
- name: Clone
|
||||
id: checkout
|
||||
uses: actions/checkout@v6
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
- name: ccache
|
||||
uses: ggml-org/ccache-action@v1.2.16
|
||||
with:
|
||||
key: ubuntu-rocm-cmake-${{ matrix.ROCM_VERSION }}-${{ matrix.build }}
|
||||
evict-old-files: 1d
|
||||
|
||||
- name: Dependencies
|
||||
id: depends
|
||||
run: |
|
||||
sudo apt install -y build-essential git cmake wget
|
||||
|
||||
- name: Setup Legacy ROCm
|
||||
if: matrix.ROCM_VERSION == '7.2'
|
||||
id: legacy_env
|
||||
run: |
|
||||
sudo mkdir --parents --mode=0755 /etc/apt/keyrings
|
||||
wget https://repo.radeon.com/rocm/rocm.gpg.key -O - | \
|
||||
gpg --dearmor | sudo tee /etc/apt/keyrings/rocm.gpg > /dev/null
|
||||
|
||||
sudo tee /etc/apt/sources.list.d/rocm.list << EOF
|
||||
deb [arch=amd64 signed-by=/etc/apt/keyrings/rocm.gpg] https://repo.radeon.com/rocm/apt/${{ matrix.ROCM_VERSION }} jammy main
|
||||
EOF
|
||||
|
||||
sudo tee /etc/apt/preferences.d/rocm-pin-600 << EOF
|
||||
Package: *
|
||||
Pin: release o=repo.radeon.com
|
||||
Pin-Priority: 600
|
||||
EOF
|
||||
|
||||
sudo apt update
|
||||
sudo apt-get install -y libssl-dev rocm-hip-sdk
|
||||
|
||||
- name: Setup TheRock
|
||||
if: matrix.ROCM_VERSION != '7.2'
|
||||
id: therock_env
|
||||
run: |
|
||||
wget https://repo.amd.com/rocm/tarball/therock-dist-linux-gfx1151-${{ matrix.ROCM_VERSION }}.tar.gz
|
||||
mkdir install
|
||||
tar -xf *.tar.gz -C install
|
||||
export ROCM_PATH=$(pwd)/install
|
||||
echo ROCM_PATH=$ROCM_PATH >> $GITHUB_ENV
|
||||
echo PATH=$PATH:$ROCM_PATH/bin >> $GITHUB_ENV
|
||||
echo LD_LIBRARY_PATH=$ROCM_PATH/lib:$ROCM_PATH/llvm/lib:$ROCM_PATH/lib/rocprofiler-systems >> $GITHUB_ENV
|
||||
|
||||
- name: Build with native CMake HIP support
|
||||
id: cmake_build
|
||||
run: |
|
||||
cmake -B build -S . \
|
||||
-DCMAKE_HIP_COMPILER="$(hipconfig -l)/clang" \
|
||||
-DCMAKE_HIP_FLAGS="-mllvm --amdgpu-unroll-threshold-local=600" \
|
||||
-DCMAKE_BUILD_TYPE=Release \
|
||||
-DGGML_BACKEND_DL=ON \
|
||||
-DGGML_NATIVE=OFF \
|
||||
-DCMAKE_INSTALL_RPATH='$ORIGIN' \
|
||||
-DCMAKE_BUILD_WITH_INSTALL_RPATH=ON \
|
||||
-DGGML_CPU_ALL_VARIANTS=ON \
|
||||
-DGPU_TARGETS="${{ matrix.gpu_targets }}" \
|
||||
-DGGML_HIP=ON \
|
||||
-DHIP_PLATFORM=amd \
|
||||
-DGGML_HIP_ROCWMMA_FATTN=ON \
|
||||
${{ env.CMAKE_ARGS }}
|
||||
cmake --build build --config Release -j $(nproc)
|
||||
|
||||
- name: Determine tag name
|
||||
id: tag
|
||||
uses: ./.github/actions/get-tag-name
|
||||
|
||||
- name: Pack artifacts
|
||||
id: pack_artifacts
|
||||
run: |
|
||||
cp LICENSE ./build/bin/
|
||||
tar -czvf llama-${{ steps.tag.outputs.name }}-bin-ubuntu-rocm-${{ matrix.ROCM_VERSION }}-${{ matrix.build }}.tar.gz --transform "s,./,llama-${{ steps.tag.outputs.name }}/," -C ./build/bin .
|
||||
|
||||
- name: Upload artifacts
|
||||
uses: actions/upload-artifact@v6
|
||||
with:
|
||||
path: llama-${{ steps.tag.outputs.name }}-bin-ubuntu-rocm-${{ matrix.ROCM_VERSION }}-${{ matrix.build }}.tar.gz
|
||||
name: llama-bin-ubuntu-rocm-${{ matrix.ROCM_VERSION }}-${{ matrix.build }}.tar.gz
|
||||
|
||||
windows-hip:
|
||||
runs-on: windows-2022
|
||||
|
||||
@@ -784,6 +880,7 @@ jobs:
|
||||
- windows-cuda
|
||||
- windows-sycl
|
||||
- windows-hip
|
||||
- ubuntu-22-rocm
|
||||
- ubuntu-22-cpu
|
||||
- ubuntu-22-vulkan
|
||||
- macOS-arm64
|
||||
@@ -868,6 +965,7 @@ jobs:
|
||||
**Linux:**
|
||||
- [Ubuntu x64 (CPU)](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-ubuntu-x64.tar.gz)
|
||||
- [Ubuntu x64 (Vulkan)](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-ubuntu-vulkan-x64.tar.gz)
|
||||
- [Ubuntu x64 (ROCm 7.2)](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-ubuntu-rocm-7.2-x64.tar.gz)
|
||||
- [Ubuntu s390x (CPU)](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-ubuntu-s390x.tar.gz)
|
||||
|
||||
**Windows:**
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
cmake_minimum_required(VERSION 3.14) # for add_link_options and implicit target directories.
|
||||
cmake_minimum_required(VERSION 3.14...3.28) # for add_link_options and implicit target directories.
|
||||
project("llama.cpp" C CXX)
|
||||
include(CheckIncludeFileCXX)
|
||||
|
||||
|
||||
@@ -803,7 +803,7 @@ inline void parse_msg_with_xml_tool_calls(common_chat_msg_parser & builder, cons
|
||||
}
|
||||
|
||||
// remove potential partial suffix
|
||||
if (builder.pos() == builder.input().size()) {
|
||||
if (builder.pos() == builder.input().size() && builder.is_partial()) {
|
||||
if (unclosed_reasoning_content.empty()) {
|
||||
rstrip(content);
|
||||
trim_potential_partial_word(content);
|
||||
|
||||
@@ -893,23 +893,6 @@ static void common_chat_parse_minimax_m2(common_chat_msg_parser & builder) {
|
||||
builder.consume_reasoning_with_xml_tool_calls(form, "<think>", "</think>");
|
||||
}
|
||||
|
||||
static void common_chat_parse_qwen3_coder_xml(common_chat_msg_parser & builder) {
|
||||
static const xml_tool_call_format form = ([]() {
|
||||
xml_tool_call_format form {};
|
||||
form.scope_start = "<tool_call>";
|
||||
form.tool_start = "<function=";
|
||||
form.tool_sep = ">";
|
||||
form.key_start = "<parameter=";
|
||||
form.key_val_sep = ">";
|
||||
form.val_end = "</parameter>";
|
||||
form.tool_end = "</function>";
|
||||
form.scope_end = "</tool_call>";
|
||||
form.trim_raw_argval = true;
|
||||
return form;
|
||||
})();
|
||||
builder.consume_reasoning_with_xml_tool_calls(form);
|
||||
}
|
||||
|
||||
static void common_chat_parse_kimi_k2(common_chat_msg_parser & builder) {
|
||||
static const xml_tool_call_format form = ([]() {
|
||||
xml_tool_call_format form {};
|
||||
@@ -1590,9 +1573,6 @@ static void common_chat_parse(common_chat_msg_parser & builder) {
|
||||
case COMMON_CHAT_FORMAT_KIMI_K2:
|
||||
common_chat_parse_kimi_k2(builder);
|
||||
break;
|
||||
case COMMON_CHAT_FORMAT_QWEN3_CODER_XML:
|
||||
common_chat_parse_qwen3_coder_xml(builder);
|
||||
break;
|
||||
case COMMON_CHAT_FORMAT_APRIEL_1_5:
|
||||
common_chat_parse_apriel_1_5(builder);
|
||||
break;
|
||||
|
||||
@@ -736,7 +736,6 @@ const char * common_chat_format_name(common_chat_format format) {
|
||||
case COMMON_CHAT_FORMAT_MINIMAX_M2: return "MiniMax-M2";
|
||||
case COMMON_CHAT_FORMAT_GLM_4_5: return "GLM 4.5";
|
||||
case COMMON_CHAT_FORMAT_KIMI_K2: return "Kimi K2";
|
||||
case COMMON_CHAT_FORMAT_QWEN3_CODER_XML: return "Qwen3 Coder";
|
||||
case COMMON_CHAT_FORMAT_APRIEL_1_5: return "Apriel 1.5";
|
||||
case COMMON_CHAT_FORMAT_XIAOMI_MIMO: return "Xiaomi MiMo";
|
||||
case COMMON_CHAT_FORMAT_SOLAR_OPEN: return "Solar Open";
|
||||
@@ -1522,14 +1521,17 @@ static common_chat_params common_chat_params_init_nemotron_v2(const common_chat_
|
||||
return data;
|
||||
}
|
||||
|
||||
static common_chat_params common_chat_params_init_nemotron_v3(const common_chat_template & tmpl, const struct templates_params & inputs) {
|
||||
static common_chat_params common_chat_params_init_qwen3_coder(const common_chat_template & tmpl, const struct templates_params & inputs) {
|
||||
common_chat_params data;
|
||||
|
||||
data.prompt = apply(tmpl, inputs);
|
||||
data.format = COMMON_CHAT_FORMAT_PEG_CONSTRUCTED;
|
||||
|
||||
// Nemotron Nano 3 and Step-3.5-Flash use the Qwen3 Coder tool calling with thinking
|
||||
bool supports_reasoning = (tmpl.source().find("<think>") != std::string::npos);
|
||||
|
||||
// Handle thinking tags appropriately based on inputs.enable_thinking
|
||||
if (string_ends_with(data.prompt, "<think>\n")) {
|
||||
if (supports_reasoning && string_ends_with(data.prompt, "<think>\n")) {
|
||||
if (!inputs.enable_thinking) {
|
||||
data.prompt += "</think>";
|
||||
} else {
|
||||
@@ -1538,19 +1540,21 @@ static common_chat_params common_chat_params_init_nemotron_v3(const common_chat_
|
||||
}
|
||||
|
||||
data.preserved_tokens = {
|
||||
"<think>",
|
||||
"</think>",
|
||||
"<tool_call>",
|
||||
"</tool_call>",
|
||||
};
|
||||
|
||||
if (supports_reasoning) {
|
||||
data.preserved_tokens.insert(data.preserved_tokens.end(), {"<think>", "</think>"});
|
||||
}
|
||||
|
||||
auto has_tools = inputs.tools.is_array() && !inputs.tools.empty();
|
||||
auto extract_reasoning = inputs.reasoning_format != COMMON_REASONING_FORMAT_NONE;
|
||||
auto include_grammar = true;
|
||||
|
||||
auto parser = build_chat_peg_constructed_parser([&](auto & p) {
|
||||
auto reasoning = p.eps();
|
||||
if (inputs.enable_thinking && extract_reasoning) {
|
||||
if (supports_reasoning && inputs.enable_thinking && extract_reasoning) {
|
||||
auto reasoning_content = p.reasoning(p.until("</think>")) + ("</think>" | p.end());
|
||||
if (data.thinking_forced_open) {
|
||||
reasoning = reasoning_content;
|
||||
@@ -1888,38 +1892,6 @@ static common_chat_params common_chat_params_init_minimax_m2(const common_chat_t
|
||||
return data;
|
||||
}
|
||||
|
||||
static common_chat_params common_chat_params_init_qwen3_coder_xml(const common_chat_template & tmpl, const struct templates_params & params) {
|
||||
common_chat_params data;
|
||||
data.grammar_lazy = params.tools.is_array() && !params.tools.empty() && params.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
|
||||
|
||||
data.prompt = apply(tmpl, params);
|
||||
data.format = COMMON_CHAT_FORMAT_QWEN3_CODER_XML;
|
||||
|
||||
data.preserved_tokens = {
|
||||
"<tool_call>",
|
||||
"</tool_call>",
|
||||
"<function=",
|
||||
"</function>",
|
||||
"<parameter=",
|
||||
"</parameter>",
|
||||
};
|
||||
|
||||
// build grammar for tool call
|
||||
static const xml_tool_call_format form {
|
||||
/* form.scope_start = */ "<tool_call>\n",
|
||||
/* form.tool_start = */ "<function=",
|
||||
/* form.tool_sep = */ ">\n",
|
||||
/* form.key_start = */ "<parameter=",
|
||||
/* form.key_val_sep = */ ">\n",
|
||||
/* form.val_end = */ "\n</parameter>\n",
|
||||
/* form.tool_end = */ "</function>\n",
|
||||
/* form.scope_end = */ "</tool_call>",
|
||||
};
|
||||
build_grammar_xml_tool_call(data, params.tools, form);
|
||||
|
||||
return data;
|
||||
}
|
||||
|
||||
static common_chat_params common_chat_params_init_kimi_k2(const common_chat_template & tmpl, const struct templates_params & params) {
|
||||
common_chat_params data;
|
||||
data.grammar_lazy = params.tools.is_array() && !params.tools.empty() && params.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
|
||||
@@ -3147,13 +3119,7 @@ static common_chat_params common_chat_templates_apply_jinja(
|
||||
src.find("<function=") != std::string::npos &&
|
||||
src.find("<parameter=") != std::string::npos) {
|
||||
workaround::func_args_not_string(params.messages);
|
||||
// Models with <think> support (Step-3.5-Flash, Nemotron 3 Nano) use the
|
||||
// Nemotron v3 PEG parser for streaming and schema-aware parameter parsing.
|
||||
// Qwen3-Coder has no <think> in its template.
|
||||
if (src.find("<think>") != std::string::npos) {
|
||||
return common_chat_params_init_nemotron_v3(tmpl, params);
|
||||
}
|
||||
return common_chat_params_init_qwen3_coder_xml(tmpl, params);
|
||||
return common_chat_params_init_qwen3_coder(tmpl, params);
|
||||
}
|
||||
|
||||
// Xiaomi MiMo format detection (must come before Hermes 2 Pro)
|
||||
|
||||
@@ -128,7 +128,6 @@ enum common_chat_format {
|
||||
COMMON_CHAT_FORMAT_GLM_4_5,
|
||||
COMMON_CHAT_FORMAT_MINIMAX_M2,
|
||||
COMMON_CHAT_FORMAT_KIMI_K2,
|
||||
COMMON_CHAT_FORMAT_QWEN3_CODER_XML,
|
||||
COMMON_CHAT_FORMAT_APRIEL_1_5,
|
||||
COMMON_CHAT_FORMAT_XIAOMI_MIMO,
|
||||
COMMON_CHAT_FORMAT_SOLAR_OPEN,
|
||||
|
||||
@@ -1274,6 +1274,9 @@ class TextModel(ModelBase):
|
||||
if chkhsh == "b4b8ca1f9769494fbd956ebc4c249de6131fb277a4a3345a7a92c7dd7a55808d":
|
||||
# ref: https://huggingface.co/jdopensource/JoyAI-LLM-Flash
|
||||
res = "joyai-llm"
|
||||
if chkhsh == "e4d54df1ebc1f2b91acd986c5b51aa50837d5faf7c7398e73c1f9e9ee5d19869":
|
||||
# ref: https://huggingface.co/kakaocorp/kanana-2-30b-a3b-instruct-2601
|
||||
res = "kanana2"
|
||||
|
||||
if res is None:
|
||||
logger.warning("\n")
|
||||
|
||||
@@ -152,6 +152,7 @@ models = [
|
||||
{"name": "exaone-moe", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/LGAI-EXAONE/K-EXAONE-236B-A23B", },
|
||||
{"name": "qwen35", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/Qwen/Qwen3.5-9B-Instruct", },
|
||||
{"name": "joyai-llm", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/jdopensource/JoyAI-LLM-Flash", },
|
||||
{"name": "kanana2", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/kakaocorp/kanana-2-30b-a3b-instruct-2601", },
|
||||
]
|
||||
|
||||
# some models are known to be broken upstream, so we will skip them as exceptions
|
||||
|
||||
@@ -1149,8 +1149,7 @@ struct ggml_cuda_graph {
|
||||
size_t num_nodes = 0;
|
||||
std::vector<cudaGraphNode_t> nodes;
|
||||
bool disable_due_to_gpu_arch = false;
|
||||
bool disable_due_to_too_many_updates = false;
|
||||
int number_consecutive_updates = 0;
|
||||
bool warmup_complete = false;
|
||||
std::vector<ggml_cuda_graph_node_properties> props;
|
||||
|
||||
// these are extra tensors (inputs) that participate in the ggml graph but are not nodes
|
||||
@@ -1159,21 +1158,9 @@ struct ggml_cuda_graph {
|
||||
// ref: https://github.com/ggml-org/llama.cpp/pull/19165
|
||||
std::vector<ggml_cuda_graph_node_properties> extra;
|
||||
|
||||
void record_update(bool use_graph, bool update_required) {
|
||||
if (use_graph && update_required) {
|
||||
number_consecutive_updates++;
|
||||
} else {
|
||||
number_consecutive_updates = 0;
|
||||
}
|
||||
if (number_consecutive_updates >= 4) {
|
||||
GGML_LOG_DEBUG("%s: disabling CUDA graphs due to too many consecutive updates\n", __func__);
|
||||
disable_due_to_too_many_updates = true;
|
||||
}
|
||||
}
|
||||
|
||||
bool is_enabled() const {
|
||||
static const bool disable_cuda_graphs_due_to_env = (getenv("GGML_CUDA_DISABLE_GRAPHS") != nullptr);
|
||||
return !(disable_due_to_gpu_arch || disable_cuda_graphs_due_to_env || disable_due_to_too_many_updates);
|
||||
return !(disable_due_to_gpu_arch || disable_cuda_graphs_due_to_env);
|
||||
}
|
||||
#endif
|
||||
};
|
||||
|
||||
@@ -2979,10 +2979,6 @@ static bool ggml_cuda_graph_update_required(ggml_backend_cuda_context * cuda_ctx
|
||||
const void * graph_key = ggml_cuda_graph_get_key(cgraph);
|
||||
ggml_cuda_graph * graph = cuda_ctx->cuda_graph(graph_key);
|
||||
|
||||
if (graph->instance == nullptr) {
|
||||
res = true;
|
||||
}
|
||||
|
||||
// Check if the graph size has changed
|
||||
if (graph->props.size() != (size_t)cgraph->n_nodes) {
|
||||
res = true;
|
||||
@@ -3931,14 +3927,35 @@ static enum ggml_status ggml_backend_cuda_graph_compute(ggml_backend_t backend,
|
||||
#ifdef USE_CUDA_GRAPH
|
||||
graph_key = ggml_cuda_graph_get_key(cgraph);
|
||||
|
||||
use_cuda_graph = ggml_cuda_graph_set_enabled(cuda_ctx, graph_key);
|
||||
ggml_cuda_graph_set_enabled(cuda_ctx, graph_key);
|
||||
|
||||
ggml_cuda_graph * graph = cuda_ctx->cuda_graph(graph_key);
|
||||
if (graph->is_enabled()) {
|
||||
cuda_graph_update_required = ggml_cuda_graph_update_required(cuda_ctx, cgraph);
|
||||
use_cuda_graph = ggml_cuda_graph_check_compability(cgraph);
|
||||
const bool graph_compatible = ggml_cuda_graph_check_compability(cgraph);
|
||||
if (graph_compatible) {
|
||||
const bool properties_changed = ggml_cuda_graph_update_required(cuda_ctx, cgraph);
|
||||
|
||||
graph->record_update(use_cuda_graph, cuda_graph_update_required);
|
||||
if (!graph->warmup_complete) {
|
||||
// Warmup: need at least 2 calls with no property change on the 2nd call
|
||||
if (!properties_changed) {
|
||||
graph->warmup_complete = true;
|
||||
GGML_LOG_DEBUG("%s: CUDA graph warmup complete\n", __func__);
|
||||
use_cuda_graph = true;
|
||||
cuda_graph_update_required = true;
|
||||
}
|
||||
// else: properties changed or first call - execute directly (use_cuda_graph stays false)
|
||||
} else {
|
||||
// Post-warmup: normal CUDA graph operation
|
||||
if (properties_changed) {
|
||||
// Properties changed - reset warmup, execute directly until stable again
|
||||
graph->warmup_complete = false;
|
||||
GGML_LOG_DEBUG("%s: CUDA graph warmup reset\n", __func__);
|
||||
} else {
|
||||
use_cuda_graph = true;
|
||||
cuda_graph_update_required = graph->instance == nullptr;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif // USE_CUDA_GRAPH
|
||||
|
||||
|
||||
@@ -5,7 +5,7 @@ import os
|
||||
import sys
|
||||
import subprocess
|
||||
|
||||
HTTPLIB_VERSION = "d4180e923f846b44a3d30acd938438d6e64fc9f6"
|
||||
HTTPLIB_VERSION = "refs/tags/v0.33.1"
|
||||
|
||||
vendor = {
|
||||
"https://github.com/nlohmann/json/releases/latest/download/json.hpp": "vendor/nlohmann/json.hpp",
|
||||
|
||||
@@ -1703,8 +1703,8 @@ void llama_model::load_hparams(llama_model_loader & ml) {
|
||||
} break;
|
||||
case LLM_ARCH_DEEPSEEK2:
|
||||
{
|
||||
// lite variants include DeepSeek-V2-Lite, GigaChat3-10B-A1.8B
|
||||
const bool is_lite = (hparams.n_layer == 27 || hparams.n_layer == 26);
|
||||
// lite variants include DeepSeek-V2-Lite, GigaChat3-10B-A1.8B, Kanana-2-30B-A3B
|
||||
const bool is_lite = (hparams.n_layer == 27 || hparams.n_layer == 26 || (hparams.n_layer == 48 && n_vocab == 128256));
|
||||
|
||||
ml.get_key(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, hparams.f_norm_rms_eps);
|
||||
ml.get_key(LLM_KV_LEADING_DENSE_BLOCK_COUNT, hparams.n_layer_dense_lead);
|
||||
|
||||
@@ -2027,7 +2027,8 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
|
||||
pre_type = LLAMA_VOCAB_PRE_TYPE_QWEN2;
|
||||
} else if (
|
||||
tokenizer_pre == "gpt-4o" ||
|
||||
tokenizer_pre == "llama4") {
|
||||
tokenizer_pre == "llama4" ||
|
||||
tokenizer_pre == "kanana2") {
|
||||
pre_type = LLAMA_VOCAB_PRE_TYPE_GPT4O;
|
||||
clean_spaces = false;
|
||||
} else if (
|
||||
|
||||
@@ -229,6 +229,20 @@ common_chat_tool python_tool {
|
||||
"required": ["code"]
|
||||
})",
|
||||
};
|
||||
common_chat_tool todo_list_tool {
|
||||
/* .name = */ "todo_list",
|
||||
/* .description = */ "Create or update the todo list",
|
||||
/* .parameters = */ R"({
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"todos": {
|
||||
"type": "array",
|
||||
"description": "List of TODO list items"
|
||||
}
|
||||
},
|
||||
"required": ["todos"]
|
||||
})",
|
||||
};
|
||||
common_chat_tool code_interpreter_tool {
|
||||
/* .name = */ "code_interpreter",
|
||||
/* .description = */ "an ipython interpreter",
|
||||
@@ -3018,542 +3032,6 @@ Hey there!<|im_end|>
|
||||
);
|
||||
}
|
||||
|
||||
// Test Qwen3-Coder XML format
|
||||
{
|
||||
// Basic XML tool call parsing
|
||||
assert_msg_equals(
|
||||
message_assist_call,
|
||||
test_chat_parse(
|
||||
"<tool_call>\n"
|
||||
" <function=special_function>\n"
|
||||
" <parameter=arg1>\n"
|
||||
" 1\n"
|
||||
" </parameter>\n"
|
||||
" </function>\n"
|
||||
"</tool_call>",
|
||||
/* is_partial= */ false,
|
||||
{COMMON_CHAT_FORMAT_QWEN3_CODER_XML}));
|
||||
|
||||
// Multiple parameters with different types
|
||||
common_chat_msg expected_multi_param;
|
||||
expected_multi_param.role = "assistant";
|
||||
expected_multi_param.tool_calls = {
|
||||
{ "complex_function", "{\"name\":\"John Doe\",\"age\":30,\"active\":true,\"score\":95.5}", "" }
|
||||
};
|
||||
|
||||
test_parser_with_streaming(expected_multi_param,
|
||||
"<tool_call>\n"
|
||||
" <function=complex_function>\n"
|
||||
" <parameter=name>\n"
|
||||
" John Doe\n"
|
||||
" </parameter>\n"
|
||||
" <parameter=age>\n"
|
||||
" 30\n"
|
||||
" </parameter>\n"
|
||||
" <parameter=active>\n"
|
||||
" true\n"
|
||||
" </parameter>\n"
|
||||
" <parameter=score>\n"
|
||||
" 95.5\n"
|
||||
" </parameter>\n"
|
||||
" </function>\n"
|
||||
"</tool_call>",
|
||||
[&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); });
|
||||
|
||||
// Special characters and Unicode
|
||||
common_chat_msg expected_special_chars;
|
||||
expected_special_chars.role = "assistant";
|
||||
expected_special_chars.tool_calls = {
|
||||
{ "unicode_function", "{\"message\":\"Hello 世界! 🌍 Special chars: @#$%^&*()\"}", "" }
|
||||
};
|
||||
|
||||
test_parser_with_streaming(expected_special_chars,
|
||||
"<tool_call>\n"
|
||||
" <function=unicode_function>\n"
|
||||
" <parameter=message>\n"
|
||||
" Hello 世界! 🌍 Special chars: @#$%^&*()\n"
|
||||
" </parameter>\n"
|
||||
" </function>\n"
|
||||
"</tool_call>",
|
||||
[&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); });
|
||||
|
||||
// Multiline content with newlines and indentation
|
||||
common_chat_msg expected_multiline;
|
||||
expected_multiline.role = "assistant";
|
||||
expected_multiline.tool_calls = {
|
||||
{ "code_function", "{\"code\":\"def hello():\\n print(\\\"Hello, World!\\\")\\n return True\"}", "" }
|
||||
};
|
||||
|
||||
test_parser_with_streaming(expected_multiline,
|
||||
"<tool_call>\n"
|
||||
" <function=code_function>\n"
|
||||
" <parameter=code>\n"
|
||||
"def hello():\n"
|
||||
" print(\"Hello, World!\")\n"
|
||||
" return True\n"
|
||||
" </parameter>\n"
|
||||
" </function>\n"
|
||||
"</tool_call>",
|
||||
[&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); });
|
||||
|
||||
// JSON object as parameter value
|
||||
common_chat_msg expected_json_param;
|
||||
expected_json_param.role = "assistant";
|
||||
expected_json_param.tool_calls = {
|
||||
{ "json_function", "{\"config\":{\"host\":\"localhost\",\"port\":8080,\"ssl\":false}}", "" }
|
||||
};
|
||||
|
||||
test_parser_with_streaming(
|
||||
expected_json_param,
|
||||
"<tool_call>\n"
|
||||
" <function=json_function>\n"
|
||||
" <parameter=config>\n"
|
||||
" {\"host\": \"localhost\", \"port\": 8080, \"ssl\": false}\n"
|
||||
" </parameter>\n"
|
||||
" </function>\n"
|
||||
"</tool_call>",
|
||||
[&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); });
|
||||
|
||||
// Array as parameter value
|
||||
common_chat_msg expected_array_param;
|
||||
expected_array_param.role = "assistant";
|
||||
expected_array_param.tool_calls = {
|
||||
{ "array_function", "{\"items\":[\"apple\",\"banana\",\"cherry\"]}", "" }
|
||||
};
|
||||
|
||||
test_parser_with_streaming(
|
||||
expected_array_param,
|
||||
"<tool_call>\n"
|
||||
" <function=array_function>\n"
|
||||
" <parameter=items>\n"
|
||||
" [\"apple\", \"banana\", \"cherry\"]\n"
|
||||
" </parameter>\n"
|
||||
" </function>\n"
|
||||
"</tool_call>",
|
||||
[&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); });
|
||||
|
||||
// Empty parameter
|
||||
common_chat_msg expected_empty_param;
|
||||
expected_empty_param.role = "assistant";
|
||||
expected_empty_param.tool_calls = {
|
||||
{ "empty_function", "{\"empty_param\":\"\"}", "" }
|
||||
};
|
||||
|
||||
test_parser_with_streaming(
|
||||
expected_empty_param,
|
||||
"<tool_call>\n"
|
||||
" <function=empty_function>\n"
|
||||
" <parameter=empty_param>\n"
|
||||
" </parameter>\n"
|
||||
" </function>\n"
|
||||
"</tool_call>",
|
||||
[&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); });
|
||||
|
||||
// Boolean values (true/false)
|
||||
common_chat_msg expected_boolean;
|
||||
expected_boolean.role = "assistant";
|
||||
expected_boolean.tool_calls = {
|
||||
{ "boolean_function", "{\"enabled\":true,\"debug\":false}", "" }
|
||||
};
|
||||
|
||||
test_parser_with_streaming(
|
||||
expected_boolean,
|
||||
"<tool_call>\n"
|
||||
" <function=boolean_function>\n"
|
||||
" <parameter=enabled>\n"
|
||||
" true\n"
|
||||
" </parameter>\n"
|
||||
" <parameter=debug>\n"
|
||||
" false\n"
|
||||
" </parameter>\n"
|
||||
" </function>\n"
|
||||
"</tool_call>",
|
||||
[&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); });
|
||||
|
||||
// Null value
|
||||
common_chat_msg expected_null;
|
||||
expected_null.role = "assistant";
|
||||
expected_null.tool_calls = {
|
||||
{ "null_function", "{\"optional_param\":null}", "" }
|
||||
};
|
||||
|
||||
test_parser_with_streaming(
|
||||
expected_null,
|
||||
"<tool_call>\n"
|
||||
" <function=null_function>\n"
|
||||
" <parameter=optional_param>\n"
|
||||
" null\n"
|
||||
" </parameter>\n"
|
||||
" </function>\n"
|
||||
"</tool_call>",
|
||||
[&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); });
|
||||
|
||||
// Negative numbers and scientific notation
|
||||
common_chat_msg expected_numbers;
|
||||
expected_numbers.role = "assistant";
|
||||
expected_numbers.tool_calls = {
|
||||
{ "math_function", "{\"negative\":-42,\"decimal\":-3.14,\"scientific\":1.23e-4}", "" }
|
||||
};
|
||||
|
||||
test_parser_with_streaming(
|
||||
expected_numbers,
|
||||
"<tool_call>\n"
|
||||
" <function=math_function>\n"
|
||||
" <parameter=negative>\n"
|
||||
" -42\n"
|
||||
" </parameter>\n"
|
||||
" <parameter=decimal>\n"
|
||||
" -3.14\n"
|
||||
" </parameter>\n"
|
||||
" <parameter=scientific>\n"
|
||||
" 1.23e-4\n"
|
||||
" </parameter>\n"
|
||||
" </function>\n"
|
||||
"</tool_call>",
|
||||
[&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); });
|
||||
|
||||
// XML-like content in parameters (should be escaped)
|
||||
common_chat_msg expected_xml_content;
|
||||
expected_xml_content.role = "assistant";
|
||||
expected_xml_content.tool_calls = {
|
||||
{ "xml_function", "{\"xml_content\":\"<root><item>value</item></root>\"}", "" }
|
||||
};
|
||||
|
||||
test_parser_with_streaming(
|
||||
expected_xml_content,
|
||||
"<tool_call>\n"
|
||||
" <function=xml_function>\n"
|
||||
" <parameter=xml_content>\n"
|
||||
" <root><item>value</item></root>\n"
|
||||
" </parameter>\n"
|
||||
" </function>\n"
|
||||
"</tool_call>",
|
||||
[&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); });
|
||||
|
||||
// Quotes and escape characters
|
||||
common_chat_msg expected_quotes;
|
||||
expected_quotes.role = "assistant";
|
||||
expected_quotes.tool_calls = {
|
||||
{ "quote_function", "{\"message\":\"She said \\\"Hello!\\\" and left.\"}", "" }
|
||||
};
|
||||
|
||||
test_parser_with_streaming(
|
||||
expected_quotes,
|
||||
"<tool_call>\n"
|
||||
" <function=quote_function>\n"
|
||||
" <parameter=message>\n"
|
||||
" She said \"Hello!\" and left.\n"
|
||||
" </parameter>\n"
|
||||
" </function>\n"
|
||||
"</tool_call>",
|
||||
[&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); });
|
||||
|
||||
// Long parameter value (simplified)
|
||||
std::string long_text = "This is a long text parameter that should test the parser's ability to handle larger amounts of text data.";
|
||||
|
||||
common_chat_msg expected_long_text;
|
||||
expected_long_text.role = "assistant";
|
||||
expected_long_text.tool_calls = {
|
||||
{ "long_function", "{\"long_text\":\"" + long_text + "\"}", "" }
|
||||
};
|
||||
|
||||
test_parser_with_streaming(
|
||||
expected_long_text,
|
||||
"<tool_call>\n"
|
||||
" <function=long_function>\n"
|
||||
" <parameter=long_text>\n"
|
||||
" " + long_text + "\n"
|
||||
" </parameter>\n"
|
||||
" </function>\n"
|
||||
"</tool_call>",
|
||||
[&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); });
|
||||
|
||||
// Mixed content with text before and after tool call
|
||||
common_chat_msg expected_mixed_content;
|
||||
expected_mixed_content.role = "assistant";
|
||||
expected_mixed_content.content = "I'll help you search for products. ";
|
||||
expected_mixed_content.tool_calls = {
|
||||
{ "search_function", "{\"query\":\"laptops\"}", "" }
|
||||
};
|
||||
|
||||
test_parser_with_streaming(
|
||||
expected_mixed_content,
|
||||
"I'll help you search for products. <tool_call>\n"
|
||||
" <function=search_function>\n"
|
||||
" <parameter=query>\n"
|
||||
" laptops\n"
|
||||
" </parameter>\n"
|
||||
" </function>\n"
|
||||
"</tool_call>",
|
||||
[&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); });
|
||||
|
||||
// Compact format (no extra whitespace)
|
||||
common_chat_msg expected_compact;
|
||||
expected_compact.role = "assistant";
|
||||
expected_compact.tool_calls = {
|
||||
{ "compact_function", "{\"param\":\"value\"}", "" }
|
||||
};
|
||||
|
||||
test_parser_with_streaming(
|
||||
expected_compact,
|
||||
"<tool_call><function=compact_function><parameter=param>value</parameter></function></tool_call>",
|
||||
[&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); });
|
||||
|
||||
// Function name with underscores and numbers
|
||||
common_chat_msg expected_complex_name;
|
||||
expected_complex_name.role = "assistant";
|
||||
expected_complex_name.tool_calls = {
|
||||
{ "get_user_data_v2", "{\"user_id\":12345}", "" }
|
||||
};
|
||||
|
||||
test_parser_with_streaming(
|
||||
expected_complex_name,
|
||||
"<tool_call>\n"
|
||||
" <function=get_user_data_v2>\n"
|
||||
" <parameter=user_id>\n"
|
||||
" 12345\n"
|
||||
" </parameter>\n"
|
||||
" </function>\n"
|
||||
"</tool_call>",
|
||||
[&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); });
|
||||
|
||||
// Parameter names with underscores and numbers
|
||||
common_chat_msg expected_complex_params;
|
||||
expected_complex_params.role = "assistant";
|
||||
expected_complex_params.tool_calls = {
|
||||
{ "test_function", "{\"param_1\":\"value1\",\"param_2_name\":\"value2\",\"param3\":123}", "" }
|
||||
};
|
||||
|
||||
test_parser_with_streaming(
|
||||
expected_complex_params,
|
||||
"<tool_call>\n"
|
||||
" <function=test_function>\n"
|
||||
" <parameter=param_1>\n"
|
||||
" value1\n"
|
||||
" </parameter>\n"
|
||||
" <parameter=param_2_name>\n"
|
||||
" value2\n"
|
||||
" </parameter>\n"
|
||||
" <parameter=param3>\n"
|
||||
" 123\n"
|
||||
" </parameter>\n"
|
||||
" </function>\n"
|
||||
"</tool_call>",
|
||||
[&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); });
|
||||
|
||||
// Very deeply nested XML content in parameter
|
||||
common_chat_msg expected_deep_xml;
|
||||
expected_deep_xml.role = "assistant";
|
||||
expected_deep_xml.tool_calls = {
|
||||
{ "xml_parser", "{\"xml\":\"<root><level1><level2><level3>deep content</level3></level2></level1></root>\"}", "" }
|
||||
};
|
||||
|
||||
test_parser_with_streaming(
|
||||
expected_deep_xml,
|
||||
"<tool_call>\n"
|
||||
" <function=xml_parser>\n"
|
||||
" <parameter=xml>\n"
|
||||
" <root><level1><level2><level3>deep content</level3></level2></level1></root>\n"
|
||||
" </parameter>\n"
|
||||
" </function>\n"
|
||||
"</tool_call>",
|
||||
[&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); });
|
||||
|
||||
// Parameter with only whitespace
|
||||
common_chat_msg expected_whitespace_param;
|
||||
expected_whitespace_param.role = "assistant";
|
||||
expected_whitespace_param.tool_calls = {
|
||||
{ "whitespace_function", "{\"spaces\":\"\"}", "" }
|
||||
};
|
||||
|
||||
test_parser_with_streaming(
|
||||
expected_whitespace_param,
|
||||
"<tool_call>\n"
|
||||
" <function=whitespace_function>\n"
|
||||
" <parameter=spaces>\n"
|
||||
" \n"
|
||||
" </parameter>\n"
|
||||
" </function>\n"
|
||||
"</tool_call>",
|
||||
[&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); });
|
||||
|
||||
// Parameter with tabs and mixed whitespace
|
||||
common_chat_msg expected_mixed_whitespace;
|
||||
expected_mixed_whitespace.role = "assistant";
|
||||
expected_mixed_whitespace.tool_calls = {
|
||||
{ "tab_function", "{\"content\":\"line1\\n\\tindented line\\n spaces\"}", "" }
|
||||
};
|
||||
|
||||
test_parser_with_streaming(
|
||||
expected_mixed_whitespace,
|
||||
"<tool_call>\n"
|
||||
" <function=tab_function>\n"
|
||||
" <parameter=content>\n"
|
||||
"line1\n"
|
||||
"\tindented line\n"
|
||||
" spaces\n"
|
||||
" </parameter>\n"
|
||||
" </function>\n"
|
||||
"</tool_call>",
|
||||
[&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); });
|
||||
|
||||
// Control characters and special Unicode
|
||||
common_chat_msg expected_control_chars;
|
||||
expected_control_chars.role = "assistant";
|
||||
expected_control_chars.tool_calls = {
|
||||
{ "control_function", "{\"text\":\"Line1\\nLine2\\tTabbed\\rCarriage return\"}", "" }
|
||||
};
|
||||
|
||||
test_parser_with_streaming(
|
||||
expected_control_chars,
|
||||
"<tool_call>\n"
|
||||
" <function=control_function>\n"
|
||||
" <parameter=text>\n"
|
||||
"Line1\nLine2\tTabbed\rCarriage return\n"
|
||||
" </parameter>\n"
|
||||
" </function>\n"
|
||||
"</tool_call>",
|
||||
[&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); });
|
||||
|
||||
// Emoji and extended Unicode characters
|
||||
common_chat_msg expected_emoji;
|
||||
expected_emoji.role = "assistant";
|
||||
expected_emoji.tool_calls = {
|
||||
{ "emoji_function", "{\"message\":\"Hello! 👋 🌟 🚀 Testing emojis: 😀😃😄😁 and symbols: ∑∏∆∇\"}", "" }
|
||||
};
|
||||
|
||||
test_parser_with_streaming(
|
||||
expected_emoji,
|
||||
"<tool_call>\n"
|
||||
" <function=emoji_function>\n"
|
||||
" <parameter=message>\n"
|
||||
" Hello! 👋 🌟 🚀 Testing emojis: 😀😃😄😁 and symbols: ∑∏∆∇\n"
|
||||
" </parameter>\n"
|
||||
" </function>\n"
|
||||
"</tool_call>",
|
||||
[&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); });
|
||||
|
||||
// Mathematical expressions and formulas
|
||||
common_chat_msg expected_math;
|
||||
expected_math.role = "assistant";
|
||||
expected_math.tool_calls = {
|
||||
{ "math_function", "{\"formula\":\"E = mc² and ∫f(x)dx = F(x) + C\"}", "" }
|
||||
};
|
||||
|
||||
test_parser_with_streaming(
|
||||
expected_math,
|
||||
"<tool_call>\n"
|
||||
" <function=math_function>\n"
|
||||
" <parameter=formula>\n"
|
||||
" E = mc² and ∫f(x)dx = F(x) + C\n"
|
||||
" </parameter>\n"
|
||||
" </function>\n"
|
||||
"</tool_call>",
|
||||
[&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); });
|
||||
|
||||
// SQL injection-like content (should be safely escaped)
|
||||
common_chat_msg expected_sql;
|
||||
expected_sql.role = "assistant";
|
||||
expected_sql.tool_calls = {
|
||||
{ "sql_function", "{\"query\":\"SELECT * FROM users WHERE id = 1; DROP TABLE users; --\"}", "" }
|
||||
};
|
||||
|
||||
test_parser_with_streaming(
|
||||
expected_sql,
|
||||
"<tool_call>\n"
|
||||
" <function=sql_function>\n"
|
||||
" <parameter=query>\n"
|
||||
" SELECT * FROM users WHERE id = 1; DROP TABLE users; --\n"
|
||||
" </parameter>\n"
|
||||
" </function>\n"
|
||||
"</tool_call>",
|
||||
[&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); });
|
||||
|
||||
// HTML/XML injection content
|
||||
common_chat_msg expected_html;
|
||||
expected_html.role = "assistant";
|
||||
expected_html.tool_calls = {
|
||||
{ "html_function", "{\"content\":\"<script>alert('xss')</script><img src=x onerror=alert(1)>\"}", "" }
|
||||
};
|
||||
|
||||
test_parser_with_streaming(
|
||||
expected_html,
|
||||
"<tool_call>\n"
|
||||
" <function=html_function>\n"
|
||||
" <parameter=content>\n"
|
||||
" <script>alert('xss')</script><img src=x onerror=alert(1)>\n"
|
||||
" </parameter>\n"
|
||||
" </function>\n"
|
||||
"</tool_call>",
|
||||
[&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); });
|
||||
|
||||
// Binary-like content (base64)
|
||||
common_chat_msg expected_binary;
|
||||
expected_binary.role = "assistant";
|
||||
expected_binary.tool_calls = {
|
||||
{ "binary_function", "{\"data\":\"SGVsbG8gV29ybGQhIFRoaXMgaXMgYmFzZTY0IGVuY29kZWQgdGV4dC4=\"}", "" }
|
||||
};
|
||||
|
||||
test_parser_with_streaming(
|
||||
expected_binary,
|
||||
"<tool_call>\n"
|
||||
" <function=binary_function>\n"
|
||||
" <parameter=data>\n"
|
||||
" SGVsbG8gV29ybGQhIFRoaXMgaXMgYmFzZTY0IGVuY29kZWQgdGV4dC4=\n"
|
||||
" </parameter>\n"
|
||||
" </function>\n"
|
||||
"</tool_call>",
|
||||
[&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); });
|
||||
|
||||
// Very large numbers (should be parsed as scientific notation)
|
||||
common_chat_msg expected_large_numbers;
|
||||
expected_large_numbers.role = "assistant";
|
||||
expected_large_numbers.tool_calls = {
|
||||
{ "number_function", "{\"big_int\":1e+60}", "" } // Large number becomes scientific notation
|
||||
};
|
||||
|
||||
test_parser_with_streaming(
|
||||
expected_large_numbers,
|
||||
"<tool_call>\n"
|
||||
" <function=number_function>\n"
|
||||
" <parameter=big_int>\n"
|
||||
" 999999999999999999999999999999999999999999999999999999999999\n"
|
||||
" </parameter>\n"
|
||||
" </function>\n"
|
||||
"</tool_call>",
|
||||
[&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); });
|
||||
}
|
||||
|
||||
{
|
||||
// Qwen3-Coder template
|
||||
auto tmpls = read_templates("models/templates/Qwen3-Coder.jinja");
|
||||
common_chat_templates_inputs inputs;
|
||||
inputs.messages = { message_user };
|
||||
|
||||
common_chat_tool qwen_union_tool {
|
||||
/* .name = */ "qwen_union",
|
||||
/* .description = */ "Test tool for union/anyOf handling",
|
||||
/* .parameters = */ R"({
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"priority": { "type": ["number", "null"] },
|
||||
"maybe_text": { "anyOf": [ { "type": "string" } ] },
|
||||
"config": { "anyOf": [ { "type": "object" }, { "type": "null" } ] }
|
||||
},
|
||||
"required": []
|
||||
})",
|
||||
};
|
||||
inputs.tools = { qwen_union_tool };
|
||||
|
||||
auto params = common_chat_templates_apply(tmpls.get(), inputs);
|
||||
assert_equals(COMMON_CHAT_FORMAT_QWEN3_CODER_XML, params.format);
|
||||
assert_equals(false, params.grammar.empty());
|
||||
|
||||
// Grammar should compile successfully
|
||||
auto grammar = build_grammar(params.grammar);
|
||||
GGML_ASSERT(grammar && "Failed to build Qwen3-Coder grammar with union types");
|
||||
}
|
||||
|
||||
{
|
||||
// Step-3.5-Flash template: uses same XML output format as Qwen3-Coder and Nemotron v3,
|
||||
// but with <think> support. Routes to the Nemotron v3 PEG parser for streaming and
|
||||
@@ -3665,6 +3143,135 @@ static void test_template_output_peg_parsers() {
|
||||
});
|
||||
}
|
||||
|
||||
{
|
||||
// Qwen3-Coder
|
||||
auto tmpls = read_templates("models/templates/Qwen3-Coder.jinja");
|
||||
|
||||
// Test basic message
|
||||
test_peg_parser(tmpls.get(), [&](auto & t) {
|
||||
t.input = "Hello, world!\nWhat's up?";
|
||||
t.expect = message_assist;
|
||||
});
|
||||
|
||||
// Test tool call
|
||||
test_peg_parser(tmpls.get(), [&](auto & t) {
|
||||
t.input =
|
||||
"<tool_call>\n"
|
||||
"<function=special_function>\n"
|
||||
"<parameter=arg1>\n"
|
||||
"1\n"
|
||||
"</parameter>\n"
|
||||
"</function>\n"
|
||||
"</tool_call>";
|
||||
t.params.tools = {special_function_tool};
|
||||
t.expect = message_assist_call;
|
||||
});
|
||||
|
||||
// Test parallel tool calls
|
||||
test_peg_parser(tmpls.get(), [&](auto & t) {
|
||||
t.input =
|
||||
"<tool_call>\n"
|
||||
"<function=special_function>\n"
|
||||
"<parameter=arg1>\n"
|
||||
"1\n"
|
||||
"</parameter>\n"
|
||||
"</function>\n"
|
||||
"</tool_call>\n"
|
||||
"<tool_call>\n"
|
||||
"<function=special_function_with_opt>\n"
|
||||
"<parameter=arg1>\n"
|
||||
"1\n"
|
||||
"</parameter>\n"
|
||||
"<parameter=arg2>\n"
|
||||
"2\n"
|
||||
"</parameter>\n"
|
||||
"</function>\n"
|
||||
"</tool_call>";
|
||||
t.params.parallel_tool_calls = true;
|
||||
t.params.tools = {special_function_tool, special_function_tool_with_optional_param};
|
||||
|
||||
t.expect.tool_calls = {{
|
||||
/* .name = */ "special_function",
|
||||
/* .arguments = */ R"({"arg1": 1})",
|
||||
/* .id = */ {},
|
||||
}, {
|
||||
/* .name = */ "special_function_with_opt",
|
||||
/* .arguments = */ R"({"arg1": 1, "arg2": 2})",
|
||||
/* .id = */ {},
|
||||
}};
|
||||
});
|
||||
|
||||
// Test tool call with string parameter
|
||||
test_peg_parser(tmpls.get(), [&](auto & t) {
|
||||
t.input =
|
||||
"<tool_call>\n"
|
||||
"<function=python>\n"
|
||||
"<parameter=code>\n"
|
||||
"def hello():\n"
|
||||
" print(\"Hello, world!\")\n"
|
||||
"\n"
|
||||
"hello()\n"
|
||||
"</parameter>\n"
|
||||
"</function>\n"
|
||||
"</tool_call>";
|
||||
t.params.tools = {python_tool};
|
||||
|
||||
t.expect.tool_calls = {{
|
||||
/* .name = */ "python",
|
||||
/* .arguments = */ "{\"code\": \"def hello():\\n print(\\\"Hello, world!\\\")\\n\\nhello()\"}",
|
||||
/* .id = */ {},
|
||||
}};
|
||||
});
|
||||
|
||||
// Test tool call with JSON parameter
|
||||
test_peg_parser(tmpls.get(), [&](auto & t) {
|
||||
t.input =
|
||||
"<tool_call>\n"
|
||||
"<function=todo_list>\n"
|
||||
"<parameter=todos>\n"
|
||||
"[{\"item\": \"Check stuff\", \"selected\": false}, {\"item\": \"Prepare stuff\", \"selected\": true}]\n"
|
||||
"</parameter>\n"
|
||||
"</function>\n"
|
||||
"</tool_call>";
|
||||
t.params.tools = {todo_list_tool};
|
||||
|
||||
t.expect.tool_calls = {{
|
||||
/* .name = */ "todo_list",
|
||||
/* .arguments = */ "{\"todos\": [{\"item\": \"Check stuff\", \"selected\": false}, {\"item\": \"Prepare stuff\", \"selected\": true}]}",
|
||||
/* .id = */ {},
|
||||
}};
|
||||
});
|
||||
|
||||
// Test tool call with string parameter and no closing </parameter> tag
|
||||
test_peg_parser(tmpls.get(), [&](auto & t) {
|
||||
t.input =
|
||||
"<tool_call>\n"
|
||||
"<function=python>\n"
|
||||
"<parameter=code>\n"
|
||||
"def hello():\n"
|
||||
" print(\"Hello, world!\")\n"
|
||||
"\n"
|
||||
"hello()\n"
|
||||
"</function>\n"
|
||||
"</tool_call>";
|
||||
t.params.tools = {python_tool};
|
||||
|
||||
t.expect.tool_calls = {{
|
||||
/* .name = */ "python",
|
||||
/* .arguments = */ "{\"code\": \"def hello():\\n print(\\\"Hello, world!\\\")\\n\\nhello()\"}",
|
||||
/* .id = */ {},
|
||||
}};
|
||||
});
|
||||
|
||||
// Test response format
|
||||
test_peg_parser(tmpls.get(), [&](auto & t) {
|
||||
t.input = R"({"amount": 123.45, "date": "2025-12-03"})";
|
||||
t.params.json_schema = invoice_schema;
|
||||
|
||||
t.expect.content = R"({"amount": 123.45, "date": "2025-12-03"})";
|
||||
});
|
||||
}
|
||||
|
||||
{
|
||||
// NVIDIA Nemotron-3 Nano
|
||||
auto tmpls = read_templates("models/templates/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16.jinja");
|
||||
|
||||
Binary file not shown.
@@ -1105,6 +1105,8 @@ json convert_responses_to_chatcmpl(const json & response_body) {
|
||||
};
|
||||
|
||||
for (json item : input_value) {
|
||||
bool merge_prev = !chatcmpl_messages.empty() && chatcmpl_messages.back().value("role", "") == "assistant";
|
||||
|
||||
if (exists_and_is_string(item, "content")) {
|
||||
// #responses_create-input-input_item_list-input_message-content-text_input
|
||||
// Only "Input message" contains item["content"]::string
|
||||
@@ -1193,7 +1195,7 @@ json convert_responses_to_chatcmpl(const json & response_body) {
|
||||
item.at("type") == "message"
|
||||
) {
|
||||
// #responses_create-input-input_item_list-item-output_message
|
||||
std::vector<json> chatcmpl_content;
|
||||
auto chatcmpl_content = json::array();
|
||||
|
||||
for (const auto & output_text : item.at("content")) {
|
||||
const std::string type = json_value(output_text, "type", std::string());
|
||||
@@ -1210,10 +1212,19 @@ json convert_responses_to_chatcmpl(const json & response_body) {
|
||||
});
|
||||
}
|
||||
|
||||
item.erase("status");
|
||||
item.erase("type");
|
||||
item["content"] = chatcmpl_content;
|
||||
chatcmpl_messages.push_back(item);
|
||||
if (merge_prev) {
|
||||
auto & prev_msg = chatcmpl_messages.back();
|
||||
if (!exists_and_is_array(prev_msg, "content")) {
|
||||
prev_msg["content"] = json::array();
|
||||
}
|
||||
auto & prev_content = prev_msg["content"];
|
||||
prev_content.insert(prev_content.end(), chatcmpl_content.begin(), chatcmpl_content.end());
|
||||
} else {
|
||||
item.erase("status");
|
||||
item.erase("type");
|
||||
item["content"] = chatcmpl_content;
|
||||
chatcmpl_messages.push_back(item);
|
||||
}
|
||||
} else if (exists_and_is_string(item, "arguments") &&
|
||||
exists_and_is_string(item, "call_id") &&
|
||||
exists_and_is_string(item, "name") &&
|
||||
@@ -1221,24 +1232,27 @@ json convert_responses_to_chatcmpl(const json & response_body) {
|
||||
item.at("type") == "function_call"
|
||||
) {
|
||||
// #responses_create-input-input_item_list-item-function_tool_call
|
||||
json msg = json {
|
||||
{"role", "assistant"},
|
||||
{"tool_calls", json::array({ json {
|
||||
{"function", json {
|
||||
{"arguments", item.at("arguments")},
|
||||
{"name", item.at("name")},
|
||||
}},
|
||||
{"id", item.at("call_id")},
|
||||
{"type", "function"},
|
||||
}})},
|
||||
json tool_call = {
|
||||
{"function", json {
|
||||
{"arguments", item.at("arguments")},
|
||||
{"name", item.at("name")},
|
||||
}},
|
||||
{"id", item.at("call_id")},
|
||||
{"type", "function"},
|
||||
};
|
||||
|
||||
if (!chatcmpl_messages.empty() && chatcmpl_messages.back().contains("reasoning_content")) {
|
||||
// Move reasoning content from dummy message to tool call message
|
||||
msg["reasoning_content"] = chatcmpl_messages.back().at("reasoning_content");
|
||||
chatcmpl_messages.pop_back();
|
||||
if (merge_prev) {
|
||||
auto & prev_msg = chatcmpl_messages.back();
|
||||
if (!exists_and_is_array(prev_msg, "tool_calls")) {
|
||||
prev_msg["tool_calls"] = json::array();
|
||||
}
|
||||
prev_msg["tool_calls"].push_back(tool_call);
|
||||
} else {
|
||||
chatcmpl_messages.push_back(json {
|
||||
{"role", "assistant"},
|
||||
{"tool_calls", json::array({tool_call})}
|
||||
});
|
||||
}
|
||||
chatcmpl_messages.push_back(msg);
|
||||
} else if (exists_and_is_string(item, "call_id") &&
|
||||
(exists_and_is_string(item, "output") || exists_and_is_array(item, "output")) &&
|
||||
exists_and_is_string(item, "type") &&
|
||||
@@ -1282,12 +1296,16 @@ json convert_responses_to_chatcmpl(const json & response_body) {
|
||||
throw std::invalid_argument("item['content']['text'] is not a string");
|
||||
}
|
||||
|
||||
// Pack reasoning content in dummy message
|
||||
chatcmpl_messages.push_back(json {
|
||||
{"role", "assistant"},
|
||||
{"content", json::array()},
|
||||
{"reasoning_content", item.at("content")[0].at("text")},
|
||||
});
|
||||
if (merge_prev) {
|
||||
auto & prev_msg = chatcmpl_messages.back();
|
||||
prev_msg["reasoning_content"] = item.at("content")[0].at("text");
|
||||
} else {
|
||||
chatcmpl_messages.push_back(json {
|
||||
{"role", "assistant"},
|
||||
{"content", json::array()},
|
||||
{"reasoning_content", item.at("content")[0].at("text")},
|
||||
});
|
||||
}
|
||||
} else {
|
||||
throw std::invalid_argument("Cannot determine type of 'item'");
|
||||
}
|
||||
@@ -1296,20 +1314,6 @@ json convert_responses_to_chatcmpl(const json & response_body) {
|
||||
throw std::invalid_argument("'input' must be a string or array of objects");
|
||||
}
|
||||
|
||||
// Remove unused dummy message which contains
|
||||
// reasoning content not followed by tool call
|
||||
chatcmpl_messages.erase(std::remove_if(
|
||||
chatcmpl_messages.begin(),
|
||||
chatcmpl_messages.end(),
|
||||
[](const json & x){ return x.contains("role") &&
|
||||
x.at("role") == "assistant" &&
|
||||
x.contains("content") &&
|
||||
x.at("content") == json::array() &&
|
||||
x.contains("reasoning_content");
|
||||
}),
|
||||
chatcmpl_messages.end()
|
||||
);
|
||||
|
||||
chatcmpl_body["messages"] = chatcmpl_messages;
|
||||
|
||||
if (response_body.contains("tools")) {
|
||||
|
||||
@@ -101,7 +101,7 @@ In a separate terminal, start the backend server:
|
||||
./llama-server -m model.gguf
|
||||
|
||||
# Multi-model (ROUTER mode)
|
||||
./llama-server --model-store /path/to/models
|
||||
./llama-server --models-dir /path/to/models
|
||||
```
|
||||
|
||||
### 3. Start Development Servers
|
||||
|
||||
@@ -251,9 +251,6 @@
|
||||
return options.find((option) => option.id === activeId);
|
||||
}
|
||||
|
||||
if (options.length === 1) {
|
||||
return options[0];
|
||||
}
|
||||
// No selection - return undefined to show "Select model"
|
||||
return undefined;
|
||||
}
|
||||
|
||||
@@ -306,6 +306,16 @@ class ModelsStore {
|
||||
const response = await ModelsService.listRouter();
|
||||
this.routerModels = response.data;
|
||||
await this.fetchModalitiesForLoadedModels();
|
||||
|
||||
const o = this.models.filter((option) => {
|
||||
const modelProps = this.getModelProps(option.model);
|
||||
|
||||
return modelProps?.webui !== false;
|
||||
});
|
||||
|
||||
if (o.length === 1 && this.isModelLoaded(o[0].model)) {
|
||||
this.selectModelById(o[0].id);
|
||||
}
|
||||
} catch (error) {
|
||||
console.warn('Failed to fetch router models:', error);
|
||||
this.routerModels = [];
|
||||
|
||||
2530
vendor/cpp-httplib/httplib.cpp
vendored
2530
vendor/cpp-httplib/httplib.cpp
vendored
File diff suppressed because it is too large
Load Diff
465
vendor/cpp-httplib/httplib.h
vendored
465
vendor/cpp-httplib/httplib.h
vendored
@@ -8,8 +8,8 @@
|
||||
#ifndef CPPHTTPLIB_HTTPLIB_H
|
||||
#define CPPHTTPLIB_HTTPLIB_H
|
||||
|
||||
#define CPPHTTPLIB_VERSION "0.32.0"
|
||||
#define CPPHTTPLIB_VERSION_NUM "0x002000"
|
||||
#define CPPHTTPLIB_VERSION "0.33.1"
|
||||
#define CPPHTTPLIB_VERSION_NUM "0x002101"
|
||||
|
||||
/*
|
||||
* Platform compatibility check
|
||||
@@ -185,6 +185,14 @@
|
||||
: 0))
|
||||
#endif
|
||||
|
||||
#ifndef CPPHTTPLIB_THREAD_POOL_MAX_COUNT
|
||||
#define CPPHTTPLIB_THREAD_POOL_MAX_COUNT (CPPHTTPLIB_THREAD_POOL_COUNT * 4)
|
||||
#endif
|
||||
|
||||
#ifndef CPPHTTPLIB_THREAD_POOL_IDLE_TIMEOUT
|
||||
#define CPPHTTPLIB_THREAD_POOL_IDLE_TIMEOUT 3 // seconds
|
||||
#endif
|
||||
|
||||
#ifndef CPPHTTPLIB_RECV_FLAGS
|
||||
#define CPPHTTPLIB_RECV_FLAGS 0
|
||||
#endif
|
||||
@@ -201,6 +209,22 @@
|
||||
#define CPPHTTPLIB_MAX_LINE_LENGTH 32768
|
||||
#endif
|
||||
|
||||
#ifndef CPPHTTPLIB_WEBSOCKET_MAX_PAYLOAD_LENGTH
|
||||
#define CPPHTTPLIB_WEBSOCKET_MAX_PAYLOAD_LENGTH 16777216
|
||||
#endif
|
||||
|
||||
#ifndef CPPHTTPLIB_WEBSOCKET_READ_TIMEOUT_SECOND
|
||||
#define CPPHTTPLIB_WEBSOCKET_READ_TIMEOUT_SECOND 300
|
||||
#endif
|
||||
|
||||
#ifndef CPPHTTPLIB_WEBSOCKET_CLOSE_TIMEOUT_SECOND
|
||||
#define CPPHTTPLIB_WEBSOCKET_CLOSE_TIMEOUT_SECOND 5
|
||||
#endif
|
||||
|
||||
#ifndef CPPHTTPLIB_WEBSOCKET_PING_INTERVAL_SECOND
|
||||
#define CPPHTTPLIB_WEBSOCKET_PING_INTERVAL_SECOND 30
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Headers
|
||||
*/
|
||||
@@ -310,6 +334,7 @@ using socket_t = int;
|
||||
#include <errno.h>
|
||||
#include <exception>
|
||||
#include <fcntl.h>
|
||||
#include <fstream>
|
||||
#include <functional>
|
||||
#include <iomanip>
|
||||
#include <iostream>
|
||||
@@ -328,6 +353,9 @@ using socket_t = int;
|
||||
#include <unordered_map>
|
||||
#include <unordered_set>
|
||||
#include <utility>
|
||||
#if __cplusplus >= 201703L
|
||||
#include <any>
|
||||
#endif
|
||||
|
||||
#if defined(CPPHTTPLIB_USE_NON_BLOCKING_GETADDRINFO) || \
|
||||
defined(CPPHTTPLIB_USE_CERTS_FROM_MACOSX_KEYCHAIN)
|
||||
@@ -415,10 +443,46 @@ using socket_t = int;
|
||||
|
||||
#endif // CPPHTTPLIB_MBEDTLS_SUPPORT
|
||||
|
||||
#ifdef CPPHTTPLIB_WOLFSSL_SUPPORT
|
||||
#include <wolfssl/options.h>
|
||||
|
||||
#include <wolfssl/openssl/x509v3.h>
|
||||
|
||||
// Fallback definitions for older wolfSSL versions (e.g., 5.6.6)
|
||||
#ifndef WOLFSSL_GEN_EMAIL
|
||||
#define WOLFSSL_GEN_EMAIL 1
|
||||
#endif
|
||||
#ifndef WOLFSSL_GEN_DNS
|
||||
#define WOLFSSL_GEN_DNS 2
|
||||
#endif
|
||||
#ifndef WOLFSSL_GEN_URI
|
||||
#define WOLFSSL_GEN_URI 6
|
||||
#endif
|
||||
#ifndef WOLFSSL_GEN_IPADD
|
||||
#define WOLFSSL_GEN_IPADD 7
|
||||
#endif
|
||||
|
||||
#include <wolfssl/ssl.h>
|
||||
#include <wolfssl/wolfcrypt/hash.h>
|
||||
#include <wolfssl/wolfcrypt/md5.h>
|
||||
#include <wolfssl/wolfcrypt/sha256.h>
|
||||
#include <wolfssl/wolfcrypt/sha512.h>
|
||||
#ifdef _WIN32
|
||||
#include <wincrypt.h>
|
||||
#ifdef _MSC_VER
|
||||
#pragma comment(lib, "crypt32.lib")
|
||||
#endif
|
||||
#endif // _WIN32
|
||||
#if defined(CPPHTTPLIB_USE_CERTS_FROM_MACOSX_KEYCHAIN)
|
||||
#if TARGET_OS_MAC
|
||||
#include <Security/Security.h>
|
||||
#endif
|
||||
#endif // CPPHTTPLIB_USE_CERTS_FROM_MACOSX_KEYCHAIN
|
||||
#endif // CPPHTTPLIB_WOLFSSL_SUPPORT
|
||||
|
||||
// Define CPPHTTPLIB_SSL_ENABLED if any SSL backend is available
|
||||
// This simplifies conditional compilation when adding new backends (e.g.,
|
||||
// wolfSSL)
|
||||
#if defined(CPPHTTPLIB_OPENSSL_SUPPORT) || defined(CPPHTTPLIB_MBEDTLS_SUPPORT)
|
||||
#if defined(CPPHTTPLIB_OPENSSL_SUPPORT) || \
|
||||
defined(CPPHTTPLIB_MBEDTLS_SUPPORT) || defined(CPPHTTPLIB_WOLFSSL_SUPPORT)
|
||||
#define CPPHTTPLIB_SSL_ENABLED
|
||||
#endif
|
||||
|
||||
@@ -440,6 +504,10 @@ using socket_t = int;
|
||||
*/
|
||||
namespace httplib {
|
||||
|
||||
namespace ws {
|
||||
class WebSocket;
|
||||
} // namespace ws
|
||||
|
||||
namespace detail {
|
||||
|
||||
/*
|
||||
@@ -711,6 +779,143 @@ using Match = std::smatch;
|
||||
using DownloadProgress = std::function<bool(size_t current, size_t total)>;
|
||||
using UploadProgress = std::function<bool(size_t current, size_t total)>;
|
||||
|
||||
|
||||
#if __cplusplus >= 201703L
|
||||
|
||||
using any = std::any;
|
||||
using bad_any_cast = std::bad_any_cast;
|
||||
|
||||
template <typename T> T any_cast(const any &a) { return std::any_cast<T>(a); }
|
||||
template <typename T> T any_cast(any &a) { return std::any_cast<T>(a); }
|
||||
template <typename T> T any_cast(any &&a) {
|
||||
return std::any_cast<T>(std::move(a));
|
||||
}
|
||||
template <typename T> const T *any_cast(const any *a) noexcept {
|
||||
return std::any_cast<T>(a);
|
||||
}
|
||||
template <typename T> T *any_cast(any *a) noexcept {
|
||||
return std::any_cast<T>(a);
|
||||
}
|
||||
|
||||
#else // C++11/14 implementation
|
||||
|
||||
class bad_any_cast : public std::bad_cast {
|
||||
public:
|
||||
const char *what() const noexcept override { return "bad any_cast"; }
|
||||
};
|
||||
|
||||
namespace detail {
|
||||
|
||||
using any_type_id = const void *;
|
||||
|
||||
// Returns a unique per-type ID without RTTI.
|
||||
// The static address is stable across TUs because function templates are
|
||||
// implicitly inline and the ODR merges their statics into one.
|
||||
template <typename T> any_type_id any_typeid() noexcept {
|
||||
static const char id = 0;
|
||||
return &id;
|
||||
}
|
||||
|
||||
struct any_storage {
|
||||
virtual ~any_storage() = default;
|
||||
virtual std::unique_ptr<any_storage> clone() const = 0;
|
||||
virtual any_type_id type_id() const noexcept = 0;
|
||||
};
|
||||
|
||||
template <typename T> struct any_value final : any_storage {
|
||||
T value;
|
||||
template <typename U> explicit any_value(U &&v) : value(std::forward<U>(v)) {}
|
||||
std::unique_ptr<any_storage> clone() const override {
|
||||
return std::unique_ptr<any_storage>(new any_value<T>(value));
|
||||
}
|
||||
any_type_id type_id() const noexcept override { return any_typeid<T>(); }
|
||||
};
|
||||
|
||||
} // namespace detail
|
||||
|
||||
class any {
|
||||
std::unique_ptr<detail::any_storage> storage_;
|
||||
|
||||
public:
|
||||
any() noexcept = default;
|
||||
any(const any &o) : storage_(o.storage_ ? o.storage_->clone() : nullptr) {}
|
||||
any(any &&) noexcept = default;
|
||||
any &operator=(const any &o) {
|
||||
storage_ = o.storage_ ? o.storage_->clone() : nullptr;
|
||||
return *this;
|
||||
}
|
||||
any &operator=(any &&) noexcept = default;
|
||||
|
||||
template <
|
||||
typename T, typename D = typename std::decay<T>::type,
|
||||
typename std::enable_if<!std::is_same<D, any>::value, int>::type = 0>
|
||||
any(T &&v) : storage_(new detail::any_value<D>(std::forward<T>(v))) {}
|
||||
|
||||
template <
|
||||
typename T, typename D = typename std::decay<T>::type,
|
||||
typename std::enable_if<!std::is_same<D, any>::value, int>::type = 0>
|
||||
any &operator=(T &&v) {
|
||||
storage_.reset(new detail::any_value<D>(std::forward<T>(v)));
|
||||
return *this;
|
||||
}
|
||||
|
||||
bool has_value() const noexcept { return storage_ != nullptr; }
|
||||
void reset() noexcept { storage_.reset(); }
|
||||
|
||||
template <typename T> friend T *any_cast(any *a) noexcept;
|
||||
template <typename T> friend const T *any_cast(const any *a) noexcept;
|
||||
};
|
||||
|
||||
template <typename T> T *any_cast(any *a) noexcept {
|
||||
if (!a || !a->storage_) { return nullptr; }
|
||||
if (a->storage_->type_id() != detail::any_typeid<T>()) { return nullptr; }
|
||||
return &static_cast<detail::any_value<T> *>(a->storage_.get())->value;
|
||||
}
|
||||
|
||||
template <typename T> const T *any_cast(const any *a) noexcept {
|
||||
if (!a || !a->storage_) { return nullptr; }
|
||||
if (a->storage_->type_id() != detail::any_typeid<T>()) { return nullptr; }
|
||||
return &static_cast<const detail::any_value<T> *>(a->storage_.get())->value;
|
||||
}
|
||||
|
||||
template <typename T> T any_cast(const any &a) {
|
||||
using U =
|
||||
typename std::remove_cv<typename std::remove_reference<T>::type>::type;
|
||||
const U *p = any_cast<U>(&a);
|
||||
#ifndef CPPHTTPLIB_NO_EXCEPTIONS
|
||||
if (!p) { throw bad_any_cast{}; }
|
||||
#else
|
||||
if (!p) { std::abort(); }
|
||||
#endif
|
||||
return static_cast<T>(*p);
|
||||
}
|
||||
|
||||
template <typename T> T any_cast(any &a) {
|
||||
using U =
|
||||
typename std::remove_cv<typename std::remove_reference<T>::type>::type;
|
||||
U *p = any_cast<U>(&a);
|
||||
#ifndef CPPHTTPLIB_NO_EXCEPTIONS
|
||||
if (!p) { throw bad_any_cast{}; }
|
||||
#else
|
||||
if (!p) { std::abort(); }
|
||||
#endif
|
||||
return static_cast<T>(*p);
|
||||
}
|
||||
|
||||
template <typename T> T any_cast(any &&a) {
|
||||
using U =
|
||||
typename std::remove_cv<typename std::remove_reference<T>::type>::type;
|
||||
U *p = any_cast<U>(&a);
|
||||
#ifndef CPPHTTPLIB_NO_EXCEPTIONS
|
||||
if (!p) { throw bad_any_cast{}; }
|
||||
#else
|
||||
if (!p) { std::abort(); }
|
||||
#endif
|
||||
return static_cast<T>(std::move(*p));
|
||||
}
|
||||
|
||||
#endif // __cplusplus >= 201703L
|
||||
|
||||
struct Response;
|
||||
using ResponseHandler = std::function<bool(const Response &response)>;
|
||||
|
||||
@@ -805,6 +1010,34 @@ struct FormDataProvider {
|
||||
};
|
||||
using FormDataProviderItems = std::vector<FormDataProvider>;
|
||||
|
||||
inline FormDataProvider
|
||||
make_file_provider(const std::string &name, const std::string &filepath,
|
||||
const std::string &filename = std::string(),
|
||||
const std::string &content_type = std::string()) {
|
||||
FormDataProvider fdp;
|
||||
fdp.name = name;
|
||||
fdp.filename = filename.empty() ? filepath : filename;
|
||||
fdp.content_type = content_type;
|
||||
fdp.provider = [filepath](size_t offset, DataSink &sink) -> bool {
|
||||
std::ifstream f(filepath, std::ios::binary);
|
||||
if (!f) { return false; }
|
||||
if (offset > 0) {
|
||||
f.seekg(static_cast<std::streamoff>(offset));
|
||||
if (!f.good()) {
|
||||
sink.done();
|
||||
return true;
|
||||
}
|
||||
}
|
||||
char buf[8192];
|
||||
f.read(buf, sizeof(buf));
|
||||
auto n = static_cast<size_t>(f.gcount());
|
||||
if (n > 0) { return sink.write(buf, n); }
|
||||
sink.done(); // EOF
|
||||
return true;
|
||||
};
|
||||
return fdp;
|
||||
}
|
||||
|
||||
using ContentReceiverWithProgress = std::function<bool(
|
||||
const char *data, size_t data_length, size_t offset, size_t total_length)>;
|
||||
|
||||
@@ -1010,6 +1243,10 @@ struct Response {
|
||||
std::string body;
|
||||
std::string location; // Redirect location
|
||||
|
||||
// User-defined context — set by pre-routing/pre-request handlers and read
|
||||
// by route handlers to pass arbitrary data (e.g. decoded auth tokens).
|
||||
std::map<std::string, any> user_data;
|
||||
|
||||
bool has_header(const std::string &key) const;
|
||||
std::string get_header_value(const std::string &key, const char *def = "",
|
||||
size_t id = 0) const;
|
||||
@@ -1124,6 +1361,11 @@ public:
|
||||
|
||||
virtual time_t duration() const = 0;
|
||||
|
||||
virtual void set_read_timeout(time_t sec, time_t usec = 0) {
|
||||
(void)sec;
|
||||
(void)usec;
|
||||
}
|
||||
|
||||
ssize_t write(const char *ptr);
|
||||
ssize_t write(const std::string &s);
|
||||
|
||||
@@ -1146,7 +1388,7 @@ public:
|
||||
|
||||
class ThreadPool final : public TaskQueue {
|
||||
public:
|
||||
explicit ThreadPool(size_t n, size_t mqr = 0);
|
||||
explicit ThreadPool(size_t n, size_t max_n = 0, size_t mqr = 0);
|
||||
ThreadPool(const ThreadPool &) = delete;
|
||||
~ThreadPool() override = default;
|
||||
|
||||
@@ -1154,20 +1396,22 @@ public:
|
||||
void shutdown() override;
|
||||
|
||||
private:
|
||||
struct worker {
|
||||
explicit worker(ThreadPool &pool);
|
||||
void worker(bool is_dynamic);
|
||||
void move_to_finished(std::thread::id id);
|
||||
void cleanup_finished_threads();
|
||||
|
||||
void operator()();
|
||||
|
||||
ThreadPool &pool_;
|
||||
};
|
||||
friend struct worker;
|
||||
|
||||
std::vector<std::thread> threads_;
|
||||
std::list<std::function<void()>> jobs_;
|
||||
size_t base_thread_count_;
|
||||
size_t max_thread_count_;
|
||||
size_t max_queued_requests_;
|
||||
size_t idle_thread_count_;
|
||||
|
||||
bool shutdown_;
|
||||
size_t max_queued_requests_ = 0;
|
||||
|
||||
std::list<std::function<void()>> jobs_;
|
||||
std::vector<std::thread> threads_; // base threads
|
||||
std::list<std::thread> dynamic_threads_; // dynamic threads
|
||||
std::vector<std::thread>
|
||||
finished_threads_; // exited dynamic threads awaiting join
|
||||
|
||||
std::condition_variable cond_;
|
||||
std::mutex mutex_;
|
||||
@@ -1294,6 +1538,11 @@ public:
|
||||
using Expect100ContinueHandler =
|
||||
std::function<int(const Request &, Response &)>;
|
||||
|
||||
using WebSocketHandler =
|
||||
std::function<void(const Request &, ws::WebSocket &)>;
|
||||
using SubProtocolSelector =
|
||||
std::function<std::string(const std::vector<std::string> &protocols)>;
|
||||
|
||||
Server();
|
||||
|
||||
virtual ~Server();
|
||||
@@ -1311,6 +1560,10 @@ public:
|
||||
Server &Delete(const std::string &pattern, HandlerWithContentReader handler);
|
||||
Server &Options(const std::string &pattern, Handler handler);
|
||||
|
||||
Server &WebSocket(const std::string &pattern, WebSocketHandler handler);
|
||||
Server &WebSocket(const std::string &pattern, WebSocketHandler handler,
|
||||
SubProtocolSelector sub_protocol_selector);
|
||||
|
||||
bool set_base_dir(const std::string &dir,
|
||||
const std::string &mount_point = std::string());
|
||||
bool set_mount_point(const std::string &mount_point, const std::string &dir,
|
||||
@@ -1386,7 +1639,8 @@ protected:
|
||||
int remote_port, const std::string &local_addr,
|
||||
int local_port, bool close_connection,
|
||||
bool &connection_closed,
|
||||
const std::function<void(Request &)> &setup_request);
|
||||
const std::function<void(Request &)> &setup_request,
|
||||
bool *websocket_upgraded = nullptr);
|
||||
|
||||
std::atomic<socket_t> svr_sock_{INVALID_SOCKET};
|
||||
|
||||
@@ -1488,6 +1742,14 @@ private:
|
||||
HandlersForContentReader delete_handlers_for_content_reader_;
|
||||
Handlers options_handlers_;
|
||||
|
||||
struct WebSocketHandlerEntry {
|
||||
std::unique_ptr<detail::MatcherBase> matcher;
|
||||
WebSocketHandler handler;
|
||||
SubProtocolSelector sub_protocol_selector;
|
||||
};
|
||||
using WebSocketHandlers = std::vector<WebSocketHandlerEntry>;
|
||||
WebSocketHandlers websocket_handlers_;
|
||||
|
||||
HandlerWithResponse error_handler_;
|
||||
ExceptionHandler exception_handler_;
|
||||
HandlerWithResponse pre_routing_handler_;
|
||||
@@ -2970,6 +3232,36 @@ struct MbedTlsContext {
|
||||
} // namespace tls
|
||||
#endif
|
||||
|
||||
#ifdef CPPHTTPLIB_WOLFSSL_SUPPORT
|
||||
namespace tls {
|
||||
namespace impl {
|
||||
|
||||
// wolfSSL context wrapper (holds WOLFSSL_CTX and related state).
|
||||
// This struct is accessible via tls::impl for use in SSL context
|
||||
// setup callbacks (cast ctx_t to tls::impl::WolfSSLContext*).
|
||||
struct WolfSSLContext {
|
||||
WOLFSSL_CTX *ctx = nullptr;
|
||||
bool is_server = false;
|
||||
bool verify_client = false;
|
||||
bool has_verify_callback = false;
|
||||
std::string ca_pem_data_; // accumulated PEM for get_ca_names/get_ca_certs
|
||||
|
||||
WolfSSLContext();
|
||||
~WolfSSLContext();
|
||||
|
||||
WolfSSLContext(const WolfSSLContext &) = delete;
|
||||
WolfSSLContext &operator=(const WolfSSLContext &) = delete;
|
||||
};
|
||||
|
||||
// CA store for wolfSSL: holds raw PEM bytes to allow reloading into any ctx
|
||||
struct WolfSSLCAStore {
|
||||
std::string pem_data;
|
||||
};
|
||||
|
||||
} // namespace impl
|
||||
} // namespace tls
|
||||
#endif
|
||||
|
||||
#endif // CPPHTTPLIB_SSL_ENABLED
|
||||
|
||||
namespace stream {
|
||||
@@ -3335,6 +3627,143 @@ private:
|
||||
|
||||
} // namespace sse
|
||||
|
||||
namespace ws {
|
||||
|
||||
enum class Opcode : uint8_t {
|
||||
Continuation = 0x0,
|
||||
Text = 0x1,
|
||||
Binary = 0x2,
|
||||
Close = 0x8,
|
||||
Ping = 0x9,
|
||||
Pong = 0xA,
|
||||
};
|
||||
|
||||
enum class CloseStatus : uint16_t {
|
||||
Normal = 1000,
|
||||
GoingAway = 1001,
|
||||
ProtocolError = 1002,
|
||||
UnsupportedData = 1003,
|
||||
NoStatus = 1005,
|
||||
Abnormal = 1006,
|
||||
InvalidPayload = 1007,
|
||||
PolicyViolation = 1008,
|
||||
MessageTooBig = 1009,
|
||||
MandatoryExtension = 1010,
|
||||
InternalError = 1011,
|
||||
};
|
||||
|
||||
enum ReadResult : int { Fail = 0, Text = 1, Binary = 2 };
|
||||
|
||||
class WebSocket {
|
||||
public:
|
||||
WebSocket(const WebSocket &) = delete;
|
||||
WebSocket &operator=(const WebSocket &) = delete;
|
||||
~WebSocket();
|
||||
|
||||
ReadResult read(std::string &msg);
|
||||
bool send(const std::string &data);
|
||||
bool send(const char *data, size_t len);
|
||||
void close(CloseStatus status = CloseStatus::Normal,
|
||||
const std::string &reason = "");
|
||||
const Request &request() const;
|
||||
bool is_open() const;
|
||||
|
||||
private:
|
||||
friend class httplib::Server;
|
||||
friend class WebSocketClient;
|
||||
|
||||
WebSocket(Stream &strm, const Request &req, bool is_server)
|
||||
: strm_(strm), req_(req), is_server_(is_server) {
|
||||
start_heartbeat();
|
||||
}
|
||||
|
||||
WebSocket(std::unique_ptr<Stream> &&owned_strm, const Request &req,
|
||||
bool is_server)
|
||||
: strm_(*owned_strm), owned_strm_(std::move(owned_strm)), req_(req),
|
||||
is_server_(is_server) {
|
||||
start_heartbeat();
|
||||
}
|
||||
|
||||
void start_heartbeat();
|
||||
bool send_frame(Opcode op, const char *data, size_t len, bool fin = true);
|
||||
|
||||
Stream &strm_;
|
||||
std::unique_ptr<Stream> owned_strm_;
|
||||
Request req_;
|
||||
bool is_server_;
|
||||
std::atomic<bool> closed_{false};
|
||||
std::mutex write_mutex_;
|
||||
std::thread ping_thread_;
|
||||
std::mutex ping_mutex_;
|
||||
std::condition_variable ping_cv_;
|
||||
};
|
||||
|
||||
class WebSocketClient {
|
||||
public:
|
||||
explicit WebSocketClient(const std::string &scheme_host_port_path,
|
||||
const Headers &headers = {});
|
||||
|
||||
~WebSocketClient();
|
||||
WebSocketClient(const WebSocketClient &) = delete;
|
||||
WebSocketClient &operator=(const WebSocketClient &) = delete;
|
||||
|
||||
bool is_valid() const;
|
||||
|
||||
bool connect();
|
||||
ReadResult read(std::string &msg);
|
||||
bool send(const std::string &data);
|
||||
bool send(const char *data, size_t len);
|
||||
void close(CloseStatus status = CloseStatus::Normal,
|
||||
const std::string &reason = "");
|
||||
bool is_open() const;
|
||||
const std::string &subprotocol() const;
|
||||
void set_read_timeout(time_t sec, time_t usec = 0);
|
||||
void set_write_timeout(time_t sec, time_t usec = 0);
|
||||
|
||||
#ifdef CPPHTTPLIB_SSL_ENABLED
|
||||
void set_ca_cert_path(const std::string &path);
|
||||
void set_ca_cert_store(tls::ca_store_t store);
|
||||
void enable_server_certificate_verification(bool enabled);
|
||||
#endif
|
||||
|
||||
private:
|
||||
void shutdown_and_close();
|
||||
bool create_stream(std::unique_ptr<Stream> &strm);
|
||||
|
||||
std::string host_;
|
||||
int port_;
|
||||
std::string path_;
|
||||
Headers headers_;
|
||||
std::string subprotocol_;
|
||||
bool is_valid_ = false;
|
||||
socket_t sock_ = INVALID_SOCKET;
|
||||
std::unique_ptr<WebSocket> ws_;
|
||||
time_t read_timeout_sec_ = CPPHTTPLIB_WEBSOCKET_READ_TIMEOUT_SECOND;
|
||||
time_t read_timeout_usec_ = 0;
|
||||
time_t write_timeout_sec_ = CPPHTTPLIB_CLIENT_WRITE_TIMEOUT_SECOND;
|
||||
time_t write_timeout_usec_ = CPPHTTPLIB_CLIENT_WRITE_TIMEOUT_USECOND;
|
||||
|
||||
#ifdef CPPHTTPLIB_SSL_ENABLED
|
||||
bool is_ssl_ = false;
|
||||
tls::ctx_t tls_ctx_ = nullptr;
|
||||
tls::session_t tls_session_ = nullptr;
|
||||
std::string ca_cert_file_path_;
|
||||
tls::ca_store_t ca_cert_store_ = nullptr;
|
||||
bool server_certificate_verification_ = true;
|
||||
#endif
|
||||
};
|
||||
|
||||
namespace impl {
|
||||
|
||||
bool is_valid_utf8(const std::string &s);
|
||||
|
||||
bool read_websocket_frame(Stream &strm, Opcode &opcode, std::string &payload,
|
||||
bool &fin, bool expect_masked, size_t max_len);
|
||||
|
||||
} // namespace impl
|
||||
|
||||
} // namespace ws
|
||||
|
||||
|
||||
} // namespace httplib
|
||||
|
||||
|
||||
Reference in New Issue
Block a user