Compare commits

..

13 Commits
b8117 ... b8130

Author SHA1 Message Date
Aldehir Rojas
ed4837891d common : fix improper trimming in XML parser on complete message (#19805)
Co-authored-by: Jules LEIDELINGER <11395311+julio75012@users.noreply.github.com>
2026-02-22 17:34:54 +01:00
Kilian Krampf
cacc371f99 Fix wrong cli-argument in documentation (#19804) 2026-02-22 16:26:33 +01:00
HelloKS
ae2368e74e model : add Kanana-2 model support (#19803)
* model: Add Kanana-2 model support

* lint: adjust spacing
2026-02-22 16:15:02 +01:00
Sigbjørn Skjæret
9f0684f003 ci : fix rocm archive name [no ci] (#19808) 2026-02-22 16:14:37 +01:00
Aldehir Rojas
34ec1c3f18 server : merge contiguous Responses input items into a single assistant message (#19773)
* server : merge contiguous input items into a single assistant message

* cont : simplify tool call msg

* cont : reduce and combine content

* cont : fix merging content items
2026-02-22 14:11:31 +01:00
Sigbjørn Skjæret
e877ad8bd9 ci : fix rocm release path [no ci] (#19784) 2026-02-22 08:07:46 +01:00
Mario Limonciello
35715657cb Update ROCm docker container to 7.2 release (#19418)
Also update architectures
2026-02-21 21:53:39 +01:00
Mario Limonciello
f75c4e8bf5 Add a build target to generate ROCm artifacts using ROCm 7.2 (#19433)
This builds the following targets:
 * gfx1151
 * gfx1150
 * gfx1200
 * gfx1201
 * gfx1100
 * gfx1101
 * gfx1030
 * gfx908
 * gfx90a
 * gfx942
2026-02-21 19:56:26 +01:00
Adrien Gallouët
99156f3a5f vendor : update cpp-httplib to 0.33.1 (#19778)
Signed-off-by: Adrien Gallouët <adrien@gallouet.fr>
2026-02-21 19:12:31 +01:00
Gaurav Garg
a0c91e8f9f Improve CUDA graph capture (#19754)
* Improve CUDA graph capture

Currently, CUDA graphs are eagerly enabled on the first call to ggml_backend_cuda_graph_compute. If the graph properties keep changing (4+ consecutive updates), the graph is permanently disabled. This is suboptimal because:

- The first call always incurs CUDA graph capture overhead even if the graph is unstable
- Once permanently disabled, CUDA graphs never re-enable even after the graph stabilizes (e.g., switching from prompt processing to decode)

The new approach delays CUDA graph activation until warmup completes: the same cgraph must be called at least twice with matching properties before CUDA graph capture begins. This avoids wasted capture overhead on volatile graphs and allows graphs to become eligible once they stabilize.
This also fixes issues such as https://github.com/ggml-org/llama.cpp/discussions/19708

* Update ggml/src/ggml-cuda/ggml-cuda.cu

Co-authored-by: Johannes Gäßler <johannesg@5d6.de>

* Remove EM dashes

* Update ggml/src/ggml-cuda/ggml-cuda.cu

Co-authored-by: Aman Gupta <amangupta052@gmail.com>

---------

Co-authored-by: Johannes Gäßler <johannesg@5d6.de>
Co-authored-by: Aman Gupta <amangupta052@gmail.com>
2026-02-21 15:09:36 +05:30
crsawyer
07968d53e4 fix: UI single model selection in router mode (#19767) 2026-02-21 09:28:39 +01:00
Mengsheng Wu
ba3b9c8844 hexagon : fix build release (#19444) (#19587) 2026-02-20 16:40:00 -08:00
Aldehir Rojas
94b0200a01 common : merge qwen3-coder and nemotron nano 3 parsers (#19765)
* common : migrate qwen3-coder to PEG parsing variant

* cont : add JSON parameter test
2026-02-20 23:22:22 +01:00
22 changed files with 3196 additions and 832 deletions

View File

@@ -1,8 +1,8 @@
ARG UBUNTU_VERSION=24.04
# This needs to generally match the container host's environment.
ARG ROCM_VERSION=7.0
ARG AMDGPU_VERSION=7.0
ARG ROCM_VERSION=7.2
ARG AMDGPU_VERSION=7.2
# Target the ROCm build image
ARG BASE_ROCM_DEV_CONTAINER=rocm/dev-ubuntu-${UBUNTU_VERSION}:${ROCM_VERSION}-complete
@@ -11,13 +11,12 @@ ARG BASE_ROCM_DEV_CONTAINER=rocm/dev-ubuntu-${UBUNTU_VERSION}:${ROCM_VERSION}-co
FROM ${BASE_ROCM_DEV_CONTAINER} AS build
# Unless otherwise specified, we make a fat build.
# List from https://github.com/ggml-org/llama.cpp/pull/1087#issuecomment-1682807878
# This is mostly tied to rocBLAS supported archs.
# gfx803, gfx900, gfx906, gfx1032, gfx1101, gfx1102, not officially supported
# check https://rocm.docs.amd.com/projects/install-on-linux/en/docs-6.4.1/reference/system-requirements.html
# check https://rocm.docs.amd.com/projects/install-on-linux/en/docs-7.2.0/reference/system-requirements.html
# check https://rocm.docs.amd.com/projects/radeon-ryzen/en/latest/docs/compatibility/compatibilityrad/native_linux/native_linux_compatibility.html
# check https://rocm.docs.amd.com/projects/radeon-ryzen/en/latest/docs/compatibility/compatibilityryz/native_linux/native_linux_compatibility.html
ARG ROCM_DOCKER_ARCH='gfx803;gfx900;gfx906;gfx908;gfx90a;gfx942;gfx1010;gfx1030;gfx1032;gfx1100;gfx1101;gfx1102;gfx1200;gfx1201;gfx1151'
#ARG ROCM_DOCKER_ARCH='gfx1151'
ARG ROCM_DOCKER_ARCH='gfx908;gfx90a;gfx942;gfx1030;gfx1100;gfx1101;gfx1151;gfx1150;gfx1200;gfx1201'
# Set ROCm architectures
ENV AMDGPU_TARGETS=${ROCM_DOCKER_ARCH}

View File

@@ -516,6 +516,102 @@ jobs:
path: llama-bin-win-sycl-x64.zip
name: llama-bin-win-sycl-x64.zip
ubuntu-22-rocm:
runs-on: ubuntu-22.04
strategy:
matrix:
include:
- ROCM_VERSION: "7.2"
gpu_targets: "gfx908;gfx90a;gfx942;gfx1030;gfx1100;gfx1101;gfx1151;gfx1150;gfx1200;gfx1201"
build: 'x64'
steps:
- name: Clone
id: checkout
uses: actions/checkout@v6
with:
fetch-depth: 0
- name: ccache
uses: ggml-org/ccache-action@v1.2.16
with:
key: ubuntu-rocm-cmake-${{ matrix.ROCM_VERSION }}-${{ matrix.build }}
evict-old-files: 1d
- name: Dependencies
id: depends
run: |
sudo apt install -y build-essential git cmake wget
- name: Setup Legacy ROCm
if: matrix.ROCM_VERSION == '7.2'
id: legacy_env
run: |
sudo mkdir --parents --mode=0755 /etc/apt/keyrings
wget https://repo.radeon.com/rocm/rocm.gpg.key -O - | \
gpg --dearmor | sudo tee /etc/apt/keyrings/rocm.gpg > /dev/null
sudo tee /etc/apt/sources.list.d/rocm.list << EOF
deb [arch=amd64 signed-by=/etc/apt/keyrings/rocm.gpg] https://repo.radeon.com/rocm/apt/${{ matrix.ROCM_VERSION }} jammy main
EOF
sudo tee /etc/apt/preferences.d/rocm-pin-600 << EOF
Package: *
Pin: release o=repo.radeon.com
Pin-Priority: 600
EOF
sudo apt update
sudo apt-get install -y libssl-dev rocm-hip-sdk
- name: Setup TheRock
if: matrix.ROCM_VERSION != '7.2'
id: therock_env
run: |
wget https://repo.amd.com/rocm/tarball/therock-dist-linux-gfx1151-${{ matrix.ROCM_VERSION }}.tar.gz
mkdir install
tar -xf *.tar.gz -C install
export ROCM_PATH=$(pwd)/install
echo ROCM_PATH=$ROCM_PATH >> $GITHUB_ENV
echo PATH=$PATH:$ROCM_PATH/bin >> $GITHUB_ENV
echo LD_LIBRARY_PATH=$ROCM_PATH/lib:$ROCM_PATH/llvm/lib:$ROCM_PATH/lib/rocprofiler-systems >> $GITHUB_ENV
- name: Build with native CMake HIP support
id: cmake_build
run: |
cmake -B build -S . \
-DCMAKE_HIP_COMPILER="$(hipconfig -l)/clang" \
-DCMAKE_HIP_FLAGS="-mllvm --amdgpu-unroll-threshold-local=600" \
-DCMAKE_BUILD_TYPE=Release \
-DGGML_BACKEND_DL=ON \
-DGGML_NATIVE=OFF \
-DCMAKE_INSTALL_RPATH='$ORIGIN' \
-DCMAKE_BUILD_WITH_INSTALL_RPATH=ON \
-DGGML_CPU_ALL_VARIANTS=ON \
-DGPU_TARGETS="${{ matrix.gpu_targets }}" \
-DGGML_HIP=ON \
-DHIP_PLATFORM=amd \
-DGGML_HIP_ROCWMMA_FATTN=ON \
${{ env.CMAKE_ARGS }}
cmake --build build --config Release -j $(nproc)
- name: Determine tag name
id: tag
uses: ./.github/actions/get-tag-name
- name: Pack artifacts
id: pack_artifacts
run: |
cp LICENSE ./build/bin/
tar -czvf llama-${{ steps.tag.outputs.name }}-bin-ubuntu-rocm-${{ matrix.ROCM_VERSION }}-${{ matrix.build }}.tar.gz --transform "s,./,llama-${{ steps.tag.outputs.name }}/," -C ./build/bin .
- name: Upload artifacts
uses: actions/upload-artifact@v6
with:
path: llama-${{ steps.tag.outputs.name }}-bin-ubuntu-rocm-${{ matrix.ROCM_VERSION }}-${{ matrix.build }}.tar.gz
name: llama-bin-ubuntu-rocm-${{ matrix.ROCM_VERSION }}-${{ matrix.build }}.tar.gz
windows-hip:
runs-on: windows-2022
@@ -784,6 +880,7 @@ jobs:
- windows-cuda
- windows-sycl
- windows-hip
- ubuntu-22-rocm
- ubuntu-22-cpu
- ubuntu-22-vulkan
- macOS-arm64
@@ -868,6 +965,7 @@ jobs:
**Linux:**
- [Ubuntu x64 (CPU)](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-ubuntu-x64.tar.gz)
- [Ubuntu x64 (Vulkan)](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-ubuntu-vulkan-x64.tar.gz)
- [Ubuntu x64 (ROCm 7.2)](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-ubuntu-rocm-7.2-x64.tar.gz)
- [Ubuntu s390x (CPU)](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-ubuntu-s390x.tar.gz)
**Windows:**

View File

@@ -1,4 +1,4 @@
cmake_minimum_required(VERSION 3.14) # for add_link_options and implicit target directories.
cmake_minimum_required(VERSION 3.14...3.28) # for add_link_options and implicit target directories.
project("llama.cpp" C CXX)
include(CheckIncludeFileCXX)

View File

@@ -803,7 +803,7 @@ inline void parse_msg_with_xml_tool_calls(common_chat_msg_parser & builder, cons
}
// remove potential partial suffix
if (builder.pos() == builder.input().size()) {
if (builder.pos() == builder.input().size() && builder.is_partial()) {
if (unclosed_reasoning_content.empty()) {
rstrip(content);
trim_potential_partial_word(content);

View File

@@ -893,23 +893,6 @@ static void common_chat_parse_minimax_m2(common_chat_msg_parser & builder) {
builder.consume_reasoning_with_xml_tool_calls(form, "<think>", "</think>");
}
static void common_chat_parse_qwen3_coder_xml(common_chat_msg_parser & builder) {
static const xml_tool_call_format form = ([]() {
xml_tool_call_format form {};
form.scope_start = "<tool_call>";
form.tool_start = "<function=";
form.tool_sep = ">";
form.key_start = "<parameter=";
form.key_val_sep = ">";
form.val_end = "</parameter>";
form.tool_end = "</function>";
form.scope_end = "</tool_call>";
form.trim_raw_argval = true;
return form;
})();
builder.consume_reasoning_with_xml_tool_calls(form);
}
static void common_chat_parse_kimi_k2(common_chat_msg_parser & builder) {
static const xml_tool_call_format form = ([]() {
xml_tool_call_format form {};
@@ -1590,9 +1573,6 @@ static void common_chat_parse(common_chat_msg_parser & builder) {
case COMMON_CHAT_FORMAT_KIMI_K2:
common_chat_parse_kimi_k2(builder);
break;
case COMMON_CHAT_FORMAT_QWEN3_CODER_XML:
common_chat_parse_qwen3_coder_xml(builder);
break;
case COMMON_CHAT_FORMAT_APRIEL_1_5:
common_chat_parse_apriel_1_5(builder);
break;

View File

@@ -736,7 +736,6 @@ const char * common_chat_format_name(common_chat_format format) {
case COMMON_CHAT_FORMAT_MINIMAX_M2: return "MiniMax-M2";
case COMMON_CHAT_FORMAT_GLM_4_5: return "GLM 4.5";
case COMMON_CHAT_FORMAT_KIMI_K2: return "Kimi K2";
case COMMON_CHAT_FORMAT_QWEN3_CODER_XML: return "Qwen3 Coder";
case COMMON_CHAT_FORMAT_APRIEL_1_5: return "Apriel 1.5";
case COMMON_CHAT_FORMAT_XIAOMI_MIMO: return "Xiaomi MiMo";
case COMMON_CHAT_FORMAT_SOLAR_OPEN: return "Solar Open";
@@ -1522,14 +1521,17 @@ static common_chat_params common_chat_params_init_nemotron_v2(const common_chat_
return data;
}
static common_chat_params common_chat_params_init_nemotron_v3(const common_chat_template & tmpl, const struct templates_params & inputs) {
static common_chat_params common_chat_params_init_qwen3_coder(const common_chat_template & tmpl, const struct templates_params & inputs) {
common_chat_params data;
data.prompt = apply(tmpl, inputs);
data.format = COMMON_CHAT_FORMAT_PEG_CONSTRUCTED;
// Nemotron Nano 3 and Step-3.5-Flash use the Qwen3 Coder tool calling with thinking
bool supports_reasoning = (tmpl.source().find("<think>") != std::string::npos);
// Handle thinking tags appropriately based on inputs.enable_thinking
if (string_ends_with(data.prompt, "<think>\n")) {
if (supports_reasoning && string_ends_with(data.prompt, "<think>\n")) {
if (!inputs.enable_thinking) {
data.prompt += "</think>";
} else {
@@ -1538,19 +1540,21 @@ static common_chat_params common_chat_params_init_nemotron_v3(const common_chat_
}
data.preserved_tokens = {
"<think>",
"</think>",
"<tool_call>",
"</tool_call>",
};
if (supports_reasoning) {
data.preserved_tokens.insert(data.preserved_tokens.end(), {"<think>", "</think>"});
}
auto has_tools = inputs.tools.is_array() && !inputs.tools.empty();
auto extract_reasoning = inputs.reasoning_format != COMMON_REASONING_FORMAT_NONE;
auto include_grammar = true;
auto parser = build_chat_peg_constructed_parser([&](auto & p) {
auto reasoning = p.eps();
if (inputs.enable_thinking && extract_reasoning) {
if (supports_reasoning && inputs.enable_thinking && extract_reasoning) {
auto reasoning_content = p.reasoning(p.until("</think>")) + ("</think>" | p.end());
if (data.thinking_forced_open) {
reasoning = reasoning_content;
@@ -1888,38 +1892,6 @@ static common_chat_params common_chat_params_init_minimax_m2(const common_chat_t
return data;
}
static common_chat_params common_chat_params_init_qwen3_coder_xml(const common_chat_template & tmpl, const struct templates_params & params) {
common_chat_params data;
data.grammar_lazy = params.tools.is_array() && !params.tools.empty() && params.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
data.prompt = apply(tmpl, params);
data.format = COMMON_CHAT_FORMAT_QWEN3_CODER_XML;
data.preserved_tokens = {
"<tool_call>",
"</tool_call>",
"<function=",
"</function>",
"<parameter=",
"</parameter>",
};
// build grammar for tool call
static const xml_tool_call_format form {
/* form.scope_start = */ "<tool_call>\n",
/* form.tool_start = */ "<function=",
/* form.tool_sep = */ ">\n",
/* form.key_start = */ "<parameter=",
/* form.key_val_sep = */ ">\n",
/* form.val_end = */ "\n</parameter>\n",
/* form.tool_end = */ "</function>\n",
/* form.scope_end = */ "</tool_call>",
};
build_grammar_xml_tool_call(data, params.tools, form);
return data;
}
static common_chat_params common_chat_params_init_kimi_k2(const common_chat_template & tmpl, const struct templates_params & params) {
common_chat_params data;
data.grammar_lazy = params.tools.is_array() && !params.tools.empty() && params.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
@@ -3147,13 +3119,7 @@ static common_chat_params common_chat_templates_apply_jinja(
src.find("<function=") != std::string::npos &&
src.find("<parameter=") != std::string::npos) {
workaround::func_args_not_string(params.messages);
// Models with <think> support (Step-3.5-Flash, Nemotron 3 Nano) use the
// Nemotron v3 PEG parser for streaming and schema-aware parameter parsing.
// Qwen3-Coder has no <think> in its template.
if (src.find("<think>") != std::string::npos) {
return common_chat_params_init_nemotron_v3(tmpl, params);
}
return common_chat_params_init_qwen3_coder_xml(tmpl, params);
return common_chat_params_init_qwen3_coder(tmpl, params);
}
// Xiaomi MiMo format detection (must come before Hermes 2 Pro)

View File

@@ -128,7 +128,6 @@ enum common_chat_format {
COMMON_CHAT_FORMAT_GLM_4_5,
COMMON_CHAT_FORMAT_MINIMAX_M2,
COMMON_CHAT_FORMAT_KIMI_K2,
COMMON_CHAT_FORMAT_QWEN3_CODER_XML,
COMMON_CHAT_FORMAT_APRIEL_1_5,
COMMON_CHAT_FORMAT_XIAOMI_MIMO,
COMMON_CHAT_FORMAT_SOLAR_OPEN,

View File

@@ -1274,6 +1274,9 @@ class TextModel(ModelBase):
if chkhsh == "b4b8ca1f9769494fbd956ebc4c249de6131fb277a4a3345a7a92c7dd7a55808d":
# ref: https://huggingface.co/jdopensource/JoyAI-LLM-Flash
res = "joyai-llm"
if chkhsh == "e4d54df1ebc1f2b91acd986c5b51aa50837d5faf7c7398e73c1f9e9ee5d19869":
# ref: https://huggingface.co/kakaocorp/kanana-2-30b-a3b-instruct-2601
res = "kanana2"
if res is None:
logger.warning("\n")

View File

@@ -152,6 +152,7 @@ models = [
{"name": "exaone-moe", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/LGAI-EXAONE/K-EXAONE-236B-A23B", },
{"name": "qwen35", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/Qwen/Qwen3.5-9B-Instruct", },
{"name": "joyai-llm", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/jdopensource/JoyAI-LLM-Flash", },
{"name": "kanana2", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/kakaocorp/kanana-2-30b-a3b-instruct-2601", },
]
# some models are known to be broken upstream, so we will skip them as exceptions

View File

@@ -1149,8 +1149,7 @@ struct ggml_cuda_graph {
size_t num_nodes = 0;
std::vector<cudaGraphNode_t> nodes;
bool disable_due_to_gpu_arch = false;
bool disable_due_to_too_many_updates = false;
int number_consecutive_updates = 0;
bool warmup_complete = false;
std::vector<ggml_cuda_graph_node_properties> props;
// these are extra tensors (inputs) that participate in the ggml graph but are not nodes
@@ -1159,21 +1158,9 @@ struct ggml_cuda_graph {
// ref: https://github.com/ggml-org/llama.cpp/pull/19165
std::vector<ggml_cuda_graph_node_properties> extra;
void record_update(bool use_graph, bool update_required) {
if (use_graph && update_required) {
number_consecutive_updates++;
} else {
number_consecutive_updates = 0;
}
if (number_consecutive_updates >= 4) {
GGML_LOG_DEBUG("%s: disabling CUDA graphs due to too many consecutive updates\n", __func__);
disable_due_to_too_many_updates = true;
}
}
bool is_enabled() const {
static const bool disable_cuda_graphs_due_to_env = (getenv("GGML_CUDA_DISABLE_GRAPHS") != nullptr);
return !(disable_due_to_gpu_arch || disable_cuda_graphs_due_to_env || disable_due_to_too_many_updates);
return !(disable_due_to_gpu_arch || disable_cuda_graphs_due_to_env);
}
#endif
};

View File

@@ -2979,10 +2979,6 @@ static bool ggml_cuda_graph_update_required(ggml_backend_cuda_context * cuda_ctx
const void * graph_key = ggml_cuda_graph_get_key(cgraph);
ggml_cuda_graph * graph = cuda_ctx->cuda_graph(graph_key);
if (graph->instance == nullptr) {
res = true;
}
// Check if the graph size has changed
if (graph->props.size() != (size_t)cgraph->n_nodes) {
res = true;
@@ -3931,14 +3927,35 @@ static enum ggml_status ggml_backend_cuda_graph_compute(ggml_backend_t backend,
#ifdef USE_CUDA_GRAPH
graph_key = ggml_cuda_graph_get_key(cgraph);
use_cuda_graph = ggml_cuda_graph_set_enabled(cuda_ctx, graph_key);
ggml_cuda_graph_set_enabled(cuda_ctx, graph_key);
ggml_cuda_graph * graph = cuda_ctx->cuda_graph(graph_key);
if (graph->is_enabled()) {
cuda_graph_update_required = ggml_cuda_graph_update_required(cuda_ctx, cgraph);
use_cuda_graph = ggml_cuda_graph_check_compability(cgraph);
const bool graph_compatible = ggml_cuda_graph_check_compability(cgraph);
if (graph_compatible) {
const bool properties_changed = ggml_cuda_graph_update_required(cuda_ctx, cgraph);
graph->record_update(use_cuda_graph, cuda_graph_update_required);
if (!graph->warmup_complete) {
// Warmup: need at least 2 calls with no property change on the 2nd call
if (!properties_changed) {
graph->warmup_complete = true;
GGML_LOG_DEBUG("%s: CUDA graph warmup complete\n", __func__);
use_cuda_graph = true;
cuda_graph_update_required = true;
}
// else: properties changed or first call - execute directly (use_cuda_graph stays false)
} else {
// Post-warmup: normal CUDA graph operation
if (properties_changed) {
// Properties changed - reset warmup, execute directly until stable again
graph->warmup_complete = false;
GGML_LOG_DEBUG("%s: CUDA graph warmup reset\n", __func__);
} else {
use_cuda_graph = true;
cuda_graph_update_required = graph->instance == nullptr;
}
}
}
}
#endif // USE_CUDA_GRAPH

View File

@@ -5,7 +5,7 @@ import os
import sys
import subprocess
HTTPLIB_VERSION = "d4180e923f846b44a3d30acd938438d6e64fc9f6"
HTTPLIB_VERSION = "refs/tags/v0.33.1"
vendor = {
"https://github.com/nlohmann/json/releases/latest/download/json.hpp": "vendor/nlohmann/json.hpp",

View File

@@ -1703,8 +1703,8 @@ void llama_model::load_hparams(llama_model_loader & ml) {
} break;
case LLM_ARCH_DEEPSEEK2:
{
// lite variants include DeepSeek-V2-Lite, GigaChat3-10B-A1.8B
const bool is_lite = (hparams.n_layer == 27 || hparams.n_layer == 26);
// lite variants include DeepSeek-V2-Lite, GigaChat3-10B-A1.8B, Kanana-2-30B-A3B
const bool is_lite = (hparams.n_layer == 27 || hparams.n_layer == 26 || (hparams.n_layer == 48 && n_vocab == 128256));
ml.get_key(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, hparams.f_norm_rms_eps);
ml.get_key(LLM_KV_LEADING_DENSE_BLOCK_COUNT, hparams.n_layer_dense_lead);

View File

@@ -2027,7 +2027,8 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
pre_type = LLAMA_VOCAB_PRE_TYPE_QWEN2;
} else if (
tokenizer_pre == "gpt-4o" ||
tokenizer_pre == "llama4") {
tokenizer_pre == "llama4" ||
tokenizer_pre == "kanana2") {
pre_type = LLAMA_VOCAB_PRE_TYPE_GPT4O;
clean_spaces = false;
} else if (

View File

@@ -229,6 +229,20 @@ common_chat_tool python_tool {
"required": ["code"]
})",
};
common_chat_tool todo_list_tool {
/* .name = */ "todo_list",
/* .description = */ "Create or update the todo list",
/* .parameters = */ R"({
"type": "object",
"properties": {
"todos": {
"type": "array",
"description": "List of TODO list items"
}
},
"required": ["todos"]
})",
};
common_chat_tool code_interpreter_tool {
/* .name = */ "code_interpreter",
/* .description = */ "an ipython interpreter",
@@ -3018,542 +3032,6 @@ Hey there!<|im_end|>
);
}
// Test Qwen3-Coder XML format
{
// Basic XML tool call parsing
assert_msg_equals(
message_assist_call,
test_chat_parse(
"<tool_call>\n"
" <function=special_function>\n"
" <parameter=arg1>\n"
" 1\n"
" </parameter>\n"
" </function>\n"
"</tool_call>",
/* is_partial= */ false,
{COMMON_CHAT_FORMAT_QWEN3_CODER_XML}));
// Multiple parameters with different types
common_chat_msg expected_multi_param;
expected_multi_param.role = "assistant";
expected_multi_param.tool_calls = {
{ "complex_function", "{\"name\":\"John Doe\",\"age\":30,\"active\":true,\"score\":95.5}", "" }
};
test_parser_with_streaming(expected_multi_param,
"<tool_call>\n"
" <function=complex_function>\n"
" <parameter=name>\n"
" John Doe\n"
" </parameter>\n"
" <parameter=age>\n"
" 30\n"
" </parameter>\n"
" <parameter=active>\n"
" true\n"
" </parameter>\n"
" <parameter=score>\n"
" 95.5\n"
" </parameter>\n"
" </function>\n"
"</tool_call>",
[&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); });
// Special characters and Unicode
common_chat_msg expected_special_chars;
expected_special_chars.role = "assistant";
expected_special_chars.tool_calls = {
{ "unicode_function", "{\"message\":\"Hello 世界! 🌍 Special chars: @#$%^&*()\"}", "" }
};
test_parser_with_streaming(expected_special_chars,
"<tool_call>\n"
" <function=unicode_function>\n"
" <parameter=message>\n"
" Hello 世界! 🌍 Special chars: @#$%^&*()\n"
" </parameter>\n"
" </function>\n"
"</tool_call>",
[&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); });
// Multiline content with newlines and indentation
common_chat_msg expected_multiline;
expected_multiline.role = "assistant";
expected_multiline.tool_calls = {
{ "code_function", "{\"code\":\"def hello():\\n print(\\\"Hello, World!\\\")\\n return True\"}", "" }
};
test_parser_with_streaming(expected_multiline,
"<tool_call>\n"
" <function=code_function>\n"
" <parameter=code>\n"
"def hello():\n"
" print(\"Hello, World!\")\n"
" return True\n"
" </parameter>\n"
" </function>\n"
"</tool_call>",
[&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); });
// JSON object as parameter value
common_chat_msg expected_json_param;
expected_json_param.role = "assistant";
expected_json_param.tool_calls = {
{ "json_function", "{\"config\":{\"host\":\"localhost\",\"port\":8080,\"ssl\":false}}", "" }
};
test_parser_with_streaming(
expected_json_param,
"<tool_call>\n"
" <function=json_function>\n"
" <parameter=config>\n"
" {\"host\": \"localhost\", \"port\": 8080, \"ssl\": false}\n"
" </parameter>\n"
" </function>\n"
"</tool_call>",
[&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); });
// Array as parameter value
common_chat_msg expected_array_param;
expected_array_param.role = "assistant";
expected_array_param.tool_calls = {
{ "array_function", "{\"items\":[\"apple\",\"banana\",\"cherry\"]}", "" }
};
test_parser_with_streaming(
expected_array_param,
"<tool_call>\n"
" <function=array_function>\n"
" <parameter=items>\n"
" [\"apple\", \"banana\", \"cherry\"]\n"
" </parameter>\n"
" </function>\n"
"</tool_call>",
[&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); });
// Empty parameter
common_chat_msg expected_empty_param;
expected_empty_param.role = "assistant";
expected_empty_param.tool_calls = {
{ "empty_function", "{\"empty_param\":\"\"}", "" }
};
test_parser_with_streaming(
expected_empty_param,
"<tool_call>\n"
" <function=empty_function>\n"
" <parameter=empty_param>\n"
" </parameter>\n"
" </function>\n"
"</tool_call>",
[&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); });
// Boolean values (true/false)
common_chat_msg expected_boolean;
expected_boolean.role = "assistant";
expected_boolean.tool_calls = {
{ "boolean_function", "{\"enabled\":true,\"debug\":false}", "" }
};
test_parser_with_streaming(
expected_boolean,
"<tool_call>\n"
" <function=boolean_function>\n"
" <parameter=enabled>\n"
" true\n"
" </parameter>\n"
" <parameter=debug>\n"
" false\n"
" </parameter>\n"
" </function>\n"
"</tool_call>",
[&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); });
// Null value
common_chat_msg expected_null;
expected_null.role = "assistant";
expected_null.tool_calls = {
{ "null_function", "{\"optional_param\":null}", "" }
};
test_parser_with_streaming(
expected_null,
"<tool_call>\n"
" <function=null_function>\n"
" <parameter=optional_param>\n"
" null\n"
" </parameter>\n"
" </function>\n"
"</tool_call>",
[&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); });
// Negative numbers and scientific notation
common_chat_msg expected_numbers;
expected_numbers.role = "assistant";
expected_numbers.tool_calls = {
{ "math_function", "{\"negative\":-42,\"decimal\":-3.14,\"scientific\":1.23e-4}", "" }
};
test_parser_with_streaming(
expected_numbers,
"<tool_call>\n"
" <function=math_function>\n"
" <parameter=negative>\n"
" -42\n"
" </parameter>\n"
" <parameter=decimal>\n"
" -3.14\n"
" </parameter>\n"
" <parameter=scientific>\n"
" 1.23e-4\n"
" </parameter>\n"
" </function>\n"
"</tool_call>",
[&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); });
// XML-like content in parameters (should be escaped)
common_chat_msg expected_xml_content;
expected_xml_content.role = "assistant";
expected_xml_content.tool_calls = {
{ "xml_function", "{\"xml_content\":\"<root><item>value</item></root>\"}", "" }
};
test_parser_with_streaming(
expected_xml_content,
"<tool_call>\n"
" <function=xml_function>\n"
" <parameter=xml_content>\n"
" <root><item>value</item></root>\n"
" </parameter>\n"
" </function>\n"
"</tool_call>",
[&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); });
// Quotes and escape characters
common_chat_msg expected_quotes;
expected_quotes.role = "assistant";
expected_quotes.tool_calls = {
{ "quote_function", "{\"message\":\"She said \\\"Hello!\\\" and left.\"}", "" }
};
test_parser_with_streaming(
expected_quotes,
"<tool_call>\n"
" <function=quote_function>\n"
" <parameter=message>\n"
" She said \"Hello!\" and left.\n"
" </parameter>\n"
" </function>\n"
"</tool_call>",
[&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); });
// Long parameter value (simplified)
std::string long_text = "This is a long text parameter that should test the parser's ability to handle larger amounts of text data.";
common_chat_msg expected_long_text;
expected_long_text.role = "assistant";
expected_long_text.tool_calls = {
{ "long_function", "{\"long_text\":\"" + long_text + "\"}", "" }
};
test_parser_with_streaming(
expected_long_text,
"<tool_call>\n"
" <function=long_function>\n"
" <parameter=long_text>\n"
" " + long_text + "\n"
" </parameter>\n"
" </function>\n"
"</tool_call>",
[&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); });
// Mixed content with text before and after tool call
common_chat_msg expected_mixed_content;
expected_mixed_content.role = "assistant";
expected_mixed_content.content = "I'll help you search for products. ";
expected_mixed_content.tool_calls = {
{ "search_function", "{\"query\":\"laptops\"}", "" }
};
test_parser_with_streaming(
expected_mixed_content,
"I'll help you search for products. <tool_call>\n"
" <function=search_function>\n"
" <parameter=query>\n"
" laptops\n"
" </parameter>\n"
" </function>\n"
"</tool_call>",
[&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); });
// Compact format (no extra whitespace)
common_chat_msg expected_compact;
expected_compact.role = "assistant";
expected_compact.tool_calls = {
{ "compact_function", "{\"param\":\"value\"}", "" }
};
test_parser_with_streaming(
expected_compact,
"<tool_call><function=compact_function><parameter=param>value</parameter></function></tool_call>",
[&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); });
// Function name with underscores and numbers
common_chat_msg expected_complex_name;
expected_complex_name.role = "assistant";
expected_complex_name.tool_calls = {
{ "get_user_data_v2", "{\"user_id\":12345}", "" }
};
test_parser_with_streaming(
expected_complex_name,
"<tool_call>\n"
" <function=get_user_data_v2>\n"
" <parameter=user_id>\n"
" 12345\n"
" </parameter>\n"
" </function>\n"
"</tool_call>",
[&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); });
// Parameter names with underscores and numbers
common_chat_msg expected_complex_params;
expected_complex_params.role = "assistant";
expected_complex_params.tool_calls = {
{ "test_function", "{\"param_1\":\"value1\",\"param_2_name\":\"value2\",\"param3\":123}", "" }
};
test_parser_with_streaming(
expected_complex_params,
"<tool_call>\n"
" <function=test_function>\n"
" <parameter=param_1>\n"
" value1\n"
" </parameter>\n"
" <parameter=param_2_name>\n"
" value2\n"
" </parameter>\n"
" <parameter=param3>\n"
" 123\n"
" </parameter>\n"
" </function>\n"
"</tool_call>",
[&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); });
// Very deeply nested XML content in parameter
common_chat_msg expected_deep_xml;
expected_deep_xml.role = "assistant";
expected_deep_xml.tool_calls = {
{ "xml_parser", "{\"xml\":\"<root><level1><level2><level3>deep content</level3></level2></level1></root>\"}", "" }
};
test_parser_with_streaming(
expected_deep_xml,
"<tool_call>\n"
" <function=xml_parser>\n"
" <parameter=xml>\n"
" <root><level1><level2><level3>deep content</level3></level2></level1></root>\n"
" </parameter>\n"
" </function>\n"
"</tool_call>",
[&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); });
// Parameter with only whitespace
common_chat_msg expected_whitespace_param;
expected_whitespace_param.role = "assistant";
expected_whitespace_param.tool_calls = {
{ "whitespace_function", "{\"spaces\":\"\"}", "" }
};
test_parser_with_streaming(
expected_whitespace_param,
"<tool_call>\n"
" <function=whitespace_function>\n"
" <parameter=spaces>\n"
" \n"
" </parameter>\n"
" </function>\n"
"</tool_call>",
[&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); });
// Parameter with tabs and mixed whitespace
common_chat_msg expected_mixed_whitespace;
expected_mixed_whitespace.role = "assistant";
expected_mixed_whitespace.tool_calls = {
{ "tab_function", "{\"content\":\"line1\\n\\tindented line\\n spaces\"}", "" }
};
test_parser_with_streaming(
expected_mixed_whitespace,
"<tool_call>\n"
" <function=tab_function>\n"
" <parameter=content>\n"
"line1\n"
"\tindented line\n"
" spaces\n"
" </parameter>\n"
" </function>\n"
"</tool_call>",
[&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); });
// Control characters and special Unicode
common_chat_msg expected_control_chars;
expected_control_chars.role = "assistant";
expected_control_chars.tool_calls = {
{ "control_function", "{\"text\":\"Line1\\nLine2\\tTabbed\\rCarriage return\"}", "" }
};
test_parser_with_streaming(
expected_control_chars,
"<tool_call>\n"
" <function=control_function>\n"
" <parameter=text>\n"
"Line1\nLine2\tTabbed\rCarriage return\n"
" </parameter>\n"
" </function>\n"
"</tool_call>",
[&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); });
// Emoji and extended Unicode characters
common_chat_msg expected_emoji;
expected_emoji.role = "assistant";
expected_emoji.tool_calls = {
{ "emoji_function", "{\"message\":\"Hello! 👋 🌟 🚀 Testing emojis: 😀😃😄😁 and symbols: ∑∏∆∇\"}", "" }
};
test_parser_with_streaming(
expected_emoji,
"<tool_call>\n"
" <function=emoji_function>\n"
" <parameter=message>\n"
" Hello! 👋 🌟 🚀 Testing emojis: 😀😃😄😁 and symbols: ∑∏∆∇\n"
" </parameter>\n"
" </function>\n"
"</tool_call>",
[&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); });
// Mathematical expressions and formulas
common_chat_msg expected_math;
expected_math.role = "assistant";
expected_math.tool_calls = {
{ "math_function", "{\"formula\":\"E = mc² and ∫f(x)dx = F(x) + C\"}", "" }
};
test_parser_with_streaming(
expected_math,
"<tool_call>\n"
" <function=math_function>\n"
" <parameter=formula>\n"
" E = mc² and ∫f(x)dx = F(x) + C\n"
" </parameter>\n"
" </function>\n"
"</tool_call>",
[&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); });
// SQL injection-like content (should be safely escaped)
common_chat_msg expected_sql;
expected_sql.role = "assistant";
expected_sql.tool_calls = {
{ "sql_function", "{\"query\":\"SELECT * FROM users WHERE id = 1; DROP TABLE users; --\"}", "" }
};
test_parser_with_streaming(
expected_sql,
"<tool_call>\n"
" <function=sql_function>\n"
" <parameter=query>\n"
" SELECT * FROM users WHERE id = 1; DROP TABLE users; --\n"
" </parameter>\n"
" </function>\n"
"</tool_call>",
[&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); });
// HTML/XML injection content
common_chat_msg expected_html;
expected_html.role = "assistant";
expected_html.tool_calls = {
{ "html_function", "{\"content\":\"<script>alert('xss')</script><img src=x onerror=alert(1)>\"}", "" }
};
test_parser_with_streaming(
expected_html,
"<tool_call>\n"
" <function=html_function>\n"
" <parameter=content>\n"
" <script>alert('xss')</script><img src=x onerror=alert(1)>\n"
" </parameter>\n"
" </function>\n"
"</tool_call>",
[&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); });
// Binary-like content (base64)
common_chat_msg expected_binary;
expected_binary.role = "assistant";
expected_binary.tool_calls = {
{ "binary_function", "{\"data\":\"SGVsbG8gV29ybGQhIFRoaXMgaXMgYmFzZTY0IGVuY29kZWQgdGV4dC4=\"}", "" }
};
test_parser_with_streaming(
expected_binary,
"<tool_call>\n"
" <function=binary_function>\n"
" <parameter=data>\n"
" SGVsbG8gV29ybGQhIFRoaXMgaXMgYmFzZTY0IGVuY29kZWQgdGV4dC4=\n"
" </parameter>\n"
" </function>\n"
"</tool_call>",
[&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); });
// Very large numbers (should be parsed as scientific notation)
common_chat_msg expected_large_numbers;
expected_large_numbers.role = "assistant";
expected_large_numbers.tool_calls = {
{ "number_function", "{\"big_int\":1e+60}", "" } // Large number becomes scientific notation
};
test_parser_with_streaming(
expected_large_numbers,
"<tool_call>\n"
" <function=number_function>\n"
" <parameter=big_int>\n"
" 999999999999999999999999999999999999999999999999999999999999\n"
" </parameter>\n"
" </function>\n"
"</tool_call>",
[&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_QWEN3_CODER_XML}); });
}
{
    // Qwen3-Coder template: verify that union/anyOf parameter schemas do not
    // break grammar generation for the XML-style tool-call format.
    auto tmpls = read_templates("models/templates/Qwen3-Coder.jinja");
    common_chat_templates_inputs inputs;
    inputs.messages = { message_user };
    // Tool whose parameters exercise the JSON Schema union forms:
    //   - "type" given as an array of types
    //   - anyOf with a single alternative
    //   - anyOf mixing object and null
    common_chat_tool qwen_union_tool {
        /* .name = */ "qwen_union",
        /* .description = */ "Test tool for union/anyOf handling",
        /* .parameters = */ R"({
            "type": "object",
            "properties": {
                "priority": { "type": ["number", "null"] },
                "maybe_text": { "anyOf": [ { "type": "string" } ] },
                "config": { "anyOf": [ { "type": "object" }, { "type": "null" } ] }
            },
            "required": []
        })",
    };
    inputs.tools = { qwen_union_tool };
    auto params = common_chat_templates_apply(tmpls.get(), inputs);
    assert_equals(COMMON_CHAT_FORMAT_QWEN3_CODER_XML, params.format);
    assert_equals(false, params.grammar.empty());
    // Grammar should compile successfully (a throwing/failed build would
    // indicate the union types produced an invalid GBNF grammar)
    auto grammar = build_grammar(params.grammar);
    GGML_ASSERT(grammar && "Failed to build Qwen3-Coder grammar with union types");
}
{
// Step-3.5-Flash template: uses same XML output format as Qwen3-Coder and Nemotron v3,
// but with <think> support. Routes to the Nemotron v3 PEG parser for streaming and
@@ -3665,6 +3143,135 @@ static void test_template_output_peg_parsers() {
});
}
{
    // Qwen3-Coder: end-to-end checks of the XML-style tool-call output
    // format through the PEG parser — plain content, single and parallel
    // calls, string vs JSON parameter values, recovery from a missing
    // </parameter> close tag, and JSON-schema-constrained responses.
    auto tmpls = read_templates("models/templates/Qwen3-Coder.jinja");
    // Test basic message
    test_peg_parser(tmpls.get(), [&](auto & t) {
        t.input = "Hello, world!\nWhat's up?";
        t.expect = message_assist;
    });
    // Test tool call
    test_peg_parser(tmpls.get(), [&](auto & t) {
        t.input =
            "<tool_call>\n"
            "<function=special_function>\n"
            "<parameter=arg1>\n"
            "1\n"
            "</parameter>\n"
            "</function>\n"
            "</tool_call>";
        t.params.tools = {special_function_tool};
        t.expect = message_assist_call;
    });
    // Test parallel tool calls
    test_peg_parser(tmpls.get(), [&](auto & t) {
        t.input =
            "<tool_call>\n"
            "<function=special_function>\n"
            "<parameter=arg1>\n"
            "1\n"
            "</parameter>\n"
            "</function>\n"
            "</tool_call>\n"
            "<tool_call>\n"
            "<function=special_function_with_opt>\n"
            "<parameter=arg1>\n"
            "1\n"
            "</parameter>\n"
            "<parameter=arg2>\n"
            "2\n"
            "</parameter>\n"
            "</function>\n"
            "</tool_call>";
        t.params.parallel_tool_calls = true;
        t.params.tools = {special_function_tool, special_function_tool_with_optional_param};
        t.expect.tool_calls = {{
            /* .name = */ "special_function",
            /* .arguments = */ R"({"arg1": 1})",
            /* .id = */ {},
        }, {
            /* .name = */ "special_function_with_opt",
            /* .arguments = */ R"({"arg1": 1, "arg2": 2})",
            /* .id = */ {},
        }};
    });
    // Test tool call with string parameter — inner newlines/quotes of the
    // code block must come out JSON-escaped in the parsed arguments
    test_peg_parser(tmpls.get(), [&](auto & t) {
        t.input =
            "<tool_call>\n"
            "<function=python>\n"
            "<parameter=code>\n"
            "def hello():\n"
            "    print(\"Hello, world!\")\n"
            "\n"
            "hello()\n"
            "</parameter>\n"
            "</function>\n"
            "</tool_call>";
        t.params.tools = {python_tool};
        t.expect.tool_calls = {{
            /* .name = */ "python",
            /* .arguments = */ "{\"code\": \"def hello():\\n    print(\\\"Hello, world!\\\")\\n\\nhello()\"}",
            /* .id = */ {},
        }};
    });
    // Test tool call with JSON parameter — value is kept as structured JSON,
    // not re-encoded as a string
    test_peg_parser(tmpls.get(), [&](auto & t) {
        t.input =
            "<tool_call>\n"
            "<function=todo_list>\n"
            "<parameter=todos>\n"
            "[{\"item\": \"Check stuff\", \"selected\": false}, {\"item\": \"Prepare stuff\", \"selected\": true}]\n"
            "</parameter>\n"
            "</function>\n"
            "</tool_call>";
        t.params.tools = {todo_list_tool};
        t.expect.tool_calls = {{
            /* .name = */ "todo_list",
            /* .arguments = */ "{\"todos\": [{\"item\": \"Check stuff\", \"selected\": false}, {\"item\": \"Prepare stuff\", \"selected\": true}]}",
            /* .id = */ {},
        }};
    });
    // Test tool call with string parameter and no closing </parameter> tag
    // (parser should treat everything up to </function> as the value)
    test_peg_parser(tmpls.get(), [&](auto & t) {
        t.input =
            "<tool_call>\n"
            "<function=python>\n"
            "<parameter=code>\n"
            "def hello():\n"
            "    print(\"Hello, world!\")\n"
            "\n"
            "hello()\n"
            "</function>\n"
            "</tool_call>";
        t.params.tools = {python_tool};
        t.expect.tool_calls = {{
            /* .name = */ "python",
            /* .arguments = */ "{\"code\": \"def hello():\\n    print(\\\"Hello, world!\\\")\\n\\nhello()\"}",
            /* .id = */ {},
        }};
    });
    // Test response format — with a json_schema set, the raw JSON passes
    // through as plain content
    test_peg_parser(tmpls.get(), [&](auto & t) {
        t.input = R"({"amount": 123.45, "date": "2025-12-03"})";
        t.params.json_schema = invoice_schema;
        t.expect.content = R"({"amount": 123.45, "date": "2025-12-03"})";
    });
}
{
// NVIDIA Nemotron-3 Nano
auto tmpls = read_templates("models/templates/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16.jinja");

Binary file not shown.

View File

@@ -1105,6 +1105,8 @@ json convert_responses_to_chatcmpl(const json & response_body) {
};
for (json item : input_value) {
bool merge_prev = !chatcmpl_messages.empty() && chatcmpl_messages.back().value("role", "") == "assistant";
if (exists_and_is_string(item, "content")) {
// #responses_create-input-input_item_list-input_message-content-text_input
// Only "Input message" contains item["content"]::string
@@ -1193,7 +1195,7 @@ json convert_responses_to_chatcmpl(const json & response_body) {
item.at("type") == "message"
) {
// #responses_create-input-input_item_list-item-output_message
std::vector<json> chatcmpl_content;
auto chatcmpl_content = json::array();
for (const auto & output_text : item.at("content")) {
const std::string type = json_value(output_text, "type", std::string());
@@ -1210,10 +1212,19 @@ json convert_responses_to_chatcmpl(const json & response_body) {
});
}
item.erase("status");
item.erase("type");
item["content"] = chatcmpl_content;
chatcmpl_messages.push_back(item);
if (merge_prev) {
auto & prev_msg = chatcmpl_messages.back();
if (!exists_and_is_array(prev_msg, "content")) {
prev_msg["content"] = json::array();
}
auto & prev_content = prev_msg["content"];
prev_content.insert(prev_content.end(), chatcmpl_content.begin(), chatcmpl_content.end());
} else {
item.erase("status");
item.erase("type");
item["content"] = chatcmpl_content;
chatcmpl_messages.push_back(item);
}
} else if (exists_and_is_string(item, "arguments") &&
exists_and_is_string(item, "call_id") &&
exists_and_is_string(item, "name") &&
@@ -1221,24 +1232,27 @@ json convert_responses_to_chatcmpl(const json & response_body) {
item.at("type") == "function_call"
) {
// #responses_create-input-input_item_list-item-function_tool_call
json msg = json {
{"role", "assistant"},
{"tool_calls", json::array({ json {
{"function", json {
{"arguments", item.at("arguments")},
{"name", item.at("name")},
}},
{"id", item.at("call_id")},
{"type", "function"},
}})},
json tool_call = {
{"function", json {
{"arguments", item.at("arguments")},
{"name", item.at("name")},
}},
{"id", item.at("call_id")},
{"type", "function"},
};
if (!chatcmpl_messages.empty() && chatcmpl_messages.back().contains("reasoning_content")) {
// Move reasoning content from dummy message to tool call message
msg["reasoning_content"] = chatcmpl_messages.back().at("reasoning_content");
chatcmpl_messages.pop_back();
if (merge_prev) {
auto & prev_msg = chatcmpl_messages.back();
if (!exists_and_is_array(prev_msg, "tool_calls")) {
prev_msg["tool_calls"] = json::array();
}
prev_msg["tool_calls"].push_back(tool_call);
} else {
chatcmpl_messages.push_back(json {
{"role", "assistant"},
{"tool_calls", json::array({tool_call})}
});
}
chatcmpl_messages.push_back(msg);
} else if (exists_and_is_string(item, "call_id") &&
(exists_and_is_string(item, "output") || exists_and_is_array(item, "output")) &&
exists_and_is_string(item, "type") &&
@@ -1282,12 +1296,16 @@ json convert_responses_to_chatcmpl(const json & response_body) {
throw std::invalid_argument("item['content']['text'] is not a string");
}
// Pack reasoning content in dummy message
chatcmpl_messages.push_back(json {
{"role", "assistant"},
{"content", json::array()},
{"reasoning_content", item.at("content")[0].at("text")},
});
if (merge_prev) {
auto & prev_msg = chatcmpl_messages.back();
prev_msg["reasoning_content"] = item.at("content")[0].at("text");
} else {
chatcmpl_messages.push_back(json {
{"role", "assistant"},
{"content", json::array()},
{"reasoning_content", item.at("content")[0].at("text")},
});
}
} else {
throw std::invalid_argument("Cannot determine type of 'item'");
}
@@ -1296,20 +1314,6 @@ json convert_responses_to_chatcmpl(const json & response_body) {
throw std::invalid_argument("'input' must be a string or array of objects");
}
// Remove unused dummy message which contains
// reasoning content not followed by tool call
chatcmpl_messages.erase(std::remove_if(
chatcmpl_messages.begin(),
chatcmpl_messages.end(),
[](const json & x){ return x.contains("role") &&
x.at("role") == "assistant" &&
x.contains("content") &&
x.at("content") == json::array() &&
x.contains("reasoning_content");
}),
chatcmpl_messages.end()
);
chatcmpl_body["messages"] = chatcmpl_messages;
if (response_body.contains("tools")) {

View File

@@ -101,7 +101,7 @@ In a separate terminal, start the backend server:
./llama-server -m model.gguf
# Multi-model (ROUTER mode)
./llama-server --model-store /path/to/models
./llama-server --models-dir /path/to/models
```
### 3. Start Development Servers

View File

@@ -251,9 +251,6 @@
return options.find((option) => option.id === activeId);
}
if (options.length === 1) {
return options[0];
}
// No selection - return undefined to show "Select model"
return undefined;
}

View File

@@ -306,6 +306,16 @@ class ModelsStore {
const response = await ModelsService.listRouter();
this.routerModels = response.data;
await this.fetchModalitiesForLoadedModels();
const o = this.models.filter((option) => {
const modelProps = this.getModelProps(option.model);
return modelProps?.webui !== false;
});
if (o.length === 1 && this.isModelLoaded(o[0].model)) {
this.selectModelById(o[0].id);
}
} catch (error) {
console.warn('Failed to fetch router models:', error);
this.routerModels = [];

File diff suppressed because it is too large Load Diff

View File

@@ -8,8 +8,8 @@
#ifndef CPPHTTPLIB_HTTPLIB_H
#define CPPHTTPLIB_HTTPLIB_H
#define CPPHTTPLIB_VERSION "0.32.0"
#define CPPHTTPLIB_VERSION_NUM "0x002000"
#define CPPHTTPLIB_VERSION "0.33.1"
#define CPPHTTPLIB_VERSION_NUM "0x002101"
/*
* Platform compatibility check
@@ -185,6 +185,14 @@
: 0))
#endif
#ifndef CPPHTTPLIB_THREAD_POOL_MAX_COUNT
#define CPPHTTPLIB_THREAD_POOL_MAX_COUNT (CPPHTTPLIB_THREAD_POOL_COUNT * 4)
#endif
#ifndef CPPHTTPLIB_THREAD_POOL_IDLE_TIMEOUT
#define CPPHTTPLIB_THREAD_POOL_IDLE_TIMEOUT 3 // seconds
#endif
#ifndef CPPHTTPLIB_RECV_FLAGS
#define CPPHTTPLIB_RECV_FLAGS 0
#endif
@@ -201,6 +209,22 @@
#define CPPHTTPLIB_MAX_LINE_LENGTH 32768
#endif
#ifndef CPPHTTPLIB_WEBSOCKET_MAX_PAYLOAD_LENGTH
#define CPPHTTPLIB_WEBSOCKET_MAX_PAYLOAD_LENGTH 16777216
#endif
#ifndef CPPHTTPLIB_WEBSOCKET_READ_TIMEOUT_SECOND
#define CPPHTTPLIB_WEBSOCKET_READ_TIMEOUT_SECOND 300
#endif
#ifndef CPPHTTPLIB_WEBSOCKET_CLOSE_TIMEOUT_SECOND
#define CPPHTTPLIB_WEBSOCKET_CLOSE_TIMEOUT_SECOND 5
#endif
#ifndef CPPHTTPLIB_WEBSOCKET_PING_INTERVAL_SECOND
#define CPPHTTPLIB_WEBSOCKET_PING_INTERVAL_SECOND 30
#endif
/*
* Headers
*/
@@ -310,6 +334,7 @@ using socket_t = int;
#include <errno.h>
#include <exception>
#include <fcntl.h>
#include <fstream>
#include <functional>
#include <iomanip>
#include <iostream>
@@ -328,6 +353,9 @@ using socket_t = int;
#include <unordered_map>
#include <unordered_set>
#include <utility>
#if __cplusplus >= 201703L
#include <any>
#endif
#if defined(CPPHTTPLIB_USE_NON_BLOCKING_GETADDRINFO) || \
defined(CPPHTTPLIB_USE_CERTS_FROM_MACOSX_KEYCHAIN)
@@ -415,10 +443,46 @@ using socket_t = int;
#endif // CPPHTTPLIB_MBEDTLS_SUPPORT
#ifdef CPPHTTPLIB_WOLFSSL_SUPPORT
#include <wolfssl/options.h>
#include <wolfssl/openssl/x509v3.h>
// Fallback definitions for older wolfSSL versions (e.g., 5.6.6)
#ifndef WOLFSSL_GEN_EMAIL
#define WOLFSSL_GEN_EMAIL 1
#endif
#ifndef WOLFSSL_GEN_DNS
#define WOLFSSL_GEN_DNS 2
#endif
#ifndef WOLFSSL_GEN_URI
#define WOLFSSL_GEN_URI 6
#endif
#ifndef WOLFSSL_GEN_IPADD
#define WOLFSSL_GEN_IPADD 7
#endif
#include <wolfssl/ssl.h>
#include <wolfssl/wolfcrypt/hash.h>
#include <wolfssl/wolfcrypt/md5.h>
#include <wolfssl/wolfcrypt/sha256.h>
#include <wolfssl/wolfcrypt/sha512.h>
#ifdef _WIN32
#include <wincrypt.h>
#ifdef _MSC_VER
#pragma comment(lib, "crypt32.lib")
#endif
#endif // _WIN32
#if defined(CPPHTTPLIB_USE_CERTS_FROM_MACOSX_KEYCHAIN)
#if TARGET_OS_MAC
#include <Security/Security.h>
#endif
#endif // CPPHTTPLIB_USE_CERTS_FROM_MACOSX_KEYCHAIN
#endif // CPPHTTPLIB_WOLFSSL_SUPPORT
// Define CPPHTTPLIB_SSL_ENABLED if any SSL backend is available
// This simplifies conditional compilation when adding new backends (e.g.,
// wolfSSL)
#if defined(CPPHTTPLIB_OPENSSL_SUPPORT) || defined(CPPHTTPLIB_MBEDTLS_SUPPORT)
#if defined(CPPHTTPLIB_OPENSSL_SUPPORT) || \
defined(CPPHTTPLIB_MBEDTLS_SUPPORT) || defined(CPPHTTPLIB_WOLFSSL_SUPPORT)
#define CPPHTTPLIB_SSL_ENABLED
#endif
@@ -440,6 +504,10 @@ using socket_t = int;
*/
namespace httplib {
namespace ws {
class WebSocket;
} // namespace ws
namespace detail {
/*
@@ -711,6 +779,143 @@ using Match = std::smatch;
using DownloadProgress = std::function<bool(size_t current, size_t total)>;
using UploadProgress = std::function<bool(size_t current, size_t total)>;
#if __cplusplus >= 201703L
// C++17 and later: re-export std::any under the httplib namespace so the
// rest of the library can use httplib::any uniformly across standards.
using any = std::any;
using bad_any_cast = std::bad_any_cast;

// Thin forwarders to std::any_cast covering the const&/&/&& value forms
// and the const/non-const pointer forms.
template <typename T> T any_cast(const any &a) { return std::any_cast<T>(a); }
template <typename T> T any_cast(any &a) { return std::any_cast<T>(a); }
template <typename T> T any_cast(any &&a) {
  return std::any_cast<T>(std::move(a));
}
template <typename T> const T *any_cast(const any *a) noexcept {
  return std::any_cast<T>(a);
}
template <typename T> T *any_cast(any *a) noexcept {
  return std::any_cast<T>(a);
}
#else // C++11/14 implementation
// Thrown by the value forms of any_cast on a type mismatch; mirrors
// std::bad_any_cast for the pre-C++17 fallback implementation below.
class bad_any_cast : public std::bad_cast {
public:
  const char *what() const noexcept override { return "bad any_cast"; }
};
namespace detail {

// Opaque per-type identifier used in place of RTTI by the `any` fallback.
using any_type_id = const void *;

// Returns a unique per-type ID without RTTI.
// The static address is stable across TUs because function templates are
// implicitly inline and the ODR merges their statics into one.
template <typename T> any_type_id any_typeid() noexcept {
  static const char id = 0;
  return &id;
}

// Type-erased storage interface: cloneable and queryable for its type ID.
struct any_storage {
  virtual ~any_storage() = default;
  virtual std::unique_ptr<any_storage> clone() const = 0;
  virtual any_type_id type_id() const noexcept = 0;
};

// Concrete storage node holding a single value of type T.
template <typename T> struct any_value final : any_storage {
  T value;
  template <typename U> explicit any_value(U &&v) : value(std::forward<U>(v)) {}
  // Deep-copies the held value (required for `any`'s copy semantics).
  std::unique_ptr<any_storage> clone() const override {
    return std::unique_ptr<any_storage>(new any_value<T>(value));
  }
  any_type_id type_id() const noexcept override { return any_typeid<T>(); }
};

} // namespace detail
// Minimal std::any replacement for C++11/14 builds. Holds a decayed copy of
// any copy-constructible value behind a cloneable, type-tagged storage node.
class any {
  std::unique_ptr<detail::any_storage> storage_; // null == empty

public:
  any() noexcept = default;
  // Copy performs a deep clone of the held value (empty stays empty).
  any(const any &o) : storage_(o.storage_ ? o.storage_->clone() : nullptr) {}
  any(any &&) noexcept = default;

  any &operator=(const any &o) {
    // Self-assignment safe: clone() completes before storage_ is replaced.
    storage_ = o.storage_ ? o.storage_->clone() : nullptr;
    return *this;
  }
  any &operator=(any &&) noexcept = default;

  // Converting constructor for any non-`any` type; the value is decayed and
  // copied/moved into fresh storage. (SFINAE keeps the copy/move ctors from
  // being hijacked.)
  template <
      typename T, typename D = typename std::decay<T>::type,
      typename std::enable_if<!std::is_same<D, any>::value, int>::type = 0>
  any(T &&v) : storage_(new detail::any_value<D>(std::forward<T>(v))) {}

  // Converting assignment, same constraints as the converting constructor.
  template <
      typename T, typename D = typename std::decay<T>::type,
      typename std::enable_if<!std::is_same<D, any>::value, int>::type = 0>
  any &operator=(T &&v) {
    storage_.reset(new detail::any_value<D>(std::forward<T>(v)));
    return *this;
  }

  bool has_value() const noexcept { return storage_ != nullptr; }
  void reset() noexcept { storage_.reset(); }

  // Pointer forms of any_cast need direct access to storage_.
  template <typename T> friend T *any_cast(any *a) noexcept;
  template <typename T> friend const T *any_cast(const any *a) noexcept;
};
// any_cast — pre-C++17 fallback mirroring std::any_cast.
//
// Pointer overloads return nullptr when the argument is null, empty, or
// holds a different type (IDs compared via detail::any_typeid, no RTTI).
// Value overloads delegate to the pointer forms and either throw
// bad_any_cast or, when exceptions are disabled, abort.
template <typename T> T *any_cast(any *a) noexcept {
  if (!a || !a->storage_ ||
      a->storage_->type_id() != detail::any_typeid<T>()) {
    return nullptr;
  }
  return &static_cast<detail::any_value<T> *>(a->storage_.get())->value;
}
template <typename T> const T *any_cast(const any *a) noexcept {
  if (!a || !a->storage_ ||
      a->storage_->type_id() != detail::any_typeid<T>()) {
    return nullptr;
  }
  return &static_cast<const detail::any_value<T> *>(a->storage_.get())->value;
}
template <typename T> T any_cast(const any &a) {
  using Bare =
      typename std::remove_cv<typename std::remove_reference<T>::type>::type;
  const Bare *held = any_cast<Bare>(&a);
#ifndef CPPHTTPLIB_NO_EXCEPTIONS
  if (!held) { throw bad_any_cast{}; }
#else
  if (!held) { std::abort(); }
#endif
  return static_cast<T>(*held);
}
template <typename T> T any_cast(any &a) {
  using Bare =
      typename std::remove_cv<typename std::remove_reference<T>::type>::type;
  Bare *held = any_cast<Bare>(&a);
#ifndef CPPHTTPLIB_NO_EXCEPTIONS
  if (!held) { throw bad_any_cast{}; }
#else
  if (!held) { std::abort(); }
#endif
  return static_cast<T>(*held);
}
// Rvalue overload: moves the held value out of the (expiring) any.
template <typename T> T any_cast(any &&a) {
  using Bare =
      typename std::remove_cv<typename std::remove_reference<T>::type>::type;
  Bare *held = any_cast<Bare>(&a);
#ifndef CPPHTTPLIB_NO_EXCEPTIONS
  if (!held) { throw bad_any_cast{}; }
#else
  if (!held) { std::abort(); }
#endif
  return static_cast<T>(std::move(*held));
}
#endif // __cplusplus >= 201703L
struct Response;
using ResponseHandler = std::function<bool(const Response &response)>;
@@ -805,6 +1010,34 @@ struct FormDataProvider {
};
using FormDataProviderItems = std::vector<FormDataProvider>;
// Builds a FormDataProvider that streams the file at `filepath` in chunks.
// `filename` defaults to `filepath` when left empty; `content_type` may be
// left empty for the consumer to fill in. The returned provider fails (false)
// only when the file cannot be opened.
inline FormDataProvider
make_file_provider(const std::string &name, const std::string &filepath,
                   const std::string &filename = std::string(),
                   const std::string &content_type = std::string()) {
  FormDataProvider item;
  item.name = name;
  item.content_type = content_type;
  if (filename.empty()) {
    item.filename = filepath;
  } else {
    item.filename = filename;
  }
  // The provider is invoked repeatedly with an increasing offset; each call
  // re-opens the file, seeks, and emits at most one 8 KiB chunk to the sink.
  item.provider = [filepath](size_t offset, DataSink &sink) -> bool {
    std::ifstream file(filepath, std::ios::binary);
    if (!file) { return false; }
    if (offset > 0) {
      file.seekg(static_cast<std::streamoff>(offset));
      if (!file.good()) {
        // Seek failed — treat as end of stream rather than an error.
        sink.done();
        return true;
      }
    }
    char chunk[8192];
    file.read(chunk, sizeof(chunk));
    auto bytes_read = static_cast<size_t>(file.gcount());
    if (bytes_read == 0) {
      sink.done(); // EOF
      return true;
    }
    return sink.write(chunk, bytes_read);
  };
  return item;
}
using ContentReceiverWithProgress = std::function<bool(
const char *data, size_t data_length, size_t offset, size_t total_length)>;
@@ -1010,6 +1243,10 @@ struct Response {
std::string body;
std::string location; // Redirect location
// User-defined context — set by pre-routing/pre-request handlers and read
// by route handlers to pass arbitrary data (e.g. decoded auth tokens).
std::map<std::string, any> user_data;
bool has_header(const std::string &key) const;
std::string get_header_value(const std::string &key, const char *def = "",
size_t id = 0) const;
@@ -1124,6 +1361,11 @@ public:
virtual time_t duration() const = 0;
virtual void set_read_timeout(time_t sec, time_t usec = 0) {
(void)sec;
(void)usec;
}
ssize_t write(const char *ptr);
ssize_t write(const std::string &s);
@@ -1146,7 +1388,7 @@ public:
class ThreadPool final : public TaskQueue {
public:
explicit ThreadPool(size_t n, size_t mqr = 0);
explicit ThreadPool(size_t n, size_t max_n = 0, size_t mqr = 0);
ThreadPool(const ThreadPool &) = delete;
~ThreadPool() override = default;
@@ -1154,20 +1396,22 @@ public:
void shutdown() override;
private:
struct worker {
explicit worker(ThreadPool &pool);
void worker(bool is_dynamic);
void move_to_finished(std::thread::id id);
void cleanup_finished_threads();
void operator()();
ThreadPool &pool_;
};
friend struct worker;
std::vector<std::thread> threads_;
std::list<std::function<void()>> jobs_;
size_t base_thread_count_;
size_t max_thread_count_;
size_t max_queued_requests_;
size_t idle_thread_count_;
bool shutdown_;
size_t max_queued_requests_ = 0;
std::list<std::function<void()>> jobs_;
std::vector<std::thread> threads_; // base threads
std::list<std::thread> dynamic_threads_; // dynamic threads
std::vector<std::thread>
finished_threads_; // exited dynamic threads awaiting join
std::condition_variable cond_;
std::mutex mutex_;
@@ -1294,6 +1538,11 @@ public:
using Expect100ContinueHandler =
std::function<int(const Request &, Response &)>;
using WebSocketHandler =
std::function<void(const Request &, ws::WebSocket &)>;
using SubProtocolSelector =
std::function<std::string(const std::vector<std::string> &protocols)>;
Server();
virtual ~Server();
@@ -1311,6 +1560,10 @@ public:
Server &Delete(const std::string &pattern, HandlerWithContentReader handler);
Server &Options(const std::string &pattern, Handler handler);
Server &WebSocket(const std::string &pattern, WebSocketHandler handler);
Server &WebSocket(const std::string &pattern, WebSocketHandler handler,
SubProtocolSelector sub_protocol_selector);
bool set_base_dir(const std::string &dir,
const std::string &mount_point = std::string());
bool set_mount_point(const std::string &mount_point, const std::string &dir,
@@ -1386,7 +1639,8 @@ protected:
int remote_port, const std::string &local_addr,
int local_port, bool close_connection,
bool &connection_closed,
const std::function<void(Request &)> &setup_request);
const std::function<void(Request &)> &setup_request,
bool *websocket_upgraded = nullptr);
std::atomic<socket_t> svr_sock_{INVALID_SOCKET};
@@ -1488,6 +1742,14 @@ private:
HandlersForContentReader delete_handlers_for_content_reader_;
Handlers options_handlers_;
struct WebSocketHandlerEntry {
std::unique_ptr<detail::MatcherBase> matcher;
WebSocketHandler handler;
SubProtocolSelector sub_protocol_selector;
};
using WebSocketHandlers = std::vector<WebSocketHandlerEntry>;
WebSocketHandlers websocket_handlers_;
HandlerWithResponse error_handler_;
ExceptionHandler exception_handler_;
HandlerWithResponse pre_routing_handler_;
@@ -2970,6 +3232,36 @@ struct MbedTlsContext {
} // namespace tls
#endif
#ifdef CPPHTTPLIB_WOLFSSL_SUPPORT
namespace tls {
namespace impl {

// wolfSSL context wrapper (holds WOLFSSL_CTX and related state).
// This struct is accessible via tls::impl for use in SSL context
// setup callbacks (cast ctx_t to tls::impl::WolfSSLContext*).
// Non-copyable; ctor/dtor are defined out of line — presumably they manage
// the WOLFSSL_CTX lifetime (confirm against the implementation).
struct WolfSSLContext {
  WOLFSSL_CTX *ctx = nullptr;
  bool is_server = false;
  bool verify_client = false;
  bool has_verify_callback = false;
  std::string ca_pem_data_; // accumulated PEM for get_ca_names/get_ca_certs

  WolfSSLContext();
  ~WolfSSLContext();
  WolfSSLContext(const WolfSSLContext &) = delete;
  WolfSSLContext &operator=(const WolfSSLContext &) = delete;
};

// CA store for wolfSSL: holds raw PEM bytes to allow reloading into any ctx
struct WolfSSLCAStore {
  std::string pem_data;
};

} // namespace impl
} // namespace tls
#endif
#endif // CPPHTTPLIB_SSL_ENABLED
namespace stream {
@@ -3335,6 +3627,143 @@ private:
} // namespace sse
namespace ws {
// WebSocket frame opcodes; values match RFC 6455 §5.2.
enum class Opcode : uint8_t {
  Continuation = 0x0,
  Text = 0x1,
  Binary = 0x2,
  Close = 0x8,
  Ping = 0x9,
  Pong = 0xA,
};
// WebSocket close status codes; values match RFC 6455 §7.4.1.
enum class CloseStatus : uint16_t {
  Normal = 1000,
  GoingAway = 1001,
  ProtocolError = 1002,
  UnsupportedData = 1003,
  NoStatus = 1005, // reserved by the RFC: no status code was present
  Abnormal = 1006, // reserved by the RFC: connection closed abnormally
  InvalidPayload = 1007,
  PolicyViolation = 1008,
  MessageTooBig = 1009,
  MandatoryExtension = 1010,
  InternalError = 1011,
};
// Result of a WebSocket read: Fail (0) on error/close, otherwise the message
// kind. Plain (unscoped) enum so it is truthy exactly when a message arrived.
enum ReadResult : int { Fail = 0, Text = 1, Binary = 2 };
// One established WebSocket connection (either endpoint). Construction is
// restricted via friendship to Server and WebSocketClient; user code receives
// a reference and may only read, send, and close. Non-copyable: the object
// owns per-connection state (write lock, heartbeat thread, close flag).
class WebSocket {
public:
  WebSocket(const WebSocket &) = delete;
  WebSocket &operator=(const WebSocket &) = delete;

  ~WebSocket();

  // Reads the next message into `msg`; ReadResult distinguishes text,
  // binary, and failure (see the enum above).
  ReadResult read(std::string &msg);
  // Send `data` over the connection; returns whether the send succeeded.
  bool send(const std::string &data);
  bool send(const char *data, size_t len);
  // Closes the connection with the given status code and reason string.
  void close(CloseStatus status = CloseStatus::Normal,
             const std::string &reason = "");

  // The HTTP request that initiated the upgrade to WebSocket.
  const Request &request() const;
  bool is_open() const;

private:
  friend class httplib::Server;
  friend class WebSocketClient;

  // Borrowing form: `strm` is owned by the caller and must outlive *this.
  WebSocket(Stream &strm, const Request &req, bool is_server)
      : strm_(strm), req_(req), is_server_(is_server) {
    start_heartbeat();
  }
  // Owning form: takes over the stream and keeps it alive in owned_strm_.
  WebSocket(std::unique_ptr<Stream> &&owned_strm, const Request &req,
            bool is_server)
      : strm_(*owned_strm), owned_strm_(std::move(owned_strm)), req_(req),
        is_server_(is_server) {
    start_heartbeat();
  }

  // Launches ping_thread_ (defined out of line) — presumably the keep-alive
  // ping loop driven by ping_cv_; confirm against the implementation.
  void start_heartbeat();
  bool send_frame(Opcode op, const char *data, size_t len, bool fin = true);

  Stream &strm_;
  std::unique_ptr<Stream> owned_strm_; // non-null only for the owning form
  Request req_;
  bool is_server_; // NOTE(review): presumably true for server-side endpoints
  std::atomic<bool> closed_{false};
  std::mutex write_mutex_; // serializes concurrent frame writes
  std::thread ping_thread_;
  std::mutex ping_mutex_;
  std::condition_variable ping_cv_;
};
// Client-side WebSocket endpoint: parses a scheme/host/port/path URL string,
// establishes the connection on connect(), then exposes the same
// read/send/close surface as WebSocket. Non-copyable.
class WebSocketClient {
public:
  // Parses the URL and stores the optional extra handshake headers; check
  // is_valid() for parse success. NOTE(review): the exact accepted URL forms
  // are defined by the out-of-line constructor — confirm there.
  explicit WebSocketClient(const std::string &scheme_host_port_path,
                           const Headers &headers = {});
  ~WebSocketClient();

  WebSocketClient(const WebSocketClient &) = delete;
  WebSocketClient &operator=(const WebSocketClient &) = delete;

  bool is_valid() const;
  bool connect();

  ReadResult read(std::string &msg);
  bool send(const std::string &data);
  bool send(const char *data, size_t len);
  void close(CloseStatus status = CloseStatus::Normal,
             const std::string &reason = "");
  bool is_open() const;

  // Selected subprotocol (empty when none was negotiated — see handshake).
  const std::string &subprotocol() const;

  void set_read_timeout(time_t sec, time_t usec = 0);
  void set_write_timeout(time_t sec, time_t usec = 0);

#ifdef CPPHTTPLIB_SSL_ENABLED
  void set_ca_cert_path(const std::string &path);
  void set_ca_cert_store(tls::ca_store_t store);
  void enable_server_certificate_verification(bool enabled);
#endif

private:
  void shutdown_and_close();
  bool create_stream(std::unique_ptr<Stream> &strm);

  std::string host_;
  int port_;
  std::string path_;
  Headers headers_;
  std::string subprotocol_;
  bool is_valid_ = false;
  socket_t sock_ = INVALID_SOCKET;
  std::unique_ptr<WebSocket> ws_; // set once a connection is established

  time_t read_timeout_sec_ = CPPHTTPLIB_WEBSOCKET_READ_TIMEOUT_SECOND;
  time_t read_timeout_usec_ = 0;
  time_t write_timeout_sec_ = CPPHTTPLIB_CLIENT_WRITE_TIMEOUT_SECOND;
  time_t write_timeout_usec_ = CPPHTTPLIB_CLIENT_WRITE_TIMEOUT_USECOND;

#ifdef CPPHTTPLIB_SSL_ENABLED
  bool is_ssl_ = false;
  tls::ctx_t tls_ctx_ = nullptr;
  tls::session_t tls_session_ = nullptr;
  std::string ca_cert_file_path_;
  tls::ca_store_t ca_cert_store_ = nullptr;
  bool server_certificate_verification_ = true;
#endif
};
namespace impl {

// Returns whether `s` is well-formed UTF-8 (RFC 6455 §8.1 requires text
// payloads to be valid UTF-8).
bool is_valid_utf8(const std::string &s);

// Reads a single frame from `strm` into opcode/payload/fin. `expect_masked`
// selects whether incoming frames must carry a mask (the client-to-server
// direction per RFC 6455 §5.3); `max_len` bounds the payload size —
// NOTE(review): the exact over-limit behavior is defined out of line.
bool read_websocket_frame(Stream &strm, Opcode &opcode, std::string &payload,
                          bool &fin, bool expect_masked, size_t max_len);

} // namespace impl
} // namespace ws
} // namespace httplib