jinja: correct stats for tojson and string filters (#19785)

common : fix improper trimming in XML parser on complete message (#19805)
Co-authored-by: Jules LEIDELINGER <11395311+julio75012@users.noreply.github.com>
2026-02-26 14:23:22 +02:00 · 2026-02-22 21:08:23 +01:00 · 2026-02-22 17:34:54 +01:00 · 2026-02-22 16:26:33 +01:00 · 2026-02-22 16:15:02 +01:00 · 2026-02-22 16:14:37 +01:00
11 changed files with 112 additions and 13 deletions
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -604,7 +604,7 @@ jobs:
        id: pack_artifacts
        run: |
          cp LICENSE ./build/bin/
-          tar -czvf llama-bin-ubuntu-rocm-${{ matrix.ROCM_VERSION }}-${{ matrix.build }}.tar.gz --transform "s,./,llama-${{ steps.tag.outputs.name }}/," -C ./build/bin .
+          tar -czvf llama-${{ steps.tag.outputs.name }}-bin-ubuntu-rocm-${{ matrix.ROCM_VERSION }}-${{ matrix.build }}.tar.gz --transform "s,./,llama-${{ steps.tag.outputs.name }}/," -C ./build/bin .

      - name: Upload artifacts
        uses: actions/upload-artifact@v6
--- a/common/chat-parser-xml-toolcall.cpp
+++ b/common/chat-parser-xml-toolcall.cpp
@@ -803,7 +803,7 @@ inline void parse_msg_with_xml_tool_calls(common_chat_msg_parser & builder, cons
        }

        // remove potential partial suffix
-        if (builder.pos() == builder.input().size()) {
+        if (builder.pos() == builder.input().size() && builder.is_partial()) {
            if (unclosed_reasoning_content.empty()) {
                rstrip(content);
                trim_potential_partial_word(content);
--- a/common/jinja/runtime.cpp
+++ b/common/jinja/runtime.cpp
@@ -85,7 +85,7 @@ value identifier::execute_impl(context & ctx) {
    auto builtins = global_builtins();
    if (!it->is_undefined()) {
        if (ctx.is_get_stats) {
-            it->stats.used = true;
+            value_t::stats_t::mark_used(it);
        }
        JJ_DEBUG("Identifier '%s' found, type = %s", val.c_str(), it->type().c_str());
        return it;
@@ -277,7 +277,7 @@ value binary_expression::execute_impl(context & ctx) {
 static value try_builtin_func(context & ctx, const std::string & name, value & input, bool undef_on_missing = false) {
    JJ_DEBUG("Trying built-in function '%s' for type %s", name.c_str(), input->type().c_str());
    if (ctx.is_get_stats) {
-        input->stats.used = true;
+        value_t::stats_t::mark_used(input);
        input->stats.ops.insert(name);
    }
    auto builtins = input->get_builtins();
@@ -448,7 +448,7 @@ value for_statement::execute_impl(context & ctx) {

    // mark the variable being iterated as used for stats
    if (ctx.is_get_stats) {
-        iterable_val->stats.used = true;
+        value_t::stats_t::mark_used(iterable_val);
        iterable_val->stats.ops.insert("array_access");
    }

@@ -470,7 +470,7 @@ value for_statement::execute_impl(context & ctx) {
            items.push_back(std::move(tuple));
        }
        if (ctx.is_get_stats) {
-            iterable_val->stats.used = true;
+            value_t::stats_t::mark_used(iterable_val);
            iterable_val->stats.ops.insert("object_access");
        }
    } else {
@@ -480,7 +480,7 @@ value for_statement::execute_impl(context & ctx) {
            items.push_back(item);
        }
        if (ctx.is_get_stats) {
-            iterable_val->stats.used = true;
+            value_t::stats_t::mark_used(iterable_val);
            iterable_val->stats.ops.insert("array_access");
        }
    }
@@ -817,8 +817,9 @@ value member_expression::execute_impl(context & ctx) {
    }

    if (ctx.is_get_stats && val && object && property) {
-        val->stats.used = true;
-        object->stats.used = true;
+        value_t::stats_t::mark_used(val);
+        value_t::stats_t::mark_used(object);
+        value_t::stats_t::mark_used(property);
        if (is_val<value_int>(property)) {
            object->stats.ops.insert("array_access");
        } else if (is_val<value_string>(property)) {
--- a/common/jinja/value.cpp
+++ b/common/jinja/value.cpp
@@ -161,6 +161,11 @@ static value tojson(const func_args & args) {
    value val_separators = args.get_kwarg_or_pos("separators",   3);
    value val_sort       = args.get_kwarg_or_pos("sort_keys",    4);
    int indent = -1;
+    if (args.ctx.is_get_stats) {
+        // mark as used (recursively) for stats
+        auto val_input = args.get_pos(0);
+        value_t::stats_t::mark_used(const_cast<value&>(val_input), true);
+    }
    if (is_val<value_int>(val_indent)) {
        indent = static_cast<int>(val_indent->as_int());
    }
@@ -891,6 +896,11 @@ const func_builtins & value_array_t::get_builtins() const {
        }},
        {"string", [](const func_args & args) -> value {
            args.ensure_vals<value_array>();
+            if (args.ctx.is_get_stats) {
+                // mark as used (recursively) for stats
+                auto val_input = args.get_pos(0);
+                value_t::stats_t::mark_used(const_cast<value&>(val_input), true);
+            }
            return mk_val<value_string>(args.get_pos(0)->as_string());
        }},
        {"tojson", tojson},
@@ -1046,6 +1056,11 @@ const func_builtins & value_object_t::get_builtins() const {
        {"tojson", tojson},
        {"string", [](const func_args & args) -> value {
            args.ensure_vals<value_object>();
+            if (args.ctx.is_get_stats) {
+                // mark as used (recursively) for stats
+                auto val_input = args.get_pos(0);
+                value_t::stats_t::mark_used(const_cast<value&>(val_input), true);
+            }
            return mk_val<value_string>(args.get_pos(0)->as_string());
        }},
        {"length", [](const func_args & args) -> value {
@@ -1358,4 +1373,21 @@ std::string value_to_string_repr(const value & val) {
    }
 }

+// stats utility: flags `val` as used; when `deep` is true it also flags every array element and every object key/value reachable from `val`
+void value_t::stats_t::mark_used(value & val, bool deep) {
+    val->stats.used = true; // always flagged, regardless of `deep`; NOTE(review): val is dereferenced without a null check
+    if (deep) {
+        if (is_val<value_array>(val)) {
+            for (auto & item : val->val_arr) {
+                mark_used(item, deep); // recurse so nested containers inside the array are covered too
+            }
+        } else if (is_val<value_object>(val)) {
+            for (auto & pair : val->val_obj) {
+                mark_used(pair.first, deep); // the key is flagged as well as the mapped value
+                mark_used(pair.second, deep);
+            }
+        }
+    }
+}
+
 } // namespace jinja
--- a/common/jinja/value.h
+++ b/common/jinja/value.h
@@ -118,6 +118,8 @@ struct value_t {
        bool used = false;
        // ops can be builtin calls or operators: "array_access", "object_access"
        std::set<std::string> ops;
+        // utility to recursively mark value and its children as used
+        static void mark_used(value & val, bool deep = false);
    } stats;

    value_t() = default;
--- a/convert_hf_to_gguf.py
+++ b/convert_hf_to_gguf.py
@@ -1274,6 +1274,9 @@ class TextModel(ModelBase):
        if chkhsh == "b4b8ca1f9769494fbd956ebc4c249de6131fb277a4a3345a7a92c7dd7a55808d":
            # ref: https://huggingface.co/jdopensource/JoyAI-LLM-Flash
            res = "joyai-llm"
+        if chkhsh == "e4d54df1ebc1f2b91acd986c5b51aa50837d5faf7c7398e73c1f9e9ee5d19869":
+            # ref: https://huggingface.co/kakaocorp/kanana-2-30b-a3b-instruct-2601
+            res = "kanana2"

        if res is None:
            logger.warning("\n")
--- a/convert_hf_to_gguf_update.py
+++ b/convert_hf_to_gguf_update.py
@@ -152,6 +152,7 @@ models = [
    {"name": "exaone-moe",       "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/LGAI-EXAONE/K-EXAONE-236B-A23B", },
    {"name": "qwen35",           "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/Qwen/Qwen3.5-9B-Instruct", },
    {"name": "joyai-llm",        "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/jdopensource/JoyAI-LLM-Flash", },
+    {"name": "kanana2",          "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/kakaocorp/kanana-2-30b-a3b-instruct-2601", },
 ]

 # some models are known to be broken upstream, so we will skip them as exceptions
--- a/src/llama-model.cpp
+++ b/src/llama-model.cpp
@@ -1703,8 +1703,8 @@ void llama_model::load_hparams(llama_model_loader & ml) {
            } break;
        case LLM_ARCH_DEEPSEEK2:
            {
-                // lite variants include DeepSeek-V2-Lite, GigaChat3-10B-A1.8B
-                const bool is_lite = (hparams.n_layer == 27 || hparams.n_layer == 26);
+                // lite variants include DeepSeek-V2-Lite, GigaChat3-10B-A1.8B, Kanana-2-30B-A3B
+                const bool is_lite = (hparams.n_layer == 27 || hparams.n_layer == 26 || (hparams.n_layer == 48 && n_vocab == 128256));

                ml.get_key(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, hparams.f_norm_rms_eps);
                ml.get_key(LLM_KV_LEADING_DENSE_BLOCK_COUNT,   hparams.n_layer_dense_lead);
--- a/src/llama-vocab.cpp
+++ b/src/llama-vocab.cpp
@@ -2027,7 +2027,8 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
                pre_type = LLAMA_VOCAB_PRE_TYPE_QWEN2;
            } else if (
                tokenizer_pre == "gpt-4o" ||
-                tokenizer_pre == "llama4") {
+                tokenizer_pre == "llama4" ||
+                tokenizer_pre == "kanana2") {
                pre_type = LLAMA_VOCAB_PRE_TYPE_GPT4O;
                clean_spaces = false;
            } else if (
--- a/tests/test-jinja.cpp
+++ b/tests/test-jinja.cpp
@@ -32,6 +32,7 @@ static void test_string_methods(testing & t);
 static void test_array_methods(testing & t);
 static void test_object_methods(testing & t);
 static void test_hasher(testing & t);
+static void test_stats(testing & t);
 static void test_fuzzing(testing & t);

 static bool g_python_mode = false;
@@ -70,6 +71,7 @@ int main(int argc, char *argv[]) {
    t.test("object methods", test_object_methods);
    if (!g_python_mode) {
        t.test("hasher", test_hasher);
+        t.test("stats", test_stats);
        t.test("fuzzing", test_fuzzing);
    }

@@ -1795,6 +1797,63 @@ static void test_hasher(testing & t) {
    });
 }

+static void test_stats(testing & t) { // checks which members of the `val` global end up with stats.used set after a template is executed with is_get_stats enabled
+    static auto get_stats = [](const std::string & tmpl, const json & vars) -> jinja::value { // runs tmpl with vars exposed under the global `val` and returns that global for inspection
+        jinja::lexer lexer;
+        auto lexer_res = lexer.tokenize(tmpl);
+
+        jinja::program prog = jinja::parse_from_tokens(lexer_res);
+
+        jinja::context ctx(tmpl);
+        jinja::global_from_json(ctx, json{{ "val", vars }}, true); // NOTE(review): meaning of the trailing `true` is not visible from here; confirm against global_from_json
+        ctx.is_get_stats = true; // must be set before execution; the assertions below read the stats flags afterwards
+
+        jinja::runtime runtime(ctx);
+        runtime.execute(prog);
+
+        return ctx.get_val("val");
+    };
+
+    t.test("stats", [](testing & t) {
+        jinja::value val = get_stats(
+            "{{val.num}} "
+            "{{val.str}} "
+            "{{val.arr[0]}} "
+            "{{val.obj.key1}} "
+            "{{val.nested | tojson}}", // nested is only passed through the tojson filter, never indexed directly
+            // Note: the json below will be wrapped inside "val" in the context
+            json{
+                {"num", 1},
+                {"str", "abc"},
+                {"arr", json::array({1, 2, 3})},
+                {"obj", json::object({{"key1", 1}, {"key2", 2}, {"key3", 3}})},
+                {"nested", json::object({
+                    {"inner_key1", json::array({1, 2})},
+                    {"inner_key2", json::object({{"a", "x"}, {"b", "y"}})}
+                })},
+                {"mixed", json::object({ // NOTE(review): this entry is neither referenced by the template above nor asserted on below
+                    {"used", 1},
+                    {"unused", 2},
+                })},
+            }
+        );
+
+        t.assert_true("num is used", val->at("num")->stats.used);
+        t.assert_true("str is used", val->at("str")->stats.used);
+
+        t.assert_true("arr is used", val->at("arr")->stats.used);
+        t.assert_true("arr[0] is used", val->at("arr")->at(0)->stats.used);
+        t.assert_true("arr[1] is not used", !val->at("arr")->at(1)->stats.used); // the template only reads index 0
+
+        t.assert_true("obj is used", val->at("obj")->stats.used);
+        t.assert_true("obj.key1 is used", val->at("obj")->at("key1")->stats.used);
+        t.assert_true("obj.key2 is not used", !val->at("obj")->at("key2")->stats.used); // the template only reads key1
+
+        t.assert_true("inner_key1[0] is used", val->at("nested")->at("inner_key1")->at(0)->stats.used); // leaves under nested are expected to be flagged through the tojson filter
+        t.assert_true("inner_key2.a is used", val->at("nested")->at("inner_key2")->at("a")->stats.used);
+    });
+}
+
 static void test_template_cpp(testing & t, const std::string & name, const std::string & tmpl, const json & vars, const std::string & expect) {
    t.test(name, [&tmpl, &vars, &expect](testing & t) {
        jinja::lexer lexer;
--- a/tools/server/webui/README.md
+++ b/tools/server/webui/README.md
@@ -101,7 +101,7 @@ In a separate terminal, start the backend server:
 ./llama-server -m model.gguf

 # Multi-model (ROUTER mode)
-./llama-server --model-store /path/to/models
+./llama-server --models-dir /path/to/models
 ```

 ### 3. Start Development Servers
Author	SHA1	Message	Date
Xuan-Son Nguyen	5452d736f8	jinja: correct stats for tojson and string filters (#19785)	2026-02-22 21:08:23 +01:00
Aldehir Rojas	ed4837891d	common : fix improper trimming in XML parser on complete message (#19805) Co-authored-by: Jules LEIDELINGER <11395311+julio75012@users.noreply.github.com>	2026-02-22 17:34:54 +01:00
Kilian Krampf	cacc371f99	Fix wrong cli-argument in documentation (#19804)	2026-02-22 16:26:33 +01:00
HelloKS	ae2368e74e	model : add Kanana-2 model support (#19803) * model: Add Kanana-2 model support * lint: adjust spacing	2026-02-22 16:15:02 +01:00
Sigbjørn Skjæret	9f0684f003	ci : fix rocm archive name [no ci] (#19808)	2026-02-22 16:14:37 +01:00