Compare commits

...

5 Commits
b8126 ... b8131

Author SHA1 Message Date
Xuan-Son Nguyen
5452d736f8 jinja: correct stats for tojson and string filters (#19785) 2026-02-22 21:08:23 +01:00
Aldehir Rojas
ed4837891d common : fix improper trimming in XML parser on complete message (#19805)
Co-authored-by: Jules LEIDELINGER <11395311+julio75012@users.noreply.github.com>
2026-02-22 17:34:54 +01:00
Kilian Krampf
cacc371f99 Fix wrong cli-argument in documentation (#19804) 2026-02-22 16:26:33 +01:00
HelloKS
ae2368e74e model : add Kanana-2 model support (#19803)
* model: Add Kanana-2 model support

* lint: adjust spacing
2026-02-22 16:15:02 +01:00
Sigbjørn Skjæret
9f0684f003 ci : fix rocm archive name [no ci] (#19808) 2026-02-22 16:14:37 +01:00
11 changed files with 112 additions and 13 deletions

View File

@@ -604,7 +604,7 @@ jobs:
id: pack_artifacts
run: |
cp LICENSE ./build/bin/
tar -czvf llama-bin-ubuntu-rocm-${{ matrix.ROCM_VERSION }}-${{ matrix.build }}.tar.gz --transform "s,./,llama-${{ steps.tag.outputs.name }}/," -C ./build/bin .
tar -czvf llama-${{ steps.tag.outputs.name }}-bin-ubuntu-rocm-${{ matrix.ROCM_VERSION }}-${{ matrix.build }}.tar.gz --transform "s,./,llama-${{ steps.tag.outputs.name }}/," -C ./build/bin .
- name: Upload artifacts
uses: actions/upload-artifact@v6

View File

@@ -803,7 +803,7 @@ inline void parse_msg_with_xml_tool_calls(common_chat_msg_parser & builder, cons
}
// remove potential partial suffix
if (builder.pos() == builder.input().size()) {
if (builder.pos() == builder.input().size() && builder.is_partial()) {
if (unclosed_reasoning_content.empty()) {
rstrip(content);
trim_potential_partial_word(content);

View File

@@ -85,7 +85,7 @@ value identifier::execute_impl(context & ctx) {
auto builtins = global_builtins();
if (!it->is_undefined()) {
if (ctx.is_get_stats) {
it->stats.used = true;
value_t::stats_t::mark_used(it);
}
JJ_DEBUG("Identifier '%s' found, type = %s", val.c_str(), it->type().c_str());
return it;
@@ -277,7 +277,7 @@ value binary_expression::execute_impl(context & ctx) {
static value try_builtin_func(context & ctx, const std::string & name, value & input, bool undef_on_missing = false) {
JJ_DEBUG("Trying built-in function '%s' for type %s", name.c_str(), input->type().c_str());
if (ctx.is_get_stats) {
input->stats.used = true;
value_t::stats_t::mark_used(input);
input->stats.ops.insert(name);
}
auto builtins = input->get_builtins();
@@ -448,7 +448,7 @@ value for_statement::execute_impl(context & ctx) {
// mark the variable being iterated as used for stats
if (ctx.is_get_stats) {
iterable_val->stats.used = true;
value_t::stats_t::mark_used(iterable_val);
iterable_val->stats.ops.insert("array_access");
}
@@ -470,7 +470,7 @@ value for_statement::execute_impl(context & ctx) {
items.push_back(std::move(tuple));
}
if (ctx.is_get_stats) {
iterable_val->stats.used = true;
value_t::stats_t::mark_used(iterable_val);
iterable_val->stats.ops.insert("object_access");
}
} else {
@@ -480,7 +480,7 @@ value for_statement::execute_impl(context & ctx) {
items.push_back(item);
}
if (ctx.is_get_stats) {
iterable_val->stats.used = true;
value_t::stats_t::mark_used(iterable_val);
iterable_val->stats.ops.insert("array_access");
}
}
@@ -817,8 +817,9 @@ value member_expression::execute_impl(context & ctx) {
}
if (ctx.is_get_stats && val && object && property) {
val->stats.used = true;
object->stats.used = true;
value_t::stats_t::mark_used(val);
value_t::stats_t::mark_used(object);
value_t::stats_t::mark_used(property);
if (is_val<value_int>(property)) {
object->stats.ops.insert("array_access");
} else if (is_val<value_string>(property)) {

View File

@@ -161,6 +161,11 @@ static value tojson(const func_args & args) {
value val_separators = args.get_kwarg_or_pos("separators", 3);
value val_sort = args.get_kwarg_or_pos("sort_keys", 4);
int indent = -1;
if (args.ctx.is_get_stats) {
// mark as used (recursively) for stats
auto val_input = args.get_pos(0);
value_t::stats_t::mark_used(const_cast<value&>(val_input), true);
}
if (is_val<value_int>(val_indent)) {
indent = static_cast<int>(val_indent->as_int());
}
@@ -891,6 +896,11 @@ const func_builtins & value_array_t::get_builtins() const {
}},
{"string", [](const func_args & args) -> value {
args.ensure_vals<value_array>();
if (args.ctx.is_get_stats) {
// mark as used (recursively) for stats
auto val_input = args.get_pos(0);
value_t::stats_t::mark_used(const_cast<value&>(val_input), true);
}
return mk_val<value_string>(args.get_pos(0)->as_string());
}},
{"tojson", tojson},
@@ -1046,6 +1056,11 @@ const func_builtins & value_object_t::get_builtins() const {
{"tojson", tojson},
{"string", [](const func_args & args) -> value {
args.ensure_vals<value_object>();
if (args.ctx.is_get_stats) {
// mark as used (recursively) for stats
auto val_input = args.get_pos(0);
value_t::stats_t::mark_used(const_cast<value&>(val_input), true);
}
return mk_val<value_string>(args.get_pos(0)->as_string());
}},
{"length", [](const func_args & args) -> value {
@@ -1358,4 +1373,21 @@ std::string value_to_string_repr(const value & val) {
}
}
// stats utility
void value_t::stats_t::mark_used(value & val, bool deep) {
val->stats.used = true;
if (deep) {
if (is_val<value_array>(val)) {
for (auto & item : val->val_arr) {
mark_used(item, deep);
}
} else if (is_val<value_object>(val)) {
for (auto & pair : val->val_obj) {
mark_used(pair.first, deep);
mark_used(pair.second, deep);
}
}
}
}
} // namespace jinja

View File

@@ -118,6 +118,8 @@ struct value_t {
bool used = false;
// ops can be builtin calls or operators: "array_access", "object_access"
std::set<std::string> ops;
// utility to recursively mark value and its children as used
static void mark_used(value & val, bool deep = false);
} stats;
value_t() = default;

View File

@@ -1274,6 +1274,9 @@ class TextModel(ModelBase):
if chkhsh == "b4b8ca1f9769494fbd956ebc4c249de6131fb277a4a3345a7a92c7dd7a55808d":
# ref: https://huggingface.co/jdopensource/JoyAI-LLM-Flash
res = "joyai-llm"
if chkhsh == "e4d54df1ebc1f2b91acd986c5b51aa50837d5faf7c7398e73c1f9e9ee5d19869":
# ref: https://huggingface.co/kakaocorp/kanana-2-30b-a3b-instruct-2601
res = "kanana2"
if res is None:
logger.warning("\n")

View File

@@ -152,6 +152,7 @@ models = [
{"name": "exaone-moe", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/LGAI-EXAONE/K-EXAONE-236B-A23B", },
{"name": "qwen35", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/Qwen/Qwen3.5-9B-Instruct", },
{"name": "joyai-llm", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/jdopensource/JoyAI-LLM-Flash", },
{"name": "kanana2", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/kakaocorp/kanana-2-30b-a3b-instruct-2601", },
]
# some models are known to be broken upstream, so we will skip them as exceptions

View File

@@ -1703,8 +1703,8 @@ void llama_model::load_hparams(llama_model_loader & ml) {
} break;
case LLM_ARCH_DEEPSEEK2:
{
// lite variants include DeepSeek-V2-Lite, GigaChat3-10B-A1.8B
const bool is_lite = (hparams.n_layer == 27 || hparams.n_layer == 26);
// lite variants include DeepSeek-V2-Lite, GigaChat3-10B-A1.8B, Kanana-2-30B-A3B
const bool is_lite = (hparams.n_layer == 27 || hparams.n_layer == 26 || (hparams.n_layer == 48 && n_vocab == 128256));
ml.get_key(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, hparams.f_norm_rms_eps);
ml.get_key(LLM_KV_LEADING_DENSE_BLOCK_COUNT, hparams.n_layer_dense_lead);

View File

@@ -2027,7 +2027,8 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
pre_type = LLAMA_VOCAB_PRE_TYPE_QWEN2;
} else if (
tokenizer_pre == "gpt-4o" ||
tokenizer_pre == "llama4") {
tokenizer_pre == "llama4" ||
tokenizer_pre == "kanana2") {
pre_type = LLAMA_VOCAB_PRE_TYPE_GPT4O;
clean_spaces = false;
} else if (

View File

@@ -32,6 +32,7 @@ static void test_string_methods(testing & t);
static void test_array_methods(testing & t);
static void test_object_methods(testing & t);
static void test_hasher(testing & t);
static void test_stats(testing & t);
static void test_fuzzing(testing & t);
static bool g_python_mode = false;
@@ -70,6 +71,7 @@ int main(int argc, char *argv[]) {
t.test("object methods", test_object_methods);
if (!g_python_mode) {
t.test("hasher", test_hasher);
t.test("stats", test_stats);
t.test("fuzzing", test_fuzzing);
}
@@ -1795,6 +1797,63 @@ static void test_hasher(testing & t) {
});
}
static void test_stats(testing & t) {
static auto get_stats = [](const std::string & tmpl, const json & vars) -> jinja::value {
jinja::lexer lexer;
auto lexer_res = lexer.tokenize(tmpl);
jinja::program prog = jinja::parse_from_tokens(lexer_res);
jinja::context ctx(tmpl);
jinja::global_from_json(ctx, json{{ "val", vars }}, true);
ctx.is_get_stats = true;
jinja::runtime runtime(ctx);
runtime.execute(prog);
return ctx.get_val("val");
};
t.test("stats", [](testing & t) {
jinja::value val = get_stats(
"{{val.num}} "
"{{val.str}} "
"{{val.arr[0]}} "
"{{val.obj.key1}} "
"{{val.nested | tojson}}",
// Note: the json below will be wrapped inside "val" in the context
json{
{"num", 1},
{"str", "abc"},
{"arr", json::array({1, 2, 3})},
{"obj", json::object({{"key1", 1}, {"key2", 2}, {"key3", 3}})},
{"nested", json::object({
{"inner_key1", json::array({1, 2})},
{"inner_key2", json::object({{"a", "x"}, {"b", "y"}})}
})},
{"mixed", json::object({
{"used", 1},
{"unused", 2},
})},
}
);
t.assert_true("num is used", val->at("num")->stats.used);
t.assert_true("str is used", val->at("str")->stats.used);
t.assert_true("arr is used", val->at("arr")->stats.used);
t.assert_true("arr[0] is used", val->at("arr")->at(0)->stats.used);
t.assert_true("arr[1] is not used", !val->at("arr")->at(1)->stats.used);
t.assert_true("obj is used", val->at("obj")->stats.used);
t.assert_true("obj.key1 is used", val->at("obj")->at("key1")->stats.used);
t.assert_true("obj.key2 is not used", !val->at("obj")->at("key2")->stats.used);
t.assert_true("inner_key1[0] is used", val->at("nested")->at("inner_key1")->at(0)->stats.used);
t.assert_true("inner_key2.a is used", val->at("nested")->at("inner_key2")->at("a")->stats.used);
});
}
static void test_template_cpp(testing & t, const std::string & name, const std::string & tmpl, const json & vars, const std::string & expect) {
t.test(name, [&tmpl, &vars, &expect](testing & t) {
jinja::lexer lexer;

View File

@@ -101,7 +101,7 @@ In a separate terminal, start the backend server:
./llama-server -m model.gguf
# Multi-model (ROUTER mode)
./llama-server --model-store /path/to/models
./llama-server --models-dir /path/to/models
```
### 3. Start Development Servers