Compare commits

...

10 Commits

Author SHA1 Message Date
Georgi Gerganov
e47a082fc9 security : add collaborator guidance 2025-12-16 10:16:46 +02:00
Chris Peterson
2aa45ef9e3 llama: Include algorithm header needed for C++23 (#18078) 2025-12-16 09:37:55 +02:00
Georgi Gerganov
c560316440 graph : reuse SSM graphs (#16490)
* graph : reuse hybrid graphs

* graph : reuse recurrent graphs

* graph : fix reuse check for recurrent inputs

* memory : move the recurrent state into the memory context

* Revert "memory : move the recurrent state into the memory context"

This reverts commit 00f115fe810815d4a22a6dee0acc346131e970e1.

* cont : fix build
2025-12-16 09:36:21 +02:00
Sigbjørn Skjæret
d6742125c3 ci : separate webui from server (#18072)
* separate webui from server

* add public to path
2025-12-16 08:17:26 +01:00
Aleksander Grygier
3034836d36 webui: Improve copy to clipboard with text attachments (#17969)
* feat: Create copy/paste user message including "pasted text" attachments

* chore: update webui build output

* chore: update webui static output

* fix: UI issues

* chore: update webui static output

* fix: Decode HTML entities using `DOMParser`

* chore: update webui build output

* chore: update webui static output
2025-12-16 07:38:46 +01:00
Aleksander Grygier
a20979d433 webui: Add setting to always show sidebar on Desktop (#17809)
* feat: Add setting to always show Sidebar on Desktop

* chore: update webui build output

* feat: Add auto-show sidebar setting

* fix: Mobile settings dialog UI

* chore: update webui build output

* feat: UI label update

* chore: update webui build output

* chore: update webui build output

* chore: update webui build output

* refactor: Cleanup

* chore: update webui build output
2025-12-16 07:31:37 +01:00
Daniel Bevenius
2995341730 llama : add support for NVIDIA Nemotron 3 Nano (#18058)
* llama : add support for NVIDIA Nemotron Nano 3

This commit adds support for the NVIDIA Nemotron Nano 3 model, enabling
the conversion and running of this model.

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
2025-12-16 07:19:26 +01:00
Darius Lukas
40d9c394f4 Webui: Disable attachment button and model selector button when prompt textbox is disabled. (#17925)
* Pass disabled state to the file attachments button and the model
selector button.

* Update index.html.gz

* Fix model info card in non-router mode.

* Update index.html.gz
2025-12-16 07:15:49 +01:00
Sigbjørn Skjæret
d6a1e18c65 convert : move rope_parameters to TextModel class (#18061)
* make sure to search text_config for rope parameters

* move rope_parameters to TextModel class
2025-12-15 22:03:16 +01:00
Shouyu
c45f89d551 ggml-hexagon: mm for mtmd (#17894)
* feat: add run_mtmd script for hexagon

* fix: fix issue in fp16xfp32 mm

* fix: remove opt_experiment for fp16xfp32 mm

* fix: ggml-hexagon: matmul fp16xfp32 support non-contigious src0

* fix: fix syntax check for run-mtmd.sh for cli
2025-12-15 10:53:56 -08:00
39 changed files with 1744 additions and 625 deletions

295
.github/workflows/server-webui.yml vendored Normal file
View File

@@ -0,0 +1,295 @@
# Server WebUI build and tests
name: Server WebUI
on:
workflow_dispatch: # allows manual triggering
inputs:
sha:
description: 'Commit SHA1 to build'
required: false
type: string
slow_tests:
description: 'Run slow tests'
required: true
type: boolean
push:
branches:
- master
paths: ['.github/workflows/server-webui.yml', 'tools/server/webui/**.*', 'tools/server/tests/**.*', 'tools/server/public/**']
pull_request:
types: [opened, synchronize, reopened]
paths: ['.github/workflows/server-webui.yml', 'tools/server/webui/**.*', 'tools/server/tests/**.*', 'tools/server/public/**']
env:
LLAMA_LOG_COLORS: 1
LLAMA_LOG_PREFIX: 1
LLAMA_LOG_TIMESTAMPS: 1
LLAMA_LOG_VERBOSITY: 10
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}-${{ github.head_ref || github.run_id }}
cancel-in-progress: true
jobs:
webui-setup:
name: WebUI Setup
runs-on: ubuntu-latest
steps:
- name: Checkout code
uses: actions/checkout@v4
with:
fetch-depth: 0
ref: ${{ github.event.inputs.sha || github.event.pull_request.head.sha || github.sha || github.head_ref || github.ref_name }}
- name: Setup Node.js
uses: actions/setup-node@v4
with:
node-version: "22"
cache: "npm"
cache-dependency-path: "tools/server/webui/package-lock.json"
- name: Cache node_modules
uses: actions/cache@v4
id: cache-node-modules
with:
path: tools/server/webui/node_modules
key: ${{ runner.os }}-node-modules-${{ hashFiles('tools/server/webui/package-lock.json') }}
restore-keys: |
${{ runner.os }}-node-modules-
- name: Install dependencies
if: steps.cache-node-modules.outputs.cache-hit != 'true'
run: npm ci
working-directory: tools/server/webui
webui-check:
needs: webui-setup
name: WebUI Check
runs-on: ubuntu-latest
steps:
- name: Checkout code
uses: actions/checkout@v4
with:
fetch-depth: 0
ref: ${{ github.event.inputs.sha || github.event.pull_request.head.sha || github.sha || github.head_ref || github.ref_name }}
- name: Setup Node.js
uses: actions/setup-node@v4
with:
node-version: "22"
- name: Restore node_modules cache
uses: actions/cache@v4
with:
path: tools/server/webui/node_modules
key: ${{ runner.os }}-node-modules-${{ hashFiles('tools/server/webui/package-lock.json') }}
restore-keys: |
${{ runner.os }}-node-modules-
- name: Run type checking
run: npm run check
working-directory: tools/server/webui
- name: Run linting
run: npm run lint
working-directory: tools/server/webui
webui-build:
needs: webui-check
name: WebUI Build
runs-on: ubuntu-latest
steps:
- name: Checkout code
uses: actions/checkout@v4
with:
fetch-depth: 0
ref: ${{ github.event.inputs.sha || github.event.pull_request.head.sha || github.sha || github.head_ref || github.ref_name }}
- name: Setup Node.js
uses: actions/setup-node@v4
with:
node-version: "22"
- name: Restore node_modules cache
uses: actions/cache@v4
with:
path: tools/server/webui/node_modules
key: ${{ runner.os }}-node-modules-${{ hashFiles('tools/server/webui/package-lock.json') }}
restore-keys: |
${{ runner.os }}-node-modules-
- name: Build application
run: npm run build
working-directory: tools/server/webui
webui-tests:
needs: webui-build
name: Run WebUI tests
permissions:
contents: read
runs-on: ubuntu-latest
steps:
- name: Checkout code
uses: actions/checkout@v4
- name: Setup Node.js
uses: actions/setup-node@v4
with:
node-version: "22"
- name: Restore node_modules cache
uses: actions/cache@v4
with:
path: tools/server/webui/node_modules
key: ${{ runner.os }}-node-modules-${{ hashFiles('tools/server/webui/package-lock.json') }}
restore-keys: |
${{ runner.os }}-node-modules-
- name: Install Playwright browsers
run: npx playwright install --with-deps
working-directory: tools/server/webui
- name: Build Storybook
run: npm run build-storybook
working-directory: tools/server/webui
- name: Run Client tests
run: npm run test:client
working-directory: tools/server/webui
- name: Run Server tests
run: npm run test:server
working-directory: tools/server/webui
- name: Run UI tests
run: npm run test:ui -- --testTimeout=60000
working-directory: tools/server/webui
- name: Run E2E tests
run: npm run test:e2e
working-directory: tools/server/webui
server-build:
needs: [webui-tests]
runs-on: ubuntu-latest
strategy:
matrix:
sanitizer: [ADDRESS, UNDEFINED] # THREAD is broken
build_type: [RelWithDebInfo]
include:
- build_type: Release
sanitizer: ""
fail-fast: false # While -DLLAMA_SANITIZE_THREAD=ON is broken
steps:
- name: Dependencies
id: depends
run: |
sudo apt-get update
sudo apt-get -y install \
build-essential \
xxd \
git \
cmake \
curl \
wget \
language-pack-en \
libssl-dev
- name: Clone
id: checkout
uses: actions/checkout@v4
with:
fetch-depth: 0
ref: ${{ github.event.inputs.sha || github.event.pull_request.head.sha || github.sha || github.head_ref || github.ref_name }}
- name: Python setup
id: setup_python
uses: actions/setup-python@v5
with:
python-version: '3.11'
- name: Tests dependencies
id: test_dependencies
run: |
pip install -r tools/server/tests/requirements.txt
- name: Setup Node.js for WebUI
uses: actions/setup-node@v4
with:
node-version: "22"
cache: "npm"
cache-dependency-path: "tools/server/webui/package-lock.json"
- name: Install WebUI dependencies
run: npm ci
working-directory: tools/server/webui
- name: Build WebUI
run: npm run build
working-directory: tools/server/webui
- name: Build (no OpenMP)
id: cmake_build_no_openmp
if: ${{ matrix.sanitizer == 'THREAD' }}
run: |
cmake -B build \
-DGGML_NATIVE=OFF \
-DLLAMA_CURL=OFF \
-DLLAMA_OPENSSL=ON \
-DLLAMA_BUILD_SERVER=ON \
-DCMAKE_BUILD_TYPE=${{ matrix.build_type }} \
-DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON \
-DGGML_OPENMP=OFF ;
cmake --build build --config ${{ matrix.build_type }} -j $(nproc) --target llama-server
- name: Build (sanitizers)
id: cmake_build_sanitizers
if: ${{ matrix.sanitizer != '' && matrix.sanitizer != 'THREAD' }}
run: |
cmake -B build \
-DGGML_NATIVE=OFF \
-DLLAMA_CURL=OFF \
-DLLAMA_OPENSSL=ON \
-DLLAMA_BUILD_SERVER=ON \
-DCMAKE_BUILD_TYPE=${{ matrix.build_type }} \
-DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON ;
cmake --build build --config ${{ matrix.build_type }} -j $(nproc) --target llama-server
- name: Build (sanitizers)
id: cmake_build
if: ${{ matrix.sanitizer == '' }}
run: |
cmake -B build \
-DGGML_NATIVE=OFF \
-DLLAMA_CURL=OFF \
-DLLAMA_OPENSSL=ON \
-DLLAMA_BUILD_SERVER=ON \
-DCMAKE_BUILD_TYPE=${{ matrix.build_type }} ;
cmake --build build --config ${{ matrix.build_type }} -j $(nproc) --target llama-server
- name: Tests
id: server_integration_tests
if: ${{ matrix.sanitizer == '' }}
env:
GITHUB_ACTIONS: "true"
run: |
cd tools/server/tests
./tests.sh
- name: Tests (sanitizers)
id: server_integration_tests_sanitizers
if: ${{ matrix.sanitizer != '' }}
run: |
cd tools/server/tests
LLAMA_SANITIZE=1 ./tests.sh
- name: Slow tests
id: server_integration_tests_slow
if: ${{ (github.event.schedule || github.event.inputs.slow_tests == 'true') && matrix.build_type == 'Release' }}
run: |
cd tools/server/tests
SLOW_TESTS=1 ./tests.sh

View File

@@ -76,270 +76,6 @@ jobs:
run: |
pip install -r tools/server/tests/requirements.txt
webui-setup:
name: WebUI Setup
runs-on: ubuntu-latest
steps:
- name: Checkout code
uses: actions/checkout@v4
with:
fetch-depth: 0
ref: ${{ github.event.inputs.sha || github.event.pull_request.head.sha || github.sha || github.head_ref || github.ref_name }}
- name: Setup Node.js
uses: actions/setup-node@v4
with:
node-version: "22"
cache: "npm"
cache-dependency-path: "tools/server/webui/package-lock.json"
- name: Cache node_modules
uses: actions/cache@v4
id: cache-node-modules
with:
path: tools/server/webui/node_modules
key: ${{ runner.os }}-node-modules-${{ hashFiles('tools/server/webui/package-lock.json') }}
restore-keys: |
${{ runner.os }}-node-modules-
- name: Install dependencies
if: steps.cache-node-modules.outputs.cache-hit != 'true'
run: npm ci
working-directory: tools/server/webui
webui-check:
needs: webui-setup
name: WebUI Check
runs-on: ubuntu-latest
steps:
- name: Checkout code
uses: actions/checkout@v4
with:
fetch-depth: 0
ref: ${{ github.event.inputs.sha || github.event.pull_request.head.sha || github.sha || github.head_ref || github.ref_name }}
- name: Setup Node.js
uses: actions/setup-node@v4
with:
node-version: "22"
- name: Restore node_modules cache
uses: actions/cache@v4
with:
path: tools/server/webui/node_modules
key: ${{ runner.os }}-node-modules-${{ hashFiles('tools/server/webui/package-lock.json') }}
restore-keys: |
${{ runner.os }}-node-modules-
- name: Run type checking
run: npm run check
working-directory: tools/server/webui
- name: Run linting
run: npm run lint
working-directory: tools/server/webui
webui-build:
needs: webui-check
name: WebUI Build
runs-on: ubuntu-latest
steps:
- name: Checkout code
uses: actions/checkout@v4
with:
fetch-depth: 0
ref: ${{ github.event.inputs.sha || github.event.pull_request.head.sha || github.sha || github.head_ref || github.ref_name }}
- name: Setup Node.js
uses: actions/setup-node@v4
with:
node-version: "22"
- name: Restore node_modules cache
uses: actions/cache@v4
with:
path: tools/server/webui/node_modules
key: ${{ runner.os }}-node-modules-${{ hashFiles('tools/server/webui/package-lock.json') }}
restore-keys: |
${{ runner.os }}-node-modules-
- name: Build application
run: npm run build
working-directory: tools/server/webui
webui-tests:
needs: webui-build
name: Run WebUI tests
permissions:
contents: read
runs-on: ubuntu-latest
steps:
- name: Checkout code
uses: actions/checkout@v4
- name: Setup Node.js
uses: actions/setup-node@v4
with:
node-version: "22"
- name: Restore node_modules cache
uses: actions/cache@v4
with:
path: tools/server/webui/node_modules
key: ${{ runner.os }}-node-modules-${{ hashFiles('tools/server/webui/package-lock.json') }}
restore-keys: |
${{ runner.os }}-node-modules-
- name: Install Playwright browsers
run: npx playwright install --with-deps
working-directory: tools/server/webui
- name: Build Storybook
run: npm run build-storybook
working-directory: tools/server/webui
- name: Run Client tests
run: npm run test:client
working-directory: tools/server/webui
- name: Run Server tests
run: npm run test:server
working-directory: tools/server/webui
- name: Run UI tests
run: npm run test:ui -- --testTimeout=60000
working-directory: tools/server/webui
- name: Run E2E tests
run: npm run test:e2e
working-directory: tools/server/webui
server-build:
needs: [webui-tests]
runs-on: ubuntu-latest
strategy:
matrix:
sanitizer: [ADDRESS, UNDEFINED] # THREAD is broken
build_type: [RelWithDebInfo]
include:
- build_type: Release
sanitizer: ""
fail-fast: false # While -DLLAMA_SANITIZE_THREAD=ON is broken
steps:
- name: Dependencies
id: depends
run: |
sudo apt-get update
sudo apt-get -y install \
build-essential \
xxd \
git \
cmake \
curl \
wget \
language-pack-en \
libssl-dev
- name: Clone
id: checkout
uses: actions/checkout@v4
with:
fetch-depth: 0
ref: ${{ github.event.inputs.sha || github.event.pull_request.head.sha || github.sha || github.head_ref || github.ref_name }}
- name: Python setup
id: setup_python
uses: actions/setup-python@v5
with:
python-version: '3.11'
- name: Tests dependencies
id: test_dependencies
run: |
pip install -r tools/server/tests/requirements.txt
- name: Setup Node.js for WebUI
uses: actions/setup-node@v4
with:
node-version: "22"
cache: "npm"
cache-dependency-path: "tools/server/webui/package-lock.json"
- name: Install WebUI dependencies
run: npm ci
working-directory: tools/server/webui
- name: Build WebUI
run: npm run build
working-directory: tools/server/webui
- name: Build (no OpenMP)
id: cmake_build_no_openmp
if: ${{ matrix.sanitizer == 'THREAD' }}
run: |
cmake -B build \
-DGGML_NATIVE=OFF \
-DLLAMA_CURL=OFF \
-DLLAMA_OPENSSL=ON \
-DLLAMA_BUILD_SERVER=ON \
-DCMAKE_BUILD_TYPE=${{ matrix.build_type }} \
-DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON \
-DGGML_OPENMP=OFF ;
cmake --build build --config ${{ matrix.build_type }} -j $(nproc) --target llama-server
- name: Build (sanitizers)
id: cmake_build_sanitizers
if: ${{ matrix.sanitizer != '' && matrix.sanitizer != 'THREAD' }}
run: |
cmake -B build \
-DGGML_NATIVE=OFF \
-DLLAMA_CURL=OFF \
-DLLAMA_OPENSSL=ON \
-DLLAMA_BUILD_SERVER=ON \
-DCMAKE_BUILD_TYPE=${{ matrix.build_type }} \
-DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON ;
cmake --build build --config ${{ matrix.build_type }} -j $(nproc) --target llama-server
- name: Build (sanitizers)
id: cmake_build
if: ${{ matrix.sanitizer == '' }}
run: |
cmake -B build \
-DGGML_NATIVE=OFF \
-DLLAMA_CURL=OFF \
-DLLAMA_OPENSSL=ON \
-DLLAMA_BUILD_SERVER=ON \
-DCMAKE_BUILD_TYPE=${{ matrix.build_type }} ;
cmake --build build --config ${{ matrix.build_type }} -j $(nproc) --target llama-server
- name: Tests
id: server_integration_tests
if: ${{ matrix.sanitizer == '' }}
env:
GITHUB_ACTIONS: "true"
run: |
cd tools/server/tests
./tests.sh
- name: Tests (sanitizers)
id: server_integration_tests_sanitizers
if: ${{ matrix.sanitizer != '' }}
run: |
cd tools/server/tests
LLAMA_SANITIZE=1 ./tests.sh
- name: Slow tests
id: server_integration_tests_slow
if: ${{ (github.event.schedule || github.event.inputs.slow_tests == 'true') && matrix.build_type == 'Release' }}
run: |
cd tools/server/tests
SLOW_TESTS=1 ./tests.sh
server-windows:
runs-on: windows-2022

View File

@@ -68,3 +68,6 @@ Please disclose it as a private [security advisory](https://github.com/ggml-org/
Please note that using AI to identify vulnerabilities and generate reports is permitted. However, you must (1) explicitly disclose how AI was used and (2) conduct a thorough manual review before submitting the report.
A team of volunteers on a reasonable-effort basis maintains this project. As such, please give us at least 90 days to work on a fix before public exposure.
> [!IMPORTANT]
> For collaborators: if you are interested in helping out with reviewing private security disclosures, please see: https://github.com/ggml-org/llama.cpp/discussions/18080

View File

@@ -136,19 +136,11 @@ class ModelBase:
self.remote_hf_model_id = remote_hf_model_id
self.sentence_transformers_dense_modules = sentence_transformers_dense_modules
self.hparams = ModelBase.load_hparams(self.dir_model, self.is_mistral_format) if hparams is None else hparams
self.rope_parameters = self.hparams.get("rope_parameters", self.hparams.get("rope_scaling")) or {}
self.model_tensors = self.index_tensors(remote_hf_model_id=remote_hf_model_id)
self.metadata_override = metadata_override
self.model_name = model_name
self.dir_model_card = dir_model # overridden in convert_lora_to_gguf.py
# Ensure "rope_theta" and "rope_type" is mirrored in rope_parameters
if "full_attention" not in self.rope_parameters and "sliding_attention" not in self.rope_parameters:
if "rope_theta" not in self.rope_parameters and (rope_theta := self.find_hparam(["rope_theta", "global_rope_theta", "rotary_emb_base"], optional=True)) is not None:
self.rope_parameters["rope_theta"] = rope_theta
if "rope_type" not in self.rope_parameters and (rope_type := self.rope_parameters.get("type")) is not None:
self.rope_parameters["rope_type"] = rope_type
# Apply heuristics to figure out typical tensor encoding based on first layer tensor encoding type
if self.ftype == gguf.LlamaFileType.GUESSED:
# NOTE: can't use field "torch_dtype" in config.json, because some finetunes lie.
@@ -765,6 +757,15 @@ class TextModel(ModelBase):
self.block_count = self.find_hparam(["n_layers", "num_hidden_layers", "n_layer", "num_layers"])
self.tensor_map = gguf.get_tensor_name_map(self.model_arch, self.block_count)
self.rope_parameters = self.hparams.get("rope_parameters", self.hparams.get("rope_scaling")) or {}
# Ensure "rope_theta" and "rope_type" is mirrored in rope_parameters
if "full_attention" not in self.rope_parameters and "sliding_attention" not in self.rope_parameters:
if "rope_theta" not in self.rope_parameters and (rope_theta := self.find_hparam(["rope_theta", "global_rope_theta", "rotary_emb_base"], optional=True)) is not None:
self.rope_parameters["rope_theta"] = rope_theta
if "rope_type" not in self.rope_parameters and (rope_type := self.rope_parameters.get("type")) is not None:
self.rope_parameters["rope_type"] = rope_type
@classmethod
def __init_subclass__(cls):
# can't use an abstract property, because overriding it without type errors
@@ -8489,8 +8490,18 @@ class GraniteHybridModel(Mamba2Model, GraniteMoeModel):
class NemotronHModel(GraniteHybridModel):
"""Hybrid mamba2/attention model from NVIDIA"""
model_arch = gguf.MODEL_ARCH.NEMOTRON_H
is_moe: bool = False
def __init__(self, *args, **kwargs):
# We have to determine the correct model architecture (MoE vs non-MoE) before
# calling the parent __init__. This is because the parent constructor
# uses self.model_arch to build the tensor name map, and all MoE-specific
# mappings would be missed if it were called with the default non-MoE arch.
hparams = ModelBase.load_hparams(args[0], self.is_mistral_format)
if "num_experts_per_tok" in hparams:
self.model_arch = gguf.MODEL_ARCH.NEMOTRON_H_MOE
self.is_moe = True
super().__init__(*args, **kwargs)
# Save the top-level head_dim for later
@@ -8502,9 +8513,11 @@ class NemotronHModel(GraniteHybridModel):
# Update the ssm / attn / mlp layers
# M: Mamba2, *: Attention, -: MLP
# MoE:
# M: Mamba2, *: Attention, E: Expert
hybrid_override_pattern = self.hparams["hybrid_override_pattern"]
self._ssm_layers = [i for i, val in enumerate(hybrid_override_pattern) if val == "M"]
self._mlp_layers = [i for i, val in enumerate(hybrid_override_pattern) if val == "-"]
self._mlp_layers = [i for i, val in enumerate(hybrid_override_pattern) if val == ("E" if self.is_moe else "-")]
def get_attn_layers(self):
hybrid_override_pattern = self.hparams["hybrid_override_pattern"]
@@ -8520,10 +8533,28 @@ class NemotronHModel(GraniteHybridModel):
# Set feed_forward_length
# NOTE: This will trigger an override warning. This is preferable to
# duplicating all the parent logic
n_ff = self.find_hparam(["intermediate_size", "n_inner", "hidden_dim"])
self.gguf_writer.add_feed_forward_length([
n_ff if i in self._mlp_layers else 0 for i in range(self.block_count)
])
if not self.is_moe:
n_ff = self.find_hparam(["intermediate_size", "n_inner", "hidden_dim"])
self.gguf_writer.add_feed_forward_length([
n_ff if i in self._mlp_layers else 0 for i in range(self.block_count)
])
else:
moe_intermediate_size = self.hparams["moe_intermediate_size"]
self.gguf_writer.add_feed_forward_length([
moe_intermediate_size if i in self._mlp_layers else 0 for i in range(self.block_count)
])
self.gguf_writer.add_expert_used_count(self.hparams["num_experts_per_tok"])
self.gguf_writer.add_expert_feed_forward_length(self.hparams["moe_intermediate_size"])
self.gguf_writer.add_expert_shared_feed_forward_length(self.hparams["moe_shared_expert_intermediate_size"])
self.gguf_writer.add_expert_count(self.hparams["n_routed_experts"])
self.gguf_writer.add_expert_shared_count(self.hparams["n_shared_experts"])
self.gguf_writer.add_expert_weights_norm(self.hparams["norm_topk_prob"])
self.gguf_writer.add_expert_weights_scale(self.hparams["routed_scaling_factor"])
self.gguf_writer.add_expert_group_count(self.hparams["n_group"])
# number of experts used per token (top-k)
if (n_experts_used := self.hparams.get("num_experts_per_tok")) is not None:
self.gguf_writer.add_expert_used_count(n_experts_used)
def set_vocab(self):
super().set_vocab()
@@ -8531,7 +8562,81 @@ class NemotronHModel(GraniteHybridModel):
# The tokenizer _does_ add a BOS token (via post_processor type
# TemplateProcessing) but does not set add_bos_token to true in the
# config, so we need to explicitly override it here.
self.gguf_writer.add_add_bos_token(True)
if not self.is_moe:
self.gguf_writer.add_add_bos_token(True)
def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
if self.is_moe and bid is not None:
if name.endswith("mixer.gate.e_score_correction_bias"):
new_name = name.replace("e_score_correction_bias", "e_score_correction.bias")
mapped_name = self.map_tensor_name(new_name)
return [(mapped_name, data_torch)]
if name.endswith("mixer.dt_bias"):
new_name = name.replace("dt_bias", "dt.bias")
mapped_name = self.map_tensor_name(new_name)
return [(mapped_name, data_torch)]
if name.endswith("mixer.conv1d.weight"):
squeezed_data = data_torch.squeeze()
mapped_name = self.map_tensor_name(name)
return [(mapped_name, squeezed_data)]
if name.endswith("mixer.A_log"):
transformed_data = -torch.exp(data_torch)
reshaped_data = transformed_data.squeeze().reshape(-1, 1)
mapped_name = self.map_tensor_name(name)
return [(mapped_name, reshaped_data)]
if name.endswith("mixer.D"):
reshaped_data = data_torch.squeeze().reshape(-1, 1)
mapped_name = self.map_tensor_name(name)
return [(mapped_name, reshaped_data)]
if name.endswith("mixer.norm.weight"):
reshaped_data = data_torch.reshape(8, 512)
mapped_name = self.map_tensor_name(name)
return [(mapped_name, reshaped_data)]
if name.find("mixer.experts") != -1:
n_experts = self.hparams["n_routed_experts"]
assert bid is not None
if self._experts is None:
self._experts = [{} for _ in range(self.block_count)]
self._experts[bid][name] = data_torch
if len(self._experts[bid]) >= n_experts * 2:
# merge the experts into a single tensor
tensors: list[tuple[str, Tensor]] = []
for w_name in ["down_proj", "up_proj"]:
datas: list[Tensor] = []
for xid in range(n_experts):
ename = f"backbone.layers.{bid}.mixer.experts.{xid}.{w_name}.weight"
datas.append(self._experts[bid][ename])
del self._experts[bid][ename]
data_torch = torch.stack(datas, dim=0)
merged_name = f"model.layers.{bid}.mlp.experts.{w_name}.weight"
new_name = self.map_tensor_name(merged_name)
tensors.append((new_name, data_torch))
return tensors
else:
return []
return super().modify_tensors(data_torch, name, bid)
def prepare_tensors(self):
super().prepare_tensors()
if self._experts is not None:
# flatten `list[dict[str, Tensor]]` into `list[str]`
experts = [k for d in self._experts for k in d.keys()]
if len(experts) > 0:
raise ValueError(f"Unprocessed experts: {experts}")
@ModelBase.register("BailingMoeForCausalLM")

View File

@@ -1976,9 +1976,6 @@ static bool ggml_hexagon_supported_mul_mat(const struct ggml_hexagon_session * s
break;
case GGML_TYPE_F16:
if (!opt_experimental) {
return false;
}
break;
default:

View File

@@ -903,7 +903,7 @@ static void vec_dot_f16_f32(const int n, float * restrict s, const void * restri
const float * restrict vy = (const float * restrict) y;
for (uint32_t i = 0; i < n; i++) {
rsum += vx[i] * (__fp16) vy[i];
rsum += (float)vx[i] * vy[i];
}
*s = rsum;
return;
@@ -917,7 +917,7 @@ static void vec_dot_f16_f32(const int n, float * restrict s, const void * restri
// for some reason we need volatile here so that the compiler doesn't try anything funky
volatile HVX_Vector rsum = Q6_V_vsplat_R(0);
float r_sum_scalar = 0.0f;
uint32_t i = 0;
for (i = 0; i < nv0; i++) {
@@ -926,31 +926,42 @@ static void vec_dot_f16_f32(const int n, float * restrict s, const void * restri
HVX_Vector x = vx[i];
HVX_VectorPair xp = Q6_Wqf32_vmpy_VhfVhf(Q6_Vh_vshuff_Vh(x), Q6_Vh_vsplat_R(0x3C00)); // mul by 1.0
HVX_Vector hi = Q6_Vqf32_vmpy_VsfVsf(Q6_Vsf_equals_Vqf32(Q6_V_hi_W(xp)), Q6_V_hi_W(yp));
HVX_Vector lo = Q6_Vqf32_vmpy_VsfVsf(Q6_Vsf_equals_Vqf32(Q6_V_lo_W(xp)), Q6_V_lo_W(yp));
//NOTE: need volatile here to prevent compiler optimization
// Seem compiler cannot guarantee read-after-write??
volatile HVX_Vector hi = Q6_Vqf32_vmpy_VsfVsf(Q6_Vsf_equals_Vqf32(Q6_V_hi_W(xp)), Q6_V_hi_W(yp));
volatile HVX_Vector lo = Q6_Vqf32_vmpy_VsfVsf(Q6_Vsf_equals_Vqf32(Q6_V_lo_W(xp)), Q6_V_lo_W(yp));
HVX_Vector sum = Q6_Vqf32_vadd_Vqf32Vqf32(hi, lo);
rsum = Q6_Vqf32_vadd_Vqf32Vqf32(rsum, sum);
}
if (nv1) {
HVX_VectorPair yp = vy[i];
// HVX_VectorPair yp = vy[i];
HVX_Vector x = vx[i];
HVX_VectorPair xp = Q6_Wqf32_vmpy_VhfVhf(Q6_Vh_vshuff_Vh(x), Q6_Vh_vsplat_R(0x3C00)); // mul by 1.0
// HVX_Vector x = vx[i];
// HVX_VectorPair xp = Q6_Wqf32_vmpy_VhfVhf(Q6_Vh_vshuff_Vh(x), Q6_Vh_vsplat_R(0x3C00)); // mul by 1.0
if (nv1 >= 32) {
HVX_Vector hi = Q6_Vqf32_vmpy_VsfVsf(Q6_Vsf_equals_Vqf32(Q6_V_hi_W(xp)), Q6_V_hi_W(yp));
rsum = Q6_Vqf32_vadd_Vqf32Vqf32(rsum, hi);
nv1 -= 32;
}
// if (nv1 >= 32) {
// volatile HVX_Vector hi = Q6_Vqf32_vmpy_VsfVsf(Q6_Vsf_equals_Vqf32(Q6_V_hi_W(xp)), Q6_V_hi_W(yp));
// rsum = Q6_Vqf32_vadd_Vqf32Vqf32(rsum, hi);
// nv1 -= 32;
// }
// rsum = hvx_vec_qf32_reduce_sum(rsum);
// if (nv1) {
// volatile HVX_Vector lo = Q6_Vqf32_vmpy_VsfVsf(Q6_Vsf_equals_Vqf32(Q6_V_lo_W(xp)), Q6_V_lo_W(yp));
// HVX_Vector sum = hvx_vec_qf32_reduce_sum_n(lo, nv1);
// rsum = Q6_Vqf32_vadd_Vqf32Vqf32(rsum, sum);
// }
//process the remainder using scalar loop
rsum = hvx_vec_qf32_reduce_sum(rsum);
const __fp16 * restrict sx = (const __fp16 * restrict) x;
const float * restrict sy = (const float * restrict) y;
if (nv1) {
HVX_Vector lo = Q6_Vqf32_vmpy_VsfVsf(Q6_Vsf_equals_Vqf32(Q6_V_lo_W(xp)), Q6_V_lo_W(yp));
HVX_Vector sum = hvx_vec_qf32_reduce_sum_n(lo, nv1);
rsum = Q6_Vqf32_vadd_Vqf32Vqf32(rsum, sum);
for (uint32_t i = nv0 * 64; i < n; i++) {
r_sum_scalar += (float) sx[i] * sy[i];
}
// hvx_vec_dump_fp16("X", x);
@@ -961,7 +972,7 @@ static void vec_dot_f16_f32(const int n, float * restrict s, const void * restri
rsum = hvx_vec_qf32_reduce_sum(rsum);
}
*s = hvx_vec_get_fp32(Q6_Vsf_equals_Vqf32(rsum));
*s = hvx_vec_get_fp32(Q6_Vsf_equals_Vqf32(rsum)) + r_sum_scalar;
# ifdef HTP_DEBUG
{
@@ -1498,9 +1509,6 @@ static void matmul_f16_f32(struct htp_tensor * restrict src0,
uint64_t t1, t2;
t1 = HAP_perf_get_qtimer_count();
const size_t src0_row_size = sizeof(__fp16) * ne00;
const size_t src1_row_size = sizeof(float) * ne10;
assert(ne12 % ne02 == 0);
assert(ne13 % ne03 == 0);
@@ -1510,8 +1518,6 @@ static void matmul_f16_f32(struct htp_tensor * restrict src0,
// This is the size of the rest of the dimensions of the result
const uint32_t nr1 = ne1 * ne2 * ne3;
uint32_t chunk_size = 64;
// distribute the thread work across the inner or outer loop based on which one is larger
uint32_t nchunk0 = nr0 > nr1 ? nth : 1; // parallelize by src0 rows
uint32_t nchunk1 = nr0 > nr1 ? 1 : nth; // parallelize by src1 rows
@@ -1544,11 +1550,11 @@ static void matmul_f16_f32(struct htp_tensor * restrict src0,
const uint32_t blck_0 = 64;
const uint32_t blck_1 = 64;
float tmp[32];
__attribute__((aligned(128))) float tmp[64];
for (uint32_t iir1 = ir1_start; iir1 < ir1_end; iir1 += blck_1) {
for (uint32_t iir0 = ir0_start; iir0 < ir0_end; iir0 += blck_0) {
for (uint32_t ir1 = iir1; ir1 < iir1 + blck_1 && ir1 < ir1_end; ir1++) {
for (uint32_t ir1 = iir1; ir1 < MIN(iir1 + blck_1, ir1_end); ir1++) {
const uint32_t i13 = (ir1 / (ne12 * ne1));
const uint32_t i12 = (ir1 - i13 * ne12 * ne1) / ne1;
const uint32_t i11 = (ir1 - i13 * ne12 * ne1 - i12 * ne1);
@@ -1561,13 +1567,16 @@ static void matmul_f16_f32(struct htp_tensor * restrict src0,
const uint32_t i2 = i12;
const uint32_t i3 = i13;
const uint8_t * restrict src0_row = (const uint8_t *) src0->data + (0 + i02 * nb02 + i03 * nb03);
const uint8_t * restrict src0_base = (const uint8_t *) src0->data + (0 + i02 * nb02 + i03 * nb03);
const uint8_t * restrict src1_col =
(const uint8_t *) src1->data + (i11 + i12 * ne11 + i13 * ne12 * ne11) * src1_row_size;
(const uint8_t *) src1->data + (i11 * nb11 + i12 * nb12 + i13 * nb13);
float * dst_col = (float *) ((uint8_t * restrict) dst->data + (i1 * nb1 + i2 * nb2 + i3 * nb3));
for (uint32_t ir0 = iir0; ir0 < iir0 + blck_0 && ir0 < ir0_end; ir0++) {
vec_dot_f16_f32(ne00, &tmp[ir0 - iir0], src0_row + ir0 * src0_row_size, src1_col);
const uint32_t ir0_block_end = MIN(iir0 + blck_0, ir0_end);
for (uint32_t ir0 = iir0; ir0 < ir0_block_end; ir0++) {
// Use nb01 stride for non-contiguous src0 support
const uint8_t * restrict src0_row = src0_base + ir0 * nb01;
vec_dot_f16_f32(ne00, &tmp[ir0 - iir0], src0_row, src1_col);
}
hvx_copy_fp32_ua((uint8_t *) &dst_col[iir0], (uint8_t *) tmp, MIN(iir0 + blck_0, ir0_end) - iir0);

View File

@@ -413,6 +413,7 @@ class MODEL_ARCH(IntEnum):
JAIS = auto()
NEMOTRON = auto()
NEMOTRON_H = auto()
NEMOTRON_H_MOE = auto()
EXAONE = auto()
EXAONE4 = auto()
GRANITE = auto()
@@ -786,6 +787,7 @@ MODEL_ARCH_NAMES: dict[MODEL_ARCH, str] = {
MODEL_ARCH.JAIS: "jais",
MODEL_ARCH.NEMOTRON: "nemotron",
MODEL_ARCH.NEMOTRON_H: "nemotron_h",
MODEL_ARCH.NEMOTRON_H_MOE: "nemotron_h_moe",
MODEL_ARCH.EXAONE: "exaone",
MODEL_ARCH.EXAONE4: "exaone4",
MODEL_ARCH.GRANITE: "granite",
@@ -2529,6 +2531,33 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
MODEL_TENSOR.FFN_DOWN,
MODEL_TENSOR.FFN_UP,
],
MODEL_ARCH.NEMOTRON_H_MOE: [
MODEL_TENSOR.TOKEN_EMBD,
MODEL_TENSOR.OUTPUT_NORM,
MODEL_TENSOR.OUTPUT,
MODEL_TENSOR.ATTN_NORM,
MODEL_TENSOR.SSM_IN,
MODEL_TENSOR.SSM_CONV1D,
MODEL_TENSOR.SSM_DT,
MODEL_TENSOR.SSM_A,
MODEL_TENSOR.SSM_D,
MODEL_TENSOR.SSM_NORM,
MODEL_TENSOR.SSM_OUT,
MODEL_TENSOR.ATTN_Q,
MODEL_TENSOR.ATTN_K,
MODEL_TENSOR.ATTN_V,
MODEL_TENSOR.ATTN_OUT,
MODEL_TENSOR.FFN_DOWN,
MODEL_TENSOR.FFN_UP,
# experts
MODEL_TENSOR.FFN_GATE_INP,
MODEL_TENSOR.FFN_UP_EXP,
MODEL_TENSOR.FFN_DOWN_EXP,
# shared expert
MODEL_TENSOR.FFN_DOWN_SHEXP,
MODEL_TENSOR.FFN_UP_SHEXP,
MODEL_TENSOR.FFN_EXP_PROBS_B,
],
MODEL_ARCH.EXAONE: [
MODEL_TENSOR.TOKEN_EMBD,
MODEL_TENSOR.OUTPUT_NORM,

View File

@@ -379,6 +379,7 @@ class TensorNameMap:
"model.layers.{bid}.feed_forward.gate", # lfm2moe
"model.layers.{bid}.mlp.router.gate", # afmoe
"layers.{bid}.gate", # mistral-large
"backbone.layers.{bid}.mixer.gate", # nemotron-h-moe
),
MODEL_TENSOR.FFN_GATE_INP_SHEXP: (
@@ -392,6 +393,7 @@ class TensorNameMap:
"model.layers.{bid}.mlp.expert_bias", # afmoe
"model.layers.{bid}.feed_forward.expert_bias", # lfm2moe
"model.layers.{bid}.block_sparse_moe.e_score_correction", # minimax-m2
"backbone.layers.{bid}.mixer.gate.e_score_correction" # nemotron-h-moe
),
# Feed-forward up
@@ -440,7 +442,7 @@ class TensorNameMap:
"layers.{bid}.feed_forward.experts.w3", # mixtral (merged)
"transformer.decoder_layer.{bid}.moe.linear_v", # Grok (merged)
"transformer.blocks.{bid}.ffn.experts.mlp.v1", # dbrx
"model.layers.{bid}.mlp.experts.up_proj", # qwen2moe olmoe (merged) ernie4.5-moe
"model.layers.{bid}.mlp.experts.up_proj", # qwen2moe olmoe (merged) ernie4.5-moe, nemotron-h-moe (merged)
"model.layers.{bid}.block_sparse_moe.experts.w3", # phimoe (merged)
"model.layers.{bid}.feed_forward.experts.up_proj", # llama4
"encoder.layers.{bid}.mlp.experts.mlp.w1", # nomic-bert-moe
@@ -454,6 +456,7 @@ class TensorNameMap:
"model.layers.{bid}.feed_forward.down_proj",
"model.layers.{bid}.mlp.shared_mlp.up_proj", # hunyuan
"layers.{bid}.shared_experts.w3", # mistral-large
"backbone.layers.{bid}.mixer.shared_experts.up_proj", # nemotron-h-moe
),
MODEL_TENSOR.FFN_UP_CHEXP: (
@@ -548,7 +551,7 @@ class TensorNameMap:
"layers.{bid}.feed_forward.experts.w2", # mixtral (merged)
"transformer.decoder_layer.{bid}.moe.linear_1", # Grok (merged)
"transformer.blocks.{bid}.ffn.experts.mlp.w2", # dbrx
"model.layers.{bid}.mlp.experts.down_proj", # qwen2moe olmoe (merged) ernie4.5-moe
"model.layers.{bid}.mlp.experts.down_proj", # qwen2moe olmoe (merged) ernie4.5-moe nemotron-h-moe (merged)
"model.layers.{bid}.block_sparse_moe.output_linear", # granitemoe
"model.layers.{bid}.block_sparse_moe.experts.w2", # phimoe (merged)
"model.layers.{bid}.feed_forward.experts.down_proj", # llama4
@@ -563,6 +566,7 @@ class TensorNameMap:
"model.layers.{bid}.shared_mlp.output_linear", # granitemoe
"model.layers.{bid}.mlp.shared_mlp.down_proj", # hunyuan
"layers.{bid}.shared_experts.w2", # mistral-large
"backbone.layers.{bid}.mixer.shared_experts.down_proj", # nemotron-h-moe
),
MODEL_TENSOR.FFN_DOWN_CHEXP: (
@@ -706,6 +710,7 @@ class TensorNameMap:
"model.layers.{bid}.mamba.dt_proj", # jamba falcon-h1 granite-hybrid
"model.layers.layers.{bid}.mixer.dt_proj", # plamo2
"model.layers.{bid}.linear_attn.dt_proj", # qwen3next
"backbone.layers.{bid}.mixer.dt", # nemotron-h-moe
),
MODEL_TENSOR.SSM_DT_NORM: (

View File

@@ -0,0 +1,65 @@
#!/bin/sh
#
# Run llama-mtmd-cli on an Android device over adb using the Hexagon (HTP)
# backend. Configuration is passed via environment variables:
#   B=<branch dir>   S=<adb serial>     M=<model gguf>     MMPROJ=<mmproj gguf>
#   IMG=<image file> D=<device>         V=<verbose level>  E=<experimental>
#   SCHED=<sched>    PROF=<profile>     OPMASK=<op mask>   NHVX=<n hvx>
#   NDEV=<n dev>     MTMD_DEVICE=<mtmd backend device>
#
# Basedir on device
basedir=/data/local/tmp/llama.cpp
cli_opts=
branch=.
[ "$B" != "" ] && branch=$B
adbserial=
[ "$S" != "" ] && adbserial="-s $S"
model="gemma-3-4b-it-Q4_0.gguf"
[ "$M" != "" ] && model="$M"
mmproj="mmproj-F16.gguf"
[ "$MMPROJ" != "" ] && mmproj="$MMPROJ"
image=
[ "$IMG" != "" ] && image="$IMG"
device="HTP0"
[ "$D" != "" ] && device="$D"
verbose=
[ "$V" != "" ] && verbose="GGML_HEXAGON_VERBOSE=$V"
experimental="GGML_HEXAGON_EXPERIMENTAL=1"
[ "$E" != "" ] && experimental="GGML_HEXAGON_EXPERIMENTAL=$E"
sched=
# NOTE(review): the line below is a single simple command made only of variable
# assignments; in POSIX sh all such assignments apply to the current shell, so
# both sched and cli_opts are set — but `{ sched=...; cli_opts=...; }` would be
# clearer and less likely to be "fixed" into a bug later.
[ "$SCHED" != "" ] && sched="GGML_SCHED_DEBUG=2" cli_opts="$cli_opts -v"
profile=
[ "$PROF" != "" ] && profile="GGML_HEXAGON_PROFILE=$PROF GGML_HEXAGON_OPSYNC=1"
opmask=
[ "$OPMASK" != "" ] && opmask="GGML_HEXAGON_OPMASK=$OPMASK"
nhvx=
[ "$NHVX" != "" ] && nhvx="GGML_HEXAGON_NHVX=$NHVX"
ndev=
[ "$NDEV" != "" ] && ndev="GGML_HEXAGON_NDEV=$NDEV"
# MTMD backend device for vision model (defaults to CPU if not set)
mtmd_backend=
[ "$MTMD_DEVICE" != "" ] && mtmd_backend="MTMD_BACKEND_DEVICE=$MTMD_DEVICE"
# trace the adb invocation so the exact remote command is visible
set -x
adb $adbserial shell " \
cd $basedir; ulimit -c unlimited; \
LD_LIBRARY_PATH=$basedir/$branch/lib \
ADSP_LIBRARY_PATH=$basedir/$branch/lib \
$verbose $experimental $sched $opmask $profile $nhvx $ndev $mtmd_backend \
./$branch/bin/llama-mtmd-cli --no-mmap -m $basedir/../gguf/$model \
--mmproj $basedir/../gguf/$mmproj \
--image $basedir/../gguf/$image \
--poll 1000 -t 6 --cpu-mask 0xfc --cpu-strict 1 \
--ctx-size 8192 --batch-size 128 -ctk q8_0 -ctv q8_0 -fa on \
-ngl 99 --device $device -v $cli_opts $@ \
"

View File

@@ -75,6 +75,7 @@ static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = {
{ LLM_ARCH_JAIS, "jais" },
{ LLM_ARCH_NEMOTRON, "nemotron" },
{ LLM_ARCH_NEMOTRON_H, "nemotron_h" },
{ LLM_ARCH_NEMOTRON_H_MOE, "nemotron_h_moe" },
{ LLM_ARCH_EXAONE, "exaone" },
{ LLM_ARCH_EXAONE4, "exaone4" },
{ LLM_ARCH_RWKV6, "rwkv6" },
@@ -1763,6 +1764,39 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_N
{ LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
},
},
{
LLM_ARCH_NEMOTRON_H_MOE,
{
{ LLM_TENSOR_TOKEN_EMBD, "token_embd" },
{ LLM_TENSOR_OUTPUT_NORM, "output_norm" },
{ LLM_TENSOR_OUTPUT, "output" },
{ LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
// mamba(2) ssm layers
{ LLM_TENSOR_SSM_IN, "blk.%d.ssm_in" },
{ LLM_TENSOR_SSM_CONV1D, "blk.%d.ssm_conv1d" },
{ LLM_TENSOR_SSM_DT, "blk.%d.ssm_dt" },
{ LLM_TENSOR_SSM_A, "blk.%d.ssm_a" },
{ LLM_TENSOR_SSM_D, "blk.%d.ssm_d" },
{ LLM_TENSOR_SSM_NORM, "blk.%d.ssm_norm" },
{ LLM_TENSOR_SSM_OUT, "blk.%d.ssm_out" },
// attention layers
{ LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
{ LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
{ LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
{ LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
// dense FFN
{ LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
{ LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
// MoE FFN (for MoE layers)
{ LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" },
{ LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
{ LLM_TENSOR_FFN_DOWN_EXPS, "blk.%d.ffn_down_exps" },
{ LLM_TENSOR_FFN_EXP_PROBS_B,"blk.%d.exp_probs_b" },
// MoE shared expert layer
{ LLM_TENSOR_FFN_DOWN_SHEXP, "blk.%d.ffn_down_shexp" },
{ LLM_TENSOR_FFN_UP_SHEXP, "blk.%d.ffn_up_shexp" },
},
},
{
LLM_ARCH_EXAONE,
{
@@ -2817,6 +2851,7 @@ bool llm_arch_is_hybrid(const llm_arch & arch) {
case LLM_ARCH_LFM2:
case LLM_ARCH_LFM2MOE:
case LLM_ARCH_NEMOTRON_H:
case LLM_ARCH_NEMOTRON_H_MOE:
case LLM_ARCH_QWEN3NEXT:
return true;
default:

View File

@@ -79,6 +79,7 @@ enum llm_arch {
LLM_ARCH_JAIS,
LLM_ARCH_NEMOTRON,
LLM_ARCH_NEMOTRON_H,
LLM_ARCH_NEMOTRON_H_MOE,
LLM_ARCH_EXAONE,
LLM_ARCH_EXAONE4,
LLM_ARCH_RWKV6,

View File

@@ -254,6 +254,24 @@ void llm_graph_input_rs::set_input(const llama_ubatch * ubatch) {
}
}
// Decide whether the recurrent-state input tensors built for a previous graph
// can be reused for the new parameters. The new memory context is adopted
// unconditionally; reuse requires every tensor size and every baked-in view
// offset (head, rs_z) to match the new context exactly.
bool llm_graph_input_rs::can_reuse(const llm_graph_params & params) {
    const auto * mctx_cur = static_cast<const llama_memory_recurrent_context *>(params.mctx);

    // always switch over to the new memory context, even on mismatch
    this->mctx = mctx_cur;

    const auto n_rs   = mctx_cur->get_n_rs();
    const auto n_seqs = params.ubatch.n_seqs;

    bool ok = true;

    // tensor extents must match the new batch/state geometry
    ok &= s_copy->ne[0]       == n_rs;
    ok &= s_copy_main->ne[0]  == n_seqs;
    ok &= s_copy_extra->ne[0] == n_rs - n_seqs;

    // head/rs_z are baked into the view offsets and must match exactly
    ok &= head == mctx_cur->get_head();
    ok &= rs_z == mctx_cur->get_rs_z();

    return ok;
}
void llm_graph_input_cross_embd::set_input(const llama_ubatch * ubatch) {
GGML_UNUSED(ubatch);
@@ -461,8 +479,46 @@ void llm_graph_input_attn_cross::set_input(const llama_ubatch * ubatch) {
}
// Upload all per-ubatch inputs for the hybrid (attention + recurrent) memory.
void llm_graph_input_mem_hybrid::set_input(const llama_ubatch * ubatch) {
inp_attn->set_input(ubatch);
// NOTE(review): this both delegates to inp_rs->set_input() and fills
// inp_rs->s_copy inline below — confirm the two paths are not redundantly
// writing the same data (one of them may be a leftover from a refactor).
inp_rs->set_input(ubatch);
// attention-side inputs: K/V gather indices and the KQ mask
mctx->get_attn()->set_input_k_idxs(inp_attn->self_k_idxs, ubatch);
mctx->get_attn()->set_input_v_idxs(inp_attn->self_v_idxs, ubatch);
mctx->get_attn()->set_input_kq_mask(inp_attn->self_kq_mask, ubatch, cparams.causal_attn);
// recurrent-side input: per-cell copy sources for the recurrent state
const int64_t n_rs = mctx->get_recr()->get_n_rs();
if (inp_rs->s_copy) {
// s_copy is written directly through its data pointer, so it must be host-resident
GGML_ASSERT(ggml_backend_buffer_is_host(inp_rs->s_copy->buffer));
int32_t * data = (int32_t *) inp_rs->s_copy->data;
// assuming copy destinations ALWAYS happen ONLY on the cells between head and head+n
for (uint32_t i = 0; i < n_rs; ++i) {
data[i] = mctx->get_recr()->s_copy(i);
}
}
}
// Decide whether the hybrid-memory input tensors from a previous graph can be
// reused. Both halves must match: the attention inputs against the new
// ubatch/KV-cache geometry, and the recurrent inputs against the new
// recurrent-state geometry (including the baked-in head/rs_z view offsets).
bool llm_graph_input_mem_hybrid::can_reuse(const llm_graph_params & params) {
    const auto * mctx_cur = static_cast<const llama_memory_hybrid_context *>(params.mctx);

    // always adopt the new hybrid memory context, even on mismatch
    this->mctx = mctx_cur;

    const auto * attn = mctx_cur->get_attn();
    const auto * recr = mctx_cur->get_recr();

    bool ok = true;

    // attention-side checks
    ok &= inp_attn->self_k_idxs->ne[0] == params.ubatch.n_tokens;
    //ok &= inp_attn->self_v_idxs->ne[0] == params.ubatch.n_tokens; // TODO: need to move this to the unified cache and check there
    ok &= inp_attn->self_kq_mask->ne[0] == attn->get_n_kv();
    ok &= inp_attn->self_kq_mask->ne[1] == params.ubatch.n_tokens;

    // recurrent-side checks: tensor sizes and baked-in view offsets
    ok &= inp_rs->s_copy->ne[0]       == recr->get_n_rs();
    ok &= inp_rs->s_copy_main->ne[0]  == params.ubatch.n_seqs;
    ok &= inp_rs->s_copy_extra->ne[0] == recr->get_n_rs() - params.ubatch.n_seqs;
    ok &= inp_rs->head == recr->get_head();
    ok &= inp_rs->rs_z == recr->get_rs_z();

    return ok;
}
//
@@ -1089,6 +1145,15 @@ ggml_tensor * llm_graph_context::build_moe_ffn(
cur = ggml_relu(ctx0, cur);
cb(cur, "ffn_moe_relu", il);
} break;
case LLM_FFN_RELU_SQR:
if (gate_exps) {
// TODO: add support for gated squared relu
GGML_ABORT("fatal error: gated squared relu not implemented");
} else {
cur = ggml_relu(ctx0, cur);
cur = ggml_sqr(ctx0, cur);
cb(cur, "ffn_moe_relu_sqr", il);
} break;
default:
GGML_ABORT("fatal error");
}
@@ -1841,6 +1906,9 @@ static std::unique_ptr<llm_graph_input_rs> build_rs_inp_impl(
inp->s_copy_main = ggml_view_1d(ctx0, inp->s_copy, n_seqs, 0);
inp->s_copy_extra = ggml_view_1d(ctx0, inp->s_copy, n_rs - n_seqs, n_seqs * inp->s_copy->nb[0]);
inp->head = mctx_cur->get_head();
inp->rs_z = mctx_cur->get_rs_z();
return inp;
}
@@ -1909,10 +1977,10 @@ ggml_tensor * llm_graph_context::build_rwkv_token_shift_store(
llm_graph_input_mem_hybrid * llm_graph_context::build_inp_mem_hybrid() const {
const auto * mctx_cur = static_cast<const llama_memory_hybrid_context *>(mctx);
auto inp_rs = build_rs_inp_impl(ctx0, ubatch, mctx_cur->get_recr());
auto inp_rs = build_rs_inp_impl (ctx0, ubatch, mctx_cur->get_recr());
auto inp_attn = build_attn_inp_kv_impl(ctx0, ubatch, hparams, cparams, mctx_cur->get_attn());
auto inp = std::make_unique<llm_graph_input_mem_hybrid>(std::move(inp_attn), std::move(inp_rs), mctx_cur);
auto inp = std::make_unique<llm_graph_input_mem_hybrid>(cparams, std::move(inp_attn), std::move(inp_rs), mctx_cur);
return (llm_graph_input_mem_hybrid *) res->add_input(std::move(inp));
}

View File

@@ -225,6 +225,8 @@ public:
void set_input(const llama_ubatch * ubatch) override;
bool can_reuse(const llm_graph_params & params) override;
ggml_tensor * s_copy; // I32 [n_rs]
// views of s_copy, computed once per graph
@@ -233,6 +235,10 @@ public:
ggml_tensor * s_copy_extra; // I32 [n_rs - n_seqs]
const llama_memory_recurrent_context * mctx;
// used in view offsets, need to match for valid graph reuse
uint32_t head;
int32_t rs_z;
};
class llm_graph_input_cross_embd : public llm_graph_input_i {
@@ -365,22 +371,28 @@ public:
class llm_graph_input_mem_hybrid : public llm_graph_input_i {
public:
llm_graph_input_mem_hybrid(
const llama_cparams & cparams,
std::unique_ptr<llm_graph_input_attn_kv> inp_attn,
std::unique_ptr<llm_graph_input_rs> inp_rs,
const llama_memory_hybrid_context * mctx) :
std::unique_ptr<llm_graph_input_rs> inp_rs,
const llama_memory_hybrid_context * mctx) :
inp_attn(std::move(inp_attn)),
inp_rs(std::move(inp_rs)),
cparams(cparams),
mctx(mctx) { }
virtual ~llm_graph_input_mem_hybrid() = default;
void set_input(const llama_ubatch * ubatch) override;
bool can_reuse(const llm_graph_params & params) override;
std::unique_ptr<llm_graph_input_attn_kv> inp_attn;
std::unique_ptr<llm_graph_input_rs> inp_rs;
llm_graph_input_attn_kv * get_attn() const { return inp_attn.get(); }
llm_graph_input_rs * get_recr() const { return inp_rs.get(); }
const llama_cparams cparams;
const llama_memory_hybrid_context * mctx;
};

View File

@@ -2,6 +2,7 @@
#include "ggml.h"
#include <algorithm>
#include <cassert>
void llama_hparams::set_swa_pattern(uint32_t n_pattern, bool dense_first) {

View File

@@ -222,7 +222,7 @@ llama_memory_hybrid_context::llama_memory_hybrid_context(
ubatches(std::move(ubatches)),
// note: here we copy the ubatches. not sure if this is ideal
ctx_attn(new llama_kv_cache_context(mem->get_mem_attn(), std::move(sinfos_attn), this->ubatches)),
ctx_recr(new llama_memory_recurrent_context(mem->get_mem_recr(), this->ubatches)),
ctx_recr(new llama_memory_recurrent_context(mem->get_mem_recr(), this->ubatches)),
status(llama_memory_status_combine(ctx_attn->get_status(), ctx_recr->get_status())) {
}

View File

@@ -120,6 +120,7 @@ const char * llm_type_name(llm_type type) {
case LLM_TYPE_16B_A1B: return "16B.A1B";
case LLM_TYPE_21B_A3B: return "21B.A3B";
case LLM_TYPE_30B_A3B: return "30B.A3B";
case LLM_TYPE_31B_A3_5B: return "31B.A3.5B";
case LLM_TYPE_80B_A3B: return "80B.A3B";
case LLM_TYPE_100B_A6B: return "100B.A6B";
case LLM_TYPE_106B_A12B: return "106B.A12B";
@@ -1797,6 +1798,7 @@ void llama_model::load_hparams(llama_model_loader & ml) {
}
} break;
case LLM_ARCH_NEMOTRON_H:
case LLM_ARCH_NEMOTRON_H_MOE:
{
ml.get_key(LLM_KV_SSM_CONV_KERNEL, hparams.ssm_d_conv);
ml.get_key(LLM_KV_SSM_INNER_SIZE, hparams.ssm_d_inner);
@@ -1812,7 +1814,14 @@ void llama_model::load_hparams(llama_model_loader & ml) {
ml.get_key(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, hparams.f_norm_rms_eps);
ml.get_key(LLM_KV_EXPERT_FEED_FORWARD_LENGTH, hparams.n_ff_exp, false);
ml.get_key(LLM_KV_EXPERT_SHARED_FEED_FORWARD_LENGTH, hparams.n_ff_shexp, false);
ml.get_key(LLM_KV_EXPERT_SHARED_COUNT, hparams.n_expert_shared, false);
ml.get_key(LLM_KV_EXPERT_WEIGHTS_NORM, hparams.expert_weights_norm, false);
ml.get_key(LLM_KV_EXPERT_WEIGHTS_SCALE, hparams.expert_weights_scale, false);
switch (hparams.n_layer) {
case 52: type = LLM_TYPE_31B_A3_5B; break; // Nemotron-H_MOE 31B
case 56: type = LLM_TYPE_9B; break;
default: type = LLM_TYPE_UNKNOWN;
}
@@ -5159,6 +5168,7 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
}
} break;
case LLM_ARCH_NEMOTRON_H:
case LLM_ARCH_NEMOTRON_H_MOE:
{
// mamba2 Mixer SSM params
// NOTE: int64_t for tensor dimensions
@@ -5169,6 +5179,9 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
const int64_t n_group = hparams.ssm_n_group;
const int64_t d_in_proj = 2*d_inner + 2*n_group*d_state + n_ssm_head;
const int64_t n_ff_exp = hparams.n_ff_exp ? hparams.n_ff_exp : n_ff / n_expert_used;
const int64_t n_ff_shexp = hparams.n_ff_shexp;
// embeddings
tok_embd = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab}, 0);
@@ -5218,12 +5231,26 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
layer.bk = create_tensor(tn(LLM_TENSOR_ATTN_K, "bias", i), {n_embd_k_gqa_i}, TENSOR_NOT_REQUIRED);
layer.bv = create_tensor(tn(LLM_TENSOR_ATTN_V, "bias", i), {n_embd_v_gqa_i}, TENSOR_NOT_REQUIRED);
layer.bo = create_tensor(tn(LLM_TENSOR_ATTN_OUT, "bias", i), {n_embd}, TENSOR_NOT_REQUIRED);
} else {
// mlp layers
layer.ffn_down = create_tensor(tn(LLM_TENSOR_FFN_DOWN, "weight", i), { hparams.n_ff(i), n_embd}, 0);
layer.ffn_up = create_tensor(tn(LLM_TENSOR_FFN_UP, "weight", i), {n_embd, hparams.n_ff(i)}, 0);
layer.ffn_down_b = create_tensor(tn(LLM_TENSOR_FFN_DOWN, "bias", i), {n_embd}, TENSOR_NOT_REQUIRED);
layer.ffn_up_b = create_tensor(tn(LLM_TENSOR_FFN_UP, "bias", i), {hparams.n_ff(i)}, TENSOR_NOT_REQUIRED);
} else {
if (n_expert != 0) {
layer.ffn_gate_inp = create_tensor(tn(LLM_TENSOR_FFN_GATE_INP, "weight", i), { n_embd, n_expert}, 0);
layer.ffn_exp_probs_b = create_tensor(tn(LLM_TENSOR_FFN_EXP_PROBS_B, "bias", i), {n_expert }, 0);
// MoE branch
layer.ffn_down_exps = create_tensor(tn(LLM_TENSOR_FFN_DOWN_EXPS, "weight", i), {n_ff_exp, n_embd, n_expert}, 0);
layer.ffn_up_exps = create_tensor(tn(LLM_TENSOR_FFN_UP_EXPS, "weight", i), { n_embd, n_ff_exp, n_expert}, 0);
// Shared expert branch
layer.ffn_down_shexp = create_tensor(tn(LLM_TENSOR_FFN_DOWN_SHEXP, "weight", i), {n_ff_shexp, n_embd}, 0);
layer.ffn_up_shexp = create_tensor(tn(LLM_TENSOR_FFN_UP_SHEXP, "weight", i), {n_embd, n_ff_shexp}, 0);
} else {
// mlp layers
layer.ffn_down = create_tensor(tn(LLM_TENSOR_FFN_DOWN, "weight", i), { hparams.n_ff(i), n_embd}, 0);
layer.ffn_up = create_tensor(tn(LLM_TENSOR_FFN_UP, "weight", i), {n_embd, hparams.n_ff(i)}, 0);
layer.ffn_down_b = create_tensor(tn(LLM_TENSOR_FFN_DOWN, "bias", i), {n_embd}, TENSOR_NOT_REQUIRED);
layer.ffn_up_b = create_tensor(tn(LLM_TENSOR_FFN_UP, "bias", i), {hparams.n_ff(i)}, TENSOR_NOT_REQUIRED);
}
}
}
} break;
@@ -6850,7 +6877,8 @@ void llama_model::print_info() const {
arch == LLM_ARCH_PLAMO2 ||
arch == LLM_ARCH_GRANITE_HYBRID ||
arch == LLM_ARCH_QWEN3NEXT ||
arch == LLM_ARCH_NEMOTRON_H) {
arch == LLM_ARCH_NEMOTRON_H ||
arch == LLM_ARCH_NEMOTRON_H_MOE) {
LLAMA_LOG_INFO("%s: ssm_d_conv = %u\n", __func__, hparams.ssm_d_conv);
LLAMA_LOG_INFO("%s: ssm_d_inner = %u\n", __func__, hparams.ssm_d_inner);
LLAMA_LOG_INFO("%s: ssm_d_state = %u\n", __func__, hparams.ssm_d_state);
@@ -6905,7 +6933,8 @@ void llama_model::print_info() const {
if (arch == LLM_ARCH_MINICPM ||
arch == LLM_ARCH_GRANITE ||
arch == LLM_ARCH_GRANITE_MOE ||
arch == LLM_ARCH_GRANITE_HYBRID) {
arch == LLM_ARCH_GRANITE_HYBRID ||
arch == LLM_ARCH_NEMOTRON_H_MOE) {
LLAMA_LOG_INFO("%s: f_embedding_scale = %f\n", __func__, hparams.f_embedding_scale);
LLAMA_LOG_INFO("%s: f_residual_scale = %f\n", __func__, hparams.f_residual_scale);
LLAMA_LOG_INFO("%s: f_attention_scale = %f\n", __func__, hparams.f_attention_scale);
@@ -7086,7 +7115,7 @@ llama_memory_i * llama_model::create_memory(const llama_memory_params & params,
if (arch == LLM_ARCH_FALCON_H1) {
filter_attn = [&](int32_t) { return true; };
filter_recr = [&](int32_t) { return true; };
} else if (arch == LLM_ARCH_NEMOTRON_H) {
} else if (arch == LLM_ARCH_NEMOTRON_H || arch == LLM_ARCH_NEMOTRON_H_MOE) {
filter_attn = [&](int32_t il) {
return !hparams.is_recurrent(il) && hparams.n_ff(il) == 0;
};
@@ -7457,6 +7486,7 @@ ggml_cgraph * llama_model::build_graph(const llm_graph_params & params) const {
llm = std::make_unique<llm_build_nemotron>(*this, params);
} break;
case LLM_ARCH_NEMOTRON_H:
case LLM_ARCH_NEMOTRON_H_MOE:
{
llm = std::make_unique<llm_build_nemotron_h>(*this, params);
} break;
@@ -7741,6 +7771,7 @@ llama_rope_type llama_model_rope_type(const llama_model * model) {
case LLM_ARCH_ARWKV7:
case LLM_ARCH_WAVTOKENIZER_DEC:
case LLM_ARCH_NEMOTRON_H:
case LLM_ARCH_NEMOTRON_H_MOE:
return LLAMA_ROPE_TYPE_NONE;
// use what we call a normal RoPE, operating on pairs of consecutive head values

View File

@@ -113,6 +113,7 @@ enum llm_type {
LLM_TYPE_16B_A1B,
LLM_TYPE_21B_A3B, // Ernie MoE small
LLM_TYPE_30B_A3B,
LLM_TYPE_31B_A3_5B,
LLM_TYPE_80B_A3B, // Qwen3 Next
LLM_TYPE_100B_A6B,
LLM_TYPE_106B_A12B, // GLM-4.5-Air

View File

@@ -107,12 +107,41 @@ ggml_tensor * llm_build_nemotron_h::build_attention_layer(ggml_tensor *
}
ggml_tensor * llm_build_nemotron_h::build_ffn_layer(ggml_tensor * cur, const llama_model & model, const int il) {
cur = build_ffn(cur,
model.layers[il].ffn_up, model.layers[il].ffn_up_b, NULL,
NULL, NULL, NULL,
model.layers[il].ffn_down, model.layers[il].ffn_down_b, NULL,
NULL, LLM_FFN_RELU_SQR, LLM_FFN_PAR, il);
cb(cur, "ffn_out", il);
if (model.layers[il].ffn_gate_inp == nullptr) {
cur = build_ffn(cur,
model.layers[il].ffn_up, model.layers[il].ffn_up_b, NULL,
NULL, NULL, NULL,
model.layers[il].ffn_down, model.layers[il].ffn_down_b, NULL,
NULL,
LLM_FFN_RELU_SQR, LLM_FFN_PAR, il);
cb(cur, "ffn_out", il);
} else {
ggml_tensor * ffn_inp = cur;
ggml_tensor * moe_out =
build_moe_ffn(ffn_inp,
model.layers[il].ffn_gate_inp,
model.layers[il].ffn_up_exps,
nullptr, // no gate
model.layers[il].ffn_down_exps,
model.layers[il].ffn_exp_probs_b,
n_expert, n_expert_used,
LLM_FFN_RELU_SQR, hparams.expert_weights_norm,
true, hparams.expert_weights_scale,
LLAMA_EXPERT_GATING_FUNC_TYPE_SIGMOID,
il);
cb(moe_out, "ffn_moe_out", il);
ggml_tensor * ffn_shexp = build_ffn(ffn_inp,
model.layers[il].ffn_up_shexp, NULL, NULL,
NULL /* no gate */ , NULL, NULL,
model.layers[il].ffn_down_shexp, NULL, NULL,
NULL,
LLM_FFN_RELU_SQR, LLM_FFN_PAR, il);
cb(ffn_shexp, "ffn_shexp", il);
cur = ggml_add(ctx0, moe_out, ffn_shexp);
cb(cur, "ffn_out", il);
}
cur = build_cvec(cur, il);
cb(cur, "l_out", il);

Binary file not shown.

View File

@@ -619,11 +619,12 @@ flowchart TB
### Test Types
| Type | Tool | Location | Command |
| ------------- | ------------------ | -------------------------------- | ------------------- |
| **E2E** | Playwright | `tests/e2e/` | `npm run test:e2e` |
| **Unit** | Vitest | `tests/client/`, `tests/server/` | `npm run test:unit` |
| **UI/Visual** | Storybook + Vitest | `tests/stories/` | `npm run test:ui` |
| Type | Tool | Location | Command |
| ------------- | ------------------ | ---------------- | ------------------- |
| **Unit** | Vitest | `tests/unit/` | `npm run test:unit` |
| **UI/Visual** | Storybook + Vitest | `tests/stories/` | `npm run test:ui` |
| **E2E** | Playwright | `tests/e2e/` | `npm run test:e2e` |
| **Client**    | Vitest             | `tests/client/`  | `npm run test:unit` |
### Running Tests

View File

@@ -13,12 +13,11 @@
"reset": "rm -rf .svelte-kit node_modules",
"format": "prettier --write .",
"lint": "prettier --check . && eslint .",
"test": "npm run test:ui -- --run && npm run test:client -- --run && npm run test:server -- --run && npm run test:e2e",
"test": "npm run test:ui -- --run && npm run test:client -- --run && npm run test:unit -- --run && npm run test:e2e",
"test:e2e": "playwright test",
"test:client": "vitest --project=client",
"test:server": "vitest --project=server",
"test:unit": "vitest --project=unit",
"test:ui": "vitest --project=ui",
"test:unit": "vitest",
"storybook": "storybook dev -p 6006",
"build-storybook": "storybook build",
"cleanup": "rm -rf .svelte-kit build node_modules test-results"

View File

@@ -241,7 +241,7 @@
</div>
{/if}
{:else if (isText || (isPdf && pdfViewMode === 'text')) && displayTextContent}
<SyntaxHighlightedCode code={displayTextContent} {language} maxWidth="69rem" />
<SyntaxHighlightedCode code={displayTextContent} {language} maxWidth="calc(69rem - 2rem)" />
{:else if isAudio}
<div class="flex items-center justify-center p-8">
<div class="w-full max-w-md text-center">

View File

@@ -24,7 +24,7 @@
MimeTypeImage,
MimeTypeText
} from '$lib/enums';
import { isIMEComposing } from '$lib/utils';
import { isIMEComposing, parseClipboardContent } from '$lib/utils';
import {
AudioRecorder,
convertToWav,
@@ -191,7 +191,6 @@
if ((!message.trim() && uploadedFiles.length === 0) || disabled || isLoading) return;
// Check if model is selected first
if (!checkModelSelected()) return;
const messageToSend = message.trim();
@@ -228,6 +227,31 @@
const text = event.clipboardData.getData(MimeTypeText.PLAIN);
if (text.startsWith('"')) {
const parsed = parseClipboardContent(text);
if (parsed.textAttachments.length > 0) {
event.preventDefault();
message = parsed.message;
const attachmentFiles = parsed.textAttachments.map(
(att) =>
new File([att.content], att.name, {
type: MimeTypeText.PLAIN
})
);
onFileUpload?.(attachmentFiles);
setTimeout(() => {
textareaRef?.focus();
}, 10);
return;
}
}
if (
text.length > 0 &&
pasteLongTextToFileLength > 0 &&

View File

@@ -35,7 +35,7 @@
<div class="flex items-center gap-1 {className}">
<DropdownMenu.Root>
<DropdownMenu.Trigger name="Attach files">
<DropdownMenu.Trigger name="Attach files" {disabled}>
<Tooltip.Root>
<Tooltip.Trigger>
<Button

View File

@@ -173,6 +173,7 @@
/>
<ModelsSelector
{disabled}
bind:this={selectorModelRef}
currentModel={conversationModel}
forceForegroundText={true}

View File

@@ -1,6 +1,7 @@
<script lang="ts">
import { chatStore } from '$lib/stores/chat.svelte';
import { copyToClipboard, isIMEComposing } from '$lib/utils';
import { config } from '$lib/stores/settings.svelte';
import { copyToClipboard, isIMEComposing, formatMessageForClipboard } from '$lib/utils';
import ChatMessageAssistant from './ChatMessageAssistant.svelte';
import ChatMessageUser from './ChatMessageUser.svelte';
import ChatMessageSystem from './ChatMessageSystem.svelte';
@@ -87,7 +88,9 @@
}
async function handleCopy() {
await copyToClipboard(message.content, 'Message copied to clipboard');
const asPlainText = Boolean(config().copyTextAttachmentsAsPlainText);
const clipboardContent = formatMessageForClipboard(message.content, message.extra, asPlainText);
await copyToClipboard(clipboardContent, 'Message copied to clipboard');
onCopy?.(message);
}

View File

@@ -57,6 +57,11 @@
label: 'Paste long text to file length',
type: 'input'
},
{
key: 'copyTextAttachmentsAsPlainText',
label: 'Copy text attachments as plain text',
type: 'checkbox'
},
{
key: 'enableContinueGeneration',
label: 'Enable "Continue" button',
@@ -109,6 +114,16 @@
key: 'disableAutoScroll',
label: 'Disable automatic scroll',
type: 'checkbox'
},
{
key: 'alwaysShowSidebarOnDesktop',
label: 'Always show sidebar on desktop',
type: 'checkbox'
},
{
key: 'autoShowSidebarOnNewChat',
label: 'Auto-show sidebar on new chat',
type: 'checkbox'
}
]
},
@@ -404,7 +419,7 @@
</div>
<!-- Mobile Header with Horizontal Scrollable Menu -->
<div class="flex flex-col md:hidden">
<div class="flex flex-col pt-6 md:hidden">
<div class="border-b border-border/30 py-4">
<!-- Horizontal Scrollable Category Menu with Navigation -->
<div class="relative flex items-center" style="scroll-padding: 1rem;">

View File

@@ -72,9 +72,10 @@
<div
class="code-preview-wrapper overflow-auto rounded-lg border border-border bg-muted {className}"
style="max-height: {maxHeight};"
style="max-height: {maxHeight}; max-width: {maxWidth};"
>
<pre class="m-0 overflow-x-auto p-4 max-w-[{maxWidth}]"><code class="hljs text-sm leading-relaxed"
<!-- Needs to be formatted as single line for proper rendering -->
<pre class="m-0 overflow-x-auto p-4"><code class="hljs text-sm leading-relaxed"
>{@html highlightedHtml}</code
></pre>
</div>

View File

@@ -179,51 +179,37 @@
});
});
// Handle changes to the model selector pop-down or the model dialog, depending on if the server is in
// router mode or not.
function handleOpenChange(open: boolean) {
if (loading || updating) return;
if (open) {
isOpen = true;
searchTerm = '';
highlightedIndex = -1;
if (isRouter) {
if (open) {
isOpen = true;
searchTerm = '';
highlightedIndex = -1;
// Focus search input after popover opens
tick().then(() => {
requestAnimationFrame(() => searchInputRef?.focus());
});
// Focus search input after popover opens
tick().then(() => {
requestAnimationFrame(() => searchInputRef?.focus());
});
if (isRouter) {
modelsStore.fetchRouterModels().then(() => {
modelsStore.fetchModalitiesForLoadedModels();
});
} else {
isOpen = false;
searchTerm = '';
highlightedIndex = -1;
}
} else {
isOpen = false;
searchTerm = '';
highlightedIndex = -1;
showModelDialog = open;
}
}
function handleTriggerClick() {
if (loading || updating) return;
if (!isRouter) {
// Single model mode: show dialog instead of popover
showModelDialog = true;
}
// For router mode, the Popover handles open/close
}
export function open() {
if (isRouter) {
handleOpenChange(true);
} else {
showModelDialog = true;
}
}
function closeMenu() {
handleOpenChange(false);
handleOpenChange(true);
}
function handleSearchKeyDown(event: KeyboardEvent) {
@@ -292,7 +278,7 @@
}
if (shouldCloseMenu) {
closeMenu();
handleOpenChange(false);
// Focus the chat textarea after model selection
requestAnimationFrame(() => {
@@ -360,8 +346,181 @@
{:else}
{@const selectedOption = getDisplayOption()}
<Popover.Root bind:open={isOpen} onOpenChange={handleOpenChange}>
<Popover.Trigger
{#if isRouter}
<Popover.Root bind:open={isOpen} onOpenChange={handleOpenChange}>
<Popover.Trigger
class={cn(
`inline-flex cursor-pointer items-center gap-1.5 rounded-sm bg-muted-foreground/10 px-1.5 py-1 text-xs transition hover:text-foreground focus:outline-none focus-visible:ring-2 focus-visible:ring-ring focus-visible:ring-offset-2 disabled:cursor-not-allowed disabled:opacity-60`,
!isCurrentModelInCache()
? 'bg-red-400/10 !text-red-400 hover:bg-red-400/20 hover:text-red-400'
: forceForegroundText
? 'text-foreground'
: isHighlightedCurrentModelActive
? 'text-foreground'
: 'text-muted-foreground',
isOpen ? 'text-foreground' : ''
)}
style="max-width: min(calc(100cqw - 6.5rem), 32rem)"
disabled={disabled || updating}
>
<Package class="h-3.5 w-3.5" />
<span class="truncate font-medium">
{selectedOption?.model || 'Select model'}
</span>
{#if updating}
<Loader2 class="h-3 w-3.5 animate-spin" />
{:else}
<ChevronDown class="h-3 w-3.5" />
{/if}
</Popover.Trigger>
<Popover.Content
class="group/popover-content w-96 max-w-[calc(100vw-2rem)] p-0"
align="end"
sideOffset={8}
collisionPadding={16}
>
<div class="flex max-h-[50dvh] flex-col overflow-hidden">
<div
class="order-1 shrink-0 border-b p-4 group-data-[side=top]/popover-content:order-2 group-data-[side=top]/popover-content:border-t group-data-[side=top]/popover-content:border-b-0"
>
<SearchInput
id="model-search"
placeholder="Search models..."
bind:value={searchTerm}
bind:ref={searchInputRef}
onClose={() => handleOpenChange(false)}
onKeyDown={handleSearchKeyDown}
/>
</div>
<div
class="models-list order-2 min-h-0 flex-1 overflow-y-auto group-data-[side=top]/popover-content:order-1"
>
{#if !isCurrentModelInCache() && currentModel}
<!-- Show unavailable model as first option (disabled) -->
<button
type="button"
class="flex w-full cursor-not-allowed items-center bg-red-400/10 px-4 py-2 text-left text-sm text-red-400"
role="option"
aria-selected="true"
aria-disabled="true"
disabled
>
<span class="truncate">{selectedOption?.name || currentModel}</span>
<span class="ml-2 text-xs whitespace-nowrap opacity-70">(not available)</span>
</button>
<div class="my-1 h-px bg-border"></div>
{/if}
{#if filteredOptions.length === 0}
<p class="px-4 py-3 text-sm text-muted-foreground">No models found.</p>
{/if}
{#each filteredOptions as option, index (option.id)}
{@const status = getModelStatus(option.model)}
{@const isLoaded = status === ServerModelStatus.LOADED}
{@const isLoading = status === ServerModelStatus.LOADING}
{@const isSelected = currentModel === option.model || activeId === option.id}
{@const isCompatible = isModelCompatible(option)}
{@const isHighlighted = index === highlightedIndex}
{@const missingModalities = getMissingModalities(option)}
<div
class={cn(
'group flex w-full items-center gap-2 px-4 py-2 text-left text-sm transition focus:outline-none',
isCompatible
? 'cursor-pointer hover:bg-muted focus:bg-muted'
: 'cursor-not-allowed opacity-50',
isSelected || isHighlighted
? 'bg-accent text-accent-foreground'
: isCompatible
? 'hover:bg-accent hover:text-accent-foreground'
: '',
isLoaded ? 'text-popover-foreground' : 'text-muted-foreground'
)}
role="option"
aria-selected={isSelected || isHighlighted}
aria-disabled={!isCompatible}
tabindex={isCompatible ? 0 : -1}
onclick={() => isCompatible && handleSelect(option.id)}
onmouseenter={() => (highlightedIndex = index)}
onkeydown={(e) => {
if (isCompatible && (e.key === 'Enter' || e.key === ' ')) {
e.preventDefault();
handleSelect(option.id);
}
}}
>
<span class="min-w-0 flex-1 truncate">{option.model}</span>
{#if missingModalities}
<span class="flex shrink-0 items-center gap-1 text-muted-foreground/70">
{#if missingModalities.vision}
<Tooltip.Root>
<Tooltip.Trigger>
<EyeOff class="h-3.5 w-3.5" />
</Tooltip.Trigger>
<Tooltip.Content class="z-[9999]">
<p>No vision support</p>
</Tooltip.Content>
</Tooltip.Root>
{/if}
{#if missingModalities.audio}
<Tooltip.Root>
<Tooltip.Trigger>
<MicOff class="h-3.5 w-3.5" />
</Tooltip.Trigger>
<Tooltip.Content class="z-[9999]">
<p>No audio support</p>
</Tooltip.Content>
</Tooltip.Root>
{/if}
</span>
{/if}
{#if isLoading}
<Tooltip.Root>
<Tooltip.Trigger>
<Loader2 class="h-4 w-4 shrink-0 animate-spin text-muted-foreground" />
</Tooltip.Trigger>
<Tooltip.Content class="z-[9999]">
<p>Loading model...</p>
</Tooltip.Content>
</Tooltip.Root>
{:else if isLoaded}
<Tooltip.Root>
<Tooltip.Trigger>
<button
type="button"
class="relative ml-2 flex h-4 w-4 shrink-0 items-center justify-center"
onclick={(e) => {
e.stopPropagation();
modelsStore.unloadModel(option.model);
}}
>
<span
class="mr-2 h-2 w-2 rounded-full bg-green-500 transition-opacity group-hover:opacity-0"
></span>
<Power
class="absolute mr-2 h-4 w-4 text-red-500 opacity-0 transition-opacity group-hover:opacity-100 hover:text-red-600"
/>
</button>
</Tooltip.Trigger>
<Tooltip.Content class="z-[9999]">
<p>Unload model</p>
</Tooltip.Content>
</Tooltip.Root>
{:else}
<span class="mx-2 h-2 w-2 rounded-full bg-muted-foreground/50"></span>
{/if}
</div>
{/each}
</div>
</div>
</Popover.Content>
</Popover.Root>
{:else}
<button
class={cn(
`inline-flex cursor-pointer items-center gap-1.5 rounded-sm bg-muted-foreground/10 px-1.5 py-1 text-xs transition hover:text-foreground focus:outline-none focus-visible:ring-2 focus-visible:ring-ring focus-visible:ring-offset-2 disabled:cursor-not-allowed disabled:opacity-60`,
!isCurrentModelInCache()
@@ -374,165 +533,20 @@
isOpen ? 'text-foreground' : ''
)}
style="max-width: min(calc(100cqw - 6.5rem), 32rem)"
onclick={handleTriggerClick}
disabled={disabled || updating || !isRouter}
onclick={() => handleOpenChange(true)}
disabled={disabled || updating}
>
<Package class="h-3.5 w-3.5" />
<span class="truncate font-medium">
{selectedOption?.model || 'Select model'}
{selectedOption?.model}
</span>
{#if updating}
<Loader2 class="h-3 w-3.5 animate-spin" />
{:else if isRouter}
<ChevronDown class="h-3 w-3.5" />
{/if}
</Popover.Trigger>
<Popover.Content
class="group/popover-content w-96 max-w-[calc(100vw-2rem)] p-0"
align="end"
sideOffset={8}
collisionPadding={16}
>
<div class="flex max-h-[50dvh] flex-col overflow-hidden">
<div
class="order-1 shrink-0 border-b p-4 group-data-[side=top]/popover-content:order-2 group-data-[side=top]/popover-content:border-t group-data-[side=top]/popover-content:border-b-0"
>
<SearchInput
id="model-search"
placeholder="Search models..."
bind:value={searchTerm}
bind:ref={searchInputRef}
onClose={closeMenu}
onKeyDown={handleSearchKeyDown}
/>
</div>
<div
class="models-list order-2 min-h-0 flex-1 overflow-y-auto group-data-[side=top]/popover-content:order-1"
>
{#if !isCurrentModelInCache() && currentModel}
<!-- Show unavailable model as first option (disabled) -->
<button
type="button"
class="flex w-full cursor-not-allowed items-center bg-red-400/10 px-4 py-2 text-left text-sm text-red-400"
role="option"
aria-selected="true"
aria-disabled="true"
disabled
>
<span class="truncate">{selectedOption?.name || currentModel}</span>
<span class="ml-2 text-xs whitespace-nowrap opacity-70">(not available)</span>
</button>
<div class="my-1 h-px bg-border"></div>
{/if}
{#if filteredOptions.length === 0}
<p class="px-4 py-3 text-sm text-muted-foreground">No models found.</p>
{/if}
{#each filteredOptions as option, index (option.id)}
{@const status = getModelStatus(option.model)}
{@const isLoaded = status === ServerModelStatus.LOADED}
{@const isLoading = status === ServerModelStatus.LOADING}
{@const isSelected = currentModel === option.model || activeId === option.id}
{@const isCompatible = isModelCompatible(option)}
{@const isHighlighted = index === highlightedIndex}
{@const missingModalities = getMissingModalities(option)}
<div
class={cn(
'group flex w-full items-center gap-2 px-4 py-2 text-left text-sm transition focus:outline-none',
isCompatible
? 'cursor-pointer hover:bg-muted focus:bg-muted'
: 'cursor-not-allowed opacity-50',
isSelected || isHighlighted
? 'bg-accent text-accent-foreground'
: isCompatible
? 'hover:bg-accent hover:text-accent-foreground'
: '',
isLoaded ? 'text-popover-foreground' : 'text-muted-foreground'
)}
role="option"
aria-selected={isSelected || isHighlighted}
aria-disabled={!isCompatible}
tabindex={isCompatible ? 0 : -1}
onclick={() => isCompatible && handleSelect(option.id)}
onmouseenter={() => (highlightedIndex = index)}
onkeydown={(e) => {
if (isCompatible && (e.key === 'Enter' || e.key === ' ')) {
e.preventDefault();
handleSelect(option.id);
}
}}
>
<span class="min-w-0 flex-1 truncate">{option.model}</span>
{#if missingModalities}
<span class="flex shrink-0 items-center gap-1 text-muted-foreground/70">
{#if missingModalities.vision}
<Tooltip.Root>
<Tooltip.Trigger>
<EyeOff class="h-3.5 w-3.5" />
</Tooltip.Trigger>
<Tooltip.Content class="z-[9999]">
<p>No vision support</p>
</Tooltip.Content>
</Tooltip.Root>
{/if}
{#if missingModalities.audio}
<Tooltip.Root>
<Tooltip.Trigger>
<MicOff class="h-3.5 w-3.5" />
</Tooltip.Trigger>
<Tooltip.Content class="z-[9999]">
<p>No audio support</p>
</Tooltip.Content>
</Tooltip.Root>
{/if}
</span>
{/if}
{#if isLoading}
<Tooltip.Root>
<Tooltip.Trigger>
<Loader2 class="h-4 w-4 shrink-0 animate-spin text-muted-foreground" />
</Tooltip.Trigger>
<Tooltip.Content class="z-[9999]">
<p>Loading model...</p>
</Tooltip.Content>
</Tooltip.Root>
{:else if isLoaded}
<Tooltip.Root>
<Tooltip.Trigger>
<button
type="button"
class="relative ml-2 flex h-4 w-4 shrink-0 items-center justify-center"
onclick={(e) => {
e.stopPropagation();
modelsStore.unloadModel(option.model);
}}
>
<span
class="mr-2 h-2 w-2 rounded-full bg-green-500 transition-opacity group-hover:opacity-0"
></span>
<Power
class="absolute mr-2 h-4 w-4 text-red-500 opacity-0 transition-opacity group-hover:opacity-100 hover:text-red-600"
/>
</button>
</Tooltip.Trigger>
<Tooltip.Content class="z-[9999]">
<p>Unload model</p>
</Tooltip.Content>
</Tooltip.Root>
{:else}
<span class="mx-2 h-2 w-2 rounded-full bg-muted-foreground/50"></span>
{/if}
</div>
{/each}
</div>
</div>
</Popover.Content>
</Popover.Root>
</button>
{/if}
{/if}
</div>

View File

@@ -12,9 +12,12 @@ export const SETTING_CONFIG_DEFAULT: Record<string, string | number | boolean> =
showMessageStats: true,
askForTitleConfirmation: false,
pasteLongTextToFileLen: 2500,
copyTextAttachmentsAsPlainText: false,
pdfAsImage: false,
disableAutoScroll: false,
renderUserContentAsMarkdown: false,
alwaysShowSidebarOnDesktop: false,
autoShowSidebarOnNewChat: true,
autoMicOnEmpty: false,
// make sure these default values are in sync with `common.h`
samplers: 'top_k;typ_p;top_p;min_p;temperature',
@@ -50,6 +53,8 @@ export const SETTING_CONFIG_INFO: Record<string, string> = {
'Choose the color theme for the interface. You can choose between System (follows your device settings), Light, or Dark.',
pasteLongTextToFileLen:
'On pasting long text, it will be converted to a file. You can control the file length by setting the value of this parameter. Value 0 means disable.',
copyTextAttachmentsAsPlainText:
'When copying a message with text attachments, combine them into a single plain text string instead of a special format that can be pasted back as attachments.',
samplers:
'The order at which samplers are applied, in simplified way. Default is "top_k;typ_p;top_p;min_p;temperature": top_k->typ_p->top_p->min_p->temperature',
temperature:
@@ -96,6 +101,10 @@ export const SETTING_CONFIG_INFO: Record<string, string> = {
disableAutoScroll:
'Disable automatic scrolling while messages stream so you can control the viewport position manually.',
renderUserContentAsMarkdown: 'Render user messages using markdown formatting in the chat.',
alwaysShowSidebarOnDesktop:
'Always keep the sidebar visible on desktop instead of auto-hiding it.',
autoShowSidebarOnNewChat:
'Automatically show sidebar when starting a new chat. Disable to keep the sidebar hidden until you click on it.',
autoMicOnEmpty:
'Automatically show microphone button instead of send button when textarea is empty for models with audio modality support.',
pyInterpreterEnabled:

View File

@@ -0,0 +1,262 @@
import { toast } from 'svelte-sonner';
import { AttachmentType } from '$lib/enums';
import type {
DatabaseMessageExtra,
DatabaseMessageExtraTextFile,
DatabaseMessageExtraLegacyContext
} from '$lib/types/database';
/**
 * Copies `text` to the clipboard and reports the outcome via a toast.
 *
 * Prefers the asynchronous Clipboard API (only exposed in secure contexts);
 * otherwise falls back to a hidden textarea plus the legacy
 * `document.execCommand('copy')` path.
 *
 * @param text - Text to place on the clipboard
 * @param successMessage - Toast message shown on success (optional)
 * @param errorMessage - Toast message shown on failure (optional)
 * @returns Promise<boolean> - True if successful, false otherwise
 */
export async function copyToClipboard(
	text: string,
	successMessage = 'Copied to clipboard',
	errorMessage = 'Failed to copy to clipboard'
): Promise<boolean> {
	try {
		// Secure contexts: use the modern asynchronous Clipboard API.
		if (navigator.clipboard?.writeText) {
			await navigator.clipboard.writeText(text);
			toast.success(successMessage);
			return true;
		}

		// Non-secure contexts: select the text inside an off-screen textarea
		// and issue the deprecated execCommand-based copy.
		const helper = document.createElement('textarea');
		helper.value = text;
		helper.style.position = 'fixed';
		helper.style.left = '-999999px';
		helper.style.top = '-999999px';
		document.body.appendChild(helper);
		helper.focus();
		helper.select();

		const copied = document.execCommand('copy');
		document.body.removeChild(helper);

		if (!copied) {
			throw new Error('execCommand failed');
		}

		toast.success(successMessage);
		return true;
	} catch (error) {
		console.error('Failed to copy to clipboard:', error);
		toast.error(errorMessage);
		return false;
	}
}
/**
 * Decodes HTML entities in `rawCode` and copies the decoded text to the
 * clipboard, with a toast reporting the outcome.
 *
 * @param rawCode - Raw code string that may contain HTML entities
 * @param successMessage - Toast message shown on success (optional)
 * @param errorMessage - Toast message shown on failure (optional)
 * @returns Promise<boolean> - True if successful, false otherwise
 */
export async function copyCodeToClipboard(
	rawCode: string,
	successMessage = 'Code copied to clipboard',
	errorMessage = 'Failed to copy code'
): Promise<boolean> {
	// Parsing the string as HTML and reading textContent decodes every entity
	// in one pass (more robust than a hand-maintained replace chain).
	const parsed = new DOMParser().parseFromString(rawCode, 'text/html');
	const decodedCode = parsed.body.textContent ?? rawCode;
	return copyToClipboard(decodedCode, successMessage, errorMessage);
}
/**
 * Shape of a single text attachment as serialized into the clipboard's
 * special copy format by formatMessageForClipboard().
 */
export interface ClipboardTextAttachment {
// discriminator; always the TEXT attachment type (legacy CONTEXT is converted)
type: typeof AttachmentType.TEXT;
// original attachment file name, e.g. "notes.txt"
name: string;
// full text body of the attachment
content: string;
}
/**
 * Result of parseClipboardContent(): the user-visible message plus any text
 * attachments recovered from the special clipboard format.
 */
export interface ParsedClipboardContent {
// decoded message text (or the raw clipboard text when not in special format)
message: string;
// recovered attachments; empty for plain-text clipboard content
textAttachments: ClipboardTextAttachment[];
}
/**
 * Formats a message with its text attachments for clipboard copying.
 *
 * Default format (asPlainText = false) — a JSON-encoded message string on the
 * first line followed by a pretty-printed JSON array that
 * parseClipboardContent() can read back:
 * ```
 * "Text message content"
 * [
 *   {"type":"TEXT","name":"filename.txt","content":"..."},
 *   {"type":"TEXT","name":"another.txt","content":"..."}
 * ]
 * ```
 *
 * Plain text format (asPlainText = true) — the message and each attachment
 * body joined by blank lines:
 * ```
 * Text message content
 *
 * file content here
 *
 * another file content
 * ```
 *
 * @param content - The message text content
 * @param extras - Optional array of message attachments
 * @param asPlainText - If true, format as plain text without JSON structure
 * @returns Formatted string for clipboard
 */
export function formatMessageForClipboard(
	content: string,
	extras?: DatabaseMessageExtra[],
	asPlainText: boolean = false
): string {
	// Keep only textual attachments: the current TEXT type and legacy CONTEXT.
	const textAttachments = (extras ?? []).filter(
		(extra): extra is DatabaseMessageExtraTextFile | DatabaseMessageExtraLegacyContext =>
			extra.type === AttachmentType.TEXT || extra.type === AttachmentType.LEGACY_CONTEXT
	);

	if (!textAttachments.length) {
		return content;
	}

	if (asPlainText) {
		return [content, ...textAttachments.map((att) => att.content)].join('\n\n');
	}

	// Legacy CONTEXT attachments are normalized to the TEXT type on copy.
	const payload: ClipboardTextAttachment[] = textAttachments.map((att) => ({
		type: AttachmentType.TEXT,
		name: att.name,
		content: att.content
	}));

	return `${JSON.stringify(content)}\n${JSON.stringify(payload, null, 2)}`;
}
/**
 * Parses clipboard content to extract message and text attachments.
 * Supports both plain text and the special format written by
 * formatMessageForClipboard(): a JSON-encoded message string followed by a
 * JSON array of attachments.
 *
 * Plain text — including text that merely starts with a double quote but is
 * not followed by an attachment array — is returned verbatim with no
 * attachments. Malformed JSON also falls back to the verbatim input.
 *
 * @param clipboardText - Raw text from clipboard
 * @returns Parsed content with message and attachments
 */
export function parseClipboardContent(clipboardText: string): ParsedClipboardContent {
	const defaultResult: ParsedClipboardContent = {
		message: clipboardText,
		textAttachments: []
	};

	// The special format always begins with a JSON-encoded string.
	if (!clipboardText.startsWith('"')) {
		return defaultResult;
	}

	try {
		// Locate the closing quote of the leading JSON string, honoring escapes.
		let stringEndIndex = -1;
		let escaped = false;
		for (let i = 1; i < clipboardText.length; i++) {
			const char = clipboardText[i];
			if (escaped) {
				escaped = false;
				continue;
			}
			if (char === '\\') {
				escaped = true;
				continue;
			}
			if (char === '"') {
				stringEndIndex = i;
				break;
			}
		}

		if (stringEndIndex === -1) {
			return defaultResult;
		}

		const jsonStringPart = clipboardText.substring(0, stringEndIndex + 1);
		const remainingPart = clipboardText.substring(stringEndIndex + 1).trim();
		const message = JSON.parse(jsonStringPart) as string;

		if (!remainingPart) {
			// A lone JSON string: the decoded string is the whole message.
			return {
				message,
				textAttachments: []
			};
		}

		if (!remainingPart.startsWith('[')) {
			// Trailing text that is not an attachment array means this is ordinary
			// prose that happens to start with a quoted span. Return the input
			// verbatim instead of silently dropping everything after the quote
			// (previous behavior truncated the message here).
			return defaultResult;
		}

		const attachments = JSON.parse(remainingPart) as unknown[];
		const validAttachments: ClipboardTextAttachment[] = [];

		for (const att of attachments) {
			if (isValidTextAttachment(att)) {
				validAttachments.push({
					type: AttachmentType.TEXT,
					name: att.name,
					content: att.content
				});
			}
		}

		return {
			message,
			textAttachments: validAttachments
		};
	} catch {
		// Any malformed JSON falls back to treating the input as plain text.
		return defaultResult;
	}
}
/**
 * Type guard validating that an unknown value has the shape of a text
 * attachment produced by formatMessageForClipboard().
 *
 * @param obj The object to validate
 * @returns true if the object is a valid text attachment
 */
function isValidTextAttachment(
	obj: unknown
): obj is { type: string; name: string; content: string } {
	// Reject primitives and null up front (typeof null === 'object').
	if (obj === null || typeof obj !== 'object') {
		return false;
	}

	const { type, name, content } = obj as Record<string, unknown>;

	// Accept both the enum value and the raw 'TEXT' string discriminator.
	const isTextType = type === AttachmentType.TEXT || type === 'TEXT';
	return isTextType && typeof name === 'string' && typeof content === 'string';
}
/**
 * Reports whether `clipboardText` is in the special copy format and actually
 * carries at least one valid text attachment.
 *
 * @param clipboardText - Raw text from clipboard
 * @returns true if the clipboard content contains our special format with attachments
 */
export function hasClipboardAttachments(clipboardText: string): boolean {
	// Cheap pre-check: the special format always starts with a JSON string,
	// so anything else can skip the full parse.
	if (!clipboardText.startsWith('"')) {
		return false;
	}

	return parseClipboardContent(clipboardText).textAttachments.length > 0;
}

View File

@@ -1,71 +0,0 @@
import { toast } from 'svelte-sonner';
/**
* Copy text to clipboard with toast notification
* Uses modern clipboard API when available, falls back to legacy method for non-secure contexts
* @param text - Text to copy to clipboard
* @param successMessage - Custom success message (optional)
* @param errorMessage - Custom error message (optional)
* @returns Promise<boolean> - True if successful, false otherwise
*/
export async function copyToClipboard(
text: string,
successMessage = 'Copied to clipboard',
errorMessage = 'Failed to copy to clipboard'
): Promise<boolean> {
try {
// Try modern clipboard API first (secure contexts only)
if (navigator.clipboard && navigator.clipboard.writeText) {
await navigator.clipboard.writeText(text);
toast.success(successMessage);
return true;
}
// Fallback for non-secure contexts
const textArea = document.createElement('textarea');
textArea.value = text;
textArea.style.position = 'fixed';
textArea.style.left = '-999999px';
textArea.style.top = '-999999px';
document.body.appendChild(textArea);
textArea.focus();
textArea.select();
const successful = document.execCommand('copy');
document.body.removeChild(textArea);
if (successful) {
toast.success(successMessage);
return true;
} else {
throw new Error('execCommand failed');
}
} catch (error) {
console.error('Failed to copy to clipboard:', error);
toast.error(errorMessage);
return false;
}
}
/**
* Copy code with HTML entity decoding and toast notification
* @param rawCode - Raw code string that may contain HTML entities
* @param successMessage - Custom success message (optional)
* @param errorMessage - Custom error message (optional)
* @returns Promise<boolean> - True if successful, false otherwise
*/
export async function copyCodeToClipboard(
rawCode: string,
successMessage = 'Code copied to clipboard',
errorMessage = 'Failed to copy code'
): Promise<boolean> {
// Decode HTML entities
const decodedCode = rawCode
.replace(/&amp;/g, '&')
.replace(/&lt;/g, '<')
.replace(/&gt;/g, '>')
.replace(/&quot;/g, '"')
.replace(/&#39;/g, "'");
return copyToClipboard(decodedCode, successMessage, errorMessage);
}

View File

@@ -40,7 +40,15 @@ export { setConfigValue, getConfigValue, configToParameterRecord } from './confi
export { createMessageCountMap, getMessageCount } from './conversation-utils';
// Clipboard utilities
export { copyToClipboard, copyCodeToClipboard } from './copy';
export {
copyToClipboard,
copyCodeToClipboard,
formatMessageForClipboard,
parseClipboardContent,
hasClipboardAttachments,
type ClipboardTextAttachment,
type ParsedClipboardContent
} from './clipboard';
// File preview utilities
export { getFileTypeLabel } from './file-preview';

View File

@@ -14,6 +14,7 @@
import { goto } from '$app/navigation';
import { modelsStore } from '$lib/stores/models.svelte';
import { TOOLTIP_DELAY_DURATION } from '$lib/constants/tooltip-config';
import { IsMobile } from '$lib/hooks/is-mobile.svelte';
let { children } = $props();
@@ -21,6 +22,10 @@
let isHomeRoute = $derived(page.route.id === '/');
let isNewChatMode = $derived(page.url.searchParams.get('new_chat') === 'true');
let showSidebarByDefault = $derived(activeMessages().length > 0 || isLoading());
let alwaysShowSidebarOnDesktop = $derived(config().alwaysShowSidebarOnDesktop);
let autoShowSidebarOnNewChat = $derived(config().autoShowSidebarOnNewChat);
let isMobile = new IsMobile();
let isDesktop = $derived(!isMobile.current);
let sidebarOpen = $state(false);
let innerHeight = $state<number | undefined>();
let chatSidebar:
@@ -76,6 +81,11 @@
}
$effect(() => {
if (alwaysShowSidebarOnDesktop && isDesktop) {
sidebarOpen = true;
return;
}
if (isHomeRoute && !isNewChatMode) {
// Auto-collapse sidebar when navigating to home route (but not in new chat mode)
sidebarOpen = false;
@@ -83,8 +93,11 @@
// Keep sidebar open in new chat mode
sidebarOpen = true;
} else if (isChatRoute) {
// On chat routes, show sidebar by default
sidebarOpen = true;
// On chat routes, only auto-show sidebar if setting is enabled
if (autoShowSidebarOnNewChat) {
sidebarOpen = true;
}
// If setting is disabled, don't change sidebar state - let user control it manually
} else {
// Other routes follow default behavior
sidebarOpen = showSidebarByDefault;
@@ -190,12 +203,14 @@
<ChatSidebar bind:this={chatSidebar} />
</Sidebar.Root>
<Sidebar.Trigger
class="transition-left absolute left-0 z-[900] h-8 w-8 duration-200 ease-linear {sidebarOpen
? 'md:left-[var(--sidebar-width)]'
: ''}"
style="translate: 1rem 1rem;"
/>
{#if !(alwaysShowSidebarOnDesktop && isDesktop)}
<Sidebar.Trigger
class="transition-left absolute left-0 z-[900] h-8 w-8 duration-200 ease-linear {sidebarOpen
? 'md:left-[var(--sidebar-width)]'
: ''}"
style="translate: 1rem 1rem;"
/>
{/if}
<Sidebar.Inset class="flex flex-1 flex-col overflow-hidden">
{@render children?.()}

View File

@@ -1,7 +0,0 @@
import { describe, it, expect } from 'vitest';
describe('sum test', () => {
it('adds 1 + 2 to equal 3', () => {
expect(1 + 2).toBe(3);
});
});

View File

@@ -0,0 +1,423 @@
import { describe, it, expect } from 'vitest';
import { AttachmentType } from '$lib/enums';
import {
formatMessageForClipboard,
parseClipboardContent,
hasClipboardAttachments
} from '$lib/utils/clipboard';
describe('formatMessageForClipboard', () => {
it('returns plain content when no extras', () => {
const result = formatMessageForClipboard('Hello world', undefined);
expect(result).toBe('Hello world');
});
it('returns plain content when extras is empty array', () => {
const result = formatMessageForClipboard('Hello world', []);
expect(result).toBe('Hello world');
});
it('handles empty string content', () => {
const result = formatMessageForClipboard('', undefined);
expect(result).toBe('');
});
it('returns plain content when extras has only non-text attachments', () => {
const extras = [
{
type: AttachmentType.IMAGE as const,
name: 'image.png',
base64Url: 'data:image/png;base64,...'
}
];
const result = formatMessageForClipboard('Hello world', extras);
expect(result).toBe('Hello world');
});
it('filters non-text attachments and keeps only text ones', () => {
const extras = [
{
type: AttachmentType.IMAGE as const,
name: 'image.png',
base64Url: 'data:image/png;base64,...'
},
{
type: AttachmentType.TEXT as const,
name: 'file.txt',
content: 'Text content'
},
{
type: AttachmentType.PDF as const,
name: 'doc.pdf',
base64Data: 'data:application/pdf;base64,...',
content: 'PDF content',
processedAsImages: false
}
];
const result = formatMessageForClipboard('Hello', extras);
expect(result).toContain('"file.txt"');
expect(result).not.toContain('image.png');
expect(result).not.toContain('doc.pdf');
});
it('formats message with text attachments', () => {
const extras = [
{
type: AttachmentType.TEXT as const,
name: 'file1.txt',
content: 'File 1 content'
},
{
type: AttachmentType.TEXT as const,
name: 'file2.txt',
content: 'File 2 content'
}
];
const result = formatMessageForClipboard('Hello world', extras);
expect(result).toContain('"Hello world"');
expect(result).toContain('"type": "TEXT"');
expect(result).toContain('"name": "file1.txt"');
expect(result).toContain('"content": "File 1 content"');
expect(result).toContain('"name": "file2.txt"');
});
it('handles content with quotes and special characters', () => {
const content = 'Hello "world" with\nnewline';
const extras = [
{
type: AttachmentType.TEXT as const,
name: 'test.txt',
content: 'Test content'
}
];
const result = formatMessageForClipboard(content, extras);
// Should be valid JSON
expect(result.startsWith('"')).toBe(true);
// The content should be properly escaped
const parsed = JSON.parse(result.split('\n')[0]);
expect(parsed).toBe(content);
});
it('converts legacy context type to TEXT type', () => {
const extras = [
{
type: AttachmentType.LEGACY_CONTEXT as const,
name: 'legacy.txt',
content: 'Legacy content'
}
];
const result = formatMessageForClipboard('Hello', extras);
expect(result).toContain('"type": "TEXT"');
expect(result).not.toContain('"context"');
});
it('handles attachment content with special characters', () => {
const extras = [
{
type: AttachmentType.TEXT as const,
name: 'code.js',
content: 'const x = "hello\\nworld";\nconst y = `template ${var}`;'
}
];
const formatted = formatMessageForClipboard('Check this code', extras);
const parsed = parseClipboardContent(formatted);
expect(parsed.textAttachments[0].content).toBe(
'const x = "hello\\nworld";\nconst y = `template ${var}`;'
);
});
it('handles unicode characters in content and attachments', () => {
const extras = [
{
type: AttachmentType.TEXT as const,
name: 'unicode.txt',
content: '日本語テスト 🎉 émojis'
}
];
const formatted = formatMessageForClipboard('Привет мир 👋', extras);
const parsed = parseClipboardContent(formatted);
expect(parsed.message).toBe('Привет мир 👋');
expect(parsed.textAttachments[0].content).toBe('日本語テスト 🎉 émojis');
});
it('formats as plain text when asPlainText is true', () => {
const extras = [
{
type: AttachmentType.TEXT as const,
name: 'file1.txt',
content: 'File 1 content'
},
{
type: AttachmentType.TEXT as const,
name: 'file2.txt',
content: 'File 2 content'
}
];
const result = formatMessageForClipboard('Hello world', extras, true);
expect(result).toBe('Hello world\n\nFile 1 content\n\nFile 2 content');
});
it('returns plain content when asPlainText is true but no attachments', () => {
const result = formatMessageForClipboard('Hello world', [], true);
expect(result).toBe('Hello world');
});
it('plain text mode does not use JSON format', () => {
const extras = [
{
type: AttachmentType.TEXT as const,
name: 'test.txt',
content: 'Test content'
}
];
const result = formatMessageForClipboard('Hello', extras, true);
expect(result).not.toContain('"type"');
expect(result).not.toContain('[');
expect(result).toBe('Hello\n\nTest content');
});
});
describe('parseClipboardContent', () => {
it('returns plain text as message when not in special format', () => {
const result = parseClipboardContent('Hello world');
expect(result.message).toBe('Hello world');
expect(result.textAttachments).toHaveLength(0);
});
it('handles empty string input', () => {
const result = parseClipboardContent('');
expect(result.message).toBe('');
expect(result.textAttachments).toHaveLength(0);
});
it('handles whitespace-only input', () => {
const result = parseClipboardContent(' \n\t ');
expect(result.message).toBe(' \n\t ');
expect(result.textAttachments).toHaveLength(0);
});
it('returns plain text as message when starts with quote but invalid format', () => {
const result = parseClipboardContent('"Unclosed quote');
expect(result.message).toBe('"Unclosed quote');
expect(result.textAttachments).toHaveLength(0);
});
it('returns original text when JSON array is malformed', () => {
const input = '"Hello"\n[invalid json';
const result = parseClipboardContent(input);
expect(result.message).toBe('"Hello"\n[invalid json');
expect(result.textAttachments).toHaveLength(0);
});
it('parses message with text attachments', () => {
const input = `"Hello world"
[
{"type":"TEXT","name":"file1.txt","content":"File 1 content"},
{"type":"TEXT","name":"file2.txt","content":"File 2 content"}
]`;
const result = parseClipboardContent(input);
expect(result.message).toBe('Hello world');
expect(result.textAttachments).toHaveLength(2);
expect(result.textAttachments[0].name).toBe('file1.txt');
expect(result.textAttachments[0].content).toBe('File 1 content');
expect(result.textAttachments[1].name).toBe('file2.txt');
expect(result.textAttachments[1].content).toBe('File 2 content');
});
it('handles escaped quotes in message', () => {
const input = `"Hello \\"world\\" with quotes"
[
{"type":"TEXT","name":"file.txt","content":"test"}
]`;
const result = parseClipboardContent(input);
expect(result.message).toBe('Hello "world" with quotes');
expect(result.textAttachments).toHaveLength(1);
});
it('handles newlines in message', () => {
const input = `"Hello\\nworld"
[
{"type":"TEXT","name":"file.txt","content":"test"}
]`;
const result = parseClipboardContent(input);
expect(result.message).toBe('Hello\nworld');
expect(result.textAttachments).toHaveLength(1);
});
it('returns message only when no array follows', () => {
const input = '"Just a quoted string"';
const result = parseClipboardContent(input);
expect(result.message).toBe('Just a quoted string');
expect(result.textAttachments).toHaveLength(0);
});
it('filters out invalid attachment objects', () => {
const input = `"Hello"
[
{"type":"TEXT","name":"valid.txt","content":"valid"},
{"type":"INVALID","name":"invalid.txt","content":"invalid"},
{"name":"missing-type.txt","content":"missing"},
{"type":"TEXT","content":"missing name"}
]`;
const result = parseClipboardContent(input);
expect(result.message).toBe('Hello');
expect(result.textAttachments).toHaveLength(1);
expect(result.textAttachments[0].name).toBe('valid.txt');
});
it('handles empty attachments array', () => {
const input = '"Hello"\n[]';
const result = parseClipboardContent(input);
expect(result.message).toBe('Hello');
expect(result.textAttachments).toHaveLength(0);
});
it('roundtrips correctly with formatMessageForClipboard', () => {
const originalContent = 'Hello "world" with\nspecial characters';
const originalExtras = [
{
type: AttachmentType.TEXT as const,
name: 'file1.txt',
content: 'Content with\nnewlines and "quotes"'
},
{
type: AttachmentType.TEXT as const,
name: 'file2.txt',
content: 'Another file'
}
];
const formatted = formatMessageForClipboard(originalContent, originalExtras);
const parsed = parseClipboardContent(formatted);
expect(parsed.message).toBe(originalContent);
expect(parsed.textAttachments).toHaveLength(2);
expect(parsed.textAttachments[0].name).toBe('file1.txt');
expect(parsed.textAttachments[0].content).toBe('Content with\nnewlines and "quotes"');
expect(parsed.textAttachments[1].name).toBe('file2.txt');
expect(parsed.textAttachments[1].content).toBe('Another file');
});
});
describe('hasClipboardAttachments', () => {
it('returns false for plain text', () => {
expect(hasClipboardAttachments('Hello world')).toBe(false);
});
it('returns false for empty string', () => {
expect(hasClipboardAttachments('')).toBe(false);
});
it('returns false for quoted string without attachments', () => {
expect(hasClipboardAttachments('"Hello world"')).toBe(false);
});
it('returns true for valid format with attachments', () => {
const input = `"Hello"
[{"type":"TEXT","name":"file.txt","content":"test"}]`;
expect(hasClipboardAttachments(input)).toBe(true);
});
it('returns false for format with empty attachments array', () => {
const input = '"Hello"\n[]';
expect(hasClipboardAttachments(input)).toBe(false);
});
it('returns false for malformed JSON', () => {
expect(hasClipboardAttachments('"Hello"\n[broken')).toBe(false);
});
});
describe('roundtrip edge cases', () => {
it('preserves empty message with attachments', () => {
const extras = [
{
type: AttachmentType.TEXT as const,
name: 'file.txt',
content: 'Content only'
}
];
const formatted = formatMessageForClipboard('', extras);
const parsed = parseClipboardContent(formatted);
expect(parsed.message).toBe('');
expect(parsed.textAttachments).toHaveLength(1);
expect(parsed.textAttachments[0].content).toBe('Content only');
});
it('preserves attachment with empty content', () => {
const extras = [
{
type: AttachmentType.TEXT as const,
name: 'empty.txt',
content: ''
}
];
const formatted = formatMessageForClipboard('Message', extras);
const parsed = parseClipboardContent(formatted);
expect(parsed.message).toBe('Message');
expect(parsed.textAttachments).toHaveLength(1);
expect(parsed.textAttachments[0].content).toBe('');
});
it('preserves multiple backslashes', () => {
const content = 'Path: C:\\\\Users\\\\test\\\\file.txt';
const extras = [
{
type: AttachmentType.TEXT as const,
name: 'path.txt',
content: 'D:\\\\Data\\\\file'
}
];
const formatted = formatMessageForClipboard(content, extras);
const parsed = parseClipboardContent(formatted);
expect(parsed.message).toBe(content);
expect(parsed.textAttachments[0].content).toBe('D:\\\\Data\\\\file');
});
it('preserves tabs and various whitespace', () => {
const content = 'Line1\t\tTabbed\n Spaced\r\nCRLF';
const extras = [
{
type: AttachmentType.TEXT as const,
name: 'whitespace.txt',
content: '\t\t\n\n '
}
];
const formatted = formatMessageForClipboard(content, extras);
const parsed = parseClipboardContent(formatted);
expect(parsed.message).toBe(content);
expect(parsed.textAttachments[0].content).toBe('\t\t\n\n ');
});
});

View File

@@ -1,6 +1,6 @@
/* eslint-disable no-irregular-whitespace */
import { describe, it, expect, test } from 'vitest';
import { maskInlineLaTeX, preprocessLaTeX } from './latex-protection';
import { maskInlineLaTeX, preprocessLaTeX } from '$lib/utils/latex-protection';
describe('maskInlineLaTeX', () => {
it('should protect LaTeX $x + y$ but not money $3.99', () => {

View File

@@ -1,5 +1,5 @@
import { describe, expect, it } from 'vitest';
import { isValidModelName, normalizeModelName } from './model-names';
import { isValidModelName, normalizeModelName } from '$lib/utils/model-names';
describe('normalizeModelName', () => {
it('preserves Hugging Face org/model format (single slash)', () => {

View File

@@ -125,9 +125,9 @@ export default defineConfig({
{
extends: './vite.config.ts',
test: {
name: 'server',
name: 'unit',
environment: 'node',
include: ['tests/server/**/*.{test,spec}.{js,ts}']
include: ['tests/unit/**/*.{test,spec}.{js,ts}']
}
},
{