Compare commits

...

3 Commits
b8744 ... b8747

Author SHA1 Message Date
Adrien Gallouët
fb38d6f278 common : fix when loading a cached HF models with unavailable API (#21670)
Signed-off-by: Adrien Gallouët <angt@huggingface.co>
2026-04-10 16:37:46 +02:00
Johannes Gäßler
0893f50f2d common: mark --split-mode tensor as experimental (#21684) 2026-04-10 12:27:27 +02:00
Aleksander Grygier
f989a6e39e webui: Static build output improvements (#21667)
* refactor: Build improvements

* chore: Formatting + package lock update
2026-04-10 11:49:47 +02:00
10 changed files with 12897 additions and 294 deletions

View File

@@ -2353,7 +2353,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
"- none: use one GPU only\n"
"- layer (default): split layers and KV across GPUs (pipelined)\n"
"- row: split weight across GPUs by rows (parallelized)\n"
"- tensor: split weights and KV across GPUs (parallelized)",
"- tensor: split weights and KV across GPUs (parallelized, EXPERIMENTAL)",
[](common_params & params, const std::string & value) {
if (value == "none") {
params.split_mode = LLAMA_SPLIT_MODE_NONE;

View File

@@ -283,6 +283,13 @@ static int common_download_file_single_online(const std::string & url,
static const int max_attempts = 3;
static const int retry_delay_seconds = 2;
const bool file_exists = std::filesystem::exists(path);
if (file_exists && skip_etag) {
LOG_DBG("%s: using cached file: %s\n", __func__, path.c_str());
return 304; // 304 Not Modified - fake cached response
}
auto [cli, parts] = common_http_client(url);
httplib::Headers headers;
@@ -297,13 +304,6 @@ static int common_download_file_single_online(const std::string & url,
}
cli.set_default_headers(headers);
const bool file_exists = std::filesystem::exists(path);
if (file_exists && skip_etag) {
LOG_DBG("%s: using cached file: %s\n", __func__, path.c_str());
return 304; // 304 Not Modified - fake cached response
}
std::string last_etag;
if (file_exists) {
last_etag = read_etag(path);

File diff suppressed because one or more lines are too long

View File

@@ -1,5 +1,5 @@
<!--
This is a single file build of the frontend.
This is a static build of the frontend.
It is automatically generated by the build process.
Do not edit this file directly.
To make changes, refer to the "Web UI" section in the README.
@@ -18,7 +18,7 @@
<div style="display: contents">
<script>
{
__sveltekit_1ao0o9h = {
__sveltekit__ = {
base: new URL('.', location).pathname.slice(0, -1)
};

View File

@@ -1,11 +1,11 @@
{
"name": "webui",
"name": "llama-server-webui",
"version": "1.0.0",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"name": "webui",
"name": "llama-server-webui",
"version": "1.0.0",
"dependencies": {
"@modelcontextprotocol/sdk": "^1.25.1",

View File

@@ -1,5 +1,5 @@
{
"name": "webui",
"name": "llama-server-webui",
"private": true,
"version": "1.0.0",
"type": "module",

View File

@@ -0,0 +1,84 @@
import { readFileSync, writeFileSync, existsSync, readdirSync, copyFileSync } from 'fs';
import { resolve } from 'path';
import type { Plugin } from 'vite';
const GUIDE_FOR_FRONTEND = `
<!--
This is a static build of the frontend.
It is automatically generated by the build process.
Do not edit this file directly.
To make changes, refer to the "Web UI" section in the README.
-->
`.trim();
export function llamaCppBuildPlugin(): Plugin {
return {
name: 'llamacpp:build',
apply: 'build',
closeBundle() {
// Ensure the SvelteKit adapter has finished writing to ../public
setTimeout(() => {
try {
const indexPath = resolve('../public/index.html');
if (!existsSync(indexPath)) return;
let content = readFileSync(indexPath, 'utf-8');
const faviconPath = resolve('static/favicon.svg');
if (existsSync(faviconPath)) {
const faviconContent = readFileSync(faviconPath, 'utf-8');
const faviconBase64 = Buffer.from(faviconContent).toString('base64');
const faviconDataUrl = `data:image/svg+xml;base64,${faviconBase64}`;
content = content.replace(/href="[^"]*favicon\.svg"/g, `href="${faviconDataUrl}"`);
console.log('✓ Inlined favicon.svg as base64 data URL');
}
content = content.replace(/\r/g, '');
content = GUIDE_FOR_FRONTEND + '\n' + content;
content = content.replace(/\/_app\/immutable\/bundle\.[^"]+\.js/g, './bundle.js');
content = content.replace(
/\/_app\/immutable\/assets\/bundle\.[^"]+\.css/g,
'./bundle.css'
);
content = content.replace(/__sveltekit_[a-z0-9]+/g, '__sveltekit__');
writeFileSync(indexPath, content, 'utf-8');
console.log('✓ Updated index.html');
// Copy bundle.*.js -> ../public/bundle.js
const immutableDir = resolve('../public/_app/immutable');
const bundleDir = resolve('../public/_app/immutable/assets');
if (existsSync(immutableDir)) {
const jsFiles = readdirSync(immutableDir).filter((f) => f.match(/^bundle\..+\.js$/));
if (jsFiles.length > 0) {
copyFileSync(resolve(immutableDir, jsFiles[0]), resolve('../public/bundle.js'));
// Normalize __sveltekit_<hash> to __sveltekit__ in bundle.js
const bundleJsPath = resolve('../public/bundle.js');
let bundleJs = readFileSync(bundleJsPath, 'utf-8');
bundleJs = bundleJs.replace(/__sveltekit_[a-z0-9]+/g, '__sveltekit__');
writeFileSync(bundleJsPath, bundleJs, 'utf-8');
console.log(`✓ Copied ${jsFiles[0]} -> bundle.js`);
}
}
// Copy bundle.*.css -> ../public/bundle.css
if (existsSync(bundleDir)) {
const cssFiles = readdirSync(bundleDir).filter((f) => f.match(/^bundle\..+\.css$/));
if (cssFiles.length > 0) {
copyFileSync(resolve(bundleDir, cssFiles[0]), resolve('../public/bundle.css'));
console.log(`✓ Copied ${cssFiles[0]} -> bundle.css`);
}
}
} catch (error) {
console.error('Failed to update index.html:', error);
}
}, 100);
}
};
}

View File

@@ -22,7 +22,8 @@
</p>
{:else}
<p class="text-xs text-muted-foreground">
Press <kbd class="rounded bg-muted px-1 py-0.5 font-mono text-xs">{modKey} + Enter</kbd> to send,
Press <kbd class="rounded bg-muted px-1 py-0.5 font-mono text-xs">{modKey} + Enter</kbd> to
send,
<kbd class="rounded bg-muted px-1 py-0.5 font-mono text-xs">Enter</kbd> for new line
</p>
{/if}

View File

@@ -25,6 +25,9 @@ const config = {
},
alias: {
$styles: 'src/styles'
},
version: {
name: 'llama-server-webui'
}
},

View File

@@ -1,108 +1,33 @@
import tailwindcss from '@tailwindcss/vite';
import { sveltekit } from '@sveltejs/kit/vite';
import { readFileSync, writeFileSync, existsSync, readdirSync, copyFileSync } from 'fs';
import { dirname, resolve } from 'path';
import { fileURLToPath } from 'url';
import { defineConfig, searchForWorkspaceRoot } from 'vite';
import devtoolsJson from 'vite-plugin-devtools-json';
import { storybookTest } from '@storybook/addon-vitest/vitest-plugin';
import { llamaCppBuildPlugin } from './scripts/vite-plugin-llama-cpp-build';
const __dirname = dirname(fileURLToPath(import.meta.url));
const GUIDE_FOR_FRONTEND = `
<!--
This is a single file build of the frontend.
It is automatically generated by the build process.
Do not edit this file directly.
To make changes, refer to the "Web UI" section in the README.
-->
`.trim();
/**
* the maximum size of an embedded asset in bytes,
* e.g. maximum size of embedded font (see node_modules/katex/dist/fonts/*.woff2)
*/
const MAX_ASSET_SIZE = 32000;
/** public/index.html minified flag */
const ENABLE_JS_MINIFICATION = true;
function llamaCppBuildPlugin() {
return {
name: 'llamacpp:build',
apply: 'build' as const,
closeBundle() {
// Ensure the SvelteKit adapter has finished writing to ../public
setTimeout(() => {
try {
const indexPath = resolve('../public/index.html');
if (!existsSync(indexPath)) {
return;
}
let content = readFileSync(indexPath, 'utf-8');
const faviconPath = resolve('static/favicon.svg');
if (existsSync(faviconPath)) {
const faviconContent = readFileSync(faviconPath, 'utf-8');
const faviconBase64 = Buffer.from(faviconContent).toString('base64');
const faviconDataUrl = `data:image/svg+xml;base64,${faviconBase64}`;
content = content.replace(/href="[^"]*favicon\.svg"/g, `href="${faviconDataUrl}"`);
console.log('✓ Inlined favicon.svg as base64 data URL');
}
content = content.replace(/\r/g, '');
content = GUIDE_FOR_FRONTEND + '\n' + content;
content = content.replace(/\/_app\/immutable\/bundle\.[^"]+\.js/g, './bundle.js');
content = content.replace(
/\/_app\/immutable\/assets\/bundle\.[^"]+\.css/g,
'./bundle.css'
);
writeFileSync(indexPath, content, 'utf-8');
console.log('✓ Updated index.html');
// Copy bundle.*.js -> ../public/bundle.js
const immutableDir = resolve('../public/_app/immutable');
const bundleDir = resolve('../public/_app/immutable/assets');
if (existsSync(immutableDir)) {
const jsFiles = readdirSync(immutableDir).filter((f) => f.match(/^bundle\..+\.js$/));
if (jsFiles.length > 0) {
copyFileSync(resolve(immutableDir, jsFiles[0]), resolve('../public/bundle.js'));
console.log(`✓ Copied ${jsFiles[0]} -> bundle.js`);
}
}
// Copy bundle.*.css -> ../public/bundle.css
if (existsSync(bundleDir)) {
const cssFiles = readdirSync(bundleDir).filter((f) => f.match(/^bundle\..+\.css$/));
if (cssFiles.length > 0) {
copyFileSync(resolve(bundleDir, cssFiles[0]), resolve('../public/bundle.css'));
console.log(`✓ Copied ${cssFiles[0]} -> bundle.css`);
}
}
} catch (error) {
console.error('Failed to update index.html:', error);
}
}, 100);
}
};
}
export default defineConfig({
resolve: {
alias: {
'katex-fonts': resolve('node_modules/katex/dist/fonts')
}
},
build: {
assetsInlineLimit: MAX_ASSET_SIZE,
assetsInlineLimit: 32000,
chunkSizeWarningLimit: 3072,
minify: ENABLE_JS_MINIFICATION
minify: true
},
esbuild: {
lineLimit: 500,
minifyIdentifiers: false
},
css: {
preprocessorOptions: {
scss: {
@@ -114,7 +39,9 @@ export default defineConfig({
}
}
},
plugins: [tailwindcss(), sveltekit(), devtoolsJson(), llamaCppBuildPlugin()],
test: {
projects: [
{
@@ -131,6 +58,7 @@ export default defineConfig({
setupFiles: ['./vitest-setup-client.ts']
}
},
{
extends: './vite.config.ts',
test: {
@@ -139,6 +67,7 @@ export default defineConfig({
include: ['tests/unit/**/*.{test,spec}.{js,ts}']
}
},
{
extends: './vite.config.ts',
test: {