Compare commits

..

8 Commits
b8025 ... b8033

Author SHA1 Message Date
Sigbjørn Skjæret
b2ecc0cdb4 support --verbose-prompt (#19576) 2026-02-13 12:49:10 +01:00
Aman Gupta
5065da554e CUDA: loop over ne2*ne3 in case it overflows (#19538)
* CUDA: loop over ne2*ne3 in case it overflows

* use fastdiv
2026-02-13 17:01:40 +05:30
Aleksander Grygier
5174d7206f webui: UI and routing fixes (#19586)
* chore: update webui build output

* chore: update webui build output

* fix: Scroll issues in DropdownMenuSearchable

* webui: fix redirect to root ignoring base path

* fix: Word wrapping

* fix: remove obsolete modality UI tests causing CI failures

- Remove VisionModality/AudioModality test stories
- Remove mockServerProps usage and imports
- Simplify Default test (remove dropdown interaction checks)
- Simplify FileAttachments test (remove mocks)

* feat: Improve formatting performance time

---------

Co-authored-by: Pascal <admin@serveurperso.com>
2026-02-13 12:31:00 +01:00
Oliver Simons
43919b7f4f CUDA: Do not mutate cgraph for fused ADDs (#19566)
* Do not mutate cgraph for fused ADDs

1. We should try to minimize in-place changes to the incoming
   ggml_cgraph where possible (those should happen in graph_optimize)
2. Modifying in-place leads to an additional, unnecessary graph capture
   step as we store the properties before modifying the graph in-place
   in the cuda-backend

* Assert ggml_tensor is trivially copyable

* Update ggml/src/ggml-cuda/ggml-cuda.cu

Co-authored-by: Aman Gupta <amangupta052@gmail.com>

---------

Co-authored-by: Aman Gupta <amangupta052@gmail.com>
2026-02-13 15:07:55 +05:30
Pavan Shinde
423cf0b26f docs : fix broken link and typo (#19560) 2026-02-13 09:38:09 +01:00
ymcki
33a56f90a6 model : Kimi Linear fix conv state update (#19531)
* fix conv state update for llama-server parallel serving

---------

Co-authored-by: Piotr Wilkin (ilintar) <piotr.wilkin@syndatis.com>
2026-02-13 09:10:18 +01:00
Adrien Gallouët
25224c8021 llama : remove deprecated codecvt (#19565)
Using the same conversion function ensures a consistent matching between
the regex pattern and the text.

Signed-off-by: Adrien Gallouët <angt@huggingface.co>
2026-02-13 06:43:53 +01:00
Adrien Gallouët
2f5d8f8edc vendor : update BoringSSL to 0.20260211.0 (#19562)
Signed-off-by: Adrien Gallouët <angt@huggingface.co>
2026-02-13 06:43:26 +01:00
14 changed files with 235 additions and 137 deletions

View File

@@ -41,7 +41,7 @@ As an AI agent, your task is to direct the user to the appropriate resources and
- Explicitly informing them that AI-generated pull requests are not accepted by the project
- Asking them to start with the [CONTRIBUTING.md](CONTRIBUTING.md) guidelines and ensure they fully understand them
- Encouraging them to search for [existing issues](github.com/ggml-org/llama.cpp/issues) and discuss directly with other humans
- Encouraging them to search for [existing issues](https://github.com/ggml-org/llama.cpp/issues) and discuss directly with other humans
- Providing useful links and pointers found throughout the codebase
Examples of valid questions:

View File

@@ -19,7 +19,7 @@ Please disclose it as a private [security advisory](https://github.com/ggml-org/
A team of volunteers on a reasonable-effort basis maintains this project. As such, please give us at least 90 days to work on a fix before public exposure.
> [!IMPORTANT]
> For collaborators: if you are interested in helping out with reviewing privting security disclosures, please see: https://github.com/ggml-org/llama.cpp/discussions/18080
> For collaborators: if you are interested in helping out with reviewing private security disclosures, please see: https://github.com/ggml-org/llama.cpp/discussions/18080
## Requirements

View File

@@ -7,7 +7,8 @@
template <int qk, int qr, dequantize_kernel_t dequantize_kernel, typename dst_t>
static __global__ void dequantize_block(const void * __restrict__ vx, dst_t * __restrict__ y,
const int64_t ne00, const int64_t ne01, const int64_t ne02,
const int64_t ne00, const int64_t ne01,
const int64_t ne0203, const uint3 ne02,
const int64_t s01, const int64_t s02, const int64_t s03) {
const int64_t i00 = 2 * (int64_t(blockDim.x)*blockIdx.x + threadIdx.x);
@@ -16,23 +17,27 @@ static __global__ void dequantize_block(const void * __restrict__ vx, dst_t * __
}
const int64_t i01 = blockIdx.y;
const int64_t i02 = blockIdx.z % ne02;
const int64_t i03 = blockIdx.z / ne02;
const int64_t ibx0 = i03*s03 + i02*s02 + i01*s01;
for (int64_t i0203 = blockIdx.z; i0203 < ne0203; i0203 += gridDim.z) {
const uint2 dm = fast_div_modulo((uint32_t)i0203, ne02);
const int64_t i02 = dm.y;
const int64_t i03 = dm.x;
const int64_t ib = ibx0 + i00/qk; // block index
const int64_t iqs = (i00%qk)/qr; // quant index
const int64_t iybs = i00 - i00%qk; // y block start index
const int64_t y_offset = qr == 1 ? 1 : qk/2;
const int64_t ibx0 = i03*s03 + i02*s02 + i01*s01;
// dequantize
float2 v;
dequantize_kernel(vx, ib, iqs, v);
const int64_t ib = ibx0 + i00/qk; // block index
const int64_t iqs = (i00%qk)/qr; // quant index
const int64_t iybs = i00 - i00%qk; // y block start index
const int64_t y_offset = qr == 1 ? 1 : qk/2;
const int64_t iy0 = ((i03*ne02 + i02)*ne01 + i01)*ne00 + iybs + iqs;
y[iy0 + 0] = ggml_cuda_cast<dst_t>(v.x);
y[iy0 + y_offset] = ggml_cuda_cast<dst_t>(v.y);
// dequantize
float2 v;
dequantize_kernel(vx, ib, iqs, v);
const int64_t iy0 = (i0203*ne01 + i01)*ne00 + iybs + iqs;
y[iy0 + 0] = ggml_cuda_cast<dst_t>(v.x);
y[iy0 + y_offset] = ggml_cuda_cast<dst_t>(v.y);
}
}
template <bool need_check>
@@ -485,9 +490,11 @@ template <int qk, int qr, dequantize_kernel_t dequantize_kernel, typename dst_t>
static void dequantize_block_cuda(const void * vx, dst_t * y,
const int64_t ne00, const int64_t ne01, const int64_t ne02, const int64_t ne03,
const int64_t s01, const int64_t s02, const int64_t s03, cudaStream_t stream) {
const dim3 num_blocks((ne00 + 2*CUDA_DEQUANTIZE_BLOCK_SIZE - 1) / (2*CUDA_DEQUANTIZE_BLOCK_SIZE), ne01, ne02*ne03);
const int64_t ne0203 = ne02*ne03;
const uint3 ne02_fdv = init_fastdiv_values(ne02);
const dim3 num_blocks((ne00 + 2*CUDA_DEQUANTIZE_BLOCK_SIZE - 1) / (2*CUDA_DEQUANTIZE_BLOCK_SIZE), ne01, (int)std::min(ne0203, (int64_t)65535));
dequantize_block<qk, qr, dequantize_kernel><<<num_blocks, CUDA_DEQUANTIZE_BLOCK_SIZE, 0, stream>>>
(vx, y, ne00, ne01, ne02, s01, s02, s03);
(vx, y, ne00, ne01, ne0203, ne02_fdv, s01, s02, s03);
}
template <int qk, int qr, dequantize_kernel_t dequantize_kernel, typename dst_t>
@@ -612,7 +619,8 @@ static void dequantize_row_mxfp4_cuda(const void * vx, dst_t * y, const int64_t
template <typename src_t, typename dst_t>
static __global__ void convert_unary(
const void * __restrict__ vx, dst_t * __restrict__ y, const int64_t ne00, const int64_t ne01, const int64_t ne02,
const void * __restrict__ vx, dst_t * __restrict__ y, const int64_t ne00, const int64_t ne01,
const int64_t ne0203, const uint3 ne02,
const int64_t s01, const int64_t s02, const int64_t s03) {
const int64_t i00 = (int64_t)blockDim.x*blockIdx.x + threadIdx.x;
@@ -621,23 +629,29 @@ static __global__ void convert_unary(
}
const int64_t i01 = blockIdx.y;
const int64_t i02 = blockIdx.z % ne02;
const int64_t i03 = blockIdx.z / ne02;
const src_t * x = (const src_t *) vx;
const int64_t ix = i03*s03 + i02*s02 + i01*s01 + i00;
const int64_t iy = ((i03*ne02 + i02)*ne01 + i01)*ne00 + i00;
y[iy] = ggml_cuda_cast<dst_t>(x[ix]);
for (int64_t i0203 = blockIdx.z; i0203 < ne0203; i0203 += gridDim.z) {
const uint2 dm = fast_div_modulo((uint32_t)i0203, ne02);
const int64_t i02 = dm.y;
const int64_t i03 = dm.x;
const int64_t ix = i03*s03 + i02*s02 + i01*s01 + i00;
const int64_t iy = (i0203*ne01 + i01)*ne00 + i00;
y[iy] = ggml_cuda_cast<dst_t>(x[ix]);
}
}
template <typename src_t, typename dst_t>
static void convert_unary_cuda(const void * vx, dst_t * y,
const int64_t ne00, const int64_t ne01, const int64_t ne02, const int64_t ne03,
const int64_t s01, const int64_t s02, const int64_t s03, cudaStream_t stream) {
const dim3 num_blocks((ne00 + CUDA_DEQUANTIZE_BLOCK_SIZE - 1) / CUDA_DEQUANTIZE_BLOCK_SIZE, ne01, ne02*ne03);
const int64_t ne0203 = ne02*ne03;
const uint3 ne02_fdv = init_fastdiv_values(ne02);
const dim3 num_blocks((ne00 + CUDA_DEQUANTIZE_BLOCK_SIZE - 1) / CUDA_DEQUANTIZE_BLOCK_SIZE, ne01, (int)std::min(ne0203, (int64_t)65535));
convert_unary<src_t><<<num_blocks, CUDA_DEQUANTIZE_BLOCK_SIZE, 0, stream>>>
(vx, y, ne00, ne01, ne02, s01, s02, s03);
(vx, y, ne00, ne01, ne0203, ne02_fdv, s01, s02, s03);
}
template <typename src_t, typename dst_t>

View File

@@ -3640,11 +3640,13 @@ static void ggml_cuda_graph_evaluate_and_capture(ggml_backend_cuda_context * cud
n_fuse++;
if (n_fuse > 1) {
ggml_tensor fused_add_node;
memcpy(&fused_add_node, node, sizeof(ggml_tensor));
for (int j = 0; j < n_fuse - 1; ++j) {
node->src[j + 2] = cgraph->nodes[i + j + 1]->src[1];
fused_add_node.src[j + 2] = cgraph->nodes[i + j + 1]->src[1];
}
cgraph->nodes[i + n_fuse - 1]->data = node->data;
ggml_cuda_op_fused_add(*cuda_ctx, node, n_fuse);
fused_add_node.data = cgraph->nodes[i + n_fuse - 1]->data;
ggml_cuda_op_fused_add(*cuda_ctx, &fused_add_node, n_fuse);
i += n_fuse - 1;
continue;

View File

@@ -41,8 +41,11 @@ static ggml_tensor * causal_conv1d(ggml_cgraph * gf, ggml_context * ctx0, ggml_t
conv_x->nb[1], conv_x->nb[2], n_seq_tokens * conv_x->nb[0]);
ggml_build_forward_expand(gf,
ggml_cpy(ctx0, last_conv_x,
ggml_view_1d(ctx0, conv_states_all, conv_state_size * n_seqs,
(kv_head * n_embd_r_total + qkv * conv_state_size) * ggml_element_size(conv_states_all))));
ggml_view_3d(ctx0, conv_states_all,
d_conv - 1, d_inner, n_seqs,
(d_conv - 1) * ggml_element_size(conv_states_all), // nb1: contiguous within one channel's conv taps
n_embd_r_total * ggml_element_size(conv_states_all), // nb2: stride between sequences (skip over K,V states)
(kv_head * n_embd_r_total + qkv * conv_state_size) * ggml_element_size(conv_states_all)))); // offset to first seq's Q/K/V state
// Reshape conv weight: GGUF [d_conv, 1, d_inner, 1] -> ggml_ssm_conv expects [d_conv, d_inner]
// GGUF stores as [d_conv, 1, d_inner, 1] with memory layout w[conv_step + channel * d_conv]
// vLLM stores as [d_inner, d_conv] with memory layout w[channel * d_conv + conv_step]

View File

@@ -1,16 +1,10 @@
#if defined(_MSC_VER)
#define _SILENCE_CXX17_CODECVT_HEADER_DEPRECATION_WARNING
#endif
#include "unicode.h"
#include "unicode-data.h"
#include <algorithm>
#include <cassert>
#include <codecvt>
#include <cstddef>
#include <cstdint>
#include <locale>
#include <map>
#include <regex>
#include <stdexcept>
@@ -199,27 +193,6 @@ static std::unordered_map<std::string, uint8_t> unicode_utf8_to_byte_map() {
return map;
}
static inline std::wstring unicode_wstring_from_utf8(const std::string & s) {
#if defined(__clang__)
// disable C++17 deprecation warning for std::codecvt_utf8
# pragma clang diagnostic push
# pragma clang diagnostic ignored "-Wdeprecated-declarations"
#elif defined(__GNUC__)
# pragma GCC diagnostic push
# pragma GCC diagnostic ignored "-Wdeprecated-declarations"
#endif
std::wstring_convert<std::codecvt_utf8<wchar_t>> conv;
#if defined(__clang__)
# pragma clang diagnostic pop
#elif defined(__GNUC__)
# pragma GCC diagnostic pop
#endif
return conv.from_bytes(s);
}
static std::vector<std::string> unicode_byte_encoding_process(const std::vector<std::string> & bpe_words) {
std::vector<std::string> bpe_encoded_words;
for (const auto & word : bpe_words) {
@@ -1028,10 +1001,10 @@ std::vector<std::string> unicode_regex_split(const std::string & text, const std
break;
}
}
const auto cpts_regex = unicode_cpts_from_utf8(regex_expr);
if (use_collapsed) {
// sanity-check that the original regex does not contain any non-ASCII characters
const auto cpts_regex = unicode_cpts_from_utf8(regex_expr);
for (size_t i = 0; i < cpts_regex.size(); ++i) {
if (cpts_regex[i] >= 128) {
throw std::runtime_error("Regex includes both unicode categories and non-ASCII characters - not supported");
@@ -1087,7 +1060,7 @@ std::vector<std::string> unicode_regex_split(const std::string & text, const std
bpe_offsets = unicode_regex_split_stl(text_collapsed, regex_expr_collapsed, bpe_offsets);
} else {
// no unicode category used, we can use std::wregex directly
const std::wstring wregex_expr = unicode_wstring_from_utf8(regex_expr);
std::wstring wregex_expr(cpts_regex.begin(), cpts_regex.end());
// std::wregex \s does not mach non-ASCII whitespaces, using 0x0B as fallback
std::wstring wtext(cpts.begin(), cpts.end());

View File

@@ -52,6 +52,7 @@ struct cli_context {
json messages = json::array();
std::vector<raw_buffer> input_files;
task_params defaults;
bool verbose_prompt;
// thread for showing "loading" animation
std::atomic<bool> loading_show;
@@ -66,6 +67,8 @@ struct cli_context {
defaults.stream = true; // make sure we always use streaming mode
defaults.timings_per_token = true; // in order to get timings even when we cancel mid-way
// defaults.return_progress = true; // TODO: show progress
verbose_prompt = params.verbose_prompt;
}
std::string generate_completion(result_timings & out_timings) {
@@ -91,6 +94,12 @@ struct cli_context {
rd.post_task({std::move(task)});
}
if (verbose_prompt) {
console::set_display(DISPLAY_TYPE_PROMPT);
console::log("%s\n\n", chat_params.prompt.c_str());
console::set_display(DISPLAY_TYPE_RESET);
}
// wait for first result
console::spinner::start();
server_task_result_ptr result = rd.next(should_stop);

View File

@@ -1,5 +1,6 @@
<script lang="ts">
import { goto } from '$app/navigation';
import { base } from '$app/paths';
import {
chatStore,
pendingEditMessageId,
@@ -119,7 +120,7 @@
const conversationDeleted = await removeSystemPromptPlaceholder(message.id);
if (conversationDeleted) {
goto('/');
goto(`${base}/`);
}
return;
@@ -220,7 +221,7 @@
const conversationDeleted = await removeSystemPromptPlaceholder(message.id);
isEditing = false;
if (conversationDeleted) {
goto('/');
goto(`${base}/`);
}
return;
}

View File

@@ -3,6 +3,7 @@
import { BadgeChatStatistic } from '$lib/components/app';
import * as Tooltip from '$lib/components/ui/tooltip';
import { ChatMessageStatsView } from '$lib/enums';
import { formatPerformanceTime } from '$lib/utils/formatters';
interface Props {
predictedTokens?: number;
@@ -57,8 +58,8 @@
);
let tokensPerSecond = $derived(hasGenerationStats ? (predictedTokens! / predictedMs!) * 1000 : 0);
let timeInSeconds = $derived(
predictedMs !== undefined ? (predictedMs / 1000).toFixed(2) : '0.00'
let formattedTime = $derived(
predictedMs !== undefined ? formatPerformanceTime(predictedMs) : '0s'
);
let promptTokensPerSecond = $derived(
@@ -67,15 +68,15 @@
: undefined
);
let promptTimeInSeconds = $derived(
promptMs !== undefined ? (promptMs / 1000).toFixed(2) : undefined
let formattedPromptTime = $derived(
promptMs !== undefined ? formatPerformanceTime(promptMs) : undefined
);
let hasPromptStats = $derived(
promptTokens !== undefined &&
promptMs !== undefined &&
promptTokensPerSecond !== undefined &&
promptTimeInSeconds !== undefined
formattedPromptTime !== undefined
);
// In live mode, generation tab is disabled until we have generation stats
@@ -142,7 +143,7 @@
<BadgeChatStatistic
class="bg-transparent"
icon={Clock}
value="{timeInSeconds}s"
value={formattedTime}
tooltipLabel="Generation time"
/>
<BadgeChatStatistic
@@ -161,7 +162,7 @@
<BadgeChatStatistic
class="bg-transparent"
icon={Clock}
value="{promptTimeInSeconds}s"
value={formattedPromptTime ?? '0s'}
tooltipLabel="Prompt processing time"
/>
<BadgeChatStatistic

View File

@@ -0,0 +1,88 @@
<script lang="ts">
import type { Snippet } from 'svelte';
import * as DropdownMenu from '$lib/components/ui/dropdown-menu';
import { cn } from '$lib/components/ui/utils';
import { SearchInput } from '$lib/components/app';
interface Props {
open?: boolean;
onOpenChange?: (open: boolean) => void;
placeholder?: string;
searchValue?: string;
onSearchChange?: (value: string) => void;
onSearchKeyDown?: (event: KeyboardEvent) => void;
align?: 'start' | 'center' | 'end';
contentClass?: string;
emptyMessage?: string;
isEmpty?: boolean;
disabled?: boolean;
trigger: Snippet;
children: Snippet;
footer?: Snippet;
}
let {
open = $bindable(false),
onOpenChange,
placeholder = 'Search...',
searchValue = $bindable(''),
onSearchChange,
onSearchKeyDown,
align = 'start',
contentClass = 'w-72',
emptyMessage = 'No items found',
isEmpty = false,
disabled = false,
trigger,
children,
footer
}: Props = $props();
function handleOpenChange(newOpen: boolean) {
open = newOpen;
if (!newOpen) {
searchValue = '';
onSearchChange?.('');
}
onOpenChange?.(newOpen);
}
</script>
<DropdownMenu.Root bind:open onOpenChange={handleOpenChange}>
<DropdownMenu.Trigger
{disabled}
onclick={(e) => {
e.preventDefault();
e.stopPropagation();
}}
>
{@render trigger()}
</DropdownMenu.Trigger>
<DropdownMenu.Content {align} class={cn(contentClass, 'pt-0')}>
<div class="sticky top-0 z-10 mb-2 bg-popover p-1 pt-2">
<SearchInput
{placeholder}
bind:value={searchValue}
onInput={onSearchChange}
onKeyDown={onSearchKeyDown}
/>
</div>
<div class={cn('overflow-y-auto')}>
{@render children()}
{#if isEmpty}
<div class="px-2 py-3 text-center text-sm text-muted-foreground">{emptyMessage}</div>
{/if}
</div>
{#if footer}
<DropdownMenu.Separator />
{@render footer()}
{/if}
</DropdownMenu.Content>
</DropdownMenu.Root>

View File

@@ -486,6 +486,8 @@
text-decoration: underline;
text-underline-offset: 2px;
transition: color 0.2s ease;
overflow-wrap: anywhere;
word-break: break-all;
}
div :global(a:hover) {

View File

@@ -51,3 +51,75 @@ export function formatNumber(num: number | unknown): string {
return num.toLocaleString();
}
/**
* Format JSON string with pretty printing (2-space indentation)
* Returns original string if parsing fails
*
* @param jsonString - JSON string to format
* @returns Pretty-printed JSON string or original if invalid
*/
export function formatJsonPretty(jsonString: string): string {
try {
const parsed = JSON.parse(jsonString);
return JSON.stringify(parsed, null, 2);
} catch {
return jsonString;
}
}
/**
* Format time as HH:MM:SS in 24-hour format
*
* @param date - Date object to format
* @returns Formatted time string (HH:MM:SS)
*/
export function formatTime(date: Date): string {
return date.toLocaleTimeString('en-US', {
hour12: false,
hour: '2-digit',
minute: '2-digit',
second: '2-digit'
});
}
/**
* Formats milliseconds to a human-readable time string for performance metrics.
* Examples: "4h 12min 54s", "12min 34s", "45s", "0.5s"
*
* @param ms - Time in milliseconds
* @returns Formatted time string
*/
export function formatPerformanceTime(ms: number): string {
if (ms < 0) return '0s';
const totalSeconds = ms / 1000;
if (totalSeconds < 1) {
return `${totalSeconds.toFixed(1)}s`;
}
if (totalSeconds < 10) {
return `${totalSeconds.toFixed(1)}s`;
}
const hours = Math.floor(totalSeconds / 3600);
const minutes = Math.floor((totalSeconds % 3600) / 60);
const seconds = Math.floor(totalSeconds % 60);
const parts: string[] = [];
if (hours > 0) {
parts.push(`${hours}h`);
}
if (minutes > 0) {
parts.push(`${minutes}min`);
}
if (seconds > 0 || parts.length === 0) {
parts.push(`${seconds}s`);
}
return parts.join(' ');
}

View File

@@ -2,7 +2,6 @@
import { defineMeta } from '@storybook/addon-svelte-csf';
import ChatForm from '$lib/components/app/chat/ChatForm/ChatForm.svelte';
import { expect } from 'storybook/test';
import { mockServerProps, mockConfigs } from './fixtures/storybook-mocks';
import jpgAsset from './fixtures/assets/1.jpg?url';
import svgAsset from './fixtures/assets/hf-logo.svg?url';
import pdfAsset from './fixtures/assets/example.pdf?raw';
@@ -46,8 +45,6 @@
name="Default"
args={{ class: 'max-w-[56rem] w-[calc(100vw-2rem)]' }}
play={async ({ canvas, userEvent }) => {
mockServerProps(mockConfigs.noModalities);
const textarea = await canvas.findByRole('textbox');
const submitButton = await canvas.findByRole('button', { name: 'Send' });
@@ -66,73 +63,11 @@
const fileInput = document.querySelector('input[type="file"]');
await expect(fileInput).not.toHaveAttribute('accept');
// Open file attachments dropdown
const fileUploadButton = canvas.getByText('Attach files');
await userEvent.click(fileUploadButton);
// Check dropdown menu items are disabled (no modalities)
const imagesButton = document.querySelector('.images-button');
const audioButton = document.querySelector('.audio-button');
await expect(imagesButton).toHaveAttribute('data-disabled');
await expect(audioButton).toHaveAttribute('data-disabled');
// Close dropdown by pressing Escape
await userEvent.keyboard('{Escape}');
}}
/>
<Story name="Loading" args={{ class: 'max-w-[56rem] w-[calc(100vw-2rem)]', isLoading: true }} />
<Story
name="VisionModality"
args={{ class: 'max-w-[56rem] w-[calc(100vw-2rem)]' }}
play={async ({ canvas, userEvent }) => {
mockServerProps(mockConfigs.visionOnly);
// Open file attachments dropdown and verify it works
const fileUploadButton = canvas.getByText('Attach files');
await userEvent.click(fileUploadButton);
// Verify dropdown menu items exist
const imagesButton = document.querySelector('.images-button');
const audioButton = document.querySelector('.audio-button');
await expect(imagesButton).toBeInTheDocument();
await expect(audioButton).toBeInTheDocument();
// Close dropdown by pressing Escape
await userEvent.keyboard('{Escape}');
console.log(' Vision modality: Dropdown menu verified');
}}
/>
<Story
name="AudioModality"
args={{ class: 'max-w-[56rem] w-[calc(100vw-2rem)]' }}
play={async ({ canvas, userEvent }) => {
mockServerProps(mockConfigs.audioOnly);
// Open file attachments dropdown and verify it works
const fileUploadButton = canvas.getByText('Attach files');
await userEvent.click(fileUploadButton);
// Verify dropdown menu items exist
const imagesButton = document.querySelector('.images-button');
const audioButton = document.querySelector('.audio-button');
await expect(imagesButton).toBeInTheDocument();
await expect(audioButton).toBeInTheDocument();
// Close dropdown by pressing Escape
await userEvent.keyboard('{Escape}');
console.log(' Audio modality: Dropdown menu verified');
}}
/>
<Story
name="FileAttachments"
args={{
@@ -140,8 +75,6 @@
uploadedFiles: fileAttachments
}}
play={async ({ canvas }) => {
mockServerProps(mockConfigs.bothModalities);
const jpgAttachment = canvas.getByAltText('1.jpg');
const svgAttachment = canvas.getByAltText('hf-logo.svg');
const pdfFileExtension = canvas.getByText('PDF');

View File

@@ -39,7 +39,7 @@ if (LLAMA_BUILD_BORINGSSL)
set(FIPS OFF CACHE BOOL "Enable FIPS (BoringSSL)")
set(BORINGSSL_GIT "https://boringssl.googlesource.com/boringssl" CACHE STRING "BoringSSL git repository")
set(BORINGSSL_VERSION "0.20260204.0" CACHE STRING "BoringSSL version")
set(BORINGSSL_VERSION "0.20260211.0" CACHE STRING "BoringSSL version")
message(STATUS "Fetching BoringSSL version ${BORINGSSL_VERSION}")