mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2026-02-05 13:53:23 +02:00
Compare commits
12 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
a498c75ad1 | ||
|
|
3409ab842d | ||
|
|
c342c3b93d | ||
|
|
af252d0758 | ||
|
|
11fb327bf3 | ||
|
|
e6e934c5ea | ||
|
|
b536eb0233 | ||
|
|
e0c93af2a0 | ||
|
|
423bee462b | ||
|
|
8abcc70a74 | ||
|
|
eaba92c3dc | ||
|
|
6ab881b7c3 |
2
.github/workflows/build.yml
vendored
2
.github/workflows/build.yml
vendored
@@ -293,6 +293,7 @@ jobs:
|
||||
cmake -B build \
|
||||
-DLLAMA_FATAL_WARNINGS=ON \
|
||||
-DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON \
|
||||
-DGGML_SANITIZE_${{ matrix.sanitizer }}=ON \
|
||||
-DCMAKE_BUILD_TYPE=${{ matrix.build_type }}
|
||||
cmake --build build --config ${{ matrix.build_type }} -j $(nproc)
|
||||
|
||||
@@ -303,6 +304,7 @@ jobs:
|
||||
cmake -B build \
|
||||
-DLLAMA_FATAL_WARNINGS=ON \
|
||||
-DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON \
|
||||
-DGGML_SANITIZE_${{ matrix.sanitizer }}=ON \
|
||||
-DCMAKE_BUILD_TYPE=${{ matrix.build_type }} \
|
||||
-DGGML_OPENMP=OFF
|
||||
cmake --build build --config ${{ matrix.build_type }} -j $(nproc)
|
||||
|
||||
@@ -27,6 +27,7 @@
|
||||
/examples/batched.swift/ @ggerganov
|
||||
/examples/batched/ @ggerganov
|
||||
/examples/convert-llama2c-to-ggml/ @ggerganov
|
||||
/examples/debug/ @danbev @pwilkin
|
||||
/examples/deprecation-warning/ @ggerganov
|
||||
/examples/diffusion/ @am17an
|
||||
/examples/embedding/ @ggerganov
|
||||
|
||||
@@ -45,6 +45,8 @@ static float common_ggml_get_float_value(const uint8_t * data,
|
||||
return v;
|
||||
}
|
||||
|
||||
#define INDENT " "
|
||||
|
||||
template <bool abort>
|
||||
void common_debug_print_tensor(uint8_t * data, ggml_type type, const int64_t * ne, const size_t * nb, int64_t n) {
|
||||
GGML_ASSERT(n > 0);
|
||||
@@ -60,41 +62,41 @@ void common_debug_print_tensor(uint8_t * data, ggml_type type, const int64_t * n
|
||||
}
|
||||
}
|
||||
for (int64_t i3 = 0; i3 < ne[3]; i3++) {
|
||||
LOG_ERR(" [\n");
|
||||
LOG(INDENT "[\n");
|
||||
for (int64_t i2 = 0; i2 < ne[2]; i2++) {
|
||||
if (i2 == n && ne[2] > 2 * n) {
|
||||
LOG_ERR(" ..., \n");
|
||||
LOG(INDENT INDENT "..., \n");
|
||||
i2 = ne[2] - n;
|
||||
}
|
||||
LOG_ERR(" [\n");
|
||||
LOG(INDENT INDENT "[\n");
|
||||
for (int64_t i1 = 0; i1 < ne[1]; i1++) {
|
||||
if (i1 == n && ne[1] > 2 * n) {
|
||||
LOG_ERR(" ..., \n");
|
||||
LOG(INDENT INDENT INDENT "..., \n");
|
||||
i1 = ne[1] - n;
|
||||
}
|
||||
LOG_ERR(" [");
|
||||
LOG(INDENT INDENT INDENT "[");
|
||||
for (int64_t i0 = 0; i0 < ne[0]; i0++) {
|
||||
if (i0 == n && ne[0] > 2 * n) {
|
||||
LOG_ERR("..., ");
|
||||
LOG(" ..., ");
|
||||
i0 = ne[0] - n;
|
||||
}
|
||||
const float v = common_ggml_get_float_value(data, type, nb, i0, i1, i2, i3);
|
||||
LOG_ERR("%12.4f", v);
|
||||
LOG("%12.4f", v);
|
||||
if (i0 < ne[0] - 1) {
|
||||
LOG_ERR(", ");
|
||||
LOG(", ");
|
||||
}
|
||||
}
|
||||
LOG_ERR("],\n");
|
||||
LOG(" ],\n");
|
||||
}
|
||||
LOG_ERR(" ],\n");
|
||||
LOG(INDENT INDENT "],\n");
|
||||
}
|
||||
LOG_ERR(" ]\n");
|
||||
LOG_ERR(" sum = %f\n", sum);
|
||||
LOG(INDENT "]\n");
|
||||
LOG(INDENT "sum = %f\n", sum);
|
||||
}
|
||||
|
||||
if constexpr (abort) {
|
||||
if (std::isnan(sum)) {
|
||||
LOG_ERR("encountered NaN - aborting\n");
|
||||
LOG("encountered NaN - aborting\n");
|
||||
exit(0);
|
||||
}
|
||||
}
|
||||
@@ -137,9 +139,9 @@ template <bool abort_on_nan> bool common_debug_cb_eval(struct ggml_tensor * t, b
|
||||
}
|
||||
|
||||
if (matches_filter) {
|
||||
LOG_ERR("%s: %24s = (%s) %10s(%s{%s}, %s}) = {%s}\n", __func__, t->name, ggml_type_name(t->type),
|
||||
ggml_op_desc(t), src0->name, common_ggml_ne_string(src0).c_str(), src1 ? src1_str : "",
|
||||
common_ggml_ne_string(t).c_str());
|
||||
LOG("%s: %24s = (%s) %10s(%s{%s}, %s}) = {%s}\n", __func__, t->name, ggml_type_name(t->type),
|
||||
ggml_op_desc(t), src0->name, common_ggml_ne_string(src0).c_str(), src1 ? src1_str : "",
|
||||
common_ggml_ne_string(t).c_str());
|
||||
}
|
||||
|
||||
const bool is_host = ggml_backend_buffer_is_host(t->buffer);
|
||||
|
||||
159
examples/model-conversion/scripts/utils/tensor-info.py
Executable file
159
examples/model-conversion/scripts/utils/tensor-info.py
Executable file
@@ -0,0 +1,159 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
from safetensors import safe_open
|
||||
|
||||
|
||||
MODEL_SAFETENSORS_FILE = "model.safetensors"
|
||||
MODEL_SAFETENSORS_INDEX = "model.safetensors.index.json"
|
||||
|
||||
|
||||
def get_weight_map(model_path: Path) -> Optional[dict[str, str]]:
|
||||
index_file = model_path / MODEL_SAFETENSORS_INDEX
|
||||
|
||||
if index_file.exists():
|
||||
with open(index_file, 'r') as f:
|
||||
index = json.load(f)
|
||||
return index.get("weight_map", {})
|
||||
|
||||
return None
|
||||
|
||||
|
||||
def get_all_tensor_names(model_path: Path) -> list[str]:
|
||||
weight_map = get_weight_map(model_path)
|
||||
|
||||
if weight_map is not None:
|
||||
return list(weight_map.keys())
|
||||
|
||||
single_file = model_path / MODEL_SAFETENSORS_FILE
|
||||
if single_file.exists():
|
||||
try:
|
||||
with safe_open(single_file, framework="pt", device="cpu") as f:
|
||||
return list(f.keys())
|
||||
except Exception as e:
|
||||
print(f"Error reading {single_file}: {e}")
|
||||
sys.exit(1)
|
||||
|
||||
print(f"Error: No safetensors files found in {model_path}")
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
def find_tensor_file(model_path: Path, tensor_name: str) -> Optional[str]:
|
||||
weight_map = get_weight_map(model_path)
|
||||
|
||||
if weight_map is not None:
|
||||
return weight_map.get(tensor_name)
|
||||
|
||||
single_file = model_path / MODEL_SAFETENSORS_FILE
|
||||
if single_file.exists():
|
||||
return single_file.name
|
||||
|
||||
return None
|
||||
|
||||
|
||||
def normalize_tensor_name(tensor_name: str) -> str:
|
||||
normalized = re.sub(r'\.\d+\.', '.#.', tensor_name)
|
||||
normalized = re.sub(r'\.\d+$', '.#', normalized)
|
||||
return normalized
|
||||
|
||||
|
||||
def list_all_tensors(model_path: Path, unique: bool = False):
|
||||
tensor_names = get_all_tensor_names(model_path)
|
||||
|
||||
if unique:
|
||||
seen = set()
|
||||
for tensor_name in sorted(tensor_names):
|
||||
normalized = normalize_tensor_name(tensor_name)
|
||||
if normalized not in seen:
|
||||
seen.add(normalized)
|
||||
print(normalized)
|
||||
else:
|
||||
for tensor_name in sorted(tensor_names):
|
||||
print(tensor_name)
|
||||
|
||||
|
||||
def print_tensor_info(model_path: Path, tensor_name: str):
|
||||
tensor_file = find_tensor_file(model_path, tensor_name)
|
||||
|
||||
if tensor_file is None:
|
||||
print(f"Error: Could not find tensor '{tensor_name}' in model index")
|
||||
print(f"Model path: {model_path}")
|
||||
sys.exit(1)
|
||||
|
||||
file_path = model_path / tensor_file
|
||||
|
||||
try:
|
||||
with safe_open(file_path, framework="pt", device="cpu") as f:
|
||||
if tensor_name in f.keys():
|
||||
tensor_slice = f.get_slice(tensor_name)
|
||||
shape = tensor_slice.get_shape()
|
||||
print(f"Tensor: {tensor_name}")
|
||||
print(f"File: {tensor_file}")
|
||||
print(f"Shape: {shape}")
|
||||
else:
|
||||
print(f"Error: Tensor '{tensor_name}' not found in {tensor_file}")
|
||||
sys.exit(1)
|
||||
|
||||
except FileNotFoundError:
|
||||
print(f"Error: The file '{file_path}' was not found.")
|
||||
sys.exit(1)
|
||||
except Exception as e:
|
||||
print(f"An error occurred: {e}")
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(
|
||||
description="Print tensor information from a safetensors model"
|
||||
)
|
||||
parser.add_argument(
|
||||
"tensor_name",
|
||||
nargs="?", # optional (if --list is used for example)
|
||||
help="Name of the tensor to inspect"
|
||||
)
|
||||
parser.add_argument(
|
||||
"-m", "--model-path",
|
||||
type=Path,
|
||||
help="Path to the model directory (default: MODEL_PATH environment variable)"
|
||||
)
|
||||
parser.add_argument(
|
||||
"-l", "--list",
|
||||
action="store_true",
|
||||
help="List unique tensor patterns in the model (layer numbers replaced with #)"
|
||||
)
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
model_path = args.model_path
|
||||
if model_path is None:
|
||||
model_path_str = os.environ.get("MODEL_PATH")
|
||||
if model_path_str is None:
|
||||
print("Error: --model-path not provided and MODEL_PATH environment variable not set")
|
||||
sys.exit(1)
|
||||
model_path = Path(model_path_str)
|
||||
|
||||
if not model_path.exists():
|
||||
print(f"Error: Model path does not exist: {model_path}")
|
||||
sys.exit(1)
|
||||
|
||||
if not model_path.is_dir():
|
||||
print(f"Error: Model path is not a directory: {model_path}")
|
||||
sys.exit(1)
|
||||
|
||||
if args.list:
|
||||
list_all_tensors(model_path, unique=True)
|
||||
else:
|
||||
if args.tensor_name is None:
|
||||
print("Error: tensor_name is required when not using --list")
|
||||
sys.exit(1)
|
||||
print_tensor_info(model_path, args.tensor_name)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -7,6 +7,9 @@
|
||||
#include "ggml-metal-context.h"
|
||||
#include "ggml-metal-ops.h"
|
||||
|
||||
#include <mutex>
|
||||
#include <string>
|
||||
|
||||
#define GGML_METAL_NAME "MTL"
|
||||
#define GGML_METAL_MAX_DEVICES 16
|
||||
|
||||
|
||||
@@ -1263,25 +1263,30 @@ struct vk_op_diag_mask_push_constants {
|
||||
|
||||
struct vk_op_rope_push_constants {
|
||||
uint32_t rope_mode;
|
||||
uint32_t ncols;
|
||||
uint32_t nrows;
|
||||
uint32_t n_dims;
|
||||
float freq_scale;
|
||||
uint32_t p_delta_rows;
|
||||
float freq_base;
|
||||
float ext_factor;
|
||||
float attn_factor;
|
||||
float corr_dims[2];
|
||||
float theta_scale;
|
||||
uint32_t has_ff;
|
||||
uint32_t ne02;
|
||||
uint32_t s1;
|
||||
uint32_t s2;
|
||||
int32_t sections[4];
|
||||
uint32_t is_imrope;
|
||||
uint32_t is_back;
|
||||
uint32_t set_rows_stride;
|
||||
uint32_t ne00;
|
||||
uint32_t ne01;
|
||||
uint32_t ne02;
|
||||
uint32_t nb01;
|
||||
uint32_t nb02;
|
||||
uint32_t nb03;
|
||||
uint32_t nb11;
|
||||
uint32_t nb12;
|
||||
uint32_t nb13;
|
||||
};
|
||||
static_assert(sizeof(vk_op_rope_push_constants) <= 128, "sizeof(vk_op_rope_push_constants) must be <= 128");
|
||||
|
||||
// For fused rms_norm+mul+rope(+view+set_rows)
|
||||
struct vk_op_rms_norm_mul_rope_push_constants {
|
||||
@@ -3199,9 +3204,10 @@ static void ggml_vk_load_shaders(vk_device& device) {
|
||||
const uint32_t D_lsb = D ^ (D & (D-1));
|
||||
uint32_t D_split = std::min(std::min(device->subgroup_size, 8u), D_lsb / 4);
|
||||
|
||||
// Nvidia prefers shared memory use to load large tiles of K
|
||||
// Nvidia prefers shared memory use to load large tiles of K.
|
||||
// Switch to loading from global memory when it would use too much shared memory.
|
||||
// AMD prefers loading K directly from global memory
|
||||
const uint32_t k_load_shmem = device->vendor_id == VK_VENDOR_ID_NVIDIA ? 1 : 0;
|
||||
const uint32_t k_load_shmem = device->vendor_id == VK_VENDOR_ID_NVIDIA && hsk < 256 ? 1 : 0;
|
||||
|
||||
return {wg_size, rows_cols[0], rows_cols[1], hsk, hsv, clamp, D_split, device->subgroup_size, k_load_shmem};
|
||||
};
|
||||
@@ -5555,9 +5561,9 @@ static void ggml_vk_instance_init() {
|
||||
// Check if there are two physical devices corresponding to the same GPU
|
||||
// This handles the case where the same GPU appears with different drivers (e.g., RADV + AMDVLK on Linux),
|
||||
// see https://github.com/ggml-org/llama.cpp/pull/7582 for original deduplication.
|
||||
// However, for MoltenVK on macOS, multiple GPUs on the same card may report the same UUID,
|
||||
// see https://github.com/KhronosGroup/MoltenVK/issues/2683. Until this is fixed, we'll only deduplicate
|
||||
// when drivers differ (same driver + same UUID = likely different GPUs)
|
||||
// MoltenVK on macOS may report the same UUID for distinct GPUs on multi-GPU cards,
|
||||
// see https://github.com/KhronosGroup/MoltenVK/issues/2683. Skip when both old/new
|
||||
// driver is MoltenVK
|
||||
auto old_device = std::find_if(
|
||||
vk_instance.device_indices.begin(),
|
||||
vk_instance.device_indices.end(),
|
||||
@@ -5574,11 +5580,9 @@ static void ggml_vk_instance_init() {
|
||||
old_id.deviceLUIDValid && new_id.deviceLUIDValid &&
|
||||
std::equal(std::begin(old_id.deviceLUID), std::end(old_id.deviceLUID), std::begin(new_id.deviceLUID))
|
||||
);
|
||||
bool both_molten_vk = (new_driver.driverID == vk::DriverId::eMoltenvk && old_driver.driverID == vk::DriverId::eMoltenvk);
|
||||
|
||||
// Only deduplicate if same UUID AND different drivers
|
||||
// (same driver + same UUID on MoltenVK = likely different GPUs on multi-GPU card)
|
||||
bool different_driver = (old_driver.driverID != new_driver.driverID);
|
||||
return same_uuid && different_driver;
|
||||
return same_uuid && !both_molten_vk;
|
||||
}
|
||||
);
|
||||
if (old_device == vk_instance.device_indices.end()) {
|
||||
@@ -8407,7 +8411,7 @@ static bool ggml_vk_flash_attn_coopmat_shmem_support(const vk_device& device, co
|
||||
const uint32_t sfshstride = (hsk <= 128) ? (Br + 8) : Br;
|
||||
const uint32_t sfsh = Bc * sfshstride * acctype;
|
||||
|
||||
const bool k_load_shmem = device->vendor_id == VK_VENDOR_ID_NVIDIA;
|
||||
const bool k_load_shmem = device->vendor_id == VK_VENDOR_ID_NVIDIA && hsk < 256;
|
||||
const uint32_t kshstride = (k_load_shmem ? hsk_pad : MatBr) / 4 + 2;
|
||||
const uint32_t vsh_stride = MatBc / 4 * row_split;
|
||||
const uint32_t ksh = ((kshstride >= vsh_stride) ? (Bc * kshstride) : (Bc * vsh_stride)) * f16vec4;
|
||||
@@ -10405,12 +10409,22 @@ static vk_op_rope_push_constants ggml_vk_make_rope_constants(const ggml_tensor *
|
||||
|
||||
uint32_t nb01 = src0->nb[1] / ggml_type_size(src0->type);
|
||||
uint32_t nb02 = src0->nb[2] / ggml_type_size(src0->type);
|
||||
uint32_t nb03 = src0->nb[3] / ggml_type_size(src0->type);
|
||||
|
||||
uint32_t nb11 = dst->nb[1] / ggml_type_size(dst->type);
|
||||
uint32_t nb12 = dst->nb[2] / ggml_type_size(dst->type);
|
||||
uint32_t nb13 = dst->nb[3] / ggml_type_size(dst->type);
|
||||
|
||||
vk_op_rope_push_constants rope {
|
||||
(uint32_t)mode, (uint32_t)src0->ne[0], (uint32_t)ggml_nrows(src0), (uint32_t)n_dims, freq_scale, (uint32_t)src0->ne[1],
|
||||
freq_base, ext_factor, attn_factor, {corr_dims[0], corr_dims[1]}, theta_scale,
|
||||
has_ff, (uint32_t)src0->ne[2], nb01, nb02,
|
||||
(uint32_t)mode, (uint32_t)ggml_nrows(src0), (uint32_t)n_dims, freq_scale,
|
||||
freq_base, ext_factor, attn_factor, {corr_dims[0], corr_dims[1]}, theta_scale, has_ff,
|
||||
{ sections[0], sections[1], sections[2], sections[3] }, is_imrope, backprop, set_rows_stride,
|
||||
|
||||
(uint32_t)src0->ne[0],
|
||||
(uint32_t)src0->ne[1],
|
||||
(uint32_t)src0->ne[2],
|
||||
nb01, nb02, nb03,
|
||||
nb11, nb12, nb13,
|
||||
};
|
||||
|
||||
return rope;
|
||||
@@ -14798,6 +14812,7 @@ static bool ggml_backend_vk_device_supports_op(ggml_backend_dev_t dev, const ggm
|
||||
case GGML_OP_REPEAT_BACK:
|
||||
return op->type == GGML_TYPE_F32 && op->src[0]->type == GGML_TYPE_F32;
|
||||
case GGML_OP_ROPE:
|
||||
return ggml_is_contiguous_rows(op) && ggml_is_contiguous_rows(op->src[0]);
|
||||
case GGML_OP_ROPE_BACK:
|
||||
case GGML_OP_NONE:
|
||||
case GGML_OP_RESHAPE:
|
||||
|
||||
@@ -112,12 +112,11 @@ void rms_norm(uint num_iters) {
|
||||
#if RMS_NORM_ROPE_FUSION
|
||||
barrier();
|
||||
rope_params rp = p.rope;
|
||||
uint rope_row = (samp*nchannels + channel)*nrows + row;
|
||||
for (uint t = 2*tid; t < ncols; t += 2*BLOCK_SIZE) {
|
||||
if (rp.rope_mode == GGML_ROPE_TYPE_NEOX) {
|
||||
rope_neox(t, rope_row, rp);
|
||||
rope_neox(t, row, channel, samp, rp);
|
||||
} else if (rp.rope_mode == GGML_ROPE_TYPE_NORMAL) {
|
||||
rope_norm(t, rope_row, rp);
|
||||
rope_norm(t, row, channel, samp, rp);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -4,12 +4,12 @@ float rope_yarn_ramp(const float low, const float high, const uint i0) {
|
||||
return 1.0f - min(1.0f, max(0.0f, y));
|
||||
}
|
||||
|
||||
uint rope_a_coord(const uint i0, const uint i01, const uint i02, rope_params p) {
|
||||
uint rope_a_coord(const uint i0, const uint i01, const uint i02, const uint i03, rope_params p) {
|
||||
#if RMS_NORM_ROPE_FUSION
|
||||
// Per-row offset in shared memory
|
||||
const uint ix = i0;
|
||||
#else
|
||||
const uint ix = i02*p.nb02 + i01*p.nb01 + i0;
|
||||
const uint ix = i03*p.nb03 + i02*p.nb02 + i01*p.nb01 + i0;
|
||||
#endif
|
||||
return ix;
|
||||
}
|
||||
@@ -34,26 +34,19 @@ void rope_yarn(const float theta_extrap, const uint i0, out float cos_theta, out
|
||||
sin_theta = sin(theta) * mscale;
|
||||
}
|
||||
|
||||
void rope_norm(const uint i0, const uint i1, rope_params p) {
|
||||
uint ne0 = p.ncols;
|
||||
uint ne1 = p.p_delta_rows;
|
||||
|
||||
if (i0 >= ne0) {
|
||||
void rope_norm(const uint i0, const uint i1, const uint i2, const uint i3, rope_params p) {
|
||||
if (i0 >= p.ne00) {
|
||||
return;
|
||||
}
|
||||
|
||||
// i1 is actually i2*nb2+i1, but the rows are contiguous
|
||||
const uint i01 = i1 % ne1;
|
||||
const uint i02 = i1 / ne1;
|
||||
|
||||
uint idst = i1*ne0 + i0;
|
||||
const uint ix = rope_a_coord(i0, i01, i02, p);
|
||||
uint idst = i0 + i1 * p.nb11 + i2 * p.nb12 + i3 * p.nb13;
|
||||
const uint ix = rope_a_coord(i0, i1, i2, i3, p);
|
||||
|
||||
// Fusion optimization: ROPE + VIEW + SET_ROWS.
|
||||
// The rope output is viewed as a 1D tensor and offset based on a row index in rope_data_i.
|
||||
if (p.set_rows_stride != 0) {
|
||||
idst = i01*ne0 + i0;
|
||||
idst += rope_data_i[i02].x * p.set_rows_stride;
|
||||
idst = i1*p.nb11 + i0;
|
||||
idst += rope_data_i[i2].x * p.set_rows_stride;
|
||||
}
|
||||
|
||||
if (i0 >= p.n_dims) {
|
||||
@@ -63,7 +56,7 @@ void rope_norm(const uint i0, const uint i1, rope_params p) {
|
||||
return;
|
||||
}
|
||||
|
||||
const float theta_base = rope_data_pos[i02] * pow(p.theta_scale, i0/2.0f);
|
||||
const float theta_base = rope_data_pos[i2] * pow(p.theta_scale, i0/2.0f);
|
||||
|
||||
const float freq_factor = p.has_ff != 0 ? rope_data_ff[i0/2] : 1.0f;
|
||||
|
||||
@@ -77,25 +70,19 @@ void rope_norm(const uint i0, const uint i1, rope_params p) {
|
||||
rope_data_d[idst + 1] = ROPE_D_TYPE(x0*sin_theta + x1*cos_theta);
|
||||
}
|
||||
|
||||
void rope_neox(const uint i0, const uint i1, rope_params p) {
|
||||
uint ne0 = p.ncols;
|
||||
uint ne1 = p.p_delta_rows;
|
||||
|
||||
if (i0 >= ne0) {
|
||||
void rope_neox(const uint i0, const uint i1, const uint i2, const uint i3, rope_params p) {
|
||||
if (i0 >= p.ne00) {
|
||||
return;
|
||||
}
|
||||
|
||||
const uint i01 = i1 % ne1;
|
||||
const uint i02 = i1 / ne1;
|
||||
|
||||
uint idst = i1*ne0 + i0/2;
|
||||
const uint ix = rope_a_coord(i0/2, i01, i02, p);
|
||||
uint idst = i0/2 + i1 * p.nb11 + i2 * p.nb12 + i3 * p.nb13;
|
||||
const uint ix = rope_a_coord(i0/2, i1, i2, i3, p);
|
||||
|
||||
// Fusion optimization: ROPE + VIEW + SET_ROWS.
|
||||
// The rope output is viewed as a 1D tensor and offset based on a row index in rope_data_i.
|
||||
if (p.set_rows_stride != 0) {
|
||||
idst = i01*ne0 + i0/2;
|
||||
idst += rope_data_i[i02].x * p.set_rows_stride;
|
||||
idst = i1*p.nb11 + i0/2;
|
||||
idst += rope_data_i[i2].x * p.set_rows_stride;
|
||||
}
|
||||
|
||||
if (i0 >= p.n_dims) {
|
||||
@@ -105,7 +92,7 @@ void rope_neox(const uint i0, const uint i1, rope_params p) {
|
||||
return;
|
||||
}
|
||||
|
||||
const float theta_base = rope_data_pos[i02] * pow(p.theta_scale, i0/2.0f);
|
||||
const float theta_base = rope_data_pos[i2] * pow(p.theta_scale, i0/2.0f);
|
||||
|
||||
const float freq_factor = p.has_ff != 0 ? rope_data_ff[i0/2] : 1.0f;
|
||||
|
||||
@@ -120,26 +107,19 @@ void rope_neox(const uint i0, const uint i1, rope_params p) {
|
||||
}
|
||||
|
||||
|
||||
void rope_multi(const uint i0, const uint i1, rope_params p) {
|
||||
uint ne0 = p.ncols;
|
||||
uint ne1 = p.p_delta_rows;
|
||||
uint ne2 = p.ne02;
|
||||
|
||||
if (i0 >= ne0) {
|
||||
void rope_multi(const uint i0, const uint i1, const uint i2, const uint i3, rope_params p) {
|
||||
if (i0 >= p.ne00) {
|
||||
return;
|
||||
}
|
||||
|
||||
const uint i01 = i1 % ne1;
|
||||
const uint i02 = i1 / ne1;
|
||||
|
||||
uint idst = i1*ne0 + i0/2;
|
||||
const uint ix = rope_a_coord(i0/2, i01, i02, p);
|
||||
uint idst = i0/2 + i1 * p.nb11 + i2 * p.nb12 + i3 * p.nb13;
|
||||
const uint ix = rope_a_coord(i0/2, i1, i2, i3, p);
|
||||
|
||||
// Fusion optimization: ROPE + VIEW + SET_ROWS.
|
||||
// The rope output is viewed as a 1D tensor and offset based on a row index in rope_data_i.
|
||||
if (p.set_rows_stride != 0) {
|
||||
idst = i01*ne0 + i0/2;
|
||||
idst += rope_data_i[i02].x * p.set_rows_stride;
|
||||
idst = i1*p.nb11 + i0/2;
|
||||
idst += rope_data_i[i2].x * p.set_rows_stride;
|
||||
}
|
||||
|
||||
if (i0 >= p.n_dims) {
|
||||
@@ -156,26 +136,26 @@ void rope_multi(const uint i0, const uint i1, rope_params p) {
|
||||
float theta_base = 0.0;
|
||||
if (p.is_imrope != 0) {
|
||||
if (sector % 3 == 1 && sector < 3 * p.sections[1]) {
|
||||
theta_base = rope_data_pos[i02 + ne2 * 1]*pow(p.theta_scale, i0/2.0f);
|
||||
theta_base = rope_data_pos[i2 + p.ne02 * 1]*pow(p.theta_scale, i0/2.0f);
|
||||
} else if (sector % 3 == 2 && sector < 3 * p.sections[2]) {
|
||||
theta_base = rope_data_pos[i02 + ne2 * 2]*pow(p.theta_scale, i0/2.0f);
|
||||
theta_base = rope_data_pos[i2 + p.ne02 * 2]*pow(p.theta_scale, i0/2.0f);
|
||||
} else if (sector % 3 == 0 && sector < 3 * p.sections[0]) {
|
||||
theta_base = rope_data_pos[i02]*pow(p.theta_scale, i0/2.0f);
|
||||
theta_base = rope_data_pos[i2]*pow(p.theta_scale, i0/2.0f);
|
||||
} else {
|
||||
theta_base = rope_data_pos[i02 + ne2 * 3]*pow(p.theta_scale, i0/2.0f);
|
||||
theta_base = rope_data_pos[i2 + p.ne02 * 3]*pow(p.theta_scale, i0/2.0f);
|
||||
}
|
||||
} else {
|
||||
if (sector < p.sections[0]) {
|
||||
theta_base = rope_data_pos[i02]*pow(p.theta_scale, i0/2.0f);
|
||||
theta_base = rope_data_pos[i2]*pow(p.theta_scale, i0/2.0f);
|
||||
}
|
||||
else if (sector >= p.sections[0] && sector < sec_w) {
|
||||
theta_base = rope_data_pos[i02 + ne2 * 1]*pow(p.theta_scale, i0/2.0f);
|
||||
theta_base = rope_data_pos[i2 + p.ne02 * 1]*pow(p.theta_scale, i0/2.0f);
|
||||
}
|
||||
else if (sector >= sec_w && sector < sec_w + p.sections[2]) {
|
||||
theta_base = rope_data_pos[i02 + ne2 * 2]*pow(p.theta_scale, i0/2.0f);
|
||||
theta_base = rope_data_pos[i2 + p.ne02 * 2]*pow(p.theta_scale, i0/2.0f);
|
||||
}
|
||||
else if (sector >= sec_w + p.sections[2]) {
|
||||
theta_base = rope_data_pos[i02 + ne2 * 3]*pow(p.theta_scale, i0/2.0f);
|
||||
theta_base = rope_data_pos[i2 + p.ne02 * 3]*pow(p.theta_scale, i0/2.0f);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -191,20 +171,13 @@ void rope_multi(const uint i0, const uint i1, rope_params p) {
|
||||
rope_data_d[idst + p.n_dims/2] = ROPE_D_TYPE(x0*sin_theta + x1*cos_theta);
|
||||
}
|
||||
|
||||
void rope_vision(const uint i0, const uint i1, rope_params p) {
|
||||
uint ne0 = p.ncols;
|
||||
uint ne1 = p.p_delta_rows;
|
||||
uint ne2 = p.ne02;
|
||||
|
||||
if (i0 >= ne0) {
|
||||
void rope_vision(const uint i0, const uint i1, const uint i2, const uint i3, rope_params p) {
|
||||
if (i0 >= p.ne00) {
|
||||
return;
|
||||
}
|
||||
|
||||
const uint i01 = i1 % ne1;
|
||||
const uint i02 = i1 / ne1;
|
||||
|
||||
const uint idst = i1*ne0 + i0/2;
|
||||
const uint ix = rope_a_coord(i0/2, i01, i02, p);
|
||||
const uint idst = i0/2 + i1 * p.nb11 + i2 * p.nb12 + i3 * p.nb13;
|
||||
const uint ix = rope_a_coord(i0/2, i1, i2, i3, p);
|
||||
|
||||
const int sect_dims = p.sections[0] + p.sections[1];
|
||||
const int sec_w = p.sections[1] + p.sections[0];
|
||||
@@ -213,11 +186,11 @@ void rope_vision(const uint i0, const uint i1, rope_params p) {
|
||||
float theta_base = 0.0;
|
||||
if (sector < p.sections[0]) {
|
||||
const uint p0 = sector;
|
||||
theta_base = rope_data_pos[i02]*pow(p.theta_scale, p0);
|
||||
theta_base = rope_data_pos[i2]*pow(p.theta_scale, p0);
|
||||
}
|
||||
else if (sector >= p.sections[0] && sector < sec_w) {
|
||||
const uint p0 = sector - p.sections[0];
|
||||
theta_base = rope_data_pos[i02 + ne2]*pow(p.theta_scale, p0);
|
||||
theta_base = rope_data_pos[i2 + p.ne02]*pow(p.theta_scale, p0);
|
||||
}
|
||||
|
||||
const float freq_factor = p.has_ff != 0 ? rope_data_ff[i0/2] : 1.0f;
|
||||
|
||||
@@ -5,10 +5,13 @@
|
||||
|
||||
void main() {
|
||||
const uint i0 = 2*gl_GlobalInvocationID.y;
|
||||
// i1 is actually i2*nb2+i1, but the rows are contiguous
|
||||
const uint i1 = gl_GlobalInvocationID.x + 32768 * gl_GlobalInvocationID.z;
|
||||
if (i1 >= pc.nrows) {
|
||||
const uint row = gl_GlobalInvocationID.x + 32768 * gl_GlobalInvocationID.z;
|
||||
if (row >= pc.nrows) {
|
||||
return;
|
||||
}
|
||||
rope_multi(i0, i1, pc);
|
||||
const uint i3 = row / (pc.ne01*pc.ne02);
|
||||
const uint i2 = (row - i3 * pc.ne01*pc.ne02) / pc.ne01;
|
||||
const uint i1 = (row - i3 * pc.ne01*pc.ne02 - i2 * pc.ne01);
|
||||
|
||||
rope_multi(i0, i1, i2, i3, pc);
|
||||
}
|
||||
|
||||
@@ -5,10 +5,13 @@
|
||||
|
||||
void main() {
|
||||
const uint i0 = 2*gl_GlobalInvocationID.y;
|
||||
// i1 is actually i2*nb2+i1, but the rows are contiguous
|
||||
const uint i1 = gl_GlobalInvocationID.x + 32768 * gl_GlobalInvocationID.z;
|
||||
if (i1 >= pc.nrows) {
|
||||
const uint row = gl_GlobalInvocationID.x + 32768 * gl_GlobalInvocationID.z;
|
||||
if (row >= pc.nrows) {
|
||||
return;
|
||||
}
|
||||
rope_neox(i0, i1, pc);
|
||||
const uint i3 = row / (pc.ne01*pc.ne02);
|
||||
const uint i2 = (row - i3 * pc.ne01*pc.ne02) / pc.ne01;
|
||||
const uint i1 = (row - i3 * pc.ne01*pc.ne02 - i2 * pc.ne01);
|
||||
|
||||
rope_neox(i0, i1, i2, i3, pc);
|
||||
}
|
||||
|
||||
@@ -5,10 +5,13 @@
|
||||
|
||||
void main() {
|
||||
const uint i0 = 2*gl_GlobalInvocationID.y;
|
||||
// i1 is actually i2*nb2+i1, but the rows are contiguous
|
||||
const uint i1 = gl_GlobalInvocationID.x + 32768 * gl_GlobalInvocationID.z;
|
||||
if (i1 >= pc.nrows) {
|
||||
const uint row = gl_GlobalInvocationID.x + 32768 * gl_GlobalInvocationID.z;
|
||||
if (row >= pc.nrows) {
|
||||
return;
|
||||
}
|
||||
rope_norm(i0, i1, pc);
|
||||
const uint i3 = row / (pc.ne01*pc.ne02);
|
||||
const uint i2 = (row - i3 * pc.ne01*pc.ne02) / pc.ne01;
|
||||
const uint i1 = (row - i3 * pc.ne01*pc.ne02 - i2 * pc.ne01);
|
||||
|
||||
rope_norm(i0, i1, i2, i3, pc);
|
||||
}
|
||||
|
||||
@@ -5,24 +5,29 @@
|
||||
|
||||
struct rope_params {
|
||||
uint rope_mode;
|
||||
uint ncols;
|
||||
uint nrows;
|
||||
uint n_dims;
|
||||
float freq_scale;
|
||||
uint p_delta_rows;
|
||||
float freq_base;
|
||||
float ext_factor;
|
||||
float attn_factor;
|
||||
float corr_dims[2];
|
||||
float theta_scale;
|
||||
uint has_ff;
|
||||
uint ne02;
|
||||
uint nb01;
|
||||
uint nb02;
|
||||
int sections[4];
|
||||
uint is_imrope;
|
||||
uint is_back;
|
||||
uint set_rows_stride;
|
||||
|
||||
uint ne00;
|
||||
uint ne01;
|
||||
uint ne02;
|
||||
uint nb01;
|
||||
uint nb02;
|
||||
uint nb03;
|
||||
uint nb11;
|
||||
uint nb12;
|
||||
uint nb13;
|
||||
};
|
||||
|
||||
#endif // !defined(GGML_ROPE_PARAMS)
|
||||
|
||||
@@ -5,10 +5,13 @@
|
||||
|
||||
void main() {
|
||||
const uint i0 = 2*gl_GlobalInvocationID.y;
|
||||
// i1 is actually i2*nb2+i1, but the rows are contiguous
|
||||
const uint i1 = gl_GlobalInvocationID.x + 32768 * gl_GlobalInvocationID.z;
|
||||
if (i1 >= pc.nrows) {
|
||||
const uint row = gl_GlobalInvocationID.x + 32768 * gl_GlobalInvocationID.z;
|
||||
if (row >= pc.nrows) {
|
||||
return;
|
||||
}
|
||||
rope_vision(i0, i1, pc);
|
||||
const uint i3 = row / (pc.ne01*pc.ne02);
|
||||
const uint i2 = (row - i3 * pc.ne01*pc.ne02) / pc.ne01;
|
||||
const uint i1 = (row - i3 * pc.ne01*pc.ne02 - i2 * pc.ne01);
|
||||
|
||||
rope_vision(i0, i1, i2, i3, pc);
|
||||
}
|
||||
|
||||
@@ -12,8 +12,8 @@ vendor = {
|
||||
# "https://github.com/mackron/miniaudio/raw/refs/tags/0.11.23/miniaudio.h": "vendor/miniaudio/miniaudio.h",
|
||||
"https://github.com/mackron/miniaudio/raw/669ed3e844524fcd883231b13095baee9f6de304/miniaudio.h": "vendor/miniaudio/miniaudio.h",
|
||||
|
||||
"https://raw.githubusercontent.com/yhirose/cpp-httplib/refs/tags/v0.30.1/httplib.h": "vendor/cpp-httplib/httplib.h",
|
||||
"https://raw.githubusercontent.com/yhirose/cpp-httplib/refs/tags/v0.30.1/LICENSE": "vendor/cpp-httplib/LICENSE",
|
||||
"https://raw.githubusercontent.com/yhirose/cpp-httplib/refs/tags/v0.30.2/httplib.h": "vendor/cpp-httplib/httplib.h",
|
||||
"https://raw.githubusercontent.com/yhirose/cpp-httplib/refs/tags/v0.30.2/LICENSE": "vendor/cpp-httplib/LICENSE",
|
||||
|
||||
"https://raw.githubusercontent.com/sheredom/subprocess.h/b49c56e9fe214488493021017bf3954b91c7c1f5/subprocess.h": "vendor/sheredom/subprocess.h",
|
||||
}
|
||||
|
||||
@@ -265,9 +265,15 @@ std::pair<ggml_tensor *, ggml_tensor *> llm_build_qwen3next::build_delta_net_chu
|
||||
cb(g_diff, "g_diff", il); // shape: (chunk_size, 1, n_chunks, H_v * n_seqs)
|
||||
|
||||
ggml_tensor * g_diff_exp = ggml_exp(ctx0, g_diff);
|
||||
ggml_tensor * key_gdiff = ggml_mul(ctx0, k, g_diff_exp);
|
||||
ggml_tensor * g_diff_exp_t = ggml_reshape_4d(ctx0, g_diff_exp,
|
||||
1, chunk_size, n_chunks, g_diff_exp->ne[3]);
|
||||
|
||||
ggml_tensor * key_gdiff = ggml_mul(ctx0, k, g_diff_exp_t);
|
||||
cb(key_gdiff, "key_gdiff", il); // shape: (S_k, chunk_size, n_chunks, H_v * n_seqs)
|
||||
|
||||
ggml_tensor * key_gdiff_t = ggml_cont(ctx0, ggml_transpose(ctx0, key_gdiff));
|
||||
cb(key_gdiff_t, "key_gdiff_t", il); // shape: (chunk_size, S_k, n_chunks, H_v * n_seqs)
|
||||
|
||||
|
||||
// state to be updated per chunk
|
||||
ggml_tensor * new_state = state; // ggml_dup(ctx0, state);
|
||||
@@ -322,9 +328,9 @@ std::pair<ggml_tensor *, ggml_tensor *> llm_build_qwen3next::build_delta_net_chu
|
||||
: ggml_concat(ctx0, core_attn_out, core_attn_out_chunk, 2);
|
||||
|
||||
// kgdmulvnew = (key_gdiff).transpose(-1, -2) @ v_new
|
||||
ggml_tensor * k_gdiff = ggml_cont(ctx0, get_slice_2d(ctx0, key_gdiff, chunk));
|
||||
ggml_tensor * k_gdiff_t = get_slice_2d(ctx0, key_gdiff_t, chunk);
|
||||
//ggml_tensor * kgdmulvnew = ggml_mul_mat(ctx0, k_gdiff, v_new); // this is slower on metal, why?
|
||||
ggml_tensor * kgdmulvnew = ggml_mul_mat(ctx0, v_new_t, ggml_cont(ctx0, ggml_transpose(ctx0, k_gdiff)));
|
||||
ggml_tensor * kgdmulvnew = ggml_mul_mat(ctx0, v_new_t, k_gdiff_t);
|
||||
|
||||
// last_recurrent_state = last_recurrent_state * g_last + kgdmulvnew
|
||||
ggml_tensor * gexp_last_chunk = ggml_cont(ctx0, get_slice_2d(ctx0, g_last_exp, chunk));
|
||||
|
||||
@@ -8032,6 +8032,8 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
|
||||
for (int mode : {GGML_ROPE_TYPE_NORMAL, GGML_ROPE_TYPE_NEOX, GGML_ROPE_TYPE_MROPE, GGML_ROPE_TYPE_IMROPE, GGML_ROPE_TYPE_VISION}) {
|
||||
for (bool ff : {false, true}) {
|
||||
test_cases.emplace_back(new test_rope(type, {128, 32, 2, 1}, 128, mode, 512, 1.4245f, 0.7465f, 1.4245f, ff, 0, true, true));
|
||||
test_cases.emplace_back(new test_rope(type, {128, 32, 2, 1}, 128, mode, 512, 1.4245f, 0.7465f, 1.4245f, ff, 1, true, true));
|
||||
test_cases.emplace_back(new test_rope(type, {128, 32, 2, 3}, 128, mode, 512, 1.4245f, 0.7465f, 1.4245f, ff, 1, true, true));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
26
vendor/cpp-httplib/CMakeLists.txt
vendored
26
vendor/cpp-httplib/CMakeLists.txt
vendored
@@ -3,9 +3,14 @@ license_add_file("cpp-httplib" "LICENSE")
|
||||
|
||||
find_package(Threads REQUIRED)
|
||||
|
||||
llama_add_compile_flags()
|
||||
|
||||
add_library(${TARGET} STATIC httplib.cpp httplib.h)
|
||||
if (NOT MSVC)
|
||||
# disable warnings in 3rd party code
|
||||
|
||||
# disable warnings in 3rd party code
|
||||
if (CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
|
||||
target_compile_options(${TARGET} PRIVATE /w)
|
||||
else()
|
||||
target_compile_options(${TARGET} PRIVATE -w)
|
||||
endif()
|
||||
|
||||
@@ -146,6 +151,23 @@ elseif (LLAMA_OPENSSL)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
# disable warnings in 3rd party code
|
||||
if(LLAMA_BUILD_BORINGSSL OR LLAMA_BUILD_LIBRESSL)
|
||||
if (CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
|
||||
target_compile_options(ssl PRIVATE /w)
|
||||
target_compile_options(crypto PRIVATE /w)
|
||||
if(LLAMA_BUILD_BORINGSSL)
|
||||
target_compile_options(fipsmodule PRIVATE /w)
|
||||
endif()
|
||||
else()
|
||||
target_compile_options(ssl PRIVATE -w)
|
||||
target_compile_options(crypto PRIVATE -w)
|
||||
if(LLAMA_BUILD_BORINGSSL)
|
||||
target_compile_options(fipsmodule PRIVATE -w)
|
||||
endif()
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if (CPPHTTPLIB_OPENSSL_SUPPORT)
|
||||
target_compile_definitions(${TARGET} PUBLIC CPPHTTPLIB_OPENSSL_SUPPORT) # used in server.cpp
|
||||
if (APPLE AND CMAKE_SYSTEM_NAME STREQUAL "Darwin")
|
||||
|
||||
238
vendor/cpp-httplib/httplib.cpp
vendored
238
vendor/cpp-httplib/httplib.cpp
vendored
@@ -117,6 +117,8 @@ time_t parse_http_date(const std::string &date_str) {
|
||||
|
||||
#ifdef _WIN32
|
||||
return _mkgmtime(&tm_buf);
|
||||
#elif defined _AIX
|
||||
return mktime(&tm_buf);
|
||||
#else
|
||||
return timegm(&tm_buf);
|
||||
#endif
|
||||
@@ -1376,7 +1378,7 @@ int getaddrinfo_with_timeout(const char *node, const char *service,
|
||||
|
||||
// Allocate on the heap, so the resolver thread can keep using the data.
|
||||
auto state = std::make_shared<GetAddrInfoState>();
|
||||
state->node = node;
|
||||
if (node) { state->node = node; }
|
||||
state->service = service;
|
||||
state->hints = *hints;
|
||||
|
||||
@@ -2896,10 +2898,20 @@ bool parse_range_header(const std::string &s, Ranges &ranges) try {
|
||||
return;
|
||||
}
|
||||
|
||||
const auto first =
|
||||
static_cast<ssize_t>(lhs.empty() ? -1 : std::stoll(lhs));
|
||||
const auto last =
|
||||
static_cast<ssize_t>(rhs.empty() ? -1 : std::stoll(rhs));
|
||||
ssize_t first = -1;
|
||||
if (!lhs.empty()) {
|
||||
ssize_t v;
|
||||
auto res = detail::from_chars(lhs.data(), lhs.data() + lhs.size(), v);
|
||||
if (res.ec == std::errc{}) { first = v; }
|
||||
}
|
||||
|
||||
ssize_t last = -1;
|
||||
if (!rhs.empty()) {
|
||||
ssize_t v;
|
||||
auto res = detail::from_chars(rhs.data(), rhs.data() + rhs.size(), v);
|
||||
if (res.ec == std::errc{}) { last = v; }
|
||||
}
|
||||
|
||||
if ((first == -1 && last == -1) ||
|
||||
(first != -1 && last != -1 && first > last)) {
|
||||
all_valid_ranges = false;
|
||||
@@ -2974,25 +2986,17 @@ bool parse_accept_header(const std::string &s,
|
||||
return;
|
||||
}
|
||||
|
||||
#ifdef CPPHTTPLIB_NO_EXCEPTIONS
|
||||
{
|
||||
std::istringstream iss(quality_str);
|
||||
iss >> accept_entry.quality;
|
||||
|
||||
// Check if conversion was successful and entire string was consumed
|
||||
if (iss.fail() || !iss.eof()) {
|
||||
double v = 0.0;
|
||||
auto res = detail::from_chars(
|
||||
quality_str.data(), quality_str.data() + quality_str.size(), v);
|
||||
if (res.ec == std::errc{}) {
|
||||
accept_entry.quality = v;
|
||||
} else {
|
||||
has_invalid_entry = true;
|
||||
return;
|
||||
}
|
||||
}
|
||||
#else
|
||||
try {
|
||||
accept_entry.quality = std::stod(quality_str);
|
||||
} catch (...) {
|
||||
has_invalid_entry = true;
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
// Check if quality is in valid range [0.0, 1.0]
|
||||
if (accept_entry.quality < 0.0 || accept_entry.quality > 1.0) {
|
||||
has_invalid_entry = true;
|
||||
@@ -5570,13 +5574,26 @@ bool Server::read_content(Stream &strm, Request &req, Response &res) {
|
||||
strm, req, res,
|
||||
// Regular
|
||||
[&](const char *buf, size_t n) {
|
||||
// Prevent arithmetic overflow when checking sizes.
|
||||
// Avoid computing (req.body.size() + n) directly because
|
||||
// adding two unsigned `size_t` values can wrap around and
|
||||
// produce a small result instead of indicating overflow.
|
||||
// Instead, check using subtraction: ensure `n` does not
|
||||
// exceed the remaining capacity `max_size() - size()`.
|
||||
if (req.body.size() >= req.body.max_size() ||
|
||||
n > req.body.max_size() - req.body.size()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Limit decompressed body size to payload_max_length_ to protect
|
||||
// against "zip bomb" attacks where a small compressed payload
|
||||
// decompresses to a massive size.
|
||||
if (req.body.size() + n > payload_max_length_ ||
|
||||
req.body.size() + n > req.body.max_size()) {
|
||||
if (payload_max_length_ > 0 &&
|
||||
(req.body.size() >= payload_max_length_ ||
|
||||
n > payload_max_length_ - req.body.size())) {
|
||||
return false;
|
||||
}
|
||||
|
||||
req.body.append(buf, n);
|
||||
return true;
|
||||
},
|
||||
@@ -5666,22 +5683,29 @@ bool Server::read_content_core(
|
||||
// oversized request and fail early (causing connection close). For SSL
|
||||
// builds we cannot reliably peek the decrypted application bytes, so keep
|
||||
// the original behaviour.
|
||||
#if !defined(CPPHTTPLIB_OPENSSL_SUPPORT) && !defined(_WIN32)
|
||||
#if !defined(CPPHTTPLIB_OPENSSL_SUPPORT)
|
||||
if (!req.has_header("Content-Length") &&
|
||||
!detail::is_chunked_transfer_encoding(req.headers)) {
|
||||
socket_t s = strm.socket();
|
||||
if (s != INVALID_SOCKET) {
|
||||
// Peek up to payload_max_length_ + 1 bytes. If more than
|
||||
// payload_max_length_ bytes are pending, reject the request.
|
||||
size_t to_peek =
|
||||
(payload_max_length_ > 0)
|
||||
? (std::min)(payload_max_length_ + 1, static_cast<size_t>(4096))
|
||||
: 1;
|
||||
std::vector<char> peekbuf(to_peek);
|
||||
ssize_t n = ::recv(s, peekbuf.data(), to_peek, MSG_PEEK);
|
||||
if (n > 0 && static_cast<size_t>(n) > payload_max_length_) {
|
||||
// Indicate failure so connection will be closed.
|
||||
return false;
|
||||
// Only peek if payload_max_length is set to a finite value
|
||||
if (payload_max_length_ > 0 &&
|
||||
payload_max_length_ < (std::numeric_limits<size_t>::max)()) {
|
||||
socket_t s = strm.socket();
|
||||
if (s != INVALID_SOCKET) {
|
||||
// Peek to check if there is any pending data
|
||||
char peekbuf[1];
|
||||
ssize_t n = ::recv(s, peekbuf, 1, MSG_PEEK);
|
||||
if (n > 0) {
|
||||
// There is data, so read it with payload limit enforcement
|
||||
auto result = detail::read_content_without_length(
|
||||
strm, payload_max_length_, out);
|
||||
if (result == detail::ReadContentResult::PayloadTooLarge) {
|
||||
res.status = StatusCode::PayloadTooLarge_413;
|
||||
return false;
|
||||
} else if (result != detail::ReadContentResult::Success) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
return true;
|
||||
@@ -6656,7 +6680,8 @@ void ClientImpl::close_socket(Socket &socket) {
|
||||
}
|
||||
|
||||
bool ClientImpl::read_response_line(Stream &strm, const Request &req,
|
||||
Response &res) const {
|
||||
Response &res,
|
||||
bool skip_100_continue) const {
|
||||
std::array<char, 2048> buf{};
|
||||
|
||||
detail::stream_line_reader line_reader(strm, buf.data(), buf.size());
|
||||
@@ -6677,8 +6702,8 @@ bool ClientImpl::read_response_line(Stream &strm, const Request &req,
|
||||
res.status = std::stoi(std::string(m[2]));
|
||||
res.reason = std::string(m[3]);
|
||||
|
||||
// Ignore '100 Continue'
|
||||
while (res.status == StatusCode::Continue_100) {
|
||||
// Ignore '100 Continue' (only when not using Expect: 100-continue explicitly)
|
||||
while (skip_100_continue && res.status == StatusCode::Continue_100) {
|
||||
if (!line_reader.getline()) { return false; } // CRLF
|
||||
if (!line_reader.getline()) { return false; } // next response line
|
||||
|
||||
@@ -7463,7 +7488,8 @@ bool ClientImpl::write_content_with_provider(Stream &strm,
|
||||
}
|
||||
|
||||
bool ClientImpl::write_request(Stream &strm, Request &req,
|
||||
bool close_connection, Error &error) {
|
||||
bool close_connection, Error &error,
|
||||
bool skip_body) {
|
||||
// Prepare additional headers
|
||||
if (close_connection) {
|
||||
if (!req.has_header("Connection")) {
|
||||
@@ -7582,7 +7608,59 @@ bool ClientImpl::write_request(Stream &strm, Request &req,
|
||||
}
|
||||
}
|
||||
|
||||
// After sending request line and headers, wait briefly for an early server
|
||||
// response (e.g. 4xx) and avoid sending a potentially large request body
|
||||
// unnecessarily. This workaround is only enabled on Windows because Unix
|
||||
// platforms surface write errors (EPIPE) earlier; on Windows kernel send
|
||||
// buffering can accept large writes even when the peer already responded.
|
||||
// Check the stream first (which covers SSL via `is_readable()`), then
|
||||
// fall back to select on the socket. Only perform the wait for very large
|
||||
// request bodies to avoid interfering with normal small requests and
|
||||
// reduce side-effects. Poll briefly (up to 50ms as default) for an early
|
||||
// response. Skip this check when using Expect: 100-continue, as the protocol
|
||||
// handles early responses properly.
|
||||
#if defined(_WIN32)
|
||||
if (!skip_body &&
|
||||
req.body.size() > CPPHTTPLIB_WAIT_EARLY_SERVER_RESPONSE_THRESHOLD &&
|
||||
req.path.size() > CPPHTTPLIB_REQUEST_URI_MAX_LENGTH) {
|
||||
auto start = std::chrono::high_resolution_clock::now();
|
||||
|
||||
for (;;) {
|
||||
// Prefer socket-level readiness to avoid SSL_pending() false-positives
|
||||
// from SSL internals. If the underlying socket is readable, assume an
|
||||
// early response may be present.
|
||||
auto sock = strm.socket();
|
||||
if (sock != INVALID_SOCKET && detail::select_read(sock, 0, 0) > 0) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Fallback to stream-level check for non-socket streams or when the
|
||||
// socket isn't reporting readable. Avoid using `is_readable()` for
|
||||
// SSL, since `SSL_pending()` may report buffered records that do not
|
||||
// indicate a complete application-level response yet.
|
||||
if (!is_ssl() && strm.is_readable()) { return false; }
|
||||
|
||||
auto now = std::chrono::high_resolution_clock::now();
|
||||
auto elapsed =
|
||||
std::chrono::duration_cast<std::chrono::milliseconds>(now - start)
|
||||
.count();
|
||||
if (elapsed >= CPPHTTPLIB_WAIT_EARLY_SERVER_RESPONSE_TIMEOUT_MSECOND) {
|
||||
break;
|
||||
}
|
||||
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(1));
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
// Body
|
||||
if (skip_body) { return true; }
|
||||
|
||||
return write_request_body(strm, req, error);
|
||||
}
|
||||
|
||||
bool ClientImpl::write_request_body(Stream &strm, Request &req,
|
||||
Error &error) {
|
||||
if (req.body.empty()) {
|
||||
return write_content_with_provider(strm, req, error);
|
||||
}
|
||||
@@ -7758,8 +7836,20 @@ void ClientImpl::output_error_log(const Error &err,
|
||||
bool ClientImpl::process_request(Stream &strm, Request &req,
|
||||
Response &res, bool close_connection,
|
||||
Error &error) {
|
||||
// Send request
|
||||
if (!write_request(strm, req, close_connection, error)) { return false; }
|
||||
// Auto-add Expect: 100-continue for large bodies
|
||||
if (CPPHTTPLIB_EXPECT_100_THRESHOLD > 0 && !req.has_header("Expect")) {
|
||||
auto body_size = req.body.empty() ? req.content_length_ : req.body.size();
|
||||
if (body_size >= CPPHTTPLIB_EXPECT_100_THRESHOLD) {
|
||||
req.set_header("Expect", "100-continue");
|
||||
}
|
||||
}
|
||||
|
||||
// Check for Expect: 100-continue
|
||||
auto expect_100_continue = req.get_header_value("Expect") == "100-continue";
|
||||
|
||||
// Send request (skip body if using Expect: 100-continue)
|
||||
auto write_request_success =
|
||||
write_request(strm, req, close_connection, error, expect_100_continue);
|
||||
|
||||
#ifdef CPPHTTPLIB_OPENSSL_SUPPORT
|
||||
if (is_ssl()) {
|
||||
@@ -7774,14 +7864,48 @@ bool ClientImpl::process_request(Stream &strm, Request &req,
|
||||
}
|
||||
#endif
|
||||
|
||||
// Handle Expect: 100-continue with timeout
|
||||
if (expect_100_continue && CPPHTTPLIB_EXPECT_100_TIMEOUT_MSECOND > 0) {
|
||||
time_t sec = CPPHTTPLIB_EXPECT_100_TIMEOUT_MSECOND / 1000;
|
||||
time_t usec = (CPPHTTPLIB_EXPECT_100_TIMEOUT_MSECOND % 1000) * 1000;
|
||||
auto ret = detail::select_read(strm.socket(), sec, usec);
|
||||
if (ret <= 0) {
|
||||
// Timeout or error: send body anyway (server didn't respond in time)
|
||||
if (!write_request_body(strm, req, error)) { return false; }
|
||||
expect_100_continue = false; // Switch to normal response handling
|
||||
}
|
||||
}
|
||||
|
||||
// Receive response and headers
|
||||
if (!read_response_line(strm, req, res) ||
|
||||
// When using Expect: 100-continue, don't auto-skip `100 Continue` response
|
||||
if (!read_response_line(strm, req, res, !expect_100_continue) ||
|
||||
!detail::read_headers(strm, res.headers)) {
|
||||
error = Error::Read;
|
||||
if (write_request_success) { error = Error::Read; }
|
||||
output_error_log(error, &req);
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!write_request_success) { return false; }
|
||||
|
||||
// Handle Expect: 100-continue response
|
||||
if (expect_100_continue) {
|
||||
if (res.status == StatusCode::Continue_100) {
|
||||
// Server accepted, send the body
|
||||
if (!write_request_body(strm, req, error)) { return false; }
|
||||
|
||||
// Read the actual response
|
||||
res.headers.clear();
|
||||
res.body.clear();
|
||||
if (!read_response_line(strm, req, res) ||
|
||||
!detail::read_headers(strm, res.headers)) {
|
||||
error = Error::Read;
|
||||
output_error_log(error, &req);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
// If not 100 Continue, server returned an error; proceed with that response
|
||||
}
|
||||
|
||||
// Body
|
||||
if ((res.status != StatusCode::NoContent_204) && req.method != "HEAD" &&
|
||||
req.method != "CONNECT") {
|
||||
@@ -9543,7 +9667,7 @@ bool SSLClient::load_certs() {
|
||||
last_openssl_error_ = ERR_get_error();
|
||||
ret = false;
|
||||
}
|
||||
} else {
|
||||
} else if (!ca_cert_store_) {
|
||||
auto loaded = false;
|
||||
#ifdef _WIN32
|
||||
loaded =
|
||||
@@ -9790,7 +9914,11 @@ bool SSLClient::verify_host_with_common_name(X509 *server_cert) const {
|
||||
|
||||
bool SSLClient::check_host_name(const char *pattern,
|
||||
size_t pattern_len) const {
|
||||
if (host_.size() == pattern_len && host_ == pattern) { return true; }
|
||||
// Exact match (case-insensitive)
|
||||
if (host_.size() == pattern_len &&
|
||||
detail::case_ignore::equal(host_, std::string(pattern, pattern_len))) {
|
||||
return true;
|
||||
}
|
||||
|
||||
// Wildcard match
|
||||
// https://bugs.launchpad.net/ubuntu/+source/firefox-3.0/+bug/376484
|
||||
@@ -9805,9 +9933,23 @@ bool SSLClient::check_host_name(const char *pattern,
|
||||
auto itr = pattern_components.begin();
|
||||
for (const auto &h : host_components_) {
|
||||
auto &p = *itr;
|
||||
if (p != h && p != "*") {
|
||||
auto partial_match = (p.size() > 0 && p[p.size() - 1] == '*' &&
|
||||
!p.compare(0, p.size() - 1, h));
|
||||
if (!httplib::detail::case_ignore::equal(p, h) && p != "*") {
|
||||
bool partial_match = false;
|
||||
if (!p.empty() && p[p.size() - 1] == '*') {
|
||||
const auto prefix_length = p.size() - 1;
|
||||
if (prefix_length == 0) {
|
||||
partial_match = true;
|
||||
} else if (h.size() >= prefix_length) {
|
||||
partial_match =
|
||||
std::equal(p.begin(),
|
||||
p.begin() + static_cast<std::string::difference_type>(
|
||||
prefix_length),
|
||||
h.begin(), [](const char ca, const char cb) {
|
||||
return httplib::detail::case_ignore::to_lower(ca) ==
|
||||
httplib::detail::case_ignore::to_lower(cb);
|
||||
});
|
||||
}
|
||||
}
|
||||
if (!partial_match) { return false; }
|
||||
}
|
||||
++itr;
|
||||
|
||||
102
vendor/cpp-httplib/httplib.h
vendored
102
vendor/cpp-httplib/httplib.h
vendored
@@ -8,8 +8,8 @@
|
||||
#ifndef CPPHTTPLIB_HTTPLIB_H
|
||||
#define CPPHTTPLIB_HTTPLIB_H
|
||||
|
||||
#define CPPHTTPLIB_VERSION "0.30.1"
|
||||
#define CPPHTTPLIB_VERSION_NUM "0x001E01"
|
||||
#define CPPHTTPLIB_VERSION "0.30.2"
|
||||
#define CPPHTTPLIB_VERSION_NUM "0x001E02"
|
||||
|
||||
/*
|
||||
* Platform compatibility check
|
||||
@@ -98,6 +98,22 @@
|
||||
#define CPPHTTPLIB_CLIENT_MAX_TIMEOUT_MSECOND 0
|
||||
#endif
|
||||
|
||||
#ifndef CPPHTTPLIB_EXPECT_100_THRESHOLD
|
||||
#define CPPHTTPLIB_EXPECT_100_THRESHOLD 1024
|
||||
#endif
|
||||
|
||||
#ifndef CPPHTTPLIB_EXPECT_100_TIMEOUT_MSECOND
|
||||
#define CPPHTTPLIB_EXPECT_100_TIMEOUT_MSECOND 1000
|
||||
#endif
|
||||
|
||||
#ifndef CPPHTTPLIB_WAIT_EARLY_SERVER_RESPONSE_THRESHOLD
|
||||
#define CPPHTTPLIB_WAIT_EARLY_SERVER_RESPONSE_THRESHOLD (1024 * 1024)
|
||||
#endif
|
||||
|
||||
#ifndef CPPHTTPLIB_WAIT_EARLY_SERVER_RESPONSE_TIMEOUT_MSECOND
|
||||
#define CPPHTTPLIB_WAIT_EARLY_SERVER_RESPONSE_TIMEOUT_MSECOND 50
|
||||
#endif
|
||||
|
||||
#ifndef CPPHTTPLIB_IDLE_INTERVAL_SECOND
|
||||
#define CPPHTTPLIB_IDLE_INTERVAL_SECOND 0
|
||||
#endif
|
||||
@@ -286,8 +302,10 @@ using socket_t = int;
|
||||
#include <atomic>
|
||||
#include <cassert>
|
||||
#include <cctype>
|
||||
#include <chrono>
|
||||
#include <climits>
|
||||
#include <condition_variable>
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include <errno.h>
|
||||
#include <exception>
|
||||
@@ -305,6 +323,7 @@ using socket_t = int;
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
#include <sys/stat.h>
|
||||
#include <system_error>
|
||||
#include <thread>
|
||||
#include <unordered_map>
|
||||
#include <unordered_set>
|
||||
@@ -494,6 +513,69 @@ private:
|
||||
bool execute_on_destruction;
|
||||
};
|
||||
|
||||
// Simple from_chars implementation for integer and double types (C++17
|
||||
// substitute)
|
||||
template <typename T> struct from_chars_result {
|
||||
const char *ptr;
|
||||
std::errc ec;
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
inline from_chars_result<T> from_chars(const char *first, const char *last,
|
||||
T &value, int base = 10) {
|
||||
value = 0;
|
||||
const char *p = first;
|
||||
bool negative = false;
|
||||
|
||||
if (p != last && *p == '-') {
|
||||
negative = true;
|
||||
++p;
|
||||
}
|
||||
if (p == last) { return {first, std::errc::invalid_argument}; }
|
||||
|
||||
T result = 0;
|
||||
for (; p != last; ++p) {
|
||||
char c = *p;
|
||||
int digit = -1;
|
||||
if ('0' <= c && c <= '9') {
|
||||
digit = c - '0';
|
||||
} else if ('a' <= c && c <= 'z') {
|
||||
digit = c - 'a' + 10;
|
||||
} else if ('A' <= c && c <= 'Z') {
|
||||
digit = c - 'A' + 10;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
|
||||
if (digit < 0 || digit >= base) { break; }
|
||||
if (result > ((std::numeric_limits<T>::max)() - digit) / base) {
|
||||
return {p, std::errc::result_out_of_range};
|
||||
}
|
||||
result = result * base + digit;
|
||||
}
|
||||
|
||||
if (p == first || (negative && p == first + 1)) {
|
||||
return {first, std::errc::invalid_argument};
|
||||
}
|
||||
|
||||
value = negative ? -result : result;
|
||||
return {p, std::errc{}};
|
||||
}
|
||||
|
||||
// from_chars for double (simple wrapper for strtod)
|
||||
inline from_chars_result<double> from_chars(const char *first, const char *last,
|
||||
double &value) {
|
||||
std::string s(first, last);
|
||||
char *endptr = nullptr;
|
||||
errno = 0;
|
||||
value = std::strtod(s.c_str(), &endptr);
|
||||
if (endptr == s.c_str()) { return {first, std::errc::invalid_argument}; }
|
||||
if (errno == ERANGE) {
|
||||
return {first + (endptr - s.c_str()), std::errc::result_out_of_range};
|
||||
}
|
||||
return {first + (endptr - s.c_str()), std::errc{}};
|
||||
}
|
||||
|
||||
} // namespace detail
|
||||
|
||||
enum SSLVerifierResponse {
|
||||
@@ -1848,10 +1930,11 @@ private:
|
||||
Result send_(Request &&req);
|
||||
|
||||
socket_t create_client_socket(Error &error) const;
|
||||
bool read_response_line(Stream &strm, const Request &req,
|
||||
Response &res) const;
|
||||
bool read_response_line(Stream &strm, const Request &req, Response &res,
|
||||
bool skip_100_continue = true) const;
|
||||
bool write_request(Stream &strm, Request &req, bool close_connection,
|
||||
Error &error);
|
||||
Error &error, bool skip_body = false);
|
||||
bool write_request_body(Stream &strm, Request &req, Error &error);
|
||||
void prepare_default_headers(Request &r, bool for_stream,
|
||||
const std::string &ct);
|
||||
bool redirect(Request &req, Response &res, Error &error);
|
||||
@@ -3243,10 +3326,11 @@ private:
|
||||
msg.id = value;
|
||||
} else if (field == "retry") {
|
||||
// Parse retry interval in milliseconds
|
||||
try {
|
||||
retry_ms = std::stoi(value);
|
||||
} catch (...) {
|
||||
// Invalid retry value, ignore
|
||||
{
|
||||
int v = 0;
|
||||
auto res =
|
||||
detail::from_chars(value.data(), value.data() + value.size(), v);
|
||||
if (res.ec == std::errc{}) { retry_ms = v; }
|
||||
}
|
||||
}
|
||||
// Unknown fields are ignored per SSE spec
|
||||
|
||||
Reference in New Issue
Block a user