mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2026-02-12 14:03:20 +02:00
Compare commits
44 Commits
b2456
...
gg/hf-args
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
8c3d5b5a79 | ||
|
|
1b2f0a9ee8 | ||
|
|
29ab270e65 | ||
|
|
6b8bb3a31d | ||
|
|
68e210b354 | ||
|
|
b3e94f26ba | ||
|
|
b2075fd6a5 | ||
|
|
95d576b48e | ||
|
|
59c17f02de | ||
|
|
fa046eafbc | ||
|
|
be07a03217 | ||
|
|
d0a71233fb | ||
|
|
f372c49ccd | ||
|
|
924ce1dce7 | ||
|
|
03a8f8fafe | ||
|
|
cfd3be76e3 | ||
|
|
5b7b0ac8df | ||
|
|
1943c01981 | ||
|
|
5e43ba8742 | ||
|
|
76aa30a263 | ||
|
|
c5b8595e3f | ||
|
|
42e21c6882 | ||
|
|
1c51f98adc | ||
|
|
f9c7ba3447 | ||
|
|
272935b281 | ||
|
|
ccf58aa3ec | ||
|
|
91f8ad167d | ||
|
|
6b7e76d28c | ||
|
|
bc0baab2ea | ||
|
|
d795988d9e | ||
|
|
f8c4e745e1 | ||
|
|
47cc7a7bf9 | ||
|
|
bd60d82d0c | ||
|
|
6c0b287748 | ||
|
|
d26e8b669d | ||
|
|
d8b009a945 | ||
|
|
d0d5de42e5 | ||
|
|
b80cf3b2d1 | ||
|
|
970a48060a | ||
|
|
4c28b82529 | ||
|
|
2d15886bb0 | ||
|
|
d199ca79f2 | ||
|
|
104f5e0fc1 | ||
|
|
5e1b7f94a0 |
114
.github/workflows/build.yml
vendored
114
.github/workflows/build.yml
vendored
@@ -21,6 +21,118 @@ env:
|
||||
GGML_N_THREADS: 1
|
||||
|
||||
jobs:
|
||||
macOS-latest-cmake-arm64:
|
||||
runs-on: macos-14
|
||||
|
||||
steps:
|
||||
- name: Clone
|
||||
id: checkout
|
||||
uses: actions/checkout@v3
|
||||
|
||||
- name: Dependencies
|
||||
id: depends
|
||||
continue-on-error: true
|
||||
run: |
|
||||
brew update
|
||||
|
||||
- name: Build
|
||||
id: cmake_build
|
||||
run: |
|
||||
sysctl -a
|
||||
mkdir build
|
||||
cd build
|
||||
cmake -DLLAMA_FATAL_WARNINGS=ON -DLLAMA_METAL_EMBED_LIBRARY=ON -DLLAMA_CURL=ON ..
|
||||
cmake --build . --config Release -j $(sysctl -n hw.logicalcpu)
|
||||
|
||||
- name: Test
|
||||
id: cmake_test
|
||||
run: |
|
||||
cd build
|
||||
ctest -L main --verbose --timeout 900
|
||||
|
||||
- name: Determine tag name
|
||||
id: tag
|
||||
shell: bash
|
||||
run: |
|
||||
BUILD_NUMBER="$(git rev-list --count HEAD)"
|
||||
SHORT_HASH="$(git rev-parse --short=7 HEAD)"
|
||||
if [[ "${{ env.BRANCH_NAME }}" == "master" ]]; then
|
||||
echo "name=b${BUILD_NUMBER}" >> $GITHUB_OUTPUT
|
||||
else
|
||||
SAFE_NAME=$(echo "${{ env.BRANCH_NAME }}" | tr '/' '-')
|
||||
echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT
|
||||
fi
|
||||
|
||||
- name: Pack artifacts
|
||||
id: pack_artifacts
|
||||
if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
|
||||
run: |
|
||||
cp LICENSE ./build/bin/
|
||||
zip -r llama-${{ steps.tag.outputs.name }}-bin-macos-arm64.zip ./build/bin/*
|
||||
|
||||
- name: Upload artifacts
|
||||
if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
|
||||
uses: actions/upload-artifact@v3
|
||||
with:
|
||||
path: |
|
||||
llama-${{ steps.tag.outputs.name }}-bin-macos-arm64.zip
|
||||
|
||||
macOS-latest-cmake-x64:
|
||||
runs-on: macos-latest
|
||||
|
||||
steps:
|
||||
- name: Clone
|
||||
id: checkout
|
||||
uses: actions/checkout@v3
|
||||
|
||||
- name: Dependencies
|
||||
id: depends
|
||||
continue-on-error: true
|
||||
run: |
|
||||
brew update
|
||||
|
||||
- name: Build
|
||||
id: cmake_build
|
||||
run: |
|
||||
sysctl -a
|
||||
mkdir build
|
||||
cd build
|
||||
cmake -DLLAMA_FATAL_WARNINGS=ON -DLLAMA_METAL_EMBED_LIBRARY=ON -DLLAMA_CURL=ON ..
|
||||
cmake --build . --config Release -j $(sysctl -n hw.logicalcpu)
|
||||
|
||||
- name: Test
|
||||
id: cmake_test
|
||||
run: |
|
||||
cd build
|
||||
ctest -L main --verbose --timeout 900
|
||||
|
||||
- name: Determine tag name
|
||||
id: tag
|
||||
shell: bash
|
||||
run: |
|
||||
BUILD_NUMBER="$(git rev-list --count HEAD)"
|
||||
SHORT_HASH="$(git rev-parse --short=7 HEAD)"
|
||||
if [[ "${{ env.BRANCH_NAME }}" == "master" ]]; then
|
||||
echo "name=b${BUILD_NUMBER}" >> $GITHUB_OUTPUT
|
||||
else
|
||||
SAFE_NAME=$(echo "${{ env.BRANCH_NAME }}" | tr '/' '-')
|
||||
echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT
|
||||
fi
|
||||
|
||||
- name: Pack artifacts
|
||||
id: pack_artifacts
|
||||
if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
|
||||
run: |
|
||||
cp LICENSE ./build/bin/
|
||||
zip -r llama-${{ steps.tag.outputs.name }}-bin-macos-x64.zip ./build/bin/*
|
||||
|
||||
- name: Upload artifacts
|
||||
if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
|
||||
uses: actions/upload-artifact@v3
|
||||
with:
|
||||
path: |
|
||||
llama-${{ steps.tag.outputs.name }}-bin-macos-x64.zip
|
||||
|
||||
ubuntu-focal-make:
|
||||
runs-on: ubuntu-20.04
|
||||
|
||||
@@ -748,6 +860,8 @@ jobs:
|
||||
- macOS-latest-cmake
|
||||
- windows-latest-cmake
|
||||
- windows-latest-cmake-cublas
|
||||
- macOS-latest-cmake-arm64
|
||||
- macOS-latest-cmake-x64
|
||||
|
||||
steps:
|
||||
- name: Clone
|
||||
|
||||
1
.github/workflows/close-issue.yml
vendored
1
.github/workflows/close-issue.yml
vendored
@@ -12,6 +12,7 @@ jobs:
|
||||
steps:
|
||||
- uses: actions/stale@v5
|
||||
with:
|
||||
exempt-issue-labels: "refactor,help wanted,good first issue,research"
|
||||
days-before-issue-stale: 30
|
||||
days-before-issue-close: 14
|
||||
stale-issue-label: "stale"
|
||||
|
||||
3
.gitignore
vendored
3
.gitignore
vendored
@@ -11,7 +11,10 @@
|
||||
*.gcda
|
||||
*.dot
|
||||
*.bat
|
||||
*.tmp
|
||||
*.metallib
|
||||
*.etag
|
||||
*.lastModified
|
||||
.DS_Store
|
||||
.build/
|
||||
.cache/
|
||||
|
||||
16
Makefile
16
Makefile
@@ -9,7 +9,8 @@ TEST_TARGETS = \
|
||||
tests/test-llama-grammar tests/test-grammar-parser tests/test-double-float tests/test-grad0 tests/test-opt \
|
||||
tests/test-quantize-fns tests/test-quantize-perf tests/test-sampling tests/test-tokenizer-0-llama \
|
||||
tests/test-tokenizer-0-falcon tests/test-tokenizer-1-llama tests/test-tokenizer-1-bpe tests/test-rope \
|
||||
tests/test-backend-ops tests/test-model-load-cancel tests/test-autorelease
|
||||
tests/test-backend-ops tests/test-model-load-cancel tests/test-autorelease \
|
||||
tests/test-json-schema-to-grammar
|
||||
|
||||
# Code coverage output files
|
||||
COV_TARGETS = *.gcno tests/*.gcno *.gcda tests/*.gcda *.gcov tests/*.gcov lcov-report gcovr-report
|
||||
@@ -666,6 +667,9 @@ console.o: common/console.cpp common/console.h
|
||||
grammar-parser.o: common/grammar-parser.cpp common/grammar-parser.h
|
||||
$(CXX) $(CXXFLAGS) -c $< -o $@
|
||||
|
||||
json-schema-to-grammar.o: common/json-schema-to-grammar.cpp common/json-schema-to-grammar.h
|
||||
$(CXX) $(CXXFLAGS) -c $< -o $@
|
||||
|
||||
train.o: common/train.cpp common/train.h
|
||||
$(CXX) $(CXXFLAGS) -c $< -o $@
|
||||
|
||||
@@ -745,7 +749,7 @@ save-load-state: examples/save-load-state/save-load-state.cpp ggml.o llama.o $(C
|
||||
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
||||
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
||||
|
||||
server: examples/server/server.cpp examples/server/utils.hpp examples/server/httplib.h examples/server/json.hpp examples/server/index.html.hpp examples/server/index.js.hpp examples/server/completion.js.hpp common/stb_image.h ggml.o llama.o $(COMMON_DEPS) grammar-parser.o $(OBJS)
|
||||
server: examples/server/server.cpp examples/server/utils.hpp examples/server/httplib.h common/json.hpp examples/server/index.html.hpp examples/server/index.js.hpp examples/server/completion.js.hpp json-schema-to-grammar.o common/stb_image.h ggml.o llama.o $(COMMON_DEPS) grammar-parser.o $(OBJS)
|
||||
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
||||
$(CXX) $(CXXFLAGS) $(filter-out %.h %.hpp $<,$^) -Iexamples/server $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) $(LWINSOCK2)
|
||||
|
||||
@@ -753,6 +757,10 @@ gguf: examples/gguf/gguf.cpp ggml.o $(OBJS)
|
||||
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
||||
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
||||
|
||||
gguf-split: examples/gguf-split/gguf-split.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
|
||||
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
||||
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
||||
|
||||
train-text-from-scratch: examples/train-text-from-scratch/train-text-from-scratch.cpp ggml.o llama.o $(COMMON_DEPS) train.o $(OBJS)
|
||||
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
||||
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
||||
@@ -861,6 +869,10 @@ tests/test-double-float: tests/test-double-float.cpp ggml.o $(OBJS)
|
||||
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
||||
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
||||
|
||||
tests/test-json-schema-to-grammar: tests/test-json-schema-to-grammar.cpp json-schema-to-grammar.o ggml.o llama.o grammar-parser.o $(OBJS)
|
||||
$(CXX) $(CXXFLAGS) -Iexamples/server -c $< -o $(call GET_OBJ_FILE, $<)
|
||||
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
||||
|
||||
tests/test-grad0: tests/test-grad0.cpp ggml.o $(OBJS)
|
||||
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
||||
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
||||
|
||||
132
README-sycl.md
132
README-sycl.md
@@ -29,6 +29,8 @@ For Intel CPU, recommend to use llama.cpp for X86 (Intel MKL building).
|
||||
## News
|
||||
|
||||
- 2024.3
|
||||
- A blog is published: **Run LLM on all Intel GPUs Using llama.cpp**: [intel.com](https://www.intel.com/content/www/us/en/developer/articles/technical/run-llm-on-all-gpus-using-llama-cpp-artical.html) or [medium.com](https://medium.com/@jianyu_neo/run-llm-on-all-intel-gpus-using-llama-cpp-fd2e2dcbd9bd).
|
||||
- New base line is ready: [tag b2437](https://github.com/ggerganov/llama.cpp/tree/b2437).
|
||||
- Support multiple cards: **--split-mode**: [none|layer]; not support [row], it's on developing.
|
||||
- Support to assign main GPU by **--main-gpu**, replace $GGML_SYCL_DEVICE.
|
||||
- Support detecting all GPUs with level-zero and same top **Max compute units**.
|
||||
@@ -81,7 +83,7 @@ For dGPU, please make sure the device memory is enough. For llama-2-7b.Q4_0, rec
|
||||
|-|-|-|
|
||||
|Ampere Series| Support| A100|
|
||||
|
||||
### oneMKL
|
||||
### oneMKL for CUDA
|
||||
|
||||
The current oneMKL release does not contain the oneMKL cuBlas backend.
|
||||
As a result for Nvidia GPU's oneMKL must be built from source.
|
||||
@@ -114,7 +116,7 @@ You can choose between **F16** and **F32** build. F16 is faster for long-prompt
|
||||
# Or, for F32:
|
||||
docker build -t llama-cpp-sycl -f .devops/main-intel.Dockerfile .
|
||||
|
||||
# Note: you can also use the ".devops/main-server.Dockerfile", which compiles the "server" example
|
||||
# Note: you can also use the ".devops/server-intel.Dockerfile", which compiles the "server" example
|
||||
```
|
||||
|
||||
### Run
|
||||
@@ -254,16 +256,16 @@ Run without parameter:
|
||||
Check the ID in startup log, like:
|
||||
|
||||
```
|
||||
found 4 SYCL devices:
|
||||
Device 0: Intel(R) Arc(TM) A770 Graphics, compute capability 1.3,
|
||||
max compute_units 512, max work group size 1024, max sub group size 32, global mem size 16225243136
|
||||
Device 1: Intel(R) FPGA Emulation Device, compute capability 1.2,
|
||||
max compute_units 24, max work group size 67108864, max sub group size 64, global mem size 67065057280
|
||||
Device 2: 13th Gen Intel(R) Core(TM) i7-13700K, compute capability 3.0,
|
||||
max compute_units 24, max work group size 8192, max sub group size 64, global mem size 67065057280
|
||||
Device 3: Intel(R) Arc(TM) A770 Graphics, compute capability 3.0,
|
||||
max compute_units 512, max work group size 1024, max sub group size 32, global mem size 16225243136
|
||||
|
||||
found 6 SYCL devices:
|
||||
| | | |Compute |Max compute|Max work|Max sub| |
|
||||
|ID| Device Type| Name|capability|units |group |group |Global mem size|
|
||||
|--|------------------|---------------------------------------------|----------|-----------|--------|-------|---------------|
|
||||
| 0|[level_zero:gpu:0]| Intel(R) Arc(TM) A770 Graphics| 1.3| 512| 1024| 32| 16225243136|
|
||||
| 1|[level_zero:gpu:1]| Intel(R) UHD Graphics 770| 1.3| 32| 512| 32| 53651849216|
|
||||
| 2| [opencl:gpu:0]| Intel(R) Arc(TM) A770 Graphics| 3.0| 512| 1024| 32| 16225243136|
|
||||
| 3| [opencl:gpu:1]| Intel(R) UHD Graphics 770| 3.0| 32| 512| 32| 53651849216|
|
||||
| 4| [opencl:cpu:0]| 13th Gen Intel(R) Core(TM) i7-13700K| 3.0| 24| 8192| 64| 67064815616|
|
||||
| 5| [opencl:acc:0]| Intel(R) FPGA Emulation Device| 1.2| 24|67108864| 64| 67064815616|
|
||||
```
|
||||
|
||||
|Attribute|Note|
|
||||
@@ -271,12 +273,35 @@ found 4 SYCL devices:
|
||||
|compute capability 1.3|Level-zero running time, recommended |
|
||||
|compute capability 3.0|OpenCL running time, slower than level-zero in most cases|
|
||||
|
||||
4. Set device ID and execute llama.cpp
|
||||
4. Device selection and execution of llama.cpp
|
||||
|
||||
Set device ID = 0 by **GGML_SYCL_DEVICE=0**
|
||||
There are two device selection modes:
|
||||
|
||||
- Single device: Use one device assigned by user.
|
||||
- Multiple devices: Automatically choose the devices with the same biggest Max compute units.
|
||||
|
||||
|Device selection|Parameter|
|
||||
|-|-|
|
||||
|Single device|--split-mode none --main-gpu DEVICE_ID |
|
||||
|Multiple devices|--split-mode layer (default)|
|
||||
|
||||
Examples:
|
||||
|
||||
- Use device 0:
|
||||
|
||||
```sh
|
||||
GGML_SYCL_DEVICE=0 ./build/bin/main -m models/llama-2-7b.Q4_0.gguf -p "Building a website can be done in 10 simple steps:" -n 400 -e -ngl 33
|
||||
ZES_ENABLE_SYSMAN=1 ./build/bin/main -m models/llama-2-7b.Q4_0.gguf -p "Building a website can be done in 10 simple steps:" -n 400 -e -ngl 33 -sm none -mg 0
|
||||
```
|
||||
or run by script:
|
||||
|
||||
```sh
|
||||
./examples/sycl/run_llama2.sh 0
|
||||
```
|
||||
|
||||
- Use multiple devices:
|
||||
|
||||
```sh
|
||||
ZES_ENABLE_SYSMAN=1 ./build/bin/main -m models/llama-2-7b.Q4_0.gguf -p "Building a website can be done in 10 simple steps:" -n 400 -e -ngl 33 -sm layer
|
||||
```
|
||||
or run by script:
|
||||
|
||||
@@ -289,12 +314,18 @@ Note:
|
||||
- By default, mmap is used to read model file. In some cases, it leads to the hang issue. Recommend to use parameter **--no-mmap** to disable mmap() to skip this issue.
|
||||
|
||||
|
||||
5. Check the device ID in output
|
||||
5. Verify the device ID in output
|
||||
|
||||
Verify to see if the selected GPU is shown in the output, like:
|
||||
|
||||
Like:
|
||||
```
|
||||
Using device **0** (Intel(R) Arc(TM) A770 Graphics) as main device
|
||||
detect 1 SYCL GPUs: [0] with top Max compute units:512
|
||||
```
|
||||
Or
|
||||
```
|
||||
use 1 SYCL GPUs: [0] with Max compute units:512
|
||||
```
|
||||
|
||||
|
||||
## Windows
|
||||
|
||||
@@ -355,7 +386,7 @@ a. Download & install cmake for Windows: https://cmake.org/download/
|
||||
|
||||
b. Download & install mingw-w64 make for Windows provided by w64devkit
|
||||
|
||||
- Download the latest fortran version of [w64devkit](https://github.com/skeeto/w64devkit/releases).
|
||||
- Download the 1.19.0 version of [w64devkit](https://github.com/skeeto/w64devkit/releases/download/v1.19.0/w64devkit-1.19.0.zip).
|
||||
|
||||
- Extract `w64devkit` on your pc.
|
||||
|
||||
@@ -430,15 +461,16 @@ build\bin\main.exe
|
||||
Check the ID in startup log, like:
|
||||
|
||||
```
|
||||
found 4 SYCL devices:
|
||||
Device 0: Intel(R) Arc(TM) A770 Graphics, compute capability 1.3,
|
||||
max compute_units 512, max work group size 1024, max sub group size 32, global mem size 16225243136
|
||||
Device 1: Intel(R) FPGA Emulation Device, compute capability 1.2,
|
||||
max compute_units 24, max work group size 67108864, max sub group size 64, global mem size 67065057280
|
||||
Device 2: 13th Gen Intel(R) Core(TM) i7-13700K, compute capability 3.0,
|
||||
max compute_units 24, max work group size 8192, max sub group size 64, global mem size 67065057280
|
||||
Device 3: Intel(R) Arc(TM) A770 Graphics, compute capability 3.0,
|
||||
max compute_units 512, max work group size 1024, max sub group size 32, global mem size 16225243136
|
||||
found 6 SYCL devices:
|
||||
| | | |Compute |Max compute|Max work|Max sub| |
|
||||
|ID| Device Type| Name|capability|units |group |group |Global mem size|
|
||||
|--|------------------|---------------------------------------------|----------|-----------|--------|-------|---------------|
|
||||
| 0|[level_zero:gpu:0]| Intel(R) Arc(TM) A770 Graphics| 1.3| 512| 1024| 32| 16225243136|
|
||||
| 1|[level_zero:gpu:1]| Intel(R) UHD Graphics 770| 1.3| 32| 512| 32| 53651849216|
|
||||
| 2| [opencl:gpu:0]| Intel(R) Arc(TM) A770 Graphics| 3.0| 512| 1024| 32| 16225243136|
|
||||
| 3| [opencl:gpu:1]| Intel(R) UHD Graphics 770| 3.0| 32| 512| 32| 53651849216|
|
||||
| 4| [opencl:cpu:0]| 13th Gen Intel(R) Core(TM) i7-13700K| 3.0| 24| 8192| 64| 67064815616|
|
||||
| 5| [opencl:acc:0]| Intel(R) FPGA Emulation Device| 1.2| 24|67108864| 64| 67064815616|
|
||||
|
||||
```
|
||||
|
||||
@@ -447,13 +479,31 @@ found 4 SYCL devices:
|
||||
|compute capability 1.3|Level-zero running time, recommended |
|
||||
|compute capability 3.0|OpenCL running time, slower than level-zero in most cases|
|
||||
|
||||
4. Set device ID and execute llama.cpp
|
||||
|
||||
Set device ID = 0 by **set GGML_SYCL_DEVICE=0**
|
||||
4. Device selection and execution of llama.cpp
|
||||
|
||||
There are two device selection modes:
|
||||
|
||||
- Single device: Use one device assigned by user.
|
||||
- Multiple devices: Automatically choose the devices with the same biggest Max compute units.
|
||||
|
||||
|Device selection|Parameter|
|
||||
|-|-|
|
||||
|Single device|--split-mode none --main-gpu DEVICE_ID |
|
||||
|Multiple devices|--split-mode layer (default)|
|
||||
|
||||
Examples:
|
||||
|
||||
- Use device 0:
|
||||
|
||||
```
|
||||
set GGML_SYCL_DEVICE=0
|
||||
build\bin\main.exe -m models\llama-2-7b.Q4_0.gguf -p "Building a website can be done in 10 simple steps:\nStep 1:" -n 400 -e -ngl 33 -s 0
|
||||
build\bin\main.exe -m models\llama-2-7b.Q4_0.gguf -p "Building a website can be done in 10 simple steps:\nStep 1:" -n 400 -e -ngl 33 -s 0 -sm none -mg 0
|
||||
```
|
||||
|
||||
- Use multiple devices:
|
||||
|
||||
```
|
||||
build\bin\main.exe -m models\llama-2-7b.Q4_0.gguf -p "Building a website can be done in 10 simple steps:\nStep 1:" -n 400 -e -ngl 33 -s 0 -sm layer
|
||||
```
|
||||
or run by script:
|
||||
|
||||
@@ -466,11 +516,17 @@ Note:
|
||||
- By default, mmap is used to read model file. In some cases, it leads to the hang issue. Recommend to use parameter **--no-mmap** to disable mmap() to skip this issue.
|
||||
|
||||
|
||||
5. Check the device ID in output
|
||||
|
||||
Like:
|
||||
5. Verify the device ID in output
|
||||
|
||||
Verify to see if the selected GPU is shown in the output, like:
|
||||
|
||||
```
|
||||
Using device **0** (Intel(R) Arc(TM) A770 Graphics) as main device
|
||||
detect 1 SYCL GPUs: [0] with top Max compute units:512
|
||||
```
|
||||
Or
|
||||
```
|
||||
use 1 SYCL GPUs: [0] with Max compute units:512
|
||||
```
|
||||
|
||||
## Environment Variable
|
||||
@@ -489,7 +545,6 @@ Using device **0** (Intel(R) Arc(TM) A770 Graphics) as main device
|
||||
|
||||
|Name|Value|Function|
|
||||
|-|-|-|
|
||||
|GGML_SYCL_DEVICE|0 (default) or 1|Set the device id used. Check the device ids by default running output|
|
||||
|GGML_SYCL_DEBUG|0 (default) or 1|Enable log function by macro: GGML_SYCL_DEBUG|
|
||||
|ZES_ENABLE_SYSMAN| 0 (default) or 1|Support to get free memory of GPU by sycl::aspect::ext_intel_free_memory.<br>Recommended to use when --split-mode = layer|
|
||||
|
||||
@@ -507,6 +562,9 @@ Using device **0** (Intel(R) Arc(TM) A770 Graphics) as main device
|
||||
|
||||
## Q&A
|
||||
|
||||
Note: please add prefix **[SYCL]** in issue title, so that we will check it as soon as possible.
|
||||
|
||||
|
||||
- Error: `error while loading shared libraries: libsycl.so.7: cannot open shared object file: No such file or directory`.
|
||||
|
||||
Miss to enable oneAPI running environment.
|
||||
@@ -538,4 +596,4 @@ Using device **0** (Intel(R) Arc(TM) A770 Graphics) as main device
|
||||
|
||||
## Todo
|
||||
|
||||
- Support multiple cards.
|
||||
- Support row layer split for multiple card runs.
|
||||
|
||||
@@ -17,6 +17,7 @@ Inference of Meta's [LLaMA](https://arxiv.org/abs/2302.13971) model (and others)
|
||||
|
||||
### Hot topics
|
||||
|
||||
- Fix major bug in Metal batched inference https://github.com/ggerganov/llama.cpp/pull/6225
|
||||
- Multi-GPU pipeline parallelizm support https://github.com/ggerganov/llama.cpp/pull/6017
|
||||
- Looking for contributions to add Deepseek support: https://github.com/ggerganov/llama.cpp/issues/5981
|
||||
- Quantization blind testing: https://github.com/ggerganov/llama.cpp/discussions/5962
|
||||
@@ -165,6 +166,7 @@ Unless otherwise noted these projects are open-source with permissive licensing:
|
||||
- [cztomsik/ava](https://github.com/cztomsik/ava) (MIT)
|
||||
- [ptsochantaris/emeltal](https://github.com/ptsochantaris/emeltal)
|
||||
- [pythops/tenere](https://github.com/pythops/tenere) (AGPL)
|
||||
- [RecurseChat](https://recurse.chat/) (proprietary)
|
||||
- [semperai/amica](https://github.com/semperai/amica)
|
||||
- [withcatai/catai](https://github.com/withcatai/catai)
|
||||
- [Mobile-Artificial-Intelligence/maid](https://github.com/Mobile-Artificial-Intelligence/maid) (MIT)
|
||||
|
||||
@@ -122,6 +122,7 @@ pub fn build(b: *std.build.Builder) !void {
|
||||
const console = make.obj("console", "common/console.cpp");
|
||||
const sampling = make.obj("sampling", "common/sampling.cpp");
|
||||
const grammar_parser = make.obj("grammar-parser", "common/grammar-parser.cpp");
|
||||
const json_schema_to_grammar = make.obj("json-schema-to-grammar", "common/json-schema-to-grammar.cpp");
|
||||
const train = make.obj("train", "common/train.cpp");
|
||||
const clip = make.obj("clip", "examples/llava/clip.cpp");
|
||||
const llava = make.obj("llava", "examples/llava/llava.cpp");
|
||||
@@ -133,7 +134,7 @@ pub fn build(b: *std.build.Builder) !void {
|
||||
_ = make.exe("finetune", "examples/finetune/finetune.cpp", &.{ ggml, ggml_alloc, ggml_backend, ggml_quants, llama, unicode, common, buildinfo, train });
|
||||
_ = make.exe("train-text-from-scratch", "examples/train-text-from-scratch/train-text-from-scratch.cpp", &.{ ggml, ggml_alloc, ggml_backend, ggml_quants, llama, unicode, common, buildinfo, train });
|
||||
|
||||
const server = make.exe("server", "examples/server/server.cpp", &.{ ggml, ggml_alloc, ggml_backend, ggml_quants, llama, unicode, common, buildinfo, sampling, grammar_parser, clip, llava });
|
||||
const server = make.exe("server", "examples/server/server.cpp", &.{ ggml, ggml_alloc, ggml_backend, ggml_quants, llama, unicode, common, buildinfo, sampling, grammar_parser, json_schema_to_grammar, clip, llava });
|
||||
if (server.target.isWindows()) {
|
||||
server.linkSystemLibrary("ws2_32");
|
||||
}
|
||||
|
||||
@@ -47,6 +47,8 @@ if (BUILD_SHARED_LIBS)
|
||||
set_target_properties(${TARGET} PROPERTIES POSITION_INDEPENDENT_CODE ON)
|
||||
endif()
|
||||
|
||||
set(TARGET json-schema-to-grammar)
|
||||
add_library(${TARGET} OBJECT json-schema-to-grammar.cpp json-schema-to-grammar.h)
|
||||
|
||||
set(TARGET common)
|
||||
|
||||
@@ -60,6 +62,7 @@ add_library(${TARGET} STATIC
|
||||
console.cpp
|
||||
grammar-parser.h
|
||||
grammar-parser.cpp
|
||||
json.hpp
|
||||
train.h
|
||||
train.cpp
|
||||
)
|
||||
|
||||
@@ -101,7 +101,7 @@ int32_t get_num_physical_cores() {
|
||||
return n_threads > 0 ? (n_threads <= 4 ? n_threads : n_threads / 2) : 4;
|
||||
}
|
||||
|
||||
void process_escapes(std::string& input) {
|
||||
void process_escapes(std::string & input) {
|
||||
std::size_t input_len = input.length();
|
||||
std::size_t output_idx = 0;
|
||||
|
||||
@@ -154,8 +154,7 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
|
||||
return result;
|
||||
}
|
||||
|
||||
static bool gpt_params_find_arg(int argc, char ** argv, gpt_params & params, int & i, bool & invalid_param) {
|
||||
std::string arg = argv[i];
|
||||
static bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_params & params, int & i, bool & invalid_param) {
|
||||
llama_sampling_params& sparams = params.sparams;
|
||||
|
||||
if (arg == "-s" || arg == "--seed") {
|
||||
@@ -648,14 +647,6 @@ static bool gpt_params_find_arg(int argc, char ** argv, gpt_params & params, int
|
||||
params.model = argv[i];
|
||||
return true;
|
||||
}
|
||||
if (arg == "-mu" || arg == "--model-url") {
|
||||
if (++i >= argc) {
|
||||
invalid_param = true;
|
||||
return true;
|
||||
}
|
||||
params.model_url = argv[i];
|
||||
return true;
|
||||
}
|
||||
if (arg == "-md" || arg == "--model-draft") {
|
||||
if (++i >= argc) {
|
||||
invalid_param = true;
|
||||
@@ -672,6 +663,30 @@ static bool gpt_params_find_arg(int argc, char ** argv, gpt_params & params, int
|
||||
params.model_alias = argv[i];
|
||||
return true;
|
||||
}
|
||||
if (arg == "-mu" || arg == "--model-url") {
|
||||
if (++i >= argc) {
|
||||
invalid_param = true;
|
||||
return true;
|
||||
}
|
||||
params.model_url = argv[i];
|
||||
return true;
|
||||
}
|
||||
if (arg == "-hfr" || arg == "--hf-repo") {
|
||||
if (++i >= argc) {
|
||||
invalid_param = true;
|
||||
return true;
|
||||
}
|
||||
params.hf_repo = argv[i];
|
||||
return true;
|
||||
}
|
||||
if (arg == "-hff" || arg == "--hf-file") {
|
||||
if (++i >= argc) {
|
||||
invalid_param = true;
|
||||
return true;
|
||||
}
|
||||
params.hf_file = argv[i];
|
||||
return true;
|
||||
}
|
||||
if (arg == "--lora") {
|
||||
if (++i >= argc) {
|
||||
invalid_param = true;
|
||||
@@ -1056,7 +1071,8 @@ static bool gpt_params_find_arg(int argc, char ** argv, gpt_params & params, int
|
||||
return true;
|
||||
}
|
||||
if (arg == "-h" || arg == "--help") {
|
||||
return false;
|
||||
gpt_print_usage(argc, argv, gpt_params());
|
||||
exit(0);
|
||||
}
|
||||
if (arg == "--version") {
|
||||
fprintf(stderr, "version: %d (%s)\n", LLAMA_BUILD_NUMBER, LLAMA_COMMIT);
|
||||
@@ -1200,7 +1216,7 @@ bool gpt_params_parse_ex(int argc, char ** argv, gpt_params & params) {
|
||||
std::replace(arg.begin(), arg.end(), '_', '-');
|
||||
}
|
||||
|
||||
if (!gpt_params_find_arg(argc, argv, params, i, invalid_param)) {
|
||||
if (!gpt_params_find_arg(argc, argv, arg, params, i, invalid_param)) {
|
||||
throw std::invalid_argument("error: unknown argument: " + arg);
|
||||
}
|
||||
}
|
||||
@@ -1403,10 +1419,14 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) {
|
||||
printf(" layer range to apply the control vector(s) to, start and end inclusive\n");
|
||||
printf(" -m FNAME, --model FNAME\n");
|
||||
printf(" model path (default: %s)\n", params.model.c_str());
|
||||
printf(" -mu MODEL_URL, --model-url MODEL_URL\n");
|
||||
printf(" model download url (default: %s)\n", params.model_url.c_str());
|
||||
printf(" -md FNAME, --model-draft FNAME\n");
|
||||
printf(" draft model for speculative decoding\n");
|
||||
printf(" draft model for speculative decoding (default: unused)\n");
|
||||
printf(" -mu MODEL_URL, --model-url MODEL_URL\n");
|
||||
printf(" model download url (default: unused)\n");
|
||||
printf(" -hfr REPO, --hf-repo REPO\n");
|
||||
printf(" Hugging Face model repository (default: unused)\n");
|
||||
printf(" -hff FILE, --hf-file FILE\n");
|
||||
printf(" Hugging Face model file (default: unused)\n");
|
||||
printf(" -ld LOGDIR, --logdir LOGDIR\n");
|
||||
printf(" path under which to save YAML logs (no logging if unset)\n");
|
||||
printf(" --override-kv KEY=TYPE:VALUE\n");
|
||||
@@ -1589,6 +1609,9 @@ static ggml_type kv_cache_type_from_str(const std::string & s) {
|
||||
if (s == "q4_1") {
|
||||
return GGML_TYPE_Q4_1;
|
||||
}
|
||||
if (s == "iq4_nl") {
|
||||
return GGML_TYPE_IQ4_NL;
|
||||
}
|
||||
if (s == "q5_0") {
|
||||
return GGML_TYPE_Q5_0;
|
||||
}
|
||||
@@ -1652,8 +1675,10 @@ void llama_batch_add(
|
||||
|
||||
#ifdef LLAMA_USE_CURL
|
||||
|
||||
struct llama_model * llama_load_model_from_url(const char * model_url, const char * path_model,
|
||||
struct llama_model_params params) {
|
||||
struct llama_model * llama_load_model_from_url(
|
||||
const char * model_url,
|
||||
const char * path_model,
|
||||
const struct llama_model_params & params) {
|
||||
// Basic validation of the model_url
|
||||
if (!model_url || strlen(model_url) == 0) {
|
||||
fprintf(stderr, "%s: invalid model_url\n", __func__);
|
||||
@@ -1847,25 +1872,62 @@ struct llama_model * llama_load_model_from_url(const char * model_url, const cha
|
||||
return llama_load_model_from_file(path_model, params);
|
||||
}
|
||||
|
||||
struct llama_model * llama_load_model_from_hf(
|
||||
const char * repo,
|
||||
const char * model,
|
||||
const char * path_model,
|
||||
const struct llama_model_params & params) {
|
||||
// construct hugging face model url:
|
||||
//
|
||||
// --repo ggml-org/models --file tinyllama-1.1b/ggml-model-f16.gguf
|
||||
// https://huggingface.co/ggml-org/models/resolve/main/tinyllama-1.1b/ggml-model-f16.gguf
|
||||
//
|
||||
// --repo TheBloke/Mixtral-8x7B-v0.1-GGUF --file mixtral-8x7b-v0.1.Q4_K_M.gguf
|
||||
// https://huggingface.co/TheBloke/Mixtral-8x7B-v0.1-GGUF/resolve/main/mixtral-8x7b-v0.1.Q4_K_M.gguf
|
||||
//
|
||||
|
||||
std::string model_url = "https://huggingface.co/";
|
||||
model_url += repo;
|
||||
model_url += "/resolve/main/";
|
||||
model_url += model;
|
||||
|
||||
return llama_load_model_from_url(model_url.c_str(), path_model, params);
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
struct llama_model * llama_load_model_from_url(const char * /*model_url*/, const char * /*path_model*/,
|
||||
struct llama_model_params /*params*/) {
|
||||
struct llama_model * llama_load_model_from_url(
|
||||
const char * /*model_url*/,
|
||||
const char * /*path_model*/,
|
||||
const struct llama_model_params & /*params*/) {
|
||||
fprintf(stderr, "%s: llama.cpp built without libcurl, downloading from an url not supported.\n", __func__);
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
struct llama_model * llama_load_model_from_hf(
|
||||
const char * /*repo*/,
|
||||
const char * /*model*/,
|
||||
const char * /*path_model*/,
|
||||
const struct llama_model_params & /*params*/) {
|
||||
fprintf(stderr, "%s: llama.cpp built without libcurl, downloading from Hugging Face not supported.\n", __func__);
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
#endif // LLAMA_USE_CURL
|
||||
|
||||
std::tuple<struct llama_model *, struct llama_context *> llama_init_from_gpt_params(gpt_params & params) {
|
||||
auto mparams = llama_model_params_from_gpt_params(params);
|
||||
|
||||
llama_model * model = nullptr;
|
||||
if (!params.model_url.empty()) {
|
||||
|
||||
if (!params.hf_repo.empty() && !params.hf_file.empty()) {
|
||||
model = llama_load_model_from_hf(params.hf_repo.c_str(), params.hf_file.c_str(), params.model.c_str(), mparams);
|
||||
} else if (!params.model_url.empty()) {
|
||||
model = llama_load_model_from_url(params.model_url.c_str(), params.model.c_str(), mparams);
|
||||
} else {
|
||||
model = llama_load_model_from_file(params.model.c_str(), mparams);
|
||||
}
|
||||
|
||||
if (model == NULL) {
|
||||
fprintf(stderr, "%s: error: failed to load model '%s'\n", __func__, params.model.c_str());
|
||||
return std::make_tuple(nullptr, nullptr);
|
||||
@@ -1905,7 +1967,7 @@ std::tuple<struct llama_model *, struct llama_context *> llama_init_from_gpt_par
|
||||
}
|
||||
|
||||
for (unsigned int i = 0; i < params.lora_adapter.size(); ++i) {
|
||||
const std::string& lora_adapter = std::get<0>(params.lora_adapter[i]);
|
||||
const std::string & lora_adapter = std::get<0>(params.lora_adapter[i]);
|
||||
float lora_scale = std::get<1>(params.lora_adapter[i]);
|
||||
int err = llama_model_apply_lora_from_file(model,
|
||||
lora_adapter.c_str(),
|
||||
|
||||
@@ -89,9 +89,11 @@ struct gpt_params {
|
||||
struct llama_sampling_params sparams;
|
||||
|
||||
std::string model = "models/7B/ggml-model-f16.gguf"; // model path
|
||||
std::string model_url = ""; // model url to download
|
||||
std::string model_draft = ""; // draft model for speculative decoding
|
||||
std::string model_draft = ""; // draft model for speculative decoding
|
||||
std::string model_alias = "unknown"; // model alias
|
||||
std::string model_url = ""; // model url to download
|
||||
std::string hf_repo = ""; // HF repo
|
||||
std::string hf_file = ""; // HF file
|
||||
std::string prompt = "";
|
||||
std::string prompt_file = ""; // store the external prompt file name
|
||||
std::string path_prompt_cache = ""; // path to file for saving/loading prompt eval state
|
||||
@@ -139,7 +141,7 @@ struct gpt_params {
|
||||
bool interactive_first = false; // wait for user input immediately
|
||||
bool multiline_input = false; // reverse the usage of `\`
|
||||
bool simple_io = false; // improves compatibility with subprocesses and limited consoles
|
||||
bool cont_batching = false; // insert new sequences for decoding on-the-fly
|
||||
bool cont_batching = true; // insert new sequences for decoding on-the-fly
|
||||
|
||||
bool input_prefix_bos = false; // prefix BOS to user inputs, preceding input_prefix
|
||||
bool ignore_eos = false; // ignore generated EOS tokens
|
||||
@@ -192,8 +194,8 @@ std::tuple<struct llama_model *, struct llama_context *> llama_init_from_gpt_par
|
||||
struct llama_model_params llama_model_params_from_gpt_params (const gpt_params & params);
|
||||
struct llama_context_params llama_context_params_from_gpt_params(const gpt_params & params);
|
||||
|
||||
struct llama_model * llama_load_model_from_url(const char * model_url, const char * path_model,
|
||||
struct llama_model_params params);
|
||||
struct llama_model * llama_load_model_from_url(const char * model_url, const char * path_model, const struct llama_model_params & params);
|
||||
struct llama_model * llama_load_model_from_hf(const char * repo, const char * file, const char * path_model, const struct llama_model_params & params);
|
||||
|
||||
// Batch utils
|
||||
|
||||
|
||||
725
common/json-schema-to-grammar.cpp
Normal file
725
common/json-schema-to-grammar.cpp
Normal file
@@ -0,0 +1,725 @@
|
||||
#include "json-schema-to-grammar.h"
|
||||
#include <algorithm>
|
||||
#include <fstream>
|
||||
#include <map>
|
||||
#include <regex>
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
#include <unordered_map>
|
||||
#include <unordered_set>
|
||||
#include <vector>
|
||||
|
||||
using json = nlohmann::json;
|
||||
|
||||
const std::string SPACE_RULE = "\" \"?";
|
||||
|
||||
std::unordered_map<std::string, std::string> PRIMITIVE_RULES = {
|
||||
{"boolean", "(\"true\" | \"false\") space"},
|
||||
{"number", "(\"-\"? ([0-9] | [1-9] [0-9]*)) (\".\" [0-9]+)? ([eE] [-+]? [0-9]+)? space"},
|
||||
{"integer", "(\"-\"? ([0-9] | [1-9] [0-9]*)) space"},
|
||||
{"value", "object | array | string | number | boolean"},
|
||||
{"object", "\"{\" space ( string \":\" space value (\",\" space string \":\" space value)* )? \"}\" space"},
|
||||
{"array", "\"[\" space ( value (\",\" space value)* )? \"]\" space"},
|
||||
{"uuid", "\"\\\"\" [0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F] "
|
||||
"\"-\" [0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F] "
|
||||
"\"-\" [0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F] "
|
||||
"\"-\" [0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F] "
|
||||
"\"-\" [0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F] \"\\\"\" space"},
|
||||
{"string", " \"\\\"\" (\n"
|
||||
" [^\"\\\\] |\n"
|
||||
" \"\\\\\" ([\"\\\\/bfnrt] | \"u\" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])\n"
|
||||
" )* \"\\\"\" space"},
|
||||
{"null", "\"null\" space"}
|
||||
};
|
||||
std::vector<std::string> OBJECT_RULE_NAMES = {"object", "array", "string", "number", "boolean", "null", "value"};
|
||||
|
||||
std::unordered_map<std::string, std::string> DATE_RULES = {
|
||||
{"date", "[0-9] [0-9] [0-9] [0-9] \"-\" ( \"0\" [1-9] | \"1\" [0-2] ) \"-\" ( \"0\" [1-9] | [1-2] [0-9] | \"3\" [0-1] )"},
|
||||
{"time", "([01] [0-9] | \"2\" [0-3]) \":\" [0-5] [0-9] \":\" [0-5] [0-9] ( \".\" [0-9] [0-9] [0-9] )? ( \"Z\" | ( \"+\" | \"-\" ) ( [01] [0-9] | \"2\" [0-3] ) \":\" [0-5] [0-9] )"},
|
||||
{"date-time", "date \"T\" time"},
|
||||
{"date-string", "\"\\\"\" date \"\\\"\" space"},
|
||||
{"time-string", "\"\\\"\" time \"\\\"\" space"},
|
||||
{"date-time-string", "\"\\\"\" date-time \"\\\"\" space"}
|
||||
};
|
||||
|
||||
static bool is_reserved_name(const std::string & name) {
|
||||
static std::unordered_set<std::string> RESERVED_NAMES;
|
||||
if (RESERVED_NAMES.empty()) {
|
||||
RESERVED_NAMES.insert("root");
|
||||
for (const auto &p : PRIMITIVE_RULES) RESERVED_NAMES.insert(p.first);
|
||||
for (const auto &p : DATE_RULES) RESERVED_NAMES.insert(p.first);
|
||||
}
|
||||
return RESERVED_NAMES.find(name) != RESERVED_NAMES.end();
|
||||
}
|
||||
|
||||
std::regex INVALID_RULE_CHARS_RE("[^a-zA-Z0-9-]+");
|
||||
std::regex GRAMMAR_LITERAL_ESCAPE_RE("[\r\n\"]");
|
||||
std::regex GRAMMAR_RANGE_LITERAL_ESCAPE_RE("[\r\n\"\\]\\-\\\\]");
|
||||
std::unordered_map<char, std::string> GRAMMAR_LITERAL_ESCAPES = {
|
||||
{'\r', "\\r"}, {'\n', "\\n"}, {'"', "\\\""}, {'-', "\\-"}, {']', "\\]"}
|
||||
};
|
||||
|
||||
std::unordered_set<char> NON_LITERAL_SET = {'|', '.', '(', ')', '[', ']', '{', '}', '*', '+', '?'};
|
||||
std::unordered_set<char> ESCAPED_IN_REGEXPS_BUT_NOT_IN_LITERALS = {'[', ']', '(', ')', '|', '{', '}', '*', '+', '?'};
|
||||
|
||||
template <typename Iterator>
|
||||
std::string join(Iterator begin, Iterator end, const std::string & separator) {
|
||||
std::ostringstream result;
|
||||
if (begin != end) {
|
||||
result << *begin;
|
||||
for (Iterator it = begin + 1; it != end; ++it) {
|
||||
result << separator << *it;
|
||||
}
|
||||
}
|
||||
return result.str();
|
||||
}
|
||||
|
||||
static std::vector<std::string> split(const std::string & str, const std::string & delimiter) {
|
||||
std::vector<std::string> tokens;
|
||||
size_t start = 0;
|
||||
size_t end = str.find(delimiter);
|
||||
|
||||
while (end != std::string::npos) {
|
||||
tokens.push_back(str.substr(start, end - start));
|
||||
start = end + delimiter.length();
|
||||
end = str.find(delimiter, start);
|
||||
}
|
||||
|
||||
tokens.push_back(str.substr(start));
|
||||
|
||||
return tokens;
|
||||
}
|
||||
|
||||
static std::string repeat(const std::string & str, size_t n) {
|
||||
if (n == 0) {
|
||||
return "";
|
||||
}
|
||||
|
||||
std::string result;
|
||||
result.reserve(str.length() * n);
|
||||
|
||||
for (size_t i = 0; i < n; ++i) {
|
||||
result += str;
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
static std::string replacePattern(const std::string & input, const std::regex & regex, const std::function<std::string(const std::smatch &)> & replacement) {
|
||||
std::smatch match;
|
||||
std::string result;
|
||||
|
||||
std::string::const_iterator searchStart(input.cbegin());
|
||||
std::string::const_iterator searchEnd(input.cend());
|
||||
|
||||
while (std::regex_search(searchStart, searchEnd, match, regex)) {
|
||||
result.append(searchStart, searchStart + match.position());
|
||||
result.append(replacement(match));
|
||||
searchStart = match.suffix().first;
|
||||
}
|
||||
|
||||
result.append(searchStart, searchEnd);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
static std::string format_literal(const std::string & literal) {
|
||||
std::string escaped = replacePattern(json(literal).dump(), GRAMMAR_LITERAL_ESCAPE_RE, [&](const std::smatch & match) {
|
||||
char c = match.str()[0];
|
||||
return GRAMMAR_LITERAL_ESCAPES.at(c);
|
||||
});
|
||||
return "\"" + escaped + "\"";
|
||||
}
|
||||
|
||||
|
||||
class SchemaConverter {
|
||||
private:
|
||||
std::function<json(const std::string &)> _fetch_json;
|
||||
bool _dotall;
|
||||
std::map<std::string, std::string> _rules;
|
||||
std::unordered_map<std::string, nlohmann::json> _refs;
|
||||
std::unordered_set<std::string> _refs_being_resolved;
|
||||
std::vector<std::string> _errors;
|
||||
std::vector<std::string> _warnings;
|
||||
|
||||
std::string _add_rule(const std::string & name, const std::string & rule) {
|
||||
std::string esc_name = regex_replace(name, INVALID_RULE_CHARS_RE, "-");
|
||||
if (_rules.find(esc_name) == _rules.end() || _rules[esc_name] == rule) {
|
||||
_rules[esc_name] = rule;
|
||||
return esc_name;
|
||||
} else {
|
||||
int i = 0;
|
||||
while (_rules.find(esc_name + std::to_string(i)) != _rules.end() && _rules[esc_name + std::to_string(i)] != rule) {
|
||||
i++;
|
||||
}
|
||||
std::string key = esc_name + std::to_string(i);
|
||||
_rules[key] = rule;
|
||||
return key;
|
||||
}
|
||||
}
|
||||
|
||||
std::string _generate_union_rule(const std::string & name, const std::vector<json> & alt_schemas) {
|
||||
std::vector<std::string> rules;
|
||||
for (size_t i = 0; i < alt_schemas.size(); i++) {
|
||||
rules.push_back(visit(alt_schemas[i], name + (name.empty() ? "alternative-" : "-") + std::to_string(i)));
|
||||
}
|
||||
return join(rules.begin(), rules.end(), " | ");
|
||||
}
|
||||
|
||||
std::string _visit_pattern(const std::string & pattern, const std::string & name) {
|
||||
if (!(pattern.front() == '^' && pattern.back() == '$')) {
|
||||
_errors.push_back("Pattern must start with '^' and end with '$'");
|
||||
return "";
|
||||
}
|
||||
std::string sub_pattern = pattern.substr(1, pattern.length() - 2);
|
||||
std::unordered_map<std::string, std::string> sub_rule_ids;
|
||||
|
||||
size_t i = 0;
|
||||
size_t length = sub_pattern.length();
|
||||
|
||||
using literal_or_rule = std::pair<std::string, bool>;
|
||||
auto to_rule = [&](const literal_or_rule & ls) {
|
||||
auto is_literal = ls.second;
|
||||
auto s = ls.first;
|
||||
return is_literal ? "\"" + s + "\"" : s;
|
||||
};
|
||||
std::function<literal_or_rule()> transform = [&]() -> literal_or_rule {
|
||||
size_t start = i;
|
||||
std::vector<literal_or_rule> seq;
|
||||
|
||||
auto get_dot = [&]() {
|
||||
std::string rule;
|
||||
if (_dotall) {
|
||||
rule = "[\\U00000000-\\U0010FFFF]";
|
||||
} else {
|
||||
rule = "[\\U00000000-\\x09\\x0B\\x0C\\x0E-\\U0010FFFF]";
|
||||
}
|
||||
return _add_rule("dot", rule);
|
||||
};
|
||||
|
||||
// Joins the sequence, merging consecutive literals together.
|
||||
auto join_seq = [&]() {
|
||||
std::vector<literal_or_rule> ret;
|
||||
|
||||
std::string literal;
|
||||
auto flush_literal = [&]() {
|
||||
if (literal.empty()) {
|
||||
return false;
|
||||
}
|
||||
ret.push_back(std::make_pair(literal, true));
|
||||
literal.clear();
|
||||
return true;
|
||||
};
|
||||
|
||||
for (const auto & item : seq) {
|
||||
auto is_literal = item.second;
|
||||
if (is_literal) {
|
||||
literal += item.first;
|
||||
} else {
|
||||
flush_literal();
|
||||
ret.push_back(item);
|
||||
}
|
||||
}
|
||||
flush_literal();
|
||||
|
||||
std::vector<std::string> results;
|
||||
for (const auto & item : ret) {
|
||||
results.push_back(to_rule(item));
|
||||
}
|
||||
return std::make_pair(join(results.begin(), results.end(), " "), false);
|
||||
};
|
||||
|
||||
while (i < length) {
|
||||
char c = sub_pattern[i];
|
||||
if (c == '.') {
|
||||
seq.push_back(std::make_pair(get_dot(), false));
|
||||
i++;
|
||||
} else if (c == '(') {
|
||||
i++;
|
||||
if (i < length) {
|
||||
if (sub_pattern[i] == '?') {
|
||||
_warnings.push_back("Unsupported pattern syntax");
|
||||
}
|
||||
}
|
||||
seq.push_back(std::make_pair("(" + to_rule(transform()) + ")", false));
|
||||
} else if (c == ')') {
|
||||
i++;
|
||||
if (start > 0 && sub_pattern[start - 1] != '(') {
|
||||
_errors.push_back("Unbalanced parentheses");
|
||||
}
|
||||
return join_seq();
|
||||
} else if (c == '[') {
|
||||
std::string square_brackets = std::string(1, c);
|
||||
i++;
|
||||
while (i < length && sub_pattern[i] != ']') {
|
||||
if (sub_pattern[i] == '\\') {
|
||||
square_brackets += sub_pattern.substr(i, 2);
|
||||
i += 2;
|
||||
} else {
|
||||
square_brackets += sub_pattern[i];
|
||||
i++;
|
||||
}
|
||||
}
|
||||
if (i >= length) {
|
||||
_errors.push_back("Unbalanced square brackets");
|
||||
}
|
||||
square_brackets += ']';
|
||||
i++;
|
||||
seq.push_back(std::make_pair(square_brackets, false));
|
||||
} else if (c == '|') {
|
||||
seq.push_back(std::make_pair("|", false));
|
||||
i++;
|
||||
} else if (c == '*' || c == '+' || c == '?') {
|
||||
seq.back() = std::make_pair(to_rule(seq.back()) + c, false);
|
||||
i++;
|
||||
} else if (c == '{') {
|
||||
std::string curly_brackets = std::string(1, c);
|
||||
i++;
|
||||
while (i < length && sub_pattern[i] != '}') {
|
||||
curly_brackets += sub_pattern[i];
|
||||
i++;
|
||||
}
|
||||
if (i >= length) {
|
||||
_errors.push_back("Unbalanced curly brackets");
|
||||
}
|
||||
curly_brackets += '}';
|
||||
i++;
|
||||
auto nums = split(curly_brackets.substr(1, curly_brackets.length() - 2), ",");
|
||||
int min_times = 0;
|
||||
int max_times = std::numeric_limits<int>::max();
|
||||
try {
|
||||
if (nums.size() == 1) {
|
||||
min_times = max_times = std::stoi(nums[0]);
|
||||
} else if (nums.size() != 2) {
|
||||
_errors.push_back("Wrong number of values in curly brackets");
|
||||
} else {
|
||||
if (!nums[0].empty()) {
|
||||
min_times = std::stoi(nums[0]);
|
||||
}
|
||||
if (!nums[1].empty()) {
|
||||
max_times = std::stoi(nums[1]);
|
||||
}
|
||||
}
|
||||
} catch (const std::invalid_argument & e) {
|
||||
_errors.push_back("Invalid number in curly brackets");
|
||||
return std::make_pair("", false);
|
||||
}
|
||||
auto &last = seq.back();
|
||||
auto &sub = last.first;
|
||||
auto sub_is_literal = last.second;
|
||||
|
||||
if (min_times == 0 && max_times == std::numeric_limits<int>::max()) {
|
||||
sub += "*";
|
||||
} else if (min_times == 0 && max_times == 1) {
|
||||
sub += "?";
|
||||
} else if (min_times == 1 && max_times == std::numeric_limits<int>::max()) {
|
||||
sub += "+";
|
||||
} else {
|
||||
if (!sub_is_literal) {
|
||||
std::string & sub_id = sub_rule_ids[sub];
|
||||
if (sub_id.empty()) {
|
||||
sub_id = _add_rule(name + "-" + std::to_string(sub_rule_ids.size()), sub);
|
||||
}
|
||||
sub = sub_id;
|
||||
}
|
||||
std::string result;
|
||||
if (sub_is_literal && min_times > 0) {
|
||||
result = "\"" + repeat(sub.substr(1, sub.length() - 2), min_times) + "\"";
|
||||
} else {
|
||||
for (int j = 0; j < min_times; j++) {
|
||||
if (j > 0) {
|
||||
result += " ";
|
||||
}
|
||||
result += sub;
|
||||
}
|
||||
}
|
||||
if (min_times > 0 && min_times < max_times) {
|
||||
result += " ";
|
||||
}
|
||||
if (max_times == std::numeric_limits<int>::max()) {
|
||||
result += sub + "*";
|
||||
} else {
|
||||
for (int j = min_times; j < max_times; j++) {
|
||||
if (j > min_times) {
|
||||
result += " ";
|
||||
}
|
||||
result += sub + "?";
|
||||
}
|
||||
}
|
||||
seq.back().first = result;
|
||||
seq.back().second = false;
|
||||
}
|
||||
} else {
|
||||
std::string literal;
|
||||
auto is_non_literal = [&](char c) {
|
||||
return NON_LITERAL_SET.find(c) != NON_LITERAL_SET.end();
|
||||
};
|
||||
while (i < length) {
|
||||
if (sub_pattern[i] == '\\' && i < length - 1) {
|
||||
char next = sub_pattern[i + 1];
|
||||
if (ESCAPED_IN_REGEXPS_BUT_NOT_IN_LITERALS.find(next) != ESCAPED_IN_REGEXPS_BUT_NOT_IN_LITERALS.end()) {
|
||||
i++;
|
||||
literal += sub_pattern[i];
|
||||
i++;
|
||||
} else {
|
||||
literal += sub_pattern.substr(i, 2);
|
||||
i += 2;
|
||||
}
|
||||
} else if (sub_pattern[i] == '"') {
|
||||
literal += "\\\"";
|
||||
i++;
|
||||
} else if (!is_non_literal(sub_pattern[i]) &&
|
||||
(i == length - 1 || literal.empty() || sub_pattern[i + 1] == '.' || !is_non_literal(sub_pattern[i + 1]))) {
|
||||
literal += sub_pattern[i];
|
||||
i++;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!literal.empty()) {
|
||||
seq.push_back(std::make_pair(literal, true));
|
||||
}
|
||||
}
|
||||
}
|
||||
return join_seq();
|
||||
};
|
||||
return _add_rule(name, "\"\\\"\" " + to_rule(transform()) + " \"\\\"\" space");
|
||||
}
|
||||
|
||||
std::string _resolve_ref(const std::string & ref) {
|
||||
std::string ref_name = ref.substr(ref.find_last_of('/') + 1);
|
||||
if (_rules.find(ref_name) == _rules.end() && _refs_being_resolved.find(ref) == _refs_being_resolved.end()) {
|
||||
_refs_being_resolved.insert(ref);
|
||||
json resolved = _refs[ref];
|
||||
ref_name = visit(resolved, ref_name);
|
||||
_refs_being_resolved.erase(ref);
|
||||
}
|
||||
return ref_name;
|
||||
}
|
||||
|
||||
std::string _build_object_rule(
|
||||
const std::vector<std::pair<std::string, json>> & properties,
|
||||
const std::unordered_set<std::string> & required,
|
||||
const std::string & name,
|
||||
const json & additional_properties)
|
||||
{
|
||||
std::vector<std::string> required_props;
|
||||
std::vector<std::string> optional_props;
|
||||
std::unordered_map<std::string, std::string> prop_kv_rule_names;
|
||||
for (const auto & kv : properties) {
|
||||
const auto &prop_name = kv.first;
|
||||
const auto &prop_schema = kv.second;
|
||||
|
||||
std::string prop_rule_name = visit(prop_schema, name + (name.empty() ? "" : "-") + prop_name);
|
||||
prop_kv_rule_names[prop_name] = _add_rule(
|
||||
name + (name.empty() ? "" : "-") + prop_name + "-kv",
|
||||
format_literal(prop_name) + " space \":\" space " + prop_rule_name
|
||||
);
|
||||
if (required.find(prop_name) != required.end()) {
|
||||
required_props.push_back(prop_name);
|
||||
} else {
|
||||
optional_props.push_back(prop_name);
|
||||
}
|
||||
}
|
||||
if (additional_properties.is_object() || (additional_properties.is_boolean() && additional_properties.get<bool>())) {
|
||||
std::string sub_name = name + (name.empty() ? "" : "-") + "additional";
|
||||
std::string value_rule = visit(additional_properties.is_object() ? additional_properties : json::object(), sub_name + "-value");
|
||||
std::string kv_rule = _add_rule(sub_name + "-kv", _add_rule("string", PRIMITIVE_RULES.at("string")) + " \":\" space " + value_rule);
|
||||
prop_kv_rule_names["*"] = kv_rule;
|
||||
optional_props.push_back("*");
|
||||
}
|
||||
|
||||
std::string rule = "\"{\" space ";
|
||||
for (size_t i = 0; i < required_props.size(); i++) {
|
||||
if (i > 0) {
|
||||
rule += " \",\" space ";
|
||||
}
|
||||
rule += prop_kv_rule_names[required_props[i]];
|
||||
}
|
||||
|
||||
if (!optional_props.empty()) {
|
||||
rule += " (";
|
||||
if (!required_props.empty()) {
|
||||
rule += " \",\" space ( ";
|
||||
}
|
||||
|
||||
std::function<std::string(const std::vector<std::string> &, bool)> get_recursive_refs = [&](const std::vector<std::string> & ks, bool first_is_optional) {
|
||||
std::string res;
|
||||
if (ks.empty()) {
|
||||
return res;
|
||||
}
|
||||
std::string k = ks[0];
|
||||
std::string kv_rule_name = prop_kv_rule_names[k];
|
||||
if (k == "*") {
|
||||
res = _add_rule(
|
||||
name + (name.empty() ? "" : "-") + "additional-kvs",
|
||||
kv_rule_name + " ( \",\" space " + kv_rule_name + " )*"
|
||||
);
|
||||
} else if (first_is_optional) {
|
||||
res = "( \",\" space " + kv_rule_name + " )?";
|
||||
} else {
|
||||
res = kv_rule_name;
|
||||
}
|
||||
if (ks.size() > 1) {
|
||||
res += " " + _add_rule(
|
||||
name + (name.empty() ? "" : "-") + k + "-rest",
|
||||
get_recursive_refs(std::vector<std::string>(ks.begin() + 1, ks.end()), true)
|
||||
);
|
||||
}
|
||||
return res;
|
||||
};
|
||||
|
||||
for (size_t i = 0; i < optional_props.size(); i++) {
|
||||
if (i > 0) {
|
||||
rule += " | ";
|
||||
}
|
||||
rule += get_recursive_refs(std::vector<std::string>(optional_props.begin() + i, optional_props.end()), false);
|
||||
}
|
||||
if (!required_props.empty()) {
|
||||
rule += " )";
|
||||
}
|
||||
rule += " )?";
|
||||
}
|
||||
|
||||
rule += " \"}\" space";
|
||||
|
||||
return rule;
|
||||
}
|
||||
|
||||
public:
|
||||
SchemaConverter(
|
||||
const std::function<json(const std::string &)> & fetch_json,
|
||||
bool dotall)
|
||||
: _fetch_json(fetch_json), _dotall(dotall)
|
||||
{
|
||||
_rules["space"] = SPACE_RULE;
|
||||
}
|
||||
|
||||
void resolve_refs(nlohmann::json & schema, const std::string & url) {
|
||||
/*
|
||||
* Resolves all $ref fields in the given schema, fetching any remote schemas,
|
||||
* replacing each $ref with absolute reference URL and populates _refs with the
|
||||
* respective referenced (sub)schema dictionaries.
|
||||
*/
|
||||
std::function<void(json &)> visit_refs = [&](json & n) {
|
||||
if (n.is_array()) {
|
||||
for (auto & x : n) {
|
||||
visit_refs(x);
|
||||
}
|
||||
} else if (n.is_object()) {
|
||||
if (n.contains("$ref")) {
|
||||
std::string ref = n["$ref"];
|
||||
if (_refs.find(ref) == _refs.end()) {
|
||||
json target;
|
||||
if (ref.find("https://") == 0) {
|
||||
std::string base_url = ref.substr(0, ref.find('#'));
|
||||
auto it = _refs.find(base_url);
|
||||
if (it != _refs.end()) {
|
||||
target = it->second;
|
||||
} else {
|
||||
// Fetch the referenced schema and resolve its refs
|
||||
auto referenced = _fetch_json(ref);
|
||||
resolve_refs(referenced, base_url);
|
||||
_refs[base_url] = referenced;
|
||||
}
|
||||
if (ref.find('#') == std::string::npos || ref.substr(ref.find('#') + 1).empty()) {
|
||||
return;
|
||||
}
|
||||
} else if (ref.find("#/") == 0) {
|
||||
target = schema;
|
||||
n["$ref"] = url + ref;
|
||||
ref = url + ref;
|
||||
} else {
|
||||
_errors.push_back("Unsupported ref: " + ref);
|
||||
return;
|
||||
}
|
||||
std::string pointer = ref.substr(ref.find('#') + 1);
|
||||
std::vector<std::string> tokens = split(pointer, "/");
|
||||
for (size_t i = 1; i < tokens.size(); ++i) {
|
||||
std::string sel = tokens[i];
|
||||
if (target.is_null() || !target.contains(sel)) {
|
||||
_errors.push_back("Error resolving ref " + ref + ": " + sel + " not in " + target.dump());
|
||||
return;
|
||||
}
|
||||
target = target[sel];
|
||||
}
|
||||
_refs[ref] = target;
|
||||
}
|
||||
} else {
|
||||
for (auto & kv : n.items()) {
|
||||
visit_refs(kv.value());
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
visit_refs(schema);
|
||||
}
|
||||
|
||||
std::string _generate_constant_rule(const json & value) {
|
||||
if (!value.is_string()) {
|
||||
_errors.push_back("Only std::string constants are supported, got " + value.dump());
|
||||
return "";
|
||||
}
|
||||
return format_literal(value.get<std::string>());
|
||||
}
|
||||
|
||||
std::string visit(const json & schema, const std::string & name) {
|
||||
json schema_type = schema.contains("type") ? schema["type"] : json();
|
||||
std::string schema_format = schema.contains("format") ? schema["format"].get<std::string>() : "";
|
||||
std::string rule_name = is_reserved_name(name) ? name + "-" : name.empty() ? "root" : name;
|
||||
|
||||
if (schema.contains("$ref")) {
|
||||
return _add_rule(rule_name, _resolve_ref(schema["$ref"]));
|
||||
} else if (schema.contains("oneOf") || schema.contains("anyOf")) {
|
||||
std::vector<json> alt_schemas = schema.contains("oneOf") ? schema["oneOf"].get<std::vector<json>>() : schema["anyOf"].get<std::vector<json>>();
|
||||
return _add_rule(rule_name, _generate_union_rule(name, alt_schemas));
|
||||
} else if (schema_type.is_array()) {
|
||||
std::vector<json> schema_types;
|
||||
for (const auto & t : schema_type) {
|
||||
schema_types.push_back({{"type", t}});
|
||||
}
|
||||
return _add_rule(rule_name, _generate_union_rule(name, schema_types));
|
||||
} else if (schema.contains("const")) {
|
||||
return _add_rule(rule_name, _generate_constant_rule(schema["const"]));
|
||||
} else if (schema.contains("enum")) {
|
||||
std::vector<std::string> enum_values;
|
||||
for (const auto & v : schema["enum"]) {
|
||||
enum_values.push_back(_generate_constant_rule(v));
|
||||
}
|
||||
return _add_rule(rule_name, join(enum_values.begin(), enum_values.end(), " | "));
|
||||
} else if ((schema_type.is_null() || schema_type == "object")
|
||||
&& (schema.contains("properties") ||
|
||||
(schema.contains("additionalProperties") && schema["additionalProperties"] != true))) {
|
||||
std::unordered_set<std::string> required;
|
||||
if (schema.contains("required") && schema["required"].is_array()) {
|
||||
for (const auto & item : schema["required"]) {
|
||||
if (item.is_string()) {
|
||||
required.insert(item.get<std::string>());
|
||||
}
|
||||
}
|
||||
}
|
||||
std::vector<std::pair<std::string, json>> properties;
|
||||
if (schema.contains("properties")) {
|
||||
for (const auto & prop : schema["properties"].items()) {
|
||||
properties.emplace_back(prop.key(), prop.value());
|
||||
}
|
||||
}
|
||||
return _add_rule(rule_name,
|
||||
_build_object_rule(
|
||||
properties, required, name,
|
||||
schema.contains("additionalProperties") ? schema["additionalProperties"] : json()));
|
||||
} else if ((schema_type.is_null() || schema_type == "object") && schema.contains("allOf")) {
|
||||
std::unordered_set<std::string> required;
|
||||
std::vector<std::pair<std::string, json>> properties;
|
||||
std::string hybrid_name = name;
|
||||
std::function<void(const json &, bool)> add_component = [&](const json & comp_schema, bool is_required) {
|
||||
if (comp_schema.contains("$ref")) {
|
||||
add_component(_refs[comp_schema["$ref"]], is_required);
|
||||
} else if (comp_schema.contains("properties")) {
|
||||
for (const auto & prop : comp_schema["properties"].items()) {
|
||||
properties.emplace_back(prop.key(), prop.value());
|
||||
if (is_required) {
|
||||
required.insert(prop.key());
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// todo warning
|
||||
}
|
||||
};
|
||||
for (auto & t : schema["allOf"]) {
|
||||
if (t.contains("anyOf")) {
|
||||
for (auto & tt : t["anyOf"]) {
|
||||
add_component(tt, false);
|
||||
}
|
||||
} else {
|
||||
add_component(t, true);
|
||||
}
|
||||
}
|
||||
return _add_rule(rule_name, _build_object_rule(properties, required, hybrid_name, json()));
|
||||
} else if ((schema_type.is_null() || schema_type == "array") && (schema.contains("items") || schema.contains("prefixItems"))) {
|
||||
json items = schema.contains("items") ? schema["items"] : schema["prefixItems"];
|
||||
if (items.is_array()) {
|
||||
std::string rule = "\"[\" space ";
|
||||
for (size_t i = 0; i < items.size(); i++) {
|
||||
if (i > 0) {
|
||||
rule += " \",\" space ";
|
||||
}
|
||||
rule += visit(items[i], name + (name.empty() ? "" : "-") + "tuple-" + std::to_string(i));
|
||||
}
|
||||
rule += " \"]\" space";
|
||||
return _add_rule(rule_name, rule);
|
||||
} else {
|
||||
std::string item_rule_name = visit(items, name + (name.empty() ? "" : "-") + "item");
|
||||
std::string list_item_operator = "( \",\" space " + item_rule_name + " )";
|
||||
std::string successive_items;
|
||||
int min_items = schema.contains("minItems") ? schema["minItems"].get<int>() : 0;
|
||||
json max_items_json = schema.contains("maxItems") ? schema["maxItems"] : json();
|
||||
int max_items = max_items_json.is_number_integer() ? max_items_json.get<int>() : -1;
|
||||
if (min_items > 0) {
|
||||
successive_items += repeat(list_item_operator, min_items - 1);
|
||||
min_items--;
|
||||
}
|
||||
if (max_items >= 0 && max_items > min_items) {
|
||||
successive_items += repeat(list_item_operator + "?", max_items - min_items - 1);
|
||||
} else {
|
||||
successive_items += list_item_operator + "*";
|
||||
}
|
||||
std::string rule;
|
||||
if (min_items == 0) {
|
||||
rule = "\"[\" space ( " + item_rule_name + " " + successive_items + " )? \"]\" space";
|
||||
} else {
|
||||
rule = "\"[\" space " + item_rule_name + " " + successive_items + " \"]\" space";
|
||||
}
|
||||
return _add_rule(rule_name, rule);
|
||||
}
|
||||
} else if ((schema_type.is_null() || schema_type == "string") && schema.contains("pattern")) {
|
||||
return _visit_pattern(schema["pattern"], rule_name);
|
||||
} else if ((schema_type.is_null() || schema_type == "string") && std::regex_match(schema_format, std::regex("^uuid[1-5]?$"))) {
|
||||
return _add_rule(rule_name == "root" ? "root" : schema_format, PRIMITIVE_RULES.at("uuid"));
|
||||
} else if ((schema_type.is_null() || schema_type == "string") && DATE_RULES.find(schema_format) != DATE_RULES.end()) {
|
||||
for (const auto & kv : DATE_RULES) {
|
||||
_add_rule(kv.first, kv.second);
|
||||
}
|
||||
return schema_format + "-string";
|
||||
} else if (schema.empty() || schema_type == "object") {
|
||||
for (const auto & n : OBJECT_RULE_NAMES) {
|
||||
_add_rule(n, PRIMITIVE_RULES.at(n));
|
||||
}
|
||||
return _add_rule(rule_name, "object");
|
||||
} else {
|
||||
if (!schema_type.is_string() || PRIMITIVE_RULES.find(schema_type.get<std::string>()) == PRIMITIVE_RULES.end()) {
|
||||
_errors.push_back("Unrecognized schema: " + schema.dump());
|
||||
return "";
|
||||
}
|
||||
// TODO: support minimum, maximum, exclusiveMinimum, exclusiveMaximum at least for zero
|
||||
return _add_rule(rule_name == "root" ? "root" : schema_type.get<std::string>(), PRIMITIVE_RULES.at(schema_type.get<std::string>()));
|
||||
}
|
||||
}
|
||||
|
||||
void check_errors() {
|
||||
if (!_errors.empty()) {
|
||||
throw std::runtime_error("JSON schema conversion failed:\n" + join(_errors.begin(), _errors.end(), "\n"));
|
||||
}
|
||||
if (!_warnings.empty()) {
|
||||
fprintf(stderr, "WARNING: JSON schema conversion was incomplete: %s\n", join(_warnings.begin(), _warnings.end(), "; ").c_str());
|
||||
}
|
||||
}
|
||||
|
||||
std::string format_grammar() {
|
||||
std::stringstream ss;
|
||||
for (const auto & kv : _rules) {
|
||||
ss << kv.first << " ::= " << kv.second << std::endl;
|
||||
}
|
||||
return ss.str();
|
||||
}
|
||||
};
|
||||
|
||||
std::string json_schema_to_grammar(const json & schema) {
|
||||
SchemaConverter converter([](const std::string &) { return json::object(); }, /* dotall= */ false);
|
||||
auto copy = schema;
|
||||
converter.resolve_refs(copy, "input");
|
||||
converter.visit(copy, "");
|
||||
converter.check_errors();
|
||||
return converter.format_grammar();
|
||||
}
|
||||
4
common/json-schema-to-grammar.h
Normal file
4
common/json-schema-to-grammar.h
Normal file
@@ -0,0 +1,4 @@
|
||||
#pragma once
|
||||
#include "json.hpp"
|
||||
|
||||
std::string json_schema_to_grammar(const nlohmann::json& schema);
|
||||
File diff suppressed because it is too large
Load Diff
@@ -32,13 +32,13 @@ typedef struct llama_sampling_params {
|
||||
float dynatemp_range = 0.00f; // 0.0 = disabled
|
||||
float dynatemp_exponent = 1.00f; // controls how entropy maps to temperature in dynamic temperature sampler
|
||||
int32_t penalty_last_n = 64; // last n tokens to penalize (0 = disable penalty, -1 = context size)
|
||||
float penalty_repeat = 1.10f; // 1.0 = disabled
|
||||
float penalty_repeat = 1.00f; // 1.0 = disabled
|
||||
float penalty_freq = 0.00f; // 0.0 = disabled
|
||||
float penalty_present = 0.00f; // 0.0 = disabled
|
||||
int32_t mirostat = 0; // 0 = disabled, 1 = mirostat, 2 = mirostat 2.0
|
||||
float mirostat_tau = 5.00f; // target entropy
|
||||
float mirostat_eta = 0.10f; // learning rate
|
||||
bool penalize_nl = true; // consider newlines as a repeatable token
|
||||
bool penalize_nl = false; // consider newlines as a repeatable token
|
||||
|
||||
std::vector<llama_sampler_type> samplers_sequence = {
|
||||
llama_sampler_type::TOP_K,
|
||||
|
||||
@@ -21,6 +21,7 @@ else()
|
||||
add_subdirectory(embedding)
|
||||
add_subdirectory(finetune)
|
||||
add_subdirectory(gritlm)
|
||||
add_subdirectory(gguf-split)
|
||||
add_subdirectory(infill)
|
||||
add_subdirectory(llama-bench)
|
||||
add_subdirectory(llava)
|
||||
|
||||
@@ -48,6 +48,8 @@ int main(int argc, char ** argv) {
|
||||
params.prompt = "Hello my name is";
|
||||
}
|
||||
|
||||
process_escapes(params.prompt);
|
||||
|
||||
// init LLM
|
||||
|
||||
llama_backend_init();
|
||||
@@ -78,7 +80,7 @@ int main(int argc, char ** argv) {
|
||||
llama_context_params ctx_params = llama_context_default_params();
|
||||
|
||||
ctx_params.seed = 1234;
|
||||
ctx_params.n_ctx = n_kv_req;
|
||||
ctx_params.n_ctx = n_kv_req;
|
||||
ctx_params.n_batch = std::max(n_len, n_parallel);
|
||||
ctx_params.n_seq_max = n_parallel;
|
||||
ctx_params.n_threads = params.n_threads;
|
||||
|
||||
5
examples/gguf-split/CMakeLists.txt
Normal file
5
examples/gguf-split/CMakeLists.txt
Normal file
@@ -0,0 +1,5 @@
|
||||
set(TARGET gguf-split)
|
||||
add_executable(${TARGET} gguf-split.cpp)
|
||||
install(TARGETS ${TARGET} RUNTIME)
|
||||
target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
|
||||
target_compile_features(${TARGET} PRIVATE cxx_std_11)
|
||||
9
examples/gguf-split/README.md
Normal file
9
examples/gguf-split/README.md
Normal file
@@ -0,0 +1,9 @@
|
||||
## GGUF split Example
|
||||
|
||||
CLI to split / merge GGUF files.
|
||||
|
||||
**Command line options:**
|
||||
|
||||
- `--split`: split GGUF to multiple GGUF, default operation.
|
||||
- `--split-max-tensors`: maximum tensors in each split: default(128)
|
||||
- `--merge`: merge multiple GGUF to a single GGUF.
|
||||
489
examples/gguf-split/gguf-split.cpp
Normal file
489
examples/gguf-split/gguf-split.cpp
Normal file
@@ -0,0 +1,489 @@
|
||||
#include "llama.h"
|
||||
#include "ggml.h"
|
||||
#include "common.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <cmath>
|
||||
#include <cstdint>
|
||||
#include <cstdlib>
|
||||
#include <fstream>
|
||||
#include <ios>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include <stdio.h>
|
||||
#include <fcntl.h>
|
||||
#include <string.h>
|
||||
|
||||
enum split_operation : uint8_t {
|
||||
SPLIT_OP_SPLIT,
|
||||
SPLIT_OP_MERGE,
|
||||
};
|
||||
|
||||
static const char * const LLM_KV_GENERAL_SPLIT_I_SPLIT = "general.split";
|
||||
static const char * const LLM_KV_GENERAL_SPLIT_N_SPLIT = "general.split_count";
|
||||
|
||||
static const int SPLIT_FILENAME_MAX = 256;
|
||||
|
||||
static const char * const SPLIT_FILENAME_FORMAT = "%s-%05d-of-%05d.gguf";
|
||||
|
||||
struct split_params {
|
||||
split_operation operation = SPLIT_OP_SPLIT;
|
||||
int n_split_tensors = 128;
|
||||
std::string input;
|
||||
std::string output;
|
||||
};
|
||||
|
||||
static void split_print_usage(const char * executable) {
|
||||
const split_params default_params;
|
||||
printf("\n");
|
||||
printf("usage: %s [options] GGUF_IN GGUF_OUT\n", executable);
|
||||
printf("\n");
|
||||
printf("Apply a GGUF operation on IN to OUT.");
|
||||
printf("\n");
|
||||
printf("options:\n");
|
||||
printf(" -h, --help show this help message and exit\n");
|
||||
printf(" --version show version and build info\n");
|
||||
printf(" --split split GGUF to multiple GGUF (default)\n");
|
||||
printf(" --split-max-tensors max tensors in each split: default(%d)\n", default_params.n_split_tensors);
|
||||
printf(" --merge merge multiple GGUF to a single GGUF\n");
|
||||
printf("\n");
|
||||
}
|
||||
|
||||
static bool split_params_parse_ex(int argc, const char ** argv, split_params & params) {
|
||||
std::string arg;
|
||||
const std::string arg_prefix = "--";
|
||||
bool invalid_param = false;
|
||||
|
||||
int arg_idx = 1;
|
||||
for (; arg_idx < argc && strncmp(argv[arg_idx], "--", 2) == 0; arg_idx++) {
|
||||
arg = argv[arg_idx];
|
||||
if (arg.compare(0, arg_prefix.size(), arg_prefix) == 0) {
|
||||
std::replace(arg.begin(), arg.end(), '_', '-');
|
||||
}
|
||||
|
||||
bool arg_found = false;
|
||||
if (arg == "-h" || arg == "--help") {
|
||||
split_print_usage(argv[0]);
|
||||
exit(0);
|
||||
}
|
||||
if (arg == "--version") {
|
||||
fprintf(stderr, "version: %d (%s)\n", LLAMA_BUILD_NUMBER, LLAMA_COMMIT);
|
||||
fprintf(stderr, "built with %s for %s\n", LLAMA_COMPILER, LLAMA_BUILD_TARGET);
|
||||
exit(0);
|
||||
}
|
||||
|
||||
if (arg == "--merge") {
|
||||
arg_found = true;
|
||||
params.operation = SPLIT_OP_MERGE;
|
||||
}
|
||||
if (arg == "--split") {
|
||||
arg_found = true;
|
||||
params.operation = SPLIT_OP_SPLIT;
|
||||
}
|
||||
if (arg == "--split-max-tensors") {
|
||||
if (++arg_idx >= argc) {
|
||||
invalid_param = true;
|
||||
break;
|
||||
}
|
||||
arg_found = true;
|
||||
params.n_split_tensors = atoi(argv[arg_idx]);
|
||||
}
|
||||
|
||||
if (!arg_found) {
|
||||
throw std::invalid_argument("error: unknown argument: " + arg);
|
||||
}
|
||||
}
|
||||
|
||||
if (invalid_param) {
|
||||
throw std::invalid_argument("error: invalid parameter for argument: " + arg);
|
||||
}
|
||||
|
||||
if (argc - arg_idx < 2) {
|
||||
printf("%s: bad arguments\n", argv[0]);
|
||||
split_print_usage(argv[0]);
|
||||
return false;
|
||||
}
|
||||
|
||||
params.input = argv[arg_idx++];
|
||||
params.output = argv[arg_idx++];
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool split_params_parse(int argc, const char ** argv, split_params & params) {
|
||||
bool result = true;
|
||||
try {
|
||||
if (!split_params_parse_ex(argc, argv, params)) {
|
||||
split_print_usage(argv[0]);
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
catch (const std::invalid_argument & ex) {
|
||||
fprintf(stderr, "%s\n", ex.what());
|
||||
split_print_usage(argv[0]);
|
||||
exit(1);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
static void zeros(std::ofstream & file, size_t n) {
|
||||
char zero = 0;
|
||||
for (size_t i = 0; i < n; ++i) {
|
||||
file.write(&zero, 1);
|
||||
}
|
||||
}
|
||||
|
||||
static std::string split_file_name(const std::string & path, int i_split, int n_split) {
|
||||
char f_split[SPLIT_FILENAME_MAX] = {0};
|
||||
snprintf(f_split, sizeof(f_split), SPLIT_FILENAME_FORMAT, path.c_str(), i_split + 1, n_split);
|
||||
return std::string(f_split);
|
||||
}
|
||||
|
||||
struct split_strategy {
|
||||
const split_params params;
|
||||
std::ifstream & f_input;
|
||||
struct gguf_context * ctx_gguf;
|
||||
struct ggml_context * ctx_meta = NULL;
|
||||
const int n_tensors;
|
||||
|
||||
const int n_split;
|
||||
int i_split = 0;
|
||||
|
||||
int i_tensor = 0;
|
||||
|
||||
std::vector<uint8_t> read_data;
|
||||
|
||||
struct gguf_context * ctx_out;
|
||||
std::ofstream fout;
|
||||
|
||||
split_strategy(const split_params & params,
|
||||
std::ifstream & f_input,
|
||||
struct gguf_context * ctx_gguf,
|
||||
struct ggml_context * ctx_meta) :
|
||||
params(params),
|
||||
f_input(f_input),
|
||||
ctx_gguf(ctx_gguf),
|
||||
ctx_meta(ctx_meta),
|
||||
n_tensors(gguf_get_n_tensors(ctx_gguf)),
|
||||
n_split(std::ceil(1. * n_tensors / params.n_split_tensors)) {
|
||||
}
|
||||
|
||||
bool should_split() const {
|
||||
return i_tensor < n_tensors && i_tensor % params.n_split_tensors == 0;
|
||||
}
|
||||
|
||||
void split_start() {
|
||||
ctx_out = gguf_init_empty();
|
||||
|
||||
// Save all metadata in first split only
|
||||
if (i_split == 0) {
|
||||
gguf_set_kv(ctx_out, ctx_gguf);
|
||||
}
|
||||
gguf_set_val_u8(ctx_out, LLM_KV_GENERAL_SPLIT_I_SPLIT, i_split);
|
||||
gguf_set_val_u8(ctx_out, LLM_KV_GENERAL_SPLIT_N_SPLIT, n_split);
|
||||
|
||||
// populate the original tensors, so we get an initial metadata
|
||||
for (int i = i_split * params.n_split_tensors; i < n_tensors && i < (i_split + 1) * params.n_split_tensors; ++i) {
|
||||
struct ggml_tensor * meta = ggml_get_tensor(ctx_meta, gguf_get_tensor_name(ctx_gguf, i));
|
||||
gguf_add_tensor(ctx_out, meta);
|
||||
}
|
||||
|
||||
auto split_name = split_file_name(params.output, i_split, n_split);
|
||||
|
||||
fprintf(stderr, "%s: %s ...", __func__, split_name.c_str());
|
||||
fout = std::ofstream(split_name, std::ios::binary);
|
||||
fout.exceptions(std::ofstream::failbit); // fail fast on write errors
|
||||
|
||||
auto meta_size = gguf_get_meta_size(ctx_out);
|
||||
|
||||
// placeholder for the meta data
|
||||
::zeros(fout, meta_size);
|
||||
|
||||
i_split++;
|
||||
}
|
||||
|
||||
void next_tensor() {
|
||||
const char * t_name = gguf_get_tensor_name(ctx_gguf, i_tensor);
|
||||
struct ggml_tensor * t = ggml_get_tensor(ctx_meta, t_name);
|
||||
auto n_bytes = ggml_nbytes(t);
|
||||
|
||||
if (read_data.size() < n_bytes) {
|
||||
read_data.resize(n_bytes);
|
||||
}
|
||||
|
||||
auto offset = gguf_get_data_offset(ctx_gguf) + gguf_get_tensor_offset(ctx_gguf, i_tensor);
|
||||
f_input.seekg(offset);
|
||||
f_input.read((char *)read_data.data(), n_bytes);
|
||||
|
||||
t->data = read_data.data();
|
||||
|
||||
// write tensor data + padding
|
||||
fout.write((const char *)t->data, n_bytes);
|
||||
zeros(fout, GGML_PAD(n_bytes, GGUF_DEFAULT_ALIGNMENT) - n_bytes);
|
||||
|
||||
i_tensor++;
|
||||
}
|
||||
|
||||
void split_end() {
|
||||
// go back to beginning of file and write the updated metadata
|
||||
fout.seekp(0);
|
||||
std::vector<uint8_t> data(gguf_get_meta_size(ctx_out));
|
||||
gguf_get_meta_data(ctx_out, data.data());
|
||||
fout.write((const char *)data.data(), data.size());
|
||||
|
||||
fout.close();
|
||||
gguf_free(ctx_out);
|
||||
|
||||
fprintf(stderr, "\033[3Ddone\n");
|
||||
}
|
||||
};
|
||||
|
||||
static void gguf_split(const split_params & split_params) {
|
||||
struct ggml_context * ctx_meta = NULL;
|
||||
|
||||
struct gguf_init_params params = {
|
||||
/*.no_alloc = */ true,
|
||||
/*.ctx = */ &ctx_meta,
|
||||
};
|
||||
|
||||
std::ifstream f_input(split_params.input.c_str(), std::ios::binary);
|
||||
if (!f_input.is_open()) {
|
||||
fprintf(stderr, "%s: failed to open input GGUF from %s\n", __func__, split_params.input.c_str());
|
||||
exit(1);
|
||||
}
|
||||
|
||||
auto * ctx_gguf = gguf_init_from_file(split_params.input.c_str(), params);
|
||||
if (!ctx_gguf) {
|
||||
fprintf(stderr, "%s: failed to load input GGUF from %s\n", __func__, split_params.input.c_str());
|
||||
exit(1);
|
||||
}
|
||||
|
||||
split_strategy strategy(split_params, f_input, ctx_gguf, ctx_meta);
|
||||
fprintf(stderr, "%s: %s -> %s (%d tensors per file)\n",
|
||||
__func__, split_params.input.c_str(),
|
||||
split_file_name(split_params.output, strategy.i_split, strategy.n_split).c_str(),
|
||||
split_params.n_split_tensors);
|
||||
|
||||
strategy.split_start();
|
||||
|
||||
while (strategy.i_tensor < strategy.n_tensors) {
|
||||
strategy.next_tensor();
|
||||
if (strategy.should_split()) {
|
||||
strategy.split_end();
|
||||
strategy.split_start();
|
||||
}
|
||||
}
|
||||
strategy.split_end();
|
||||
|
||||
gguf_free(ctx_gguf);
|
||||
f_input.close();
|
||||
|
||||
fprintf(stderr, "%s: %d gguf split written with a total of %d tensors.\n",
|
||||
__func__, strategy.n_split, strategy.n_tensors);
|
||||
}
|
||||
|
||||
static void gguf_merge(const split_params & split_params) {
|
||||
fprintf(stderr, "%s: %s -> %s\n",
|
||||
__func__, split_params.input.c_str(),
|
||||
split_params.output.c_str());
|
||||
int n_split = 1;
|
||||
int total_tensors = 0;
|
||||
|
||||
auto * ctx_out = gguf_init_empty();
|
||||
std::ofstream fout(split_params.output.c_str(), std::ios::binary);
|
||||
fout.exceptions(std::ofstream::failbit); // fail fast on write errors
|
||||
|
||||
std::vector<uint8_t> read_data;
|
||||
std::vector<ggml_context *> ctx_metas;
|
||||
std::vector<gguf_context *> ctx_ggufs;
|
||||
|
||||
std::string split_prefix;
|
||||
|
||||
// First pass to find KV and tensors metadata
|
||||
for (int i_split = 0; i_split < n_split; i_split++) {
|
||||
struct ggml_context * ctx_meta = NULL;
|
||||
|
||||
struct gguf_init_params params = {
|
||||
/*.no_alloc = */ true,
|
||||
/*.ctx = */ &ctx_meta,
|
||||
};
|
||||
|
||||
auto split_name = split_params.input;
|
||||
if (i_split > 0) {
|
||||
split_name = split_file_name(split_prefix, i_split, n_split);
|
||||
}
|
||||
fprintf(stderr, "%s: reading metadata %s ...", __func__, split_name.c_str());
|
||||
|
||||
auto * ctx_gguf = gguf_init_from_file(split_name.c_str(), params);
|
||||
if (!ctx_gguf) {
|
||||
fprintf(stderr, "\n%s: failed to load input GGUF from %s\n", __func__, split_params.input.c_str());
|
||||
exit(1);
|
||||
}
|
||||
ctx_ggufs.push_back(ctx_gguf);
|
||||
ctx_metas.push_back(ctx_meta);
|
||||
|
||||
if (i_split == 0) {
|
||||
auto key_n_split = gguf_find_key(ctx_gguf, LLM_KV_GENERAL_SPLIT_N_SPLIT);
|
||||
if (key_n_split < 0) {
|
||||
fprintf(stderr,
|
||||
"\n%s: input file does not contain %s metadata\n",
|
||||
__func__,
|
||||
LLM_KV_GENERAL_SPLIT_N_SPLIT);
|
||||
gguf_free(ctx_gguf);
|
||||
gguf_free(ctx_out);
|
||||
fout.close();
|
||||
exit(1);
|
||||
}
|
||||
|
||||
n_split = gguf_get_val_u8(ctx_gguf, key_n_split);
|
||||
if (n_split < 1) {
|
||||
fprintf(stderr,
|
||||
"\n%s: input file does not contain a valid split count %d\n",
|
||||
__func__,
|
||||
n_split);
|
||||
gguf_free(ctx_gguf);
|
||||
gguf_free(ctx_out);
|
||||
fout.close();
|
||||
exit(1);
|
||||
}
|
||||
|
||||
// Do not trigger merge if we try to merge again the output
|
||||
gguf_set_val_u8(ctx_out, LLM_KV_GENERAL_SPLIT_N_SPLIT, 0);
|
||||
|
||||
// Set metadata from the first split
|
||||
gguf_set_kv(ctx_out, ctx_gguf);
|
||||
}
|
||||
|
||||
// Verify the file naming
|
||||
{
|
||||
int i_split_file = 0;
|
||||
int n_split_file = 0;
|
||||
const char * i_split_format = "-00000-of-00000.gguf";
|
||||
|
||||
if (split_name.size() < strlen(i_split_format)) {
|
||||
fprintf(stderr, "\n%s: unexpected input file name: %s\n", __func__, split_params.input.c_str());
|
||||
for (auto * _ctx_gguf : ctx_ggufs) {
|
||||
gguf_free(_ctx_gguf);
|
||||
}
|
||||
gguf_free(ctx_out);
|
||||
fout.close();
|
||||
exit(1);
|
||||
}
|
||||
|
||||
split_prefix = split_name.substr(0, split_name.size() - strlen(i_split_format));
|
||||
|
||||
const char * split_name_c_str = split_name.c_str();
|
||||
int n_part = sscanf(&split_name_c_str[0] + split_prefix.size(), "-%d-of-%d", &i_split_file, &n_split_file);
|
||||
|
||||
if (n_part != 2 || i_split_file - 1 != i_split || n_split_file != n_split) {
|
||||
fprintf(stderr, "\n%s: unexpected input file name: %s"
|
||||
" i_split=%d i_split_file=%d"
|
||||
" n_split=%d n_split_file=%d\n", __func__,
|
||||
split_params.input.c_str(),
|
||||
i_split, i_split_file,
|
||||
n_split, n_split_file);
|
||||
for (auto * _ctx_gguf : ctx_ggufs) {
|
||||
gguf_free(_ctx_gguf);
|
||||
}
|
||||
gguf_free(ctx_out);
|
||||
fout.close();
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
auto n_tensors = gguf_get_n_tensors(ctx_gguf);
|
||||
for (int i_tensor = 0; i_tensor < n_tensors; i_tensor++) {
|
||||
const char * t_name = gguf_get_tensor_name(ctx_gguf, i_tensor);
|
||||
struct ggml_tensor * t = ggml_get_tensor(ctx_meta, t_name);
|
||||
gguf_add_tensor(ctx_out, t);
|
||||
}
|
||||
total_tensors += n_tensors;
|
||||
|
||||
fprintf(stderr, "\033[3Ddone\n");
|
||||
}
|
||||
|
||||
// placeholder for the meta data
|
||||
{
|
||||
auto meta_size = gguf_get_meta_size(ctx_out);
|
||||
::zeros(fout, meta_size);
|
||||
}
|
||||
|
||||
// Write tensors data
|
||||
for (int i_split = 0; i_split < n_split; i_split++) {
|
||||
auto split_name = split_file_name(split_prefix, i_split, n_split);
|
||||
std::ifstream f_input(split_name.c_str(), std::ios::binary);
|
||||
if (!f_input.is_open()) {
|
||||
fprintf(stderr, "%s: failed to open input GGUF from %s\n", __func__, split_name.c_str());
|
||||
for (auto * _ctx_gguf : ctx_ggufs) {
|
||||
gguf_free(_ctx_gguf);
|
||||
}
|
||||
gguf_free(ctx_out);
|
||||
fout.close();
|
||||
exit(1);
|
||||
}
|
||||
fprintf(stderr, "%s: writing tensors %s ...", __func__, split_name.c_str());
|
||||
|
||||
auto * ctx_gguf = ctx_ggufs[i_split];
|
||||
auto * ctx_meta = ctx_metas[i_split];
|
||||
|
||||
auto n_tensors = gguf_get_n_tensors(ctx_gguf);
|
||||
for (int i_tensor = 0; i_tensor < n_tensors; i_tensor++) {
|
||||
const char * t_name = gguf_get_tensor_name(ctx_gguf, i_tensor);
|
||||
struct ggml_tensor * t = ggml_get_tensor(ctx_meta, t_name);
|
||||
|
||||
auto n_bytes = ggml_nbytes(t);
|
||||
|
||||
if (read_data.size() < n_bytes) {
|
||||
read_data.resize(n_bytes);
|
||||
}
|
||||
|
||||
auto offset = gguf_get_data_offset(ctx_gguf) + gguf_get_tensor_offset(ctx_gguf, i_tensor);
|
||||
f_input.seekg(offset);
|
||||
f_input.read((char *)read_data.data(), n_bytes);
|
||||
|
||||
// write tensor data + padding
|
||||
fout.write((const char *)read_data.data(), n_bytes);
|
||||
zeros(fout, GGML_PAD(n_bytes, GGUF_DEFAULT_ALIGNMENT) - n_bytes);
|
||||
}
|
||||
|
||||
gguf_free(ctx_gguf);
|
||||
ggml_free(ctx_meta);
|
||||
f_input.close();
|
||||
fprintf(stderr, "\033[3Ddone\n");
|
||||
}
|
||||
|
||||
{
|
||||
// go back to beginning of file and write the updated metadata
|
||||
fout.seekp(0);
|
||||
std::vector<uint8_t> data(gguf_get_meta_size(ctx_out));
|
||||
gguf_get_meta_data(ctx_out, data.data());
|
||||
fout.write((const char *)data.data(), data.size());
|
||||
|
||||
fout.close();
|
||||
gguf_free(ctx_out);
|
||||
}
|
||||
|
||||
fprintf(stderr, "%s: %s merged from %d split with %d tensors.\n",
|
||||
__func__, split_params.output.c_str(), n_split, total_tensors);
|
||||
}
|
||||
|
||||
int main(int argc, const char ** argv) {
|
||||
if (argc < 3) {
|
||||
split_print_usage(argv[0]);
|
||||
}
|
||||
|
||||
split_params params;
|
||||
split_params_parse(argc, argv, params);
|
||||
|
||||
switch (params.operation) {
|
||||
case SPLIT_OP_SPLIT: gguf_split(params);
|
||||
break;
|
||||
case SPLIT_OP_MERGE: gguf_merge(params);
|
||||
break;
|
||||
default:split_print_usage(argv[0]);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
74
examples/json-schema-pydantic-example.py
Normal file
74
examples/json-schema-pydantic-example.py
Normal file
@@ -0,0 +1,74 @@
|
||||
# Usage:
|
||||
#! ./server -m some-model.gguf &
|
||||
#! pip install pydantic
|
||||
#! python json-schema-pydantic-example.py
|
||||
|
||||
from pydantic import BaseModel, TypeAdapter
|
||||
from annotated_types import MinLen
|
||||
from typing import Annotated, List, Optional
|
||||
import json, requests
|
||||
|
||||
if True:
|
||||
|
||||
def create_completion(*, response_model=None, endpoint="http://localhost:8080/v1/chat/completions", messages, **kwargs):
|
||||
'''
|
||||
Creates a chat completion using an OpenAI-compatible endpoint w/ JSON schema support
|
||||
(llama.cpp server, llama-cpp-python, Anyscale / Together...)
|
||||
|
||||
The response_model param takes a type (+ supports Pydantic) and behaves just as w/ Instructor (see below)
|
||||
'''
|
||||
if response_model:
|
||||
type_adapter = TypeAdapter(response_model)
|
||||
schema = type_adapter.json_schema()
|
||||
messages = [{
|
||||
"role": "system",
|
||||
"content": f"You respond in JSON format with the following schema: {json.dumps(schema, indent=2)}"
|
||||
}] + messages
|
||||
response_format={"type": "json_object", "schema": schema}
|
||||
|
||||
data = requests.post(endpoint, headers={"Content-Type": "application/json"},
|
||||
json=dict(messages=messages, response_format=response_format, **kwargs)).json()
|
||||
if 'error' in data:
|
||||
raise Exception(data['error']['message'])
|
||||
|
||||
content = data["choices"][0]["message"]["content"]
|
||||
return type_adapter.validate_json(content) if type_adapter else content
|
||||
|
||||
else:
|
||||
|
||||
# This alternative branch uses Instructor + OpenAI client lib.
|
||||
# Instructor support streamed iterable responses, retry & more.
|
||||
# (see https://python.useinstructor.com/)
|
||||
#! pip install instructor openai
|
||||
import instructor, openai
|
||||
client = instructor.patch(
|
||||
openai.OpenAI(api_key="123", base_url="http://localhost:8080"),
|
||||
mode=instructor.Mode.JSON_SCHEMA)
|
||||
create_completion = client.chat.completions.create
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
||||
class QAPair(BaseModel):
|
||||
question: str
|
||||
concise_answer: str
|
||||
justification: str
|
||||
|
||||
class PyramidalSummary(BaseModel):
|
||||
title: str
|
||||
summary: str
|
||||
question_answers: Annotated[List[QAPair], MinLen(2)]
|
||||
sub_sections: Optional[Annotated[List['PyramidalSummary'], MinLen(2)]]
|
||||
|
||||
print("# Summary\n", create_completion(
|
||||
model="...",
|
||||
response_model=PyramidalSummary,
|
||||
messages=[{
|
||||
"role": "user",
|
||||
"content": f"""
|
||||
You are a highly efficient corporate document summarizer.
|
||||
Create a pyramidal summary of an imaginary internal document about our company processes
|
||||
(starting high-level, going down to each sub sections).
|
||||
Keep questions short, and answers even shorter (trivia / quizz style).
|
||||
"""
|
||||
}]))
|
||||
@@ -1,8 +1,10 @@
|
||||
#!/usr/bin/env python3
|
||||
import argparse
|
||||
import itertools
|
||||
import json
|
||||
import re
|
||||
import sys
|
||||
from typing import Any, Dict, List, Set, Tuple, Union
|
||||
|
||||
# whitespace is constrained to a single space char to prevent model "running away" in
|
||||
# whitespace. Also maybe improves generation quality?
|
||||
@@ -12,22 +14,50 @@ PRIMITIVE_RULES = {
|
||||
'boolean': '("true" | "false") space',
|
||||
'number': '("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? space',
|
||||
'integer': '("-"? ([0-9] | [1-9] [0-9]*)) space',
|
||||
'value' : 'object | array | string | number | boolean',
|
||||
'object' : '"{" space ( string ":" space value ("," space string ":" space value)* )? "}" space',
|
||||
'array' : '"[" space ( value ("," space value)* )? "]" space',
|
||||
'uuid' : '"\\"" ' + ' "-" '.join('[0-9a-fA-F]' * n for n in [8, 4, 4, 4, 12]) + ' "\\"" space',
|
||||
'string': r''' "\"" (
|
||||
[^"\\] |
|
||||
"\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
|
||||
)* "\"" space ''',
|
||||
)* "\"" space''',
|
||||
'null': '"null" space',
|
||||
}
|
||||
OBJECT_RULE_NAMES = ['object', 'array', 'string', 'number', 'boolean', 'null', 'value']
|
||||
|
||||
# TODO: support "uri", "email" string formats
|
||||
DATE_RULES = {
|
||||
'date' : '[0-9] [0-9] [0-9] [0-9] "-" ( "0" [1-9] | "1" [0-2] ) "-" ( \"0\" [1-9] | [1-2] [0-9] | "3" [0-1] )',
|
||||
'time' : '([01] [0-9] | "2" [0-3]) ":" [0-5] [0-9] ":" [0-5] [0-9] ( "." [0-9] [0-9] [0-9] )? ( "Z" | ( "+" | "-" ) ( [01] [0-9] | "2" [0-3] ) ":" [0-5] [0-9] )',
|
||||
'date-time': 'date "T" time',
|
||||
'date-string': '"\\"" date "\\"" space',
|
||||
'time-string': '"\\"" time "\\"" space',
|
||||
'date-time-string': '"\\"" date-time "\\"" space',
|
||||
}
|
||||
|
||||
RESERVED_NAMES = set(["root", *PRIMITIVE_RULES.keys(), *DATE_RULES.keys()])
|
||||
|
||||
INVALID_RULE_CHARS_RE = re.compile(r'[^a-zA-Z0-9-]+')
|
||||
GRAMMAR_LITERAL_ESCAPE_RE = re.compile(r'[\r\n"]')
|
||||
GRAMMAR_LITERAL_ESCAPES = {'\r': '\\r', '\n': '\\n', '"': '\\"'}
|
||||
GRAMMAR_RANGE_LITERAL_ESCAPE_RE = re.compile(r'[\r\n"\]\-\\]')
|
||||
GRAMMAR_LITERAL_ESCAPES = {'\r': '\\r', '\n': '\\n', '"': '\\"', '-': '\\-', ']': '\\]'}
|
||||
|
||||
NON_LITERAL_SET = set('|.()[]{}*+?')
|
||||
ESCAPED_IN_REGEXPS_BUT_NOT_IN_LITERALS = set('[]()|{}*+?')
|
||||
|
||||
DATE_PATTERN = '[0-9]{4}-(0[1-9]|1[0-2])-([0-2][0-9]|3[0-1])'
|
||||
TIME_PATTERN = '([01][0-9]|2[0-3])(:[0-5][0-9]){2}(\\.[0-9]{1,3})?(Z|[+-](([01][0-9]|2[0-3]):[0-5][0-9]))' # Cap millisecond precision w/ 3 digits
|
||||
|
||||
class SchemaConverter:
|
||||
def __init__(self, prop_order):
|
||||
def __init__(self, *, prop_order, allow_fetch, dotall, raw_pattern):
|
||||
self._prop_order = prop_order
|
||||
self._allow_fetch = allow_fetch
|
||||
self._dotall = dotall
|
||||
self._raw_pattern = raw_pattern
|
||||
self._rules = {'space': SPACE_RULE}
|
||||
self._refs = {}
|
||||
self._refs_being_resolved = set()
|
||||
|
||||
def _format_literal(self, literal):
|
||||
escaped = GRAMMAR_LITERAL_ESCAPE_RE.sub(
|
||||
@@ -41,78 +71,421 @@ class SchemaConverter:
|
||||
key = esc_name
|
||||
else:
|
||||
i = 0
|
||||
while f'{esc_name}{i}' in self._rules:
|
||||
while f'{esc_name}{i}' in self._rules and self._rules[f'{esc_name}{i}'] != rule:
|
||||
i += 1
|
||||
key = f'{esc_name}{i}'
|
||||
self._rules[key] = rule
|
||||
return key
|
||||
|
||||
def resolve_refs(self, schema: dict, url: str):
|
||||
'''
|
||||
Resolves all $ref fields in the given schema, fetching any remote schemas,
|
||||
replacing $ref with absolute reference URL and populating self._refs with the
|
||||
respective referenced (sub)schema dictionaries.
|
||||
'''
|
||||
def visit(n: dict):
|
||||
if isinstance(n, list):
|
||||
return [visit(x) for x in n]
|
||||
elif isinstance(n, dict):
|
||||
ref = n.get('$ref')
|
||||
if ref is not None and ref not in self._refs:
|
||||
if ref.startswith('https://'):
|
||||
assert self._allow_fetch, 'Fetching remote schemas is not allowed (use --allow-fetch for force)'
|
||||
import requests
|
||||
|
||||
frag_split = ref.split('#')
|
||||
base_url = frag_split[0]
|
||||
|
||||
target = self._refs.get(base_url)
|
||||
if target is None:
|
||||
target = self.resolve_refs(requests.get(ref).json(), base_url)
|
||||
self._refs[base_url] = target
|
||||
|
||||
if len(frag_split) == 1 or frag_split[-1] == '':
|
||||
return target
|
||||
elif ref.startswith('#/'):
|
||||
target = schema
|
||||
ref = f'{url}{ref}'
|
||||
n['$ref'] = ref
|
||||
else:
|
||||
raise ValueError(f'Unsupported ref {ref}')
|
||||
|
||||
for sel in ref.split('#')[-1].split('/')[1:]:
|
||||
assert target is not None and sel in target, f'Error resolving ref {ref}: {sel} not in {target}'
|
||||
target = target[sel]
|
||||
|
||||
self._refs[ref] = target
|
||||
else:
|
||||
for v in n.values():
|
||||
visit(v)
|
||||
|
||||
return n
|
||||
return visit(schema)
|
||||
|
||||
def _generate_union_rule(self, name, alt_schemas):
|
||||
return ' | '.join((
|
||||
self.visit(alt_schema, f'{name}{"-" if name else "alternative-"}{i}')
|
||||
for i, alt_schema in enumerate(alt_schemas)
|
||||
))
|
||||
|
||||
def _visit_pattern(self, pattern, name):
|
||||
'''
|
||||
Transforms a regular expression pattern into a GBNF rule.
|
||||
|
||||
Input: https://json-schema.org/understanding-json-schema/reference/regular_expressions
|
||||
Output: https://github.com/ggerganov/llama.cpp/blob/master/grammars/README.md
|
||||
|
||||
Unsupported features: negative/positive lookaheads, greedy/non-greedy modifiers.
|
||||
|
||||
Mostly a 1:1 translation, except for {x} / {x,} / {x,y} quantifiers for which
|
||||
we define sub-rules to keep the output lean.
|
||||
'''
|
||||
|
||||
assert pattern.startswith('^') and pattern.endswith('$'), 'Pattern must start with "^" and end with "$"'
|
||||
pattern = pattern[1:-1]
|
||||
sub_rule_ids = {}
|
||||
|
||||
i = 0
|
||||
length = len(pattern)
|
||||
|
||||
def to_rule(s: Tuple[str, bool]) -> str:
|
||||
(txt, is_literal) = s
|
||||
return "\"" + txt + "\"" if is_literal else txt
|
||||
|
||||
def transform() -> Tuple[str, bool]:
|
||||
'''
|
||||
Parse a unit at index i (advancing it), and return its string representation + whether it's a literal.
|
||||
'''
|
||||
nonlocal i
|
||||
nonlocal pattern
|
||||
nonlocal sub_rule_ids
|
||||
|
||||
start = i
|
||||
# For each component of this sequence, store its string representation and whether it's a literal.
|
||||
# We only need a flat structure here to apply repetition operators to the last item, and
|
||||
# to merge literals at the and (we're parsing grouped ( sequences ) recursively and don't treat '|' specially
|
||||
# (GBNF's syntax is luckily very close to regular expressions!)
|
||||
seq: list[Tuple[str, bool]] = []
|
||||
|
||||
def get_dot():
|
||||
if self._dotall:
|
||||
rule = '[\\U00000000-\\U0010FFFF]'
|
||||
else:
|
||||
# Accept any character... except \n and \r line break chars (\x0A and \xOD)
|
||||
rule = '[\\U00000000-\\x09\\x0B\\x0C\\x0E-\\U0010FFFF]'
|
||||
return self._add_rule(f'dot', rule)
|
||||
|
||||
def join_seq():
|
||||
nonlocal seq
|
||||
ret = []
|
||||
for is_literal, g in itertools.groupby(seq, lambda x: x[1]):
|
||||
if is_literal:
|
||||
ret.append((''.join(x[0] for x in g), True))
|
||||
else:
|
||||
ret.extend(g)
|
||||
if len(ret) == 1:
|
||||
return ret[0]
|
||||
return (' '.join(to_rule(x) for x in seq), False)
|
||||
|
||||
while i < length:
|
||||
c = pattern[i]
|
||||
if c == '.':
|
||||
seq.append((get_dot(), False))
|
||||
i += 1
|
||||
elif c == '(':
|
||||
i += 1
|
||||
if i < length:
|
||||
assert pattern[i] != '?', f'Unsupported pattern syntax "{pattern[i]}" at index {i} of /{pattern}/'
|
||||
seq.append((f'({to_rule(transform())})', False))
|
||||
elif c == ')':
|
||||
i += 1
|
||||
assert start > 0 and pattern[start-1] == '(', f'Unbalanced parentheses; start = {start}, i = {i}, pattern = {pattern}'
|
||||
return join_seq()
|
||||
elif c == '[':
|
||||
square_brackets = c
|
||||
i += 1
|
||||
while i < length and pattern[i] != ']':
|
||||
if pattern[i] == '\\':
|
||||
square_brackets += pattern[i:i+2]
|
||||
i += 2
|
||||
else:
|
||||
square_brackets += pattern[i]
|
||||
i += 1
|
||||
assert i < length, f'Unbalanced square brackets; start = {start}, i = {i}, pattern = {pattern}'
|
||||
square_brackets += ']'
|
||||
i += 1
|
||||
seq.append((square_brackets, False))
|
||||
elif c == '|':
|
||||
seq.append(('|', False))
|
||||
i += 1
|
||||
elif c in ('*', '+', '?'):
|
||||
seq[-1] = (to_rule(seq[-1]) + c, False)
|
||||
i += 1
|
||||
elif c == '{':
|
||||
curly_brackets = c
|
||||
i += 1
|
||||
while i < length and pattern[i] != '}':
|
||||
curly_brackets += pattern[i]
|
||||
i += 1
|
||||
assert i < length, f'Unbalanced curly brackets; start = {start}, i = {i}, pattern = {pattern}'
|
||||
curly_brackets += '}'
|
||||
i += 1
|
||||
nums = [s.strip() for s in curly_brackets[1:-1].split(',')]
|
||||
min_times = 0
|
||||
max_times = None
|
||||
try:
|
||||
if len(nums) == 1:
|
||||
min_times = int(nums[0])
|
||||
max_times = min_times
|
||||
else:
|
||||
assert len(nums) == 2
|
||||
min_times = int(nums[0]) if nums[0] else 0
|
||||
max_times = int(nums[1]) if nums[1] else None
|
||||
except ValueError:
|
||||
raise ValueError(f'Invalid quantifier {curly_brackets} in /{pattern}/')
|
||||
|
||||
(sub, sub_is_literal) = seq[-1]
|
||||
|
||||
if min_times == 0 and max_times is None:
|
||||
seq[-1] = (f'{sub}*', False)
|
||||
elif min_times == 0 and max_times == 1:
|
||||
seq[-1] = (f'{sub}?', False)
|
||||
elif min_times == 1 and max_times is None:
|
||||
seq[-1] = (f'{sub}+', False)
|
||||
else:
|
||||
if not sub_is_literal:
|
||||
id = sub_rule_ids.get(sub)
|
||||
if id is None:
|
||||
id = self._add_rule(f'{name}-{len(sub_rule_ids) + 1}', sub)
|
||||
sub_rule_ids[sub] = id
|
||||
sub = id
|
||||
|
||||
seq[-1] = (
|
||||
' '.join(
|
||||
([f'"{sub[1:-1] * min_times}"'] if sub_is_literal else [sub] * min_times) +
|
||||
([f'{sub}?'] * (max_times - min_times) if max_times is not None else [f'{sub}*'])),
|
||||
False
|
||||
)
|
||||
else:
|
||||
literal = ''
|
||||
while i < length:
|
||||
if pattern[i] == '\\' and i < length - 1:
|
||||
next = pattern[i + 1]
|
||||
if next in ESCAPED_IN_REGEXPS_BUT_NOT_IN_LITERALS:
|
||||
i += 1
|
||||
literal += pattern[i]
|
||||
i += 1
|
||||
else:
|
||||
literal += pattern[i:i+2]
|
||||
i += 2
|
||||
elif pattern[i] == '"' and not self._raw_pattern:
|
||||
literal += '\\"'
|
||||
i += 1
|
||||
elif pattern[i] not in NON_LITERAL_SET and \
|
||||
(i == length - 1 or literal == '' or pattern[i+1] == '.' or pattern[i+1] not in NON_LITERAL_SET):
|
||||
literal += pattern[i]
|
||||
i += 1
|
||||
else:
|
||||
break
|
||||
if literal:
|
||||
seq.append((literal, True))
|
||||
|
||||
return join_seq()
|
||||
|
||||
return self._add_rule(
|
||||
name,
|
||||
to_rule(transform()) if self._raw_pattern \
|
||||
else "\"\\\"\" " + to_rule(transform()) + " \"\\\"\" space")
|
||||
|
||||
|
||||
def _resolve_ref(self, ref):
|
||||
ref_name = ref.split('/')[-1]
|
||||
if ref_name not in self._rules and ref not in self._refs_being_resolved:
|
||||
self._refs_being_resolved.add(ref)
|
||||
resolved = self._refs[ref]
|
||||
ref_name = self.visit(resolved, ref_name)
|
||||
self._refs_being_resolved.remove(ref)
|
||||
return ref_name
|
||||
|
||||
def _generate_constant_rule(self, value):
|
||||
assert isinstance(value, str), f'Only string constants are supported, got {value}'
|
||||
return self._format_literal(value)
|
||||
|
||||
def visit(self, schema, name):
|
||||
schema_type = schema.get('type')
|
||||
rule_name = name or 'root'
|
||||
schema_format = schema.get('format')
|
||||
rule_name = name + '-' if name in RESERVED_NAMES else name or 'root'
|
||||
|
||||
if 'oneOf' in schema or 'anyOf' in schema:
|
||||
rule = ' | '.join((
|
||||
self.visit(alt_schema, f'{name}{"-" if name else ""}{i}')
|
||||
for i, alt_schema in enumerate(schema.get('oneOf') or schema['anyOf'])
|
||||
))
|
||||
return self._add_rule(rule_name, rule)
|
||||
if (ref := schema.get('$ref')) is not None:
|
||||
return self._add_rule(rule_name, self._resolve_ref(ref))
|
||||
|
||||
elif 'oneOf' in schema or 'anyOf' in schema:
|
||||
return self._add_rule(rule_name, self._generate_union_rule(name, schema.get('oneOf') or schema['anyOf']))
|
||||
|
||||
elif isinstance(schema_type, list):
|
||||
return self._add_rule(rule_name, self._generate_union_rule(name, [{'type': t} for t in schema_type]))
|
||||
|
||||
elif 'const' in schema:
|
||||
return self._add_rule(rule_name, self._format_literal(schema['const']))
|
||||
return self._add_rule(rule_name, self._generate_constant_rule(schema['const']))
|
||||
|
||||
elif 'enum' in schema:
|
||||
rule = ' | '.join((self._format_literal(v) for v in schema['enum']))
|
||||
rule = ' | '.join((self._generate_constant_rule(v) for v in schema['enum']))
|
||||
return self._add_rule(rule_name, rule)
|
||||
|
||||
elif schema_type == 'object' and 'properties' in schema:
|
||||
# TODO: `required` keyword
|
||||
prop_order = self._prop_order
|
||||
prop_pairs = sorted(
|
||||
schema['properties'].items(),
|
||||
# sort by position in prop_order (if specified) then by key
|
||||
key=lambda kv: (prop_order.get(kv[0], len(prop_order)), kv[0]),
|
||||
elif schema_type in (None, 'object') and \
|
||||
('properties' in schema or \
|
||||
('additionalProperties' in schema and schema['additionalProperties'] is not True)):
|
||||
required = set(schema.get('required', []))
|
||||
properties = list(schema.get('properties', {}).items())
|
||||
return self._add_rule(rule_name, self._build_object_rule(properties, required, name, schema.get('additionalProperties')))
|
||||
|
||||
elif schema_type in (None, 'object') and 'allOf' in schema:
|
||||
required = set()
|
||||
properties = []
|
||||
hybrid_name = name
|
||||
def add_component(comp_schema, is_required):
|
||||
if (ref := comp_schema.get('$ref')) is not None:
|
||||
comp_schema = self._refs[ref]
|
||||
|
||||
if 'properties' in comp_schema:
|
||||
for prop_name, prop_schema in comp_schema['properties'].items():
|
||||
properties.append((prop_name, prop_schema))
|
||||
if is_required:
|
||||
required.add(prop_name)
|
||||
|
||||
for t in schema['allOf']:
|
||||
if 'anyOf' in t:
|
||||
for tt in t['anyOf']:
|
||||
add_component(tt, is_required=False)
|
||||
else:
|
||||
add_component(t, is_required=True)
|
||||
|
||||
return self._add_rule(rule_name, self._build_object_rule(properties, required, hybrid_name, additional_properties=[]))
|
||||
|
||||
elif schema_type in (None, 'array') and ('items' in schema or 'prefixItems' in schema):
|
||||
items = schema.get('items') or schema['prefixItems']
|
||||
if isinstance(items, list):
|
||||
return self._add_rule(
|
||||
rule_name,
|
||||
'"[" space ' +
|
||||
' "," space '.join(
|
||||
self.visit(item, f'{name}{"-" if name else ""}tuple-{i}')
|
||||
for i, item in enumerate(items)) +
|
||||
' "]" space')
|
||||
else:
|
||||
item_rule_name = self.visit(items, f'{name}{"-" if name else ""}item')
|
||||
list_item_operator = f'( "," space {item_rule_name} )'
|
||||
successive_items = ""
|
||||
min_items = schema.get("minItems", 0)
|
||||
max_items = schema.get("maxItems")
|
||||
if min_items > 0:
|
||||
successive_items = list_item_operator * (min_items - 1)
|
||||
min_items -= 1
|
||||
if max_items is not None and max_items > min_items:
|
||||
successive_items += (list_item_operator + "?") * (max_items - min_items - 1)
|
||||
else:
|
||||
successive_items += list_item_operator + "*"
|
||||
if min_items == 0:
|
||||
rule = f'"[" space ( {item_rule_name} {successive_items} )? "]" space'
|
||||
else:
|
||||
rule = f'"[" space {item_rule_name} {successive_items} "]" space'
|
||||
return self._add_rule(rule_name, rule)
|
||||
|
||||
elif schema_type in (None, 'string') and 'pattern' in schema:
|
||||
return self._visit_pattern(schema['pattern'], rule_name)
|
||||
|
||||
elif schema_type in (None, 'string') and re.match(r'^uuid[1-5]?$', schema_format or ''):
|
||||
return self._add_rule(
|
||||
'root' if rule_name == 'root' else schema_format,
|
||||
PRIMITIVE_RULES['uuid']
|
||||
)
|
||||
|
||||
rule = '"{" space'
|
||||
for i, (prop_name, prop_schema) in enumerate(prop_pairs):
|
||||
prop_rule_name = self.visit(prop_schema, f'{name}{"-" if name else ""}{prop_name}')
|
||||
if i > 0:
|
||||
rule += ' "," space'
|
||||
rule += fr' {self._format_literal(prop_name)} space ":" space {prop_rule_name}'
|
||||
rule += ' "}" space'
|
||||
elif schema_type in (None, 'string') and schema_format in DATE_RULES:
|
||||
for t, r in DATE_RULES.items():
|
||||
self._add_rule(t, r)
|
||||
return schema_format + '-string'
|
||||
|
||||
return self._add_rule(rule_name, rule)
|
||||
|
||||
elif schema_type == 'array' and 'items' in schema:
|
||||
# TODO `prefixItems` keyword
|
||||
item_rule_name = self.visit(schema['items'], f'{name}{"-" if name else ""}item')
|
||||
list_item_operator = f'("," space {item_rule_name})'
|
||||
successive_items = ""
|
||||
min_items = schema.get("minItems", 0)
|
||||
if min_items > 0:
|
||||
first_item = f"({item_rule_name})"
|
||||
successive_items = list_item_operator * (min_items - 1)
|
||||
min_items -= 1
|
||||
else:
|
||||
first_item = f"({item_rule_name})?"
|
||||
max_items = schema.get("maxItems")
|
||||
if max_items is not None and max_items > min_items:
|
||||
successive_items += (list_item_operator + "?") * (max_items - min_items - 1)
|
||||
else:
|
||||
successive_items += list_item_operator + "*"
|
||||
rule = f'"[" space {first_item} {successive_items} "]" space'
|
||||
return self._add_rule(rule_name, rule)
|
||||
elif (schema_type == 'object') or (len(schema) == 0):
|
||||
for n in OBJECT_RULE_NAMES:
|
||||
self._add_rule(n, PRIMITIVE_RULES[n])
|
||||
return self._add_rule(rule_name, 'object')
|
||||
|
||||
else:
|
||||
assert schema_type in PRIMITIVE_RULES, f'Unrecognized schema: {schema}'
|
||||
# TODO: support minimum, maximum, exclusiveMinimum, exclusiveMaximum at least for zero
|
||||
return self._add_rule(
|
||||
'root' if rule_name == 'root' else schema_type,
|
||||
PRIMITIVE_RULES[schema_type]
|
||||
)
|
||||
|
||||
def _build_object_rule(self, properties: List[Tuple[str, Any]], required: Set[str], name: str, additional_properties: Union[bool, Any]):
|
||||
prop_order = self._prop_order
|
||||
# sort by position in prop_order (if specified) then by original order
|
||||
sorted_props = [kv[0] for _, kv in sorted(enumerate(properties), key=lambda ikv: (prop_order.get(ikv[1][0], len(prop_order)), ikv[0]))]
|
||||
|
||||
prop_kv_rule_names = {}
|
||||
for prop_name, prop_schema in properties:
|
||||
prop_rule_name = self.visit(prop_schema, f'{name}{"-" if name else ""}{prop_name}')
|
||||
prop_kv_rule_names[prop_name] = self._add_rule(
|
||||
f'{name}{"-" if name else ""}{prop_name}-kv',
|
||||
fr'{self._format_literal(prop_name)} space ":" space {prop_rule_name}'
|
||||
)
|
||||
required_props = [k for k in sorted_props if k in required]
|
||||
optional_props = [k for k in sorted_props if k not in required]
|
||||
|
||||
if additional_properties == True or isinstance(additional_properties, dict):
|
||||
sub_name = f'{name}{"-" if name else ""}additional'
|
||||
value_rule = self.visit({} if additional_properties == True else additional_properties, f'{sub_name}-value')
|
||||
prop_kv_rule_names["*"] = self._add_rule(
|
||||
f'{sub_name}-kv',
|
||||
self._add_rule('string', PRIMITIVE_RULES['string']) + f' ":" space {value_rule}'
|
||||
)
|
||||
optional_props.append("*")
|
||||
|
||||
rule = '"{" space '
|
||||
rule += ' "," space '.join(prop_kv_rule_names[k] for k in required_props)
|
||||
|
||||
if optional_props:
|
||||
rule += ' ('
|
||||
if required_props:
|
||||
rule += ' "," space ( '
|
||||
|
||||
def get_recursive_refs(ks, first_is_optional):
|
||||
[k, *rest] = ks
|
||||
kv_rule_name = prop_kv_rule_names[k]
|
||||
if k == '*':
|
||||
res = self._add_rule(
|
||||
f'{name}{"-" if name else ""}additional-kvs',
|
||||
f'{kv_rule_name} ( "," space ' + kv_rule_name + ' )*'
|
||||
)
|
||||
elif first_is_optional:
|
||||
res = f'( "," space {kv_rule_name} )?'
|
||||
else:
|
||||
res = kv_rule_name
|
||||
if len(rest) > 0:
|
||||
res += ' ' + self._add_rule(
|
||||
f'{name}{"-" if name else ""}{k}-rest',
|
||||
get_recursive_refs(rest, first_is_optional=True)
|
||||
)
|
||||
return res
|
||||
|
||||
rule += ' | '.join(
|
||||
get_recursive_refs(optional_props[i:], first_is_optional=False)
|
||||
for i in range(len(optional_props))
|
||||
)
|
||||
if required_props:
|
||||
rule += ' )'
|
||||
rule += ' )?'
|
||||
|
||||
rule += ' "}" space'
|
||||
|
||||
return rule
|
||||
|
||||
def format_grammar(self):
|
||||
return '\n'.join((f'{name} ::= {rule}' for name, rule in self._rules.items()))
|
||||
return '\n'.join(
|
||||
f'{name} ::= {rule}'
|
||||
for name, rule in sorted(self._rules.items(), key=lambda kv: kv[0])
|
||||
)
|
||||
|
||||
|
||||
def main(args_in = None):
|
||||
@@ -129,16 +502,47 @@ def main(args_in = None):
|
||||
type=lambda s: s.split(','),
|
||||
help='''
|
||||
comma-separated property names defining the order of precedence for object properties;
|
||||
properties not specified here are given lower precedence than those that are, and are
|
||||
sorted alphabetically
|
||||
properties not specified here are given lower precedence than those that are, and
|
||||
are kept in their original order from the schema. Required properties are always
|
||||
given precedence over optional properties.
|
||||
'''
|
||||
)
|
||||
parser.add_argument(
|
||||
'--allow-fetch',
|
||||
action='store_true',
|
||||
default=False,
|
||||
help='Whether to allow fetching referenced schemas over HTTPS')
|
||||
parser.add_argument(
|
||||
'--dotall',
|
||||
action='store_true',
|
||||
default=False,
|
||||
help='Whether to treat dot (".") as matching all chars including line breaks in regular expression patterns')
|
||||
parser.add_argument(
|
||||
'--raw-pattern',
|
||||
action='store_true',
|
||||
default=False,
|
||||
help='Treats string patterns as raw patterns w/o quotes (or quote escapes)')
|
||||
|
||||
parser.add_argument('schema', help='file containing JSON schema ("-" for stdin)')
|
||||
args = parser.parse_args(args_in)
|
||||
|
||||
schema = json.load(sys.stdin if args.schema == '-' else open(args.schema))
|
||||
prop_order = {name: idx for idx, name in enumerate(args.prop_order)}
|
||||
converter = SchemaConverter(prop_order)
|
||||
if args.schema.startswith('https://'):
|
||||
url = args.schema
|
||||
import requests
|
||||
schema = requests.get(url).json()
|
||||
elif args.schema == '-':
|
||||
url = 'stdin'
|
||||
schema = json.load(sys.stdin)
|
||||
else:
|
||||
url = f'file://{args.schema}'
|
||||
with open(args.schema) as f:
|
||||
schema = json.load(f)
|
||||
converter = SchemaConverter(
|
||||
prop_order={name: idx for idx, name in enumerate(args.prop_order)},
|
||||
allow_fetch=args.allow_fetch,
|
||||
dotall=args.dotall,
|
||||
raw_pattern=args.raw_pattern)
|
||||
schema = converter.resolve_refs(schema, url)
|
||||
converter.visit(schema, '')
|
||||
print(converter.format_grammar())
|
||||
|
||||
|
||||
@@ -249,6 +249,9 @@ static ggml_type ggml_type_from_name(const std::string & s) {
|
||||
if (s == "q5_1") {
|
||||
return GGML_TYPE_Q5_1;
|
||||
}
|
||||
if (s == "iq4_nl") {
|
||||
return GGML_TYPE_IQ4_NL;
|
||||
}
|
||||
|
||||
return GGML_TYPE_COUNT;
|
||||
}
|
||||
|
||||
@@ -1,11 +1,13 @@
|
||||
# MobileVLM
|
||||
|
||||
Currently this implementation supports [MobileVLM-v1.7](https://huggingface.co/mtgv/MobileVLM-1.7B) variants.
|
||||
Currently this implementation supports [MobileVLM-1.7B](https://huggingface.co/mtgv/MobileVLM-1.7B) / [MobileVLM_V2-1.7B](https://huggingface.co/mtgv/MobileVLM_V2-1.7B) variants.
|
||||
|
||||
for more information, please go to [Meituan-AutoML/MobileVLM](https://github.com/Meituan-AutoML/MobileVLM)
|
||||
|
||||
The implementation is based on llava, and is compatible with llava and mobileVLM. The usage is basically same as llava.
|
||||
|
||||
Notice: The overall process of model inference for both **MobileVLM** and **MobileVLM_V2** models is the same, but the process of model conversion is a little different. Therefore, using MobiVLM as an example, the different conversion step will be shown.
|
||||
|
||||
## Usage
|
||||
Build with cmake or run `make llava-cli` to build it.
|
||||
|
||||
@@ -34,7 +36,7 @@ git clone https://huggingface.co/openai/clip-vit-large-patch14-336
|
||||
python ./examples/llava/llava-surgery.py -m path/to/MobileVLM-1.7B
|
||||
```
|
||||
|
||||
3. Use `convert-image-encoder-to-gguf.py` with `--projector-type ldp` to convert the LLaVA image encoder to GGUF:
|
||||
3. Use `convert-image-encoder-to-gguf.py` with `--projector-type ldp` (for **V2** the arg is `--projector-type ldpv2`) to convert the LLaVA image encoder to GGUF:
|
||||
|
||||
```sh
|
||||
python ./examples/llava/convert-image-encoder-to-gguf \
|
||||
@@ -44,6 +46,14 @@ python ./examples/llava/convert-image-encoder-to-gguf \
|
||||
--projector-type ldp
|
||||
```
|
||||
|
||||
```sh
|
||||
python ./examples/llava/convert-image-encoder-to-gguf \
|
||||
-m path/to/clip-vit-large-patch14-336 \
|
||||
--llava-projector path/to/MobileVLM-1.7B_V2/llava.projector \
|
||||
--output-dir path/to/MobileVLM-1.7B_V2 \
|
||||
--projector-type ldpv2
|
||||
```
|
||||
|
||||
4. Use `convert.py` to convert the LLaMA part of LLaVA to GGUF:
|
||||
|
||||
```sh
|
||||
|
||||
@@ -119,6 +119,7 @@ static std::string format(const char * fmt, ...) {
|
||||
#define TN_LLAVA_PROJ "mm.%d.%s"
|
||||
#define TN_MVLM_PROJ_MLP "mm.model.mlp.%d.%s"
|
||||
#define TN_MVLM_PROJ_BLOCK "mm.model.mb_block.%d.block.%d.%s"
|
||||
#define TN_MVLM_PROJ_PEG "mm.model.peg.%d.%s"
|
||||
#define TN_IMAGE_NEWLINE "model.image_newline"
|
||||
|
||||
|
||||
@@ -126,12 +127,14 @@ enum projector_type {
|
||||
PROJECTOR_TYPE_MLP,
|
||||
PROJECTOR_TYPE_MLP_NORM,
|
||||
PROJECTOR_TYPE_LDP,
|
||||
PROJECTOR_TYPE_LDPV2,
|
||||
PROJECTOR_TYPE_UNKNOWN,
|
||||
};
|
||||
|
||||
static std::map<projector_type, std::string> PROJECTOR_TYPE_NAMES = {
|
||||
{ PROJECTOR_TYPE_MLP, "mlp" },
|
||||
{ PROJECTOR_TYPE_LDP, "ldp" },
|
||||
{ PROJECTOR_TYPE_LDPV2, "ldpv2"},
|
||||
};
|
||||
|
||||
|
||||
@@ -475,6 +478,14 @@ struct clip_vision_model {
|
||||
struct ggml_tensor * mm_model_block_2_block_2_0_w;
|
||||
struct ggml_tensor * mm_model_block_2_block_2_1_w;
|
||||
struct ggml_tensor * mm_model_block_2_block_2_1_b;
|
||||
|
||||
// MobileVLM_V2 projection
|
||||
struct ggml_tensor * mm_model_mlp_0_w;
|
||||
struct ggml_tensor * mm_model_mlp_0_b;
|
||||
struct ggml_tensor * mm_model_mlp_2_w;
|
||||
struct ggml_tensor * mm_model_mlp_2_b;
|
||||
struct ggml_tensor * mm_model_peg_0_w;
|
||||
struct ggml_tensor * mm_model_peg_0_b;
|
||||
};
|
||||
|
||||
struct clip_ctx {
|
||||
@@ -497,7 +508,6 @@ struct clip_ctx {
|
||||
|
||||
// memory buffers to evaluate the model
|
||||
ggml_backend_buffer_t params_buffer = NULL;
|
||||
ggml_backend_buffer_t compute_buffer = NULL;
|
||||
|
||||
ggml_backend_t backend = NULL;
|
||||
ggml_gallocr_t compute_alloc = NULL;
|
||||
@@ -808,6 +818,29 @@ static ggml_cgraph * clip_image_build_graph(clip_ctx * ctx, const clip_image_f32
|
||||
}
|
||||
embeddings = block_1;
|
||||
}
|
||||
else if (ctx->proj_type == PROJECTOR_TYPE_LDPV2)
|
||||
{
|
||||
int n_patch = 24;
|
||||
struct ggml_tensor * mlp_0 = ggml_mul_mat(ctx0, model.mm_model_mlp_0_w, embeddings);
|
||||
mlp_0 = ggml_add(ctx0, mlp_0, model.mm_model_mlp_0_b);
|
||||
mlp_0 = ggml_gelu(ctx0, mlp_0);
|
||||
struct ggml_tensor * mlp_2 = ggml_mul_mat(ctx0, model.mm_model_mlp_2_w, mlp_0);
|
||||
mlp_2 = ggml_add(ctx0, mlp_2, model.mm_model_mlp_2_b);
|
||||
// mlp_2 ne = [2048, 576, 1, 1]
|
||||
// // AVG Pool Layer 2*2, strides = 2
|
||||
mlp_2 = ggml_cont(ctx0, ggml_permute(ctx0, mlp_2, 1, 0, 2, 3));
|
||||
// mlp_2 ne = [576, 2048, 1, 1]
|
||||
mlp_2 = ggml_reshape_4d(ctx0, mlp_2, n_patch, n_patch, mlp_2->ne[1], mlp_2->ne[2]);
|
||||
// mlp_2 ne [24, 24, 2048, 1]
|
||||
mlp_2 = ggml_pool_2d(ctx0, mlp_2, GGML_OP_POOL_AVG, 2, 2, 2, 2, 0, 0);
|
||||
// weight ne = [3, 3, 2048, 1]
|
||||
struct ggml_tensor * peg_0 = ggml_conv_depthwise_2d(ctx0, model.mm_model_peg_0_w, mlp_2, 1, 1, 1, 1, 1, 1);
|
||||
peg_0 = ggml_add(ctx0, peg_0, mlp_2);
|
||||
peg_0 = ggml_cont(ctx0, ggml_permute(ctx0, peg_0, 1, 2, 0, 3));
|
||||
peg_0 = ggml_add(ctx0, peg_0, model.mm_model_peg_0_b);
|
||||
peg_0 = ggml_reshape_3d(ctx0, peg_0, peg_0->ne[0], peg_0->ne[1] * peg_0->ne[2], peg_0->ne[3]);
|
||||
embeddings = peg_0;
|
||||
}
|
||||
else {
|
||||
GGML_ASSERT(false);
|
||||
}
|
||||
@@ -1178,7 +1211,18 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) {
|
||||
vision_model.mm_model_block_2_block_2_0_w = get_tensor(new_clip->ctx_data, format(TN_MVLM_PROJ_BLOCK, 2, 2, "0.weight"));
|
||||
vision_model.mm_model_block_2_block_2_1_w = get_tensor(new_clip->ctx_data, format(TN_MVLM_PROJ_BLOCK, 2, 2, "1.weight"));
|
||||
vision_model.mm_model_block_2_block_2_1_b = get_tensor(new_clip->ctx_data, format(TN_MVLM_PROJ_BLOCK, 2, 2, "1.bias"));
|
||||
} else {
|
||||
}
|
||||
else if (new_clip->proj_type == PROJECTOR_TYPE_LDPV2)
|
||||
{
|
||||
// MobilVLM_V2 projection
|
||||
vision_model.mm_model_mlp_0_w = get_tensor(new_clip->ctx_data, format(TN_MVLM_PROJ_MLP, 0, "weight"));
|
||||
vision_model.mm_model_mlp_0_b = get_tensor(new_clip->ctx_data, format(TN_MVLM_PROJ_MLP, 0, "bias"));
|
||||
vision_model.mm_model_mlp_2_w = get_tensor(new_clip->ctx_data, format(TN_MVLM_PROJ_MLP, 2, "weight"));
|
||||
vision_model.mm_model_mlp_2_b = get_tensor(new_clip->ctx_data, format(TN_MVLM_PROJ_MLP, 2, "bias"));
|
||||
vision_model.mm_model_peg_0_w = get_tensor(new_clip->ctx_data, format(TN_MVLM_PROJ_PEG, 0, "weight"));
|
||||
vision_model.mm_model_peg_0_b = get_tensor(new_clip->ctx_data, format(TN_MVLM_PROJ_PEG, 0, "bias"));
|
||||
}
|
||||
else {
|
||||
std::string proj_type = PROJECTOR_TYPE_NAMES[new_clip->proj_type];
|
||||
throw std::runtime_error(format("%s: don't support projector with: %s currently\n", __func__, proj_type.c_str()));
|
||||
}
|
||||
@@ -1676,6 +1720,9 @@ void clip_free(clip_ctx * ctx) {
|
||||
ggml_free(ctx->ctx_data);
|
||||
gguf_free(ctx->ctx_gguf);
|
||||
|
||||
ggml_backend_buffer_free(ctx->params_buffer);
|
||||
ggml_backend_free(ctx->backend);
|
||||
ggml_gallocr_free(ctx->compute_alloc);
|
||||
delete ctx;
|
||||
}
|
||||
|
||||
@@ -1964,6 +2011,9 @@ int clip_n_mmproj_embd(const struct clip_ctx * ctx) {
|
||||
if (ctx->proj_type == PROJECTOR_TYPE_LDP) {
|
||||
return ctx->vision_model.mm_model_block_1_block_2_1_b->ne[0];
|
||||
}
|
||||
if (ctx->proj_type == PROJECTOR_TYPE_LDPV2) {
|
||||
return ctx->vision_model.mm_model_peg_0_b->ne[0];
|
||||
}
|
||||
if (ctx->proj_type == PROJECTOR_TYPE_MLP) {
|
||||
return ctx->vision_model.mm_2_b->ne[0];
|
||||
}
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
import argparse
|
||||
import os
|
||||
import json
|
||||
import re
|
||||
|
||||
import torch
|
||||
import numpy as np
|
||||
@@ -38,9 +39,11 @@ def should_skip_tensor(name: str, has_text: bool, has_vision: bool, has_llava: b
|
||||
def get_tensor_name(name: str) -> str:
|
||||
if "projection" in name:
|
||||
return name
|
||||
|
||||
if "mm_projector" in name:
|
||||
return name.replace("model.mm_projector", "mm")
|
||||
name = name.replace("model.mm_projector", "mm")
|
||||
name = re.sub(r'mm\.mlp\.mlp', 'mm.model.mlp', name, count=1)
|
||||
name = re.sub(r'mm\.peg\.peg', 'mm.model.peg', name, count=1)
|
||||
return name
|
||||
|
||||
return name.replace("text_model", "t").replace("vision_model", "v").replace("encoder.layers", "blk").replace("embeddings.", "").replace("_proj", "").replace("self_attn.", "attn_").replace("layer_norm", "ln").replace("layernorm", "ln").replace("mlp.fc1", "ffn_down").replace("mlp.fc2", "ffn_up").replace("embedding", "embd").replace("final", "post").replace("layrnorm", "ln")
|
||||
|
||||
@@ -83,7 +86,7 @@ ap.add_argument("--clip-model-is-vision", action="store_true", required=False,
|
||||
ap.add_argument("--clip-model-is-openclip", action="store_true", required=False,
|
||||
help="The clip model is from openclip (for ViT-SO400M type))")
|
||||
ap.add_argument("--llava-projector", help="Path to llava.projector file. If specified, save an image encoder for LLaVA models.")
|
||||
ap.add_argument("--projector-type", help="Type of projector. Possible values: mlp, ldp", choices=["mlp", "ldp"], default="mlp")
|
||||
ap.add_argument("--projector-type", help="Type of projector. Possible values: mlp, ldp, ldpv2", choices=["mlp", "ldp", "ldpv2"], default="mlp")
|
||||
ap.add_argument("-o", "--output-dir", help="Directory to save GGUF files. Default is the original model directory", default=None)
|
||||
# Example --image_mean 0.48145466 0.4578275 0.40821073 --image_std 0.26862954 0.26130258 0.27577711
|
||||
# Example --image_mean 0.5 0.5 0.5 --image_std 0.5 0.5 0.5
|
||||
|
||||
20
examples/regex-to-grammar.py
Normal file
20
examples/regex-to-grammar.py
Normal file
@@ -0,0 +1,20 @@
|
||||
import json, subprocess, sys, os
|
||||
|
||||
assert len(sys.argv) >= 2
|
||||
[_, pattern, *rest] = sys.argv
|
||||
|
||||
print(subprocess.check_output(
|
||||
[
|
||||
"python",
|
||||
os.path.join(
|
||||
os.path.dirname(os.path.realpath(__file__)),
|
||||
"json-schema-to-grammar.py"),
|
||||
*rest,
|
||||
"-",
|
||||
"--raw-pattern",
|
||||
],
|
||||
text=True,
|
||||
input=json.dumps({
|
||||
"type": "string",
|
||||
"pattern": pattern,
|
||||
}, indent=2)))
|
||||
@@ -2,12 +2,16 @@ set(TARGET server)
|
||||
option(LLAMA_SERVER_VERBOSE "Build verbose logging option for Server" ON)
|
||||
option(LLAMA_SERVER_SSL "Build SSL support for the server" OFF)
|
||||
include_directories(${CMAKE_CURRENT_SOURCE_DIR})
|
||||
add_executable(${TARGET} server.cpp utils.hpp json.hpp httplib.h)
|
||||
add_executable(${TARGET}
|
||||
server.cpp
|
||||
utils.hpp
|
||||
httplib.h
|
||||
)
|
||||
install(TARGETS ${TARGET} RUNTIME)
|
||||
target_compile_definitions(${TARGET} PRIVATE
|
||||
SERVER_VERBOSE=$<BOOL:${LLAMA_SERVER_VERBOSE}>
|
||||
)
|
||||
target_link_libraries(${TARGET} PRIVATE common ${CMAKE_THREAD_LIBS_INIT})
|
||||
target_link_libraries(${TARGET} PRIVATE common json-schema-to-grammar ${CMAKE_THREAD_LIBS_INIT})
|
||||
if (LLAMA_SERVER_SSL)
|
||||
find_package(OpenSSL REQUIRED)
|
||||
target_link_libraries(${TARGET} PRIVATE OpenSSL::SSL OpenSSL::Crypto)
|
||||
|
||||
@@ -260,7 +260,7 @@ node index.js
|
||||
|
||||
`image_data`: An array of objects to hold base64-encoded image `data` and its `id`s to be reference in `prompt`. You can determine the place of the image in the prompt as in the following: `USER:[img-12]Describe the image in detail.\nASSISTANT:`. In this case, `[img-12]` will be replaced by the embeddings of the image with id `12` in the following `image_data` array: `{..., "image_data": [{"data": "<BASE64_STRING>", "id": 12}]}`. Use `image_data` only with multimodal models, e.g., LLaVA.
|
||||
|
||||
`slot_id`: Assign the completion task to an specific slot. If is -1 the task will be assigned to a Idle slot (default: -1)
|
||||
`id_slot`: Assign the completion task to an specific slot. If is -1 the task will be assigned to a Idle slot (default: -1)
|
||||
|
||||
`cache_prompt`: Re-use previously cached prompt from the last request if possible. This may prevent re-caching the prompt from scratch. (default: false)
|
||||
|
||||
|
||||
@@ -26,8 +26,9 @@ const propOrder = grammarJsonSchemaPropOrder
|
||||
|
||||
let grammar = null
|
||||
if (grammarJsonSchemaFile) {
|
||||
const schema = JSON.parse(readFileSync(grammarJsonSchemaFile, 'utf-8'))
|
||||
const converter = new SchemaConverter(propOrder)
|
||||
let schema = JSON.parse(readFileSync(grammarJsonSchemaFile, 'utf-8'))
|
||||
const converter = new SchemaConverter({prop_order: propOrder, allow_fetch: true})
|
||||
schema = await converter.resolveRefs(schema, grammarJsonSchemaFile)
|
||||
converter.visit(schema, '')
|
||||
grammar = converter.formatGrammar()
|
||||
}
|
||||
|
||||
@@ -483,4 +483,4 @@ unsigned char completion_js[] = {
|
||||
0x20, 0x67, 0x65, 0x6e, 0x65, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x5f,
|
||||
0x73, 0x65, 0x74, 0x74, 0x69, 0x6e, 0x67, 0x73, 0x3b, 0x0a, 0x7d, 0x0a
|
||||
};
|
||||
unsigned int completion_js_len = 5796;
|
||||
size_t completion_js_len = 5796;
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -630,14 +630,16 @@
|
||||
|
||||
const grammarJsonSchemaPropOrder = signal('')
|
||||
const updateGrammarJsonSchemaPropOrder = (el) => grammarJsonSchemaPropOrder.value = el.target.value
|
||||
const convertJSONSchemaGrammar = () => {
|
||||
const convertJSONSchemaGrammar = async () => {
|
||||
try {
|
||||
const schema = JSON.parse(params.value.grammar)
|
||||
const converter = new SchemaConverter(
|
||||
grammarJsonSchemaPropOrder.value
|
||||
let schema = JSON.parse(params.value.grammar)
|
||||
const converter = new SchemaConverter({
|
||||
prop_order: grammarJsonSchemaPropOrder.value
|
||||
.split(',')
|
||||
.reduce((acc, cur, i) => ({ ...acc, [cur.trim()]: i }), {})
|
||||
)
|
||||
.reduce((acc, cur, i) => ({ ...acc, [cur.trim()]: i }), {}),
|
||||
allow_fetch: true,
|
||||
})
|
||||
schema = await converter.resolveRefs(schema, 'input')
|
||||
converter.visit(schema, '')
|
||||
params.value = {
|
||||
...params.value,
|
||||
|
||||
File diff suppressed because one or more lines are too long
@@ -1,25 +1,50 @@
|
||||
// WARNING: This file was ported from json-schema-to-grammar.py, please fix bugs / add features there first.
|
||||
const SPACE_RULE = '" "?';
|
||||
|
||||
const PRIMITIVE_RULES = {
|
||||
boolean: '("true" | "false") space',
|
||||
number: '("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? space',
|
||||
integer: '("-"? ([0-9] | [1-9] [0-9]*)) space',
|
||||
value: 'object | array | string | number | boolean',
|
||||
object: '"{" space ( string ":" space value ("," space string ":" space value)* )? "}" space',
|
||||
array: '"[" space ( value ("," space value)* )? "]" space',
|
||||
uuid: '"\\"" ' + [8, 4, 4, 4, 12].map(n => [...new Array(n)].map(_ => '[0-9a-fA-F]').join('')).join(' "-" ') + ' "\\"" space',
|
||||
string: ` "\\"" (
|
||||
[^"\\\\] |
|
||||
"\\\\" (["\\\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
|
||||
)* "\\"" space`,
|
||||
null: '"null" space',
|
||||
};
|
||||
const OBJECT_RULE_NAMES = ['object', 'array', 'string', 'number', 'boolean', 'null', 'value'];
|
||||
|
||||
// TODO: support "uri", "email" string formats
|
||||
const DATE_RULES = {
|
||||
'date' : '[0-9] [0-9] [0-9] [0-9] "-" ( "0" [1-9] | "1" [0-2] ) "-" ( \"0\" [1-9] | [1-2] [0-9] | "3" [0-1] )',
|
||||
'time' : '([01] [0-9] | "2" [0-3]) ":" [0-5] [0-9] ":" [0-5] [0-9] ( "." [0-9] [0-9] [0-9] )? ( "Z" | ( "+" | "-" ) ( [01] [0-9] | "2" [0-3] ) ":" [0-5] [0-9] )',
|
||||
'date-time': 'date "T" time',
|
||||
'date-string': '"\\"" date "\\"" space',
|
||||
'time-string': '"\\"" time "\\"" space',
|
||||
'date-time-string': '"\\"" date-time "\\"" space',
|
||||
};
|
||||
|
||||
const RESERVED_NAMES = {'root': true, ...PRIMITIVE_RULES, ...DATE_RULES};
|
||||
|
||||
const INVALID_RULE_CHARS_RE = /[^\dA-Za-z-]+/g;
|
||||
const GRAMMAR_LITERAL_ESCAPE_RE = /[\n\r"]/g;
|
||||
const GRAMMAR_LITERAL_ESCAPES = {'\r': '\\r', '\n': '\\n', '"': '\\"'};
|
||||
const GRAMMAR_RANGE_LITERAL_ESCAPE_RE = /[\n\r"\]\-\\]/g;
|
||||
const GRAMMAR_LITERAL_ESCAPES = { '\r': '\\r', '\n': '\\n', '"': '\\"', '-': '\\-', ']': '\\]' };
|
||||
|
||||
const NON_LITERAL_SET = new Set('|.()[]{}*+?');
|
||||
const ESCAPED_IN_REGEXPS_BUT_NOT_IN_LITERALS = new Set('[]()|{}*+?');
|
||||
|
||||
export class SchemaConverter {
|
||||
constructor(propOrder) {
|
||||
this._propOrder = propOrder || {};
|
||||
this._rules = new Map();
|
||||
this._rules.set('space', SPACE_RULE);
|
||||
constructor(options) {
|
||||
this._propOrder = options.prop_order || {};
|
||||
this._allowFetch = options.allow_fetch || false;
|
||||
this._dotall = options.dotall || false;
|
||||
this._rules = {'space': SPACE_RULE};
|
||||
this._refs = {};
|
||||
this._refsBeingResolved = new Set();
|
||||
}
|
||||
|
||||
_formatLiteral(literal) {
|
||||
@@ -30,83 +55,490 @@ export class SchemaConverter {
|
||||
return `"${escaped}"`;
|
||||
}
|
||||
|
||||
_formatRangeChar(literal) {
|
||||
return JSON.stringify(literal).slice(1, -1).replace(
|
||||
GRAMMAR_RANGE_LITERAL_ESCAPE_RE,
|
||||
m => GRAMMAR_LITERAL_ESCAPES[m]
|
||||
);
|
||||
}
|
||||
|
||||
_addRule(name, rule) {
|
||||
let escName = name.replace(INVALID_RULE_CHARS_RE, '-');
|
||||
let key = escName;
|
||||
|
||||
if (this._rules.has(escName)) {
|
||||
if (this._rules.get(escName) === rule) {
|
||||
if (escName in this._rules) {
|
||||
if (this._rules[escName] === rule) {
|
||||
return key;
|
||||
}
|
||||
|
||||
let i = 0;
|
||||
while (this._rules.has(`${escName}${i}`)) {
|
||||
while ((`${escName}${i}` in this._rules) && (this._rules[`${escName}${i}`] !== rule)) {
|
||||
i += 1;
|
||||
}
|
||||
key = `${escName}${i}`;
|
||||
}
|
||||
|
||||
this._rules.set(key, rule);
|
||||
this._rules[key] = rule;
|
||||
return key;
|
||||
}
|
||||
|
||||
async resolveRefs(schema, url) {
|
||||
const visit = async (n) => {
|
||||
if (Array.isArray(n)) {
|
||||
return Promise.all(n.map(visit));
|
||||
} else if (typeof n === 'object' && n !== null) {
|
||||
let ref = n.$ref;
|
||||
let target;
|
||||
if (ref !== undefined && !this._refs[ref]) {
|
||||
if (ref.startsWith('https://')) {
|
||||
if (!this._allowFetch) {
|
||||
throw new Error('Fetching remote schemas is not allowed (use --allow-fetch for force)');
|
||||
}
|
||||
const fetch = (await import('node-fetch')).default;
|
||||
|
||||
const fragSplit = ref.split('#');
|
||||
const baseUrl = fragSplit[0];
|
||||
|
||||
target = this._refs[baseUrl];
|
||||
if (!target) {
|
||||
target = await this.resolveRefs(await fetch(ref).then(res => res.json()), baseUrl);
|
||||
this._refs[baseUrl] = target;
|
||||
}
|
||||
|
||||
if (fragSplit.length === 1 || fragSplit[fragSplit.length - 1] === '') {
|
||||
return target;
|
||||
}
|
||||
} else if (ref.startsWith('#/')) {
|
||||
target = schema;
|
||||
ref = `${url}${ref}`;
|
||||
n.$ref = ref;
|
||||
} else {
|
||||
throw new Error(`Unsupported ref ${ref}`);
|
||||
}
|
||||
|
||||
const selectors = ref.split('#')[1].split('/').slice(1);
|
||||
for (const sel of selectors) {
|
||||
if (!target || !(sel in target)) {
|
||||
throw new Error(`Error resolving ref ${ref}: ${sel} not in ${JSON.stringify(target)}`);
|
||||
}
|
||||
target = target[sel];
|
||||
}
|
||||
|
||||
this._refs[ref] = target;
|
||||
} else {
|
||||
await Promise.all(Object.values(n).map(visit));
|
||||
}
|
||||
}
|
||||
|
||||
return n;
|
||||
};
|
||||
|
||||
return visit(schema);
|
||||
}
|
||||
|
||||
_generateUnionRule(name, altSchemas) {
|
||||
return altSchemas
|
||||
.map((altSchema, i) => this.visit(altSchema, `${name ?? ''}${name ? '-' : 'alternative-'}${i}`))
|
||||
.join(' | ');
|
||||
}
|
||||
|
||||
_visitPattern(pattern, name) {
|
||||
if (!pattern.startsWith('^') || !pattern.endsWith('$')) {
|
||||
throw new Error('Pattern must start with "^" and end with "$"');
|
||||
}
|
||||
pattern = pattern.slice(1, -1);
|
||||
const subRuleIds = {};
|
||||
|
||||
let i = 0;
|
||||
const length = pattern.length;
|
||||
|
||||
const getDot = () => {
|
||||
let rule;
|
||||
if (this._dotall) {
|
||||
rule = '[\\U00000000-\\U0010FFFF]';
|
||||
} else {
|
||||
// Accept any character... except \n and \r line break chars (\x0A and \xOD)
|
||||
rule = '[\\U00000000-\\x09\\x0B\\x0C\\x0E-\\U0010FFFF]';
|
||||
}
|
||||
return this._addRule('dot', rule);
|
||||
};
|
||||
|
||||
|
||||
const toRule = ([s, isLiteral]) => isLiteral ? "\"" + s + "\"" : s;
|
||||
|
||||
const transform = () => {
|
||||
const start = i;
|
||||
// For each component of this sequence, store its string representation and whether it's a literal.
|
||||
// We only need a flat structure here to apply repetition operators to the last item, and
|
||||
// to merge literals at the and (we're parsing grouped ( sequences ) recursively and don't treat '|' specially
|
||||
// (GBNF's syntax is luckily very close to regular expressions!)
|
||||
const seq = [];
|
||||
|
||||
const joinSeq = () => {
|
||||
const ret = [];
|
||||
for (const [isLiteral, g] of groupBy(seq, x => x[1])) {
|
||||
if (isLiteral) {
|
||||
ret.push([[...g].map(x => x[0]).join(''), true]);
|
||||
} else {
|
||||
ret.push(...g);
|
||||
}
|
||||
}
|
||||
if (ret.length === 1) {
|
||||
return ret[0];
|
||||
}
|
||||
return [ret.map(x => toRule(x)).join(' '), false];
|
||||
};
|
||||
|
||||
while (i < length) {
|
||||
const c = pattern[i];
|
||||
if (c === '.') {
|
||||
seq.push([getDot(), false]);
|
||||
i += 1;
|
||||
} else if (c === '(') {
|
||||
i += 1;
|
||||
if (i < length) {
|
||||
if (pattern[i] === '?') {
|
||||
throw new Error(`Unsupported pattern syntax "${pattern[i]}" at index ${i} of /${pattern}/`);
|
||||
}
|
||||
}
|
||||
seq.push([`(${toRule(transform())})`, false]);
|
||||
} else if (c === ')') {
|
||||
i += 1;
|
||||
if (start <= 0 || pattern[start - 1] !== '(') {
|
||||
throw new Error(`Unbalanced parentheses; start = ${start}, i = ${i}, pattern = ${pattern}`);
|
||||
}
|
||||
return joinSeq();
|
||||
} else if (c === '[') {
|
||||
let squareBrackets = c;
|
||||
i += 1;
|
||||
while (i < length && pattern[i] !== ']') {
|
||||
if (pattern[i] === '\\') {
|
||||
squareBrackets += pattern.slice(i, i + 2);
|
||||
i += 2;
|
||||
} else {
|
||||
squareBrackets += pattern[i];
|
||||
i += 1;
|
||||
}
|
||||
}
|
||||
if (i >= length) {
|
||||
throw new Error(`Unbalanced square brackets; start = ${start}, i = ${i}, pattern = ${pattern}`);
|
||||
}
|
||||
squareBrackets += ']';
|
||||
i += 1;
|
||||
seq.push([squareBrackets, false]);
|
||||
} else if (c === '|') {
|
||||
seq.push(['|', false]);
|
||||
i += 1;
|
||||
} else if (c === '*' || c === '+' || c === '?') {
|
||||
seq[seq.length - 1] = [toRule(seq[seq.length - 1]) + c, false];
|
||||
i += 1;
|
||||
} else if (c === '{') {
|
||||
let curlyBrackets = c;
|
||||
i += 1;
|
||||
while (i < length && pattern[i] !== '}') {
|
||||
curlyBrackets += pattern[i];
|
||||
i += 1;
|
||||
}
|
||||
if (i >= length) {
|
||||
throw new Error(`Unbalanced curly brackets; start = ${start}, i = ${i}, pattern = ${pattern}`);
|
||||
}
|
||||
curlyBrackets += '}';
|
||||
i += 1;
|
||||
const nums = curlyBrackets.slice(1, -1).split(',').map(s => s.trim());
|
||||
let minTimes, maxTimes;
|
||||
if (nums.length === 1) {
|
||||
minTimes = parseInt(nums[0], 10);
|
||||
maxTimes = minTimes;
|
||||
} else {
|
||||
if (nums.length !== 2) {
|
||||
throw new Error(`Invalid quantifier ${curlyBrackets}`);
|
||||
}
|
||||
minTimes = nums[0] ? parseInt(nums[0], 10) : 0;
|
||||
maxTimes = nums[1] ? parseInt(nums[1], 10) : Infinity;
|
||||
}
|
||||
|
||||
let [sub, subIsLiteral] = seq[seq.length - 1];
|
||||
|
||||
if (minTimes === 0 && maxTimes === Infinity) {
|
||||
seq[seq.length - 1] = [`${sub}*`, false];
|
||||
} else if (minTimes === 0 && maxTimes === 1) {
|
||||
seq[seq.length - 1] = [`${sub}?`, false];
|
||||
} else if (minTimes === 1 && maxTimes === Infinity) {
|
||||
seq[seq.length - 1] = [`${sub}+`, false];
|
||||
} else {
|
||||
if (!subIsLiteral) {
|
||||
let id = subRuleIds[sub];
|
||||
if (id === undefined) {
|
||||
id = this._addRule(`${name}-${Object.keys(subRuleIds).length + 1}`, sub);
|
||||
subRuleIds[sub] = id;
|
||||
}
|
||||
sub = id;
|
||||
}
|
||||
|
||||
const repeatedSub = Array.from({ length: minTimes }, () => subIsLiteral ? `"${sub.slice(1, -1).repeat(minTimes)}"` : sub);
|
||||
const optionalSub = maxTimes !== undefined ? Array.from({ length: maxTimes - minTimes }, () => `${sub}?`) : [`${sub}*`];
|
||||
seq[seq.length - 1] = [repeatedSub.concat(optionalSub).join(' '), false];
|
||||
}
|
||||
} else {
|
||||
let literal = '';
|
||||
while (i < length) {
|
||||
if (pattern[i] === '\\' && i < length - 1) {
|
||||
const next = pattern[i + 1];
|
||||
if (ESCAPED_IN_REGEXPS_BUT_NOT_IN_LITERALS.has(next)) {
|
||||
i += 1;
|
||||
literal += pattern[i];
|
||||
i += 1;
|
||||
} else {
|
||||
literal += pattern.slice(i, i + 2);
|
||||
i += 2;
|
||||
}
|
||||
} else if (pattern[i] === '"') {
|
||||
literal += '\\"';
|
||||
i += 1;
|
||||
} else if (!NON_LITERAL_SET.has(pattern[i]) &&
|
||||
(i === length - 1 || literal === '' || pattern[i + 1] === '.' || !NON_LITERAL_SET.has(pattern[i+1]))) {
|
||||
literal += pattern[i];
|
||||
i += 1;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (literal !== '') {
|
||||
seq.push([literal, true]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return joinSeq();
|
||||
};
|
||||
|
||||
return this._addRule(name, "\"\\\"\" " + toRule(transform()) + " \"\\\"\" space")
|
||||
}
|
||||
|
||||
_resolveRef(ref) {
|
||||
let refName = ref.split('/').pop();
|
||||
if (!(refName in this._rules) && !this._refsBeingResolved.has(ref)) {
|
||||
this._refsBeingResolved.add(ref);
|
||||
const resolved = this._refs[ref];
|
||||
refName = this.visit(resolved, refName);
|
||||
this._refsBeingResolved.delete(ref);
|
||||
}
|
||||
return refName;
|
||||
}
|
||||
|
||||
_generateConstantRule(value) {
|
||||
if (typeof value !== 'string') {
|
||||
throw new Error('Only string constants are supported, got ' + JSON.stringify(value));
|
||||
}
|
||||
return this._formatLiteral(value);
|
||||
}
|
||||
|
||||
visit(schema, name) {
|
||||
const schemaType = schema.type;
|
||||
const ruleName = name || 'root';
|
||||
const schemaFormat = schema.format;
|
||||
const ruleName = name in RESERVED_NAMES ? name + '-' : name == '' ? 'root' : name;
|
||||
|
||||
if (schema.oneOf || schema.anyOf) {
|
||||
const rule = (schema.oneOf || schema.anyOf).map((altSchema, i) =>
|
||||
this.visit(altSchema, `${name}${name ? "-" : ""}${i}`)
|
||||
).join(' | ');
|
||||
|
||||
return this._addRule(ruleName, rule);
|
||||
const ref = schema.$ref;
|
||||
if (ref !== undefined) {
|
||||
return this._addRule(ruleName, this._resolveRef(ref));
|
||||
} else if (schema.oneOf || schema.anyOf) {
|
||||
return this._addRule(ruleName, this._generateUnionRule(name, schema.oneOf || schema.anyOf));
|
||||
} else if (Array.isArray(schemaType)) {
|
||||
return this._addRule(ruleName, this._generateUnionRule(name, schemaType.map(t => ({ type: t }))));
|
||||
} else if ('const' in schema) {
|
||||
return this._addRule(ruleName, this._formatLiteral(schema.const));
|
||||
if (typeof schema.const !== 'string') {
|
||||
throw new Error('Only string constants are supported, got ' + JSON.stringify(schema.const));
|
||||
}
|
||||
return this._addRule(ruleName, this._generateConstantRule(schema.const));
|
||||
} else if ('enum' in schema) {
|
||||
const rule = schema.enum.map(v => this._formatLiteral(v)).join(' | ');
|
||||
const rule = schema.enum.map(v => this._generateConstantRule(v)).join(' | ');
|
||||
return this._addRule(ruleName, rule);
|
||||
} else if (schemaType === 'object' && 'properties' in schema) {
|
||||
// TODO: `required` keyword (from python implementation)
|
||||
const propOrder = this._propOrder;
|
||||
const propPairs = Object.entries(schema.properties).sort((a, b) => {
|
||||
// sort by position in prop_order (if specified) then by key
|
||||
const orderA = typeof propOrder[a[0]] === 'number' ? propOrder[a[0]] : Infinity;
|
||||
const orderB = typeof propOrder[b[0]] === 'number' ? propOrder[b[0]] : Infinity;
|
||||
return orderA - orderB || a[0].localeCompare(b[0]);
|
||||
});
|
||||
|
||||
let rule = '"{" space';
|
||||
propPairs.forEach(([propName, propSchema], i) => {
|
||||
const propRuleName = this.visit(propSchema, `${name}${name ? "-" : ""}${propName}`);
|
||||
if (i > 0) {
|
||||
rule += ' "," space';
|
||||
} else if ((schemaType === undefined || schemaType === 'object') &&
|
||||
('properties' in schema ||
|
||||
('additionalProperties' in schema && schema.additionalProperties !== true))) {
|
||||
const required = new Set(schema.required || []);
|
||||
const properties = Object.entries(schema.properties ?? {});
|
||||
return this._addRule(ruleName, this._buildObjectRule(properties, required, name, schema.additionalProperties));
|
||||
} else if ((schemaType === undefined || schemaType === 'object') && 'allOf' in schema) {
|
||||
const required = new Set();
|
||||
const properties = [];
|
||||
const addComponent = (compSchema, isRequired) => {
|
||||
const ref = compSchema.$ref;
|
||||
if (ref !== undefined) {
|
||||
compSchema = this._refs[ref];
|
||||
}
|
||||
rule += ` ${this._formatLiteral(propName)} space ":" space ${propRuleName}`;
|
||||
});
|
||||
rule += ' "}" space';
|
||||
|
||||
return this._addRule(ruleName, rule);
|
||||
} else if (schemaType === 'array' && 'items' in schema) {
|
||||
// TODO `prefixItems` keyword (from python implementation)
|
||||
const itemRuleName = this.visit(schema.items, `${name}${name ? "-" : ""}item`);
|
||||
const rule = `"[" space (${itemRuleName} ("," space ${itemRuleName})*)? "]" space`;
|
||||
return this._addRule(ruleName, rule);
|
||||
if ('properties' in compSchema) {
|
||||
for (const [propName, propSchema] of Object.entries(compSchema.properties)) {
|
||||
properties.push([propName, propSchema]);
|
||||
if (isRequired) {
|
||||
required.add(propName);
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
for (const t of schema.allOf) {
|
||||
if ('anyOf' in t) {
|
||||
for (const tt of t.anyOf) {
|
||||
addComponent(tt, false);
|
||||
}
|
||||
} else {
|
||||
addComponent(t, true);
|
||||
}
|
||||
}
|
||||
|
||||
return this._addRule(ruleName, this._buildObjectRule(properties, required, name, /* additionalProperties= */ false));
|
||||
} else if ((schemaType === undefined || schemaType === 'array') && ('items' in schema || 'prefixItems' in schema)) {
|
||||
const items = schema.items ?? schema.prefixItems;
|
||||
if (Array.isArray(items)) {
|
||||
return this._addRule(
|
||||
ruleName,
|
||||
'"[" space ' +
|
||||
items.map((item, i) => this.visit(item, `${name ?? ''}${name ? '-' : ''}tuple-${i}`)).join(' "," space ') +
|
||||
' "]" space'
|
||||
);
|
||||
} else {
|
||||
const itemRuleName = this.visit(items, `${name ?? ''}${name ? '-' : ''}item`);
|
||||
const listItemOperator = `( "," space ${itemRuleName} )`;
|
||||
let successiveItems = '';
|
||||
let minItems = schema.minItems || 0;
|
||||
const maxItems = schema.maxItems;
|
||||
if (minItems > 0) {
|
||||
successiveItems = listItemOperator.repeat(minItems - 1);
|
||||
minItems--;
|
||||
}
|
||||
if (maxItems !== undefined && maxItems > minItems) {
|
||||
successiveItems += `${listItemOperator}?`.repeat(maxItems - minItems - 1);
|
||||
} else {
|
||||
successiveItems += `${listItemOperator}*`;
|
||||
}
|
||||
const rule = minItems === 0
|
||||
? `"[" space ( ${itemRuleName} ${successiveItems} )? "]" space`
|
||||
: `"[" space ${itemRuleName} ${successiveItems} "]" space`;
|
||||
return this._addRule(ruleName, rule);
|
||||
}
|
||||
} else if ((schemaType === undefined || schemaType === 'string') && 'pattern' in schema) {
|
||||
return this._visitPattern(schema.pattern, ruleName);
|
||||
} else if ((schemaType === undefined || schemaType === 'string') && /^uuid[1-5]?$/.test(schema.format || '')) {
|
||||
return this._addRule(
|
||||
ruleName === 'root' ? 'root' : schemaFormat,
|
||||
PRIMITIVE_RULES['uuid'])
|
||||
} else if ((schemaType === undefined || schemaType === 'string') && schema.format in DATE_RULES) {
|
||||
for (const [t, r] of Object.entries(DATE_RULES)) {
|
||||
this._addRule(t, r);
|
||||
}
|
||||
return schemaFormat + '-string';
|
||||
} else if ((schemaType === 'object') || (Object.keys(schema).length === 0)) {
|
||||
for (const n of OBJECT_RULE_NAMES) {
|
||||
this._addRule(n, PRIMITIVE_RULES[n]);
|
||||
}
|
||||
return this._addRule(ruleName, 'object');
|
||||
} else {
|
||||
if (!PRIMITIVE_RULES[schemaType]) {
|
||||
if (!(schemaType in PRIMITIVE_RULES)) {
|
||||
throw new Error(`Unrecognized schema: ${JSON.stringify(schema)}`);
|
||||
}
|
||||
return this._addRule(
|
||||
ruleName === 'root' ? 'root' : schemaType,
|
||||
PRIMITIVE_RULES[schemaType]
|
||||
// TODO: support minimum, maximum, exclusiveMinimum, exclusiveMaximum at least for zero
|
||||
return this._addRule(ruleName === 'root' ? 'root' : schemaType, PRIMITIVE_RULES[schemaType]);
|
||||
}
|
||||
}
|
||||
|
||||
_buildObjectRule(properties, required, name, additionalProperties) {
|
||||
const propOrder = this._propOrder;
|
||||
// sort by position in prop_order (if specified) then by original order
|
||||
const sortedProps = properties.map(([k]) => k).sort((a, b) => {
|
||||
const orderA = propOrder[a] || Infinity;
|
||||
const orderB = propOrder[b] || Infinity;
|
||||
return orderA - orderB || properties.findIndex(([k]) => k === a) - properties.findIndex(([k]) => k === b);
|
||||
});
|
||||
|
||||
const propKvRuleNames = {};
|
||||
for (const [propName, propSchema] of properties) {
|
||||
const propRuleName = this.visit(propSchema, `${name ?? ''}${name ? '-' : ''}${propName}`);
|
||||
propKvRuleNames[propName] = this._addRule(
|
||||
`${name ?? ''}${name ? '-' : ''}${propName}-kv`,
|
||||
`${this._formatLiteral(propName)} space ":" space ${propRuleName}`
|
||||
);
|
||||
}
|
||||
const requiredProps = sortedProps.filter(k => required.has(k));
|
||||
const optionalProps = sortedProps.filter(k => !required.has(k));
|
||||
|
||||
if (typeof additionalProperties === 'object' || additionalProperties === true) {
|
||||
const subName = `${name ?? ''}${name ? '-' : ''}additional`;
|
||||
const valueRule = this.visit(additionalProperties === true ? {} : additionalProperties, `${subName}-value`);
|
||||
propKvRuleNames['*'] = this._addRule(
|
||||
`${subName}-kv`,
|
||||
`${this._addRule('string', PRIMITIVE_RULES['string'])} ":" space ${valueRule}`);
|
||||
optionalProps.push('*');
|
||||
}
|
||||
|
||||
let rule = '"{" space ';
|
||||
rule += requiredProps.map(k => propKvRuleNames[k]).join(' "," space ');
|
||||
|
||||
if (optionalProps.length > 0) {
|
||||
rule += ' (';
|
||||
if (requiredProps.length > 0) {
|
||||
rule += ' "," space ( ';
|
||||
}
|
||||
|
||||
const getRecursiveRefs = (ks, firstIsOptional) => {
|
||||
const [k, ...rest] = ks;
|
||||
const kvRuleName = propKvRuleNames[k];
|
||||
let res;
|
||||
if (k === '*') {
|
||||
res = this._addRule(
|
||||
`${name ?? ''}${name ? '-' : ''}additional-kvs`,
|
||||
`${kvRuleName} ( "," space ` + kvRuleName + ` )*`
|
||||
)
|
||||
} else if (firstIsOptional) {
|
||||
res = `( "," space ${kvRuleName} )?`;
|
||||
} else {
|
||||
res = kvRuleName;
|
||||
}
|
||||
if (rest.length > 0) {
|
||||
res += ' ' + this._addRule(
|
||||
`${name ?? ''}${name ? '-' : ''}${k}-rest`,
|
||||
getRecursiveRefs(rest, true)
|
||||
);
|
||||
}
|
||||
return res;
|
||||
};
|
||||
|
||||
rule += optionalProps.map((_, i) => getRecursiveRefs(optionalProps.slice(i), false)).join(' | ');
|
||||
if (requiredProps.length > 0) {
|
||||
rule += ' )';
|
||||
}
|
||||
rule += ' )?';
|
||||
}
|
||||
|
||||
rule += ' "}" space';
|
||||
|
||||
return rule;
|
||||
}
|
||||
|
||||
formatGrammar() {
|
||||
let grammar = '';
|
||||
this._rules.forEach((rule, name) => {
|
||||
for (const [name, rule] of Object.entries(this._rules).sort(([a], [b]) => a.localeCompare(b))) {
|
||||
grammar += `${name} ::= ${rule}\n`;
|
||||
});
|
||||
}
|
||||
return grammar;
|
||||
}
|
||||
}
|
||||
|
||||
// Helper function to group elements by a key function
|
||||
function* groupBy(iterable, keyFn) {
|
||||
let lastKey = null;
|
||||
let group = [];
|
||||
for (const element of iterable) {
|
||||
const key = keyFn(element);
|
||||
if (lastKey !== null && key !== lastKey) {
|
||||
yield [lastKey, group];
|
||||
group = [];
|
||||
}
|
||||
group.push(element);
|
||||
lastKey = key;
|
||||
}
|
||||
if (group.length > 0) {
|
||||
yield [lastKey, group];
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
#include "utils.hpp"
|
||||
|
||||
#include "common.h"
|
||||
#include "json-schema-to-grammar.h"
|
||||
#include "llama.h"
|
||||
#include "grammar-parser.h"
|
||||
|
||||
@@ -178,6 +179,7 @@ struct server_slot {
|
||||
llama_token sampled;
|
||||
struct llama_sampling_params sparams;
|
||||
llama_sampling_context * ctx_sampling = nullptr;
|
||||
json json_schema;
|
||||
|
||||
int32_t ga_i = 0; // group-attention state
|
||||
int32_t ga_n = 1; // group-attention factor
|
||||
@@ -845,7 +847,17 @@ struct server_context {
|
||||
slot.sparams.penalize_nl = json_value(data, "penalize_nl", default_sparams.penalize_nl);
|
||||
slot.params.n_keep = json_value(data, "n_keep", slot.params.n_keep);
|
||||
slot.params.seed = json_value(data, "seed", default_params.seed);
|
||||
slot.sparams.grammar = json_value(data, "grammar", default_sparams.grammar);
|
||||
if (data.contains("json_schema") && !data.contains("grammar")) {
|
||||
try {
|
||||
auto schema = json_value(data, "json_schema", json::object());
|
||||
slot.sparams.grammar = json_schema_to_grammar(schema);
|
||||
} catch (const std::exception & e) {
|
||||
send_error(task, std::string("\"json_schema\": ") + e.what(), ERROR_TYPE_INVALID_REQUEST);
|
||||
return false;
|
||||
}
|
||||
} else {
|
||||
slot.sparams.grammar = json_value(data, "grammar", default_sparams.grammar);
|
||||
}
|
||||
slot.sparams.n_probs = json_value(data, "n_probs", default_sparams.n_probs);
|
||||
slot.sparams.min_keep = json_value(data, "min_keep", default_sparams.min_keep);
|
||||
|
||||
@@ -1235,7 +1247,7 @@ struct server_context {
|
||||
{"penalize_nl", slot.sparams.penalize_nl},
|
||||
{"stop", slot.params.antiprompt},
|
||||
{"n_predict", slot.params.n_predict}, // TODO: fix duplicate key n_predict
|
||||
{"n_keep", params.n_keep},
|
||||
{"n_keep", slot.params.n_keep},
|
||||
{"ignore_eos", ignore_eos},
|
||||
{"stream", slot.params.stream},
|
||||
{"logit_bias", slot.sparams.logit_bias},
|
||||
@@ -1746,7 +1758,7 @@ struct server_context {
|
||||
}
|
||||
|
||||
// process in chunks of params.n_batch
|
||||
int32_t n_batch = llama_n_batch(ctx);
|
||||
int32_t n_batch = llama_n_batch(ctx);
|
||||
int32_t n_ubatch = llama_n_ubatch(ctx);
|
||||
|
||||
// next, batch any pending prompts without exceeding n_batch
|
||||
@@ -2213,7 +2225,7 @@ static void server_print_usage(const char * argv0, const gpt_params & params, co
|
||||
printf(" -to N, --timeout N server read/write timeout in seconds (default: %d)\n", sparams.read_timeout);
|
||||
printf(" --embeddings enable embedding vector output (default: %s)\n", params.embedding ? "enabled" : "disabled");
|
||||
printf(" -np N, --parallel N number of slots for process requests (default: %d)\n", params.n_parallel);
|
||||
printf(" -cb, --cont-batching enable continuous batching (a.k.a dynamic batching) (default: disabled)\n");
|
||||
printf(" -cb, --cont-batching enable continuous batching (a.k.a dynamic batching) (default: enabled)\n");
|
||||
printf(" -spf FNAME, --system-prompt-file FNAME\n");
|
||||
printf(" set a file to load a system prompt (initial prompt of all slots), this is useful for chat applications.\n");
|
||||
printf(" -ctk TYPE, --cache-type-k TYPE\n");
|
||||
|
||||
@@ -5,15 +5,14 @@ import sys
|
||||
import time
|
||||
import traceback
|
||||
from contextlib import closing
|
||||
|
||||
import psutil
|
||||
from subprocess import TimeoutExpired
|
||||
|
||||
|
||||
def before_scenario(context, scenario):
|
||||
context.debug = 'DEBUG' in os.environ and os.environ['DEBUG'] == 'ON'
|
||||
if context.debug:
|
||||
print("DEBUG=ON\n")
|
||||
print(f"\x1b[33;42mStarting new scenario: {scenario.name}!\x1b[0m\n")
|
||||
print("DEBUG=ON")
|
||||
print(f"\x1b[33;42mStarting new scenario: {scenario.name}!\x1b[0m")
|
||||
port = 8080
|
||||
if 'PORT' in os.environ:
|
||||
port = int(os.environ['PORT'])
|
||||
@@ -27,60 +26,40 @@ def after_scenario(context, scenario):
|
||||
return
|
||||
if scenario.status == "failed":
|
||||
if 'GITHUB_ACTIONS' in os.environ:
|
||||
print(f"\x1b[33;101mSCENARIO FAILED: {scenario.name} server logs:\x1b[0m\n\n")
|
||||
print(f"\x1b[33;101mSCENARIO FAILED: {scenario.name} server logs:\x1b[0m\n")
|
||||
if os.path.isfile('llama.log'):
|
||||
with closing(open('llama.log', 'r')) as f:
|
||||
for line in f:
|
||||
print(line)
|
||||
if not is_server_listening(context.server_fqdn, context.server_port):
|
||||
print("\x1b[33;101mERROR: Server stopped listening\x1b[0m\n")
|
||||
print("\x1b[33;101mERROR: Server stopped listening\x1b[0m")
|
||||
|
||||
if not pid_exists(context.server_process.pid):
|
||||
if context.server_process.poll() is not None:
|
||||
assert False, f"Server not running pid={context.server_process.pid} ..."
|
||||
|
||||
server_graceful_shutdown(context)
|
||||
server_graceful_shutdown(context) # SIGINT
|
||||
|
||||
# Wait few for socket to free up
|
||||
time.sleep(0.05)
|
||||
try:
|
||||
context.server_process.wait(0.5)
|
||||
except TimeoutExpired:
|
||||
print(f"server still alive after 500ms, force-killing pid={context.server_process.pid} ...")
|
||||
context.server_process.kill() # SIGKILL
|
||||
context.server_process.wait()
|
||||
|
||||
attempts = 0
|
||||
while pid_exists(context.server_process.pid) or is_server_listening(context.server_fqdn, context.server_port):
|
||||
server_kill(context)
|
||||
while is_server_listening(context.server_fqdn, context.server_port):
|
||||
time.sleep(0.1)
|
||||
attempts += 1
|
||||
if attempts > 5:
|
||||
server_kill_hard(context)
|
||||
except:
|
||||
exc = sys.exception()
|
||||
print("error in after scenario: \n")
|
||||
print(exc)
|
||||
print("*** print_tb: \n")
|
||||
traceback.print_tb(exc.__traceback__, file=sys.stdout)
|
||||
except Exception:
|
||||
print("ignoring error in after_scenario:")
|
||||
traceback.print_exc(file=sys.stdout)
|
||||
|
||||
|
||||
def server_graceful_shutdown(context):
|
||||
print(f"shutting down server pid={context.server_process.pid} ...\n")
|
||||
print(f"shutting down server pid={context.server_process.pid} ...")
|
||||
if os.name == 'nt':
|
||||
os.kill(context.server_process.pid, signal.CTRL_C_EVENT)
|
||||
interrupt = signal.CTRL_C_EVENT
|
||||
else:
|
||||
os.kill(context.server_process.pid, signal.SIGINT)
|
||||
|
||||
|
||||
def server_kill(context):
|
||||
print(f"killing server pid={context.server_process.pid} ...\n")
|
||||
context.server_process.kill()
|
||||
|
||||
|
||||
def server_kill_hard(context):
|
||||
pid = context.server_process.pid
|
||||
path = context.server_path
|
||||
|
||||
print(f"Server dangling exits, hard killing force {pid}={path}...\n")
|
||||
try:
|
||||
psutil.Process(pid).kill()
|
||||
except psutil.NoSuchProcess:
|
||||
return False
|
||||
return True
|
||||
interrupt = signal.SIGINT
|
||||
context.server_process.send_signal(interrupt)
|
||||
|
||||
|
||||
def is_server_listening(server_fqdn, server_port):
|
||||
@@ -88,14 +67,5 @@ def is_server_listening(server_fqdn, server_port):
|
||||
result = sock.connect_ex((server_fqdn, server_port))
|
||||
_is_server_listening = result == 0
|
||||
if _is_server_listening:
|
||||
print(f"server is listening on {server_fqdn}:{server_port}...\n")
|
||||
print(f"server is listening on {server_fqdn}:{server_port}...")
|
||||
return _is_server_listening
|
||||
|
||||
|
||||
def pid_exists(pid):
|
||||
try:
|
||||
psutil.Process(pid)
|
||||
except psutil.NoSuchProcess:
|
||||
return False
|
||||
return True
|
||||
|
||||
|
||||
@@ -37,6 +37,22 @@ Feature: Security
|
||||
| llama.cpp | no |
|
||||
| hackme | raised |
|
||||
|
||||
Scenario Outline: OAI Compatibility (invalid response formats)
|
||||
Given a system prompt test
|
||||
And a user prompt test
|
||||
And a response format <response_format>
|
||||
And a model test
|
||||
And 2 max tokens to predict
|
||||
And streaming is disabled
|
||||
Given an OAI compatible chat completions request with raised api error
|
||||
|
||||
Examples: Prompts
|
||||
| response_format |
|
||||
| {"type": "sound"} |
|
||||
| {"type": "json_object", "schema": 123} |
|
||||
| {"type": "json_object", "schema": {"type": 123}} |
|
||||
| {"type": "json_object", "schema": {"type": "hiccup"}} |
|
||||
|
||||
|
||||
Scenario Outline: CORS Options
|
||||
Given a user api key llama.cpp
|
||||
|
||||
@@ -35,9 +35,9 @@ Feature: llama.cpp server
|
||||
And metric llamacpp:tokens_predicted is <n_predicted>
|
||||
|
||||
Examples: Prompts
|
||||
| prompt | n_predict | re_content | n_prompt | n_predicted | truncated |
|
||||
| I believe the meaning of life is | 8 | (read\|going)+ | 18 | 8 | not |
|
||||
| Write a joke about AI from a very long prompt which will not be truncated | 256 | (princesses\|everyone\|kids)+ | 46 | 64 | not |
|
||||
| prompt | n_predict | re_content | n_prompt | n_predicted | truncated |
|
||||
| I believe the meaning of life is | 8 | (read\|going)+ | 18 | 8 | not |
|
||||
| Write a joke about AI from a very long prompt which will not be truncated | 256 | (princesses\|everyone\|kids\|Anna\|forest)+ | 46 | 64 | not |
|
||||
|
||||
Scenario: Completion prompt truncated
|
||||
Given a prompt:
|
||||
@@ -48,7 +48,7 @@ Feature: llama.cpp server
|
||||
Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
|
||||
"""
|
||||
And a completion request with no api error
|
||||
Then 64 tokens are predicted matching fun|Annaks|popcorns|pictry
|
||||
Then 64 tokens are predicted matching fun|Annaks|popcorns|pictry|bowl
|
||||
And the completion is truncated
|
||||
And 109 prompt tokens are processed
|
||||
|
||||
@@ -65,9 +65,25 @@ Feature: llama.cpp server
|
||||
And the completion is <truncated> truncated
|
||||
|
||||
Examples: Prompts
|
||||
| model | system_prompt | user_prompt | max_tokens | re_content | n_prompt | n_predicted | enable_streaming | truncated |
|
||||
| llama-2 | Book | What is the best book | 8 | (Here\|what)+ | 77 | 8 | disabled | not |
|
||||
| codellama70b | You are a coding assistant. | Write the fibonacci function in c++. | 128 | (thanks\|happy\|bird)+ | -1 | 64 | enabled | |
|
||||
| model | system_prompt | user_prompt | max_tokens | re_content | n_prompt | n_predicted | enable_streaming | truncated |
|
||||
| llama-2 | Book | What is the best book | 8 | (Here\|what)+ | 77 | 8 | disabled | not |
|
||||
| codellama70b | You are a coding assistant. | Write the fibonacci function in c++. | 128 | (thanks\|happy\|bird\|Annabyear)+ | -1 | 64 | enabled | |
|
||||
|
||||
|
||||
Scenario Outline: OAI Compatibility w/ response format
|
||||
Given a model test
|
||||
And a system prompt test
|
||||
And a user prompt test
|
||||
And a response format <response_format>
|
||||
And 10 max tokens to predict
|
||||
Given an OAI compatible chat completions request with no api error
|
||||
Then <n_predicted> tokens are predicted matching <re_content>
|
||||
|
||||
Examples: Prompts
|
||||
| response_format | n_predicted | re_content |
|
||||
| {"type": "json_object", "schema": {"const": "42"}} | 5 | "42" |
|
||||
| {"type": "json_object", "schema": {"items": [{"type": "integer"}]}} | 10 | \[ -300 \] |
|
||||
| {"type": "json_object"} | 10 | \{ " Jacky. |
|
||||
|
||||
|
||||
Scenario: Tokenize / Detokenize
|
||||
|
||||
@@ -24,12 +24,16 @@ from prometheus_client import parser
|
||||
def step_server_config(context, server_fqdn, server_port):
|
||||
context.server_fqdn = server_fqdn
|
||||
context.server_port = int(server_port)
|
||||
context.n_gpu_layer = None
|
||||
if 'PORT' in os.environ:
|
||||
context.server_port = int(os.environ['PORT'])
|
||||
print(f"$PORT set, overriding server port with to {context.server_port}")
|
||||
if 'FQDN' in os.environ:
|
||||
context.server_fqdn = os.environ['FQDN']
|
||||
print(f"$FQDN set, overriding server fqdn with to {context.server_fqdn}")
|
||||
if 'N_GPU_LAYERS' in os.environ:
|
||||
context.n_gpu_layer = int(os.environ['N_GPU_LAYERS'])
|
||||
print(f"$N_GPU_LAYERS set, overriding n_gpu_layer with to {context.n_gpu_layer}")
|
||||
|
||||
context.base_url = f'http://{context.server_fqdn}:{context.server_port}'
|
||||
|
||||
@@ -41,7 +45,6 @@ def step_server_config(context, server_fqdn, server_port):
|
||||
context.n_ctx = None
|
||||
context.n_ga = None
|
||||
context.n_ga_w = None
|
||||
context.n_gpu_layer = None
|
||||
context.n_predict = None
|
||||
context.n_prompts = 0
|
||||
context.n_server_predict = None
|
||||
@@ -56,6 +59,7 @@ def step_server_config(context, server_fqdn, server_port):
|
||||
context.seed = None
|
||||
context.server_seed = None
|
||||
context.user_api_key = None
|
||||
context.response_format = None
|
||||
|
||||
context.tasks_result = []
|
||||
context.concurrent_tasks = []
|
||||
@@ -66,7 +70,7 @@ def step_server_config(context, server_fqdn, server_port):
|
||||
def step_download_hf_model(context, hf_file, hf_repo):
|
||||
context.model_file = hf_hub_download(repo_id=hf_repo, filename=hf_file)
|
||||
if context.debug:
|
||||
print(f"model file: {context.model_file}\n")
|
||||
print(f"model file: {context.model_file}")
|
||||
|
||||
|
||||
@step('a model file {model_file}')
|
||||
@@ -137,9 +141,12 @@ def step_start_server(context):
|
||||
if 'GITHUB_ACTIONS' in os.environ:
|
||||
max_attempts *= 2
|
||||
|
||||
addrs = socket.getaddrinfo(context.server_fqdn, context.server_port, type=socket.SOCK_STREAM)
|
||||
family, typ, proto, _, sockaddr = addrs[0]
|
||||
|
||||
while True:
|
||||
with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as sock:
|
||||
result = sock.connect_ex((context.server_fqdn, context.server_port))
|
||||
with closing(socket.socket(family, typ, proto)) as sock:
|
||||
result = sock.connect_ex(sockaddr)
|
||||
if result == 0:
|
||||
print("\x1b[33;46mserver started!\x1b[0m")
|
||||
return
|
||||
@@ -209,7 +216,7 @@ async def step_request_completion(context, api_error):
|
||||
user_api_key=context.user_api_key)
|
||||
context.tasks_result.append(completion)
|
||||
if context.debug:
|
||||
print(f"Completion response: {completion}\n")
|
||||
print(f"Completion response: {completion}")
|
||||
if expect_api_error:
|
||||
assert completion == 401, f"completion must be an 401 status code: {completion}"
|
||||
|
||||
@@ -263,6 +270,11 @@ def step_max_tokens(context, max_tokens):
|
||||
context.n_predict = max_tokens
|
||||
|
||||
|
||||
@step('a response format {response_format}')
|
||||
def step_response_format(context, response_format):
|
||||
context.response_format = json.loads(response_format)
|
||||
|
||||
|
||||
@step('streaming is {enable_streaming}')
|
||||
def step_streaming(context, enable_streaming):
|
||||
context.enable_streaming = enable_streaming == 'enabled'
|
||||
@@ -354,7 +366,7 @@ def step_prompt_passkey(context, passkey, i_pos):
|
||||
prompt += context.prompt_junk_suffix
|
||||
if context.debug:
|
||||
passkey_highlight = "\x1b[33m" + passkey + "\x1b[0m"
|
||||
print(f"Passkey challenge:\n```{prompt.replace(passkey, passkey_highlight)}```\n")
|
||||
print(f"Passkey challenge:\n```{prompt.replace(passkey, passkey_highlight)}```")
|
||||
context.prompts.append(context.prompt_prefix + prompt + context.prompt_suffix)
|
||||
context.n_prompts = len(context.prompts)
|
||||
|
||||
@@ -363,7 +375,7 @@ def step_prompt_passkey(context, passkey, i_pos):
|
||||
@async_run_until_complete
|
||||
async def step_oai_chat_completions(context, api_error):
|
||||
if context.debug:
|
||||
print(f"Submitting OAI compatible completions request...\n")
|
||||
print(f"Submitting OAI compatible completions request...")
|
||||
expect_api_error = api_error == 'raised'
|
||||
completion = await oai_chat_completions(context.prompts.pop(),
|
||||
context.system_prompt,
|
||||
@@ -378,6 +390,9 @@ async def step_oai_chat_completions(context, api_error):
|
||||
enable_streaming=context.enable_streaming
|
||||
if hasattr(context, 'enable_streaming') else None,
|
||||
|
||||
response_format=context.response_format
|
||||
if hasattr(context, 'response_format') else None,
|
||||
|
||||
seed=await completions_seed(context),
|
||||
|
||||
user_api_key=context.user_api_key
|
||||
@@ -437,6 +452,8 @@ async def step_oai_chat_completions(context):
|
||||
if hasattr(context, 'n_predict') else None,
|
||||
enable_streaming=context.enable_streaming
|
||||
if hasattr(context, 'enable_streaming') else None,
|
||||
response_format=context.response_format
|
||||
if hasattr(context, 'response_format') else None,
|
||||
seed=await completions_seed(context),
|
||||
user_api_key=context.user_api_key
|
||||
if hasattr(context, 'user_api_key') else None)
|
||||
@@ -457,6 +474,8 @@ async def step_oai_chat_completions(context):
|
||||
if hasattr(context, 'n_predict') else None,
|
||||
enable_streaming=context.enable_streaming
|
||||
if hasattr(context, 'enable_streaming') else None,
|
||||
response_format=context.response_format
|
||||
if hasattr(context, 'response_format') else None,
|
||||
seed=context.seed
|
||||
if hasattr(context, 'seed') else
|
||||
context.server_seed
|
||||
@@ -508,12 +527,12 @@ async def step_all_embeddings_are_the_same(context):
|
||||
embedding1 = np.array(embeddings[i])
|
||||
embedding2 = np.array(embeddings[j])
|
||||
if context.debug:
|
||||
print(f"embedding1: {embedding1[-8:]}\n")
|
||||
print(f"embedding2: {embedding2[-8:]}\n")
|
||||
print(f"embedding1: {embedding1[-8:]}")
|
||||
print(f"embedding2: {embedding2[-8:]}")
|
||||
similarity = np.dot(embedding1, embedding2) / (np.linalg.norm(embedding1) * np.linalg.norm(embedding2))
|
||||
msg = f"Similarity between {i} and {j}: {similarity:.10f}"
|
||||
if context.debug:
|
||||
print(f"{msg}\n")
|
||||
print(f"{msg}")
|
||||
assert np.isclose(similarity, 1.0, rtol=1e-05, atol=1e-08, equal_nan=False), msg
|
||||
|
||||
|
||||
@@ -630,7 +649,7 @@ async def step_prometheus_metrics_exported(context):
|
||||
metrics_raw = await metrics_response.text()
|
||||
metric_exported = False
|
||||
if context.debug:
|
||||
print(f"/metrics answer:\n{metrics_raw}\n")
|
||||
print(f"/metrics answer:\n{metrics_raw}")
|
||||
context.metrics = {}
|
||||
for metric in parser.text_string_to_metric_families(metrics_raw):
|
||||
match metric.name:
|
||||
@@ -739,6 +758,7 @@ async def oai_chat_completions(user_prompt,
|
||||
model=None,
|
||||
n_predict=None,
|
||||
enable_streaming=None,
|
||||
response_format=None,
|
||||
seed=None,
|
||||
user_api_key=None,
|
||||
expect_api_error=None):
|
||||
@@ -764,6 +784,8 @@ async def oai_chat_completions(user_prompt,
|
||||
"stream": enable_streaming,
|
||||
"seed": seed
|
||||
}
|
||||
if response_format is not None:
|
||||
payload['response_format'] = response_format
|
||||
completion_response = {
|
||||
'content': '',
|
||||
'timings': {
|
||||
@@ -824,6 +846,7 @@ async def oai_chat_completions(user_prompt,
|
||||
model=model,
|
||||
max_tokens=n_predict,
|
||||
stream=enable_streaming,
|
||||
response_format=payload.get('response_format'),
|
||||
seed=seed
|
||||
)
|
||||
except openai.error.AuthenticationError as e:
|
||||
@@ -932,7 +955,7 @@ def assert_n_tokens_predicted(completion_response, expected_predicted_n=None, re
|
||||
last_match = end
|
||||
highlighted += content[last_match:]
|
||||
if 'DEBUG' in os.environ and os.environ['DEBUG'] == 'ON':
|
||||
print(f"Checking completion response: {highlighted}\n")
|
||||
print(f"Checking completion response: {highlighted}")
|
||||
assert last_match > 0, f'/{re_content}/ must match ```{highlighted}```'
|
||||
if expected_predicted_n and expected_predicted_n > 0:
|
||||
assert n_predicted == expected_predicted_n, (f'invalid number of tokens predicted:'
|
||||
@@ -942,7 +965,7 @@ def assert_n_tokens_predicted(completion_response, expected_predicted_n=None, re
|
||||
async def gather_tasks_results(context):
|
||||
n_tasks = len(context.concurrent_tasks)
|
||||
if context.debug:
|
||||
print(f"Waiting for all {n_tasks} tasks results...\n")
|
||||
print(f"Waiting for all {n_tasks} tasks results...")
|
||||
for task_no in range(n_tasks):
|
||||
context.tasks_result.append(await context.concurrent_tasks.pop())
|
||||
n_completions = len(context.tasks_result)
|
||||
@@ -959,7 +982,7 @@ async def wait_for_health_status(context,
|
||||
slots_processing=None,
|
||||
expected_slots=None):
|
||||
if context.debug:
|
||||
print(f"Starting checking for health for expected_health_status={expected_health_status}\n")
|
||||
print(f"Starting checking for health for expected_health_status={expected_health_status}")
|
||||
interval = 0.5
|
||||
counter = 0
|
||||
if 'GITHUB_ACTIONS' in os.environ:
|
||||
@@ -1048,8 +1071,6 @@ def start_server_background(context):
|
||||
if 'LLAMA_SERVER_BIN_PATH' in os.environ:
|
||||
context.server_path = os.environ['LLAMA_SERVER_BIN_PATH']
|
||||
server_listen_addr = context.server_fqdn
|
||||
if os.name == 'nt':
|
||||
server_listen_addr = '0.0.0.0'
|
||||
server_args = [
|
||||
'--host', server_listen_addr,
|
||||
'--port', context.server_port,
|
||||
@@ -1088,7 +1109,7 @@ def start_server_background(context):
|
||||
server_args.append('--verbose')
|
||||
if 'SERVER_LOG_FORMAT_JSON' not in os.environ:
|
||||
server_args.extend(['--log-format', "text"])
|
||||
print(f"starting server with: {context.server_path} {server_args}\n")
|
||||
print(f"starting server with: {context.server_path} {server_args}")
|
||||
flags = 0
|
||||
if 'nt' == os.name:
|
||||
flags |= subprocess.DETACHED_PROCESS
|
||||
|
||||
@@ -3,5 +3,4 @@ behave~=1.2.6
|
||||
huggingface_hub~=0.20.3
|
||||
numpy~=1.24.4
|
||||
openai~=0.25.0
|
||||
psutil~=5.9.8
|
||||
prometheus-client~=0.20.0
|
||||
|
||||
@@ -371,11 +371,23 @@ static json oaicompat_completion_params_parse(
|
||||
llama_params["repeat_last_n"] = json_value(body, "repeat_last_n", default_sparams.penalty_last_n);
|
||||
llama_params["ignore_eos"] = json_value(body, "ignore_eos", false);
|
||||
llama_params["tfs_z"] = json_value(body, "tfs_z", default_sparams.tfs_z);
|
||||
llama_params["n_keep"] = json_value(body, "n_keep", 0);
|
||||
|
||||
if (body.count("grammar") != 0) {
|
||||
if (body.contains("grammar")) {
|
||||
llama_params["grammar"] = json_value(body, "grammar", json::object());
|
||||
}
|
||||
|
||||
if (body.contains("response_format")) {
|
||||
auto response_format = json_value(body, "response_format", json::object());
|
||||
if (response_format.contains("type")) {
|
||||
if (response_format["type"] == "json_object") {
|
||||
llama_params["json_schema"] = json_value(response_format, "schema", json::object());
|
||||
} else {
|
||||
throw std::runtime_error("response_format type not supported: " + response_format["type"].dump());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Handle 'stop' field
|
||||
if (body.contains("stop") && body["stop"].is_string()) {
|
||||
llama_params["stop"] = json::array({body["stop"].get<std::string>()});
|
||||
|
||||
@@ -6,8 +6,6 @@ set INPUT2="Building a website can be done in 10 simple steps:\nStep 1:"
|
||||
@call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" intel64 --force
|
||||
|
||||
|
||||
set GGML_SYCL_DEVICE=0
|
||||
rem set GGML_SYCL_DEBUG=1
|
||||
.\build\bin\main.exe -m models\llama-2-7b.Q4_0.gguf -p %INPUT2% -n 400 -e -ngl 33 -s 0
|
||||
|
||||
|
||||
|
||||
28
examples/ts-type-to-grammar.sh
Executable file
28
examples/ts-type-to-grammar.sh
Executable file
@@ -0,0 +1,28 @@
|
||||
#!/bin/bash
|
||||
#
|
||||
# ./examples/ts-type-to-grammar.sh "{a:string,b:string,c?:string}"
|
||||
# python examples/json-schema-to-grammar.py https://json.schemastore.org/tsconfig.json
|
||||
#
|
||||
set -euo pipefail
|
||||
|
||||
readonly type="$1"
|
||||
|
||||
# Create a temporary directory
|
||||
TMPDIR=""
|
||||
trap 'rm -fR "$TMPDIR"' EXIT
|
||||
TMPDIR=$(mktemp -d)
|
||||
|
||||
DTS_FILE="$TMPDIR/type.d.ts"
|
||||
SCHEMA_FILE="$TMPDIR/schema.json"
|
||||
|
||||
echo "export type MyType = $type" > "$DTS_FILE"
|
||||
|
||||
# This is a fork of typescript-json-schema, actively maintained as of March 2024:
|
||||
# https://github.com/vega/ts-json-schema-generator
|
||||
npx ts-json-schema-generator --unstable --no-top-ref --path "$DTS_FILE" --type MyType -e none > "$SCHEMA_FILE"
|
||||
|
||||
# Alternative, not actively maintained as of March 2024:
|
||||
# https://github.com/YousefED/typescript-json-schema
|
||||
# npx typescript-json-schema --defaultProps --required "$DTS_FILE" MyType | tee "$SCHEMA_FILE" >&2
|
||||
|
||||
./examples/json-schema-to-grammar.py "$SCHEMA_FILE"
|
||||
6
flake.lock
generated
6
flake.lock
generated
@@ -20,11 +20,11 @@
|
||||
},
|
||||
"nixpkgs": {
|
||||
"locked": {
|
||||
"lastModified": 1709703039,
|
||||
"narHash": "sha256-6hqgQ8OK6gsMu1VtcGKBxKQInRLHtzulDo9Z5jxHEFY=",
|
||||
"lastModified": 1710451336,
|
||||
"narHash": "sha256-pP86Pcfu3BrAvRO7R64x7hs+GaQrjFes+mEPowCfkxY=",
|
||||
"owner": "NixOS",
|
||||
"repo": "nixpkgs",
|
||||
"rev": "9df3e30ce24fd28c7b3e2de0d986769db5d6225d",
|
||||
"rev": "d691274a972b3165335d261cc4671335f5c67de9",
|
||||
"type": "github"
|
||||
},
|
||||
"original": {
|
||||
|
||||
@@ -1015,7 +1015,7 @@ static bool ggml_is_view_op(enum ggml_op op) {
|
||||
#endif
|
||||
|
||||
#ifndef GGML_SCHED_MAX_SPLIT_INPUTS
|
||||
#define GGML_SCHED_MAX_SPLIT_INPUTS 4
|
||||
#define GGML_SCHED_MAX_SPLIT_INPUTS GGML_MAX_SRC
|
||||
#endif
|
||||
|
||||
#ifndef GGML_SCHED_MAX_COPIES
|
||||
|
||||
3710
ggml-cuda.cu
3710
ggml-cuda.cu
File diff suppressed because it is too large
Load Diff
44
ggml-metal.m
44
ggml-metal.m
@@ -173,8 +173,9 @@ enum ggml_metal_kernel_type {
|
||||
GGML_METAL_KERNEL_TYPE_CPY_F32_Q8_0,
|
||||
GGML_METAL_KERNEL_TYPE_CPY_F32_Q4_0,
|
||||
GGML_METAL_KERNEL_TYPE_CPY_F32_Q4_1,
|
||||
//GGML_METAL_KERNEL_TYPE_CPY_F32_Q5_0,
|
||||
//GGML_METAL_KERNEL_TYPE_CPY_F32_Q5_1,
|
||||
GGML_METAL_KERNEL_TYPE_CPY_F32_Q5_0,
|
||||
GGML_METAL_KERNEL_TYPE_CPY_F32_Q5_1,
|
||||
GGML_METAL_KERNEL_TYPE_CPY_F32_IQ4_NL,
|
||||
GGML_METAL_KERNEL_TYPE_CPY_F16_F16,
|
||||
GGML_METAL_KERNEL_TYPE_CPY_F16_F32,
|
||||
GGML_METAL_KERNEL_TYPE_CONCAT,
|
||||
@@ -598,8 +599,9 @@ static struct ggml_metal_context * ggml_metal_init(int n_cb) {
|
||||
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_CPY_F32_Q8_0, cpy_f32_q8_0, true);
|
||||
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_CPY_F32_Q4_0, cpy_f32_q4_0, true);
|
||||
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_CPY_F32_Q4_1, cpy_f32_q4_1, true);
|
||||
//GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_CPY_F32_Q5_0, cpy_f32_q5_0, true);
|
||||
//GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_CPY_F32_Q5_1, cpy_f32_q5_1, true);
|
||||
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_CPY_F32_Q5_0, cpy_f32_q5_0, true);
|
||||
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_CPY_F32_Q5_1, cpy_f32_q5_1, true);
|
||||
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_CPY_F32_IQ4_NL, cpy_f32_iq4_nl, true);
|
||||
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_CPY_F16_F16, cpy_f16_f16, true);
|
||||
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_CPY_F16_F32, cpy_f16_f32, true);
|
||||
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_CONCAT, concat, true);
|
||||
@@ -739,6 +741,9 @@ static bool ggml_metal_supports_op(const struct ggml_metal_context * ctx, const
|
||||
case GGML_TYPE_Q8_0:
|
||||
case GGML_TYPE_Q4_0:
|
||||
case GGML_TYPE_Q4_1:
|
||||
case GGML_TYPE_Q5_0:
|
||||
case GGML_TYPE_Q5_1:
|
||||
case GGML_TYPE_IQ4_NL:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
@@ -1387,6 +1392,14 @@ static enum ggml_status ggml_metal_graph_compute(
|
||||
(ne11 > ne11_mm_min || (ggml_is_quantized(src0t) && ne12 > 1))) {
|
||||
//printf("matrix: ne00 = %6d, ne01 = %6d, ne02 = %6d, ne11 = %6d, ne12 = %6d\n", ne00, ne01, ne02, ne11, ne12);
|
||||
|
||||
// some Metal matrix data types require aligned pointers
|
||||
// ref: https://developer.apple.com/metal/Metal-Shading-Language-Specification.pdf (Table 2.5)
|
||||
switch (src0->type) {
|
||||
case GGML_TYPE_F32: GGML_ASSERT(nb01 % 16 == 0); break;
|
||||
case GGML_TYPE_F16: GGML_ASSERT(nb01 % 8 == 0); break;
|
||||
default: break;
|
||||
}
|
||||
|
||||
id<MTLComputePipelineState> pipeline = nil;
|
||||
|
||||
switch (src0->type) {
|
||||
@@ -1701,6 +1714,14 @@ static enum ggml_status ggml_metal_graph_compute(
|
||||
ne20 % 32 == 0 && ne20 >= 64 &&
|
||||
ne11 > ne11_mm_min) {
|
||||
|
||||
// some Metal matrix data types require aligned pointers
|
||||
// ref: https://developer.apple.com/metal/Metal-Shading-Language-Specification.pdf (Table 2.5)
|
||||
switch (src0->type) {
|
||||
case GGML_TYPE_F32: GGML_ASSERT(nb01 % 16 == 0); break;
|
||||
case GGML_TYPE_F16: GGML_ASSERT(nb01 % 8 == 0); break;
|
||||
default: break;
|
||||
}
|
||||
|
||||
id<MTLComputePipelineState> pipeline = nil;
|
||||
|
||||
switch (src2->type) {
|
||||
@@ -2431,13 +2452,14 @@ static enum ggml_status ggml_metal_graph_compute(
|
||||
GGML_ASSERT(ne0 % ggml_blck_size(dst->type) == 0);
|
||||
|
||||
switch (dstt) {
|
||||
case GGML_TYPE_F16: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_CPY_F32_F16].pipeline; break;
|
||||
case GGML_TYPE_F32: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_CPY_F32_F32].pipeline; break;
|
||||
case GGML_TYPE_Q8_0: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_CPY_F32_Q8_0].pipeline; break;
|
||||
case GGML_TYPE_Q4_0: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_CPY_F32_Q4_0].pipeline; break;
|
||||
case GGML_TYPE_Q4_1: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_CPY_F32_Q4_1].pipeline; break;
|
||||
//case GGML_TYPE_Q5_0: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_CPY_F32_Q5_0].pipeline; break;
|
||||
//case GGML_TYPE_Q5_1: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_CPY_F32_Q5_1].pipeline; break;
|
||||
case GGML_TYPE_F16: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_CPY_F32_F16].pipeline; break;
|
||||
case GGML_TYPE_F32: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_CPY_F32_F32].pipeline; break;
|
||||
case GGML_TYPE_Q8_0: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_CPY_F32_Q8_0].pipeline; break;
|
||||
case GGML_TYPE_Q4_0: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_CPY_F32_Q4_0].pipeline; break;
|
||||
case GGML_TYPE_Q4_1: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_CPY_F32_Q4_1].pipeline; break;
|
||||
case GGML_TYPE_Q5_0: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_CPY_F32_Q5_0].pipeline; break;
|
||||
case GGML_TYPE_Q5_1: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_CPY_F32_Q5_1].pipeline; break;
|
||||
case GGML_TYPE_IQ4_NL: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_CPY_F32_IQ4_NL].pipeline; break;
|
||||
default: GGML_ASSERT(false && "not implemented");
|
||||
};
|
||||
} break;
|
||||
|
||||
288
ggml-metal.metal
288
ggml-metal.metal
@@ -2388,6 +2388,242 @@ kernel void kernel_cpy_f32_q4_1(
|
||||
}
|
||||
}
|
||||
|
||||
kernel void kernel_cpy_f32_q5_0(
|
||||
device const float * src0,
|
||||
device void * dst,
|
||||
constant int64_t & ne00,
|
||||
constant int64_t & ne01,
|
||||
constant int64_t & ne02,
|
||||
constant int64_t & ne03,
|
||||
constant uint64_t & nb00,
|
||||
constant uint64_t & nb01,
|
||||
constant uint64_t & nb02,
|
||||
constant uint64_t & nb03,
|
||||
constant int64_t & ne0,
|
||||
constant int64_t & ne1,
|
||||
constant int64_t & ne2,
|
||||
constant int64_t & ne3,
|
||||
constant uint64_t & nb0,
|
||||
constant uint64_t & nb1,
|
||||
constant uint64_t & nb2,
|
||||
constant uint64_t & nb3,
|
||||
uint3 tgpig[[threadgroup_position_in_grid]],
|
||||
uint3 tpitg[[thread_position_in_threadgroup]],
|
||||
uint3 ntg[[threads_per_threadgroup]]) {
|
||||
const int64_t i03 = tgpig[2];
|
||||
const int64_t i02 = tgpig[1];
|
||||
const int64_t i01 = tgpig[0];
|
||||
|
||||
const int64_t n = i03*ne02*ne01*ne00 + i02*ne01*ne00 + i01*ne00;
|
||||
|
||||
const int64_t i3 = n / (ne2*ne1*ne0);
|
||||
const int64_t i2 = (n - i3*ne2*ne1*ne0) / (ne1*ne0);
|
||||
const int64_t i1 = (n - i3*ne2*ne1*ne0 - i2*ne1*ne0) / ne0;
|
||||
const int64_t i0 = (n - i3*ne2*ne1*ne0 - i2*ne1*ne0 - i1*ne0)/QK5_0;
|
||||
|
||||
device block_q5_0 * dst_data = (device block_q5_0 *) ((device char *) dst + i3*nb3 + i2*nb2 + i1*nb1 + i0*nb0);
|
||||
|
||||
for (int64_t i00 = tpitg.x*QK5_0; i00 < ne00; i00 += ntg.x*QK5_0) {
|
||||
device const float * src = (device float *)((device char *) src0 + i03*nb03 + i02*nb02 + i01*nb01 + i00*nb00);
|
||||
|
||||
float amax = 0.0f; // absolute max
|
||||
float max = 0.0f;
|
||||
|
||||
for (int j = 0; j < QK5_0; j++) {
|
||||
const float v = src[j];
|
||||
if (amax < fabs(v)) {
|
||||
amax = fabs(v);
|
||||
max = v;
|
||||
}
|
||||
}
|
||||
|
||||
const float d = max / -16;
|
||||
const float id = d ? 1.0f/d : 0.0f;
|
||||
|
||||
dst_data[i00/QK5_0].d = d;
|
||||
|
||||
uint32_t qh = 0;
|
||||
for (int j = 0; j < QK5_0/2; ++j) {
|
||||
const float x0 = src[0 + j]*id;
|
||||
const float x1 = src[QK5_0/2 + j]*id;
|
||||
|
||||
const uint8_t xi0 = MIN(31, (int8_t)(x0 + 16.5f));
|
||||
const uint8_t xi1 = MIN(31, (int8_t)(x1 + 16.5f));
|
||||
|
||||
dst_data[i00/QK5_0].qs[j] = (xi0 & 0xf) | ((xi1 & 0xf) << 4);
|
||||
qh |= ((xi0 & 0x10u) >> 4) << (j + 0);
|
||||
qh |= ((xi1 & 0x10u) >> 4) << (j + QK5_0/2);
|
||||
}
|
||||
thread const uint8_t * qh8 = (thread const uint8_t *)&qh;
|
||||
for (int j = 0; j < 4; ++j) {
|
||||
dst_data[i00/QK5_0].qh[j] = qh8[j];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
kernel void kernel_cpy_f32_q5_1(
|
||||
device const float * src0,
|
||||
device void * dst,
|
||||
constant int64_t & ne00,
|
||||
constant int64_t & ne01,
|
||||
constant int64_t & ne02,
|
||||
constant int64_t & ne03,
|
||||
constant uint64_t & nb00,
|
||||
constant uint64_t & nb01,
|
||||
constant uint64_t & nb02,
|
||||
constant uint64_t & nb03,
|
||||
constant int64_t & ne0,
|
||||
constant int64_t & ne1,
|
||||
constant int64_t & ne2,
|
||||
constant int64_t & ne3,
|
||||
constant uint64_t & nb0,
|
||||
constant uint64_t & nb1,
|
||||
constant uint64_t & nb2,
|
||||
constant uint64_t & nb3,
|
||||
uint3 tgpig[[threadgroup_position_in_grid]],
|
||||
uint3 tpitg[[thread_position_in_threadgroup]],
|
||||
uint3 ntg[[threads_per_threadgroup]]) {
|
||||
const int64_t i03 = tgpig[2];
|
||||
const int64_t i02 = tgpig[1];
|
||||
const int64_t i01 = tgpig[0];
|
||||
|
||||
const int64_t n = i03*ne02*ne01*ne00 + i02*ne01*ne00 + i01*ne00;
|
||||
|
||||
const int64_t i3 = n / (ne2*ne1*ne0);
|
||||
const int64_t i2 = (n - i3*ne2*ne1*ne0) / (ne1*ne0);
|
||||
const int64_t i1 = (n - i3*ne2*ne1*ne0 - i2*ne1*ne0) / ne0;
|
||||
const int64_t i0 = (n - i3*ne2*ne1*ne0 - i2*ne1*ne0 - i1*ne0)/QK5_1;
|
||||
|
||||
device block_q5_1 * dst_data = (device block_q5_1 *) ((device char *) dst + i3*nb3 + i2*nb2 + i1*nb1 + i0*nb0);
|
||||
|
||||
for (int64_t i00 = tpitg.x*QK5_1; i00 < ne00; i00 += ntg.x*QK5_1) {
|
||||
device const float * src = (device float *)((device char *) src0 + i03*nb03 + i02*nb02 + i01*nb01 + i00*nb00);
|
||||
|
||||
float max = src[0];
|
||||
float min = src[0];
|
||||
|
||||
for (int j = 1; j < QK5_1; j++) {
|
||||
const float v = src[j];
|
||||
min = v < min ? v : min;
|
||||
max = v > max ? v : max;
|
||||
}
|
||||
|
||||
const float d = (max - min) / 31;
|
||||
const float id = d ? 1.0f/d : 0.0f;
|
||||
|
||||
dst_data[i00/QK5_1].d = d;
|
||||
dst_data[i00/QK5_1].m = min;
|
||||
|
||||
uint32_t qh = 0;
|
||||
for (int j = 0; j < QK5_1/2; ++j) {
|
||||
const float x0 = (src[0 + j] - min)*id;
|
||||
const float x1 = (src[QK5_1/2 + j] - min)*id;
|
||||
|
||||
const uint8_t xi0 = (uint8_t)(x0 + 0.5f);
|
||||
const uint8_t xi1 = (uint8_t)(x1 + 0.5f);
|
||||
|
||||
dst_data[i00/QK5_1].qs[j] = (xi0 & 0xf) | ((xi1 & 0xf) << 4);
|
||||
qh |= ((xi0 & 0x10u) >> 4) << (j + 0);
|
||||
qh |= ((xi1 & 0x10u) >> 4) << (j + QK5_1/2);
|
||||
}
|
||||
thread const uint8_t * qh8 = (thread const uint8_t *)&qh;
|
||||
for (int j = 0; j < 4; ++j) {
|
||||
dst_data[i00/QK5_1].qh[j] = qh8[j];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static inline int best_index_int8(int n, constant float * val, float x) {
|
||||
if (x <= val[0]) return 0;
|
||||
if (x >= val[n-1]) return n-1;
|
||||
int ml = 0, mu = n-1;
|
||||
while (mu-ml > 1) {
|
||||
int mav = (ml+mu)/2;
|
||||
if (x < val[mav]) mu = mav; else ml = mav;
|
||||
}
|
||||
return x - val[mu-1] < val[mu] - x ? mu-1 : mu;
|
||||
}
|
||||
|
||||
constexpr constant static float kvalues_iq4nl_f[16] = {
|
||||
-127.f, -104.f, -83.f, -65.f, -49.f, -35.f, -22.f, -10.f, 1.f, 13.f, 25.f, 38.f, 53.f, 69.f, 89.f, 113.f
|
||||
};
|
||||
|
||||
kernel void kernel_cpy_f32_iq4_nl(
|
||||
device const float * src0,
|
||||
device void * dst,
|
||||
constant int64_t & ne00,
|
||||
constant int64_t & ne01,
|
||||
constant int64_t & ne02,
|
||||
constant int64_t & ne03,
|
||||
constant uint64_t & nb00,
|
||||
constant uint64_t & nb01,
|
||||
constant uint64_t & nb02,
|
||||
constant uint64_t & nb03,
|
||||
constant int64_t & ne0,
|
||||
constant int64_t & ne1,
|
||||
constant int64_t & ne2,
|
||||
constant int64_t & ne3,
|
||||
constant uint64_t & nb0,
|
||||
constant uint64_t & nb1,
|
||||
constant uint64_t & nb2,
|
||||
constant uint64_t & nb3,
|
||||
uint3 tgpig[[threadgroup_position_in_grid]],
|
||||
uint3 tpitg[[thread_position_in_threadgroup]],
|
||||
uint3 ntg[[threads_per_threadgroup]]) {
|
||||
const int64_t i03 = tgpig[2];
|
||||
const int64_t i02 = tgpig[1];
|
||||
const int64_t i01 = tgpig[0];
|
||||
|
||||
const int64_t n = i03*ne02*ne01*ne00 + i02*ne01*ne00 + i01*ne00;
|
||||
|
||||
const int64_t i3 = n / (ne2*ne1*ne0);
|
||||
const int64_t i2 = (n - i3*ne2*ne1*ne0) / (ne1*ne0);
|
||||
const int64_t i1 = (n - i3*ne2*ne1*ne0 - i2*ne1*ne0) / ne0;
|
||||
const int64_t i0 = (n - i3*ne2*ne1*ne0 - i2*ne1*ne0 - i1*ne0)/QK4_NL;
|
||||
|
||||
device block_iq4_nl * dst_data = (device block_iq4_nl *) ((device char *) dst + i3*nb3 + i2*nb2 + i1*nb1 + i0*nb0);
|
||||
|
||||
for (int64_t i00 = tpitg.x*QK4_NL; i00 < ne00; i00 += ntg.x*QK4_NL) {
|
||||
device const float * src = (device float *)((device char *) src0 + i03*nb03 + i02*nb02 + i01*nb01 + i00*nb00);
|
||||
|
||||
float amax = 0.0f; // absolute max
|
||||
float max = 0.0f;
|
||||
|
||||
for (int j = 0; j < QK4_0; j++) {
|
||||
const float v = src[j];
|
||||
if (amax < fabs(v)) {
|
||||
amax = fabs(v);
|
||||
max = v;
|
||||
}
|
||||
}
|
||||
|
||||
const float d = max / kvalues_iq4nl_f[0];
|
||||
const float id = d ? 1.0f/d : 0.0f;
|
||||
|
||||
float sumqx = 0, sumq2 = 0;
|
||||
for (int j = 0; j < QK4_NL/2; ++j) {
|
||||
const float x0 = src[0 + j]*id;
|
||||
const float x1 = src[QK4_NL/2 + j]*id;
|
||||
|
||||
const uint8_t xi0 = best_index_int8(16, kvalues_iq4nl_f, x0);
|
||||
const uint8_t xi1 = best_index_int8(16, kvalues_iq4nl_f, x1);
|
||||
|
||||
dst_data[i00/QK4_NL].qs[j] = xi0 | (xi1 << 4);
|
||||
|
||||
const float v0 = kvalues_iq4nl_f[xi0];
|
||||
const float v1 = kvalues_iq4nl_f[xi1];
|
||||
const float w0 = src[0 + j]*src[0 + j];
|
||||
const float w1 = src[QK4_NL/2 + j]*src[QK4_NL/2 + j];
|
||||
sumqx += w0*v0*src[j] + w1*v1*src[QK4_NL/2 + j];
|
||||
sumq2 += w0*v0*v0 + w1*v1*v1;
|
||||
|
||||
}
|
||||
|
||||
dst_data[i00/QK4_NL].d = sumq2 > 0 ? sumqx/sumq2 : d;
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
kernel void kernel_concat(
|
||||
device const char * src0,
|
||||
device const char * src1,
|
||||
@@ -4220,10 +4456,6 @@ void kernel_mul_mv_iq1_s_f32_impl(
|
||||
}
|
||||
}
|
||||
|
||||
constexpr constant static float kvalues_iq4nl_f[16] = {
|
||||
-127.f, -104.f, -83.f, -65.f, -49.f, -35.f, -22.f, -10.f, 1.f, 13.f, 25.f, 38.f, 53.f, 69.f, 89.f, 113.f
|
||||
};
|
||||
|
||||
void kernel_mul_mv_iq4_nl_f32_impl(
|
||||
device const void * src0,
|
||||
device const float * src1,
|
||||
@@ -5528,18 +5760,18 @@ typedef void (mat_mm_t)(
|
||||
threadgroup uchar *,
|
||||
uint3, uint, uint);
|
||||
|
||||
template [[host_name("kernel_mul_mm_f32_f32")]] kernel mat_mm_t kernel_mul_mm<float4x4, 1, dequantize_f32>;
|
||||
template [[host_name("kernel_mul_mm_f16_f32")]] kernel mat_mm_t kernel_mul_mm<half4x4, 1, dequantize_f16>;
|
||||
template [[host_name("kernel_mul_mm_q4_0_f32")]] kernel mat_mm_t kernel_mul_mm<block_q4_0, 2, dequantize_q4_0>;
|
||||
template [[host_name("kernel_mul_mm_q4_1_f32")]] kernel mat_mm_t kernel_mul_mm<block_q4_1, 2, dequantize_q4_1>;
|
||||
template [[host_name("kernel_mul_mm_q5_0_f32")]] kernel mat_mm_t kernel_mul_mm<block_q5_0, 2, dequantize_q5_0>;
|
||||
template [[host_name("kernel_mul_mm_q5_1_f32")]] kernel mat_mm_t kernel_mul_mm<block_q5_1, 2, dequantize_q5_1>;
|
||||
template [[host_name("kernel_mul_mm_q8_0_f32")]] kernel mat_mm_t kernel_mul_mm<block_q8_0, 2, dequantize_q8_0>;
|
||||
template [[host_name("kernel_mul_mm_q2_K_f32")]] kernel mat_mm_t kernel_mul_mm<block_q2_K, QK_NL, dequantize_q2_K>;
|
||||
template [[host_name("kernel_mul_mm_q3_K_f32")]] kernel mat_mm_t kernel_mul_mm<block_q3_K, QK_NL, dequantize_q3_K>;
|
||||
template [[host_name("kernel_mul_mm_q4_K_f32")]] kernel mat_mm_t kernel_mul_mm<block_q4_K, QK_NL, dequantize_q4_K>;
|
||||
template [[host_name("kernel_mul_mm_q5_K_f32")]] kernel mat_mm_t kernel_mul_mm<block_q5_K, QK_NL, dequantize_q5_K>;
|
||||
template [[host_name("kernel_mul_mm_q6_K_f32")]] kernel mat_mm_t kernel_mul_mm<block_q6_K, QK_NL, dequantize_q6_K>;
|
||||
template [[host_name("kernel_mul_mm_f32_f32")]] kernel mat_mm_t kernel_mul_mm<float4x4, 1, dequantize_f32>;
|
||||
template [[host_name("kernel_mul_mm_f16_f32")]] kernel mat_mm_t kernel_mul_mm<half4x4, 1, dequantize_f16>;
|
||||
template [[host_name("kernel_mul_mm_q4_0_f32")]] kernel mat_mm_t kernel_mul_mm<block_q4_0, 2, dequantize_q4_0>;
|
||||
template [[host_name("kernel_mul_mm_q4_1_f32")]] kernel mat_mm_t kernel_mul_mm<block_q4_1, 2, dequantize_q4_1>;
|
||||
template [[host_name("kernel_mul_mm_q5_0_f32")]] kernel mat_mm_t kernel_mul_mm<block_q5_0, 2, dequantize_q5_0>;
|
||||
template [[host_name("kernel_mul_mm_q5_1_f32")]] kernel mat_mm_t kernel_mul_mm<block_q5_1, 2, dequantize_q5_1>;
|
||||
template [[host_name("kernel_mul_mm_q8_0_f32")]] kernel mat_mm_t kernel_mul_mm<block_q8_0, 2, dequantize_q8_0>;
|
||||
template [[host_name("kernel_mul_mm_q2_K_f32")]] kernel mat_mm_t kernel_mul_mm<block_q2_K, QK_NL, dequantize_q2_K>;
|
||||
template [[host_name("kernel_mul_mm_q3_K_f32")]] kernel mat_mm_t kernel_mul_mm<block_q3_K, QK_NL, dequantize_q3_K>;
|
||||
template [[host_name("kernel_mul_mm_q4_K_f32")]] kernel mat_mm_t kernel_mul_mm<block_q4_K, QK_NL, dequantize_q4_K>;
|
||||
template [[host_name("kernel_mul_mm_q5_K_f32")]] kernel mat_mm_t kernel_mul_mm<block_q5_K, QK_NL, dequantize_q5_K>;
|
||||
template [[host_name("kernel_mul_mm_q6_K_f32")]] kernel mat_mm_t kernel_mul_mm<block_q6_K, QK_NL, dequantize_q6_K>;
|
||||
template [[host_name("kernel_mul_mm_iq2_xxs_f32")]] kernel mat_mm_t kernel_mul_mm<block_iq2_xxs, QK_NL, dequantize_iq2_xxs>;
|
||||
template [[host_name("kernel_mul_mm_iq2_xs_f32")]] kernel mat_mm_t kernel_mul_mm<block_iq2_xs, QK_NL, dequantize_iq2_xs>;
|
||||
template [[host_name("kernel_mul_mm_iq3_xxs_f32")]] kernel mat_mm_t kernel_mul_mm<block_iq3_xxs, QK_NL, dequantize_iq3_xxs>;
|
||||
@@ -5588,18 +5820,18 @@ typedef void (mat_mm_id_t)(
|
||||
threadgroup uchar *,
|
||||
uint3, uint, uint);
|
||||
|
||||
template [[host_name("kernel_mul_mm_id_f32_f32")]] kernel mat_mm_id_t kernel_mul_mm_id<float4x4, 1, dequantize_f32>;
|
||||
template [[host_name("kernel_mul_mm_id_f16_f32")]] kernel mat_mm_id_t kernel_mul_mm_id<half4x4, 1, dequantize_f16>;
|
||||
template [[host_name("kernel_mul_mm_id_q4_0_f32")]] kernel mat_mm_id_t kernel_mul_mm_id<block_q4_0, 2, dequantize_q4_0>;
|
||||
template [[host_name("kernel_mul_mm_id_q4_1_f32")]] kernel mat_mm_id_t kernel_mul_mm_id<block_q4_1, 2, dequantize_q4_1>;
|
||||
template [[host_name("kernel_mul_mm_id_q5_0_f32")]] kernel mat_mm_id_t kernel_mul_mm_id<block_q5_0, 2, dequantize_q5_0>;
|
||||
template [[host_name("kernel_mul_mm_id_q5_1_f32")]] kernel mat_mm_id_t kernel_mul_mm_id<block_q5_1, 2, dequantize_q5_1>;
|
||||
template [[host_name("kernel_mul_mm_id_q8_0_f32")]] kernel mat_mm_id_t kernel_mul_mm_id<block_q8_0, 2, dequantize_q8_0>;
|
||||
template [[host_name("kernel_mul_mm_id_q2_K_f32")]] kernel mat_mm_id_t kernel_mul_mm_id<block_q2_K, QK_NL, dequantize_q2_K>;
|
||||
template [[host_name("kernel_mul_mm_id_q3_K_f32")]] kernel mat_mm_id_t kernel_mul_mm_id<block_q3_K, QK_NL, dequantize_q3_K>;
|
||||
template [[host_name("kernel_mul_mm_id_q4_K_f32")]] kernel mat_mm_id_t kernel_mul_mm_id<block_q4_K, QK_NL, dequantize_q4_K>;
|
||||
template [[host_name("kernel_mul_mm_id_q5_K_f32")]] kernel mat_mm_id_t kernel_mul_mm_id<block_q5_K, QK_NL, dequantize_q5_K>;
|
||||
template [[host_name("kernel_mul_mm_id_q6_K_f32")]] kernel mat_mm_id_t kernel_mul_mm_id<block_q6_K, QK_NL, dequantize_q6_K>;
|
||||
template [[host_name("kernel_mul_mm_id_f32_f32")]] kernel mat_mm_id_t kernel_mul_mm_id<float4x4, 1, dequantize_f32>;
|
||||
template [[host_name("kernel_mul_mm_id_f16_f32")]] kernel mat_mm_id_t kernel_mul_mm_id<half4x4, 1, dequantize_f16>;
|
||||
template [[host_name("kernel_mul_mm_id_q4_0_f32")]] kernel mat_mm_id_t kernel_mul_mm_id<block_q4_0, 2, dequantize_q4_0>;
|
||||
template [[host_name("kernel_mul_mm_id_q4_1_f32")]] kernel mat_mm_id_t kernel_mul_mm_id<block_q4_1, 2, dequantize_q4_1>;
|
||||
template [[host_name("kernel_mul_mm_id_q5_0_f32")]] kernel mat_mm_id_t kernel_mul_mm_id<block_q5_0, 2, dequantize_q5_0>;
|
||||
template [[host_name("kernel_mul_mm_id_q5_1_f32")]] kernel mat_mm_id_t kernel_mul_mm_id<block_q5_1, 2, dequantize_q5_1>;
|
||||
template [[host_name("kernel_mul_mm_id_q8_0_f32")]] kernel mat_mm_id_t kernel_mul_mm_id<block_q8_0, 2, dequantize_q8_0>;
|
||||
template [[host_name("kernel_mul_mm_id_q2_K_f32")]] kernel mat_mm_id_t kernel_mul_mm_id<block_q2_K, QK_NL, dequantize_q2_K>;
|
||||
template [[host_name("kernel_mul_mm_id_q3_K_f32")]] kernel mat_mm_id_t kernel_mul_mm_id<block_q3_K, QK_NL, dequantize_q3_K>;
|
||||
template [[host_name("kernel_mul_mm_id_q4_K_f32")]] kernel mat_mm_id_t kernel_mul_mm_id<block_q4_K, QK_NL, dequantize_q4_K>;
|
||||
template [[host_name("kernel_mul_mm_id_q5_K_f32")]] kernel mat_mm_id_t kernel_mul_mm_id<block_q5_K, QK_NL, dequantize_q5_K>;
|
||||
template [[host_name("kernel_mul_mm_id_q6_K_f32")]] kernel mat_mm_id_t kernel_mul_mm_id<block_q6_K, QK_NL, dequantize_q6_K>;
|
||||
template [[host_name("kernel_mul_mm_id_iq2_xxs_f32")]] kernel mat_mm_id_t kernel_mul_mm_id<block_iq2_xxs, QK_NL, dequantize_iq2_xxs>;
|
||||
template [[host_name("kernel_mul_mm_id_iq2_xs_f32")]] kernel mat_mm_id_t kernel_mul_mm_id<block_iq2_xs, QK_NL, dequantize_iq2_xs>;
|
||||
template [[host_name("kernel_mul_mm_id_iq3_xxs_f32")]] kernel mat_mm_id_t kernel_mul_mm_id<block_iq3_xxs, QK_NL, dequantize_iq3_xxs>;
|
||||
|
||||
@@ -11705,9 +11705,8 @@ static void quantize_row_iq4_nl_impl(const int super_block_size, const int block
|
||||
ggml_fp16_t * dh, uint8_t * q4, uint16_t * scales_h, uint8_t * scales_l,
|
||||
float * scales, float * weight, uint8_t * L,
|
||||
const int8_t * values,
|
||||
const float * quant_weights) {
|
||||
|
||||
const int ntry = 7;
|
||||
const float * quant_weights,
|
||||
const int ntry) {
|
||||
|
||||
float sigma2 = 0;
|
||||
for (int j = 0; j < super_block_size; ++j) sigma2 += x[j]*x[j];
|
||||
@@ -11719,6 +11718,7 @@ static void quantize_row_iq4_nl_impl(const int super_block_size, const int block
|
||||
float max_scale = 0, amax_scale = 0;
|
||||
for (int ib = 0; ib < super_block_size/block_size; ++ib) {
|
||||
const float * xb = x + ib*block_size;
|
||||
uint8_t * Lb = L + ib*block_size;
|
||||
if (quant_weights) {
|
||||
const float * qw = quant_weights + ib*block_size;
|
||||
for (int j = 0; j < block_size; ++j) weight[j] = qw[j] * sqrtf(sigma2 + xb[j]*xb[j]);
|
||||
@@ -11736,12 +11736,13 @@ static void quantize_row_iq4_nl_impl(const int super_block_size, const int block
|
||||
scales[ib] = 0;
|
||||
continue;
|
||||
}
|
||||
float d = -max/values[0];
|
||||
float d = ntry > 0 ? -max/values[0] : max/values[0];
|
||||
float id = 1/d;
|
||||
float sumqx = 0, sumq2 = 0;
|
||||
for (int j = 0; j < block_size; ++j) {
|
||||
float al = id*xb[j];
|
||||
int l = best_index_int8(16, values, al);
|
||||
Lb[j] = l;
|
||||
float q = values[l];
|
||||
float w = weight[j];
|
||||
sumqx += w*q*xb[j];
|
||||
@@ -11796,9 +11797,11 @@ static void quantize_row_iq4_nl_impl(const int super_block_size, const int block
|
||||
}
|
||||
} else {
|
||||
dh[0] = GGML_FP32_TO_FP16(scales[0]);
|
||||
float id = scales[0] ? 1/scales[0] : 0;
|
||||
for (int j = 0; j < super_block_size; ++j) {
|
||||
L[j] = best_index_int8(16, values, id*x[j]);
|
||||
if (ntry > 0) {
|
||||
float id = scales[0] ? 1/scales[0] : 0;
|
||||
for (int j = 0; j < super_block_size; ++j) {
|
||||
L[j] = best_index_int8(16, values, id*x[j]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -11823,7 +11826,7 @@ size_t quantize_iq4_nl(const float * restrict src, void * restrict dst, int nrow
|
||||
for (int ibl = 0; ibl < nblock; ++ibl) {
|
||||
const float * qw = quant_weights ? quant_weights + QK4_NL*ibl : NULL;
|
||||
quantize_row_iq4_nl_impl(QK4_NL, 32, src + QK4_NL*ibl, &iq4[ibl].d, iq4[ibl].qs, &unused_h, unused_l,
|
||||
&scale, weight, L, kvalues_iq4nl, qw);
|
||||
&scale, weight, L, kvalues_iq4nl, qw, 7);
|
||||
}
|
||||
src += n_per_row;
|
||||
qrow += nblock*sizeof(block_iq4_nl);
|
||||
@@ -11832,14 +11835,23 @@ size_t quantize_iq4_nl(const float * restrict src, void * restrict dst, int nrow
|
||||
}
|
||||
|
||||
void quantize_row_iq4_nl(const float * restrict x, void * restrict vy, int k) {
|
||||
assert(k % QK4_NL == 0);
|
||||
block_iq4_nl * restrict y = vy;
|
||||
quantize_row_iq4_nl_reference(x, y, k);
|
||||
GGML_ASSERT(k%QK4_NL == 0);
|
||||
int nblock = k/QK4_NL;
|
||||
uint8_t L[QK4_NL];
|
||||
float weight[QK4_NL];
|
||||
uint16_t unused_h;
|
||||
uint8_t * unused_l = NULL;
|
||||
float scale;
|
||||
block_iq4_nl * iq4 = (block_iq4_nl *)vy;
|
||||
for (int ibl = 0; ibl < nblock; ++ibl) {
|
||||
quantize_row_iq4_nl_impl(QK4_NL, 32, x + QK4_NL*ibl, &iq4[ibl].d, iq4[ibl].qs, &unused_h, unused_l,
|
||||
&scale, weight, L, kvalues_iq4nl, NULL, -1);
|
||||
}
|
||||
}
|
||||
|
||||
void quantize_row_iq4_nl_reference(const float * restrict x, block_iq4_nl * restrict y, int k) {
|
||||
assert(k % QK4_NL == 0);
|
||||
quantize_iq4_nl(x, y, 1, k, NULL);
|
||||
quantize_row_iq4_nl(x, y, k);
|
||||
}
|
||||
|
||||
size_t quantize_iq4_xs(const float * restrict src, void * restrict dst, int nrow, int n_per_row, const float * quant_weights) {
|
||||
@@ -11857,7 +11869,7 @@ size_t quantize_iq4_xs(const float * restrict src, void * restrict dst, int nrow
|
||||
for (int ibl = 0; ibl < nblock; ++ibl) {
|
||||
const float * qw = quant_weights ? quant_weights + QK_K*ibl : NULL;
|
||||
quantize_row_iq4_nl_impl(QK_K, 32, src + QK_K*ibl, &iq4[ibl].d, iq4[ibl].qs, &iq4[ibl].scales_h, iq4[ibl].scales_l,
|
||||
scales, weight, L, kvalues_iq4nl, qw);
|
||||
scales, weight, L, kvalues_iq4nl, qw, 7);
|
||||
}
|
||||
src += n_per_row;
|
||||
qrow += nblock*sizeof(block_iq4_xs);
|
||||
|
||||
@@ -977,8 +977,10 @@ namespace dpct
|
||||
static int convert_backend_index(std::string & backend) {
|
||||
if (backend == "ext_oneapi_level_zero:gpu") return 0;
|
||||
if (backend == "opencl:gpu") return 1;
|
||||
if (backend == "opencl:cpu") return 2;
|
||||
if (backend == "opencl:acc") return 3;
|
||||
if (backend == "ext_oneapi_cuda:gpu") return 2;
|
||||
if (backend == "ext_oneapi_hip:gpu") return 3;
|
||||
if (backend == "opencl:cpu") return 4;
|
||||
if (backend == "opencl:acc") return 5;
|
||||
printf("convert_backend_index: can't handle backend=%s\n", backend.c_str());
|
||||
GGML_ASSERT(false);
|
||||
}
|
||||
|
||||
@@ -13,7 +13,7 @@
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#define GGML_SYCL_MAX_DEVICES 16
|
||||
#define GGML_SYCL_MAX_DEVICES 48
|
||||
#define GGML_SYCL_NAME "SYCL"
|
||||
|
||||
GGML_API void ggml_init_sycl(void);
|
||||
|
||||
20
llama.cpp
20
llama.cpp
@@ -540,6 +540,7 @@ static const std::map<llm_arch, std::map<llm_tensor, std::string>> LLM_TENSOR_NA
|
||||
{
|
||||
{ LLM_TENSOR_TOKEN_EMBD, "token_embd" },
|
||||
{ LLM_TENSOR_OUTPUT_NORM, "output_norm" },
|
||||
{ LLM_TENSOR_OUTPUT, "output"},
|
||||
{ LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
|
||||
{ LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
|
||||
{ LLM_TENSOR_ATTN_QKV, "blk.%d.attn_qkv" },
|
||||
@@ -4300,9 +4301,9 @@ static bool llm_load_tensors(
|
||||
{
|
||||
model.output_norm = ml.create_tensor(ctx_output, tn(LLM_TENSOR_OUTPUT_NORM, "weight"), {n_embd});
|
||||
model.output_norm_b = ml.create_tensor(ctx_output, tn(LLM_TENSOR_OUTPUT_NORM, "bias"), {n_embd});
|
||||
if (gguf_find_tensor(ml.ctx_gguf, tn(LLM_TENSOR_OUTPUT, "weight").c_str()) >= 0) {
|
||||
model.output = ml.create_tensor(ctx_output_split, tn(LLM_TENSOR_OUTPUT, "weight"), {n_embd, n_vocab});
|
||||
} else {
|
||||
|
||||
model.output = ml.create_tensor(ctx_output_split, tn(LLM_TENSOR_OUTPUT, "weight"), {n_embd, n_vocab}, false);
|
||||
if (!model.output) {
|
||||
model.output = ml.create_tensor(ctx_output_split, tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab}); // needs to be on GPU
|
||||
ml.n_created--; // artificial tensor
|
||||
ml.size_data += ggml_nbytes(model.output);
|
||||
@@ -4507,10 +4508,12 @@ static bool llm_load_tensors(
|
||||
model.output_norm = ml.create_tensor(ctx_output, tn(LLM_TENSOR_OUTPUT_NORM, "weight"), {n_embd});
|
||||
model.output_norm_b = ml.create_tensor(ctx_output, tn(LLM_TENSOR_OUTPUT_NORM, "bias"), {n_embd}, false);
|
||||
|
||||
// same as tok_embd, duplicated to allow offloading
|
||||
model.output = ml.create_tensor(ctx_output_split, tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab});
|
||||
ml.n_created--; // artificial tensor
|
||||
ml.size_data += ggml_nbytes(model.output);
|
||||
model.output = ml.create_tensor(ctx_output_split, tn(LLM_TENSOR_OUTPUT, "weight"), {n_embd, n_vocab}, false);
|
||||
if (!model.output) {
|
||||
model.output = ml.create_tensor(ctx_output_split, tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab}); // needs to be on GPU
|
||||
ml.n_created--; // artificial tensor
|
||||
ml.size_data += ggml_nbytes(model.output);
|
||||
}
|
||||
}
|
||||
|
||||
for (int i = 0; i < n_layer; ++i) {
|
||||
@@ -13041,6 +13044,9 @@ struct llama_context * llama_new_context_with_model(
|
||||
cparams.rope_freq_base = params.rope_freq_base == 0.0f ? hparams.rope_freq_base_train : params.rope_freq_base;
|
||||
cparams.rope_freq_scale = params.rope_freq_scale == 0.0f ? hparams.rope_freq_scale_train : params.rope_freq_scale;
|
||||
|
||||
// this is necessary due to kv_self.n being padded later during inference
|
||||
cparams.n_ctx = GGML_PAD(cparams.n_ctx, 32);
|
||||
|
||||
// with causal attention, the batch size is limited by the context size
|
||||
cparams.n_batch = hparams.causal_attn ? std::min(cparams.n_ctx, params.n_batch) : params.n_batch;
|
||||
cparams.n_ubatch = std::min(cparams.n_batch, params.n_ubatch == 0 ? params.n_batch : params.n_ubatch);
|
||||
|
||||
@@ -1,66 +1,75 @@
|
||||
function(llama_build_executable source)
|
||||
get_filename_component(TEST_TARGET ${source} NAME_WE)
|
||||
# Builds and runs a test source file.
|
||||
# Optional args:
|
||||
# - NAME: name of the executable & test target (defaults to the source file name without extension)
|
||||
# - LABEL: label for the test (defaults to main)
|
||||
# - ARGS: arguments to pass to the test executable
|
||||
# - WORKING_DIRECTORY
|
||||
function(llama_test source)
|
||||
include(CMakeParseArguments)
|
||||
set(options)
|
||||
set(oneValueArgs NAME LABEL WORKING_DIRECTORY)
|
||||
set(multiValueArgs ARGS)
|
||||
cmake_parse_arguments(LLAMA_TEST "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
|
||||
|
||||
if (NOT DEFINED LLAMA_TEST_LABEL)
|
||||
set(LLAMA_TEST_LABEL "main")
|
||||
endif()
|
||||
if (NOT DEFINED LLAMA_TEST_WORKING_DIRECTORY)
|
||||
set(LLAMA_TEST_WORKING_DIRECTORY .)
|
||||
endif()
|
||||
if (DEFINED LLAMA_TEST_NAME)
|
||||
set(TEST_TARGET ${LLAMA_TEST_NAME})
|
||||
else()
|
||||
get_filename_component(TEST_TARGET ${source} NAME_WE)
|
||||
endif()
|
||||
|
||||
add_executable(${TEST_TARGET} ${source} get-model.cpp)
|
||||
install(TARGETS ${TEST_TARGET} RUNTIME)
|
||||
target_link_libraries(${TEST_TARGET} PRIVATE common)
|
||||
target_link_libraries(${TEST_TARGET} PRIVATE common json-schema-to-grammar)
|
||||
add_test(
|
||||
NAME ${TEST_TARGET}
|
||||
WORKING_DIRECTORY ${LLAMA_TEST_WORKING_DIRECTORY}
|
||||
COMMAND $<TARGET_FILE:${TEST_TARGET}>
|
||||
${LLAMA_TEST_ARGS})
|
||||
|
||||
set_property(TEST ${TEST_TARGET} PROPERTY LABELS ${LLAMA_TEST_LABEL})
|
||||
endfunction()
|
||||
|
||||
function(llama_test_executable name source)
|
||||
get_filename_component(TEST_TARGET ${source} NAME_WE)
|
||||
add_test(NAME ${name} COMMAND $<TARGET_FILE:${TEST_TARGET}> ${ARGN})
|
||||
set_property(TEST ${name} PROPERTY LABELS "main")
|
||||
endfunction()
|
||||
# llama_test(test-double-float.cpp) # SLOW
|
||||
llama_test(test-quantize-fns.cpp)
|
||||
llama_test(test-quantize-perf.cpp)
|
||||
llama_test(test-sampling.cpp)
|
||||
llama_test(test-chat-template.cpp)
|
||||
|
||||
function(llama_build_and_test_executable source)
|
||||
llama_build_and_test_executable_with_label(${source} "main")
|
||||
endfunction()
|
||||
llama_test(test-tokenizer-0-llama.cpp NAME test-tokenizer-0-llama ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-llama.gguf)
|
||||
llama_test(test-tokenizer-0-falcon.cpp NAME test-tokenizer-0-falcon ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-falcon.gguf)
|
||||
|
||||
function(llama_build_and_test_executable_with_label source label)
|
||||
get_filename_component(TEST_TARGET ${source} NAME_WE)
|
||||
add_executable(${TEST_TARGET} ${source} get-model.cpp)
|
||||
install(TARGETS ${TEST_TARGET} RUNTIME)
|
||||
target_link_libraries(${TEST_TARGET} PRIVATE common)
|
||||
add_test(NAME ${TEST_TARGET} COMMAND $<TARGET_FILE:${TEST_TARGET}> ${ARGN})
|
||||
set_property(TEST ${TEST_TARGET} PROPERTY LABELS ${label})
|
||||
endfunction()
|
||||
llama_test(test-tokenizer-1-llama.cpp NAME test-tokenizer-1-llama ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-llama.gguf)
|
||||
llama_test(test-tokenizer-1-llama.cpp NAME test-tokenizer-1-baichuan ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-baichuan.gguf)
|
||||
|
||||
# llama_build_and_test_executable(test-double-float.cpp) # SLOW
|
||||
llama_build_and_test_executable(test-quantize-fns.cpp)
|
||||
llama_build_and_test_executable(test-quantize-perf.cpp)
|
||||
llama_build_and_test_executable(test-sampling.cpp)
|
||||
llama_build_and_test_executable(test-chat-template.cpp)
|
||||
llama_test(test-tokenizer-1-bpe.cpp NAME test-tokenizer-1-falcon ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-falcon.gguf)
|
||||
llama_test(test-tokenizer-1-bpe.cpp NAME test-tokenizer-1-aquila ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-aquila.gguf)
|
||||
llama_test(test-tokenizer-1-bpe.cpp NAME test-tokenizer-1-mpt ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-mpt.gguf)
|
||||
llama_test(test-tokenizer-1-bpe.cpp NAME test-tokenizer-1-stablelm-3b-4e1t ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-stablelm-3b-4e1t.gguf)
|
||||
llama_test(test-tokenizer-1-bpe.cpp NAME test-tokenizer-1-gpt-neox ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-gpt-neox.gguf)
|
||||
llama_test(test-tokenizer-1-bpe.cpp NAME test-tokenizer-1-refact ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-refact.gguf)
|
||||
llama_test(test-tokenizer-1-bpe.cpp NAME test-tokenizer-1-starcoder ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-starcoder.gguf)
|
||||
llama_test(test-tokenizer-1-bpe.cpp NAME test-tokenizer-1-gpt2 ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-gpt2.gguf)
|
||||
#llama_test(test-tokenizer-1-bpe.cpp NAME test-tokenizer-1-bloom ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-bloom.gguf) # BIG
|
||||
|
||||
llama_build_executable(test-tokenizer-0-llama.cpp)
|
||||
llama_test_executable (test-tokenizer-0-llama test-tokenizer-0-llama.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-llama.gguf)
|
||||
llama_test(test-grammar-parser.cpp)
|
||||
llama_test(test-llama-grammar.cpp)
|
||||
llama_test(test-grad0.cpp)
|
||||
# llama_test(test-opt.cpp) # SLOW
|
||||
llama_test(test-backend-ops.cpp)
|
||||
|
||||
llama_build_executable(test-tokenizer-0-falcon.cpp)
|
||||
llama_test_executable (test-tokenizer-0-falcon test-tokenizer-0-falcon.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-falcon.gguf)
|
||||
llama_test(test-rope.cpp)
|
||||
|
||||
llama_build_executable(test-tokenizer-1-llama.cpp)
|
||||
llama_test_executable (test-tokenizer-1-llama test-tokenizer-1-llama.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-llama.gguf)
|
||||
llama_test_executable (test-tokenizer-1-baichuan test-tokenizer-1-llama.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-baichuan.gguf)
|
||||
llama_test(test-model-load-cancel.cpp LABEL "model")
|
||||
llama_test(test-autorelease.cpp LABEL "model")
|
||||
|
||||
llama_build_executable(test-tokenizer-1-bpe.cpp)
|
||||
llama_test_executable (test-tokenizer-1-falcon test-tokenizer-1-bpe.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-falcon.gguf)
|
||||
llama_test_executable (test-tokenizer-1-aquila test-tokenizer-1-bpe.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-aquila.gguf)
|
||||
llama_test_executable (test-tokenizer-1-mpt test-tokenizer-1-bpe.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-mpt.gguf)
|
||||
llama_test_executable (test-tokenizer-1-stablelm-3b-4e1t test-tokenizer-1-bpe.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-stablelm-3b-4e1t.gguf)
|
||||
llama_test_executable (test-tokenizer-1-gpt-neox test-tokenizer-1-bpe.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-gpt-neox.gguf)
|
||||
llama_test_executable (test-tokenizer-1-refact test-tokenizer-1-bpe.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-refact.gguf)
|
||||
llama_test_executable (test-tokenizer-1-starcoder test-tokenizer-1-bpe.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-starcoder.gguf)
|
||||
llama_test_executable (test-tokenizer-1-gpt2 test-tokenizer-1-bpe.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-gpt2.gguf)
|
||||
# llama_test_executable (test-tokenizer-1-bloom test-tokenizer-1-bpe.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-bloom.gguf) # BIG
|
||||
|
||||
llama_build_and_test_executable(test-grammar-parser.cpp)
|
||||
llama_build_and_test_executable(test-llama-grammar.cpp)
|
||||
llama_build_and_test_executable(test-grad0.cpp)
|
||||
# llama_build_and_test_executable(test-opt.cpp) # SLOW
|
||||
llama_build_and_test_executable(test-backend-ops.cpp)
|
||||
|
||||
llama_build_and_test_executable(test-rope.cpp)
|
||||
|
||||
llama_build_and_test_executable_with_label(test-model-load-cancel.cpp "model")
|
||||
llama_build_and_test_executable_with_label(test-autorelease.cpp "model")
|
||||
llama_test(test-json-schema-to-grammar.cpp WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/..)
|
||||
target_include_directories(test-json-schema-to-grammar PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../examples/server)
|
||||
|
||||
# dummy executable - not installed
|
||||
get_filename_component(TEST_TARGET test-c.c NAME_WE)
|
||||
|
||||
10
tests/run-json-schema-to-grammar.mjs
Normal file
10
tests/run-json-schema-to-grammar.mjs
Normal file
@@ -0,0 +1,10 @@
|
||||
import { readFileSync } from "fs"
|
||||
import { SchemaConverter } from "../examples/server/public/json-schema-to-grammar.mjs"
|
||||
|
||||
const [, , file] = process.argv
|
||||
const url = `file://${file}`
|
||||
let schema = JSON.parse(readFileSync(file, "utf8"));
|
||||
const converter = new SchemaConverter({})
|
||||
schema = await converter.resolveRefs(schema, url)
|
||||
converter.visit(schema, '')
|
||||
console.log(converter.formatGrammar())
|
||||
@@ -2091,6 +2091,13 @@ static bool test_backend(ggml_backend_t backend, test_mode mode, const char * op
|
||||
}
|
||||
}
|
||||
|
||||
test_cases.emplace_back(new test_mul_mat(GGML_TYPE_F16, GGML_TYPE_F32, 64, 2, 128, { 8, 1}, {1, 1}));
|
||||
test_cases.emplace_back(new test_mul_mat(GGML_TYPE_F16, GGML_TYPE_F32, 83, 2, 128, { 8, 1}, {4, 1}));
|
||||
test_cases.emplace_back(new test_mul_mat(GGML_TYPE_F16, GGML_TYPE_F32, 64, 2, 64, { 8, 1}, {4, 1}));
|
||||
test_cases.emplace_back(new test_mul_mat(GGML_TYPE_F16, GGML_TYPE_F32, 83, 2, 64, { 8, 1}, {4, 1}));
|
||||
test_cases.emplace_back(new test_mul_mat(GGML_TYPE_F16, GGML_TYPE_F32, 64, 45, 128, { 8, 1}, {4, 1}));
|
||||
test_cases.emplace_back(new test_mul_mat(GGML_TYPE_F16, GGML_TYPE_F32, 128, 45, 64, { 8, 1}, {4, 1}));
|
||||
|
||||
for (ggml_type type_a : all_types) {
|
||||
for (ggml_type type_b : {GGML_TYPE_F32 /*, GGML_TYPE_F16 */}) {
|
||||
for (int n_mats : {2, 4, 8}) {
|
||||
|
||||
824
tests/test-json-schema-to-grammar.cpp
Executable file
824
tests/test-json-schema-to-grammar.cpp
Executable file
@@ -0,0 +1,824 @@
|
||||
#ifdef NDEBUG
|
||||
#undef NDEBUG
|
||||
#endif
|
||||
|
||||
#include <fstream>
|
||||
#include <sstream>
|
||||
#include <regex>
|
||||
|
||||
#include "json-schema-to-grammar.h"
|
||||
#include "grammar-parser.h"
|
||||
|
||||
static std::string trim(const std::string & source) {
|
||||
std::string s(source);
|
||||
s.erase(0,s.find_first_not_of(" \n\r\t"));
|
||||
s.erase(s.find_last_not_of(" \n\r\t")+1);
|
||||
return std::regex_replace(s, std::regex("(^|\n)[ \t]+"), "$1");
|
||||
}
|
||||
|
||||
enum TestCaseStatus {
|
||||
SUCCESS,
|
||||
FAILURE
|
||||
};
|
||||
|
||||
struct TestCase {
|
||||
TestCaseStatus expected_status;
|
||||
std::string name;
|
||||
std::string schema;
|
||||
std::string expected_grammar;
|
||||
|
||||
void _print_failure_header() const {
|
||||
fprintf(stderr, "#\n# Test '%s' failed.\n#\n%s\n", name.c_str(), schema.c_str());
|
||||
}
|
||||
void verify(const std::string & actual_grammar) const {
|
||||
if (trim(actual_grammar) != trim(expected_grammar)) {
|
||||
_print_failure_header();
|
||||
fprintf(stderr, "# EXPECTED:\n%s\n# ACTUAL:\n%s\n", expected_grammar.c_str(), actual_grammar.c_str());
|
||||
assert(false);
|
||||
}
|
||||
}
|
||||
void verify_expectation_parseable() const {
|
||||
try {
|
||||
auto state = grammar_parser::parse(expected_grammar.c_str());
|
||||
if (state.symbol_ids.find("root") == state.symbol_ids.end()) {
|
||||
throw std::runtime_error("Grammar failed to parse:\n" + expected_grammar);
|
||||
}
|
||||
} catch (const std::runtime_error & ex) {
|
||||
_print_failure_header();
|
||||
fprintf(stderr, "# GRAMMAR ERROR: %s\n", ex.what());
|
||||
assert(false);
|
||||
}
|
||||
}
|
||||
void verify_status(TestCaseStatus status) const {
|
||||
if (status != expected_status) {
|
||||
_print_failure_header();
|
||||
fprintf(stderr, "# EXPECTED STATUS: %s\n", expected_status == SUCCESS ? "SUCCESS" : "FAILURE");
|
||||
fprintf(stderr, "# ACTUAL STATUS: %s\n", status == SUCCESS ? "SUCCESS" : "FAILURE");
|
||||
assert(false);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
static void write(const std::string & file, const std::string & content) {
|
||||
std::ofstream f;
|
||||
f.open(file.c_str());
|
||||
f << content.c_str();
|
||||
f.close();
|
||||
}
|
||||
|
||||
static std::string read(const std::string & file) {
|
||||
std::ostringstream actuals;
|
||||
actuals << std::ifstream(file.c_str()).rdbuf();
|
||||
return actuals.str();
|
||||
}
|
||||
|
||||
static void test_all(const std::string & lang, std::function<void(const TestCase &)> runner) {
|
||||
fprintf(stderr, "#\n# Testing JSON schema conversion (%s)\n#\n", lang.c_str());
|
||||
auto test = [&](const TestCase & tc) {
|
||||
fprintf(stderr, "- %s%s\n", tc.name.c_str(), tc.expected_status == FAILURE ? " (failure expected)" : "");
|
||||
runner(tc);
|
||||
};
|
||||
|
||||
test({
|
||||
FAILURE,
|
||||
"unknown type",
|
||||
R"""({
|
||||
"type": "kaboom"
|
||||
})""",
|
||||
""
|
||||
});
|
||||
|
||||
test({
|
||||
FAILURE,
|
||||
"invalid type type",
|
||||
R"""({
|
||||
"type": 123
|
||||
})""",
|
||||
""
|
||||
});
|
||||
|
||||
test({
|
||||
SUCCESS,
|
||||
"empty schema (object)",
|
||||
"{}",
|
||||
R"""(
|
||||
array ::= "[" space ( value ("," space value)* )? "]" space
|
||||
boolean ::= ("true" | "false") space
|
||||
null ::= "null" space
|
||||
number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? space
|
||||
object ::= "{" space ( string ":" space value ("," space string ":" space value)* )? "}" space
|
||||
root ::= object
|
||||
space ::= " "?
|
||||
string ::= "\"" (
|
||||
[^"\\] |
|
||||
"\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
|
||||
)* "\"" space
|
||||
value ::= object | array | string | number | boolean
|
||||
)"""
|
||||
});
|
||||
|
||||
test({
|
||||
SUCCESS,
|
||||
"exotic formats",
|
||||
R"""({
|
||||
"items": [
|
||||
{ "format": "date" },
|
||||
{ "format": "uuid" },
|
||||
{ "format": "time" },
|
||||
{ "format": "date-time" }
|
||||
]
|
||||
})""",
|
||||
R"""(
|
||||
date ::= [0-9] [0-9] [0-9] [0-9] "-" ( "0" [1-9] | "1" [0-2] ) "-" ( "0" [1-9] | [1-2] [0-9] | "3" [0-1] )
|
||||
date-string ::= "\"" date "\"" space
|
||||
date-time ::= date "T" time
|
||||
date-time-string ::= "\"" date-time "\"" space
|
||||
root ::= "[" space date-string "," space uuid "," space time-string "," space date-time-string "]" space
|
||||
space ::= " "?
|
||||
time ::= ([01] [0-9] | "2" [0-3]) ":" [0-5] [0-9] ":" [0-5] [0-9] ( "." [0-9] [0-9] [0-9] )? ( "Z" | ( "+" | "-" ) ( [01] [0-9] | "2" [0-3] ) ":" [0-5] [0-9] )
|
||||
time-string ::= "\"" time "\"" space
|
||||
uuid ::= "\"" [0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F] "-" [0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F] "-" [0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F] "-" [0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F] "-" [0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F] "\"" space
|
||||
)"""
|
||||
});
|
||||
|
||||
test({
|
||||
SUCCESS,
|
||||
"string",
|
||||
R"""({
|
||||
"type": "string"
|
||||
})""",
|
||||
R"""(
|
||||
root ::= "\"" (
|
||||
[^"\\] |
|
||||
"\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
|
||||
)* "\"" space
|
||||
space ::= " "?
|
||||
)"""
|
||||
});
|
||||
|
||||
test({
|
||||
SUCCESS,
|
||||
"boolean",
|
||||
R"""({
|
||||
"type": "boolean"
|
||||
})""",
|
||||
R"""(
|
||||
root ::= ("true" | "false") space
|
||||
space ::= " "?
|
||||
)"""
|
||||
});
|
||||
|
||||
test({
|
||||
SUCCESS,
|
||||
"integer",
|
||||
R"""({
|
||||
"type": "integer"
|
||||
})""",
|
||||
R"""(
|
||||
root ::= ("-"? ([0-9] | [1-9] [0-9]*)) space
|
||||
space ::= " "?
|
||||
)"""
|
||||
});
|
||||
|
||||
test({
|
||||
SUCCESS,
|
||||
"string const",
|
||||
R"""({
|
||||
"const": "foo"
|
||||
})""",
|
||||
R"""(
|
||||
root ::= "\"foo\""
|
||||
space ::= " "?
|
||||
)"""
|
||||
});
|
||||
|
||||
test({
|
||||
FAILURE,
|
||||
"non-string const",
|
||||
R"""({
|
||||
"const": 123
|
||||
})""",
|
||||
""
|
||||
});
|
||||
|
||||
test({
|
||||
FAILURE,
|
||||
"non-string enum",
|
||||
R"""({
|
||||
"enum": [123]
|
||||
})""",
|
||||
""
|
||||
});
|
||||
|
||||
test({
|
||||
SUCCESS,
|
||||
"tuple1",
|
||||
R"""({
|
||||
"prefixItems": [{ "type": "string" }]
|
||||
})""",
|
||||
R"""(
|
||||
root ::= "[" space string "]" space
|
||||
space ::= " "?
|
||||
string ::= "\"" (
|
||||
[^"\\] |
|
||||
"\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
|
||||
)* "\"" space
|
||||
)"""
|
||||
});
|
||||
|
||||
test({
|
||||
SUCCESS,
|
||||
"tuple2",
|
||||
R"""({
|
||||
"prefixItems": [{ "type": "string" }, { "type": "number" }]
|
||||
})""",
|
||||
R"""(
|
||||
number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? space
|
||||
root ::= "[" space string "," space number "]" space
|
||||
space ::= " "?
|
||||
string ::= "\"" (
|
||||
[^"\\] |
|
||||
"\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
|
||||
)* "\"" space
|
||||
)"""
|
||||
});
|
||||
|
||||
test({
|
||||
SUCCESS,
|
||||
"number",
|
||||
R"""({
|
||||
"type": "number"
|
||||
})""",
|
||||
R"""(
|
||||
root ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? space
|
||||
space ::= " "?
|
||||
)"""
|
||||
});
|
||||
|
||||
test({
|
||||
SUCCESS,
|
||||
"minItems",
|
||||
R"""({
|
||||
"items": {
|
||||
"type": "boolean"
|
||||
},
|
||||
"minItems": 2
|
||||
})""",
|
||||
R"""(
|
||||
boolean ::= ("true" | "false") space
|
||||
root ::= "[" space boolean ( "," space boolean )( "," space boolean )* "]" space
|
||||
space ::= " "?
|
||||
)"""
|
||||
});
|
||||
|
||||
test({
|
||||
SUCCESS,
|
||||
"maxItems 1",
|
||||
R"""({
|
||||
"items": {
|
||||
"type": "boolean"
|
||||
},
|
||||
"maxItems": 1
|
||||
})""",
|
||||
R"""(
|
||||
boolean ::= ("true" | "false") space
|
||||
root ::= "[" space ( boolean )? "]" space
|
||||
space ::= " "?
|
||||
)"""
|
||||
});
|
||||
|
||||
test({
|
||||
SUCCESS,
|
||||
"maxItems 2",
|
||||
R"""({
|
||||
"items": {
|
||||
"type": "boolean"
|
||||
},
|
||||
"maxItems": 2
|
||||
})""",
|
||||
R"""(
|
||||
boolean ::= ("true" | "false") space
|
||||
root ::= "[" space ( boolean ( "," space boolean )? )? "]" space
|
||||
space ::= " "?
|
||||
)"""
|
||||
});
|
||||
|
||||
test({
|
||||
SUCCESS,
|
||||
"min + maxItems",
|
||||
R"""({
|
||||
"items": {
|
||||
"type": ["number", "integer"]
|
||||
},
|
||||
"minItems": 3,
|
||||
"maxItems": 5
|
||||
})""",
|
||||
R"""(
|
||||
integer ::= ("-"? ([0-9] | [1-9] [0-9]*)) space
|
||||
item ::= number | integer
|
||||
number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? space
|
||||
root ::= "[" space item ( "," space item )( "," space item )( "," space item )?( "," space item )? "]" space
|
||||
space ::= " "?
|
||||
)"""
|
||||
});
|
||||
|
||||
test({
|
||||
SUCCESS,
|
||||
"simple regexp",
|
||||
R"""({
|
||||
"type": "string",
|
||||
"pattern": "^abc?d*efg+(hij)?kl$"
|
||||
})""",
|
||||
R"""(
|
||||
root ::= "\"" "ab" "c"? "d"* "ef" "g"+ ("hij")? "kl" "\"" space
|
||||
space ::= " "?
|
||||
)"""
|
||||
});
|
||||
|
||||
test({
|
||||
SUCCESS,
|
||||
"regexp escapes",
|
||||
R"""({
|
||||
"type": "string",
|
||||
"pattern": "^\\[\\]\\{\\}\\(\\)\\|\\+\\*\\?$"
|
||||
})""",
|
||||
R"""(
|
||||
root ::= "\"" "[]{}()|+*?" "\"" space
|
||||
space ::= " "?
|
||||
)"""
|
||||
});
|
||||
|
||||
test({
|
||||
SUCCESS,
|
||||
"regexp quote",
|
||||
R"""({
|
||||
"type": "string",
|
||||
"pattern": "^\"$"
|
||||
})""",
|
||||
R"""(
|
||||
root ::= "\"" "\"" "\"" space
|
||||
space ::= " "?
|
||||
)"""
|
||||
});
|
||||
|
||||
test({
|
||||
SUCCESS,
|
||||
"regexp",
|
||||
R"""({
|
||||
"type": "string",
|
||||
"pattern": "^(\\([0-9]{1,3}\\))?[0-9]{3}-[0-9]{4} and...$"
|
||||
})""",
|
||||
R"""(
|
||||
dot ::= [\U00000000-\x09\x0B\x0C\x0E-\U0010FFFF]
|
||||
root ::= "\"" ("(" root-1 root-1? root-1? ")")? root-1 root-1 root-1 "-" root-1 root-1 root-1 root-1 " and" dot dot dot "\"" space
|
||||
root-1 ::= [0-9]
|
||||
space ::= " "?
|
||||
)"""
|
||||
});
|
||||
|
||||
test({
|
||||
SUCCESS,
|
||||
"required props",
|
||||
R"""({
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"a": {
|
||||
"type": "string"
|
||||
},
|
||||
"b": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"a",
|
||||
"b"
|
||||
],
|
||||
"additionalProperties": false,
|
||||
"definitions": {}
|
||||
})""",
|
||||
R"""(
|
||||
a-kv ::= "\"a\"" space ":" space string
|
||||
b-kv ::= "\"b\"" space ":" space string
|
||||
root ::= "{" space a-kv "," space b-kv "}" space
|
||||
space ::= " "?
|
||||
string ::= "\"" (
|
||||
[^"\\] |
|
||||
"\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
|
||||
)* "\"" space
|
||||
)"""
|
||||
});
|
||||
|
||||
test({
|
||||
SUCCESS,
|
||||
"1 optional prop",
|
||||
R"""({
|
||||
"properties": {
|
||||
"a": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false
|
||||
})""",
|
||||
R"""(
|
||||
a-kv ::= "\"a\"" space ":" space string
|
||||
root ::= "{" space (a-kv )? "}" space
|
||||
space ::= " "?
|
||||
string ::= "\"" (
|
||||
[^"\\] |
|
||||
"\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
|
||||
)* "\"" space
|
||||
)"""
|
||||
});
|
||||
|
||||
test({
|
||||
SUCCESS,
|
||||
"N optional props",
|
||||
R"""({
|
||||
"properties": {
|
||||
"a": {"type": "string"},
|
||||
"b": {"type": "string"},
|
||||
"c": {"type": "string"}
|
||||
},
|
||||
"additionalProperties": false
|
||||
})""",
|
||||
R"""(
|
||||
a-kv ::= "\"a\"" space ":" space string
|
||||
a-rest ::= ( "," space b-kv )? b-rest
|
||||
b-kv ::= "\"b\"" space ":" space string
|
||||
b-rest ::= ( "," space c-kv )?
|
||||
c-kv ::= "\"c\"" space ":" space string
|
||||
root ::= "{" space (a-kv a-rest | b-kv b-rest | c-kv )? "}" space
|
||||
space ::= " "?
|
||||
string ::= "\"" (
|
||||
[^"\\] |
|
||||
"\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
|
||||
)* "\"" space
|
||||
)"""
|
||||
});
|
||||
|
||||
test({
|
||||
SUCCESS,
|
||||
"required + optional props",
|
||||
R"""({
|
||||
"properties": {
|
||||
"a": {"type": "string"},
|
||||
"b": {"type": "string"},
|
||||
"c": {"type": "string"},
|
||||
"d": {"type": "string"}
|
||||
},
|
||||
"required": ["a", "b"],
|
||||
"additionalProperties": false
|
||||
})""",
|
||||
R"""(
|
||||
a-kv ::= "\"a\"" space ":" space string
|
||||
b-kv ::= "\"b\"" space ":" space string
|
||||
c-kv ::= "\"c\"" space ":" space string
|
||||
c-rest ::= ( "," space d-kv )?
|
||||
d-kv ::= "\"d\"" space ":" space string
|
||||
root ::= "{" space a-kv "," space b-kv ( "," space ( c-kv c-rest | d-kv ) )? "}" space
|
||||
space ::= " "?
|
||||
string ::= "\"" (
|
||||
[^"\\] |
|
||||
"\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
|
||||
)* "\"" space
|
||||
)"""
|
||||
});
|
||||
|
||||
test({
|
||||
SUCCESS,
|
||||
"additional props",
|
||||
R"""({
|
||||
"type": "object",
|
||||
"additionalProperties": {"type": "array", "items": {"type": "number"}}
|
||||
})""",
|
||||
R"""(
|
||||
additional-kv ::= string ":" space additional-value
|
||||
additional-kvs ::= additional-kv ( "," space additional-kv )*
|
||||
additional-value ::= "[" space ( number ( "," space number )* )? "]" space
|
||||
number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? space
|
||||
root ::= "{" space (additional-kvs )? "}" space
|
||||
space ::= " "?
|
||||
string ::= "\"" (
|
||||
[^"\\] |
|
||||
"\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
|
||||
)* "\"" space
|
||||
)"""
|
||||
});
|
||||
|
||||
test({
|
||||
SUCCESS,
|
||||
"additional props (true)",
|
||||
R"""({
|
||||
"type": "object",
|
||||
"additionalProperties": true
|
||||
})""",
|
||||
R"""(
|
||||
array ::= "[" space ( value ("," space value)* )? "]" space
|
||||
boolean ::= ("true" | "false") space
|
||||
null ::= "null" space
|
||||
number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? space
|
||||
object ::= "{" space ( string ":" space value ("," space string ":" space value)* )? "}" space
|
||||
root ::= object
|
||||
space ::= " "?
|
||||
string ::= "\"" (
|
||||
[^"\\] |
|
||||
"\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
|
||||
)* "\"" space
|
||||
value ::= object | array | string | number | boolean
|
||||
)"""
|
||||
});
|
||||
|
||||
test({
|
||||
SUCCESS,
|
||||
"additional props (implicit)",
|
||||
R"""({
|
||||
"type": "object"
|
||||
})""",
|
||||
R"""(
|
||||
array ::= "[" space ( value ("," space value)* )? "]" space
|
||||
boolean ::= ("true" | "false") space
|
||||
null ::= "null" space
|
||||
number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? space
|
||||
object ::= "{" space ( string ":" space value ("," space string ":" space value)* )? "}" space
|
||||
root ::= object
|
||||
space ::= " "?
|
||||
string ::= "\"" (
|
||||
[^"\\] |
|
||||
"\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
|
||||
)* "\"" space
|
||||
value ::= object | array | string | number | boolean
|
||||
)"""
|
||||
});
|
||||
|
||||
test({
|
||||
SUCCESS,
|
||||
"empty w/o additional props",
|
||||
R"""({
|
||||
"type": "object",
|
||||
"additionalProperties": false
|
||||
})""",
|
||||
R"""(
|
||||
root ::= "{" space "}" space
|
||||
space ::= " "?
|
||||
)"""
|
||||
});
|
||||
|
||||
test({
|
||||
SUCCESS,
|
||||
"required + additional props",
|
||||
R"""({
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"a": {"type": "number"}
|
||||
},
|
||||
"required": ["a"],
|
||||
"additionalProperties": {"type": "string"}
|
||||
})""",
|
||||
R"""(
|
||||
a-kv ::= "\"a\"" space ":" space number
|
||||
additional-kv ::= string ":" space string
|
||||
additional-kvs ::= additional-kv ( "," space additional-kv )*
|
||||
number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? space
|
||||
root ::= "{" space a-kv ( "," space ( additional-kvs ) )? "}" space
|
||||
space ::= " "?
|
||||
string ::= "\"" (
|
||||
[^"\\] |
|
||||
"\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
|
||||
)* "\"" space
|
||||
)"""
|
||||
});
|
||||
|
||||
test({
|
||||
SUCCESS,
|
||||
"optional + additional props",
|
||||
R"""({
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"a": {"type": "number"}
|
||||
},
|
||||
"additionalProperties": {"type": "number"}
|
||||
})""",
|
||||
R"""(
|
||||
a-kv ::= "\"a\"" space ":" space number
|
||||
a-rest ::= additional-kvs
|
||||
additional-kv ::= string ":" space number
|
||||
additional-kvs ::= additional-kv ( "," space additional-kv )*
|
||||
number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? space
|
||||
root ::= "{" space (a-kv a-rest | additional-kvs )? "}" space
|
||||
space ::= " "?
|
||||
string ::= "\"" (
|
||||
[^"\\] |
|
||||
"\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
|
||||
)* "\"" space
|
||||
)"""
|
||||
});
|
||||
|
||||
test({
|
||||
SUCCESS,
|
||||
"required + optional + additional props",
|
||||
R"""({
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"a": {"type": "number"},
|
||||
"b": {"type": "number"}
|
||||
},
|
||||
"required": ["a"],
|
||||
"additionalProperties": {"type": "number"}
|
||||
})""",
|
||||
R"""(
|
||||
a-kv ::= "\"a\"" space ":" space number
|
||||
additional-kv ::= string ":" space number
|
||||
additional-kvs ::= additional-kv ( "," space additional-kv )*
|
||||
b-kv ::= "\"b\"" space ":" space number
|
||||
b-rest ::= additional-kvs
|
||||
number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? space
|
||||
root ::= "{" space a-kv ( "," space ( b-kv b-rest | additional-kvs ) )? "}" space
|
||||
space ::= " "?
|
||||
string ::= "\"" (
|
||||
[^"\\] |
|
||||
"\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
|
||||
)* "\"" space
|
||||
)"""
|
||||
});
|
||||
|
||||
test({
|
||||
SUCCESS,
|
||||
"top-level $ref",
|
||||
R"""({
|
||||
"$ref": "#/definitions/MyType",
|
||||
"definitions": {
|
||||
"MyType": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"a": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"a"
|
||||
],
|
||||
"additionalProperties": false
|
||||
}
|
||||
}
|
||||
})""",
|
||||
R"""(
|
||||
MyType ::= "{" space MyType-a-kv "}" space
|
||||
MyType-a-kv ::= "\"a\"" space ":" space string
|
||||
root ::= MyType
|
||||
space ::= " "?
|
||||
string ::= "\"" (
|
||||
[^"\\] |
|
||||
"\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
|
||||
)* "\"" space
|
||||
)"""
|
||||
});
|
||||
|
||||
test({
|
||||
SUCCESS,
|
||||
"anyOf",
|
||||
R"""({
|
||||
"anyOf": [
|
||||
{"$ref": "#/definitions/foo"},
|
||||
{"$ref": "#/definitions/bar"}
|
||||
],
|
||||
"definitions": {
|
||||
"foo": {
|
||||
"properties": {"a": {"type": "number"}}
|
||||
},
|
||||
"bar": {
|
||||
"properties": {"b": {"type": "number"}}
|
||||
}
|
||||
},
|
||||
"type": "object"
|
||||
})""",
|
||||
R"""(
|
||||
alternative-0 ::= foo
|
||||
alternative-1 ::= bar
|
||||
bar ::= "{" space (bar-b-kv )? "}" space
|
||||
bar-b-kv ::= "\"b\"" space ":" space number
|
||||
foo ::= "{" space (foo-a-kv )? "}" space
|
||||
foo-a-kv ::= "\"a\"" space ":" space number
|
||||
number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? space
|
||||
root ::= alternative-0 | alternative-1
|
||||
space ::= " "?
|
||||
)"""
|
||||
});
|
||||
|
||||
test({
|
||||
SUCCESS,
|
||||
"mix of allOf, anyOf and $ref (similar to https://json.schemastore.org/tsconfig.json)",
|
||||
R"""({
|
||||
"allOf": [
|
||||
{"$ref": "#/definitions/foo"},
|
||||
{"$ref": "#/definitions/bar"},
|
||||
{
|
||||
"anyOf": [
|
||||
{"$ref": "#/definitions/baz"},
|
||||
{"$ref": "#/definitions/bam"}
|
||||
]
|
||||
}
|
||||
],
|
||||
"definitions": {
|
||||
"foo": {
|
||||
"properties": {"a": {"type": "number"}}
|
||||
},
|
||||
"bar": {
|
||||
"properties": {"b": {"type": "number"}}
|
||||
},
|
||||
"bam": {
|
||||
"properties": {"c": {"type": "number"}}
|
||||
},
|
||||
"baz": {
|
||||
"properties": {"d": {"type": "number"}}
|
||||
}
|
||||
},
|
||||
"type": "object"
|
||||
})""",
|
||||
R"""(
|
||||
a-kv ::= "\"a\"" space ":" space number
|
||||
b-kv ::= "\"b\"" space ":" space number
|
||||
c-kv ::= "\"c\"" space ":" space number
|
||||
d-kv ::= "\"d\"" space ":" space number
|
||||
d-rest ::= ( "," space c-kv )?
|
||||
number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? space
|
||||
root ::= "{" space a-kv "," space b-kv ( "," space ( d-kv d-rest | c-kv ) )? "}" space
|
||||
space ::= " "?
|
||||
)"""
|
||||
});
|
||||
|
||||
test({
|
||||
SUCCESS,
|
||||
"conflicting names",
|
||||
R"""({
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"number": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"number": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"root": {
|
||||
"type": "number"
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"root"
|
||||
],
|
||||
"additionalProperties": false
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"number"
|
||||
],
|
||||
"additionalProperties": false
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"number"
|
||||
],
|
||||
"additionalProperties": false,
|
||||
"definitions": {}
|
||||
})""",
|
||||
R"""(
|
||||
number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? space
|
||||
number- ::= "{" space number-number-kv "}" space
|
||||
number-kv ::= "\"number\"" space ":" space number-
|
||||
number-number ::= "{" space number-number-root-kv "}" space
|
||||
number-number-kv ::= "\"number\"" space ":" space number-number
|
||||
number-number-root-kv ::= "\"root\"" space ":" space number
|
||||
root ::= "{" space number-kv "}" space
|
||||
space ::= " "?
|
||||
)"""
|
||||
});
|
||||
}
|
||||
|
||||
int main() {
|
||||
test_all("C++", [](const TestCase & tc) {
|
||||
try {
|
||||
tc.verify(json_schema_to_grammar(nlohmann::json::parse(tc.schema)));
|
||||
tc.verify_status(SUCCESS);
|
||||
} catch (const std::runtime_error & ex) {
|
||||
fprintf(stderr, "Error: %s\n", ex.what());
|
||||
tc.verify_status(FAILURE);
|
||||
}
|
||||
});
|
||||
//test_all("Python", [](const TestCase & tc) {
|
||||
// write("test-json-schema-input.tmp", tc.schema);
|
||||
// tc.verify_status(std::system(
|
||||
// "python ./examples/json-schema-to-grammar.py test-json-schema-input.tmp > test-grammar-output.tmp") == 0 ? SUCCESS : FAILURE);
|
||||
// tc.verify(read("test-grammar-output.tmp"));
|
||||
//});
|
||||
//test_all("JavaScript", [](const TestCase & tc) {
|
||||
// write("test-json-schema-input.tmp", tc.schema);
|
||||
// tc.verify_status(std::system(
|
||||
// "node ./tests/run-json-schema-to-grammar.mjs test-json-schema-input.tmp > test-grammar-output.tmp") == 0 ? SUCCESS : FAILURE);
|
||||
// tc.verify(read("test-grammar-output.tmp"));
|
||||
//});
|
||||
|
||||
test_all("Check Expectations Validity", [](const TestCase & tc) {
|
||||
if (tc.expected_status == SUCCESS) {
|
||||
tc.verify_expectation_parseable();
|
||||
}
|
||||
});
|
||||
}
|
||||
Reference in New Issue
Block a user