Mirror of https://github.com/ggerganov/llama.cpp.git (synced 2026-02-26 14:23:22 +02:00)
Compare commits: master-468 ... master-46e (84 commits)
| SHA1 |
|---|
| 46ef5b5fcf |
| c63bb1d16a |
| 3b6cfe7c92 |
| 800c9635b4 |
| deb7dfca4b |
| bac66994cf |
| 519c981f8b |
| 1123f7fbdf |
| ef3f333d37 |
| 8e4364f2af |
| 1e3bc523d8 |
| 14b1d7e6f7 |
| 226255b44e |
| 930523c8e1 |
| c8dba409e6 |
| 6381d4e110 |
| dadbed99e6 |
| cb1c0727bd |
| 9e232f0234 |
| 5e9ff54a67 |
| 1f0bccb279 |
| f63564adfa |
| 2d8b76a110 |
| 7af633aec3 |
| 097e121e2f |
| eaf98c2649 |
| e9b12c332e |
| 604b8bdfa6 |
| 10151bee2e |
| 0992a7b8b1 |
| 6ddeefad9b |
| 8dae7ce684 |
| a73ccf1aa3 |
| 7cf54e1f74 |
| a872a2b28e |
| 0919a0f73d |
| ed53db86c3 |
| fc8ef549e5 |
| bf83bff674 |
| b5ffb2849d |
| 3ebb00935f |
| d783f7982e |
| d75561df20 |
| 348acf188c |
| 1cd06fa25e |
| 2feb8934eb |
| 5517d6e692 |
| f31b539714 |
| ee77efea2a |
| f64d44a9b9 |
| b19edd54d5 |
| 53dc399472 |
| 9ca4abed89 |
| e59fcb2bc1 |
| 1638757767 |
| 916a9acdd0 |
| ea04a4ca19 |
| 25d43e0eb5 |
| f5bfea0580 |
| acfc5478ff |
| 7ed8d1fe7f |
| e7f94d6fdc |
| 2d7baaf50f |
| f3c3b4b167 |
| 93356bdb7a |
| 60baff7c85 |
| 9082b5dfbf |
| 99d29c0094 |
| 3d9a551816 |
| f6f9896ac3 |
| 34a14b28ff |
| 7297128db8 |
| 86c3219895 |
| 2e8265ae17 |
| f514d1b306 |
| 332311234a |
| 182af739c4 |
| 4329d1acb0 |
| 02f9d96a86 |
| 3498588e0f |
| 5f631c2679 |
| 415e99fec2 |
| ff966e7ca6 |
| 8183159cf3 |
.gitignore (vendored, 8 changes)
@@ -1,6 +1,8 @@
*.o
*.a
*.so
*.gguf
*.bin
.DS_Store
.build/
.cache/
@@ -39,13 +41,17 @@ models-mnt
/perplexity
/embedding
/train-text-from-scratch
/convert-llama2c-to-ggml
/simple
/benchmark-matmult
/vdot
/server
/Pipfile
/embd-input-test
/gguf
/gguf-llama-simple
/libllama.so
/llama-bench
build-info.h
arm_neon.h
compile_commands.json
@@ -62,12 +68,12 @@ perf-*.txt

examples/jeopardy/results.txt

pyproject.toml
poetry.lock
poetry.toml

# Test binaries
tests/test-grammar-parser
tests/test-double-float
tests/test-grad0
tests/test-opt
CMakeLists.txt

@@ -69,7 +69,6 @@ option(LLAMA_BLAS "llama: use BLAS"
set(LLAMA_BLAS_VENDOR "Generic" CACHE STRING "llama: BLAS library vendor")
option(LLAMA_CUBLAS "llama: use CUDA" OFF)
#option(LLAMA_CUDA_CUBLAS "llama: use cuBLAS for prompt processing" OFF)
set(LLAMA_CUDA_MMQ_Y "64" CACHE STRING "llama: y tile size for mmq CUDA kernels")
option(LLAMA_CUDA_FORCE_DMMV "llama: use dmmv instead of mmvq CUDA kernels" OFF)
set(LLAMA_CUDA_DMMV_X "32" CACHE STRING "llama: x stride for dmmv CUDA kernels")
set(LLAMA_CUDA_MMV_Y "1" CACHE STRING "llama: y block size for mmv CUDA kernels")
@@ -256,7 +255,6 @@ if (LLAMA_CUBLAS)
#       if (LLAMA_CUDA_CUBLAS)
#           add_compile_definitions(GGML_CUDA_CUBLAS)
#       endif()
        add_compile_definitions(GGML_CUDA_MMQ_Y=${LLAMA_CUDA_MMQ_Y})
        if (LLAMA_CUDA_FORCE_DMMV)
            add_compile_definitions(GGML_CUDA_FORCE_DMMV)
        endif()
@@ -298,7 +296,6 @@ if (LLAMA_METAL)
    find_library(FOUNDATION_LIBRARY Foundation REQUIRED)
    find_library(METAL_FRAMEWORK Metal REQUIRED)
    find_library(METALKIT_FRAMEWORK MetalKit REQUIRED)
    find_library(METALPERFORMANCE_FRAMEWORK MetalPerformanceShaders REQUIRED)

    set(GGML_SOURCES_METAL ggml-metal.m ggml-metal.h)

@@ -315,7 +312,6 @@ if (LLAMA_METAL)
        ${FOUNDATION_LIBRARY}
        ${METAL_FRAMEWORK}
        ${METALKIT_FRAMEWORK}
        ${METALPERFORMANCE_FRAMEWORK}
        )
endif()

@@ -501,9 +497,11 @@ else()
endif()

#
# Build libraries
# libraries
#

# ggml

add_library(ggml OBJECT
    ggml.c
    ggml.h
@@ -528,10 +526,11 @@ if (BUILD_SHARED_LIBS)
    install(TARGETS ggml_shared LIBRARY)
endif()

# llama

add_library(llama
    llama.cpp
    llama.h
    llama-util.h
    )

target_include_directories(llama PUBLIC .)
@@ -550,6 +549,10 @@ if (BUILD_SHARED_LIBS)
    install(TARGETS llama LIBRARY)
endif()

#
# install
#

include(GNUInstallDirs)
install(
    FILES convert.py
@@ -573,11 +576,23 @@ install(
        WORLD_READ
        WORLD_EXECUTE
    DESTINATION ${CMAKE_INSTALL_BINDIR})
if (LLAMA_METAL)
    install(
        FILES ggml-metal.metal
        PERMISSIONS
            OWNER_READ
            OWNER_WRITE
            GROUP_READ
            WORLD_READ
        DESTINATION ${CMAKE_INSTALL_BINDIR})
endif()

#
# programs, examples and tests
#

add_subdirectory(common)

if (LLAMA_BUILD_TESTS AND NOT CMAKE_JS_VERSION)
    include(CTest)
    add_subdirectory(tests)
Makefile (91 changes)
@@ -1,8 +1,8 @@
# Define the default target now so that it is always the first target
BUILD_TARGETS = main quantize quantize-stats perplexity embedding vdot train-text-from-scratch simple server embd-input-test
BUILD_TARGETS = main quantize quantize-stats perplexity embedding vdot train-text-from-scratch convert-llama2c-to-ggml simple server embd-input-test gguf llama-bench

# Binaries only useful for tests
TEST_TARGETS = tests/test-double-float tests/test-grad0 tests/test-opt tests/test-quantize-fns tests/test-quantize-perf tests/test-sampling tests/test-tokenizer-0
TEST_TARGETS = tests/test-llama-grammar tests/test-grammar-parser tests/test-double-float tests/test-grad0 tests/test-opt tests/test-quantize-fns tests/test-quantize-perf tests/test-sampling tests/test-tokenizer-0

default: $(BUILD_TARGETS)

@@ -45,8 +45,8 @@ OPT = -Ofast
else
OPT = -O3
endif
CFLAGS   = -I. $(OPT) -std=c11 -fPIC
CXXFLAGS = -I. -I./examples $(OPT) -std=c++11 -fPIC
CFLAGS   = -I. $(OPT) -std=c11 -fPIC
CXXFLAGS = -I. -I./common $(OPT) -std=c++11 -fPIC
LDFLAGS  =

ifdef LLAMA_DEBUG
@@ -142,6 +142,28 @@ ifeq ($(UNAME_M),$(filter $(UNAME_M),x86_64 i686 amd64))
    #CXXFLAGS += -mssse3
endif

ifneq ($(filter aarch64%,$(UNAME_M)),)
    # Apple M1, M2, etc.
    # Raspberry Pi 3, 4, Zero 2 (64-bit)
    CFLAGS   += -mcpu=native
    CXXFLAGS += -mcpu=native
endif

ifneq ($(filter armv6%,$(UNAME_M)),)
    # Raspberry Pi 1, Zero
    CFLAGS += -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access
endif

ifneq ($(filter armv7%,$(UNAME_M)),)
    # Raspberry Pi 2
    CFLAGS += -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access -funsafe-math-optimizations
endif

ifneq ($(filter armv8%,$(UNAME_M)),)
    # Raspberry Pi 3, 4, Zero 2 (32-bit)
    CFLAGS += -mfp16-format=ieee -mno-unaligned-access
endif

ifneq ($(filter ppc64%,$(UNAME_M)),)
    POWER9_M := $(shell grep "POWER9" /proc/cpuinfo)
    ifneq (,$(findstring POWER9,$(POWER9_M)))
@@ -231,11 +253,6 @@ ifdef LLAMA_CUDA_KQUANTS_ITER
else
    NVCCFLAGS += -DK_QUANTS_PER_ITERATION=2
endif
ifdef LLAMA_CUDA_MMQ_Y
    NVCCFLAGS += -DGGML_CUDA_MMQ_Y=$(LLAMA_CUDA_MMQ_Y)
else
    NVCCFLAGS += -DGGML_CUDA_MMQ_Y=64
endif # LLAMA_CUDA_MMQ_Y
#ifdef LLAMA_CUDA_CUBLAS
#    NVCCFLAGS += -DGGML_CUDA_CUBLAS
#endif # LLAMA_CUDA_CUBLAS
@@ -266,32 +283,10 @@ endif # LLAMA_CLBLAST
ifdef LLAMA_METAL
    CFLAGS   += -DGGML_USE_METAL -DGGML_METAL_NDEBUG
    CXXFLAGS += -DGGML_USE_METAL
    LDFLAGS  += -framework Foundation -framework Metal -framework MetalKit -framework MetalPerformanceShaders
    LDFLAGS  += -framework Foundation -framework Metal -framework MetalKit
    OBJS     += ggml-metal.o
endif # LLAMA_METAL

ifneq ($(filter aarch64%,$(UNAME_M)),)
    # Apple M1, M2, etc.
    # Raspberry Pi 3, 4, Zero 2 (64-bit)
    CFLAGS   += -mcpu=native
    CXXFLAGS += -mcpu=native
endif

ifneq ($(filter armv6%,$(UNAME_M)),)
    # Raspberry Pi 1, Zero
    CFLAGS += -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access
endif

ifneq ($(filter armv7%,$(UNAME_M)),)
    # Raspberry Pi 2
    CFLAGS += -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access -funsafe-math-optimizations
endif

ifneq ($(filter armv8%,$(UNAME_M)),)
    # Raspberry Pi 3, 4, Zero 2 (32-bit)
    CFLAGS += -mfp16-format=ieee -mno-unaligned-access
endif

ifdef LLAMA_METAL
ggml-metal.o: ggml-metal.m ggml-metal.h
	$(CC) $(CFLAGS) -c $< -o $@
@@ -334,26 +329,29 @@ ggml-alloc.o: ggml-alloc.c ggml.h ggml-alloc.h

OBJS += ggml-alloc.o

llama.o: llama.cpp ggml.h ggml-alloc.h ggml-cuda.h ggml-metal.h llama.h llama-util.h
llama.o: llama.cpp ggml.h ggml-alloc.h ggml-cuda.h ggml-metal.h llama.h
	$(CXX) $(CXXFLAGS) -c $< -o $@

common.o: examples/common.cpp examples/common.h
common.o: common/common.cpp common/common.h
	$(CXX) $(CXXFLAGS) -c $< -o $@

grammar-parser.o: examples/grammar-parser.cpp examples/grammar-parser.h
console.o: common/console.cpp common/console.h
	$(CXX) $(CXXFLAGS) -c $< -o $@

grammar-parser.o: common/grammar-parser.cpp common/grammar-parser.h
	$(CXX) $(CXXFLAGS) -c $< -o $@

libllama.so: llama.o ggml.o $(OBJS)
	$(CXX) $(CXXFLAGS) -shared -fPIC -o $@ $^ $(LDFLAGS)

clean:
	rm -vf *.o *.so *.dll main quantize quantize-stats perplexity embedding benchmark-matmult save-load-state server simple vdot train-text-from-scratch embd-input-test build-info.h $(TEST_TARGETS)
	rm -vf *.o *.so *.dll main quantize quantize-stats perplexity embedding benchmark-matmult save-load-state server simple vdot train-text-from-scratch convert-llama2c-to-ggml embd-input-test gguf llama-bench build-info.h $(TEST_TARGETS)

#
# Examples
#

main: examples/main/main.cpp build-info.h ggml.o llama.o common.o grammar-parser.o $(OBJS)
main: examples/main/main.cpp build-info.h ggml.o llama.o common.o console.o grammar-parser.o $(OBJS)
	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
	@echo
	@echo '====  Run ./main -h for help.  ===='
@@ -377,7 +375,7 @@ embedding: examples/embedding/embedding.cpp build-info.h ggml.
save-load-state: examples/save-load-state/save-load-state.cpp build-info.h ggml.o llama.o common.o $(OBJS)
	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)

server: examples/server/server.cpp examples/server/httplib.h examples/server/json.hpp examples/server/index.html.hpp examples/server/index.js.hpp examples/server/completion.js.hpp build-info.h ggml.o llama.o common.o $(OBJS)
server: examples/server/server.cpp examples/server/httplib.h examples/server/json.hpp examples/server/index.html.hpp examples/server/index.js.hpp examples/server/completion.js.hpp build-info.h ggml.o llama.o common.o grammar-parser.o $(OBJS)
	$(CXX) $(CXXFLAGS) -Iexamples/server $(filter-out %.h,$(filter-out %.hpp,$^)) -o $@ $(LDFLAGS) $(LWINSOCK2)

$(LIB_PRE)embdinput$(DSO_EXT): examples/embd-input/embd-input.h examples/embd-input/embd-input-lib.cpp build-info.h ggml.o llama.o common.o $(OBJS)
@@ -387,7 +385,16 @@ $(LIB_PRE)embdinput$(DSO_EXT): examples/embd-input/embd-input.h examples/embd-in
embd-input-test: $(LIB_PRE)embdinput$(DSO_EXT) examples/embd-input/embd-input-test.cpp build-info.h ggml.o llama.o common.o $(OBJS)
	$(CXX) $(CXXFLAGS) $(filter-out %$(DSO_EXT),$(filter-out %.h,$(filter-out %.hpp,$^))) -o $@ $(LDFLAGS) -L. -lembdinput

train-text-from-scratch: examples/train-text-from-scratch/train-text-from-scratch.cpp build-info.h ggml.o llama.o $(OBJS)
gguf: examples/gguf/gguf.cpp build-info.h ggml.o llama.o $(OBJS)
	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)

train-text-from-scratch: examples/train-text-from-scratch/train-text-from-scratch.cpp build-info.h ggml.o llama.o common.o $(OBJS)
	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)

convert-llama2c-to-ggml: examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp build-info.h ggml.o llama.o $(OBJS)
	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)

llama-bench: examples/llama-bench/llama-bench.cpp build-info.h ggml.o llama.o common.o $(OBJS)
	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)

build-info.h: $(wildcard .git/index) scripts/build-info.sh
@@ -411,6 +418,12 @@ benchmark-matmult: examples/benchmark/benchmark-matmult.cpp build-info.h ggml.o
vdot: pocs/vdot/vdot.cpp ggml.o $(OBJS)
	$(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS)

tests/test-llama-grammar: tests/test-llama-grammar.cpp build-info.h ggml.o llama.o common.o $(OBJS)
	$(CXX) $(CXXFLAGS) $(filter-out %.txt,$^) -o $@ $(LDFLAGS)

tests/test-grammar-parser: tests/test-grammar-parser.cpp build-info.h ggml.o llama.o common.o $(OBJS)
	$(CXX) $(CXXFLAGS) $(filter-out %.txt,$^) -o $@ $(LDFLAGS)

tests/test-double-float: tests/test-double-float.cpp build-info.h ggml.o llama.o common.o $(OBJS)
	$(CXX) $(CXXFLAGS) $(filter-out %.txt,$^) -o $@ $(LDFLAGS)
README.md (54 changes)
@@ -9,13 +9,19 @@

Inference of [LLaMA](https://arxiv.org/abs/2302.13971) model in pure C/C++

**Hot topics:**
### Hot topics

- Simple web chat example: https://github.com/ggerganov/llama.cpp/pull/1998
- k-quants now support super-block size of 64: https://github.com/ggerganov/llama.cpp/pull/2001
- New roadmap: https://github.com/users/ggerganov/projects/7
- Azure CI brainstorming: https://github.com/ggerganov/llama.cpp/discussions/1985
- p1 : LLM-based code completion engine at the edge : https://github.com/ggml-org/p1/discussions/1
A new file format has been introduced: [GGUF](https://github.com/ggerganov/llama.cpp/pull/2398)

Last revision compatible with the old format: [dadbed9](https://github.com/ggerganov/llama.cpp/commit/dadbed99e65252d79f81101a392d0d6497b86caa)

### Current `master` should be considered in Beta - expect some issues for a few days!

### Be prepared to re-convert and / or re-quantize your GGUF models while this notice is up!

### Issues with non-GGUF models will be considered with low priority!

----

<details>
<summary>Table of Contents</summary>
@@ -96,8 +102,10 @@ as the main playground for developing new features for the [ggml](https://github
- Go: [go-skynet/go-llama.cpp](https://github.com/go-skynet/go-llama.cpp)
- Node.js: [hlhr202/llama-node](https://github.com/hlhr202/llama-node)
- Ruby: [yoshoku/llama_cpp.rb](https://github.com/yoshoku/llama_cpp.rb)
- Rust: [mdrokz/rust-llama.cpp](https://github.com/mdrokz/rust-llama.cpp)
- C#/.NET: [SciSharp/LLamaSharp](https://github.com/SciSharp/LLamaSharp)
- Scala 3: [donderom/llm4s](https://github.com/donderom/llm4s)
- Clojure: [phronmophobic/llama.clj](https://github.com/phronmophobic/llama.clj)

**UI:**

@@ -238,12 +246,17 @@ In order to build llama.cpp you have three different options.
    cmake --build . --config Release
    ```

- Using `Zig`:
- Using `Zig` (version 0.11 or later):

    Building for optimization levels and CPU features can be accomplished using standard build arguments, for example AVX2, FMA, F16C,
    it's also possible to cross compile for other operating systems and architectures:

    ```bash
    zig build -Doptimize=ReleaseFast
    zig build -Doptimize=ReleaseFast -Dtarget=x86_64-windows-gnu -Dcpu=x86_64+avx2+fma+f16c
    ```

    The `zig targets` command will give you valid options to use.

- Using `gmake` (FreeBSD):

    1. Install and activate [DRM in FreeBSD](https://wiki.freebsd.org/Graphics)
@@ -284,7 +297,7 @@ When built with Metal support, you can enable GPU inference with the `--gpu-laye
Any value larger than 0 will offload the computation to the GPU. For example:

```bash
./main -m ./models/7B/ggml-model-q4_0.bin -n 128 -ngl 1
./main -m ./models/7B/ggml-model-q4_0.gguf -n 128 -ngl 1
```

### MPI Build
@@ -323,7 +336,7 @@ The above will distribute the computation across 2 processes on the first host a
Finally, you're ready to run a computation using `mpirun`:

```bash
mpirun -hostfile hostfile -n 3 ./main -m ./models/7B/ggml-model-q4_0.bin -n 128
mpirun -hostfile hostfile -n 3 ./main -m ./models/7B/ggml-model-q4_0.gguf -n 128
```

### BLAS Build
@@ -406,10 +419,9 @@ Building the program with BLAS support may lead to some performance improvements
--->
| Option | Legal values | Default | Description |
|-------------------------|------------------------|---------|-------------|
| LLAMA_CUDA_MMQ_Y | Positive integer >= 32 | 64 | Tile size in y direction when using the custom CUDA kernels for prompt processing. Higher values can be faster depending on the amount of shared memory available. Power of 2 heavily recommended. |
| LLAMA_CUDA_FORCE_DMMV | Boolean | false | Force the use of dequantization + matrix vector multiplication kernels instead of using kernels that do matrix vector multiplication on quantized data. By default the decision is made based on compute capability (MMVQ for 6.1/Pascal/GTX 1000 or higher). Does not affect k-quants. |
| LLAMA_CUDA_DMMV_X | Positive integer >= 32 | 32 | Number of values in x direction processed by the CUDA dequantization + matrix vector multiplication kernel per iteration. Increasing this value can improve performance on fast GPUs. Power of 2 heavily recommended. Does not affect k-quants. |
| LLAMA_CUDA_MMV_Y | Positive integer | 1 | Block size in y direction for the CUDA mul mat vec kernels. Increasing this value can improve performance on fast GPUs. Power of 2 recommended. Does not affect k-quants. |
| LLAMA_CUDA_MMV_Y | Positive integer | 1 | Block size in y direction for the CUDA mul mat vec kernels. Increasing this value can improve performance on fast GPUs. Power of 2 recommended. |
| LLAMA_CUDA_F16 | Boolean | false | If enabled, use half-precision floating point arithmetic for the CUDA dequantization + mul mat vec kernels and for the q4_1 and q5_1 matrix matrix multiplication kernels. Can improve performance on relatively recent GPUs. |
| LLAMA_CUDA_KQUANTS_ITER | 1 or 2 | 2 | Number of values processed per iteration and per CUDA thread for Q2_K and Q6_K quantization formats. Setting this value to 1 can improve performance for slow GPUs. |

@@ -507,10 +519,10 @@ python3 convert.py models/7B/
python convert.py models/7B/ --vocabtype bpe

# quantize the model to 4-bits (using q4_0 method)
./quantize ./models/7B/ggml-model-f16.bin ./models/7B/ggml-model-q4_0.bin q4_0
./quantize ./models/7B/ggml-model-f16.gguf ./models/7B/ggml-model-q4_0.gguf q4_0

# run the inference
./main -m ./models/7B/ggml-model-q4_0.bin -n 128
./main -m ./models/7B/ggml-model-q4_0.gguf -n 128
```

When running the larger models, make sure you have enough disk space to store all the intermediate files.
@@ -566,7 +578,7 @@ Here is an example of a few-shot interaction, invoked with the command
./examples/chat-13B.sh

# custom arguments using a 13B model
./main -m ./models/13B/ggml-model-q4_0.bin -n 256 --repeat_penalty 1.0 --color -i -r "User:" -f prompts/chat-with-bob.txt
./main -m ./models/13B/ggml-model-q4_0.gguf -n 256 --repeat_penalty 1.0 --color -i -r "User:" -f prompts/chat-with-bob.txt
```

Note the use of `--color` to distinguish between user input and generated text. Other parameters are explained in more detail in the [README](examples/main/README.md) for the `main` example program.
@@ -629,6 +641,8 @@ OpenLLaMA is an openly licensed reproduction of Meta's original LLaMA model. It

### Using [GPT4All](https://github.com/nomic-ai/gpt4all)

*Note: these instructions are likely obsoleted by the GGUF update*

- Obtain the `tokenizer.model` file from LLaMA model and put it to `models`
- Obtain the `added_tokens.json` file from Alpaca model and put it to `models`
- Obtain the `gpt4all-lora-quantized.bin` file from GPT4All model and put it to `models/gpt4all-7B`
@@ -704,7 +718,7 @@ If your issue is with model generation quality, then please at least scan the fo
#### How to run

1. Download/extract: https://s3.amazonaws.com/research.metamind.io/wikitext/wikitext-2-raw-v1.zip?ref=salesforce-research
2. Run `./perplexity -m models/7B/ggml-model-q4_0.bin -f wiki.test.raw`
2. Run `./perplexity -m models/7B/ggml-model-q4_0.gguf -f wiki.test.raw`
3. Output:
```
perplexity : calculating perplexity over 655 chunks
@@ -803,13 +817,13 @@ docker run -v /path/to/models:/models ghcr.io/ggerganov/llama.cpp:full --all-in-
On completion, you are ready to play!

```bash
docker run -v /path/to/models:/models ghcr.io/ggerganov/llama.cpp:full --run -m /models/7B/ggml-model-q4_0.bin -p "Building a website can be done in 10 simple steps:" -n 512
docker run -v /path/to/models:/models ghcr.io/ggerganov/llama.cpp:full --run -m /models/7B/ggml-model-q4_0.gguf -p "Building a website can be done in 10 simple steps:" -n 512
```

or with a light image:

```bash
docker run -v /path/to/models:/models ghcr.io/ggerganov/llama.cpp:light -m /models/7B/ggml-model-q4_0.bin -p "Building a website can be done in 10 simple steps:" -n 512
docker run -v /path/to/models:/models ghcr.io/ggerganov/llama.cpp:light -m /models/7B/ggml-model-q4_0.gguf -p "Building a website can be done in 10 simple steps:" -n 512
```

### Docker With CUDA
@@ -840,8 +854,8 @@ The resulting images, are essentially the same as the non-CUDA images:
After building locally, Usage is similar to the non-CUDA examples, but you'll need to add the `--gpus` flag. You will also want to use the `--n-gpu-layers` flag.

```bash
docker run --gpus all -v /path/to/models:/models local/llama.cpp:full-cuda --run -m /models/7B/ggml-model-q4_0.bin -p "Building a website can be done in 10 simple steps:" -n 512 --n-gpu-layers 1
docker run --gpus all -v /path/to/models:/models local/llama.cpp:light-cuda -m /models/7B/ggml-model-q4_0.bin -p "Building a website can be done in 10 simple steps:" -n 512 --n-gpu-layers 1
docker run --gpus all -v /path/to/models:/models local/llama.cpp:full-cuda --run -m /models/7B/ggml-model-q4_0.gguf -p "Building a website can be done in 10 simple steps:" -n 512 --n-gpu-layers 1
docker run --gpus all -v /path/to/models:/models local/llama.cpp:light-cuda -m /models/7B/ggml-model-q4_0.gguf -p "Building a website can be done in 10 simple steps:" -n 512 --n-gpu-layers 1
```

### Contributing
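The README hunks above all migrate the example commands from the old `.bin` files to GGUF. For orientation, here is a minimal sketch of the corresponding programmatic path, using only the helpers visible elsewhere on this page (`gpt_params_parse`, `llama_init_from_gpt_params`, the `llama_tokenize` wrapper, `llama_token_to_str`); the `llama_free`/`llama_free_model` teardown calls are assumed from the llama.h of this vintage and do not appear in these hunks.

```cpp
// a minimal sketch, not part of the diff: load a GGUF model and dump the prompt tokens
#include "common.h"
#include "llama.h"

#include <cstdio>
#include <tuple>
#include <vector>

int main(int argc, char ** argv) {
    gpt_params params;                               // declared in common/common.h
    params.model = "models/7B/ggml-model-q4_0.gguf"; // GGUF path, as in the README examples
    if (!gpt_params_parse(argc, argv, params)) {
        return 1;
    }

    llama_model *   model;
    llama_context * ctx;
    std::tie(model, ctx) = llama_init_from_gpt_params(params); // tuple return, shown later on this page
    if (model == nullptr || ctx == nullptr) {
        fprintf(stderr, "failed to load '%s'\n", params.model.c_str());
        return 1;
    }

    // two-pass tokenizer wrapper from common/common.cpp (the last file in this compare)
    const std::vector<llama_token> tokens = llama_tokenize(ctx, params.prompt, /*add_bos=*/true);
    for (const llama_token id : tokens) {
        printf("%6d -> '%s'\n", id, llama_token_to_str(ctx, id).c_str());
    }

    llama_free(ctx);
    llama_free_model(model);
    return 0;
}
```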
build.zig (169 changes)
@@ -1,68 +1,121 @@
// Compatible with Zig Version 0.11.0
const std = @import("std");
const commit_hash = @embedFile(".git/refs/heads/master");
const ArrayList = std.ArrayList;
const Compile = std.Build.Step.Compile;
const ConfigHeader = std.Build.Step.ConfigHeader;
const Mode = std.builtin.Mode;
const CrossTarget = std.zig.CrossTarget;

// Zig Version: 0.11.0-dev.3986+e05c242cd
pub fn build(b: *std.build.Builder) void {
    const target = b.standardTargetOptions(.{});
    const optimize = b.standardOptimizeOption(.{});
const Maker = struct {
    builder: *std.build.Builder,
    target: CrossTarget,
    optimize: Mode,
    config_header: *ConfigHeader,
    enable_lto: bool,

    const config_header = b.addConfigHeader(
        .{ .style = .blank, .include_path = "build-info.h" },
        .{
            .BUILD_NUMBER = 0,
            .BUILD_COMMIT = commit_hash[0 .. commit_hash.len - 1], // omit newline
        },
    );
    include_dirs: ArrayList([]const u8),
    cflags: ArrayList([]const u8),
    cxxflags: ArrayList([]const u8),
    objs: ArrayList(*Compile),

    const lib = b.addStaticLibrary(.{
        .name = "llama",
        .target = target,
        .optimize = optimize,
    });
    lib.linkLibC();
    lib.linkLibCpp();
    lib.addIncludePath(".");
    lib.addIncludePath("./examples");
    lib.addConfigHeader(config_header);
    lib.addCSourceFiles(&.{"ggml.c"}, &.{"-std=c11"});
    lib.addCSourceFiles(&.{"llama.cpp"}, &.{"-std=c++11"});
    b.installArtifact(lib);
    fn addInclude(m: *Maker, dir: []const u8) !void {
        try m.include_dirs.append(dir);
    }
    fn addProjectInclude(m: *Maker, path: []const []const u8) !void {
        try m.addInclude(try m.builder.build_root.join(m.builder.allocator, path));
    }
    fn addCFlag(m: *Maker, flag: []const u8) !void {
        try m.cflags.append(flag);
    }
    fn addCxxFlag(m: *Maker, flag: []const u8) !void {
        try m.cxxflags.append(flag);
    }
    fn addFlag(m: *Maker, flag: []const u8) !void {
        try m.addCFlag(flag);
        try m.addCxxFlag(flag);
    }

    const examples = .{
        "main",
        "baby-llama",
        "embedding",
        "metal",
        "perplexity",
        "quantize",
        "quantize-stats",
        "save-load-state",
        "server",
        "simple",
        "train-text-from-scratch",
    };
    fn init(builder: *std.build.Builder) !Maker {
        const commit_hash = @embedFile(".git/refs/heads/master");
        const config_header = builder.addConfigHeader(
            .{ .style = .blank, .include_path = "build-info.h" },
            .{
                .BUILD_NUMBER = 0,
                .BUILD_COMMIT = commit_hash[0 .. commit_hash.len - 1], // omit newline
            },
        );
        var m = Maker{
            .builder = builder,
            .target = builder.standardTargetOptions(.{}),
            .optimize = builder.standardOptimizeOption(.{}),
            .config_header = config_header,
            .enable_lto = false,
            .include_dirs = ArrayList([]const u8).init(builder.allocator),
            .cflags = ArrayList([]const u8).init(builder.allocator),
            .cxxflags = ArrayList([]const u8).init(builder.allocator),
            .objs = ArrayList(*Compile).init(builder.allocator),
        };
        try m.addCFlag("-std=c11");
        try m.addCxxFlag("-std=c++11");
        try m.addProjectInclude(&.{});
        try m.addProjectInclude(&.{"examples"});
        return m;
    }

    inline for (examples) |example_name| {
        const exe = b.addExecutable(.{
            .name = example_name,
            .target = target,
            .optimize = optimize,
        });
        exe.addIncludePath(".");
        exe.addIncludePath("./examples");
        exe.addConfigHeader(config_header);
        exe.addCSourceFiles(&.{
            std.fmt.comptimePrint("examples/{s}/{s}.cpp", .{ example_name, example_name }),
            "examples/common.cpp",
        }, &.{"-std=c++11"});
        exe.linkLibrary(lib);
        b.installArtifact(exe);
    fn obj(m: *const Maker, name: []const u8, src: []const u8) *Compile {
        const o = m.builder.addObject(.{ .name = name, .target = m.target, .optimize = m.optimize });
        if (std.mem.endsWith(u8, src, ".c")) {
            o.addCSourceFiles(&.{src}, m.cflags.items);
            o.linkLibC();
        } else {
            o.addCSourceFiles(&.{src}, m.cxxflags.items);
            o.linkLibCpp();
        }
        for (m.include_dirs.items) |i| o.addIncludePath(.{ .path = i });
        o.want_lto = m.enable_lto;
        return o;
    }

        const run_cmd = b.addRunArtifact(exe);
        run_cmd.step.dependOn(b.getInstallStep());
        if (b.args) |args| run_cmd.addArgs(args);
    fn exe(m: *const Maker, name: []const u8, src: []const u8, deps: []const *Compile) *Compile {
        const e = m.builder.addExecutable(.{ .name = name, .target = m.target, .optimize = m.optimize });
        e.addCSourceFiles(&.{src}, m.cxxflags.items);
        for (deps) |d| e.addObject(d);
        for (m.objs.items) |o| e.addObject(o);
        for (m.include_dirs.items) |i| e.addIncludePath(.{ .path = i });
        e.linkLibC();
        e.linkLibCpp();
        e.addConfigHeader(m.config_header);
        m.builder.installArtifact(e);
        e.want_lto = m.enable_lto;
        return e;
    }
};

        const run_step = b.step("run-" ++ example_name, "Run the app");
        run_step.dependOn(&run_cmd.step);
pub fn build(b: *std.build.Builder) !void {
    var make = try Maker.init(b);
    make.enable_lto = b.option(bool, "lto", "Enable LTO optimization, (default: false)") orelse false;

    if (b.option(bool, "k-quants", "Enable K-quants, (default: true)") orelse true) {
        try make.addFlag("-DGGML_USE_K_QUANTS");
        const k_quants = make.obj("k_quants", "k_quants.c");
        try make.objs.append(k_quants);
    }

    const ggml = make.obj("ggml", "ggml.c");
    const ggml_alloc = make.obj("ggml-alloc", "ggml-alloc.c");
    const llama = make.obj("llama", "llama.cpp");
    const common = make.obj("common", "examples/common.cpp");
    const console = make.obj("common", "examples/console.cpp");
    const grammar_parser = make.obj("grammar-parser", "examples/grammar-parser.cpp");

    _ = make.exe("main", "examples/main/main.cpp", &.{ ggml, ggml_alloc, llama, common, console, grammar_parser });
    _ = make.exe("quantize", "examples/quantize/quantize.cpp", &.{ ggml, ggml_alloc, llama });
    _ = make.exe("perplexity", "examples/perplexity/perplexity.cpp", &.{ ggml, ggml_alloc, llama, common });
    _ = make.exe("embedding", "examples/embedding/embedding.cpp", &.{ ggml, ggml_alloc, llama, common });
    _ = make.exe("train-text-from-scratch", "examples/train-text-from-scratch/train-text-from-scratch.cpp", &.{ ggml, ggml_alloc, llama });

    const server = make.exe("server", "examples/server/server.cpp", &.{ ggml, ggml_alloc, llama, common, grammar_parser });
    if (server.target.isWindows()) {
        server.linkSystemLibrary("ws2_32");
    }
}
ci/run.sh (44 changes)
@@ -159,17 +159,17 @@ function gg_run_open_llama_3b_v2 {

    python3 ../convert.py ${path_models}

    model_f16="${path_models}/ggml-model-f16.bin"
    model_q8_0="${path_models}/ggml-model-q8_0.bin"
    model_q4_0="${path_models}/ggml-model-q4_0.bin"
    model_q4_1="${path_models}/ggml-model-q4_1.bin"
    model_q5_0="${path_models}/ggml-model-q5_0.bin"
    model_q5_1="${path_models}/ggml-model-q5_1.bin"
    model_q2_k="${path_models}/ggml-model-q2_k.bin"
    model_q3_k="${path_models}/ggml-model-q3_k.bin"
    model_q4_k="${path_models}/ggml-model-q4_k.bin"
    model_q5_k="${path_models}/ggml-model-q5_k.bin"
    model_q6_k="${path_models}/ggml-model-q6_k.bin"
    model_f16="${path_models}/ggml-model-f16.gguf"
    model_q8_0="${path_models}/ggml-model-q8_0.gguf"
    model_q4_0="${path_models}/ggml-model-q4_0.gguf"
    model_q4_1="${path_models}/ggml-model-q4_1.gguf"
    model_q5_0="${path_models}/ggml-model-q5_0.gguf"
    model_q5_1="${path_models}/ggml-model-q5_1.gguf"
    model_q2_k="${path_models}/ggml-model-q2_k.gguf"
    model_q3_k="${path_models}/ggml-model-q3_k.gguf"
    model_q4_k="${path_models}/ggml-model-q4_k.gguf"
    model_q5_k="${path_models}/ggml-model-q5_k.gguf"
    model_q6_k="${path_models}/ggml-model-q6_k.gguf"

    wiki_test_60="${path_wiki}/wiki.test-60.raw"

@@ -285,17 +285,17 @@ function gg_run_open_llama_7b_v2 {

    python3 ../convert.py ${path_models}

    model_f16="${path_models}/ggml-model-f16.bin"
    model_q8_0="${path_models}/ggml-model-q8_0.bin"
    model_q4_0="${path_models}/ggml-model-q4_0.bin"
    model_q4_1="${path_models}/ggml-model-q4_1.bin"
    model_q5_0="${path_models}/ggml-model-q5_0.bin"
    model_q5_1="${path_models}/ggml-model-q5_1.bin"
    model_q2_k="${path_models}/ggml-model-q2_k.bin"
    model_q3_k="${path_models}/ggml-model-q3_k.bin"
    model_q4_k="${path_models}/ggml-model-q4_k.bin"
    model_q5_k="${path_models}/ggml-model-q5_k.bin"
    model_q6_k="${path_models}/ggml-model-q6_k.bin"
    model_f16="${path_models}/ggml-model-f16.gguf"
    model_q8_0="${path_models}/ggml-model-q8_0.gguf"
    model_q4_0="${path_models}/ggml-model-q4_0.gguf"
    model_q4_1="${path_models}/ggml-model-q4_1.gguf"
    model_q5_0="${path_models}/ggml-model-q5_0.gguf"
    model_q5_1="${path_models}/ggml-model-q5_1.gguf"
    model_q2_k="${path_models}/ggml-model-q2_k.gguf"
    model_q3_k="${path_models}/ggml-model-q3_k.gguf"
    model_q4_k="${path_models}/ggml-model-q4_k.gguf"
    model_q5_k="${path_models}/ggml-model-q5_k.gguf"
    model_q6_k="${path_models}/ggml-model-q6_k.gguf"

    wiki_test="${path_wiki}/wiki.test.raw"
common/CMakeLists.txt (new file, 20 lines)
@@ -0,0 +1,20 @@
# common

set(TARGET common)

add_library(${TARGET} OBJECT
    common.h
    common.cpp
    console.h
    console.cpp
    grammar-parser.h
    grammar-parser.cpp
    )

if (BUILD_SHARED_LIBS)
    set_target_properties(${TARGET} PROPERTIES POSITION_INDEPENDENT_CODE ON)
endif()

target_include_directories(${TARGET} PUBLIC .)
target_compile_features(${TARGET} PUBLIC cxx_std_11)
target_link_libraries(${TARGET} PRIVATE llama)
common/common.cpp

@@ -25,7 +25,6 @@
#else
#include <sys/ioctl.h>
#include <unistd.h>
#include <wchar.h>
#endif

#if defined(_MSC_VER)
@@ -171,18 +170,6 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
                break;
            }
            params.n_ctx = std::stoi(argv[i]);
        } else if (arg == "-gqa" || arg == "--gqa") {
            if (++i >= argc) {
                invalid_param = true;
                break;
            }
            params.n_gqa = std::stoi(argv[i]);
        } else if (arg == "-eps" || arg == "--rms-norm-eps") {
            if (++i >= argc) {
                invalid_param = true;
                break;
            }
            params.rms_norm_eps = std::stof(argv[i]);
        } else if (arg == "--rope-freq-base") {
            if (++i >= argc) {
                invalid_param = true;
@@ -195,6 +182,12 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
                break;
            }
            params.rope_freq_scale = std::stof(argv[i]);
        } else if (arg == "--rope-scale") {
            if (++i >= argc) {
                invalid_param = true;
                break;
            }
            params.rope_freq_scale = 1.0f/std::stof(argv[i]);
        } else if (arg == "--memory-f32") {
            params.memory_f16 = false;
        } else if (arg == "--top-p") {
@@ -269,6 +262,21 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
                break;
            }
            params.cfg_negative_prompt = argv[i];
        } else if (arg == "--cfg-negative-prompt-file") {
            if (++i >= argc) {
                invalid_param = true;
                break;
            }
            std::ifstream file(argv[i]);
            if (!file) {
                fprintf(stderr, "error: failed to open file '%s'\n", argv[i]);
                invalid_param = true;
                break;
            }
            std::copy(std::istreambuf_iterator<char>(file), std::istreambuf_iterator<char>(), back_inserter(params.cfg_negative_prompt));
            if (params.cfg_negative_prompt.back() == '\n') {
                params.cfg_negative_prompt.pop_back();
            }
        } else if (arg == "--cfg-scale") {
            if (++i >= argc) {
                invalid_param = true;
@@ -281,7 +289,6 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
                break;
            }
            params.n_batch = std::stoi(argv[i]);
            params.n_batch = std::min(512, params.n_batch);
        } else if (arg == "--keep") {
            if (++i >= argc) {
                invalid_param = true;
@@ -329,6 +336,8 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
            params.instruct = true;
        } else if (arg == "--multiline-input") {
            params.multiline_input = true;
        } else if (arg == "--simple-io") {
            params.simple_io = true;
        } else if (arg == "--color") {
            params.use_color = true;
        } else if (arg == "--mlock") {
@@ -378,11 +387,11 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
#else
            fprintf(stderr, "warning: llama.cpp was compiled without cuBLAS. It is not possible to set a tensor split.\n");
#endif // GGML_USE_CUBLAS
        } else if (arg == "--mul-mat-q" || arg == "-mmq") {
        } else if (arg == "--no-mul-mat-q" || arg == "-nommq") {
#ifdef GGML_USE_CUBLAS
            params.mul_mat_q = true;
            params.mul_mat_q = false;
#else
            fprintf(stderr, "warning: llama.cpp was compiled without cuBLAS. It is not possible to use mul_mat_q kernels.\n");
            fprintf(stderr, "warning: llama.cpp was compiled without cuBLAS. Disabling mul_mat_q kernels has no effect.\n");
#endif // GGML_USE_CUBLAS
        } else if (arg == "--low-vram" || arg == "-lv") {
#ifdef GGML_USE_CUBLAS
@@ -417,7 +426,7 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
            }
            params.hellaswag_tasks = std::stoi(argv[i]);
        } else if (arg == "--ignore-eos") {
            params.logit_bias[llama_token_eos()] = -INFINITY;
            params.ignore_eos = true;
        } else if (arg == "--no-penalize-nl") {
            params.penalize_nl = false;
        } else if (arg == "-l" || arg == "--logit-bias") {
@@ -536,11 +545,9 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) {
    fprintf(stdout, " --in-suffix STRING string to suffix after user inputs with (default: empty)\n");
    fprintf(stdout, " -f FNAME, --file FNAME\n");
    fprintf(stdout, " prompt file to start generation.\n");
    fprintf(stdout, " -n N, --n-predict N number of tokens to predict (default: %d, -1 = infinity)\n", params.n_predict);
    fprintf(stdout, " -n N, --n-predict N number of tokens to predict (default: %d, -1 = infinity, -2 = until context filled)\n", params.n_predict);
    fprintf(stdout, " -c N, --ctx-size N size of the prompt context (default: %d)\n", params.n_ctx);
    fprintf(stdout, " -b N, --batch-size N batch size for prompt processing (default: %d)\n", params.n_batch);
    fprintf(stdout, " -gqa N, --gqa N grouped-query attention factor (TEMP!!! use 8 for LLaMAv2 70B) (default: %d)\n", params.n_gqa);
    fprintf(stdout, " -eps N, --rms-norm-eps N rms norm eps (TEMP!!! use 1e-5 for LLaMAv2) (default: %.1e)\n", params.rms_norm_eps);
    fprintf(stdout, " --top-k N top-k sampling (default: %d, 0 = disabled)\n", params.top_k);
    fprintf(stdout, " --top-p N top-p sampling (default: %.1f, 1.0 = disabled)\n", (double)params.top_p);
    fprintf(stdout, " --tfs N tail free sampling, parameter z (default: %.1f, 1.0 = disabled)\n", (double)params.tfs_z);
@@ -560,11 +567,14 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) {
    fprintf(stdout, " or `--logit-bias 15043-1` to decrease likelihood of token ' Hello'\n");
    fprintf(stdout, " --grammar GRAMMAR BNF-like grammar to constrain generations (see samples in grammars/ dir)\n");
    fprintf(stdout, " --grammar-file FNAME file to read grammar from\n");
    fprintf(stdout, " --cfg-negative-prompt PROMPT \n");
    fprintf(stdout, " --cfg-negative-prompt PROMPT\n");
    fprintf(stdout, " negative prompt to use for guidance. (default: empty)\n");
    fprintf(stdout, " --cfg-negative-prompt-file FNAME\n");
    fprintf(stdout, " negative prompt file to use for guidance. (default: empty)\n");
    fprintf(stdout, " --cfg-scale N strength of guidance (default: %f, 1.0 = disable)\n", params.cfg_scale);
    fprintf(stdout, " --rope-freq-base N RoPE base frequency (default: %.1f)\n", params.rope_freq_base);
    fprintf(stdout, " --rope-freq-scale N RoPE frequency scaling factor (default: %g)\n", params.rope_freq_scale);
    fprintf(stdout, " --rope-scale N RoPE context linear scaling factor, inverse of --rope-freq-scale (default: %g)\n", 1.0f/params.rope_freq_scale);
    fprintf(stdout, " --rope-freq-base N RoPE base frequency, used by NTK-aware scaling (default: %.1f)\n", params.rope_freq_base);
    fprintf(stdout, " --rope-freq-scale N RoPE frequency linear scaling factor, inverse of --rope-scale (default: %g)\n", params.rope_freq_scale);
    fprintf(stdout, " --ignore-eos ignore end of stream token and continue generating (implies --logit-bias 2-inf)\n");
    fprintf(stdout, " --no-penalize-nl do not penalize newline token\n");
    fprintf(stdout, " --memory-f32 use f32 instead of f16 for memory key+value (default: disabled)\n");
@@ -589,15 +599,16 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) {
    fprintf(stdout, " number of layers to store in VRAM\n");
    fprintf(stdout, " -ts SPLIT --tensor-split SPLIT\n");
    fprintf(stdout, " how to split tensors across multiple GPUs, comma-separated list of proportions, e.g. 3,1\n");
    fprintf(stdout, " -mg i, --main-gpu i the GPU to use for scratch and small tensors\n" );
    fprintf(stdout, " -lv, --low-vram don't allocate VRAM scratch buffer\n" );
    fprintf(stdout, " -mmq, --mul-mat-q use experimental mul_mat_q CUDA kernels instead of cuBLAS. TEMP!!!\n" );
    fprintf(stdout, " Reduces VRAM usage by 700/970/1430 MiB for 7b/13b/33b but prompt processing speed\n" );
    fprintf(stdout, " is still suboptimal, especially q2_K, q3_K, q5_K, and q6_K.\n" );
    fprintf(stdout, " -mg i, --main-gpu i the GPU to use for scratch and small tensors\n");
    fprintf(stdout, " -lv, --low-vram don't allocate VRAM scratch buffer\n");
    fprintf(stdout, " -nommq, --no-mul-mat-q\n");
    fprintf(stdout, " use cuBLAS instead of custom mul_mat_q CUDA kernels.\n");
    fprintf(stdout, " Not recommended since this is both slower and uses more VRAM.\n");
#endif
    fprintf(stdout, " --mtest compute maximum memory usage\n");
    fprintf(stdout, " --export export the computation graph to 'llama.ggml'\n");
    fprintf(stdout, " --verbose-prompt print prompt before generation\n");
    fprintf(stderr, " --simple-io use basic IO for better compatibility in subprocesses and limited consoles\n");
    fprintf(stdout, " --lora FNAME apply LoRA adapter (implies --no-mmap)\n");
    fprintf(stdout, " --lora-base FNAME optional model to use as a base for the layers modified by the LoRA adapter\n");
    fprintf(stdout, " -m FNAME, --model FNAME\n");
@@ -624,24 +635,15 @@ std::string gpt_random_prompt(std::mt19937 & rng) {
    return "The";
}

// TODO: not great allocating this every time
std::vector<llama_token> llama_tokenize(struct llama_context * ctx, const std::string & text, bool add_bos) {
    // initialize to prompt numer of chars, since n_tokens <= n_prompt_chars
    std::vector<llama_token> res(text.size() + (int) add_bos);
    const int n = llama_tokenize(ctx, text.c_str(), res.data(), res.size(), add_bos);
    assert(n >= 0);
    res.resize(n);

    return res;
}
//
// Model utils
//

struct llama_context_params llama_context_params_from_gpt_params(const gpt_params & params) {
    auto lparams = llama_context_default_params();

    lparams.n_ctx = params.n_ctx;
    lparams.n_batch = params.n_batch;
    lparams.n_gqa = params.n_gqa;
    lparams.rms_norm_eps = params.rms_norm_eps;
    lparams.n_gpu_layers = params.n_gpu_layers;
    lparams.main_gpu = params.main_gpu;
    lparams.tensor_split = params.tensor_split;
@@ -659,7 +661,7 @@ struct llama_context_params llama_context_params_from_gpt_params(const gpt_param
    return lparams;
}

std::tuple<struct llama_model *, struct llama_context *> llama_init_from_gpt_params(const gpt_params & params) {
std::tuple<struct llama_model *, struct llama_context *> llama_init_from_gpt_params(gpt_params & params) {
    auto lparams = llama_context_params_from_gpt_params(params);

    llama_model * model = llama_load_model_from_file(params.model.c_str(), lparams);
@@ -688,378 +690,77 @@ std::tuple<struct llama_model *, struct llama_context *> llama_init_from_gpt_par
        }
    }

    if (params.ignore_eos) {
        params.logit_bias[llama_token_eos(lctx)] = -INFINITY;
    }

    return std::make_tuple(model, lctx);
}
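Note the `--ignore-eos` change threaded through this file: the parser used to apply the bias immediately via `params.logit_bias[llama_token_eos()] = -INFINITY;`, but `llama_token_eos()` now needs a live context (with GGUF the special token ids come from the model file), so the flag is merely recorded and the bias is applied inside `llama_init_from_gpt_params` once the context exists. A minimal sketch of this deferred-bias pattern, with hypothetical names:

```cpp
// Sketch only; mirrors the pattern above, not the actual llama.cpp types.
#include <cmath>
#include <unordered_map>

struct sampler_params {
    bool ignore_eos = false;                    // recorded at argv-parse time
    std::unordered_map<int, float> logit_bias;  // token id -> additive bias
};

// Called after the model/context is loaded, when the EOS id is finally known.
void apply_deferred_bias(sampler_params & p, int eos_id) {
    if (p.ignore_eos) {
        p.logit_bias[eos_id] = -INFINITY;       // EOS can never win the sampling step
    }
}
```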
void console_init(console_state & con_st) {
#if defined(_WIN32)
    // Windows-specific console initialization
    DWORD dwMode = 0;
    con_st.hConsole = GetStdHandle(STD_OUTPUT_HANDLE);
    if (con_st.hConsole == INVALID_HANDLE_VALUE || !GetConsoleMode(con_st.hConsole, &dwMode)) {
        con_st.hConsole = GetStdHandle(STD_ERROR_HANDLE);
        if (con_st.hConsole != INVALID_HANDLE_VALUE && (!GetConsoleMode(con_st.hConsole, &dwMode))) {
            con_st.hConsole = NULL;
        }
    }
    if (con_st.hConsole) {
        // Enable ANSI colors on Windows 10+
        if (con_st.use_color && !(dwMode & ENABLE_VIRTUAL_TERMINAL_PROCESSING)) {
            SetConsoleMode(con_st.hConsole, dwMode | ENABLE_VIRTUAL_TERMINAL_PROCESSING);
        }
        // Set console output codepage to UTF8
        SetConsoleOutputCP(CP_UTF8);
    }
    HANDLE hConIn = GetStdHandle(STD_INPUT_HANDLE);
    if (hConIn != INVALID_HANDLE_VALUE && GetConsoleMode(hConIn, &dwMode)) {
        // Set console input codepage to UTF16
        _setmode(_fileno(stdin), _O_WTEXT);
//
// Vocab utils
//

        // Turn off ICANON (ENABLE_LINE_INPUT) and ECHO (ENABLE_ECHO_INPUT)
        dwMode &= ~(ENABLE_LINE_INPUT | ENABLE_ECHO_INPUT);
        SetConsoleMode(hConIn, dwMode);
    }
#else
    // POSIX-specific console initialization
    struct termios new_termios;
    tcgetattr(STDIN_FILENO, &con_st.prev_state);
    new_termios = con_st.prev_state;
    new_termios.c_lflag &= ~(ICANON | ECHO);
    new_termios.c_cc[VMIN] = 1;
    new_termios.c_cc[VTIME] = 0;
    tcsetattr(STDIN_FILENO, TCSANOW, &new_termios);

    con_st.tty = fopen("/dev/tty", "w+");
    if (con_st.tty != nullptr) {
        con_st.out = con_st.tty;
    }

    setlocale(LC_ALL, "");
#endif
}

void console_cleanup(console_state & con_st) {
    // Reset console color
    console_set_color(con_st, CONSOLE_COLOR_DEFAULT);

#if !defined(_WIN32)
    if (con_st.tty != nullptr) {
        con_st.out = stdout;
        fclose(con_st.tty);
        con_st.tty = nullptr;
    }
    // Restore the terminal settings on POSIX systems
    tcsetattr(STDIN_FILENO, TCSANOW, &con_st.prev_state);
#endif
}

/* Keep track of current color of output, and emit ANSI code if it changes. */
void console_set_color(console_state & con_st, console_color_t color) {
    if (con_st.use_color && con_st.color != color) {
        fflush(stdout);
        switch(color) {
            case CONSOLE_COLOR_DEFAULT:
                fprintf(con_st.out, ANSI_COLOR_RESET);
                break;
            case CONSOLE_COLOR_PROMPT:
                fprintf(con_st.out, ANSI_COLOR_YELLOW);
                break;
            case CONSOLE_COLOR_USER_INPUT:
                fprintf(con_st.out, ANSI_BOLD ANSI_COLOR_GREEN);
                break;
            case CONSOLE_COLOR_ERROR:
                fprintf(con_st.out, ANSI_BOLD ANSI_COLOR_RED);
                break;
        }
        con_st.color = color;
        fflush(con_st.out);
    }
}

char32_t getchar32() {
#if defined(_WIN32)
    HANDLE hConsole = GetStdHandle(STD_INPUT_HANDLE);
    wchar_t high_surrogate = 0;

    while (true) {
        INPUT_RECORD record;
        DWORD count;
        if (!ReadConsoleInputW(hConsole, &record, 1, &count) || count == 0) {
            return WEOF;
        }

        if (record.EventType == KEY_EVENT && record.Event.KeyEvent.bKeyDown) {
            wchar_t wc = record.Event.KeyEvent.uChar.UnicodeChar;
            if (wc == 0) {
                continue;
            }

            if ((wc >= 0xD800) && (wc <= 0xDBFF)) { // Check if wc is a high surrogate
                high_surrogate = wc;
                continue;
            } else if ((wc >= 0xDC00) && (wc <= 0xDFFF)) { // Check if wc is a low surrogate
                if (high_surrogate != 0) { // Check if we have a high surrogate
                    return ((high_surrogate - 0xD800) << 10) + (wc - 0xDC00) + 0x10000;
                }
            }

            high_surrogate = 0; // Reset the high surrogate
            return static_cast<char32_t>(wc);
        }
    }
#else
    wchar_t wc = getwchar();
    if (static_cast<wint_t>(wc) == WEOF) {
        return WEOF;
    }

#if WCHAR_MAX == 0xFFFF
    if ((wc >= 0xD800) && (wc <= 0xDBFF)) { // Check if wc is a high surrogate
        wchar_t low_surrogate = getwchar();
        if ((low_surrogate >= 0xDC00) && (low_surrogate <= 0xDFFF)) { // Check if the next wchar is a low surrogate
            return (static_cast<char32_t>(wc & 0x03FF) << 10) + (low_surrogate & 0x03FF) + 0x10000;
        }
    }
    if ((wc >= 0xD800) && (wc <= 0xDFFF)) { // Invalid surrogate pair
        return 0xFFFD; // Return the replacement character U+FFFD
    }
#endif

    return static_cast<char32_t>(wc);
#endif
}

void pop_cursor(console_state & con_st) {
#if defined(_WIN32)
    if (con_st.hConsole != NULL) {
        CONSOLE_SCREEN_BUFFER_INFO bufferInfo;
        GetConsoleScreenBufferInfo(con_st.hConsole, &bufferInfo);

        COORD newCursorPosition = bufferInfo.dwCursorPosition;
        if (newCursorPosition.X == 0) {
            newCursorPosition.X = bufferInfo.dwSize.X - 1;
            newCursorPosition.Y -= 1;
        } else {
            newCursorPosition.X -= 1;
        }

        SetConsoleCursorPosition(con_st.hConsole, newCursorPosition);
        return;
    }
#endif
    putc('\b', con_st.out);
}

int estimateWidth(char32_t codepoint) {
#if defined(_WIN32)
    return 1;
#else
    return wcwidth(codepoint);
#endif
}

int put_codepoint(console_state & con_st, const char* utf8_codepoint, size_t length, int expectedWidth) {
#if defined(_WIN32)
    CONSOLE_SCREEN_BUFFER_INFO bufferInfo;
    if (!GetConsoleScreenBufferInfo(con_st.hConsole, &bufferInfo)) {
        // go with the default
        return expectedWidth;
    }
    COORD initialPosition = bufferInfo.dwCursorPosition;
    DWORD nNumberOfChars = length;
    WriteConsole(con_st.hConsole, utf8_codepoint, nNumberOfChars, &nNumberOfChars, NULL);

    CONSOLE_SCREEN_BUFFER_INFO newBufferInfo;
    GetConsoleScreenBufferInfo(con_st.hConsole, &newBufferInfo);

    // Figure out our real position if we're in the last column
    if (utf8_codepoint[0] != 0x09 && initialPosition.X == newBufferInfo.dwSize.X - 1) {
        DWORD nNumberOfChars;
        WriteConsole(con_st.hConsole, &" \b", 2, &nNumberOfChars, NULL);
        GetConsoleScreenBufferInfo(con_st.hConsole, &newBufferInfo);
    }

    int width = newBufferInfo.dwCursorPosition.X - initialPosition.X;
    if (width < 0) {
        width += newBufferInfo.dwSize.X;
    }
    return width;
#else
    // we can trust expectedWidth if we've got one
    if (expectedWidth >= 0 || con_st.tty == nullptr) {
        fwrite(utf8_codepoint, length, 1, con_st.out);
        return expectedWidth;
    }

    fputs("\033[6n", con_st.tty); // Query cursor position
    int x1, x2, y1, y2;
    int results = 0;
    results = fscanf(con_st.tty, "\033[%d;%dR", &y1, &x1);

    fwrite(utf8_codepoint, length, 1, con_st.tty);

    fputs("\033[6n", con_st.tty); // Query cursor position
    results += fscanf(con_st.tty, "\033[%d;%dR", &y2, &x2);

    if (results != 4) {
        return expectedWidth;
    }

    int width = x2 - x1;
    if (width < 0) {
        // Calculate the width considering text wrapping
        struct winsize w;
        ioctl(STDOUT_FILENO, TIOCGWINSZ, &w);
        width += w.ws_col;
    }
    return width;
#endif
}

void replace_last(console_state & con_st, char ch) {
#if defined(_WIN32)
    pop_cursor(con_st);
    put_codepoint(con_st, &ch, 1, 1);
#else
    fprintf(con_st.out, "\b%c", ch);
#endif
}

void append_utf8(char32_t ch, std::string & out) {
    if (ch <= 0x7F) {
        out.push_back(static_cast<unsigned char>(ch));
    } else if (ch <= 0x7FF) {
        out.push_back(static_cast<unsigned char>(0xC0 | ((ch >> 6) & 0x1F)));
        out.push_back(static_cast<unsigned char>(0x80 | (ch & 0x3F)));
    } else if (ch <= 0xFFFF) {
        out.push_back(static_cast<unsigned char>(0xE0 | ((ch >> 12) & 0x0F)));
        out.push_back(static_cast<unsigned char>(0x80 | ((ch >> 6) & 0x3F)));
        out.push_back(static_cast<unsigned char>(0x80 | (ch & 0x3F)));
    } else if (ch <= 0x10FFFF) {
        out.push_back(static_cast<unsigned char>(0xF0 | ((ch >> 18) & 0x07)));
        out.push_back(static_cast<unsigned char>(0x80 | ((ch >> 12) & 0x3F)));
        out.push_back(static_cast<unsigned char>(0x80 | ((ch >> 6) & 0x3F)));
        out.push_back(static_cast<unsigned char>(0x80 | (ch & 0x3F)));
std::vector<llama_token> llama_tokenize(
        struct llama_context * ctx,
        const std::string & text,
        bool add_bos) {
    // upper limit for the number of tokens
    int n_tokens = text.length() + add_bos;
    std::vector<llama_token> result(n_tokens);
    n_tokens = llama_tokenize(ctx, text.c_str(), result.data(), result.size(), add_bos);
    if (n_tokens < 0) {
        result.resize(-n_tokens);
        int check = llama_tokenize(ctx, text.c_str(), result.data(), result.size(), add_bos);
        GGML_ASSERT(check == -n_tokens);
    } else {
        // Invalid Unicode code point
        result.resize(n_tokens);
    }
    return result;
}
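The new `llama_tokenize` wrapper above (and `llama_token_to_str`/`llama_tokenize_bpe` below) all follow the same two-pass convention: the C API returns the number of items written, or the negated required count when the buffer is too small, in which case the caller resizes and retries. A generic sketch of the pattern, assuming only that convention:

```cpp
// a sketch of the size-query convention used by the wrappers on this page;
// f(buf, size) returns items written, or -(required size) on overflow
#include <cassert>
#include <vector>

template <typename T, typename F>
std::vector<T> grow_and_retry(F f, int initial_size) {
    std::vector<T> result(initial_size);
    int n = f(result.data(), (int) result.size());
    if (n < 0) {
        result.resize(-n);                   // negative return encodes the needed size
        const int check = f(result.data(), (int) result.size());
        assert(check == -n);                 // the second pass must agree exactly
        (void) check;
    } else {
        result.resize(n);                    // shrink to what was actually written
    }
    return result;
}
```

With `f = [&](llama_token * buf, int size) { return llama_tokenize(ctx, text.c_str(), buf, size, add_bos); }` this reproduces the wrapper above.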
|
||||
// Helper function to remove the last UTF-8 character from a string
void pop_back_utf8_char(std::string & line) {
    if (line.empty()) {
        return;
    }

    size_t pos = line.length() - 1;

    // Find the start of the last UTF-8 character (checking up to 4 bytes back)
    for (size_t i = 0; i < 3 && pos > 0; ++i, --pos) {
        if ((line[pos] & 0xC0) != 0x80) break; // Found the start of the character
    }
    line.erase(pos);
}

bool console_readline(console_state & con_st, std::string & line) {
    console_set_color(con_st, CONSOLE_COLOR_USER_INPUT);
    if (con_st.out != stdout) {
        fflush(stdout);
    }

    line.clear();
    std::vector<int> widths;
    bool is_special_char = false;
    bool end_of_stream = false;

    char32_t input_char;
    while (true) {
        fflush(con_st.out); // Ensure all output is displayed before waiting for input
        input_char = getchar32();

        if (input_char == '\r' || input_char == '\n') {
            break;
        }

        if (input_char == (char32_t) WEOF || input_char == 0x04 /* Ctrl+D */) {
            end_of_stream = true;
            break;
        }

        if (is_special_char) {
            console_set_color(con_st, CONSOLE_COLOR_USER_INPUT);
            replace_last(con_st, line.back());
            is_special_char = false;
        }

        if (input_char == '\033') { // Escape sequence
            char32_t code = getchar32();
            if (code == '[' || code == 0x1B) {
                // Discard the rest of the escape sequence
                while ((code = getchar32()) != (char32_t) WEOF) {
                    if ((code >= 'A' && code <= 'Z') || (code >= 'a' && code <= 'z') || code == '~') {
                        break;
                    }
                }
            }
        } else if (input_char == 0x08 || input_char == 0x7F) { // Backspace
            if (!widths.empty()) {
                int count;
                do {
                    count = widths.back();
                    widths.pop_back();
                    // Move cursor back, print space, and move cursor back again
                    for (int i = 0; i < count; i++) {
                        replace_last(con_st, ' ');
                        pop_cursor(con_st);
                    }
                    pop_back_utf8_char(line);
                } while (count == 0 && !widths.empty());
            }
        } else {
            int offset = line.length();
            append_utf8(input_char, line);
            int width = put_codepoint(con_st, line.c_str() + offset, line.length() - offset, estimateWidth(input_char));
            if (width < 0) {
                width = 0;
            }
            widths.push_back(width);
        }

        if (!line.empty() && (line.back() == '\\' || line.back() == '/')) {
            console_set_color(con_st, CONSOLE_COLOR_PROMPT);
            replace_last(con_st, line.back());
            is_special_char = true;
        }
    }

    bool has_more = con_st.multiline_input;
    if (is_special_char) {
        replace_last(con_st, ' ');
        pop_cursor(con_st);

        char last = line.back();
        line.pop_back();
        if (last == '\\') {
            line += '\n';
            fputc('\n', con_st.out);
            has_more = !has_more;
        } else {
            // llama will just eat the single space, it won't act as a space
            if (line.length() == 1 && line.back() == ' ') {
                line.clear();
                pop_cursor(con_st);
            }
            has_more = false;
        }
    } else {
        if (end_of_stream) {
            has_more = false;
        } else {
            line += '\n';
            fputc('\n', con_st.out);
        }
    }

    fflush(con_st.out);
    return has_more;
}

std::string llama_token_to_str(const struct llama_context * ctx, llama_token token) {
    std::vector<char> result(8, 0);
    const int n_tokens = llama_token_to_str(ctx, token, result.data(), result.size());
    if (n_tokens < 0) {
        result.resize(-n_tokens);
        int check = llama_token_to_str(ctx, token, result.data(), result.size());
        GGML_ASSERT(check == -n_tokens);
    } else {
        result.resize(n_tokens);
    }
    return std::string(result.data(), result.size());
}
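// The BPE variants below mirror the helpers above, but go through the
// byte-pair-encoding vocab path (used by GPT-2 style tokenizers such as Falcon's).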
std::vector<llama_token> llama_tokenize_bpe(
        struct llama_context * ctx,
        const std::string & text,
        bool add_bos) {
    int n_tokens = text.length() + add_bos;
    std::vector<llama_token> result(n_tokens);
    n_tokens = llama_tokenize_bpe(ctx, text.c_str(), result.data(), result.size(), add_bos);
    if (n_tokens < 0) {
        result.resize(-n_tokens);
        int check = llama_tokenize_bpe(ctx, text.c_str(), result.data(), result.size(), add_bos);
        GGML_ASSERT(check == -n_tokens);
    } else {
        result.resize(n_tokens);
    }
    return result;
}

std::string llama_token_to_str_bpe(const struct llama_context * ctx, llama_token token) {
    std::vector<char> result(8, 0);
    const int n_tokens = llama_token_to_str_bpe(ctx, token, result.data(), result.size());
    if (n_tokens < 0) {
        result.resize(-n_tokens);
        const int check = llama_token_to_str_bpe(ctx, token, result.data(), result.size());
        GGML_ASSERT(check == -n_tokens);
    } else {
        result.resize(n_tokens);
    }

    return std::string(result.data(), result.size());
}
@@ -11,11 +11,6 @@
#include <unordered_map>
#include <tuple>

#if !defined (_WIN32)
#include <stdio.h>
#include <termios.h>
#endif

//
// CLI argument parsing
//
@@ -27,19 +22,16 @@ struct gpt_params {
    int32_t n_predict = -1; // new tokens to predict
    int32_t n_ctx = 512; // context size
    int32_t n_batch = 512; // batch size for prompt processing (must be >=32 to use BLAS)
    int32_t n_gqa = 1; // grouped-query attention factor (TODO: move to hparams)
    int32_t n_keep = 0; // number of tokens to keep from initial prompt
    int32_t n_chunks = -1; // max number of chunks to process (-1 = unlimited)
    int32_t n_gpu_layers = 0; // number of layers to store in VRAM
    int32_t main_gpu = 0; // the GPU that is used for scratch and small tensors
    float tensor_split[LLAMA_MAX_DEVICES] = {0}; // how split tensors should be distributed across GPUs
    int32_t n_probs = 0; // if greater than 0, output the probabilities of top n_probs tokens.
    float rms_norm_eps = LLAMA_DEFAULT_RMS_EPS; // rms norm epsilon
    float rope_freq_base = 10000.0f; // RoPE base frequency
    float rope_freq_scale = 1.0f; // RoPE frequency scaling factor

    // sampling parameters
    std::unordered_map<llama_token, float> logit_bias; // logit bias for specific tokens
    int32_t top_k = 40; // <= 0 to use vocab size
    float top_p = 0.95f; // 1.0 = disabled
    float tfs_z = 1.00f; // 1.0 = disabled
@@ -53,12 +45,14 @@ struct gpt_params {
    float mirostat_tau = 5.00f; // target entropy
    float mirostat_eta = 0.10f; // learning rate

    std::unordered_map<llama_token, float> logit_bias; // logit bias for specific tokens

    // Classifier-Free Guidance
    // https://arxiv.org/abs/2306.17806
    std::string cfg_negative_prompt; // string to help guidance
    float cfg_scale = 1.f; // How strong is guidance

    std::string model = "models/7B/ggml-model.bin"; // model path
    std::string model = "models/7B/ggml-model-f16.gguf"; // model path
    std::string model_alias = "unknown"; // model alias
    std::string prompt = "";
    std::string path_prompt_cache = ""; // path to file for saving/loading prompt eval state
@@ -74,7 +68,7 @@ struct gpt_params {
    size_t hellaswag_tasks = 400; // number of tasks to use when computing the HellaSwag score

    bool low_vram = false; // if true, reduce VRAM usage at the cost of performance
    bool mul_mat_q = false; // if true, use experimental mul_mat_q kernels
    bool mul_mat_q = true; // if true, use mul_mat_q kernels instead of cuBLAS
    bool memory_f16 = true; // use f16 instead of f32 for memory kv
    bool random_prompt = false; // do not randomize prompt if none provided
    bool use_color = false; // use color to distinguish generations and inputs
@@ -85,8 +79,10 @@ struct gpt_params {
    bool embedding = false; // get only sentence embedding
    bool interactive_first = false; // wait for user input immediately
    bool multiline_input = false; // reverse the usage of `\`
    bool simple_io = false; // improves compatibility with subprocesses and limited consoles

    bool input_prefix_bos = false; // prefix BOS to user inputs, preceding input_prefix
    bool ignore_eos = false; // ignore generated EOS tokens
    bool instruct = false; // instruction mode (used for Alpaca models)
    bool penalize_nl = true; // consider newlines as a repeatable token
    bool perplexity = false; // compute perplexity over the prompt
@@ -104,54 +100,31 @@ void gpt_print_usage(int argc, char ** argv, const gpt_params & params);

std::string gpt_random_prompt(std::mt19937 & rng);

//
// Vocab utils
//

std::vector<llama_token> llama_tokenize(struct llama_context * ctx, const std::string & text, bool add_bos);

//
// Model utils
//

std::tuple<struct llama_model *, struct llama_context *> llama_init_from_gpt_params(const gpt_params & params);
std::tuple<struct llama_model *, struct llama_context *> llama_init_from_gpt_params(gpt_params & params);
struct llama_context_params llama_context_params_from_gpt_params(const gpt_params & params);

//
// Console utils
// Vocab utils
//

#define ANSI_COLOR_RED "\x1b[31m"
#define ANSI_COLOR_GREEN "\x1b[32m"
#define ANSI_COLOR_YELLOW "\x1b[33m"
#define ANSI_COLOR_BLUE "\x1b[34m"
#define ANSI_COLOR_MAGENTA "\x1b[35m"
#define ANSI_COLOR_CYAN "\x1b[36m"
#define ANSI_COLOR_RESET "\x1b[0m"
#define ANSI_BOLD "\x1b[1m"
std::vector<llama_token> llama_tokenize(
        struct llama_context * ctx,
        const std::string & text,
        bool add_bos);

enum console_color_t {
    CONSOLE_COLOR_DEFAULT=0,
    CONSOLE_COLOR_PROMPT,
    CONSOLE_COLOR_USER_INPUT,
    CONSOLE_COLOR_ERROR
};
std::vector<llama_token> llama_tokenize_bpe(
        struct llama_context * ctx,
        const std::string & text,
        bool add_bos);

struct console_state {
    bool multiline_input = false;
    bool use_color = false;
    console_color_t color = CONSOLE_COLOR_DEFAULT;
std::string llama_token_to_str(
        const struct llama_context * ctx,
        llama_token token);

    FILE* out = stdout;
#if defined (_WIN32)
    void* hConsole;
#else
    FILE* tty = nullptr;
    termios prev_state;
#endif
};

void console_init(console_state & con_st);
void console_cleanup(console_state & con_st);
void console_set_color(console_state & con_st, console_color_t color);
bool console_readline(console_state & con_st, std::string & line);
std::string llama_token_to_str_bpe(
        const struct llama_context * ctx,
        llama_token token);
500
common/console.cpp
Normal file
@@ -0,0 +1,500 @@
#include "console.h"
#include <vector>
#include <iostream>

#if defined(_WIN32)
#define WIN32_LEAN_AND_MEAN
#ifndef NOMINMAX
#define NOMINMAX
#endif
#include <windows.h>
#include <fcntl.h>
#include <io.h>
#ifndef ENABLE_VIRTUAL_TERMINAL_PROCESSING
#define ENABLE_VIRTUAL_TERMINAL_PROCESSING 0x0004
#endif
#else
#include <climits>
#include <sys/ioctl.h>
#include <unistd.h>
#include <wchar.h>
#include <stdio.h>
#include <stdlib.h>
#include <signal.h>
#include <termios.h>
#endif

#define ANSI_COLOR_RED "\x1b[31m"
#define ANSI_COLOR_GREEN "\x1b[32m"
#define ANSI_COLOR_YELLOW "\x1b[33m"
#define ANSI_COLOR_BLUE "\x1b[34m"
#define ANSI_COLOR_MAGENTA "\x1b[35m"
#define ANSI_COLOR_CYAN "\x1b[36m"
#define ANSI_COLOR_RESET "\x1b[0m"
#define ANSI_BOLD "\x1b[1m"

namespace console {

    //
    // Console state
    //

    static bool advanced_display = false;
    static bool simple_io = true;
    static display_t current_display = reset;

    static FILE* out = stdout;

#if defined (_WIN32)
    static void* hConsole;
#else
    static FILE* tty = nullptr;
    static termios initial_state;
#endif

    //
    // Init and cleanup
    //

    void init(bool use_simple_io, bool use_advanced_display) {
        advanced_display = use_advanced_display;
        simple_io = use_simple_io;
#if defined(_WIN32)
        // Windows-specific console initialization
        DWORD dwMode = 0;
        hConsole = GetStdHandle(STD_OUTPUT_HANDLE);
        if (hConsole == INVALID_HANDLE_VALUE || !GetConsoleMode(hConsole, &dwMode)) {
            hConsole = GetStdHandle(STD_ERROR_HANDLE);
            if (hConsole != INVALID_HANDLE_VALUE && (!GetConsoleMode(hConsole, &dwMode))) {
                hConsole = nullptr;
                simple_io = true;
            }
        }
        if (hConsole) {
            // Check conditions combined to reduce nesting
            if (advanced_display && !(dwMode & ENABLE_VIRTUAL_TERMINAL_PROCESSING) &&
                !SetConsoleMode(hConsole, dwMode | ENABLE_VIRTUAL_TERMINAL_PROCESSING)) {
                advanced_display = false;
            }
            // Set console output codepage to UTF8
            SetConsoleOutputCP(CP_UTF8);
        }
        HANDLE hConIn = GetStdHandle(STD_INPUT_HANDLE);
        if (hConIn != INVALID_HANDLE_VALUE && GetConsoleMode(hConIn, &dwMode)) {
            // Set console input codepage to UTF16
            _setmode(_fileno(stdin), _O_WTEXT);

            // Set ICANON (ENABLE_LINE_INPUT) and ECHO (ENABLE_ECHO_INPUT)
            if (simple_io) {
                dwMode |= ENABLE_LINE_INPUT | ENABLE_ECHO_INPUT;
            } else {
                dwMode &= ~(ENABLE_LINE_INPUT | ENABLE_ECHO_INPUT);
            }
            if (!SetConsoleMode(hConIn, dwMode)) {
                simple_io = true;
            }
        }
#else
        // POSIX-specific console initialization
        if (!simple_io) {
            struct termios new_termios;
            tcgetattr(STDIN_FILENO, &initial_state);
            new_termios = initial_state;
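            // Disable canonical mode and echo: input is then delivered one
            // keypress at a time and drawn by us, which keeps the width
            // bookkeeping in readline_advanced() accurate. VMIN=1/VTIME=0
            // blocks until a single byte arrives.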
            new_termios.c_lflag &= ~(ICANON | ECHO);
            new_termios.c_cc[VMIN] = 1;
            new_termios.c_cc[VTIME] = 0;
            tcsetattr(STDIN_FILENO, TCSANOW, &new_termios);

            tty = fopen("/dev/tty", "w+");
            if (tty != nullptr) {
                out = tty;
            }
        }

        setlocale(LC_ALL, "");
#endif
    }

    void cleanup() {
        // Reset console display
        set_display(reset);

#if !defined(_WIN32)
        // Restore settings on POSIX systems
        if (!simple_io) {
            if (tty != nullptr) {
                out = stdout;
                fclose(tty);
                tty = nullptr;
            }
            tcsetattr(STDIN_FILENO, TCSANOW, &initial_state);
        }
#endif
    }

    //
    // Display and IO
    //

    // Keep track of current display and only emit ANSI code if it changes
    void set_display(display_t display) {
        if (advanced_display && current_display != display) {
            fflush(stdout);
            switch(display) {
                case reset:
                    fprintf(out, ANSI_COLOR_RESET);
                    break;
                case prompt:
                    fprintf(out, ANSI_COLOR_YELLOW);
                    break;
                case user_input:
                    fprintf(out, ANSI_BOLD ANSI_COLOR_GREEN);
                    break;
                case error:
                    fprintf(out, ANSI_BOLD ANSI_COLOR_RED);
            }
            current_display = display;
            fflush(out);
        }
    }

    char32_t getchar32() {
#if defined(_WIN32)
        HANDLE hConsole = GetStdHandle(STD_INPUT_HANDLE);
        wchar_t high_surrogate = 0;

        while (true) {
            INPUT_RECORD record;
            DWORD count;
            if (!ReadConsoleInputW(hConsole, &record, 1, &count) || count == 0) {
                return WEOF;
            }

            if (record.EventType == KEY_EVENT && record.Event.KeyEvent.bKeyDown) {
                wchar_t wc = record.Event.KeyEvent.uChar.UnicodeChar;
                if (wc == 0) {
                    continue;
                }

                if ((wc >= 0xD800) && (wc <= 0xDBFF)) { // Check if wc is a high surrogate
                    high_surrogate = wc;
                    continue;
                }
                if ((wc >= 0xDC00) && (wc <= 0xDFFF)) { // Check if wc is a low surrogate
                    if (high_surrogate != 0) { // Check if we have a high surrogate
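                        // Combine the UTF-16 pair into one code point:
                        // U = 0x10000 + ((hi - 0xD800) << 10) + (lo - 0xDC00)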
                        return ((high_surrogate - 0xD800) << 10) + (wc - 0xDC00) + 0x10000;
                    }
                }

                high_surrogate = 0; // Reset the high surrogate
                return static_cast<char32_t>(wc);
            }
        }
#else
        wchar_t wc = getwchar();
        if (static_cast<wint_t>(wc) == WEOF) {
            return WEOF;
        }

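        // On platforms where wchar_t is only 16 bits wide, getwchar() yields
        // UTF-16 code units rather than full code points, so surrogate pairs
        // must be combined here as well.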
#if WCHAR_MAX == 0xFFFF
        if ((wc >= 0xD800) && (wc <= 0xDBFF)) { // Check if wc is a high surrogate
            wchar_t low_surrogate = getwchar();
            if ((low_surrogate >= 0xDC00) && (low_surrogate <= 0xDFFF)) { // Check if the next wchar is a low surrogate
                return (static_cast<char32_t>(wc & 0x03FF) << 10) + (low_surrogate & 0x03FF) + 0x10000;
            }
        }
        if ((wc >= 0xD800) && (wc <= 0xDFFF)) { // Invalid surrogate pair
            return 0xFFFD; // Return the replacement character U+FFFD
        }
#endif

        return static_cast<char32_t>(wc);
#endif
    }

    void pop_cursor() {
#if defined(_WIN32)
        if (hConsole != NULL) {
            CONSOLE_SCREEN_BUFFER_INFO bufferInfo;
            GetConsoleScreenBufferInfo(hConsole, &bufferInfo);

            COORD newCursorPosition = bufferInfo.dwCursorPosition;
            if (newCursorPosition.X == 0) {
                newCursorPosition.X = bufferInfo.dwSize.X - 1;
                newCursorPosition.Y -= 1;
            } else {
                newCursorPosition.X -= 1;
            }

            SetConsoleCursorPosition(hConsole, newCursorPosition);
            return;
        }
#endif
        putc('\b', out);
    }

    int estimateWidth(char32_t codepoint) {
#if defined(_WIN32)
        return 1;
#else
        return wcwidth(codepoint);
#endif
    }

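    // wcwidth() reports how many terminal columns a code point occupies
    // (e.g. 2 for wide CJK glyphs, -1 for non-printable input); a negative
    // estimate makes put_codepoint() fall back to measuring the actual
    // cursor movement itself.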
    int put_codepoint(const char* utf8_codepoint, size_t length, int expectedWidth) {
#if defined(_WIN32)
        CONSOLE_SCREEN_BUFFER_INFO bufferInfo;
        if (!GetConsoleScreenBufferInfo(hConsole, &bufferInfo)) {
            // go with the default
            return expectedWidth;
        }
        COORD initialPosition = bufferInfo.dwCursorPosition;
        DWORD nNumberOfChars = length;
        WriteConsole(hConsole, utf8_codepoint, nNumberOfChars, &nNumberOfChars, NULL);

        CONSOLE_SCREEN_BUFFER_INFO newBufferInfo;
        GetConsoleScreenBufferInfo(hConsole, &newBufferInfo);

        // Figure out our real position if we're in the last column
        if (utf8_codepoint[0] != 0x09 && initialPosition.X == newBufferInfo.dwSize.X - 1) {
            DWORD nNumberOfChars;
            WriteConsole(hConsole, &" \b", 2, &nNumberOfChars, NULL);
            GetConsoleScreenBufferInfo(hConsole, &newBufferInfo);
        }

        int width = newBufferInfo.dwCursorPosition.X - initialPosition.X;
        if (width < 0) {
            width += newBufferInfo.dwSize.X;
        }
        return width;
#else
        // We can trust expectedWidth if we've got one
        if (expectedWidth >= 0 || tty == nullptr) {
            fwrite(utf8_codepoint, length, 1, out);
            return expectedWidth;
        }

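        // "\033[6n" is the ANSI cursor-position query; the terminal replies
        // with "\033[<row>;<col>R", which the fscanf calls below parse back.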
fputs("\033[6n", tty); // Query cursor position
|
||||
int x1;
|
||||
int y1;
|
||||
int x2;
|
||||
int y2;
|
||||
int results = 0;
|
||||
results = fscanf(tty, "\033[%d;%dR", &y1, &x1);
|
||||
|
||||
fwrite(utf8_codepoint, length, 1, tty);
|
||||
|
||||
fputs("\033[6n", tty); // Query cursor position
|
||||
results += fscanf(tty, "\033[%d;%dR", &y2, &x2);
|
||||
|
||||
if (results != 4) {
|
||||
return expectedWidth;
|
||||
}
|
||||
|
||||
int width = x2 - x1;
|
||||
if (width < 0) {
|
||||
// Calculate the width considering text wrapping
|
||||
struct winsize w;
|
||||
ioctl(STDOUT_FILENO, TIOCGWINSZ, &w);
|
||||
width += w.ws_col;
|
||||
}
|
||||
return width;
|
||||
#endif
|
||||
}
|
||||
|
||||
    void replace_last(char ch) {
#if defined(_WIN32)
        pop_cursor();
        put_codepoint(&ch, 1, 1);
#else
        fprintf(out, "\b%c", ch);
#endif
    }

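    // Minimal UTF-8 encoder for a single code point. For example, U+00E9 ('é')
    // encodes to 0xC3 0xA9 and U+1F600 to 0xF0 0x9F 0x98 0x80.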
    void append_utf8(char32_t ch, std::string & out) {
        if (ch <= 0x7F) {
            out.push_back(static_cast<unsigned char>(ch));
        } else if (ch <= 0x7FF) {
            out.push_back(static_cast<unsigned char>(0xC0 | ((ch >> 6) & 0x1F)));
            out.push_back(static_cast<unsigned char>(0x80 | (ch & 0x3F)));
        } else if (ch <= 0xFFFF) {
            out.push_back(static_cast<unsigned char>(0xE0 | ((ch >> 12) & 0x0F)));
            out.push_back(static_cast<unsigned char>(0x80 | ((ch >> 6) & 0x3F)));
            out.push_back(static_cast<unsigned char>(0x80 | (ch & 0x3F)));
        } else if (ch <= 0x10FFFF) {
            out.push_back(static_cast<unsigned char>(0xF0 | ((ch >> 18) & 0x07)));
            out.push_back(static_cast<unsigned char>(0x80 | ((ch >> 12) & 0x3F)));
            out.push_back(static_cast<unsigned char>(0x80 | ((ch >> 6) & 0x3F)));
            out.push_back(static_cast<unsigned char>(0x80 | (ch & 0x3F)));
        } else {
            // Invalid Unicode code point
        }
    }

    // Helper function to remove the last UTF-8 character from a string
    void pop_back_utf8_char(std::string & line) {
        if (line.empty()) {
            return;
        }

        size_t pos = line.length() - 1;

        // Find the start of the last UTF-8 character (checking up to 4 bytes back)
        for (size_t i = 0; i < 3 && pos > 0; ++i, --pos) {
            if ((line[pos] & 0xC0) != 0x80) {
                break; // Found the start of the character
            }
        }
        line.erase(pos);
    }

    bool readline_advanced(std::string & line, bool multiline_input) {
        if (out != stdout) {
            fflush(stdout);
        }

        line.clear();
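        // Terminal width of each code point as drawn, so backspace can erase
        // exactly the columns that were emitted (including zero-width marks).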
        std::vector<int> widths;
        bool is_special_char = false;
        bool end_of_stream = false;

        char32_t input_char;
        while (true) {
            fflush(out); // Ensure all output is displayed before waiting for input
            input_char = getchar32();

            if (input_char == '\r' || input_char == '\n') {
                break;
            }

            if (input_char == (char32_t) WEOF || input_char == 0x04 /* Ctrl+D */) {
                end_of_stream = true;
                break;
            }

            if (is_special_char) {
                set_display(user_input);
                replace_last(line.back());
                is_special_char = false;
            }

            if (input_char == '\033') { // Escape sequence
                char32_t code = getchar32();
                if (code == '[' || code == 0x1B) {
                    // Discard the rest of the escape sequence
                    while ((code = getchar32()) != (char32_t) WEOF) {
                        if ((code >= 'A' && code <= 'Z') || (code >= 'a' && code <= 'z') || code == '~') {
                            break;
                        }
                    }
                }
            } else if (input_char == 0x08 || input_char == 0x7F) { // Backspace
                if (!widths.empty()) {
                    int count;
                    do {
                        count = widths.back();
                        widths.pop_back();
                        // Move cursor back, print space, and move cursor back again
                        for (int i = 0; i < count; i++) {
                            replace_last(' ');
                            pop_cursor();
                        }
                        pop_back_utf8_char(line);
                    } while (count == 0 && !widths.empty());
                }
            } else {
                int offset = line.length();
                append_utf8(input_char, line);
                int width = put_codepoint(line.c_str() + offset, line.length() - offset, estimateWidth(input_char));
                if (width < 0) {
                    width = 0;
                }
                widths.push_back(width);
            }

            if (!line.empty() && (line.back() == '\\' || line.back() == '/')) {
                set_display(prompt);
                replace_last(line.back());
                is_special_char = true;
            }
        }

        bool has_more = multiline_input;
        if (is_special_char) {
            replace_last(' ');
            pop_cursor();

            char last = line.back();
            line.pop_back();
            if (last == '\\') {
                line += '\n';
                fputc('\n', out);
                has_more = !has_more;
            } else {
                // llama will just eat the single space, it won't act as a space
                if (line.length() == 1 && line.back() == ' ') {
                    line.clear();
                    pop_cursor();
                }
                has_more = false;
            }
        } else {
            if (end_of_stream) {
                has_more = false;
            } else {
                line += '\n';
                fputc('\n', out);
            }
        }

        fflush(out);
        return has_more;
    }

    bool readline_simple(std::string & line, bool multiline_input) {
#if defined(_WIN32)
        std::wstring wline;
        if (!std::getline(std::wcin, wline)) {
            // Input stream is bad or EOF received
            line.clear();
            GenerateConsoleCtrlEvent(CTRL_C_EVENT, 0);
            return false;
        }

        int size_needed = WideCharToMultiByte(CP_UTF8, 0, &wline[0], (int)wline.size(), NULL, 0, NULL, NULL);
        line.resize(size_needed);
        WideCharToMultiByte(CP_UTF8, 0, &wline[0], (int)wline.size(), &line[0], size_needed, NULL, NULL);
#else
        if (!std::getline(std::cin, line)) {
            // Input stream is bad or EOF received
            line.clear();
            return false;
        }
#endif
        if (!line.empty()) {
            char last = line.back();
            if (last == '/') { // Always return control on '/' symbol
                line.pop_back();
                return false;
            }
            if (last == '\\') { // '\\' changes the default action
                line.pop_back();
                multiline_input = !multiline_input;
            }
        }
        line += '\n';

        // By default, continue input if multiline_input is set
        return multiline_input;
    }

    bool readline(std::string & line, bool multiline_input) {
        set_display(user_input);

        if (simple_io) {
            return readline_simple(line, multiline_input);
        }
        return readline_advanced(line, multiline_input);
    }

}
19
common/console.h
Normal file
@@ -0,0 +1,19 @@
// Console functions

#pragma once

#include <string>

namespace console {
    enum display_t {
        reset = 0,
        prompt,
        user_input,
        error
    };

    void init(bool use_simple_io, bool use_advanced_display);
    void cleanup();
    void set_display(display_t display);
    bool readline(std::string & line, bool multiline_input);
}
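// Typical call sequence (a sketch, not prescriptive):
//   console::init(params.simple_io, params.use_color);
//   console::set_display(console::prompt);
//   std::string line;
//   bool more = console::readline(line, params.multiline_input);
//   console::cleanup();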
@@ -405,7 +405,7 @@ namespace grammar_parser {
            for (size_t i = 0, end = state.rules.size(); i < end; i++) {
                // fprintf(file, "%zu: ", i);
                // print_rule_binary(file, state.rules[i]);
                print_rule(file, i, state.rules[i], symbol_id_names);
                print_rule(file, uint32_t(i), state.rules[i], symbol_id_names);
                // fprintf(file, "\n");
            }
        } catch (const std::exception & err) {
282
convert-falcon-hf-to-gguf.py
Normal file
@@ -0,0 +1,282 @@
# HF falcon --> gguf conversion

import gguf
import os
import sys
import struct
import json
import numpy as np
import torch

from typing import Any, List
from pathlib import Path
from transformers import AutoTokenizer

def bytes_to_unicode():
    # ref: https://github.com/openai/gpt-2/blob/master/src/encoder.py
    """
    Returns a list of utf-8 bytes and a corresponding list of unicode strings.
    The reversible bpe codes work on unicode strings.
    This means you need a large # of unicode characters in your vocab if you want to avoid UNKs.
    When you're at something like a 10B token dataset you end up needing around 5K for decent coverage.
    This is a significant percentage of your normal, say, 32K bpe vocab.
    To avoid that, we want lookup tables between utf-8 bytes and unicode strings.
    This also avoids mapping to whitespace/control characters that the bpe code barfs on.
    """
    bs = list(range(ord("!"), ord("~")+1))+list(range(ord("¡"), ord("¬")+1))+list(range(ord("®"), ord("ÿ")+1))
    cs = bs[:]
    n = 0
    for b in range(2**8):
        if b not in bs:
            bs.append(b)
            cs.append(2**8+n)
            n += 1
    cs = [chr(n) for n in cs]
    return dict(zip(bs, cs))

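# For example, byte 0x20 (a plain space) is remapped to U+0120 'Ġ', which is
# why GPT-2 style vocab entries show 'Ġ' where a token begins with a space.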

def count_model_parts(dir_model: str) -> int:
    num_parts = 0
    for filename in os.listdir(dir_model):
        if filename.startswith("pytorch_model-"):
            num_parts += 1

    if num_parts > 0:
        print("gguf: found " + str(num_parts) + " model parts")
    return num_parts


if len(sys.argv) < 3:
    print("Usage: convert-falcon-hf-to-gguf.py dir-model ftype\n")
    print("  ftype == 0 -> float32")
    print("  ftype == 1 -> float16")
    sys.exit(1)


# output in the same directory as the model
dir_model = sys.argv[1]
last_dir = os.path.basename(os.path.normpath(dir_model))

# possible tensor data types
# ftype == 0 -> float32
# ftype == 1 -> float16

# map from ftype to string
ftype_str = ["f32", "f16"]

ftype = 1
if len(sys.argv) > 2:
    ftype = int(sys.argv[2])
    if ftype < 0 or ftype > 1:
        print("Invalid ftype: " + str(ftype))

        sys.exit(1)

fname_out = sys.argv[1] + "/ggml-model-" + ftype_str[ftype] + ".gguf"

print("gguf: loading model "+last_dir)

with open(dir_model + "/config.json", "r", encoding="utf-8") as f:
    hparams = json.load(f)

if hparams["architectures"][0] != "RWForCausalLM":
    print("Model architecture not supported: " + hparams["architectures"][0])

    sys.exit()

# get number of model parts
num_parts = count_model_parts(dir_model)

ARCH=gguf.MODEL_ARCH.FALCON
gguf_writer = gguf.GGUFWriter(fname_out, gguf.MODEL_ARCH_NAMES[ARCH])

print("gguf: get model metadata")

block_count = hparams["n_layer"]

gguf_writer.add_name(last_dir)
gguf_writer.add_context_length(2048) # not in config.json
gguf_writer.add_tensor_data_layout("jploski") # qkv tensor transform
gguf_writer.add_embedding_length(hparams["hidden_size"])
gguf_writer.add_feed_forward_length(4 * hparams["hidden_size"])
gguf_writer.add_block_count(block_count)
gguf_writer.add_head_count(hparams["n_head"])
if "n_head_kv" in hparams: gguf_writer.add_head_count_kv(hparams["n_head_kv"])
gguf_writer.add_layer_norm_eps(hparams["layer_norm_epsilon"])

# TOKENIZATION

print("gguf: get tokenizer metadata")

tokens: List[str] = []
merges: List[str] = []


if Path(dir_model + "/tokenizer.json").is_file():
    # gpt2 tokenizer
    gguf_writer.add_tokenizer_model("gpt2")

    print("gguf: get gpt2 tokenizer merges")

    with open(dir_model + "/tokenizer.json", "r", encoding="utf-8") as f:
        tokenizer_json = json.load(f)
    merges = tokenizer_json["model"]["merges"]

    gguf_writer.add_token_merges(merges)

    print("gguf: get gpt2 tokenizer vocab")

    vocab_size = len(tokenizer_json["model"]["vocab"])

    # ref: https://github.com/cmp-nct/ggllm.cpp/blob/master/falcon_convert.py
    tokenizer = AutoTokenizer.from_pretrained(dir_model)

    reverse_vocab = {id: encoded_tok for encoded_tok, id in tokenizer.vocab.items()}
    byte_encoder = bytes_to_unicode()
    byte_decoder = {v: k for k, v in byte_encoder.items()}

    for i in range(vocab_size):
        if i in reverse_vocab:
            try:
                text = bytearray([byte_decoder[c] for c in reverse_vocab[i]])
            except KeyError:
                text = bytearray()
                for c in reverse_vocab[i]:
                    if ord(c) < 256:  # single byte character
                        text.append(byte_decoder[ord(c)])
                    else:  # multibyte special token character
                        text.extend(c.encode('utf-8'))
        else:
            print(f"Key {i} not in tokenizer vocabulary. Padding with an arbitrary token.")
            pad_token = f"[PAD{i}]".encode("utf8")
            text = bytearray(pad_token)

        tokens.append(text)

    gguf_writer.add_token_list(tokens)

    if "added_tokens" in tokenizer_json and Path(dir_model + "/tokenizer_config.json").is_file():
        print("gguf: get special token ids")

        with open(dir_model + "/tokenizer_config.json", "r", encoding="utf-8") as f:
            tokenizer_config = json.load(f)

        # find special token ids

        if "bos_token" in tokenizer_config:
            for key in tokenizer_json["added_tokens"]:
                if key["content"] == tokenizer_config["bos_token"]:
                    gguf_writer.add_bos_token_id(key["id"])

        if "eos_token" in tokenizer_config:
            for key in tokenizer_json["added_tokens"]:
                if key["content"] == tokenizer_config["eos_token"]:
                    gguf_writer.add_eos_token_id(key["id"])

        if "unk_token" in tokenizer_config:
            for key in tokenizer_json["added_tokens"]:
                if key["content"] == tokenizer_config["unk_token"]:
                    gguf_writer.add_unk_token_id(key["id"])

        if "sep_token" in tokenizer_config:
            for key in tokenizer_json["added_tokens"]:
                if key["content"] == tokenizer_config["sep_token"]:
                    gguf_writer.add_sep_token_id(key["id"])

        if "pad_token" in tokenizer_config:
            for key in tokenizer_json["added_tokens"]:
                if key["content"] == tokenizer_config["pad_token"]:
                    gguf_writer.add_pad_token_id(key["id"])


# TENSORS

tensor_map = gguf.get_tensor_name_map(ARCH,block_count)

# params for qkv transform
n_head = hparams["n_head"]
n_head_kv = hparams["n_head_kv"] if "n_head_kv" in hparams else 1
head_dim = hparams["hidden_size"] // n_head

# tensor info
print("gguf: get tensor metadata")

if num_parts == 0:
    part_names = ("pytorch_model.bin",)
else:
    part_names = (
        f"pytorch_model-{n:05}-of-{num_parts:05}.bin" for n in range(1, num_parts + 1)
    )

for part_name in part_names:
    print("gguf: loading model part '" + part_name + "'")
    model_part = torch.load(f"{dir_model}/{part_name}", map_location="cpu")

    for name in model_part.keys():
        data = model_part[name]

        old_dtype = data.dtype

        # convert any unsupported data types to float32
        if data.dtype != torch.float16 and data.dtype != torch.float32:
            data = data.to(torch.float32)

        # QKV tensor transform
        # The original query_key_value tensor contains n_head_kv "kv groups",
        # each consisting of n_head/n_head_kv query weights followed by one key
        # and one value weight (shared by all query heads in the kv group).
        # This layout makes it a big pain to work with in GGML.
        # So we rearrange them here, so that we have n_head query weights
        # followed by n_head_kv key weights followed by n_head_kv value weights,
        # in contiguous fashion.
        # ref: https://github.com/jploski/ggml/blob/falcon40b/examples/falcon/convert-hf-to-ggml.py

        if "query_key_value" in name:
            qkv = data.view(n_head_kv, n_head // n_head_kv + 2, head_dim, head_dim * n_head)
            q = qkv[:, :-2 ].reshape(n_head * head_dim, head_dim * n_head)
            k = qkv[:, [-2]].reshape(n_head_kv * head_dim, head_dim * n_head)
            v = qkv[:, [-1]].reshape(n_head_kv * head_dim, head_dim * n_head)
            data = torch.cat((q,k,v)).reshape_as(data)
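            # e.g. for Falcon-7B (hidden_size=4544, n_head=71, n_head_kv=1,
            # head_dim=64): qkv is viewed as (1, 73, 64, 4544); q reshapes to
            # (4544, 4544) and k/v to (64, 4544) each.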

        data = data.squeeze().numpy()

        # map tensor names
        if name.endswith(".weight") and name[:-7] in tensor_map:
            name = tensor_map[name[:-7]] + ".weight"
        elif name.endswith(".bias") and name[:-5] in tensor_map:
            name = tensor_map[name[:-5]] + ".bias"
        else:
            print("Cannot map tensor '" + name + "'")
            sys.exit()

        n_dims = len(data.shape)
        data_dtype = data.dtype

        # if f32 desired, convert any float16 to float32
        if ftype == 0 and data_dtype == np.float16:
            data = data.astype(np.float32)

        # TODO: Why can't we use these float16 as-is? There should be no reason to store float16 as float32
        if ftype == 1 and data_dtype == np.float16 and n_dims == 1:
            data = data.astype(np.float32)

        # if f16 desired, convert any float32 2-dim weight tensors to float16
        if ftype == 1 and data_dtype == np.float32 and name.endswith(".weight") and n_dims == 2:
            data = data.astype(np.float16)

        print(name + ", n_dims = " + str(n_dims) + ", " + str(old_dtype) + " --> " + str(data.dtype))

        gguf_writer.add_tensor(name, data)


print("gguf: write header")
gguf_writer.write_header_to_file()
print("gguf: write metadata")
gguf_writer.write_kv_data_to_file()
print("gguf: write tensors")
gguf_writer.write_tensors_to_file()

gguf_writer.close()

print("gguf: model successfully exported to '" + fname_out + "'")
print("")
266
convert-gptneox-hf-to-gguf.py
Normal file
@@ -0,0 +1,266 @@
# HF gptneox --> gguf conversion

import gguf
import os
import sys
import struct
import json
import numpy as np
import torch

from typing import Any, List
from pathlib import Path
from transformers import AutoTokenizer

# ref: https://github.com/openai/gpt-2/blob/master/src/encoder.py


def bytes_to_unicode():
    """
    Returns a list of utf-8 bytes and a corresponding list of unicode strings.
    The reversible bpe codes work on unicode strings.
    This means you need a large # of unicode characters in your vocab if you want to avoid UNKs.
    When you're at something like a 10B token dataset you end up needing around 5K for decent coverage.
    This is a significant percentage of your normal, say, 32K bpe vocab.
    To avoid that, we want lookup tables between utf-8 bytes and unicode strings.
    This also avoids mapping to whitespace/control characters that the bpe code barfs on.
    """
    bs = list(range(ord("!"), ord("~")+1))+list(range(ord("¡"), ord("¬")+1))+list(range(ord("®"), ord("ÿ")+1))
    cs = bs[:]
    n = 0
    for b in range(2**8):
        if b not in bs:
            bs.append(b)
            cs.append(2**8+n)
            n += 1
    cs = [chr(n) for n in cs]
    return dict(zip(bs, cs))


def count_model_parts(dir_model: str) -> int:
    num_parts = 0
    for filename in os.listdir(dir_model):
        if filename.startswith("pytorch_model-"):
            num_parts += 1

    if num_parts > 0:
        print("gguf: found " + str(num_parts) + " model parts")
    return num_parts


if len(sys.argv) < 3:
    print("Usage: convert-gptneox-hf-to-gguf.py dir-model ftype\n")
    print("  ftype == 0 -> float32")
    print("  ftype == 1 -> float16")
    sys.exit(1)


# output in the same directory as the model
dir_model = sys.argv[1]
last_dir = os.path.basename(os.path.normpath(dir_model))

# possible tensor data types
# ftype == 0 -> float32
# ftype == 1 -> float16

# map from ftype to string
ftype_str = ["f32", "f16"]

ftype = 1
if len(sys.argv) > 2:
    ftype = int(sys.argv[2])
    if ftype < 0 or ftype > 1:
        print("Invalid ftype: " + str(ftype))

        sys.exit(1)

fname_out = sys.argv[1] + "/ggml-model-" + ftype_str[ftype] + ".gguf"

print("gguf: loading model "+last_dir)

with open(dir_model + "/config.json", "r", encoding="utf-8") as f:
    hparams = json.load(f)

if hparams["architectures"][0] != "GPTNeoXForCausalLM":
    print("Model architecture not supported: " + hparams["architectures"][0])

    sys.exit()

# get number of model parts
num_parts = count_model_parts(dir_model)

ARCH=gguf.MODEL_ARCH.GPTNEOX
gguf_writer = gguf.GGUFWriter(fname_out, gguf.MODEL_ARCH_NAMES[ARCH])

print("gguf: get model metadata")

block_count = hparams["num_hidden_layers"]

gguf_writer.add_name(last_dir)
gguf_writer.add_context_length(hparams["max_position_embeddings"])
gguf_writer.add_embedding_length(hparams["hidden_size"])
gguf_writer.add_block_count(block_count)
gguf_writer.add_feed_forward_length(hparams["intermediate_size"])
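# rotary_pct is the fraction of each attention head's dimensions that receive
# rotary position embeddings, e.g. 0.25 * (hidden_size // n_head) dims.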
gguf_writer.add_rope_dimension_count(int(hparams["rotary_pct"]*(hparams["hidden_size"]//hparams["num_attention_heads"])))
gguf_writer.add_head_count(hparams["num_attention_heads"])
gguf_writer.add_parallel_residual(hparams["use_parallel_residual"] if "use_parallel_residual" in hparams else True)
gguf_writer.add_layer_norm_eps(hparams["layer_norm_eps"])

# TOKENIZATION

print("gguf: get tokenizer metadata")

tokens: List[str] = []
merges: List[str] = []


if Path(dir_model + "/tokenizer.json").is_file():
    # gpt2 tokenizer
    gguf_writer.add_tokenizer_model("gpt2")

    print("gguf: get gpt2 tokenizer merges")

    with open(dir_model + "/tokenizer.json", "r", encoding="utf-8") as f:
        tokenizer_json = json.load(f)
    merges = tokenizer_json["model"]["merges"]

    gguf_writer.add_token_merges(merges)

    print("gguf: get gpt2 tokenizer vocab")

    vocab_size = len(tokenizer_json["model"]["vocab"])

    # ref: https://github.com/cmp-nct/ggllm.cpp/blob/master/falcon_convert.py
    tokenizer = AutoTokenizer.from_pretrained(dir_model)

    reverse_vocab = {id: encoded_tok for encoded_tok, id in tokenizer.vocab.items()}
    byte_encoder = bytes_to_unicode()
    byte_decoder = {v: k for k, v in byte_encoder.items()}

    for i in range(vocab_size):
        if i in reverse_vocab:
            try:
                text = bytearray([byte_decoder[c] for c in reverse_vocab[i]])
            except KeyError:
                text = bytearray()
                for c in reverse_vocab[i]:
                    if ord(c) < 256:  # single byte character
                        text.append(byte_decoder[ord(c)])
                    else:  # multibyte special token character
                        text.extend(c.encode('utf-8'))
        else:
            print(f"Key {i} not in tokenizer vocabulary. Padding with an arbitrary token.")
            pad_token = f"[PAD{i}]".encode("utf8")
            text = bytearray(pad_token)

        tokens.append(text)

    gguf_writer.add_token_list(tokens)

    if "added_tokens" in tokenizer_json and Path(dir_model + "/tokenizer_config.json").is_file():
        print("gguf: get special token ids")

        with open(dir_model + "/tokenizer_config.json", "r", encoding="utf-8") as f:
            tokenizer_config = json.load(f)

        # find special token ids

        if "bos_token" in tokenizer_config:
            for key in tokenizer_json["added_tokens"]:
                if key["content"] == tokenizer_config["bos_token"]:
                    gguf_writer.add_bos_token_id(key["id"])

        if "eos_token" in tokenizer_config:
            for key in tokenizer_json["added_tokens"]:
                if key["content"] == tokenizer_config["eos_token"]:
                    gguf_writer.add_eos_token_id(key["id"])

        if "unk_token" in tokenizer_config:
            for key in tokenizer_json["added_tokens"]:
                if key["content"] == tokenizer_config["unk_token"]:
                    gguf_writer.add_unk_token_id(key["id"])

        if "sep_token" in tokenizer_config:
            for key in tokenizer_json["added_tokens"]:
                if key["content"] == tokenizer_config["sep_token"]:
                    gguf_writer.add_sep_token_id(key["id"])

        if "pad_token" in tokenizer_config:
            for key in tokenizer_json["added_tokens"]:
                if key["content"] == tokenizer_config["pad_token"]:
                    gguf_writer.add_pad_token_id(key["id"])


# TENSORS

tensor_map = gguf.get_tensor_name_map(ARCH,block_count)

# tensor info
print("gguf: get tensor metadata")

if num_parts == 0:
    part_names = ("pytorch_model.bin",)
else:
    part_names = (
        f"pytorch_model-{n:05}-of-{num_parts:05}.bin" for n in range(1, num_parts + 1)
    )

for part_name in part_names:
    print("gguf: loading model part '" + part_name + "'")
    model_part = torch.load(f"{dir_model}/{part_name}", map_location="cpu")

    for name in model_part.keys():
        data = model_part[name]

        # we don't need these
        if name.endswith(".attention.masked_bias") or name.endswith(".attention.bias") or name.endswith(".attention.rotary_emb.inv_freq"):
            continue

        old_dtype = data.dtype

        # convert any unsupported data types to float32
        if data.dtype != torch.float16 and data.dtype != torch.float32:
            data = data.to(torch.float32)

        data = data.squeeze().numpy()

        # map tensor names
        if name.endswith(".weight") and name[:-7] in tensor_map:
            name = tensor_map[name[:-7]] + ".weight"
        elif name.endswith(".bias") and name[:-5] in tensor_map:
            name = tensor_map[name[:-5]] + ".bias"
        else:
            print("Cannot map tensor '" + name + "'")
            sys.exit()

        n_dims = len(data.shape)
        data_dtype = data.dtype

        # if f32 desired, convert any float16 to float32
        if ftype == 0 and data_dtype == np.float16:
            data = data.astype(np.float32)

        # TODO: Why can't we use these float16 as-is? There should be no reason to store float16 as float32
        if ftype == 1 and data_dtype == np.float16 and n_dims == 1:
            data = data.astype(np.float32)

        # if f16 desired, convert any float32 2-dim weight tensors to float16
        if ftype == 1 and data_dtype == np.float32 and name.endswith(".weight") and n_dims == 2:
            data = data.astype(np.float16)

        print(name + ", n_dims = " + str(n_dims) + ", " + str(old_dtype) + " --> " + str(data.dtype))

        gguf_writer.add_tensor(name, data)


print("gguf: write header")
gguf_writer.write_header_to_file()
print("gguf: write metadata")
gguf_writer.write_kv_data_to_file()
print("gguf: write tensors")
gguf_writer.write_tensors_to_file()

gguf_writer.close()

print("gguf: model successfully exported to '" + fname_out + "'")
print("")
307
convert-llama-7b-pth-to-gguf.py
Normal file
@@ -0,0 +1,307 @@
# 7b pth llama --> gguf conversion
# Only models with a single datafile are supported, like 7B
# HF files required in the model dir: config.json tokenizer_config.json tokenizer.json tokenizer.model

import gguf
import os
import sys
import struct
import json
import numpy as np
import torch

from typing import Any, List
from pathlib import Path
from sentencepiece import SentencePieceProcessor

#NDArray = np.ndarray[Any, Any]
# compatible with python < 3.9
NDArray: 'TypeAlias' = 'np.ndarray[Any, Any]'


def count_model_parts(dir_model: str) -> int:
    num_parts = 0
    for filename in os.listdir(dir_model):
        if filename.startswith("consolidated."):
            num_parts += 1

    if num_parts > 0:
        print("gguf: found " + str(num_parts) + " model parts")
    return num_parts


if len(sys.argv) < 3:
    print("Usage: convert-llama-7b-pth-to-gguf.py dir-model ftype\n")
    print("  ftype == 0 -> float32")
    print("  ftype == 1 -> float16")

    sys.exit(1)


# output in the same directory as the model
dir_model = sys.argv[1]
last_dir = os.path.basename(os.path.normpath(dir_model))


# possible tensor data types
# ftype == 0 -> float32
# ftype == 1 -> float16

# map from ftype to string
ftype_str = ["f32", "f16"]

ftype = 1
if len(sys.argv) > 2:
    ftype = int(sys.argv[2])
    if ftype < 0 or ftype > 1:
        print("Invalid ftype: " + str(ftype))

        sys.exit(1)

fname_out = sys.argv[1] + "/ggml-model-" + ftype_str[ftype] + ".gguf"

print("gguf: loading model "+last_dir)

with open(dir_model + "/config.json", "r", encoding="utf-8") as f:
    hparams = json.load(f)

if hparams["architectures"][0] != "LlamaForCausalLM":
    print("Model architecture not supported: " + hparams["architectures"][0])
    sys.exit()

# get number of model parts
num_parts = count_model_parts(dir_model)

if num_parts > 1:
    print("gguf: Only models with a single datafile are supported.")

    sys.exit()

ARCH=gguf.MODEL_ARCH.LLAMA
gguf_writer = gguf.GGUFWriter(fname_out, gguf.MODEL_ARCH_NAMES[ARCH])


print("gguf: get model metadata")

block_count = hparams["num_hidden_layers"]
head_count = hparams["num_attention_heads"]

if "num_key_value_heads" in hparams:
    head_count_kv = hparams["num_key_value_heads"]
else:
    head_count_kv = head_count

if "_name_or_path" in hparams:
    hf_repo = hparams["_name_or_path"]
else:
    hf_repo = ""

if "max_sequence_length" in hparams:
    ctx_length = hparams["max_sequence_length"]
elif "max_position_embeddings" in hparams:
    ctx_length = hparams["max_position_embeddings"]
else:
    print("gguf: cannot find ctx length parameter.")

    sys.exit()


gguf_writer.add_name(last_dir)
gguf_writer.add_source_hf_repo(hf_repo)
gguf_writer.add_tensor_data_layout("Meta AI original pth")
gguf_writer.add_context_length(ctx_length)
gguf_writer.add_embedding_length(hparams["hidden_size"])
gguf_writer.add_block_count(block_count)
gguf_writer.add_feed_forward_length(hparams["intermediate_size"])
gguf_writer.add_rope_dimension_count(hparams["hidden_size"] // hparams["num_attention_heads"])
gguf_writer.add_head_count(head_count)
gguf_writer.add_head_count_kv(head_count_kv)
gguf_writer.add_layer_norm_rms_eps(hparams["rms_norm_eps"])

if "rope_scaling" in hparams and hparams["rope_scaling"] != None and "factor" in hparams["rope_scaling"]:
    if "type" in hparams["rope_scaling"]:
        if hparams["rope_scaling"]["type"] == "linear":
            gguf_writer.add_rope_scale_linear(hparams["rope_scaling"]["factor"])


# TOKENIZATION

print("gguf: get tokenizer metadata")

tokens: List[bytes] = []
scores: List[float] = []
toktypes: List[int] = []

if Path(dir_model + "/tokenizer.model").is_file():
    # vocab type sentencepiece
    print("gguf: get sentencepiece tokenizer vocab and scores")

    tokenizer = SentencePieceProcessor(dir_model + "/tokenizer.model")

    for i in range(tokenizer.vocab_size()):
        text: bytes
        score: float

        piece = tokenizer.id_to_piece(i)
        text = piece.encode("utf-8")
        score = tokenizer.get_score(i)

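        # gguf token types used below: 1 = normal, 2 = unknown, 3 = control,
        # 4 = user-defined (from added_tokens.json), 5 = unused, 6 = byte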
        toktype = 1  # default to normal token type
        if tokenizer.is_unknown(i):
            toktype = 2
        if tokenizer.is_control(i):
            toktype = 3

        # toktype = 4 is user-defined = tokens from added_tokens.json

        if tokenizer.is_unused(i):
            toktype = 5
        if tokenizer.is_byte(i):
            toktype = 6

        tokens.append(text)
        scores.append(score)
        toktypes.append(toktype)

    if Path(dir_model + "/added_tokens.json").is_file():
        with open(dir_model + "/added_tokens.json", "r", encoding="utf-8") as f:
            addtokens_json = json.load(f)

            print("gguf: get added tokens")

            for key in addtokens_json:
                tokens.append( key.encode("utf-8") )
                scores.append(-1000.0)
                toktypes.append(4) # user-defined token type

    gguf_writer.add_tokenizer_model("llama")
    gguf_writer.add_token_list(tokens)
    gguf_writer.add_token_scores(scores)
    gguf_writer.add_token_types(toktypes)


print("gguf: get special token ids")

if Path(dir_model + "/tokenizer.json").is_file():
    # Look for special tokens in tokenizer.json if it exists

    with open(dir_model + "/tokenizer.json", "r", encoding="utf-8") as f:
        tokenizer = json.load(f)

    if "added_tokens" in tokenizer and Path(dir_model + "/tokenizer_config.json").is_file():

        with open(dir_model + "/tokenizer_config.json", "r", encoding="utf-8") as f:
            tokenizer_config = json.load(f)

        if "bos_token" in tokenizer_config and tokenizer_config["bos_token"] != None:
            for key in tokenizer["added_tokens"]:
                if key["content"] == tokenizer_config["bos_token"]["content"]:
                    gguf_writer.add_bos_token_id(key["id"])

        if "eos_token" in tokenizer_config and tokenizer_config["eos_token"] != None:
            for key in tokenizer["added_tokens"]:
                if key["content"] == tokenizer_config["eos_token"]["content"]:
                    gguf_writer.add_eos_token_id(key["id"])

        if "unk_token" in tokenizer_config and tokenizer_config["unk_token"] != None:
            for key in tokenizer["added_tokens"]:
                if key["content"] == tokenizer_config["unk_token"]["content"]:
                    gguf_writer.add_unk_token_id(key["id"])

        if "sep_token" in tokenizer_config and tokenizer_config["sep_token"] != None:
            for key in tokenizer["added_tokens"]:
                if key["content"] == tokenizer_config["sep_token"]["content"]:
                    gguf_writer.add_sep_token_id(key["id"])

        if "pad_token" in tokenizer_config and tokenizer_config["pad_token"] != None:
            for key in tokenizer["added_tokens"]:
                if key["content"] == tokenizer_config["pad_token"]["content"]:
                    gguf_writer.add_pad_token_id(key["id"])
else:
    # If no tokenizer.json: Look for special tokens in config.json

    if "bos_token_id" in hparams and hparams["bos_token_id"] != None:
        gguf_writer.add_bos_token_id(hparams["bos_token_id"])

    if "eos_token_id" in hparams and hparams["eos_token_id"] != None:
        gguf_writer.add_eos_token_id(hparams["eos_token_id"])

    if "unk_token_id" in hparams and hparams["unk_token_id"] != None:
        gguf_writer.add_unk_token_id(hparams["unk_token_id"])

    if "sep_token_id" in hparams and hparams["sep_token_id"] != None:
        gguf_writer.add_sep_token_id(hparams["sep_token_id"])

    if "pad_token_id" in hparams and hparams["pad_token_id"] != None:
        gguf_writer.add_pad_token_id(hparams["pad_token_id"])


# TENSORS

tensor_map = gguf.get_tensor_name_map(ARCH,block_count)

# tensor info
print("gguf: get tensor metadata")

part_names = (f"consolidated.{n:02}.pth" for n in range(0, num_parts))

for part_name in part_names:
    print("gguf: loading model part '" + part_name + "'")
    model_part = torch.load(f"{dir_model}/{part_name}", map_location="cpu")

    for name in model_part.keys():
        data = model_part[name]

        # we don't need these
        if name == "rope.freqs":
            continue

        old_dtype = data.dtype

        # convert any unsupported data types to float32
        if data.dtype != torch.float16 and data.dtype != torch.float32:
            data = data.to(torch.float32)

        data = data.squeeze().numpy()

        # map tensor names
        if name.endswith(".weight") and name[:-7] in tensor_map:
            name = tensor_map[name[:-7]] + ".weight"
        elif name.endswith(".bias") and name[:-5] in tensor_map:
            name = tensor_map[name[:-5]] + ".bias"
        else:
            print("Cannot map tensor '" + name + "'")
            sys.exit()

        n_dims = len(data.shape)
        data_dtype = data.dtype

        # if f32 desired, convert any float16 to float32
        if ftype == 0 and data_dtype == np.float16:
            data = data.astype(np.float32)

        # TODO: Why can't we use these float16 as-is? There should be no reason to store float16 as float32
        if ftype == 1 and data_dtype == np.float16 and n_dims == 1:
            data = data.astype(np.float32)

        # if f16 desired, convert any float32 2-dim weight tensors to float16
        if ftype == 1 and data_dtype == np.float32 and name.endswith(".weight") and n_dims == 2:
            data = data.astype(np.float16)

        print(name + ", n_dims = " + str(n_dims) + ", " + str(old_dtype) + " --> " + str(data.dtype))

        gguf_writer.add_tensor(name, data)


print("gguf: write header")
gguf_writer.write_header_to_file()
print("gguf: write metadata")
gguf_writer.write_kv_data_to_file()
print("gguf: write tensors")
gguf_writer.write_tensors_to_file()

gguf_writer.close()


print("gguf: model successfully exported to '" + fname_out + "'")
print("")
convert-llama-ggmlv3-to-gguf.py (new file, 333 lines)
@@ -0,0 +1,333 @@
|
||||
import sys, struct, math, argparse
|
||||
from pathlib import Path
|
||||
|
||||
import numpy as np
|
||||
|
||||
import gguf
|
||||
|
||||
# Note: Does not support GGML_QKK_64
|
||||
QK_K = 256
|
||||
# Items here are (block size, type size)
|
||||
GGML_QUANT_SIZES = {
|
||||
gguf.GGMLQuantizationType.F32 : (1, 4),
|
||||
gguf.GGMLQuantizationType.F16 : (1, 2),
|
||||
gguf.GGMLQuantizationType.Q4_0 : (32, 2 + 16),
|
||||
gguf.GGMLQuantizationType.Q4_1 : (32, 2 + 2 + 16),
|
||||
gguf.GGMLQuantizationType.Q5_0 : (32, 2 + 4 + 16),
|
||||
gguf.GGMLQuantizationType.Q5_1 : (32, 2 + 2 + 4 + 16),
|
||||
gguf.GGMLQuantizationType.Q8_0 : (32, 2 + 32),
|
||||
gguf.GGMLQuantizationType.Q8_1 : (32, 4 + 4 + 32),
|
||||
gguf.GGMLQuantizationType.Q2_K : (256, 2 + 2 + QK_K // 16 + QK_K // 4),
|
||||
gguf.GGMLQuantizationType.Q3_K : (256, 2 + QK_K // 4 + QK_K // 8 + 12),
|
||||
gguf.GGMLQuantizationType.Q4_K : (256, 2 + 2 + QK_K // 2 + 12),
|
||||
gguf.GGMLQuantizationType.Q5_K : (256, 2 + 2 + QK_K // 2 + QK_K // 8 + 12),
|
||||
gguf.GGMLQuantizationType.Q6_K : (256, 2 + QK_K // 2 + QK_K // 4 + QK_K // 16),
|
||||
gguf.GGMLQuantizationType.Q8_K : (256, 4 + QK_K + QK_K // 8),
|
||||
}
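# Illustrative helper (not in the original script): the (block size, type size)
# pairs above determine the byte size of a quantized tensor; Tensor.load below
# applies the same formula.
def _quant_nbytes(n_elems, dtype):
    blksize, tysize = GGML_QUANT_SIZES[dtype]
    return (n_elems * tysize) // blksize
# e.g. a 4096-element Q4_0 tensor: (4096 * 18) // 32 = 2304 bytes (128 blocks of 18 bytes each)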
|
||||
|
||||
class Hyperparameters:
|
||||
def __init__(self):
|
||||
self.n_vocab = self.n_embd = self.n_mult = self.n_head = self.n_layer = self.n_rot = self.ftype = 0
|
||||
self.n_ff = 0
|
||||
|
||||
def set_n_ff(self, model):
|
||||
ff_tensor_idx = model.tensor_map.get(b'layers.0.feed_forward.w1.weight')
|
||||
assert ff_tensor_idx is not None, 'Missing layer 0 FF tensor'
|
||||
ff_tensor = model.tensors[ff_tensor_idx]
|
||||
self.n_ff = ff_tensor.dims[1]
|
||||
|
||||
def load(self, data, offset):
|
||||
(
|
||||
self.n_vocab,
|
||||
self.n_embd,
|
||||
self.n_mult,
|
||||
self.n_head,
|
||||
self.n_layer,
|
||||
self.n_rot,
|
||||
self.ftype,
|
||||
) = struct.unpack('<7I', data[offset:offset + (4 * 7)])
|
||||
return 4 * 7
|
||||
|
||||
def __str__(self):
|
||||
return f'<Hyperparameters: n_vocab={self.n_vocab}, n_embd={self.n_embd}, n_mult={self.n_mult}, n_head={self.n_head}, n_layer={self.n_layer}, n_rot={self.n_rot}, n_ff={self.n_ff}, ftype={self.ftype}>'
|
||||
|
||||
class Vocab:
|
||||
def __init__(self):
|
||||
self.items = []
|
||||
|
||||
def load(self, data, offset, n_vocab):
|
||||
orig_offset = offset
|
||||
for _ in range(n_vocab):
|
||||
itemlen = struct.unpack('<I', data[offset:offset + 4])[0]
|
||||
assert itemlen < 4096, 'Absurd vocab item length'
|
||||
offset += 4
|
||||
vocab = bytes(data[offset:offset + itemlen])
|
||||
offset += itemlen
|
||||
score = struct.unpack('<f', data[offset:offset + 4])[0]
|
||||
offset += 4
|
||||
self.items.append((vocab, score))
|
||||
return offset - orig_offset
|
||||
|
||||
class Tensor:
|
||||
def __init__(self):
|
||||
self.name = None
|
||||
self.dims = ()
|
||||
self.dtype = None
|
||||
self.start_offset = 0
|
||||
self.len_bytes = 0
|
||||
|
||||
def load(self, data, offset):
|
||||
orig_offset = offset
|
||||
(n_dims, name_len, dtype) = struct.unpack('<3I', data[offset:offset + 12])
|
||||
assert n_dims >= 0 and n_dims <= 4, f'Invalid tensor dimensions {n_dims}'
|
||||
assert name_len < 4096, 'Absurd tensor name length'
|
||||
quant = GGML_QUANT_SIZES.get(dtype)
|
||||
assert quant is not None, 'Unknown tensor type'
|
||||
(blksize, tysize) = quant
|
||||
offset += 12
|
||||
self.dtype= dtype
|
||||
self.dims = struct.unpack(f'<{n_dims}I', data[offset:offset + (4 * n_dims)])
|
||||
offset += 4 * n_dims
|
||||
self.name = bytes(data[offset:offset + name_len])
|
||||
offset += name_len
|
||||
pad = ((offset + 31) & ~31) - offset
|
||||
offset += pad
|
||||
n_elems = np.prod(self.dims)
|
||||
n_bytes = (n_elems * tysize) // blksize
|
||||
self.start_offset = offset
|
||||
self.len_bytes = n_bytes
|
||||
offset += n_bytes
|
||||
# print(n_dims, name_len, dtype, self.dims, self.name, pad)
|
||||
return offset - orig_offset
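# Note on the padding above: tensor data in GGJTv3 is aligned to 32 bytes, so
# `pad = ((offset + 31) & ~31) - offset` advances to the next multiple of 32
# (e.g. offset 100 -> pad 28, offset 128 -> pad 0).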
|
||||
|
||||
class GGMLV3Model:
|
||||
def __init__(self):
|
||||
self.hyperparameters = None
|
||||
self.vocab = None
|
||||
self.tensor_map = {}
|
||||
self.tensors = []
|
||||
|
||||
def validate_header(self, data, offset):
|
||||
if bytes(data[offset:offset + 4]) != b'tjgg' or struct.unpack('<I', data[offset + 4:offset + 8])[0] != 3:
|
||||
raise ValueError('Only GGJTv3 supported')
|
||||
return 8
|
||||
|
||||
def load(self, data, offset):
|
||||
offset += self.validate_header(data, offset)
|
||||
hp = Hyperparameters()
|
||||
offset += hp.load(data, offset)
|
||||
vocab = Vocab()
|
||||
offset += vocab.load(data, offset, hp.n_vocab)
|
||||
tensors = []
|
||||
tensor_map = {}
|
||||
while offset < len(data):
|
||||
tensor = Tensor()
|
||||
offset += tensor.load(data, offset)
|
||||
tensor_map[tensor.name] = len(tensors)
|
||||
tensors.append(tensor)
|
||||
self.hyperparameters = hp
|
||||
self.vocab = vocab
|
||||
self.tensors = tensors
|
||||
self.tensor_map = tensor_map
|
||||
hp.set_n_ff(self)
|
||||
return offset
|
||||
|
||||
class GGMLToGGUF:
|
||||
def __init__(self, ggml_model, data, cfg, params_override = None, vocab_override = None):
|
||||
hp = ggml_model.hyperparameters
|
||||
self.model = ggml_model
|
||||
self.data = data
|
||||
self.cfg = cfg
|
||||
self.params_override = params_override
|
||||
self.vocab_override = vocab_override
|
||||
if params_override is not None:
|
||||
n_kv_head = params_override.n_head_kv
|
||||
else:
|
||||
if cfg.gqa == 1:
|
||||
n_kv_head = hp.n_head
|
||||
else:
|
||||
gqa = float(cfg.gqa)
|
||||
n_kv_head = None
|
||||
for x in range(1, 256):
|
||||
if float(hp.n_head) / float(x) == gqa:
|
||||
n_kv_head = x
|
||||
assert n_kv_head is not None, "Couldn't determine n_kv_head from GQA param"
|
||||
print(f'- Guessed n_kv_head = {n_kv_head} based on GQA {cfg.gqa}')
|
||||
self.n_kv_head = n_kv_head
|
||||
self.name_map = gguf.get_tensor_name_map(gguf.MODEL_ARCH.LLAMA, ggml_model.hyperparameters.n_layer)
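# Worked example of the GQA guess above: LLaMA2 70B has n_head = 64, so running
# with --gqa 8 stops the loop at x = 8 (64 / 8 == 8.0), giving n_kv_head = 8.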
|
||||
|
||||
def save(self):
|
||||
print('* Preparing to save GGUF file')
|
||||
gguf_writer = gguf.GGUFWriter(self.cfg.output, gguf.MODEL_ARCH_NAMES[gguf.MODEL_ARCH.LLAMA], use_temp_file = False)
|
||||
self.add_params(gguf_writer)
|
||||
self.add_vocab(gguf_writer)
|
||||
self.add_tensors(gguf_writer)
|
||||
print(" gguf: write header")
|
||||
gguf_writer.write_header_to_file()
|
||||
print(" gguf: write metadata")
|
||||
gguf_writer.write_kv_data_to_file()
|
||||
print(" gguf: write tensors")
|
||||
gguf_writer.write_tensors_to_file()
|
||||
gguf_writer.close()
|
||||
|
||||
def add_params(self, gguf_writer):
|
||||
hp = self.model.hyperparameters
|
||||
cfg = self.cfg
|
||||
desc = cfg.desc if cfg.desc is not None else 'converted from legacy GGJTv3 format'
|
||||
try:
|
||||
# Filenames aren't necessarily valid UTF8.
|
||||
name = cfg.name if cfg.name is not None else cfg.input.name
|
||||
except UnicodeDecodeError:
|
||||
name = None
|
||||
print('* Adding model parameters and KV items')
|
||||
if name is not None:
|
||||
gguf_writer.add_name(name)
|
||||
gguf_writer.add_description(desc)
|
||||
if self.params_override is not None:
|
||||
po = self.params_override
|
||||
assert po.n_embd == hp.n_embd, 'Model hyperparams mismatch'
|
||||
assert po.n_layer == hp.n_layer, 'Model hyperparams mismatch'
|
||||
assert po.n_head == hp.n_head, 'Model hyperparams mismatch'
|
||||
gguf_writer.add_context_length (po.n_ctx)
|
||||
gguf_writer.add_embedding_length (po.n_embd)
|
||||
gguf_writer.add_block_count (po.n_layer)
|
||||
gguf_writer.add_feed_forward_length (po.n_ff)
|
||||
gguf_writer.add_rope_dimension_count(po.n_embd // po.n_head)
|
||||
gguf_writer.add_head_count (po.n_head)
|
||||
gguf_writer.add_head_count_kv (po.n_head_kv)
|
||||
gguf_writer.add_layer_norm_rms_eps (po.f_norm_eps)
|
||||
return
|
||||
gguf_writer.add_context_length(cfg.context_length)
|
||||
gguf_writer.add_embedding_length(hp.n_embd)
|
||||
gguf_writer.add_block_count(hp.n_layer)
|
||||
gguf_writer.add_feed_forward_length(hp.n_ff)
|
||||
gguf_writer.add_rope_dimension_count(hp.n_embd // hp.n_head)
|
||||
gguf_writer.add_head_count(hp.n_head)
|
||||
gguf_writer.add_head_count_kv(self.n_kv_head)
|
||||
gguf_writer.add_layer_norm_rms_eps(float(cfg.eps))
|
||||
|
||||
def add_vocab(self, gguf_writer):
|
||||
hp = self.model.hyperparameters
|
||||
gguf_writer.add_tokenizer_model('llama')
|
||||
tokens = []
|
||||
scores = []
|
||||
toktypes = []
|
||||
if self.vocab_override is not None:
|
||||
vo = self.vocab_override
|
||||
print('* Adding vocab item(s)')
|
||||
for (idx, vitem) in enumerate(vo.all_tokens()):
|
||||
if len(vitem) == 3:
|
||||
tokens.append(vitem[0])
|
||||
scores.append(vitem[1])
|
||||
toktypes.append(vitem[2])
|
||||
else:
|
||||
# Maybe try to guess the token type here?
|
||||
tokens.append(vitem[0])
|
||||
scores.append(vitem[1])
|
||||
assert len(tokens) == hp.n_vocab, f'Override vocab has a different number of items than hyperparameters - override = {len(tokens)} but n_vocab={hp.n_vocab}'
|
||||
gguf_writer.add_token_list(tokens)
|
||||
gguf_writer.add_token_scores(scores)
|
||||
if len(toktypes) > 0:
|
||||
gguf_writer.add_token_types(toktypes)
|
||||
return
|
||||
print(f'* Adding {hp.n_vocab} vocab item(s)')
|
||||
for (tokid, (vbytes, vscore)) in enumerate(self.model.vocab.items):
|
||||
tt = 1 # Normal
|
||||
if len(vbytes) == 0:
|
||||
tt = 3 # Control
|
||||
elif tokid >= 3 and tokid <= 258 and len(vbytes) == 1:
|
||||
vbytes = bytes(f'<0x{vbytes[0]:02X}>', encoding = 'UTF-8')
|
||||
tt = 6 # Byte
|
||||
else:
|
||||
vbytes = vbytes.replace(b' ', b'\xe2\x96\x81')
|
||||
toktypes.append(tt)
|
||||
tokens.append(vbytes)
|
||||
scores.append(vscore)
|
||||
gguf_writer.add_token_list(tokens)
|
||||
gguf_writer.add_token_scores(scores)
|
||||
gguf_writer.add_token_types(toktypes)
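# Example of the byte-token rewrite above: a single-byte vocab entry such as
# b'\x0a' at token id 13 is stored as the literal text b'<0x0A>' with token
# type 6 (byte), matching how SentencePiece spells byte-fallback tokens.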
|
||||
|
||||
def add_tensors(self, gguf_writer):
|
||||
nm = self.name_map
|
||||
data = self.data
|
||||
print(f'* Adding {len(self.model.tensors)} tensor(s)')
|
||||
for tensor in self.model.tensors:
|
||||
name = str(tensor.name, 'UTF-8')
|
||||
if name.endswith('.weight'):
|
||||
name = name[:-7]
|
||||
suffix = '.weight'
|
||||
elif name.endswith('.bias'):
|
||||
name = name[:-5]
|
||||
suffix = '.bias'
|
||||
mapped_name = nm.get(name)
|
||||
assert mapped_name is not None, f'Bad name {name}'
|
||||
mapped_name += suffix
|
||||
tempdims = list(tensor.dims[:])
|
||||
if len(tempdims) > 1:
|
||||
temp = tempdims[1]
|
||||
tempdims[1] = tempdims[0]
|
||||
tempdims[0] = temp
|
||||
# print(f'+ {tensor.name} | {mapped_name} {tensor.dims} :: {tempdims}')
|
||||
gguf_writer.add_tensor(mapped_name, data[tensor.start_offset:tensor.start_offset + tensor.len_bytes], raw_shape = tempdims, raw_dtype = tensor.dtype)
|
||||
|
||||
def handle_metadata(cfg, hp):
|
||||
import convert
|
||||
assert cfg.model_metadata_dir.is_dir(), 'Metadata dir is not a directory'
|
||||
hf_config_path = cfg.model_metadata_dir / "config.json"
|
||||
orig_config_path = cfg.model_metadata_dir / "params.json"
|
||||
# We pass a fake model here. "original" mode will check the shapes of some
|
||||
# tensors if information is missing in the .json file: other than that, the
|
||||
# model data isn't used so this should be safe (at least for now).
|
||||
fakemodel = {
|
||||
'tok_embeddings.weight': convert.LazyTensor.__new__(convert.LazyTensor),
|
||||
'layers.0.feed_forward.w1.weight': convert.LazyTensor.__new__(convert.LazyTensor),
|
||||
}
|
||||
fakemodel['tok_embeddings.weight'].shape = [hp.n_vocab]
|
||||
fakemodel['layers.0.feed_forward.w1.weight'].shape = [hp.n_ff]
|
||||
if hf_config_path.exists():
|
||||
params = convert.Params.loadHFTransformerJson(fakemodel, hf_config_path)
|
||||
elif orig_config_path.exists():
|
||||
params = convert.Params.loadOriginalParamsJson(fakemodel, orig_config_path)
|
||||
else:
|
||||
raise ValueError('Unable to load metadata')
|
||||
vocab = convert.load_vocab(cfg.vocab_dir if cfg.vocab_dir is not None else cfg.model_metadata_dir, cfg.vocabtype)
|
||||
convert.check_vocab_size(params, vocab)
|
||||
return (params, vocab)
|
||||
|
||||
def handle_args():
|
||||
parser = argparse.ArgumentParser(description = 'Convert GGMLv3 models to GGUF')
|
||||
parser.add_argument('--input', '-i', type = Path, help = 'Input GGMLv3 filename')
|
||||
parser.add_argument('--output', '-o', type = Path, help ='Output GGUF filename')
|
||||
parser.add_argument('--name', help = 'Set model name')
|
||||
parser.add_argument('--desc', help = 'Set model description')
|
||||
parser.add_argument('--gqa', type = int, default = 1, help = 'grouped-query attention factor (use 8 for LLaMA2 70B)')
|
||||
parser.add_argument('--eps', default = '5.0e-06', help = 'RMS norm eps: Use 1e-6 for LLaMA1 and OpenLLaMA, use 1e-5 for LLaMA2')
|
||||
parser.add_argument('--context-length', '-c', type=int, default = 2048, help = 'Default max context length: LLaMA1 is typically 2048, LLaMA2 is typically 4096')
|
||||
parser.add_argument('--model-metadata-dir', '-m', type = Path, help ='Load HuggingFace/.pth vocab and metadata from the specified directory')
|
||||
parser.add_argument("--vocab-dir", type=Path, help="directory containing tokenizer.model, if separate from model file - only meaningful with --model-metadata-dir")
|
||||
parser.add_argument("--vocabtype", choices=["spm", "bpe"], help="vocab format - only meaningful with --model-metadata-dir and/or --vocab-dir (default: spm)", default="spm")
|
||||
return parser.parse_args()
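# Example invocation (hypothetical file names), using the arguments defined above:
#   python convert-llama-ggmlv3-to-gguf.py --input llama-2-70b.ggmlv3.q4_0.bin \
#       --output llama-2-70b.q4_0.gguf --gqa 8 --eps 1e-5 -c 4096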
|
||||
|
||||
def main():
|
||||
cfg = handle_args()
|
||||
print(f'* Using config: {cfg}')
|
||||
print('\n=== WARNING === Be aware that this conversion script is best-effort. Use a native GGUF model if possible. === WARNING ===\n')
|
||||
data = np.memmap(cfg.input, mode = 'r')
|
||||
model = GGMLV3Model()
|
||||
print('* Scanning GGML input file')
|
||||
offset = model.load(data, 0)
|
||||
print(f'* GGML model hyperparameters: {model.hyperparameters}')
|
||||
vocab_override = None
|
||||
params_override = None
|
||||
if cfg.model_metadata_dir is not None:
|
||||
(params_override, vocab_override) = handle_metadata(cfg, model.hyperparameters)
|
||||
print('!! Note: When overriding params the --gqa, --eps and --context-length options are ignored.')
|
||||
print(f'* Overriding params: {params_override}')
|
||||
print(f'* Overriding vocab: {vocab_override}')
|
||||
else:
|
||||
print('\n=== WARNING === Special tokens may not be converted correctly. Use --model-metadata-dir if possible === WARNING ===\n')
|
||||
converter = GGMLToGGUF(model, data, cfg, params_override = params_override, vocab_override = vocab_override)
|
||||
converter.save()
|
||||
print(f'* Successful completion. Output saved to: {cfg.output}')
|
||||
|
||||
main()
|
||||
convert-llama-hf-to-gguf.py (new file, 327 lines)
@@ -0,0 +1,327 @@
|
||||
# HF llama --> gguf conversion
|
||||
|
||||
import gguf
|
||||
import os
|
||||
import sys
|
||||
import struct
|
||||
import json
|
||||
import numpy as np
|
||||
import torch
|
||||
|
||||
from typing import Any, List, Optional
|
||||
from pathlib import Path
|
||||
from sentencepiece import SentencePieceProcessor
|
||||
|
||||
#NDArray = np.ndarray[Any, Any]
|
||||
# compatible with python < 3.9
|
||||
NDArray: 'TypeAlias' = 'np.ndarray[Any, Any]'
|
||||
|
||||
# reverse HF permute back to original pth layout
|
||||
# https://github.com/huggingface/transformers/blob/main/src/transformers/models/llama/convert_llama_weights_to_hf.py
|
||||
|
||||
|
||||
def reverse_hf_permute(weights: NDArray, n_head: int, n_kv_head: Optional[int] = None) -> NDArray:
|
||||
if n_kv_head is not None and n_head != n_kv_head:
|
||||
n_head //= n_kv_head
|
||||
|
||||
return (weights.reshape(n_head, 2, weights.shape[0] // n_head // 2, *weights.shape[1:])
|
||||
.swapaxes(1, 2)
|
||||
.reshape(weights.shape))
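# Round-trip sanity sketch (assumption: the forward permute is the one used by the
# HF conversion script referenced in the comment above). reverse_hf_permute undoes it:
def _hf_permute(w, n_head):
    # forward permutation applied by convert_llama_weights_to_hf.py (assumed)
    return (w.reshape(n_head, w.shape[0] // n_head // 2, 2, *w.shape[1:])
            .swapaxes(1, 2)
            .reshape(w.shape))

if __debug__:
    _w = np.arange(64 * 64, dtype=np.float32).reshape(64, 64)
    assert np.array_equal(reverse_hf_permute(_hf_permute(_w, 8), 8), _w)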
|
||||
|
||||
|
||||
def count_model_parts(dir_model: str) -> int:
|
||||
num_parts = 0
|
||||
|
||||
for filename in os.listdir(dir_model):
|
||||
if filename.startswith("pytorch_model-"):
|
||||
num_parts += 1
|
||||
|
||||
if num_parts > 0:
|
||||
print("gguf: found " + str(num_parts) + " model parts")
|
||||
|
||||
return num_parts
|
||||
|
||||
|
||||
if len(sys.argv) < 3:
|
||||
print("Usage: convert-h5-to-ggml.py dir-model ftype\n")
|
||||
print(" ftype == 0 -> float32")
|
||||
print(" ftype == 1 -> float16")
|
||||
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
# output in the same directory as the model
|
||||
dir_model = sys.argv[1]
|
||||
last_dir = os.path.basename(os.path.normpath(dir_model))
|
||||
|
||||
|
||||
# possible tensor data types
|
||||
# ftype == 0 -> float32
|
||||
# ftype == 1 -> float16
|
||||
|
||||
|
||||
# map from ftype to string
|
||||
ftype_str = ["f32", "f16"]
|
||||
|
||||
ftype = 1
|
||||
if len(sys.argv) > 2:
|
||||
ftype = int(sys.argv[2])
|
||||
if ftype < 0 or ftype > 1:
|
||||
print("Invalid ftype: " + str(ftype))
|
||||
|
||||
sys.exit(1)
|
||||
|
||||
fname_out = sys.argv[1] + "/ggml-model-" + ftype_str[ftype] + ".gguf"
|
||||
|
||||
print("gguf: loading model "+last_dir)
|
||||
|
||||
with open(dir_model + "/config.json", "r", encoding="utf-8") as f:
|
||||
hparams = json.load(f)
|
||||
|
||||
if hparams["architectures"][0] != "LlamaForCausalLM":
|
||||
print("Model architecture not supported: " + hparams["architectures"][0])
|
||||
|
||||
sys.exit()
|
||||
|
||||
# get number of model parts
|
||||
num_parts = count_model_parts(dir_model)
|
||||
|
||||
ARCH=gguf.MODEL_ARCH.LLAMA
|
||||
gguf_writer = gguf.GGUFWriter(fname_out, gguf.MODEL_ARCH_NAMES[ARCH])
|
||||
|
||||
print("gguf: get model metadata")
|
||||
|
||||
block_count = hparams["num_hidden_layers"]
|
||||
head_count = hparams["num_attention_heads"]
|
||||
|
||||
if "num_key_value_heads" in hparams:
|
||||
head_count_kv = hparams["num_key_value_heads"]
|
||||
else:
|
||||
head_count_kv = head_count
|
||||
|
||||
if "_name_or_path" in hparams:
|
||||
hf_repo = hparams["_name_or_path"]
|
||||
else:
|
||||
hf_repo = ""
|
||||
|
||||
if "max_sequence_length" in hparams:
|
||||
ctx_length = hparams["max_sequence_length"]
|
||||
elif "max_position_embeddings" in hparams:
|
||||
ctx_length = hparams["max_position_embeddings"]
|
||||
else:
|
||||
print("gguf: can not find ctx length parameter.")
|
||||
|
||||
sys.exit()
|
||||
|
||||
|
||||
gguf_writer.add_name(last_dir)
|
||||
gguf_writer.add_source_hf_repo(hf_repo)
|
||||
gguf_writer.add_tensor_data_layout("Meta AI original pth")
|
||||
gguf_writer.add_context_length(ctx_length)
|
||||
gguf_writer.add_embedding_length(hparams["hidden_size"])
|
||||
gguf_writer.add_block_count(block_count)
|
||||
gguf_writer.add_feed_forward_length(hparams["intermediate_size"])
|
||||
gguf_writer.add_rope_dimension_count(hparams["hidden_size"] // hparams["num_attention_heads"])
|
||||
gguf_writer.add_head_count(head_count)
|
||||
gguf_writer.add_head_count_kv(head_count_kv)
|
||||
gguf_writer.add_layer_norm_rms_eps(hparams["rms_norm_eps"])
|
||||
|
||||
if "rope_scaling" in hparams and hparams["rope_scaling"] != None and "factor" in hparams["rope_scaling"]:
|
||||
if "type" in hparams["rope_scaling"]:
|
||||
if hparams["rope_scaling"]["type"] == "linear":
|
||||
gguf_writer.add_rope_scale_linear(hparams["rope_scaling"]["factor"])
|
||||
|
||||
|
||||
# TOKENIZATION
|
||||
|
||||
print("gguf: get tokenizer metadata")
|
||||
|
||||
tokens: List[bytes] = []
|
||||
scores: List[float] = []
|
||||
toktypes: List[int] = []
|
||||
|
||||
if Path(dir_model + "/tokenizer.model").is_file():
|
||||
# vocab type sentencepiece
|
||||
print("gguf: get sentencepiece tokenizer vocab, scores and token types")
|
||||
|
||||
tokenizer = SentencePieceProcessor(dir_model + "/tokenizer.model")
|
||||
|
||||
for i in range(tokenizer.vocab_size()):
|
||||
text: bytes
|
||||
score: float
|
||||
|
||||
piece = tokenizer.id_to_piece(i)
|
||||
text = piece.encode("utf-8")
|
||||
score = tokenizer.get_score(i)
|
||||
|
||||
toktype = 1  # default to normal token type
|
||||
if tokenizer.is_unknown(i):
|
||||
toktype = 2
|
||||
if tokenizer.is_control(i):
|
||||
toktype = 3
|
||||
|
||||
# toktype = 4 is user-defined = tokens from added_tokens.json
|
||||
|
||||
if tokenizer.is_unused(i):
|
||||
toktype = 5
|
||||
if tokenizer.is_byte(i):
|
||||
toktype = 6
|
||||
|
||||
tokens.append(text)
|
||||
scores.append(score)
|
||||
toktypes.append(toktype)
|
||||
|
||||
if Path(dir_model + "/added_tokens.json").is_file():
|
||||
with open(dir_model + "/added_tokens.json", "r", encoding="utf-8") as f:
|
||||
addtokens_json = json.load(f)
|
||||
|
||||
print("gguf: get added tokens")
|
||||
|
||||
for key in addtokens_json:
|
||||
tokens.append( key.encode("utf-8") )
|
||||
scores.append(-1000.0)
|
||||
toktypes.append(4) # user-defined token type
|
||||
|
||||
|
||||
gguf_writer.add_tokenizer_model("llama")
|
||||
gguf_writer.add_token_list(tokens)
|
||||
gguf_writer.add_token_scores(scores)
|
||||
gguf_writer.add_token_types(toktypes)
|
||||
|
||||
|
||||
print("gguf: get special token ids")
|
||||
|
||||
if Path(dir_model + "/tokenizer.json").is_file():
|
||||
# Look for special tokens in tokenizer.json if it exists
|
||||
|
||||
with open(dir_model + "/tokenizer.json", "r", encoding="utf-8") as f:
|
||||
tokenizer = json.load(f)
|
||||
|
||||
if "added_tokens" in tokenizer and Path(dir_model + "/tokenizer_config.json").is_file():
|
||||
|
||||
with open(dir_model + "/tokenizer_config.json", "r", encoding="utf-8") as f:
|
||||
tokenizer_config = json.load(f)
|
||||
|
||||
if "bos_token" in tokenizer_config and tokenizer_config["bos_token"] != None:
|
||||
for key in tokenizer["added_tokens"]:
|
||||
if key["content"] == tokenizer_config["bos_token"]["content"]:
|
||||
gguf_writer.add_bos_token_id(key["id"])
|
||||
|
||||
if "eos_token" in tokenizer_config and tokenizer_config["eos_token"] != None:
|
||||
for key in tokenizer["added_tokens"]:
|
||||
if key["content"] == tokenizer_config["eos_token"]["content"]:
|
||||
gguf_writer.add_eos_token_id(key["id"])
|
||||
|
||||
if "unk_token" in tokenizer_config and tokenizer_config["unk_token"] != None:
|
||||
for key in tokenizer["added_tokens"]:
|
||||
if key["content"] == tokenizer_config["unk_token"]["content"]:
|
||||
gguf_writer.add_unk_token_id(key["id"])
|
||||
|
||||
if "sep_token" in tokenizer_config and tokenizer_config["sep_token"] != None:
|
||||
for key in tokenizer["added_tokens"]:
|
||||
if key["content"] == tokenizer_config["sep_token"]["content"]:
|
||||
gguf_writer.add_sep_token_id(key["id"])
|
||||
|
||||
if "pad_token" in tokenizer_config and tokenizer_config["pad_token"] != None:
|
||||
for key in tokenizer["added_tokens"]:
|
||||
if key["content"] == tokenizer_config["pad_token"]["content"]:
|
||||
gguf_writer.add_pad_token_id(key["id"])
|
||||
else:
|
||||
# If no tokenizer.json: Look for special tokens in config.json
|
||||
|
||||
if "bos_token_id" in hparams and hparams["bos_token_id"] != None:
|
||||
gguf_writer.add_bos_token_id(hparams["bos_token_id"])
|
||||
|
||||
if "eos_token_id" in hparams and hparams["eos_token_id"] != None:
|
||||
gguf_writer.add_eos_token_id(hparams["eos_token_id"])
|
||||
|
||||
if "unk_token_id" in hparams and hparams["unk_token_id"] != None:
|
||||
gguf_writer.add_unk_token_id(hparams["unk_token_id"])
|
||||
|
||||
if "sep_token_id" in hparams and hparams["sep_token_id"] != None:
|
||||
gguf_writer.add_sep_token_id(hparams["sep_token_id"])
|
||||
|
||||
if "pad_token_id" in hparams and hparams["pad_token_id"] != None:
|
||||
gguf_writer.add_pad_token_id(hparams["pad_token_id"])
|
||||
|
||||
|
||||
# TENSORS
|
||||
|
||||
tensor_map = gguf.get_tensor_name_map(ARCH,block_count)
|
||||
|
||||
# tensor info
|
||||
print("gguf: get tensor metadata")
|
||||
|
||||
if num_parts == 0:
|
||||
part_names = ("pytorch_model.bin",)
|
||||
else:
|
||||
part_names = (
|
||||
f"pytorch_model-{n:05}-of-{num_parts:05}.bin" for n in range(1, num_parts + 1)
|
||||
)
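# e.g. with num_parts = 2 the generator above yields "pytorch_model-00001-of-00002.bin"
# and "pytorch_model-00002-of-00002.bin", the standard HF shard naming.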
|
||||
|
||||
for part_name in part_names:
|
||||
print("gguf: loading model part '" + part_name + "'")
|
||||
model_part = torch.load(f"{dir_model}/{part_name}", map_location="cpu")
|
||||
|
||||
for name in model_part.keys():
|
||||
data = model_part[name]
|
||||
|
||||
# we don't need these
|
||||
if name.endswith(".rotary_emb.inv_freq"):
|
||||
continue
|
||||
|
||||
old_dtype = data.dtype
|
||||
|
||||
# convert any unsupported data types to float32
|
||||
if data.dtype != torch.float16 and data.dtype != torch.float32:
|
||||
data = data.to(torch.float32)
|
||||
|
||||
data = data.squeeze().numpy()
|
||||
|
||||
# reverse permute these
|
||||
if name.endswith(".q_proj.weight"):
|
||||
data = reverse_hf_permute(data, head_count)
|
||||
if name.endswith(".k_proj.weight"):
|
||||
data = reverse_hf_permute(data, head_count, head_count_kv)
|
||||
|
||||
# map tensor names
|
||||
if name.endswith(".weight") and name[:-7] in tensor_map:
|
||||
name = tensor_map[name[:-7]] + ".weight"
|
||||
elif name.endswith(".bias") and name[:-5] in tensor_map:
|
||||
name = tensor_map[name[:-5]] + ".bias"
|
||||
else:
|
||||
print("Can not map tensor '" + name + "'")
|
||||
sys.exit()
|
||||
|
||||
n_dims = len(data.shape)
|
||||
data_dtype = data.dtype
|
||||
|
||||
# if f32 desired, convert any float16 to float32
|
||||
if ftype == 0 and data_dtype == np.float16:
|
||||
data = data.astype(np.float32)
|
||||
|
||||
# TODO: Why can't we use these float16 values as-is? There should be no reason to store float16 as float32
|
||||
if ftype == 1 and data_dtype == np.float16 and n_dims == 1:
|
||||
data = data.astype(np.float32)
|
||||
|
||||
# if f16 desired, convert any float32 2-dim weight tensors to float16
|
||||
if ftype == 1 and data_dtype == np.float32 and name.endswith(".weight") and n_dims == 2:
|
||||
data = data.astype(np.float16)
|
||||
|
||||
print(name + ", n_dims = " + str(n_dims) + ", " + str(old_dtype) + " --> " + str(data.dtype))
|
||||
|
||||
gguf_writer.add_tensor(name, data)
|
||||
|
||||
|
||||
print("gguf: write header")
|
||||
gguf_writer.write_header_to_file()
|
||||
print("gguf: write metadata")
|
||||
gguf_writer.write_kv_data_to_file()
|
||||
print("gguf: write tensors")
|
||||
gguf_writer.write_tensors_to_file()
|
||||
|
||||
gguf_writer.close()
|
||||
|
||||
|
||||
print("gguf: model successfully exported to '" + fname_out + "'")
|
||||
print("")
|
||||
@@ -1,13 +0,0 @@
|
||||
# Compatibility stub
|
||||
|
||||
import argparse
|
||||
|
||||
import convert
|
||||
|
||||
parser = argparse.ArgumentParser(
|
||||
description="""[DEPRECATED - use `convert.py` instead]
|
||||
Convert a LLaMA model checkpoint to a ggml compatible file""")
|
||||
parser.add_argument('dir_model', help='directory containing the model checkpoint')
|
||||
parser.add_argument('ftype', help='file type (0: float32, 1: float16)', type=int, choices=[0, 1], default=1)
|
||||
args = parser.parse_args()
|
||||
convert.main(['--outtype', 'f16' if args.ftype == 1 else 'f32', '--', args.dir_model])
|
||||
convert.py (1042 lines changed; file diff suppressed because it is too large)
@@ -3,7 +3,7 @@
|
||||
## Verifying that the model is running on the GPU with cuBLAS
|
||||
Make sure you compiled llama with the correct env variables according to [this guide](../README.md#cublas), so that llama accepts the `-ngl N` (or `--n-gpu-layers N`) flag. When running llama, you may configure `N` to be very large, and llama will offload the maximum possible number of layers to the GPU, even if it's less than the number you configured. For example:
|
||||
```shell
|
||||
./main -m "path/to/model.bin" -ngl 200000 -p "Please sir, may I have some "
|
||||
./main -m "path/to/model.gguf" -ngl 200000 -p "Please sir, may I have some "
|
||||
```
|
||||
|
||||
When running llama, before it starts the inference work, it will output diagnostic information that shows whether cuBLAS is offloading work to the GPU. Look for these lines:
|
||||
@@ -25,9 +25,9 @@ GPU: A6000 (48GB VRAM)
|
||||
CPU: 7 physical cores
|
||||
RAM: 32GB
|
||||
|
||||
Model: `TheBloke_Wizard-Vicuna-30B-Uncensored-GGML/Wizard-Vicuna-30B-Uncensored.ggmlv3.q4_0.bin` (30B parameters, 4bit quantization, GGML)
|
||||
Model: `TheBloke_Wizard-Vicuna-30B-Uncensored-GGML/Wizard-Vicuna-30B-Uncensored.q4_0.gguf` (30B parameters, 4bit quantization, GGUF)
|
||||
|
||||
Run command: `./main -m "path/to/model.bin" -p "-p "An extremely detailed description of the 10 best ethnic dishes will follow, with recipes: " -n 1000 [additional benchmark flags]`
|
||||
Run command: `./main -m "path/to/model.gguf" -p "An extremely detailed description of the 10 best ethnic dishes will follow, with recipes: " -n 1000 [additional benchmark flags]`
|
||||
|
||||
Result:
|
||||
|
||||
|
||||
@@ -6,25 +6,6 @@ find_package(Threads REQUIRED)
|
||||
|
||||
# ...
|
||||
|
||||
# common
|
||||
|
||||
set(TARGET common)
|
||||
|
||||
add_library(${TARGET} OBJECT
|
||||
common.h
|
||||
common.cpp
|
||||
grammar-parser.h
|
||||
grammar-parser.cpp
|
||||
)
|
||||
|
||||
if (BUILD_SHARED_LIBS)
|
||||
set_target_properties(${TARGET} PROPERTIES POSITION_INDEPENDENT_CODE ON)
|
||||
endif()
|
||||
|
||||
target_include_directories(${TARGET} PUBLIC .)
|
||||
target_compile_features(${TARGET} PUBLIC cxx_std_11)
|
||||
target_link_libraries(${TARGET} PRIVATE llama)
|
||||
|
||||
# examples
|
||||
|
||||
include_directories(${CMAKE_CURRENT_SOURCE_DIR})
|
||||
@@ -40,8 +21,10 @@ else()
|
||||
add_subdirectory(benchmark)
|
||||
add_subdirectory(baby-llama)
|
||||
add_subdirectory(train-text-from-scratch)
|
||||
add_subdirectory(convert-llama2c-to-ggml)
|
||||
add_subdirectory(simple)
|
||||
add_subdirectory(embd-input)
|
||||
add_subdirectory(llama-bench)
|
||||
if (LLAMA_METAL)
|
||||
add_subdirectory(metal)
|
||||
endif()
|
||||
|
||||
examples/convert-llama2c-to-ggml/CMakeLists.txt (new file, 5 lines)
@@ -0,0 +1,5 @@
|
||||
set(TARGET convert-llama2c-to-ggml)
|
||||
add_executable(${TARGET} convert-llama2c-to-ggml.cpp)
|
||||
install(TARGETS ${TARGET} RUNTIME)
|
||||
target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
|
||||
target_compile_features(${TARGET} PRIVATE cxx_std_11)
|
||||
examples/convert-llama2c-to-ggml/README.md (new file, 26 lines)
@@ -0,0 +1,26 @@
|
||||
## Convert llama2.c model to ggml
|
||||
|
||||
This example reads weights from the [llama2.c](https://github.com/karpathy/llama2.c) project and saves them in a ggml-compatible format. The vocab available in `models/ggml-vocab.bin` is used by default.
|
||||
|
||||
To convert a model, first download the model files from the [llama2.c](https://github.com/karpathy/llama2.c) repository, then build this example:
|
||||
|
||||
`$ make -j`
|
||||
|
||||
After successful compilation, the following usage options are available:
|
||||
```
|
||||
usage: ./convert-llama2c-to-ggml [options]
|
||||
|
||||
options:
|
||||
-h, --help show this help message and exit
|
||||
--copy-vocab-from-model FNAME model path from which to copy vocab (default 'models/ggml-vocab.bin')
|
||||
--llama2c-model FNAME [REQUIRED] model path from which to load Karpathy's llama2.c model
|
||||
  --llama2c-output-model FNAME  model path to save the converted llama2.c model (default 'ak_llama_model.bin')
|
||||
```
|
||||
|
||||
An example command is as follows:
|
||||
|
||||
`$ ./convert-llama2c-to-ggml --copy-vocab-from-model <ggml-vocab.bin> --llama2c-model <llama2.c model path> --llama2c-output-model <ggml output model path>`
|
||||
|
||||
Now you can use the model with a command like:
|
||||
|
||||
`$ ./main -m <ggml output model path> -p "One day, Lily met a Shoggoth" -n 500 -c 256 -eps 1e-5`
|
||||
examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp (new file, 827 lines)
@@ -0,0 +1,827 @@
|
||||
#include "ggml.h"
|
||||
#include "llama.h"
|
||||
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
#include <cassert>
|
||||
#include <climits>
|
||||
#include <cstring>
|
||||
#include <cstdarg>
|
||||
#include <ctime>
|
||||
#include <random>
|
||||
#include <stdexcept>
|
||||
#include <algorithm>
|
||||
#include <string>
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
#pragma warning(disable: 4244 4267) // possible loss of data
|
||||
#endif
|
||||
|
||||
//////////////////////////////////////// llama2.c model structs and functions to load models, alloc memory etc.
|
||||
typedef struct {
|
||||
int dim; // transformer dimension
|
||||
int hidden_dim; // for ffn layers
|
||||
int n_layers; // number of layers
|
||||
int n_heads; // number of query heads
|
||||
int n_kv_heads; // number of key/value heads (can be < query heads because of multiquery)
|
||||
int vocab_size; // vocabulary size, usually 256 (byte-level)
|
||||
int seq_len; // max sequence length
|
||||
} Config;
|
||||
|
||||
typedef struct {
|
||||
// token embedding table
|
||||
float* token_embedding_table; // (vocab_size, dim)
|
||||
// weights for rmsnorms
|
||||
float* rms_att_weight; // (layer, dim) rmsnorm weights
|
||||
float* rms_ffn_weight; // (layer, dim)
|
||||
// weights for matmuls
|
||||
float* wq; // (layer, dim, dim)
|
||||
float* wk; // (layer, dim, dim)
|
||||
float* wv; // (layer, dim, dim)
|
||||
float* wo; // (layer, dim, dim)
|
||||
// weights for ffn
|
||||
float* w1; // (layer, hidden_dim, dim)
|
||||
float* w2; // (layer, dim, hidden_dim)
|
||||
float* w3; // (layer, hidden_dim, dim)
|
||||
// final rmsnorm
|
||||
float* rms_final_weight; // (dim,)
|
||||
// freq_cis for RoPE relatively positional embeddings
|
||||
// float* freq_cis_real; // (seq_len, dim/2)
|
||||
// float* freq_cis_imag; // (seq_len, dim/2)
|
||||
// (optional) classifier weights for the logits, on the last layer
|
||||
//float* wcls;
|
||||
} TransformerWeights;
|
||||
|
||||
void malloc_weights(TransformerWeights* w, Config* p) {
|
||||
    // we use value-initialized new[] (the calloc equivalent) instead of plain new to keep valgrind happy
|
||||
w->token_embedding_table = new float[p->vocab_size * p->dim]();
|
||||
printf("[%s:AK] Allocating [%d] x [%d] = [%d] float space for w->token_embedding_table\n",__func__,p->vocab_size , p->dim, p->vocab_size * p->dim);
|
||||
|
||||
w->rms_att_weight = new float[p->n_layers * p->dim]();
|
||||
printf("[%s:AK] Allocating [%d] x [%d] = [%d] float space for w->rms_att_weight\n",__func__,p->n_layers, p->dim, p->n_layers * p->dim);
|
||||
|
||||
w->rms_ffn_weight = new float[p->n_layers * p->dim]();
|
||||
printf("[%s:AK] Allocating [%d] x [%d] = [%d] float space for w->rms_ffn_weight\n",__func__,p->n_layers , p->dim, p->n_layers * p->dim);
|
||||
|
||||
w->wq = new float[p->n_layers * p->dim * p->dim]();
|
||||
printf("[%s:AK] Allocating [%d] x [%d] x [%d] = [%d] float space for w->wq\n",__func__,p->n_layers, p->dim, p->dim, p->n_layers * p->dim * p->dim);
|
||||
|
||||
w->wk = new float[p->n_layers * p->dim * p->dim]();
|
||||
printf("[%s:AK] Allocating [%d] x [%d] x [%d] = [%d] float space for w->wk\n",__func__,p->n_layers, p->dim, p->dim, p->n_layers * p->dim * p->dim);
|
||||
|
||||
w->wv = new float[p->n_layers * p->dim * p->dim]();
|
||||
printf("[%s:AK] Allocating [%d] x [%d] x [%d] = [%d] float space for w->wv\n",__func__, p->n_layers, p->dim, p->dim, p->n_layers * p->dim * p->dim);
|
||||
|
||||
w->wo = new float[p->n_layers * p->dim * p->dim]();
|
||||
printf("[%s:AK] Allocating [%d] x [%d] x [%d] = [%d] float space for w->wo\n",__func__,p->n_layers, p->dim, p->dim, p->n_layers * p->dim * p->dim);
|
||||
|
||||
w->w1 = new float[p->n_layers * p->hidden_dim * p->dim]();
|
||||
printf("[%s:AK] Allocating [%d] x [%d] x [%d] = [%d] float space for w->w1\n",__func__,p->n_layers, p->hidden_dim, p->dim, p->n_layers * p->hidden_dim * p->dim);
|
||||
|
||||
w->w2 = new float[p->n_layers * p->hidden_dim * p->dim]();
|
||||
printf("[%s:AK] Allocating [%d] x [%d] x [%d] = [%d] float space for w->w2\n",__func__,p->n_layers, p->dim, p->hidden_dim, p->n_layers * p->hidden_dim * p->dim);
|
||||
|
||||
w->w3 = new float[p->n_layers * p->hidden_dim * p->dim]();
|
||||
printf("[%s:AK] Allocating [%d] x [%d] x [%d] = [%d] float space for w->w3\n",__func__,p->n_layers, p->hidden_dim, p->dim, p->n_layers * p->hidden_dim * p->dim);
|
||||
|
||||
w->rms_final_weight = new float[p->dim]();
|
||||
printf("[%s:AK] Allocating [%d] float space for w->rms_final_weight\n",__func__,p->dim);
|
||||
}
|
||||
|
||||
int checkpoint_init_weights(TransformerWeights *w, Config* p, FILE* f) {
|
||||
if (fread(w->token_embedding_table, sizeof(float), p->vocab_size * p->dim, f) != static_cast<size_t>(p->vocab_size * p->dim)) return 1;
|
||||
if (fread(w->rms_att_weight, sizeof(float), p->n_layers * p->dim, f) != static_cast<size_t>(p->n_layers * p->dim)) return 1;
|
||||
if (fread(w->wq, sizeof(float), p->n_layers * p->dim * p->dim, f) != static_cast<size_t>(p->n_layers * p->dim * p->dim)) return 1;
|
||||
if (fread(w->wk, sizeof(float), p->n_layers * p->dim * p->dim, f) != static_cast<size_t>(p->n_layers * p->dim * p->dim)) return 1;
|
||||
if (fread(w->wv, sizeof(float), p->n_layers * p->dim * p->dim, f) != static_cast<size_t>(p->n_layers * p->dim * p->dim)) return 1;
|
||||
if (fread(w->wo, sizeof(float), p->n_layers * p->dim * p->dim, f) != static_cast<size_t>(p->n_layers * p->dim * p->dim)) return 1;
|
||||
if (fread(w->rms_ffn_weight, sizeof(float), p->n_layers * p->dim, f) != static_cast<size_t>(p->n_layers * p->dim)) return 1;
|
||||
if (fread(w->w1, sizeof(float), p->n_layers * p->dim * p->hidden_dim, f) != static_cast<size_t>(p->n_layers * p->dim * p->hidden_dim)) return 1;
|
||||
if (fread(w->w2, sizeof(float), p->n_layers * p->hidden_dim * p->dim, f) != static_cast<size_t>(p->n_layers * p->hidden_dim * p->dim)) return 1;
|
||||
if (fread(w->w3, sizeof(float), p->n_layers * p->dim * p->hidden_dim, f) != static_cast<size_t>(p->n_layers * p->dim * p->hidden_dim)) return 1;
|
||||
if (fread(w->rms_final_weight, sizeof(float), p->dim, f) != static_cast<size_t>(p->dim)) return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
void free_weights(TransformerWeights* w) {
|
||||
delete w->token_embedding_table;
|
||||
delete w->rms_att_weight;
|
||||
delete w->rms_ffn_weight;
|
||||
delete w->wq;
|
||||
delete w->wk;
|
||||
delete w->wv;
|
||||
delete w->wo;
|
||||
delete w->w1;
|
||||
delete w->w2;
|
||||
delete w->w3;
|
||||
delete w->rms_final_weight;
|
||||
}
|
||||
|
||||
void print_sample_weights(TransformerWeights *w){
|
||||
printf("----- Quick print of first of the weight vales of all the variables\n");
|
||||
printf("%f\n", w->token_embedding_table[0]);
|
||||
printf("%f\n", w->rms_att_weight[0]);
|
||||
printf("%f\n", w->rms_ffn_weight[0]);
|
||||
|
||||
printf("%f\n", w->wq[0]);
|
||||
printf("%f\n", w->wk[0]);
|
||||
printf("%f\n", w->wv[0]);
|
||||
printf("%f\n", w->wo[0]);
|
||||
printf("%f\n", w->w1[0]);
|
||||
printf("%f\n", w->w2[0]);
|
||||
printf("%f\n", w->w3[0]);
|
||||
printf("%f\n", w->rms_att_weight[0]);
|
||||
}
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
//////////////////////////////////////// ggml structs and functions required to load models, configs and save the model.
|
||||
|
||||
struct llama_vocab {
|
||||
using id = int32_t;
|
||||
using token = std::string;
|
||||
using ttype = llama_token_type;
|
||||
|
||||
struct token_data {
|
||||
token text;
|
||||
float score;
|
||||
ttype type;
|
||||
};
|
||||
|
||||
std::unordered_map<token, id> token_to_id;
|
||||
std::vector<token_data> id_to_token;
|
||||
};
|
||||
|
||||
struct my_llama_hparams {
|
||||
uint32_t n_vocab = 32000;
|
||||
uint32_t n_ctx = 512; // this is provided as user input?
|
||||
uint32_t n_embd = 4096;
|
||||
uint32_t n_mult = 4;
|
||||
uint32_t n_head = 32;
|
||||
uint32_t n_layer = 32;
|
||||
uint32_t n_rot = 64;
|
||||
bool operator!=(const my_llama_hparams& other) const {
|
||||
return memcmp(this, &other, sizeof(my_llama_hparams));
|
||||
}
|
||||
};
|
||||
|
||||
struct my_llama_layer {
|
||||
// normalization
|
||||
struct ggml_tensor * attention_norm;
|
||||
|
||||
// attention
|
||||
struct ggml_tensor * wq;
|
||||
struct ggml_tensor * wk;
|
||||
struct ggml_tensor * wv;
|
||||
struct ggml_tensor * wo;
|
||||
|
||||
// normalization
|
||||
struct ggml_tensor * ffn_norm;
|
||||
|
||||
// ff
|
||||
struct ggml_tensor * w1;
|
||||
struct ggml_tensor * w2;
|
||||
struct ggml_tensor * w3;
|
||||
};
|
||||
|
||||
struct my_llama_model {
|
||||
struct ggml_context * ctx = NULL;
|
||||
|
||||
my_llama_hparams hparams;
|
||||
|
||||
struct ggml_tensor * tok_embeddings;
|
||||
|
||||
struct ggml_tensor * norm;
|
||||
struct ggml_tensor * output;
|
||||
|
||||
std::vector<my_llama_layer> layers;
|
||||
|
||||
uint32_t train_its = 0;
|
||||
uint32_t train_samples = 0;
|
||||
uint32_t train_tokens = 0;
|
||||
};
|
||||
|
||||
struct train_params {
|
||||
const char * fn_vocab_model;
|
||||
const char * fn_llama2c_model;
|
||||
const char * fn_llama2c_output_model;
|
||||
const char * fn_train_data;
|
||||
const char * fn_checkpoint_in;
|
||||
const char * fn_checkpoint_out;
|
||||
const char * fn_model_out;
|
||||
|
||||
uint32_t seed;
|
||||
|
||||
int n_ctx;
|
||||
int n_embd;
|
||||
int n_mult;
|
||||
int n_head;
|
||||
int n_layer;
|
||||
int n_rotmax;
|
||||
|
||||
int n_threads;
|
||||
int n_batch;
|
||||
int n_examples;
|
||||
int n_predict;
|
||||
|
||||
int print_info_interval;
|
||||
int print_details_interval;
|
||||
|
||||
bool samples_start_after_nl;
|
||||
bool use_adam;
|
||||
bool use_flash;
|
||||
bool use_scratch;
|
||||
|
||||
// only adam
|
||||
int warmup;
|
||||
int cos_decay_steps;
|
||||
float cos_decay_restart;
|
||||
float cos_decay_alpha;
|
||||
|
||||
int lbfgs_n_iter;
|
||||
int adam_n_iter;
|
||||
float adam_alpha;
|
||||
float adam_decay;
|
||||
|
||||
int mem_model_gb;
|
||||
int mem_compute_gb;
|
||||
int mem_compute0_gb;
|
||||
int mem_compute1_gb;
|
||||
};
|
||||
|
||||
uint32_t get_n_ff(const struct my_llama_hparams* hparams) {
|
||||
const uint32_t n_ff = ((2*(4*hparams->n_embd)/3 + hparams->n_mult - 1)/hparams->n_mult)*hparams->n_mult;
|
||||
return n_ff;
|
||||
}
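// Example of the rounding above: with n_embd = 4096 and n_mult = 256 (the original
// LLaMA-7B settings), 2*(4*4096)/3 = 10922, which rounds up to the next multiple
// of 256, giving n_ff = 11008 (the LLaMA-7B feed-forward size).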
|
||||
|
||||
void print_params(struct my_llama_hparams * params) {
|
||||
printf("%s: n_vocab: %d\n", __func__, params->n_vocab);
|
||||
printf("%s: n_ctx: %d\n", __func__, params->n_ctx);
|
||||
printf("%s: n_embd: %d\n", __func__, params->n_embd);
|
||||
printf("%s: n_mult: %d\n", __func__, params->n_mult);
|
||||
printf("%s: n_head: %d\n", __func__, params->n_head);
|
||||
printf("%s: n_ff: %d\n", __func__, get_n_ff(params));
|
||||
printf("%s: n_layer: %d\n", __func__, params->n_layer);
|
||||
printf("%s: n_rot: %d\n", __func__, params->n_rot);
|
||||
}
|
||||
|
||||
void init_model(struct my_llama_model * model) {
|
||||
const auto & hparams = model->hparams;
|
||||
|
||||
const uint32_t n_embd = hparams.n_embd;
|
||||
const uint32_t n_layer = hparams.n_layer;
|
||||
const uint32_t n_vocab = hparams.n_vocab;
|
||||
|
||||
const uint32_t n_ff = get_n_ff(&hparams);
|
||||
struct ggml_context * ctx = model->ctx;
|
||||
|
||||
model->train_its = 0;
|
||||
model->train_samples = 0;
|
||||
model->train_tokens = 0;
|
||||
|
||||
model->tok_embeddings = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, n_embd, n_vocab);
|
||||
printf("[%s:GG] Allocating [%d] x [%d] = [%d] float space for model->tok_embeddings\n",__func__,n_embd , n_vocab, n_embd * n_vocab);
|
||||
|
||||
model->norm = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_embd);
|
||||
printf("[%s:GG] Allocating [%d] float space for model->norm\n",__func__,n_embd);
|
||||
|
||||
model->output = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, n_embd, n_vocab);
|
||||
printf("[%s:GG] Allocating [%d] x[%d] = [%d] float space for model->output\n",__func__,n_embd, n_vocab, n_embd * n_vocab);
|
||||
|
||||
    // printing the per-layer allocations here so we don't print in the for loop.
|
||||
printf("[%s:GG] Allocating [%d] x[%d] = [%d] float space for layer.wq for [%d] layers\n",__func__, n_embd, n_embd, n_embd * n_embd, n_layer);
|
||||
printf("[%s:GG] Allocating [%d] x[%d] = [%d] float space for layer.wk for [%d] layers\n",__func__, n_embd, n_embd, n_embd * n_embd, n_layer);
|
||||
printf("[%s:GG] Allocating [%d] x[%d] = [%d] float space for layer.wv for [%d] layers\n",__func__, n_embd, n_embd, n_embd * n_embd, n_layer);
|
||||
printf("[%s:GG] Allocating [%d] x[%d] = [%d] float space for layer.wo for [%d] layers\n",__func__, n_embd, n_embd, n_embd * n_embd, n_layer);
|
||||
|
||||
printf("[%s:GG] Allocating [%d] float space for layer.ffn_norm for [%d] layers\n",__func__,n_embd, n_layer);
|
||||
|
||||
printf("[%s:GG] Allocating [%d] x[%d] = [%d] float space for layer.w1 for [%d] layers\n",__func__, n_ff, n_embd, n_embd * n_ff, n_layer);
|
||||
printf("[%s:GG] Allocating [%d] x[%d] = [%d] float space for layer.w2 for [%d] layers\n",__func__, n_embd, n_ff, n_ff * n_embd, n_layer);
|
||||
printf("[%s:GG] Allocating [%d] x[%d] = [%d] float space for layer.w3 for [%d] layers\n",__func__, n_ff, n_embd, n_embd * n_ff, n_layer);
|
||||
|
||||
ggml_set_name(model->tok_embeddings, "tok_embeddings.weight");
|
||||
ggml_set_name(model->norm, "norm.weight");
|
||||
ggml_set_name(model->output, "output.weight");
|
||||
|
||||
model->layers.resize(n_layer);
|
||||
for (uint32_t i = 0; i < n_layer; ++i) {
|
||||
auto & layer = model->layers[i];
|
||||
|
||||
std::string layers_i = "layers." + std::to_string(i);
|
||||
|
||||
layer.attention_norm = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_embd);
|
||||
|
||||
layer.wq = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, n_embd, n_embd);
|
||||
layer.wk = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, n_embd, n_embd);
|
||||
layer.wv = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, n_embd, n_embd);
|
||||
layer.wo = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, n_embd, n_embd);
|
||||
|
||||
layer.ffn_norm = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_embd);
|
||||
|
||||
layer.w1 = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, n_embd, n_ff);
|
||||
layer.w2 = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, n_ff, n_embd);
|
||||
layer.w3 = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, n_embd, n_ff);
|
||||
|
||||
ggml_set_name(layer.attention_norm, (layers_i + ".attention_norm.weight").c_str());
|
||||
|
||||
ggml_set_name(layer.wq, (layers_i + ".attention.wq.weight").c_str());
|
||||
ggml_set_name(layer.wk, (layers_i + ".attention.wk.weight").c_str());
|
||||
ggml_set_name(layer.wv, (layers_i + ".attention.wv.weight").c_str());
|
||||
ggml_set_name(layer.wo, (layers_i + ".attention.wo.weight").c_str());
|
||||
|
||||
ggml_set_name(layer.ffn_norm, (layers_i + ".ffn_norm.weight").c_str());
|
||||
|
||||
ggml_format_name(layer.w1, "%s.feed_forward.w1.weight", layers_i.c_str());
|
||||
ggml_format_name(layer.w2, "%s.feed_forward.w2.weight", layers_i.c_str());
|
||||
ggml_format_name(layer.w3, "%s.feed_forward.w3.weight", layers_i.c_str());
|
||||
}
|
||||
}
|
||||
|
||||
float get_f32_2d(struct ggml_tensor * tensor, int64_t i0, int64_t i1) {
|
||||
float * ptr = (float *) ((char *) tensor->data + i0*tensor->nb[0] + i1*tensor->nb[1]);
|
||||
return *ptr;
|
||||
}
|
||||
|
||||
int32_t get_i32_2d(struct ggml_tensor * tensor, int64_t i0, int64_t i1) {
|
||||
int32_t * ptr = (int32_t *) ((char *) tensor->data + i0*tensor->nb[0] + i1*tensor->nb[1]);
|
||||
return *ptr;
|
||||
}
|
||||
|
||||
void print_row(struct ggml_tensor * probs, int i) {
|
||||
for (int k = 0; k < probs->ne[0]; ++k) {
|
||||
float p = get_f32_2d(probs, k, i);
|
||||
printf(" %f", p);
|
||||
}
|
||||
printf("\n");
|
||||
}
|
||||
|
||||
void print_matrix(struct ggml_tensor * probs) {
|
||||
assert(probs->n_dims == 2);
|
||||
for (int i = 0; i < probs->ne[1]; ++i) {
|
||||
for (int k = 0; k < probs->ne[0]; ++k) {
|
||||
float p = get_f32_2d(probs, k, i);
|
||||
printf(" %.2f", p);
|
||||
}
|
||||
printf("\n");
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef __GNUC__
|
||||
#ifdef __MINGW32__
|
||||
__attribute__((format(gnu_printf, 1, 2)))
|
||||
#else
|
||||
__attribute__((format(printf, 1, 2)))
|
||||
#endif
|
||||
#endif
|
||||
static std::string format(const char * fmt, ...) {
|
||||
va_list ap, ap2;
|
||||
va_start(ap, fmt);
|
||||
va_copy(ap2, ap);
|
||||
int size = vsnprintf(NULL, 0, fmt, ap);
|
||||
GGML_ASSERT(size >= 0 && size < INT_MAX);
|
||||
std::vector<char> buf(size + 1);
|
||||
int size2 = vsnprintf(buf.data(), size + 1, fmt, ap2);
|
||||
GGML_ASSERT(size2 == size);
|
||||
va_end(ap2);
|
||||
va_end(ap);
|
||||
return std::string(buf.data(), size);
|
||||
}
|
||||
|
||||
struct llama_file {
|
||||
// use FILE * so we don't have to re-open the file to mmap
|
||||
FILE * fp;
|
||||
size_t size;
|
||||
|
||||
llama_file(const char * fname, const char * mode) {
|
||||
fp = std::fopen(fname, mode);
|
||||
if (fp == NULL) {
|
||||
size = 0;
|
||||
} else {
|
||||
seek(0, SEEK_END);
|
||||
size = tell();
|
||||
seek(0, SEEK_SET);
|
||||
}
|
||||
}
|
||||
|
||||
size_t tell() const {
|
||||
#ifdef _WIN32
|
||||
__int64 ret = _ftelli64(fp);
|
||||
#else
|
||||
long ret = std::ftell(fp);
|
||||
#endif
|
||||
GGML_ASSERT(ret != -1); // this really shouldn't fail
|
||||
return (size_t) ret;
|
||||
}
|
||||
|
||||
void seek(size_t offset, int whence) {
|
||||
#ifdef _WIN32
|
||||
int ret = _fseeki64(fp, (__int64) offset, whence);
|
||||
#else
|
||||
int ret = std::fseek(fp, (long) offset, whence);
|
||||
#endif
|
||||
GGML_ASSERT(ret == 0); // same
|
||||
}
|
||||
|
||||
void read_raw(void * ptr, size_t size) {
|
||||
if (size == 0) {
|
||||
return;
|
||||
}
|
||||
errno = 0;
|
||||
std::size_t ret = std::fread(ptr, size, 1, fp);
|
||||
if (ferror(fp)) {
|
||||
throw std::runtime_error(format("read error: %s", strerror(errno)));
|
||||
}
|
||||
if (ret != 1) {
|
||||
throw std::runtime_error(std::string("unexpectedly reached end of file"));
|
||||
}
|
||||
}
|
||||
|
||||
std::uint32_t read_u32() {
|
||||
std::uint32_t ret;
|
||||
read_raw(&ret, sizeof(ret));
|
||||
return ret;
|
||||
}
|
||||
std::float_t read_f32() {
|
||||
std::float_t ret;
|
||||
read_raw(&ret, sizeof(ret));
|
||||
return ret;
|
||||
}
|
||||
|
||||
std::string read_string(std::uint32_t len) {
|
||||
std::vector<char> chars(len);
|
||||
read_raw(chars.data(), len);
|
||||
return std::string(chars.data(), len);
|
||||
}
|
||||
|
||||
void write_raw(const void * ptr, size_t size) {
|
||||
if (size == 0) {
|
||||
return;
|
||||
}
|
||||
errno = 0;
|
||||
size_t ret = std::fwrite(ptr, size, 1, fp);
|
||||
if (ret != 1) {
|
||||
throw std::runtime_error(format("write error: %s", strerror(errno)));
|
||||
}
|
||||
}
|
||||
|
||||
void write_u32(std::uint32_t val) {
|
||||
write_raw(&val, sizeof(val));
|
||||
}
|
||||
|
||||
~llama_file() {
|
||||
if (fp) {
|
||||
std::fclose(fp);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
void write_tensor(struct llama_file * file, struct ggml_tensor * tensor) {
|
||||
if (tensor == NULL) {
|
||||
file->write_u32(0);
|
||||
file->write_u32(0);
|
||||
file->write_u32(GGML_TYPE_F32);
|
||||
file->seek((0-file->tell()) & 31, SEEK_CUR);
|
||||
return;
|
||||
}
|
||||
const char * name = ggml_get_name(tensor);
|
||||
uint32_t name_len = strlen(name);
|
||||
uint32_t nd = tensor->n_dims;
|
||||
uint32_t ne[4] = { (uint32_t)tensor->ne[0],
|
||||
(uint32_t)tensor->ne[1],
|
||||
(uint32_t)tensor->ne[2],
|
||||
(uint32_t)tensor->ne[3] };
|
||||
file->write_u32(nd);
|
||||
file->write_u32(name_len);
|
||||
file->write_u32(tensor->type);
|
||||
file->write_raw(ne, sizeof(ne[0]) * nd);
|
||||
file->write_raw(name, name_len);
|
||||
file->seek((0-file->tell()) & 31, SEEK_CUR);
|
||||
file->write_raw(tensor->data, ggml_nbytes(tensor));
|
||||
}
|
||||
|
||||
bool is_ggml_file(const char *filename) {
|
||||
llama_file file(filename, "rb");
|
||||
if (file.size < 4) {
|
||||
return false;
|
||||
}
|
||||
uint32_t magic = file.read_u32();
|
||||
return magic == GGUF_MAGIC;
|
||||
}
|
||||
|
||||
void load_vocab(const char *filename, Config *config, struct llama_vocab *vocab) {
|
||||
// heuristic to infer whether vocab is from ggml or from llama2.c vocabulary
|
||||
if (is_ggml_file(filename)) {
|
||||
|
||||
struct llama_context_params llama_params = llama_context_default_params();
|
||||
llama_params.vocab_only = true;
|
||||
|
||||
struct llama_model * lmodel = llama_load_model_from_file(filename, llama_params);
|
||||
struct llama_context * lctx = llama_new_context_with_model(lmodel, llama_params);
|
||||
|
||||
const int n_vocab = llama_n_vocab(lctx);
|
||||
vocab->id_to_token.resize(n_vocab);
|
||||
for (int i=0; i<n_vocab; ++i) {
|
||||
vocab->id_to_token[i].text = llama_token_get_text(lctx, i);
|
||||
vocab->id_to_token[i].score = llama_token_get_score(lctx, i);
|
||||
vocab->id_to_token[i].type = llama_token_get_type(lctx, i);
|
||||
vocab->token_to_id.emplace(vocab->id_to_token[i].text, i);
|
||||
}
|
||||
llama_free(lctx);
|
||||
llama_free_model(lmodel);
|
||||
} else { // assume llama2.c vocabulary
|
||||
printf("Assuming llama2.c vocabulary since %s is not a ggml file\n", filename);
|
||||
llama_file file(filename, "rb");
|
||||
const int n_vocab = config->vocab_size;
|
||||
/* uint32_t max_token_length = */ file.read_u32(); // unused
|
||||
vocab->id_to_token.resize(n_vocab);
|
||||
for (int i=0; i<n_vocab; ++i) {
|
||||
float_t score = file.read_f32();
|
||||
uint32_t len = file.read_u32();
|
||||
std::string text = file.read_string(len);
|
||||
vocab->id_to_token[i].text = text;
|
||||
vocab->id_to_token[i].score = score;
|
||||
vocab->id_to_token[i].type = LLAMA_TOKEN_TYPE_UNDEFINED;
|
||||
vocab->token_to_id.emplace(text, i);
|
||||
}
|
||||
}
|
||||
}
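// Layout of the llama2.c tokenizer file read in the else branch above: a u32
// max_token_length header, then for each of vocab_size entries a f32 score,
// a u32 byte length, and that many bytes of token text.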
|
||||
|
||||
void stuff_karpathy_weights_into_gg(struct ggml_tensor * gg_weights, float * karpathy_weights){
|
||||
int ct;
|
||||
switch (gg_weights->n_dims){
|
||||
case 1:
|
||||
ct = 0;
|
||||
for (int i0 = 0; i0 < gg_weights->ne[0]; i0++){
|
||||
float * ptr = (float *) ((char *) gg_weights->data + i0*gg_weights->nb[0]);
|
||||
*ptr = karpathy_weights[ct];
|
||||
ct++;
|
||||
}
|
||||
break;
|
||||
case 2:
|
||||
ct = 0;
|
||||
for (int i1 = 0; i1 < gg_weights->ne[1]; i1++) {
|
||||
for (int i0 = 0; i0 < gg_weights->ne[0]; i0++) {
|
||||
float * ptr = (float *) ((char *) gg_weights->data + i0*gg_weights->nb[0] + i1*gg_weights->nb[1]);
|
||||
*ptr = karpathy_weights[ct];
|
||||
ct++;
|
||||
}
|
||||
}
|
||||
break;
|
||||
case 3:
|
||||
ct = 0;
|
||||
for (int i2 = 0; i2 < gg_weights->ne[2]; i2++) {
|
||||
for (int i1 = 0; i1 < gg_weights->ne[1]; i1++) {
|
||||
for (int i0 = 0; i0 < gg_weights->ne[0]; i0++) {
|
||||
float * ptr = (float *) ((char *) gg_weights->data + i0*gg_weights->nb[0] + i1*gg_weights->nb[1] + i2*gg_weights->nb[2]);
|
||||
*ptr = karpathy_weights[ct];
|
||||
ct++;
|
||||
}
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
void save_as_llama_model(struct llama_vocab * vocab, struct my_llama_model * model, TransformerWeights* w, const char * filename) {
|
||||
struct llama_file file(filename, "wb");
|
||||
if (file.fp == NULL) {
|
||||
return;
|
||||
}
|
||||
|
||||
#pragma message("TODO: implement file saving using gguf")
|
||||
(void) vocab;
|
||||
(void) model;
|
||||
(void) w;
|
||||
// // write_magic
|
||||
// file.write_u32(LLAMA_FILE_MAGIC); // magic
|
||||
// file.write_u32(LLAMA_FILE_VERSION); // version
|
||||
// // write_hparams
|
||||
// file.write_u32(model->hparams.n_vocab);
|
||||
// file.write_u32(model->hparams.n_embd);
|
||||
// file.write_u32(model->hparams.n_mult);
|
||||
// file.write_u32(model->hparams.n_head);
|
||||
// file.write_u32(model->hparams.n_layer);
|
||||
// file.write_u32(model->hparams.n_rot);
|
||||
// file.write_u32(LLAMA_FTYPE_ALL_F32);
|
||||
//
|
||||
// // write_vocab - for now we are just writing the existing BPE voc. assuming karpathy's vocabulary is the same. idk.
|
||||
// uint32_t n_vocab = model->hparams.n_vocab;
|
||||
// for (uint32_t i = 0; i < n_vocab; i++) {
|
||||
// const auto & token_data = vocab->id_to_token.at(i);
|
||||
// file.write_u32((uint32_t) token_data.tok.size());
|
||||
// file.write_raw(token_data.tok.data(), token_data.tok.size());
|
||||
// file.write_raw(&token_data.score, sizeof(token_data.score));
|
||||
// }
|
||||
//
|
||||
// // stuff AK weights into GG weights one by one.
|
||||
// // w->token_embedding_table -> model->tok_embeddings
|
||||
// // float* -> struct ggml_tensor
|
||||
// stuff_karpathy_weights_into_gg(model->tok_embeddings, w->token_embedding_table);
|
||||
// stuff_karpathy_weights_into_gg(model->output, w->token_embedding_table);
|
||||
//
|
||||
// stuff_karpathy_weights_into_gg(model->norm, w->rms_final_weight);
|
||||
// //print_row(model->norm, 0);
|
||||
//
|
||||
// // for rms-att-weight
|
||||
// int row_length = model->hparams.n_embd;
|
||||
// const auto & hparams = model->hparams;
|
||||
// //int n_ff = model->hparams.n_embd;
|
||||
// int n_ff = get_n_ff(&hparams);
|
||||
//
|
||||
// for (uint32_t i = 0; i < model->hparams.n_layer; ++i){
|
||||
// auto & layer = model->layers[i];
|
||||
// // 1d
|
||||
// stuff_karpathy_weights_into_gg(layer.attention_norm, &w->rms_att_weight[i*row_length]);
|
||||
// stuff_karpathy_weights_into_gg(layer.ffn_norm , &w->rms_ffn_weight[i*row_length]);
|
||||
//
|
||||
// // from 3d matrix layer x dim x dim to 2d matrix dim x dim
|
||||
// stuff_karpathy_weights_into_gg(layer.wq , &w->wq[i*row_length*row_length]);
|
||||
// stuff_karpathy_weights_into_gg(layer.wk , &w->wk[i*row_length*row_length]);
|
||||
// stuff_karpathy_weights_into_gg(layer.wv , &w->wv[i*row_length*row_length]);
|
||||
// stuff_karpathy_weights_into_gg(layer.wo , &w->wo[i*row_length*row_length]);
|
||||
//
|
||||
// stuff_karpathy_weights_into_gg(layer.w1 , &w->w1[i*row_length*n_ff]);
|
||||
// stuff_karpathy_weights_into_gg(layer.w2 , &w->w2[i*n_ff*row_length]);
|
||||
// stuff_karpathy_weights_into_gg(layer.w3 , &w->w3[i*row_length*n_ff]);
|
||||
// }
|
||||
// // write tensors
|
||||
// write_tensor(&file, model->tok_embeddings);
|
||||
// write_tensor(&file, model->norm);
|
||||
// write_tensor(&file, model->output); // ?
|
||||
// for (uint32_t i = 0; i < model->hparams.n_layer; ++i) {
|
||||
// auto & layer = model->layers[i];
|
||||
//
|
||||
// write_tensor(&file, layer.attention_norm);
|
||||
// write_tensor(&file, layer.wq);
|
||||
// write_tensor(&file, layer.wk);
|
||||
// write_tensor(&file, layer.wv);
|
||||
// write_tensor(&file, layer.wo);
|
||||
// write_tensor(&file, layer.ffn_norm);
|
||||
// write_tensor(&file, layer.w1);
|
||||
// write_tensor(&file, layer.w2);
|
||||
// write_tensor(&file, layer.w3);
|
||||
// }
|
||||
}
|
||||
|
||||
struct train_params get_default_train_params() {
|
||||
struct train_params params;
|
||||
params.fn_vocab_model = "models/ggml-vocab.bin";
|
||||
params.fn_llama2c_output_model = "ak_llama_model.bin";
|
||||
params.fn_train_data = "shakespeare.txt";
|
||||
params.fn_checkpoint_in = "checkpoint.bin";
|
||||
params.fn_checkpoint_out = "checkpoint.bin";
|
||||
params.fn_model_out = "ggml-checkpoint-f32.bin";
|
||||
|
||||
params.seed = -1;
|
||||
|
||||
params.n_ctx = 128;
|
||||
params.n_embd = 256;
|
||||
params.n_mult = 256;
|
||||
params.n_head = 8;
|
||||
params.n_layer = 16;
|
||||
params.n_rotmax = 64;
|
||||
|
||||
params.n_threads = 6;
|
||||
params.n_batch = 8;
|
||||
params.n_examples = 8;
|
||||
params.n_predict = 1024;
|
||||
|
||||
params.print_info_interval = 1;
|
||||
params.print_details_interval = 2;
|
||||
|
||||
params.samples_start_after_nl = false;
|
||||
params.use_adam = true;
|
||||
params.use_flash = true;
|
||||
params.use_scratch = true;
|
||||
|
||||
// only adam
|
||||
params.warmup = 100;
|
||||
params.cos_decay_steps = 1000;
|
||||
params.cos_decay_restart = 1.1f;
|
||||
params.cos_decay_alpha = 0.0f;
|
||||
|
||||
params.lbfgs_n_iter = 16;
|
||||
params.adam_n_iter = 16;
|
||||
params.adam_alpha = 1e-3f;
|
||||
params.adam_decay = 1e-3f;
|
||||
|
||||
params.mem_model_gb = 2;
|
||||
params.mem_compute_gb = 24;
|
||||
params.mem_compute0_gb = 8;
|
||||
params.mem_compute1_gb = 2;
|
||||
|
||||
return params;
|
||||
}
|
||||
|
||||
void print_usage(int /*argc*/, char ** argv, const struct train_params * params) {
|
||||
fprintf(stderr, "usage: %s [options]\n", argv[0]);
|
||||
fprintf(stderr, "\n");
|
||||
fprintf(stderr, "options:\n");
|
||||
fprintf(stderr, " -h, --help show this help message and exit\n");
|
||||
fprintf(stderr, " --copy-vocab-from-model FNAME llama2.c vocabulary or ggml model path from which to copy vocab (default '%s')\n", params->fn_vocab_model);
|
||||
fprintf(stderr, " --llama2c-model FNAME [REQUIRED] model path from which to load Karpathy's llama2.c model\n");
|
||||
fprintf(stderr, " --llama2c-output-model FNAME model path to save the converted llama2.c model (default %s')\n", params->fn_llama2c_output_model);
|
||||
fprintf(stderr, "\n");
|
||||
}
|
||||
|
||||
bool params_parse(int argc, char ** argv, struct train_params * params) {
|
||||
bool invalid_param = false;
|
||||
bool reqd_param_found = false;
|
||||
std::string arg;
|
||||
struct train_params default_params = get_default_train_params();
|
||||
const std::string arg_prefix = "--";
|
||||
|
||||
for (int i = 1; i < argc; i++) {
|
||||
arg = argv[i];
|
||||
if (arg.compare(0, arg_prefix.size(), arg_prefix) == 0) {
|
||||
std::replace(arg.begin(), arg.end(), '_', '-');
|
||||
}
|
||||
|
||||
if (arg == "--copy-vocab-from-model") {
|
||||
if (++i >= argc) {
|
||||
invalid_param = true;
|
||||
break;
|
||||
}
|
||||
params->fn_vocab_model = argv[i];
|
||||
} else if (arg == "--llama2c-model") {
|
||||
if (++i >= argc) {
|
||||
invalid_param = true;
|
||||
break;
|
||||
}
|
||||
reqd_param_found = true;
|
||||
params->fn_llama2c_model = argv[i];
|
||||
} else if (arg == "--llama2c-output-model") {
|
||||
if (++i >= argc) {
|
||||
invalid_param = true;
|
||||
break;
|
||||
}
|
||||
params->fn_llama2c_output_model = argv[i];
|
||||
} else if (arg == "-h" || arg == "--help") {
|
||||
print_usage(argc, argv, &default_params);
|
||||
exit(0);
|
||||
} else {
|
||||
fprintf(stderr, "error: unknown argument: %s\n", arg.c_str());
|
||||
print_usage(argc, argv, &default_params);
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
if (invalid_param) {
|
||||
fprintf(stderr, "error: invalid parameter for argument: %s\n", arg.c_str());
|
||||
print_usage(argc, argv, &default_params);
|
||||
exit(1);
|
||||
}
|
||||
if (!reqd_param_found){
|
||||
fprintf(stderr, "error: please specify a llama2.c .bin file to be converted with argument --llama2c-model\n");
|
||||
print_usage(argc, argv, &default_params);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
int main(int argc, char ** argv) {
|
||||
struct train_params params = get_default_train_params();
|
||||
if (!params_parse(argc, argv, ¶ms)) {
|
||||
return 1;
|
||||
}
|
||||
Config config;
|
||||
TransformerWeights weights;
|
||||
{
|
||||
FILE *file = fopen(params.fn_llama2c_model, "rb");
|
||||
if (!file) { printf("Unable to open the checkpoint file %s!\n", params.fn_llama2c_model); return 1; }
|
||||
// read in the config header
|
||||
if(fread(&config, sizeof(Config), 1, file) != 1) { return 1; }
|
||||
// read in the Transformer weights
|
||||
malloc_weights(&weights, &config);
|
||||
if(checkpoint_init_weights(&weights, &config, file)) { return 1; }
|
||||
fclose(file);
|
||||
}
|
||||
|
||||
struct llama_vocab vocab;
|
||||
load_vocab(params.fn_vocab_model, &config, &vocab);
|
||||
|
||||
struct my_llama_model model;
|
||||
model.hparams.n_vocab = config.vocab_size; //llama_n_vocab(lctx);
|
||||
model.hparams.n_ctx = params.n_ctx;
|
||||
model.hparams.n_embd = config.dim; //params.n_embd;
|
||||
model.hparams.n_mult = 32;//params.n_mult;
|
||||
model.hparams.n_head = config.n_heads; //params.n_head;
|
||||
model.hparams.n_layer = config.n_layers; //params.n_layer;
|
||||
model.hparams.n_rot = std::min((uint32_t)params.n_rotmax, model.hparams.n_embd / model.hparams.n_head);
|
||||
print_params(&model.hparams);
|
||||
struct ggml_init_params lcparams;
|
||||
lcparams.mem_size = 1024ll*1024ll*1024ll*((size_t) params.mem_model_gb);
|
||||
lcparams.mem_buffer = NULL;
|
||||
lcparams.no_alloc = false;
|
||||
|
||||
model.ctx = ggml_init(lcparams);
|
||||
|
||||
init_model(&model);
|
||||
save_as_llama_model(&vocab, &model, &weights, params.fn_llama2c_output_model);
|
||||
|
||||
printf("Saving llama.c model file %s in ggml format at %s\n", params.fn_llama2c_model, params.fn_llama2c_output_model);
|
||||
|
||||
ggml_free(model.ctx);
|
||||
free_weights(&weights);
|
||||
return 0;
|
||||
}
|
||||
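For reference, a minimal invocation of the converter might look like the following sketch (the binary name and the stories42M.bin checkpoint are illustrative; only --llama2c-model is required, the other paths fall back to the defaults listed above):

    ./convert-llama2c-to-ggml \
        --copy-vocab-from-model models/ggml-vocab.bin \
        --llama2c-model stories42M.bin \
        --llama2c-output-model stories42M.ggml.bin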
@@ -30,7 +30,7 @@ struct MyModel* create_mymodel(int argc, char ** argv) {
|
||||
fprintf(stderr, "%s: build = %d (%s)\n", __func__, BUILD_NUMBER, BUILD_COMMIT);
|
||||
|
||||
if (params.seed == LLAMA_DEFAULT_SEED) {
|
||||
params.seed = time(NULL);
|
||||
params.seed = uint32_t(time(NULL));
|
||||
}
|
||||
fprintf(stderr, "%s: seed = %d\n", __func__, params.seed);
|
||||
|
||||
@@ -167,7 +167,7 @@ llama_token sampling_id(struct MyModel* mymodel) {
|
||||
llama_token_data_array candidates_p = { candidates.data(), candidates.size(), false };
|
||||
|
||||
// TODO: Apply penalties
|
||||
// float nl_logit = logits[llama_token_nl()];
|
||||
// float nl_logit = logits[llama_token_nl(ctx)];
|
||||
// auto last_n_repeat = std::min(std::min((int)last_n_tokens.size(), repeat_last_n), n_ctx);
|
||||
// llama_sample_repetition_penalty(ctx, &candidates_p,
|
||||
// last_n_tokens.data() + last_n_tokens.size() - last_n_repeat,
|
||||
@@ -176,7 +176,7 @@ llama_token sampling_id(struct MyModel* mymodel) {
|
||||
// last_n_tokens.data() + last_n_tokens.size() - last_n_repeat,
|
||||
// last_n_repeat, alpha_frequency, alpha_presence);
|
||||
// if (!penalize_nl) {
|
||||
// logits[llama_token_nl()] = nl_logit;
|
||||
// logits[llama_token_nl(ctx)] = nl_logit;
|
||||
// }
|
||||
|
||||
if (temp <= 0) {
|
||||
@@ -211,7 +211,7 @@ const char * sampling(struct MyModel * mymodel) {
|
||||
llama_context * ctx = mymodel->ctx;
|
||||
int id = sampling_id(mymodel);
|
||||
static std::string ret;
|
||||
if (id == llama_token_eos()) {
|
||||
if (id == llama_token_eos(ctx)) {
|
||||
ret = "</s>";
|
||||
} else {
|
||||
ret = llama_token_to_str(ctx, id);
|
||||
|
||||
@@ -67,28 +67,35 @@ int main(int argc, char ** argv) {
|
||||
fprintf(stderr, "%s: prompt: '%s'\n", __func__, params.prompt.c_str());
|
||||
fprintf(stderr, "%s: number of tokens in prompt = %zu\n", __func__, embd_inp.size());
|
||||
for (int i = 0; i < (int) embd_inp.size(); i++) {
|
||||
fprintf(stderr, "%6d -> '%s'\n", embd_inp[i], llama_token_to_str(ctx, embd_inp[i]));
|
||||
fprintf(stderr, "%6d -> '%s'\n", embd_inp[i], llama_token_to_str(ctx, embd_inp[i]).c_str());
|
||||
}
|
||||
fprintf(stderr, "\n");
|
||||
}
|
||||
|
||||
if (params.embedding){
|
||||
if (embd_inp.size() > 0) {
|
||||
if (llama_eval(ctx, embd_inp.data(), embd_inp.size(), n_past, params.n_threads)) {
|
||||
fprintf(stderr, "%s : failed to eval\n", __func__);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
const int n_embd = llama_n_embd(ctx);
|
||||
const auto embeddings = llama_get_embeddings(ctx);
|
||||
|
||||
for (int i = 0; i < n_embd; i++) {
|
||||
printf("%f ", embeddings[i]);
|
||||
}
|
||||
printf("\n");
|
||||
if (embd_inp.size() > (size_t)params.n_ctx) {
|
||||
fprintf(stderr, "%s: error: prompt is longer than the context window (%zu tokens, n_ctx = %d)\n",
|
||||
__func__, embd_inp.size(), params.n_ctx);
|
||||
return 1;
|
||||
}
|
||||
|
||||
while (!embd_inp.empty()) {
|
||||
int n_tokens = std::min(params.n_batch, (int) embd_inp.size());
|
||||
if (llama_eval(ctx, embd_inp.data(), n_tokens, n_past, params.n_threads)) {
|
||||
fprintf(stderr, "%s : failed to eval\n", __func__);
|
||||
return 1;
|
||||
}
|
||||
n_past += n_tokens;
|
||||
embd_inp.erase(embd_inp.begin(), embd_inp.begin() + n_tokens);
|
||||
}
|
||||
|
||||
const int n_embd = llama_n_embd(ctx);
|
||||
const auto embeddings = llama_get_embeddings(ctx);
|
||||
|
||||
for (int i = 0; i < n_embd; i++) {
|
||||
printf("%f ", embeddings[i]);
|
||||
}
|
||||
printf("\n");
|
||||
|
||||
llama_print_timings(ctx);
|
||||
llama_free(ctx);
|
||||
llama_free_model(model);
|
||||
|
||||
examples/gguf/gguf.cpp (new file, 246 lines)
@@ -0,0 +1,246 @@
|
||||
#include "ggml.h"
|
||||
#include "llama.h"
|
||||
|
||||
#include <cstdio>
|
||||
#include <cinttypes>
|
||||
#include <string>
|
||||
#include <sstream>
|
||||
#include <fstream>
|
||||
#include <vector>
|
||||
|
||||
#undef MIN
|
||||
#undef MAX
|
||||
#define MIN(a, b) ((a) < (b) ? (a) : (b))
|
||||
#define MAX(a, b) ((a) > (b) ? (a) : (b))
|
||||
|
||||
template<typename T>
|
||||
static std::string to_string(const T & val) {
|
||||
std::stringstream ss;
|
||||
ss << val;
|
||||
return ss.str();
|
||||
}
|
||||
|
||||
bool gguf_ex_write(const std::string & fname) {
|
||||
struct gguf_context * ctx = gguf_init_empty();
|
||||
|
||||
gguf_set_val_u8 (ctx, "some.parameter.uint8", 0x12);
|
||||
gguf_set_val_i8 (ctx, "some.parameter.int8", -0x13);
|
||||
gguf_set_val_u16 (ctx, "some.parameter.uint16", 0x1234);
|
||||
gguf_set_val_i16 (ctx, "some.parameter.int16", -0x1235);
|
||||
gguf_set_val_u32 (ctx, "some.parameter.uint32", 0x12345678);
|
||||
gguf_set_val_i32 (ctx, "some.parameter.int32", -0x12345679);
|
||||
gguf_set_val_f32 (ctx, "some.parameter.float32", 0.123456789f);
|
||||
gguf_set_val_bool(ctx, "some.parameter.bool", true);
|
||||
gguf_set_val_str (ctx, "some.parameter.string", "hello world");
|
||||
|
||||
gguf_set_arr_data(ctx, "some.parameter.arr.i16", GGUF_TYPE_INT16, std::vector<int16_t>{ 1, 2, 3, 4, }.data(), 4);
|
||||
gguf_set_arr_data(ctx, "some.parameter.arr.f32", GGUF_TYPE_FLOAT32, std::vector<float>{ 3.145f, 2.718f, 1.414f, }.data(), 3);
|
||||
gguf_set_arr_str (ctx, "some.parameter.arr.str", std::vector<const char *>{ "hello", "world", "!" }.data(), 3);
|
||||
|
||||
struct ggml_init_params params = {
|
||||
/*.mem_size =*/ 128ull*1024ull*1024ull,
|
||||
/*.mem_buffer =*/ NULL,
|
||||
/*.no_alloc =*/ false,
|
||||
};
|
||||
|
||||
struct ggml_context * ctx_data = ggml_init(params);
|
||||
|
||||
const int n_tensors = 10;
|
||||
|
||||
// tensor infos
|
||||
for (int i = 0; i < n_tensors; ++i) {
|
||||
const std::string name = "tensor_" + to_string(i);
|
||||
|
||||
int64_t ne[GGML_MAX_DIMS] = { 1 };
|
||||
int32_t n_dims = rand() % GGML_MAX_DIMS + 1;
|
||||
|
||||
for (int j = 0; j < n_dims; ++j) {
|
||||
ne[j] = rand() % 10 + 1;
|
||||
}
|
||||
|
||||
struct ggml_tensor * cur = ggml_new_tensor(ctx_data, GGML_TYPE_F32, n_dims, ne);
|
||||
ggml_set_name(cur, name.c_str());
|
||||
|
||||
{
|
||||
float * data = (float *) cur->data;
|
||||
for (int j = 0; j < ggml_nelements(cur); ++j) {
|
||||
data[j] = 100 + i;
|
||||
}
|
||||
}
|
||||
|
||||
gguf_add_tensor(ctx, cur);
|
||||
}
|
||||
|
||||
gguf_write_to_file(ctx, fname.c_str(), false);
|
||||
|
||||
fprintf(stdout, "%s: wrote file '%s;\n", __func__, fname.c_str());
|
||||
|
||||
ggml_free(ctx_data);
|
||||
gguf_free(ctx);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// just read tensor info
|
||||
bool gguf_ex_read_0(const std::string & fname) {
|
||||
struct gguf_init_params params = {
|
||||
/*.no_alloc = */ false,
|
||||
/*.ctx = */ NULL,
|
||||
};
|
||||
|
||||
struct gguf_context * ctx = gguf_init_from_file(fname.c_str(), params);
|
||||
|
||||
fprintf(stdout, "%s: version: %d\n", __func__, gguf_get_version(ctx));
|
||||
fprintf(stdout, "%s: alignment: %zu\n", __func__, gguf_get_alignment(ctx));
|
||||
fprintf(stdout, "%s: data offset: %zu\n", __func__, gguf_get_data_offset(ctx));
|
||||
|
||||
// kv
|
||||
{
|
||||
const int n_kv = gguf_get_n_kv(ctx);
|
||||
|
||||
fprintf(stdout, "%s: n_kv: %d\n", __func__, n_kv);
|
||||
|
||||
for (int i = 0; i < n_kv; ++i) {
|
||||
const char * key = gguf_get_key(ctx, i);
|
||||
|
||||
fprintf(stdout, "%s: kv[%d]: key = %s\n", __func__, i, key);
|
||||
}
|
||||
}
|
||||
|
||||
// find kv string
|
||||
{
|
||||
const char * findkey = "some.parameter.string";
|
||||
|
||||
const int keyidx = gguf_find_key(ctx, findkey);
|
||||
if (keyidx == -1) {
|
||||
fprintf(stdout, "%s: find key: %s not found.\n", __func__, findkey);
|
||||
} else {
|
||||
const char * key_value = gguf_get_val_str(ctx, keyidx);
|
||||
fprintf(stdout, "%s: find key: %s found, kv[%d] value = %s\n", __func__, findkey, keyidx, key_value);
|
||||
}
|
||||
}
|
||||
|
||||
// tensor info
|
||||
{
|
||||
const int n_tensors = gguf_get_n_tensors(ctx);
|
||||
|
||||
fprintf(stdout, "%s: n_tensors: %d\n", __func__, n_tensors);
|
||||
|
||||
for (int i = 0; i < n_tensors; ++i) {
|
||||
const char * name = gguf_get_tensor_name (ctx, i);
|
||||
const size_t offset = gguf_get_tensor_offset(ctx, i);
|
||||
|
||||
fprintf(stdout, "%s: tensor[%d]: name = %s, offset = %zu\n", __func__, i, name, offset);
|
||||
}
|
||||
}
|
||||
|
||||
gguf_free(ctx);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// read and create ggml_context containing the tensors and their data
|
||||
bool gguf_ex_read_1(const std::string & fname) {
|
||||
struct ggml_context * ctx_data = NULL;
|
||||
|
||||
struct gguf_init_params params = {
|
||||
/*.no_alloc = */ false,
|
||||
/*.ctx = */ &ctx_data,
|
||||
};
|
||||
|
||||
struct gguf_context * ctx = gguf_init_from_file(fname.c_str(), params);
|
||||
|
||||
fprintf(stdout, "%s: version: %d\n", __func__, gguf_get_version(ctx));
|
||||
fprintf(stdout, "%s: alignment: %zu\n", __func__, gguf_get_alignment(ctx));
|
||||
fprintf(stdout, "%s: data offset: %zu\n", __func__, gguf_get_data_offset(ctx));
|
||||
|
||||
// kv
|
||||
{
|
||||
const int n_kv = gguf_get_n_kv(ctx);
|
||||
|
||||
fprintf(stdout, "%s: n_kv: %d\n", __func__, n_kv);
|
||||
|
||||
for (int i = 0; i < n_kv; ++i) {
|
||||
const char * key = gguf_get_key(ctx, i);
|
||||
|
||||
fprintf(stdout, "%s: kv[%d]: key = %s\n", __func__, i, key);
|
||||
}
|
||||
}
|
||||
|
||||
// tensor info
|
||||
{
|
||||
const int n_tensors = gguf_get_n_tensors(ctx);
|
||||
|
||||
fprintf(stdout, "%s: n_tensors: %d\n", __func__, n_tensors);
|
||||
|
||||
for (int i = 0; i < n_tensors; ++i) {
|
||||
const char * name = gguf_get_tensor_name (ctx, i);
|
||||
const size_t offset = gguf_get_tensor_offset(ctx, i);
|
||||
|
||||
fprintf(stdout, "%s: tensor[%d]: name = %s, offset = %zu\n", __func__, i, name, offset);
|
||||
}
|
||||
}
|
||||
|
||||
// data
|
||||
{
|
||||
const int n_tensors = gguf_get_n_tensors(ctx);
|
||||
|
||||
for (int i = 0; i < n_tensors; ++i) {
|
||||
fprintf(stdout, "%s: reading tensor %d data\n", __func__, i);
|
||||
|
||||
const char * name = gguf_get_tensor_name(ctx, i);
|
||||
|
||||
struct ggml_tensor * cur = ggml_get_tensor(ctx_data, name);
|
||||
|
||||
fprintf(stdout, "%s: tensor[%d]: n_dims = %d, name = %s, data = %p\n", __func__, i, cur->n_dims, cur->name, cur->data);
|
||||
|
||||
// print first 10 elements
|
||||
const float * data = (const float *) cur->data;
|
||||
|
||||
printf("%s data[:10] : ", name);
|
||||
for (int j = 0; j < MIN(10, ggml_nelements(cur)); ++j) {
|
||||
printf("%f ", data[j]);
|
||||
}
|
||||
printf("\n\n");
|
||||
|
||||
// check data
|
||||
{
|
||||
const float * data = (const float *) cur->data;
|
||||
for (int j = 0; j < ggml_nelements(cur); ++j) {
|
||||
if (data[j] != 100 + i) {
|
||||
fprintf(stderr, "%s: tensor[%d]: data[%d] = %f\n", __func__, i, j, data[j]);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fprintf(stdout, "%s: ctx_data size: %zu\n", __func__, ggml_get_mem_size(ctx_data));
|
||||
|
||||
ggml_free(ctx_data);
|
||||
gguf_free(ctx);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
int main(int argc, char ** argv) {
|
||||
if (argc < 3) {
|
||||
fprintf(stdout, "usage: %s data.gguf r|w\n", argv[0]);
|
||||
return -1;
|
||||
}
|
||||
|
||||
const std::string fname(argv[1]);
|
||||
const std::string mode (argv[2]);
|
||||
|
||||
GGML_ASSERT((mode == "r" || mode == "w") && "mode must be r or w");
|
||||
|
||||
if (mode == "w") {
|
||||
GGML_ASSERT(gguf_ex_write(fname) && "failed to write gguf file");
|
||||
} else if (mode == "r") {
|
||||
GGML_ASSERT(gguf_ex_read_0(fname) && "failed to read gguf file");
|
||||
GGML_ASSERT(gguf_ex_read_1(fname) && "failed to read gguf file");
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
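A quick way to exercise this example, assuming it is built as ./gguf (per the usage string in main): write a file first, then read it back.

    ./gguf test.gguf w
    ./gguf test.gguf r

The write pass stores the sample key-value pairs plus ten F32 tensors; the read pass prints the metadata and verifies that every element of tensor_i still holds the value 100 + i.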
examples/gptneox-wip/cmpnct_gpt2bpe.hpp (new file, 1133 lines)
File diff suppressed because it is too large
examples/gptneox-wip/falcon-main.cpp (new file, 1111 lines)
File diff suppressed because it is too large
examples/gptneox-wip/gptneox-main.cpp (new file, 1082 lines)
File diff suppressed because it is too large
examples/json-schema-to-grammar.py (new file, 132 lines)
@@ -0,0 +1,132 @@
|
||||
import argparse
|
||||
import json
|
||||
import re
|
||||
import sys
|
||||
|
||||
# whitespace is constrained to a single space char to prevent model "running away" in
|
||||
# whitespace. Also maybe improves generation quality?
|
||||
SPACE_RULE = '" "?'
|
||||
|
||||
PRIMITIVE_RULES = {
|
||||
'boolean': '("true" | "false") space',
|
||||
'number': '("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? space',
|
||||
'integer': '("-"? ([0-9] | [1-9] [0-9]*)) space',
|
||||
'string': r''' "\"" (
|
||||
[^"\\] |
|
||||
"\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
|
||||
)* "\"" space ''',
|
||||
'null': '"null" space',
|
||||
}
|
||||
|
||||
INVALID_RULE_CHARS_RE = re.compile(r'[^a-zA-Z0-9-]+')
|
||||
GRAMMAR_LITERAL_ESCAPE_RE = re.compile(r'[\r\n"]')
|
||||
GRAMMAR_LITERAL_ESCAPES = {'\r': '\\r', '\n': '\\n', '"': '\\"'}
|
||||
|
||||
|
||||
class SchemaConverter:
|
||||
def __init__(self, prop_order):
|
||||
self._prop_order = prop_order
|
||||
self._rules = {'space': SPACE_RULE}
|
||||
|
||||
def _format_literal(self, literal):
|
||||
escaped = GRAMMAR_LITERAL_ESCAPE_RE.sub(
|
||||
lambda m: GRAMMAR_LITERAL_ESCAPES.get(m.group(0)), json.dumps(literal)
|
||||
)
|
||||
return f'"{escaped}"'
|
||||
|
||||
def _add_rule(self, name, rule):
|
||||
esc_name = INVALID_RULE_CHARS_RE.sub('-', name)
|
||||
if esc_name not in self._rules or self._rules[esc_name] == rule:
|
||||
key = esc_name
|
||||
else:
|
||||
i = 0
|
||||
while f'{esc_name}{i}' in self._rules:
|
||||
i += 1
|
||||
key = f'{esc_name}{i}'
|
||||
self._rules[key] = rule
|
||||
return key
|
||||
|
||||
def visit(self, schema, name):
|
||||
schema_type = schema.get('type')
|
||||
rule_name = name or 'root'
|
||||
|
||||
if 'oneOf' in schema or 'anyOf' in schema:
|
||||
rule = ' | '.join((
|
||||
self.visit(alt_schema, f'{name}{"-" if name else ""}{i}')
|
||||
for i, alt_schema in enumerate(schema.get('oneOf') or schema['anyOf'])
|
||||
))
|
||||
return self._add_rule(rule_name, rule)
|
||||
|
||||
elif 'const' in schema:
|
||||
return self._add_rule(rule_name, self._format_literal(schema['const']))
|
||||
|
||||
elif 'enum' in schema:
|
||||
rule = ' | '.join((self._format_literal(v) for v in schema['enum']))
|
||||
return self._add_rule(rule_name, rule)
|
||||
|
||||
elif schema_type == 'object' and 'properties' in schema:
|
||||
# TODO: `required` keyword
|
||||
prop_order = self._prop_order
|
||||
prop_pairs = sorted(
|
||||
schema['properties'].items(),
|
||||
# sort by position in prop_order (if specified) then by key
|
||||
key=lambda kv: (prop_order.get(kv[0], len(prop_order)), kv[0]),
|
||||
)
|
||||
|
||||
rule = '"{" space'
|
||||
for i, (prop_name, prop_schema) in enumerate(prop_pairs):
|
||||
prop_rule_name = self.visit(prop_schema, f'{name}{"-" if name else ""}{prop_name}')
|
||||
if i > 0:
|
||||
rule += ' "," space'
|
||||
rule += fr' {self._format_literal(prop_name)} space ":" space {prop_rule_name}'
|
||||
rule += ' "}" space'
|
||||
|
||||
return self._add_rule(rule_name, rule)
|
||||
|
||||
elif schema_type == 'array' and 'items' in schema:
|
||||
# TODO `prefixItems` keyword
|
||||
item_rule_name = self.visit(schema['items'], f'{name}{"-" if name else ""}item')
|
||||
rule = f'"[" space ({item_rule_name} ("," space {item_rule_name})*)? "]" space'
|
||||
return self._add_rule(rule_name, rule)
|
||||
|
||||
else:
|
||||
assert schema_type in PRIMITIVE_RULES, f'Unrecognized schema: {schema}'
|
||||
return self._add_rule(
|
||||
'root' if rule_name == 'root' else schema_type,
|
||||
PRIMITIVE_RULES[schema_type]
|
||||
)
|
||||
|
||||
def format_grammar(self):
|
||||
return '\n'.join((f'{name} ::= {rule}' for name, rule in self._rules.items()))
|
||||
|
||||
|
||||
def main(args_in = None):
|
||||
parser = argparse.ArgumentParser(
|
||||
description='''
|
||||
Generates a grammar (suitable for use in ./main) that produces JSON conforming to a
|
||||
given JSON schema. Only a subset of JSON schema features are supported; more may be
|
||||
added in the future.
|
||||
''',
|
||||
)
|
||||
parser.add_argument(
|
||||
'--prop-order',
|
||||
default=[],
|
||||
type=lambda s: s.split(','),
|
||||
help='''
|
||||
comma-separated property names defining the order of precedence for object properties;
|
||||
properties not specified here are given lower precedence than those that are, and are
|
||||
sorted alphabetically
|
||||
'''
|
||||
)
|
||||
parser.add_argument('schema', help='file containing JSON schema ("-" for stdin)')
|
||||
args = parser.parse_args(args_in)
|
||||
|
||||
schema = json.load(sys.stdin if args.schema == '-' else open(args.schema))
|
||||
prop_order = {name: idx for idx, name in enumerate(args.prop_order)}
|
||||
converter = SchemaConverter(prop_order)
|
||||
converter.visit(schema, '')
|
||||
print(converter.format_grammar())
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
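As a sketch of intended usage (file names are illustrative, and the grammar flag of ./main is assumed to be --grammar-file): generate the grammar, then pass it to ./main.

    python3 examples/json-schema-to-grammar.py schema.json > schema.gbnf
    ./main -m models/7B/ggml-model-q4_0.gguf --grammar-file schema.gbnf -p "Reply in JSON:"

For a schema like {"type": "object", "properties": {"name": {"type": "string"}}}, the generated root rule constrains output to a JSON object containing a quoted "name" string property.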
examples/llama-bench/CMakeLists.txt (new file, 8 lines)
@@ -0,0 +1,8 @@
set(TARGET llama-bench)
add_executable(${TARGET} llama-bench.cpp)
install(TARGETS ${TARGET} RUNTIME)
target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
target_compile_features(${TARGET} PRIVATE cxx_std_11)
if(TARGET BUILD_INFO)
  add_dependencies(${TARGET} BUILD_INFO)
endif()
examples/llama-bench/llama-bench.cpp (new executable file, 969 lines)
@@ -0,0 +1,969 @@
|
||||
#include <algorithm>
|
||||
#include <array>
|
||||
#include <cassert>
|
||||
#include <chrono>
|
||||
#include <cinttypes>
|
||||
#include <cstring>
|
||||
#include <ctime>
|
||||
#include <iterator>
|
||||
#include <map>
|
||||
#include <numeric>
|
||||
#include <regex>
|
||||
#include <sstream>
|
||||
#include <stdio.h>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "ggml.h"
|
||||
#include "llama.h"
|
||||
#include "common.h"
|
||||
#include "build-info.h"
|
||||
#ifdef GGML_USE_CUBLAS
|
||||
#include "ggml-cuda.h"
|
||||
#endif
|
||||
|
||||
// utils
|
||||
static uint64_t get_time_ns() {
|
||||
using clock = std::chrono::high_resolution_clock;
|
||||
return std::chrono::nanoseconds(clock::now().time_since_epoch()).count();
|
||||
}
|
||||
|
||||
template<class T>
|
||||
static std::string join(const std::vector<T> & values, const std::string & delim) {
|
||||
std::ostringstream str;
|
||||
for (size_t i = 0; i < values.size(); i++) {
|
||||
str << values[i];
|
||||
if (i < values.size() - 1) {
|
||||
str << delim;
|
||||
}
|
||||
}
|
||||
return str.str();
|
||||
}
|
||||
|
||||
template<class T>
|
||||
static std::vector<T> split(const std::string & str, char delim) {
|
||||
std::vector<T> values;
|
||||
std::istringstream str_stream(str);
|
||||
std::string token;
|
||||
while (std::getline(str_stream, token, delim)) {
|
||||
T value;
|
||||
std::istringstream token_stream(token);
|
||||
token_stream >> value;
|
||||
values.push_back(value);
|
||||
}
|
||||
return values;
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
static T avg(const std::vector<T> & v) {
|
||||
if (v.empty()) {
|
||||
return 0;
|
||||
}
|
||||
T sum = std::accumulate(v.begin(), v.end(), T(0));
|
||||
return sum / (T)v.size();
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
static T stdev(const std::vector<T> & v) {
|
||||
if (v.size() <= 1) {
|
||||
return 0;
|
||||
}
|
||||
T mean = avg(v);
|
||||
T sq_sum = std::inner_product(v.begin(), v.end(), v.begin(), T(0));
|
||||
T stdev = std::sqrt(sq_sum / (T)(v.size() - 1) - mean * mean * (T)v.size() / (T)(v.size() - 1));
|
||||
return stdev;
|
||||
}
|
||||
|
||||
static bool ggml_cpu_has_metal() {
|
||||
#if defined(GGML_USE_METAL)
|
||||
return true;
|
||||
#else
|
||||
return false;
|
||||
#endif
|
||||
}
|
||||
|
||||
static std::string get_cpu_info() {
|
||||
std::string id;
|
||||
#ifdef __linux__
|
||||
FILE * f = fopen("/proc/cpuinfo", "r");
|
||||
if (f) {
|
||||
char buf[1024];
|
||||
while (fgets(buf, sizeof(buf), f)) {
|
||||
if (strncmp(buf, "model name", 10) == 0) {
|
||||
char * p = strchr(buf, ':');
|
||||
if (p) {
|
||||
p++;
|
||||
while (std::isspace(*p)) {
|
||||
p++;
|
||||
}
|
||||
while (std::isspace(p[strlen(p) - 1])) {
|
||||
p[strlen(p) - 1] = '\0';
|
||||
}
|
||||
id = p;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
// TODO: other platforms
|
||||
return id;
|
||||
}
|
||||
|
||||
static std::string get_gpu_info() {
|
||||
std::string id;
|
||||
#ifdef GGML_USE_CUBLAS
|
||||
int count = ggml_cuda_get_device_count();
|
||||
for (int i = 0; i < count; i++) {
|
||||
char buf[128];
|
||||
ggml_cuda_get_device_description(i, buf, sizeof(buf));
|
||||
id += buf;
|
||||
if (i < count - 1) {
|
||||
id += "/";
|
||||
}
|
||||
}
|
||||
#endif
|
||||
// TODO: other backends
|
||||
return id;
|
||||
}
|
||||
|
||||
// command line params
|
||||
enum output_formats {CSV, JSON, MARKDOWN, SQL};
|
||||
|
||||
struct cmd_params {
|
||||
std::vector<std::string> model;
|
||||
std::vector<int> n_prompt;
|
||||
std::vector<int> n_gen;
|
||||
std::vector<int> n_batch;
|
||||
std::vector<bool> f32_kv;
|
||||
std::vector<int> n_threads;
|
||||
std::vector<int> n_gpu_layers;
|
||||
std::vector<int> main_gpu;
|
||||
std::vector<bool> mul_mat_q;
|
||||
std::vector<bool> low_vram;
|
||||
std::vector<std::array<float, LLAMA_MAX_DEVICES>> tensor_split;
|
||||
int reps;
|
||||
bool verbose;
|
||||
output_formats output_format;
|
||||
};
|
||||
|
||||
static const cmd_params cmd_params_defaults = {
|
||||
/* model */ {"models/7B/ggml-model-q4_0.gguf"},
|
||||
/* n_prompt */ {512},
|
||||
/* n_gen */ {128},
|
||||
/* n_batch */ {512},
|
||||
/* f32_kv */ {false},
|
||||
/* n_threads */ {get_num_physical_cores()},
|
||||
/* n_gpu_layers */ {99},
|
||||
/* main_gpu */ {0},
|
||||
/* mul_mat_q */ {true},
|
||||
/* low_vram */ {false},
|
||||
/* tensor_split */ {{}},
|
||||
/* reps */ 5,
|
||||
/* verbose */ false,
|
||||
/* output_format */ MARKDOWN
|
||||
};
|
||||
|
||||
static void print_usage(int /* argc */, char ** argv) {
|
||||
fprintf(stdout, "usage: %s [options]\n", argv[0]);
|
||||
fprintf(stdout, "\n");
|
||||
fprintf(stdout, "options:\n");
|
||||
fprintf(stdout, " -h, --help\n");
|
||||
fprintf(stdout, " -m, --model <filename> (default: %s)\n", join(cmd_params_defaults.model, ",").c_str());
|
||||
fprintf(stdout, " -p, --n-prompt <n> (default: %s)\n", join(cmd_params_defaults.n_prompt, ",").c_str());
|
||||
fprintf(stdout, " -n, --n-gen <n> (default: %s)\n", join(cmd_params_defaults.n_gen, ",").c_str());
|
||||
fprintf(stdout, " -b, --batch-size <n> (default: %s)\n", join(cmd_params_defaults.n_batch, ",").c_str());
|
||||
fprintf(stdout, " --memory-f32 <0|1> (default: %s)\n", join(cmd_params_defaults.f32_kv, ",").c_str());
|
||||
fprintf(stdout, " -t, --threads <n> (default: %s)\n", join(cmd_params_defaults.n_threads, ",").c_str());
|
||||
fprintf(stdout, " -ngl N, --n-gpu-layers <n> (default: %s)\n", join(cmd_params_defaults.n_gpu_layers, ",").c_str());
|
||||
fprintf(stdout, " -mg i, --main-gpu <n> (default: %s)\n", join(cmd_params_defaults.main_gpu, ",").c_str());
|
||||
fprintf(stdout, " -lv, --low-vram <0|1> (default: %s)\n", join(cmd_params_defaults.low_vram, ",").c_str());
|
||||
fprintf(stdout, " -mmq, --mul-mat-q <0|1> (default: %s)\n", join(cmd_params_defaults.mul_mat_q, ",").c_str());
|
||||
fprintf(stdout, " -ts, --tensor_split <ts0/ts1/..> \n");
|
||||
fprintf(stdout, " -r, --repetitions <n> (default: %d)\n", cmd_params_defaults.reps);
|
||||
fprintf(stdout, " -o, --output <csv|json|md|sql> (default: %s)\n", cmd_params_defaults.output_format == CSV ? "csv" : cmd_params_defaults.output_format == JSON ? "json" : cmd_params_defaults.output_format == MARKDOWN ? "md" : "sql");
|
||||
fprintf(stdout, " -v, --verbose (default: %s)\n", cmd_params_defaults.verbose ? "1" : "0");
|
||||
fprintf(stdout, "\n");
|
||||
fprintf(stdout, "Multiple values can be given for each parameter by separating them with ',' or by specifying the parameter multiple times.\n");
|
||||
|
||||
}
|
||||
|
||||
static cmd_params parse_cmd_params(int argc, char ** argv) {
|
||||
cmd_params params;
|
||||
std::string arg;
|
||||
bool invalid_param = false;
|
||||
const std::string arg_prefix = "--";
|
||||
const char split_delim = ',';
|
||||
|
||||
params.verbose = cmd_params_defaults.verbose;
|
||||
params.output_format = cmd_params_defaults.output_format;
|
||||
params.reps = cmd_params_defaults.reps;
|
||||
|
||||
for (int i = 1; i < argc; i++) {
|
||||
arg = argv[i];
|
||||
if (arg.compare(0, arg_prefix.size(), arg_prefix) == 0) {
|
||||
std::replace(arg.begin(), arg.end(), '_', '-');
|
||||
}
|
||||
|
||||
if (arg == "-h" || arg == "--help") {
|
||||
print_usage(argc, argv);
|
||||
exit(0);
|
||||
} else if (arg == "-m" || arg == "--model") {
|
||||
if (++i >= argc) {
|
||||
invalid_param = true;
|
||||
break;
|
||||
}
|
||||
auto p = split<std::string>(argv[i], split_delim);
|
||||
params.model.insert(params.model.end(), p.begin(), p.end());
|
||||
} else if (arg == "-p" || arg == "--n-prompt") {
|
||||
if (++i >= argc) {
|
||||
invalid_param = true;
|
||||
break;
|
||||
}
|
||||
auto p = split<int>(argv[i], split_delim);
|
||||
params.n_prompt.insert(params.n_prompt.end(), p.begin(), p.end());
|
||||
} else if (arg == "-n" || arg == "--n-gen") {
|
||||
if (++i >= argc) {
|
||||
invalid_param = true;
|
||||
break;
|
||||
}
|
||||
auto p = split<int>(argv[i], split_delim);
|
||||
params.n_gen.insert(params.n_gen.end(), p.begin(), p.end());
|
||||
} else if (arg == "-b" || arg == "--batch-size") {
|
||||
if (++i >= argc) {
|
||||
invalid_param = true;
|
||||
break;
|
||||
}
|
||||
auto p = split<int>(argv[i], split_delim);
|
||||
params.n_batch.insert(params.n_batch.end(), p.begin(), p.end());
|
||||
} else if (arg == "--memory-f32") {
|
||||
if (++i >= argc) {
|
||||
invalid_param = true;
|
||||
break;
|
||||
}
|
||||
auto p = split<int>(argv[i], split_delim);
|
||||
params.f32_kv.insert(params.f32_kv.end(), p.begin(), p.end());
|
||||
} else if (arg == "-t" || arg == "--threads") {
|
||||
if (++i >= argc) {
|
||||
invalid_param = true;
|
||||
break;
|
||||
}
|
||||
auto p = split<int>(argv[i], split_delim);
|
||||
params.n_threads.insert(params.n_threads.end(), p.begin(), p.end());
|
||||
} else if (arg == "-ngl" || arg == "--n-gpu-layers") {
|
||||
if (++i >= argc) {
|
||||
invalid_param = true;
|
||||
break;
|
||||
}
|
||||
auto p = split<int>(argv[i], split_delim);
|
||||
params.n_gpu_layers.insert(params.n_gpu_layers.end(), p.begin(), p.end());
|
||||
} else if (arg == "-mg" || arg == "--main-gpu") {
|
||||
if (++i >= argc) {
|
||||
invalid_param = true;
|
||||
break;
|
||||
}
|
||||
params.main_gpu = split<int>(argv[i], split_delim);
|
||||
} else if (arg == "-lv" || arg == "--low-vram") {
|
||||
if (++i >= argc) {
|
||||
invalid_param = true;
|
||||
break;
|
||||
}
|
||||
auto p = split<bool>(argv[i], split_delim);
|
||||
params.low_vram.insert(params.low_vram.end(), p.begin(), p.end());
|
||||
} else if (arg == "-mmq" || arg == "--mul-mat-q") {
|
||||
if (++i >= argc) {
|
||||
invalid_param = true;
|
||||
break;
|
||||
}
|
||||
auto p = split<bool>(argv[i], split_delim);
|
||||
params.mul_mat_q.insert(params.mul_mat_q.end(), p.begin(), p.end());
|
||||
} else if (arg == "-ts" || arg == "--tensor-split") {
|
||||
if (++i >= argc) {
|
||||
invalid_param = true;
|
||||
break;
|
||||
}
|
||||
for (auto ts : split<std::string>(argv[i], split_delim)) {
|
||||
// split string by ; and /
|
||||
const std::regex regex{R"([;/]+)"};
|
||||
std::sregex_token_iterator it{ts.begin(), ts.end(), regex, -1};
|
||||
std::vector<std::string> split_arg{it, {}};
|
||||
GGML_ASSERT(split_arg.size() <= LLAMA_MAX_DEVICES);
|
||||
|
||||
std::array<float, LLAMA_MAX_DEVICES> tensor_split;
|
||||
for (size_t i = 0; i < LLAMA_MAX_DEVICES; ++i) {
|
||||
if (i < split_arg.size()) {
|
||||
tensor_split[i] = std::stof(split_arg[i]);
|
||||
} else {
|
||||
tensor_split[i] = 0.0f;
|
||||
}
|
||||
}
|
||||
params.tensor_split.push_back(tensor_split);
|
||||
}
|
||||
} else if (arg == "-r" || arg == "--repetitions") {
|
||||
if (++i >= argc) {
|
||||
invalid_param = true;
|
||||
break;
|
||||
}
|
||||
params.reps = std::stoi(argv[i]);
|
||||
} else if (arg == "-o" || arg == "--output") {
|
||||
if (++i >= argc) {
|
||||
invalid_param = true;
|
||||
break;
|
||||
}
|
||||
if (argv[i] == std::string("csv")) {
|
||||
params.output_format = CSV;
|
||||
} else if (argv[i] == std::string("json")) {
|
||||
params.output_format = JSON;
|
||||
} else if (argv[i] == std::string("md")) {
|
||||
params.output_format = MARKDOWN;
|
||||
} else if (argv[i] == std::string("sql")) {
|
||||
params.output_format = SQL;
|
||||
} else {
|
||||
invalid_param = true;
|
||||
break;
|
||||
}
|
||||
} else if (arg == "-v" || arg == "--verbose") {
|
||||
params.verbose = true;
|
||||
} else {
|
||||
invalid_param = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (invalid_param) {
|
||||
fprintf(stderr, "error: invalid parameter for argument: %s\n", arg.c_str());
|
||||
print_usage(argc, argv);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
// set defaults
|
||||
if (params.model.empty()) { params.model = cmd_params_defaults.model; }
|
||||
if (params.n_prompt.empty()) { params.n_prompt = cmd_params_defaults.n_prompt; }
|
||||
if (params.n_gen.empty()) { params.n_gen = cmd_params_defaults.n_gen; }
|
||||
if (params.n_batch.empty()) { params.n_batch = cmd_params_defaults.n_batch; }
|
||||
if (params.f32_kv.empty()) { params.f32_kv = cmd_params_defaults.f32_kv; }
|
||||
if (params.n_gpu_layers.empty()) { params.n_gpu_layers = cmd_params_defaults.n_gpu_layers; }
|
||||
if (params.main_gpu.empty()) { params.main_gpu = cmd_params_defaults.main_gpu; }
|
||||
if (params.mul_mat_q.empty()) { params.mul_mat_q = cmd_params_defaults.mul_mat_q; }
|
||||
if (params.low_vram.empty()) { params.low_vram = cmd_params_defaults.low_vram; }
|
||||
if (params.tensor_split.empty()) { params.tensor_split = cmd_params_defaults.tensor_split; }
|
||||
if (params.n_threads.empty()) { params.n_threads = cmd_params_defaults.n_threads; }
|
||||
|
||||
return params;
|
||||
}
|
||||
|
||||
struct cmd_params_instance {
|
||||
std::string model;
|
||||
int n_prompt;
|
||||
int n_gen;
|
||||
int n_batch;
|
||||
bool f32_kv;
|
||||
int n_threads;
|
||||
int n_gpu_layers;
|
||||
int main_gpu;
|
||||
bool mul_mat_q;
|
||||
bool low_vram;
|
||||
std::array<float, LLAMA_MAX_DEVICES> tensor_split;
|
||||
|
||||
llama_context_params to_llama_params() const {
|
||||
llama_context_params lparams = llama_context_default_params();
|
||||
lparams.n_ctx = n_prompt + n_gen;
|
||||
lparams.n_batch = n_batch;
|
||||
lparams.f16_kv = !f32_kv;
|
||||
lparams.n_gpu_layers = n_gpu_layers;
|
||||
lparams.main_gpu = main_gpu;
|
||||
lparams.mul_mat_q = mul_mat_q;
|
||||
lparams.low_vram = low_vram;
|
||||
lparams.tensor_split = tensor_split.data();
|
||||
|
||||
return lparams;
|
||||
}
|
||||
};
|
||||
|
||||
static std::vector<cmd_params_instance> get_cmd_params_instances_int(const cmd_params & params, int n_gen, int n_prompt) {
|
||||
std::vector<cmd_params_instance> instances;
|
||||
|
||||
for (const auto & m : params.model)
|
||||
for (const auto & nb : params.n_batch)
|
||||
for (const auto & fk : params.f32_kv)
|
||||
for (const auto & nl : params.n_gpu_layers)
|
||||
for (const auto & mg : params.main_gpu)
|
||||
for (const auto & mmq : params.mul_mat_q)
|
||||
for (const auto & lv : params.low_vram)
|
||||
for (const auto & ts : params.tensor_split)
|
||||
for (const auto & nt : params.n_threads) {
|
||||
cmd_params_instance instance = {
|
||||
/* .model = */ m,
|
||||
/* .n_prompt = */ n_prompt,
|
||||
/* .n_gen = */ n_gen,
|
||||
/* .n_batch = */ nb,
|
||||
/* .f32_kv = */ fk,
|
||||
/* .n_threads = */ nt,
|
||||
/* .n_gpu_layers = */ nl,
|
||||
/* .main_gpu = */ mg,
|
||||
/* .mul_mat_q = */ mmq,
|
||||
/* .low_vram = */ lv,
|
||||
/* .tensor_split = */ ts,
|
||||
};
|
||||
instances.push_back(instance);
|
||||
}
|
||||
return instances;
|
||||
}
|
||||
|
||||
static std::vector<cmd_params_instance> get_cmd_params_instances(const cmd_params & params) {
|
||||
std::vector<cmd_params_instance> instances;
|
||||
|
||||
for (const auto & n_prompt : params.n_prompt) {
|
||||
if (n_prompt == 0) {
|
||||
continue;
|
||||
}
|
||||
auto instances_prompt = get_cmd_params_instances_int(params, 0, n_prompt);
|
||||
instances.insert(instances.end(), instances_prompt.begin(), instances_prompt.end());
|
||||
}
|
||||
|
||||
for (const auto & n_gen : params.n_gen) {
|
||||
if (n_gen == 0) {
|
||||
continue;
|
||||
}
|
||||
auto instances_gen = get_cmd_params_instances_int(params, n_gen, 0);
|
||||
instances.insert(instances.end(), instances_gen.begin(), instances_gen.end());
|
||||
}
|
||||
|
||||
return instances;
|
||||
}
|
||||
|
||||
struct test {
|
||||
static const std::string build_commit;
|
||||
static const int build_number;
|
||||
static const bool cuda;
|
||||
static const bool opencl;
|
||||
static const bool metal;
|
||||
static const bool gpu_blas;
|
||||
static const bool blas;
|
||||
static const std::string cpu_info;
|
||||
static const std::string gpu_info;
|
||||
std::string model_filename;
|
||||
std::string model_type;
|
||||
int n_batch;
|
||||
int n_threads;
|
||||
bool f32_kv;
|
||||
int n_gpu_layers;
|
||||
int main_gpu;
|
||||
bool mul_mat_q;
|
||||
bool low_vram;
|
||||
std::array<float, LLAMA_MAX_DEVICES> tensor_split;
|
||||
int n_prompt;
|
||||
int n_gen;
|
||||
std::string test_time;
|
||||
std::vector<uint64_t> samples_ns;
|
||||
|
||||
test(const cmd_params_instance & inst, const llama_model * lmodel, const llama_context * ctx) {
|
||||
model_filename = inst.model;
|
||||
char buf[128];
|
||||
llama_model_type(lmodel, buf, sizeof(buf));
|
||||
model_type = buf;
|
||||
n_batch = inst.n_batch;
|
||||
n_threads = inst.n_threads;
|
||||
f32_kv = inst.f32_kv;
|
||||
n_gpu_layers = inst.n_gpu_layers;
|
||||
main_gpu = inst.main_gpu;
|
||||
mul_mat_q = inst.mul_mat_q;
|
||||
low_vram = inst.low_vram;
|
||||
tensor_split = inst.tensor_split;
|
||||
n_prompt = inst.n_prompt;
|
||||
n_gen = inst.n_gen;
|
||||
// RFC 3339 date-time format
|
||||
time_t t = time(NULL);
|
||||
std::strftime(buf, sizeof(buf), "%FT%TZ", gmtime(&t));
|
||||
test_time = buf;
|
||||
|
||||
(void) ctx;
|
||||
}
|
||||
|
||||
uint64_t avg_ns() const {
|
||||
return ::avg(samples_ns);
|
||||
}
|
||||
|
||||
uint64_t stdev_ns() const {
|
||||
return ::stdev(samples_ns);
|
||||
}
|
||||
|
||||
std::vector<double> get_ts() const {
|
||||
int n_tokens = n_prompt + n_gen;
|
||||
std::vector<double> ts;
|
||||
std::transform(samples_ns.begin(), samples_ns.end(), std::back_inserter(ts), [n_tokens](uint64_t t) { return 1e9 * n_tokens / t; });
|
||||
return ts;
|
||||
}
|
||||
|
||||
double avg_ts() const {
|
||||
return ::avg(get_ts());
|
||||
}
|
||||
|
||||
double stdev_ts() const {
|
||||
return ::stdev(get_ts());
|
||||
}
|
||||
|
||||
static std::string get_backend() {
|
||||
if (cuda) {
|
||||
return "CUDA";
|
||||
}
|
||||
if (opencl) {
|
||||
return "OpenCL";
|
||||
}
|
||||
if (metal) {
|
||||
return "Metal";
|
||||
}
|
||||
if (gpu_blas) {
|
||||
return "GPU BLAS";
|
||||
}
|
||||
if (blas) {
|
||||
return "BLAS";
|
||||
}
|
||||
return "CPU";
|
||||
}
|
||||
|
||||
static const std::vector<std::string> & get_fields() {
|
||||
static const std::vector<std::string> fields = {
|
||||
"build_commit", "build_number",
|
||||
"cuda", "opencl", "metal", "gpu_blas", "blas",
|
||||
"cpu_info", "gpu_info",
|
||||
"model_filename", "model_type",
|
||||
"n_batch", "n_threads", "f16_kv",
|
||||
"n_gpu_layers", "main_gpu", "mul_mat_q", "low_vram", "tensor_split",
|
||||
"n_prompt", "n_gen", "test_time",
|
||||
"avg_ns", "stddev_ns",
|
||||
"avg_ts", "stddev_ts"
|
||||
};
|
||||
return fields;
|
||||
}
|
||||
|
||||
enum field_type {STRING, BOOL, INT, FLOAT};
|
||||
|
||||
static field_type get_field_type(const std::string & field) {
|
||||
if (field == "build_number" || field == "n_batch" || field == "n_threads" ||
|
||||
field == "n_gpu_layers" || field == "main_gpu" ||
|
||||
field == "n_prompt" || field == "n_gen" ||
|
||||
field == "avg_ns" || field == "stddev_ns") {
|
||||
return INT;
|
||||
}
|
||||
if (field == "cuda" || field == "opencl" || field == "metal" || field == "gpu_blas" || field == "blas" ||
|
||||
field == "f16_kv" || field == "mul_mat_q" || field == "low_vram") {
|
||||
return BOOL;
|
||||
}
|
||||
if (field == "avg_ts" || field == "stddev_ts") {
|
||||
return FLOAT;
|
||||
}
|
||||
return STRING;
|
||||
}
|
||||
|
||||
std::vector<std::string> get_values() const {
|
||||
std::string tensor_split_str;
|
||||
int max_nonzero = 0;
|
||||
for (int i = 0; i < LLAMA_MAX_DEVICES; i++) {
|
||||
if (tensor_split[i] > 0) {
|
||||
max_nonzero = i;
|
||||
}
|
||||
}
|
||||
for (int i = 0; i <= max_nonzero; i++) {
|
||||
char buf[32];
|
||||
snprintf(buf, sizeof(buf), "%.2f", tensor_split[i]);
|
||||
tensor_split_str += buf;
|
||||
if (i < max_nonzero) {
|
||||
tensor_split_str += "/";
|
||||
}
|
||||
}
|
||||
std::vector<std::string> values = {
|
||||
build_commit, std::to_string(build_number),
|
||||
std::to_string(cuda), std::to_string(opencl), std::to_string(metal), std::to_string(gpu_blas), std::to_string(blas),
|
||||
cpu_info, gpu_info,
|
||||
model_filename, model_type,
|
||||
std::to_string(n_batch), std::to_string(n_threads), std::to_string(!f32_kv),
|
||||
std::to_string(n_gpu_layers), std::to_string(main_gpu), std::to_string(mul_mat_q), std::to_string(low_vram), tensor_split_str,
|
||||
std::to_string(n_prompt), std::to_string(n_gen), test_time,
|
||||
std::to_string(avg_ns()), std::to_string(stdev_ns()),
|
||||
std::to_string(avg_ts()), std::to_string(stdev_ts())
|
||||
};
|
||||
return values;
|
||||
}
|
||||
|
||||
std::map<std::string, std::string> get_map() const {
|
||||
std::map<std::string, std::string> map;
|
||||
auto fields = get_fields();
|
||||
auto values = get_values();
|
||||
std::transform(fields.begin(), fields.end(), values.begin(),
|
||||
std::inserter(map, map.end()), std::make_pair<const std::string &, const std::string &>);
|
||||
return map;
|
||||
}
|
||||
};
|
||||
|
||||
const std::string test::build_commit = BUILD_COMMIT;
|
||||
const int test::build_number = BUILD_NUMBER;
|
||||
const bool test::cuda = !!ggml_cpu_has_cublas();
|
||||
const bool test::opencl = !!ggml_cpu_has_clblast();
|
||||
const bool test::metal = !!ggml_cpu_has_metal();
|
||||
const bool test::gpu_blas = !!ggml_cpu_has_gpublas();
|
||||
const bool test::blas = !!ggml_cpu_has_blas();
|
||||
const std::string test::cpu_info = get_cpu_info();
|
||||
const std::string test::gpu_info = get_gpu_info();
|
||||
|
||||
struct printer {
|
||||
virtual ~printer() {}
|
||||
|
||||
FILE * fout;
|
||||
virtual void print_header(const cmd_params & params) { (void) params; };
|
||||
virtual void print_test(const test & t) = 0;
|
||||
virtual void print_footer() { };
|
||||
};
|
||||
|
||||
struct csv_printer : public printer {
|
||||
static std::string escape_csv(const std::string & field) {
|
||||
std::string escaped = "\"";
|
||||
for (auto c : field) {
|
||||
if (c == '"') {
|
||||
escaped += "\"";
|
||||
}
|
||||
escaped += c;
|
||||
}
|
||||
escaped += "\"";
|
||||
return escaped;
|
||||
}
|
||||
|
||||
void print_header(const cmd_params & params) override {
|
||||
std::vector<std::string> fields = test::get_fields();
|
||||
fprintf(fout, "%s\n", join(fields, ",").c_str());
|
||||
(void) params;
|
||||
}
|
||||
|
||||
void print_test(const test & t) override {
|
||||
std::vector<std::string> values = t.get_values();
|
||||
std::transform(values.begin(), values.end(), values.begin(), escape_csv);
|
||||
fprintf(fout, "%s\n", join(values, ",").c_str());
|
||||
}
|
||||
};
|
||||
|
||||
struct json_printer : public printer {
|
||||
bool first = true;
|
||||
|
||||
static std::string escape_json(const std::string & value) {
|
||||
std::string escaped;
|
||||
for (auto c : value) {
|
||||
if (c == '"') {
|
||||
escaped += "\\\"";
|
||||
} else if (c == '\\') {
|
||||
escaped += "\\\\";
|
||||
} else if (c <= 0x1f) {
|
||||
char buf[8];
|
||||
snprintf(buf, sizeof(buf), "\\u%04x", c);
|
||||
escaped += buf;
|
||||
} else {
|
||||
escaped += c;
|
||||
}
|
||||
}
|
||||
return escaped;
|
||||
}
|
||||
|
||||
static std::string format_value(const std::string & field, const std::string & value) {
|
||||
switch (test::get_field_type(field)) {
|
||||
case test::STRING:
|
||||
return "\"" + escape_json(value) + "\"";
|
||||
case test::BOOL:
|
||||
return value == "0" ? "false" : "true";
|
||||
default:
|
||||
return value;
|
||||
}
|
||||
}
|
||||
|
||||
void print_header(const cmd_params & params) override {
|
||||
fprintf(fout, "[\n");
|
||||
(void) params;
|
||||
}
|
||||
|
||||
void print_fields(const std::vector<std::string> & fields, const std::vector<std::string> & values) {
|
||||
assert(fields.size() == values.size());
|
||||
for (size_t i = 0; i < fields.size(); i++) {
|
||||
fprintf(fout, " \"%s\": %s,\n", fields.at(i).c_str(), format_value(fields.at(i), values.at(i)).c_str());
|
||||
}
|
||||
}
|
||||
|
||||
void print_test(const test & t) override {
|
||||
if (first) {
|
||||
first = false;
|
||||
} else {
|
||||
fprintf(fout, ",\n");
|
||||
}
|
||||
fprintf(fout, " {\n");
|
||||
print_fields(test::get_fields(), t.get_values());
|
||||
fprintf(fout, " \"samples_ns\": [ %s ],\n", join(t.samples_ns, ", ").c_str());
|
||||
fprintf(fout, " \"samples_ts\": [ %s ]\n", join(t.get_ts(), ", ").c_str());
|
||||
fprintf(fout, " }");
|
||||
fflush(fout);
|
||||
}
|
||||
|
||||
void print_footer() override {
|
||||
fprintf(fout, "\n]\n");
|
||||
}
|
||||
};
|
||||
|
||||
struct markdown_printer : public printer {
|
||||
std::vector<std::string> fields;
|
||||
|
||||
static int get_field_width(const std::string & field) {
|
||||
if (field == "model") {
|
||||
return -30;
|
||||
}
|
||||
if (field == "t/s") {
|
||||
return 15;
|
||||
}
|
||||
int width = std::max((int)field.length(), 10);
|
||||
|
||||
if (test::get_field_type(field) == test::STRING) {
|
||||
return -width;
|
||||
}
|
||||
return width;
|
||||
}
|
||||
|
||||
void print_header(const cmd_params & params) override {
|
||||
// select fields to print
|
||||
fields = { "model", "backend" };
|
||||
bool is_cpu_backend = test::get_backend() == "CPU" || test::get_backend() == "BLAS";
|
||||
if (!is_cpu_backend) {
|
||||
fields.push_back("n_gpu_layers");
|
||||
}
|
||||
if (params.n_threads.size() > 1 || params.n_threads != cmd_params_defaults.n_threads || is_cpu_backend) {
|
||||
fields.push_back("n_threads");
|
||||
}
|
||||
if (params.n_batch.size() > 1 || params.n_batch != cmd_params_defaults.n_batch) {
|
||||
fields.push_back("n_batch");
|
||||
}
|
||||
if (params.f32_kv.size() > 1 || params.f32_kv != cmd_params_defaults.f32_kv) {
|
||||
fields.push_back("f16_kv");
|
||||
}
|
||||
if (params.main_gpu.size() > 1 || params.main_gpu != cmd_params_defaults.main_gpu) {
|
||||
fields.push_back("main_gpu");
|
||||
}
|
||||
if (params.mul_mat_q.size() > 1 || params.mul_mat_q != cmd_params_defaults.mul_mat_q) {
|
||||
fields.push_back("mul_mat_q");
|
||||
}
|
||||
if (params.low_vram.size() > 1 || params.low_vram != cmd_params_defaults.low_vram) {
|
||||
fields.push_back("low_vram");
|
||||
}
|
||||
if (params.tensor_split.size() > 1 || params.tensor_split != cmd_params_defaults.tensor_split) {
|
||||
fields.push_back("tensor_split");
|
||||
}
|
||||
fields.push_back("test");
|
||||
fields.push_back("t/s");
|
||||
|
||||
fprintf(fout, "|");
|
||||
for (const auto & field : fields) {
|
||||
fprintf(fout, " %*s |", get_field_width(field), field.c_str());
|
||||
}
|
||||
fprintf(fout, "\n");
|
||||
fprintf(fout, "|");
|
||||
for (const auto & field : fields) {
|
||||
int width = get_field_width(field);
|
||||
fprintf(fout, " %s%s |", std::string(std::abs(width) - 1, '-').c_str(), width > 0 ? ":" : "-");
|
||||
}
|
||||
fprintf(fout, "\n");
|
||||
}
|
||||
|
||||
void print_test(const test & t) override {
|
||||
std::map<std::string, std::string> vmap = t.get_map();
|
||||
|
||||
fprintf(fout, "|");
|
||||
for (const auto & field : fields) {
|
||||
std::string value;
|
||||
if (field == "model") {
|
||||
value = t.model_type;
|
||||
} else if (field == "backend") {
|
||||
value = test::get_backend();
|
||||
} else if (field == "test") {
|
||||
char buf[128];
|
||||
if (t.n_prompt > 0 && t.n_gen == 0) {
|
||||
snprintf(buf, sizeof(buf), "pp %d", t.n_prompt);
|
||||
} else if (t.n_gen > 0 && t.n_prompt == 0) {
|
||||
snprintf(buf, sizeof(buf), "tg %d", t.n_gen);
|
||||
} else {
|
||||
assert(false);
|
||||
exit(1);
|
||||
}
|
||||
value = buf;
|
||||
} else if (field == "t/s") {
|
||||
char buf[128];
|
||||
snprintf(buf, sizeof(buf), "%.2f ± %.2f", t.avg_ts(), t.stdev_ts());
|
||||
value = buf;
|
||||
} else if (vmap.find(field) != vmap.end()) {
|
||||
value = vmap.at(field);
|
||||
} else {
|
||||
assert(false);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
int width = get_field_width(field);
|
||||
if (field == "t/s") {
|
||||
// HACK: the utf-8 character is 2 bytes
|
||||
width += 1;
|
||||
}
|
||||
fprintf(fout, " %*s |", width, value.c_str());
|
||||
}
|
||||
fprintf(fout, "\n");
|
||||
}
|
||||
|
||||
void print_footer() override {
|
||||
fprintf(fout, "\nbuild: %s (%d)\n", test::build_commit.c_str(), test::build_number);
|
||||
}
|
||||
};
|
||||
|
||||
struct sql_printer : public printer {
|
||||
static std::string get_sql_field_type(const std::string & field) {
|
||||
switch (test::get_field_type(field)) {
|
||||
case test::STRING:
|
||||
return "TEXT";
|
||||
case test::BOOL:
|
||||
case test::INT:
|
||||
return "INTEGER";
|
||||
case test::FLOAT:
|
||||
return "REAL";
|
||||
default:
|
||||
assert(false);
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
void print_header(const cmd_params & params) override {
|
||||
std::vector<std::string> fields = test::get_fields();
|
||||
fprintf(fout, "CREATE TABLE IF NOT EXISTS test (\n");
|
||||
for (size_t i = 0; i < fields.size(); i++) {
|
||||
fprintf(fout, " %s %s%s\n", fields.at(i).c_str(), get_sql_field_type(fields.at(i)).c_str(), i < fields.size() - 1 ? "," : "");
|
||||
}
|
||||
fprintf(fout, ");\n");
|
||||
fprintf(fout, "\n");
|
||||
(void) params;
|
||||
}
|
||||
|
||||
void print_test(const test & t) override {
|
||||
fprintf(fout, "INSERT INTO test (%s) ", join(test::get_fields(), ", ").c_str());
|
||||
fprintf(fout, "VALUES (");
|
||||
std::vector<std::string> values = t.get_values();
|
||||
for (size_t i = 0; i < values.size(); i++) {
|
||||
fprintf(fout, "'%s'%s", values.at(i).c_str(), i < values.size() - 1 ? ", " : "");
|
||||
}
|
||||
fprintf(fout, ");\n");
|
||||
}
|
||||
};

static void test_prompt(llama_context * ctx, int n_prompt, int n_past, int n_batch, int n_threads) {
    std::vector<llama_token> tokens(n_batch, llama_token_bos(ctx));
    int n_processed = 0;
    while (n_processed < n_prompt) {
        int n_tokens = std::min(n_prompt - n_processed, n_batch);
        llama_eval(ctx, tokens.data(), n_tokens, n_past + n_processed, n_threads);
        n_processed += n_tokens;
    }
}

static void test_gen(llama_context * ctx, int n_gen, int n_past, int n_threads) {
    llama_token token = llama_token_bos(ctx);
    for (int i = 0; i < n_gen; i++) {
        llama_eval(ctx, &token, 1, n_past + i, n_threads);
    }
}

static void llama_null_log_callback(enum llama_log_level level, const char * text, void * user_data) {
    (void) level;
    (void) text;
    (void) user_data;
}

int main(int argc, char ** argv) {
#if !defined(NDEBUG)
    fprintf(stderr, "warning: asserts enabled, performance may be affected\n");
#endif

#if (defined(_MSC_VER) && defined(_DEBUG)) || (!defined(_MSC_VER) && !defined(__OPTIMIZE__))
    fprintf(stderr, "warning: debug build, performance may be affected\n");
#endif

#if defined(__SANITIZE_ADDRESS__) || defined(__SANITIZE_THREAD__)
    fprintf(stderr, "warning: sanitizer enabled, performance may be affected\n");
#endif

    cmd_params params = parse_cmd_params(argc, argv);

    // initialize llama.cpp
    if (!params.verbose) {
        llama_log_set(llama_null_log_callback, NULL);
    }
    bool numa = false;
    llama_backend_init(numa);

    // initialize printer
    std::unique_ptr<printer> p;
    switch (params.output_format) {
        case CSV:
            p.reset(new csv_printer());
            break;
        case JSON:
            p.reset(new json_printer());
            break;
        case MARKDOWN:
            p.reset(new markdown_printer());
            break;
        case SQL:
            p.reset(new sql_printer());
            break;
        default:
            assert(false);
            exit(1);
    }
    p->fout = stdout;
    p->print_header(params);

    std::vector<cmd_params_instance> params_instances = get_cmd_params_instances(params);

    for (const auto & inst : params_instances) {
        // TODO: keep the model between tests when possible
        llama_context_params lparams = inst.to_llama_params();

        llama_model * lmodel = llama_load_model_from_file(inst.model.c_str(), lparams);
        if (lmodel == NULL) {
            fprintf(stderr, "%s: error: failed to load model '%s'\n", __func__, inst.model.c_str());
            return 1;
        }

        llama_context * ctx = llama_new_context_with_model(lmodel, lparams);
        if (ctx == NULL) {
            fprintf(stderr, "%s: error: failed to create context with model '%s'\n", __func__, inst.model.c_str());
            llama_free_model(lmodel);
            return 1;
        }

        test t(inst, lmodel, ctx);

        // warmup run
        test_gen(ctx, 1, 0, t.n_threads);

        for (int i = 0; i < params.reps; i++) {
            uint64_t t_start = get_time_ns();
            if (t.n_prompt > 0) {
                test_prompt(ctx, t.n_prompt, 0, t.n_batch, t.n_threads);
            }
            if (t.n_gen > 0) {
                test_gen(ctx, t.n_gen, t.n_prompt, t.n_threads);
            }
            uint64_t t_ns = get_time_ns() - t_start;
            t.samples_ns.push_back(t_ns);
        }

        p->print_test(t);

        llama_print_timings(ctx);

        llama_free(ctx);
        llama_free_model(lmodel);
    }

    p->print_footer();

    llama_backend_free();

    return 0;
}
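
// Example invocation (a sketch; -m and -o are the flag names assumed to be accepted by
// parse_cmd_params, with "sql" selecting the sql_printer above):
//
//   ./llama-bench -m models/7B/ggml-model-q4_0.gguf -o sql | sqlite3 llama-bench.db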

examples/llama.vim (new file, 132 lines)
@@ -0,0 +1,132 @@
" Requires an already running llama.cpp server
" To install either copy or symlink to ~/.vim/autoload/llama.vim
" Then start with either :call llama#doLlamaGen(),
" or add a keybind to your vimrc such as
" nnoremap Z :call llama#doLlamaGen()<CR>
" Similarly, you could add an insert mode keybind with
" inoremap <C-B> <Cmd>call llama#doLlamaGen()<CR>
"
" g:llama_api_url and g:llama_overrides can be configured in your .vimrc
" let g:llama_api_url = "192.168.1.10:8080"
" llama_overrides can also be set through buffer/window scopes. For instance
" autocmd filetype python let b:llama_overrides = {"temp": 0.2}
" Could be added to your .vimrc to automatically set a lower temperature when
" editing a python script
" Additionally, an override dict can be stored at the top of a file
" !*{"stop": ["User:"]}
" Could be added to the start of your chatlog.txt to set the stopping token
" These parameter dicts are merged together from lowest to highest priority:
" server default -> g:llama_overrides -> w:llama_overrides ->
" b:llama_overrides -> in file (!*) overrides
"
" Sublists (like logit_bias and stop) are overridden, not merged
" Example override:
" !*{"logit_bias": [[13, -5], [2, false]], "temperature": 1, "top_k": 5, "top_p": 0.5, "n_predict": 256, "repeat_last_n": 256, "repeat_penalty": 1.17647}
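" As one more (hypothetical) example, a window-scoped override that shortens
" predictions could be set with:
"   let w:llama_overrides = {"n_predict": 64, "stop": ["\n\n"]}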
if !exists("g:llama_api_url")
  let g:llama_api_url = "127.0.0.1:8080"
endif
if !exists("g:llama_overrides")
  let g:llama_overrides = {}
endif
const s:querydata = {"n_predict": 256, "stop": [ "\n" ], "stream": v:true }
const s:curlcommand = ['curl','--data-raw', "{\"prompt\":\"### System:\"}", '--silent', '--no-buffer', '--request', 'POST', '--url', g:llama_api_url .. '/completion', '--header', "Content-Type: application/json"]
let s:linedict = {}

func s:callbackHandler(bufn, channel, msg)
  if len(a:msg) < 3
    return
  elseif a:msg[0] == "d"
    let l:msg = a:msg[6:-1]
  else
    let l:msg = a:msg
  endif
  let l:decoded_msg = json_decode(l:msg)
  let l:newtext = split(l:decoded_msg['content'], "\n", 1)
  if len(l:newtext) > 0
    call setbufline(a:bufn, s:linedict[a:bufn], getbufline(a:bufn, s:linedict[a:bufn])[0] .. l:newtext[0])
  else
    echo "nothing genned"
  endif
  if len(l:newtext) > 1
    let l:failed = appendbufline(a:bufn, s:linedict[a:bufn], l:newtext[1:-1])
    let s:linedict[a:bufn] = s:linedict[a:bufn] + len(l:newtext)-1
  endif
  if has_key(l:decoded_msg, "stop") && l:decoded_msg.stop
    echo "Finished generation"
  endif
endfunction

func llama#doLlamaGen()
  if exists("b:job")
    if job_status(b:job) == "run"
      call job_stop(b:job)
      return
    endif
  endif

  let l:cbuffer = bufnr("%")
  let s:linedict[l:cbuffer] = line('$')
  let l:buflines = getbufline(l:cbuffer, 1, 1000)
  let l:querydata = copy(s:querydata)
  call extend(l:querydata, g:llama_overrides)
  if exists("w:llama_overrides")
    call extend(l:querydata, w:llama_overrides)
  endif
  if exists("b:llama_overrides")
    call extend(l:querydata, b:llama_overrides)
  endif
  if l:buflines[0][0:1] == '!*'
    let l:userdata = json_decode(l:buflines[0][2:-1])
    call extend(l:querydata, l:userdata)
    let l:buflines = l:buflines[1:-1]
  endif
  let l:querydata.prompt = join(l:buflines, "\n")
  let l:curlcommand = copy(s:curlcommand)
  let l:curlcommand[2] = json_encode(l:querydata)
  let b:job = job_start(l:curlcommand, {"callback": function("s:callbackHandler", [l:cbuffer])})
endfunction

" Echoes the tokenization of the provided string, or cursor to end of word
" Onus is placed on the user to include the preceding space
func llama#tokenizeWord(...)
  if (a:0 > 0)
    let l:input = a:1
  else
    exe "normal \"*ye"
    let l:input = @*
  endif
  let l:querydata = {"content": l:input}
  let l:curlcommand = copy(s:curlcommand)
  let l:curlcommand[2] = json_encode(l:querydata)
  let l:curlcommand[8] = g:llama_api_url .. "/tokenize"
  let s:token_job = job_start(l:curlcommand, {"callback": function("s:tokenizeWordCallback", [l:input])})
endfunction

func s:tokenizeWordCallback(plaintext, channel, msg)
  echo '"' .. a:plaintext ..'" - ' .. string(json_decode(a:msg).tokens)
endfunction


" Echoes the token count of the entire buffer (or provided string)
" Example usage :echo llama#tokenCount()
func llama#tokenCount(...)
  if (a:0 > 0)
    let l:buflines = a:1
  else
    let l:buflines = getline(1,1000)
    if l:buflines[0][0:1] == '!*'
      let l:buflines = l:buflines[1:-1]
    endif
    let l:buflines = join(l:buflines, "\n")
  endif
  let l:querydata = {"content": l:buflines}
  let l:curlcommand = copy(s:curlcommand)
  let l:curlcommand[2] = json_encode(l:querydata)
  let l:curlcommand[8] = g:llama_api_url .. "/tokenize"
  let s:token_job = job_start(l:curlcommand, {"callback": "s:tokenCountCallback"})
endfunction

func s:tokenCountCallback(channel, msg)
  let resp = json_decode(a:msg)
  echo len(resp.tokens)
endfunction
@@ -1,3 +1,5 @@
" Basic plugin example

function! Llm()

  let url = "http://127.0.0.1:8080/completion"
@@ -16,8 +18,10 @@ function! Llm()
  " Extract the content field from the response
  let content = json_decode(response).content

  let split_newlines = split(content, '\n', 1)

  " Insert the content at the cursor position
  call setline(line('.'), getline('.') . content)
  call setline(line('.'), [ getline('.') . split_newlines[0] ] + split_newlines[1:])
endfunction

command! Llm call Llm()
@@ -140,6 +140,12 @@ The `--ctx-size` option allows you to set the size of the prompt context used by the model

- `-c N, --ctx-size N`: Set the size of the prompt context (default: 512). The LLaMA models were built with a context of 2048, which will yield the best results on longer input/inference. However, increasing the context size beyond 2048 may lead to unpredictable results.

### Extended Context Size

Some fine-tuned models have extended the context length by scaling RoPE. For example, if the original pre-trained model has a context length (max sequence length) of 4096 (4k) and the fine-tuned model has 32k, that is a scaling factor of 8. It should work by setting the above `--ctx-size` to 32768 (32k) and `--rope-scale` to 8.

- `--rope-scale N`: Where N is the linear scaling factor used by the fine-tuned model.
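For example, a hypothetical 32k fine-tune of a 4k base model (the model path below is a placeholder) would be launched with:

```bash
./main -m models/llama-2-7b-32k.gguf --ctx-size 32768 --rope-scale 8
```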
### Keep Prompt

The `--keep` option allows users to retain the original prompt when the model runs out of context, ensuring a connection to the initial instruction or conversation topic is maintained.
@@ -154,9 +160,13 @@ The following options allow you to control the text generation process and fine-tune the diversity, creativity, and quality of the generated text.

### Number of Tokens to Predict

- `-n N, --n-predict N`: Set the number of tokens to predict when generating text (default: 128, -1 = infinity).
- `-n N, --n-predict N`: Set the number of tokens to predict when generating text (default: 128, -1 = infinity, -2 = until context filled)

The `--n-predict` option controls the number of tokens the model generates in response to the input prompt. By adjusting this value, you can influence the length of the generated text. A higher value will result in longer text, while a lower value will produce shorter text. A value of -1 will cause text to be generated without limit.
The `--n-predict` option controls the number of tokens the model generates in response to the input prompt. By adjusting this value, you can influence the length of the generated text. A higher value will result in longer text, while a lower value will produce shorter text.

A value of -1 will enable infinite text generation, even though we have a finite context window. When the context window is full, some of the earlier tokens (half of the tokens after `--n-keep`) will be discarded. The context must then be re-evaluated before generation can resume. On large models and/or large context windows, this will result in a significant pause in output.

If the pause is undesirable, a value of -2 will stop generation immediately when the context is filled.

It is important to note that the generated text may be shorter than the specified number of tokens if an End-of-Sequence (EOS) token or a reverse prompt is encountered. In interactive mode, text generation will pause and control will be returned to the user. In non-interactive mode, the program will end. In both cases, the text generation may stop before reaching the specified `n-predict` value. If you want the model to keep going without ever producing End-of-Sequence on its own, you can use the `--ignore-eos` parameter.
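To make the -1 swap concrete, here is a minimal sketch of the arithmetic with assumed values (the real logic lives in the `main` example; parts of it appear in the hunks below):

```cpp
// a minimal sketch of the --n-predict -1 context swap, with assumed values
int n_ctx  = 2048;              // context window
int n_keep = 128;               // tokens pinned by --keep
int n_past = n_ctx;             // window is full, generation must continue
int n_left = n_past - n_keep;   // 1920 tokens eligible for discarding
n_past = n_keep + n_left / 2;   // keep the pinned prefix plus the newer half;
                                // the kept half is re-evaluated before resuming
```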
@@ -4,6 +4,7 @@
#endif

#include "common.h"
#include "console.h"
#include "llama.h"
#include "build-info.h"
#include "grammar-parser.h"
@@ -35,9 +36,7 @@
#pragma warning(disable: 4244 4267) // possible loss of data
#endif

static console_state con_st;
static llama_context ** g_ctx;

static bool is_interacting = false;

#if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__)) || defined (_WIN32)
@@ -46,7 +45,7 @@ void sigint_handler(int signo) {
    if (!is_interacting) {
        is_interacting=true;
    } else {
        console_cleanup(con_st);
        console::cleanup();
        printf("\n");
        llama_print_timings(*g_ctx);
        _exit(130);
@@ -64,10 +63,8 @@ int main(int argc, char ** argv) {

    // save choice to use color for later
    // (note for later: this is a slightly awkward choice)
    con_st.use_color = params.use_color;
    con_st.multiline_input = params.multiline_input;
    console_init(con_st);
    atexit([]() { console_cleanup(con_st); });
    console::init(params.simple_io, params.use_color);
    atexit([]() { console::cleanup(); });

    if (params.perplexity) {
        printf("\n************\n");
@@ -146,7 +143,7 @@ int main(int argc, char ** argv) {
    {
        fprintf(stderr, "%s: testing memory usage for n_batch = %d, n_ctx = %d\n", __func__, params.n_batch, params.n_ctx);

        const std::vector<llama_token> tmp(params.n_batch, llama_token_bos());
        const std::vector<llama_token> tmp(params.n_batch, llama_token_bos(ctx));
        llama_eval(ctx, tmp.data(), tmp.size(), params.n_ctx, params.n_threads);
    }

@@ -194,10 +191,6 @@ int main(int argc, char ** argv) {

    // tokenize the prompt
    std::vector<llama_token> embd_inp;

    // Add a space in front of the first character to match OG llama tokenizer behavior
    params.prompt.insert(0, 1, ' ');

    if (params.interactive_first || params.instruct || !params.prompt.empty() || session_tokens.empty()) {
        embd_inp = ::llama_tokenize(ctx, params.prompt, true);
    } else {
@@ -273,15 +266,12 @@ int main(int argc, char ** argv) {
        params.interactive = true;
    }

    // determine newline token
    auto llama_token_newline = ::llama_tokenize(ctx, "\n", false);

    if (params.verbose_prompt) {
        fprintf(stderr, "\n");
        fprintf(stderr, "%s: prompt: '%s'\n", __func__, params.prompt.c_str());
        fprintf(stderr, "%s: number of tokens in prompt = %zu\n", __func__, embd_inp.size());
        for (int i = 0; i < (int) embd_inp.size(); i++) {
            fprintf(stderr, "%6d -> '%s'\n", embd_inp[i], llama_token_to_str(ctx, embd_inp[i]));
            fprintf(stderr, "%6d -> '%s'\n", embd_inp[i], llama_token_to_str(ctx, embd_inp[i]).c_str());
        }

        if (ctx_guidance) {
@@ -289,14 +279,14 @@ int main(int argc, char ** argv) {
            fprintf(stderr, "%s: negative prompt: '%s'\n", __func__, params.cfg_negative_prompt.c_str());
            fprintf(stderr, "%s: number of tokens in negative prompt = %zu\n", __func__, guidance_inp.size());
            for (int i = 0; i < (int) guidance_inp.size(); i++) {
                fprintf(stderr, "%6d -> '%s'\n", guidance_inp[i], llama_token_to_str(ctx, guidance_inp[i]));
                fprintf(stderr, "%6d -> '%s'\n", guidance_inp[i], llama_token_to_str(ctx, guidance_inp[i]).c_str());
            }
        }

        if (params.n_keep > 0) {
            fprintf(stderr, "%s: static prompt based on n_keep: '", __func__);
            for (int i = 0; i < params.n_keep; i++) {
                fprintf(stderr, "%s", llama_token_to_str(ctx, embd_inp[i]));
                fprintf(stderr, "%s", llama_token_to_str(ctx, embd_inp[i]).c_str());
            }
            fprintf(stderr, "'\n");
        }
@@ -314,7 +304,7 @@ int main(int argc, char ** argv) {
        auto console_ctrl_handler = +[](DWORD ctrl_type) -> BOOL {
            return (ctrl_type == CTRL_C_EVENT) ? (sigint_handler(SIGINT), true) : false;
        };
        SetConsoleCtrlHandler(static_cast<PHANDLER_ROUTINE>(console_ctrl_handler), true);
        SetConsoleCtrlHandler(reinterpret_cast<PHANDLER_ROUTINE>(console_ctrl_handler), true);
#endif

    fprintf(stderr, "%s: interactive mode on.\n", __func__);
@@ -355,10 +345,9 @@ int main(int argc, char ** argv) {
    fprintf(stderr, "\n");

    {
        auto it = params.logit_bias.find(llama_token_eos());
        auto it = params.logit_bias.find(llama_token_eos(ctx));
        if (it != params.logit_bias.end() && it->second == -INFINITY) {
            fprintf(stderr,
                "%s: warning: EOS token is disabled, which will cause most grammars to fail\n", __func__);
            fprintf(stderr, "%s: warning: EOS token is disabled, which will cause most grammars to fail\n", __func__);
        }
    }

@@ -373,7 +362,7 @@ int main(int argc, char ** argv) {

    if (params.interactive) {
        const char *control_message;
        if (con_st.multiline_input) {
        if (params.multiline_input) {
            control_message = " - To return control to LLaMa, end your input with '\\'.\n"
                              " - To return control without starting a new line, end your input with '/'.\n";
        } else {
@@ -401,14 +390,14 @@ int main(int argc, char ** argv) {
    int n_past_guidance = 0;

    // the first thing we will do is to output the prompt, so set color accordingly
    console_set_color(con_st, CONSOLE_COLOR_PROMPT);
    console::set_display(console::prompt);

    std::vector<llama_token> embd;
    std::vector<llama_token> embd_guidance;

    // do one empty run to warm up the model
    {
        const std::vector<llama_token> tmp = { llama_token_bos(), };
        const std::vector<llama_token> tmp = { llama_token_bos(ctx), };
        llama_eval(ctx, tmp.data(), tmp.size(), 0, params.n_threads);
        llama_reset_timings(ctx);
    }
@@ -422,9 +411,9 @@ int main(int argc, char ** argv) {
            // Ensure the input doesn't exceed the context size by truncating embd if necessary.
            if ((int)embd.size() > max_embd_size) {
                auto skipped_tokens = embd.size() - max_embd_size;
                console_set_color(con_st, CONSOLE_COLOR_ERROR);
                console::set_display(console::error);
                printf("<<input too long: skipped %zu token%s>>", skipped_tokens, skipped_tokens != 1 ? "s" : "");
                console_set_color(con_st, CONSOLE_COLOR_DEFAULT);
                console::set_display(console::reset);
                fflush(stdout);
                embd.resize(max_embd_size);
            }
@@ -434,8 +423,12 @@ int main(int argc, char ** argv) {
            // - take the n_keep first tokens from the original prompt (via n_past)
            // - take half of the last (n_ctx - n_keep) tokens and recompute the logits in batches
            if (n_past + (int) embd.size() + std::max<int>(0, guidance_offset) > n_ctx) {
                const int n_left = n_past - params.n_keep;
                if (params.n_predict == -2) {
                    fprintf(stderr, "\n\n%s: context full, stopping generation\n", __func__);
                    break;
                }

                const int n_left = n_past - params.n_keep;
                // always keep the first token - BOS
                n_past = std::max(1, params.n_keep);
                n_past_guidance = std::max(1, params.n_keep + guidance_offset);
@@ -588,7 +581,7 @@ int main(int argc, char ** argv) {
                }

                // Apply penalties
                float nl_logit = logits[llama_token_nl()];
                float nl_logit = logits[llama_token_nl(ctx)];
                auto last_n_repeat = std::min(std::min((int)last_n_tokens.size(), repeat_last_n), n_ctx);
                llama_sample_repetition_penalty(ctx, &candidates_p,
                    last_n_tokens.data() + last_n_tokens.size() - last_n_repeat,
@@ -597,7 +590,7 @@ int main(int argc, char ** argv) {
                    last_n_tokens.data() + last_n_tokens.size() - last_n_repeat,
                    last_n_repeat, alpha_frequency, alpha_presence);
                if (!penalize_nl) {
                    logits[llama_token_nl()] = nl_logit;
                    logits[llama_token_nl(ctx)] = nl_logit;
                }

                if (grammar != NULL) {
@@ -661,13 +654,13 @@ int main(int argc, char ** argv) {
        // display text
        if (input_echo) {
            for (auto id : embd) {
                printf("%s", llama_token_to_str(ctx, id));
                printf("%s", llama_token_to_str(ctx, id).c_str());
            }
            fflush(stdout);
        }
        // reset color to default if there is no pending user input
        if (input_echo && (int)embd_inp.size() == n_consumed) {
            console_set_color(con_st, CONSOLE_COLOR_DEFAULT);
            console::set_display(console::reset);
        }

        // if not currently processing queued inputs;
@@ -693,7 +686,7 @@ int main(int argc, char ** argv) {
                if (last_output.find(antiprompt.c_str(), search_start_pos) != std::string::npos) {
                    if (params.interactive) {
                        is_interacting = true;
                        console_set_color(con_st, CONSOLE_COLOR_USER_INPUT);
                        console::set_display(console::user_input);
                    }
                    is_antiprompt = true;
                    fflush(stdout);
@@ -703,7 +696,7 @@ int main(int argc, char ** argv) {
        }

        // deal with end of text token in interactive mode
        if (last_n_tokens.back() == llama_token_eos()) {
        if (last_n_tokens.back() == llama_token_eos(ctx)) {
            if (params.interactive) {
                if (params.antiprompt.size() != 0) {
                    // tokenize and inject first reverse prompt
@@ -714,7 +707,7 @@ int main(int argc, char ** argv) {

                is_interacting = true;
                printf("\n");
                console_set_color(con_st, CONSOLE_COLOR_USER_INPUT);
                console::set_display(console::user_input);
                fflush(stdout);
            } else if (params.instruct) {
                is_interacting = true;
@@ -727,7 +720,7 @@ int main(int argc, char ** argv) {
            }

            if (params.input_prefix_bos) {
                embd_inp.push_back(llama_token_bos());
                embd_inp.push_back(llama_token_bos(ctx));
            }

            std::string buffer;
@@ -739,12 +732,12 @@ int main(int argc, char ** argv) {
            std::string line;
            bool another_line = true;
            do {
                another_line = console_readline(con_st, line);
                another_line = console::readline(line, params.multiline_input);
                buffer += line;
            } while (another_line);

            // done taking input, reset color
            console_set_color(con_st, CONSOLE_COLOR_DEFAULT);
            console::set_display(console::reset);

            // Add tokens to embd only if the input buffer is non-empty
            // Entering an empty line lets the user pass control back
@@ -781,8 +774,7 @@ int main(int argc, char ** argv) {
                    if (grammar != NULL) {
                        llama_grammar_free(grammar);

                        std::vector<const llama_grammar_element *> grammar_rules(
                            parsed_grammar.c_rules());
                        std::vector<const llama_grammar_element *> grammar_rules( parsed_grammar.c_rules());
                        grammar = llama_grammar_init(
                            grammar_rules.data(), grammar_rules.size(),
                            parsed_grammar.symbol_ids.at("root"));
@@ -793,7 +785,7 @@ int main(int argc, char ** argv) {
        }

        // end of text token
        if (!embd.empty() && embd.back() == llama_token_eos() && !(params.instruct || params.interactive)) {
        if (!embd.empty() && embd.back() == llama_token_eos(ctx) && !(params.instruct || params.interactive)) {
            fprintf(stderr, " [end of text]\n");
            break;
        }
@@ -2,7 +2,7 @@
//
// - First, export a LLaMA graph:
//
//   $ ./bin/main -m ../models/7B/ggml-model-q4_0.bin --export
//   $ ./bin/main -m ../models/7B/ggml-model-q4_0.gguf --export
//
// - Run this tool to evaluate the exported graph:
//
@@ -5,6 +5,7 @@
#include <cmath>
#include <ctime>
#include <sstream>
#include <cstring>

#if defined(_MSC_VER)
#pragma warning(disable: 4244 4267) // possible loss of data
@@ -63,7 +64,7 @@ void perplexity(llama_context * ctx, const gpt_params & params) {

        // add BOS token for the first batch of each chunk
        if (j == 0) {
            tokens[batch_start] = llama_token_bos();
            tokens[batch_start] = llama_token_bos(ctx);
        }

        if (llama_eval(ctx, tokens.data() + batch_start, batch_size, j * n_batch, params.n_threads)) {
@@ -88,7 +89,7 @@ void perplexity(llama_context * ctx, const gpt_params & params) {
            fprintf(stderr, "%d hours ", total_seconds / (60*60));
            total_seconds = total_seconds % (60*60);
        }
        fprintf(stderr, "%d minutes\n", total_seconds / 60);
        fprintf(stderr, "%.2f minutes\n", total_seconds / 60.0);
    }

    // We get the logits for all the tokens in the context window (params.n_ctx)
@@ -121,6 +122,27 @@ void perplexity(llama_context * ctx, const gpt_params & params) {
    printf("\n");
}

std::vector<float> hellaswag_evaluate_tokens(llama_context * ctx, const std::vector<int>& tokens, int n_past, int n_batch,
        int n_vocab, int n_thread) {
    std::vector<float> result;
    result.reserve(tokens.size() * n_vocab);
    size_t n_chunk = (tokens.size() + n_batch - 1)/n_batch;
    for (size_t i_chunk = 0; i_chunk < n_chunk; ++i_chunk) {
        size_t n_tokens = tokens.size() - i_chunk * n_batch;
        n_tokens = std::min(n_tokens, size_t(n_batch));
        if (llama_eval(ctx, tokens.data() + i_chunk * n_batch, n_tokens, n_past, n_thread)) {
            fprintf(stderr, "%s : failed to eval\n", __func__);
            return {};
        }

        const auto logits = llama_get_logits(ctx);
        result.insert(result.end(), logits, logits + n_tokens * n_vocab);

        n_past += n_tokens;
    }
    return result;
}

void hellaswag_score(llama_context * ctx, const gpt_params & params) {
    // Calculates hellaswag score (acc_norm) from prompt
    //
@@ -153,7 +175,7 @@ void hellaswag_score(llama_context * ctx, const gpt_params & params) {
    }

    size_t hs_task_count = prompt_lines.size()/6;
    fprintf(stderr, "%s : loaded %lu tasks from prompt.\n", __func__, hs_task_count);
    fprintf(stderr, "%s : loaded %zu tasks from prompt.\n", __func__, hs_task_count);

    // This is needed as usual for LLaMA models
    bool prepend_bos = true;
@@ -178,7 +200,7 @@ void hellaswag_score(llama_context * ctx, const gpt_params & params) {
        double ending_logprob[4];
    };

    fprintf(stderr, "%s : selecting %lu %s tasks.\n", __func__, hs_task_count, (randomize_tasks?"randomized":"the first") );
    fprintf(stderr, "%s : selecting %zu %s tasks.\n", __func__, hs_task_count, (randomize_tasks?"randomized":"the first") );

    // Select and read data from prompt lines
    hs_data_t *hs_data = new hs_data_t[hs_task_count];
@@ -209,50 +231,93 @@ void hellaswag_score(llama_context * ctx, const gpt_params & params) {
    double acc = 0.0f;
    const int n_vocab = llama_n_vocab(ctx);

    std::vector<float> tok_logits(n_vocab);

    for (size_t task_idx = 0; task_idx < hs_task_count; task_idx++) {

        // Tokenize the context to count tokens
        std::vector<int> context_embd = ::llama_tokenize(ctx, hs_data[task_idx].context, prepend_bos);
        size_t context_size = context_embd.size();

        for (size_t ending_idx=0;ending_idx<4;ending_idx++) {
        // Do the 1st ending
        // In this case we include the context when evaluating
        auto query_embd = ::llama_tokenize(ctx, hs_data[task_idx].context + hs_data[task_idx].ending[0], prepend_bos);
        auto query_size = query_embd.size();
        //printf("First query: %d\n",(int)query_size);

        // Stop if query won't fit the ctx window
        if (query_size > (size_t)params.n_ctx) {
            fprintf(stderr, "%s : number of tokens in query %zu > n_ctxl\n", __func__, query_size);
            return;
        }

        // Speedup small evaluations by evaluating at least 32 tokens
        if (query_size < 32) {
            query_embd.resize(32);
        }

        auto logits = hellaswag_evaluate_tokens(ctx, query_embd, 0, params.n_batch, n_vocab, params.n_threads);
        if (logits.empty()) {
            fprintf(stderr, "%s : failed to eval\n", __func__);
            return;
        }

        std::memcpy(tok_logits.data(), logits.data() + (context_size-1)*n_vocab, n_vocab*sizeof(float));
        const auto first_probs = softmax(tok_logits);

        hs_data[task_idx].ending_logprob_count[0] = 1;
        hs_data[task_idx].ending_logprob[0] = std::log(first_probs[query_embd[context_size]]);

        // Calculate the logprobs over the ending
        for (size_t j = context_size; j < query_size - 1; j++) {

            std::memcpy(tok_logits.data(), logits.data() + j*n_vocab, n_vocab*sizeof(float));

            const float prob = softmax(tok_logits)[query_embd[j + 1]];

            hs_data[task_idx].ending_logprob[0] += std::log(prob);
            hs_data[task_idx].ending_logprob_count[0]++;
        }

        // Calculate the mean token logprob for acc_norm
        hs_data[task_idx].ending_logprob[0] /= hs_data[task_idx].ending_logprob_count[0];

        // Do the remaining endings
        // For these, we use the bare ending with n_past = context_size
        //
        for (size_t ending_idx = 1; ending_idx < 4; ending_idx++) {

            // Tokenize the query
            std::vector<int> query_embd = ::llama_tokenize(ctx, hs_data[task_idx].context + hs_data[task_idx].ending[ending_idx], prepend_bos);
            size_t query_size = query_embd.size();
            query_embd = ::llama_tokenize(ctx, hs_data[task_idx].ending[ending_idx], false);
            query_size = query_embd.size();

            // Stop if query won't fit the ctx window
            if (query_size > (size_t)params.n_ctx) {
                fprintf(stderr, "%s : number of tokens in query %lu > n_ctxl\n", __func__, query_size);
            if (context_size + query_size > (size_t)params.n_ctx) {
                fprintf(stderr, "%s : number of tokens in query %zu > n_ctxl\n", __func__, query_size);
                return;
            }

            // Speedup small evaluations by evaluating at least 32 tokens
            if (query_size < 32) {
                query_embd.resize(32);
            }
            // No, resizing to 32 is actually slightly slower (at least on CUDA)
            //if (query_size < 32) {
            //    query_embd.resize(32);
            //}

            // Evaluate the query
            if (llama_eval(ctx, query_embd.data(), query_embd.size(), 0, params.n_threads)) {
            logits = hellaswag_evaluate_tokens(ctx, query_embd, context_size, params.n_batch, n_vocab, params.n_threads);
            if (logits.empty()) {
                fprintf(stderr, "%s : failed to eval\n", __func__);
                return;
            }

            const auto query_logits = llama_get_logits(ctx);
            std::vector<float> logits;
            logits.insert(logits.end(), query_logits, query_logits + query_size * n_vocab);

            hs_data[task_idx].ending_logprob_count[ending_idx] = 0;
            hs_data[task_idx].ending_logprob[ending_idx] = 0.0f;
            hs_data[task_idx].ending_logprob_count[ending_idx] = 1;
            hs_data[task_idx].ending_logprob[ending_idx] = std::log(first_probs[query_embd[0]]);

            // Calculate the logprobs over the ending
            for (size_t j = context_size-1; j < query_size - 1; j++) {
                // Calculate probability of next token, given the previous ones.
                const std::vector<float> tok_logits(
                    logits.begin() + (j + 0) * n_vocab,
                    logits.begin() + (j + 1) * n_vocab);
            for (size_t j = 0; j < query_size - 1; j++) {
                std::memcpy(tok_logits.data(), logits.data() + j*n_vocab, n_vocab*sizeof(float));

                const float prob = softmax(tok_logits)[query_embd[ j + 1]];
                const float prob = softmax(tok_logits)[query_embd[j + 1]];

                hs_data[task_idx].ending_logprob[ending_idx] += std::log(prob);
                hs_data[task_idx].ending_logprob_count[ending_idx]++;
@@ -267,9 +332,9 @@ void hellaswag_score(llama_context * ctx, const gpt_params & params) {
        }

        // Find the ending with maximum logprob
        size_t ending_logprob_max_idx = -1;
        double ending_logprob_max_val = -INFINITY;
        for (size_t j=0; j < 4; j++) {
        size_t ending_logprob_max_idx = 0;
        double ending_logprob_max_val = hs_data[task_idx].ending_logprob[0];
        for (size_t j = 1; j < 4; j++) {
            if (hs_data[task_idx].ending_logprob[j] > ending_logprob_max_val) {
                ending_logprob_max_idx = j;
                ending_logprob_max_val = hs_data[task_idx].ending_logprob[j];
@@ -284,7 +349,7 @@ void hellaswag_score(llama_context * ctx, const gpt_params & params) {
        }

        // Print the accumulated accuracy mean x 100
        printf("%li\t%.8lf\n",task_idx+1, acc/double(task_idx+1)*100.0);
        printf("%zu\t%.8lf\n",task_idx+1, acc/double(task_idx+1)*100.0);
        fflush(stdout);
    }
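
// In formula form, the per-ending score computed above is the length-normalized
// log-likelihood  s_e = (1/N_e) * sum_j log p(t_j | t_1..t_{j-1}),  and acc_norm is
// the fraction of tasks where argmax_e s_e picks the gold ending; this is a
// restatement of the code above, not additional logic.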
@@ -24,7 +24,7 @@
#endif

struct quantize_stats_params {
    std::string model = "models/7B/ggml-model-f16.bin";
    std::string model = "models/7B/ggml-model-f16.gguf";
    bool verbose = false;
    bool per_layer_stats = false;
    bool print_histogram = false;

@@ -68,10 +68,10 @@ bool try_parse_ftype(const std::string & ftype_str_in, llama_ftype & ftype, std:
}

// usage:
//  ./quantize [--allow-requantize] [--leave-output-tensor] models/llama/ggml-model.bin [models/llama/ggml-model-quant.bin] type [nthreads]
//  ./quantize [--allow-requantize] [--leave-output-tensor] models/llama/ggml-model.gguf [models/llama/ggml-model-quant.gguf] type [nthreads]
//
void usage(const char * executable) {
    fprintf(stderr, "usage: %s [--help] [--allow-requantize] [--leave-output-tensor] model-f32.bin [model-quant.bin] type [nthreads]\n\n", executable);
    fprintf(stderr, "usage: %s [--help] [--allow-requantize] [--leave-output-tensor] model-f32.gguf [model-quant.gguf] type [nthreads]\n\n", executable);
    fprintf(stderr, "  --allow-requantize: Allows requantizing tensors that have already been quantized. Warning: This can severely reduce quality compared to quantizing from 16bit or 32bit\n");
    fprintf(stderr, "  --leave-output-tensor: Will leave output.weight un(re)quantized. Increases model size but may also increase quality, especially when requantizing\n");
    fprintf(stderr, "\nAllowed quantization types:\n");
@@ -118,8 +118,8 @@ int main(int argc, char ** argv) {
        if (pos != std::string::npos) {
            fpath = fname_inp.substr(0, pos + 1);
        }
        // export as [inp path]/ggml-model-[ftype].bin
        fname_out = fpath + "ggml-model-" + ftype_str + ".bin";
        // export as [inp path]/ggml-model-[ftype].gguf
        fname_out = fpath + "ggml-model-" + ftype_str + ".gguf";
        arg_idx++;
    }
    else {
@@ -26,7 +26,6 @@ int main(int argc, char ** argv) {
    auto lparams = llama_context_default_params();

    lparams.n_ctx = params.n_ctx;
    lparams.n_gqa = params.n_gqa;
    lparams.seed = params.seed;
    lparams.f16_kv = params.memory_f16;
    lparams.use_mmap = params.use_mmap;
@@ -45,9 +44,8 @@ int main(int argc, char ** argv) {
        llama_free_model(model);
        return 1;
    }
    auto tokens = std::vector<llama_token>(params.n_ctx);
    auto n_prompt_tokens = llama_tokenize(ctx, params.prompt.c_str(), tokens.data(), int(tokens.size()), true);

    auto tokens = llama_tokenize(ctx, params.prompt.c_str(), true);
    auto n_prompt_tokens = tokens.size();
    if (n_prompt_tokens < 1) {
        fprintf(stderr, "%s : failed to tokenize prompt\n", __func__);
        llama_free(ctx);
@@ -92,7 +90,7 @@ int main(int argc, char ** argv) {
        auto next_token_str = llama_token_to_str(ctx, next_token);
        last_n_tokens_data.push_back(next_token);

        printf("%s", next_token_str);
        printf("%s", next_token_str.c_str());
        if (llama_eval(ctx, &next_token, 1, n_past, params.n_threads)) {
            fprintf(stderr, "\n%s : failed to evaluate\n", __func__);
            llama_free(ctx);
@@ -152,7 +150,7 @@ int main(int argc, char ** argv) {
        auto next_token_str = llama_token_to_str(ctx2, next_token);
        last_n_tokens_data.push_back(next_token);

        printf("%s", next_token_str);
        printf("%s", next_token_str.c_str());
        if (llama_eval(ctx2, &next_token, 1, n_past, params.n_threads)) {
            fprintf(stderr, "\n%s : failed to evaluate\n", __func__);
            llama_free(ctx2);
@@ -5,7 +5,7 @@ This example demonstrates a simple HTTP API server and a simple web front end to interact with llama.cpp

Command line options:

- `--threads N`, `-t N`: Set the number of threads to use during computation.
- `-m FNAME`, `--model FNAME`: Specify the path to the LLaMA model file (e.g., `models/7B/ggml-model.bin`).
- `-m FNAME`, `--model FNAME`: Specify the path to the LLaMA model file (e.g., `models/7B/ggml-model.gguf`).
- `-m ALIAS`, `--alias ALIAS`: Set an alias for the model. The alias will be returned in API responses.
- `-c N`, `--ctx-size N`: Set the size of the prompt context. The default is 512, but LLaMA models were built with a context of 2048, which will provide better results for longer input/inference. The size may differ in other models; for example, baichuan models were built with a context of 4096.
- `-ngl N`, `--n-gpu-layers N`: When compiled with appropriate support (currently CLBlast or cuBLAS), this option allows offloading some layers to the GPU for computation. Generally results in increased performance.
@@ -16,6 +16,7 @@ Command line options:
- `--memory-f32`: Use 32-bit floats instead of 16-bit floats for memory key+value. Not recommended.
- `--mlock`: Lock the model in memory, preventing it from being swapped out when memory-mapped.
- `--no-mmap`: Do not memory-map the model. By default, models are mapped into memory, which allows the system to load only the necessary parts of the model as needed.
- `--numa`: Attempt optimizations that help on some NUMA systems.
- `--lora FNAME`: Apply a LoRA (Low-Rank Adaptation) adapter to the model (implies --no-mmap). This allows you to adapt the pretrained model to specific tasks or domains.
- `--lora-base FNAME`: Optional model to use as a base for the layers modified by the LoRA adapter. This flag is used in conjunction with the `--lora` flag, and specifies the base model for the adaptation.
- `-to N`, `--timeout N`: Server read/write timeout in seconds. Default `600`.
@@ -47,15 +48,14 @@ To get started right away, run the following command, making sure to use the correct path for the model you have:

### Unix-based systems (Linux, macOS, etc.):

```bash
./server -m models/7B/ggml-model.bin -c 2048
./server -m models/7B/ggml-model.gguf -c 2048
```

### Windows:

```powershell
server.exe -m models\7B\ggml-model.bin -c 2048
server.exe -m models\7B\ggml-model.gguf -c 2048
```

The above command will start a server that by default listens on `127.0.0.1:8080`. You can consume the endpoints with Postman, or from NodeJS with the axios library. You can visit the web front end at the same URL.

@@ -151,6 +151,8 @@ node .

`mirostat_eta`: Set the Mirostat learning rate, parameter eta (default: 0.1).

`grammar`: Set grammar for grammar-based sampling (default: no grammar)

`seed`: Set the random number generator (RNG) seed (default: -1, -1 = random seed).

`ignore_eos`: Ignore end of stream token and continue generating (default: false).
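As a quick sketch of how these parameters are supplied (the request shape follows the `/completion` calls used elsewhere on this page; the prompt and values are placeholders):

```bash
curl --request POST --url http://127.0.0.1:8080/completion \
     --header "Content-Type: application/json" \
     --data '{"prompt": "### Human: Hello\n### Assistant:", "n_predict": 128, "seed": 42}'
```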
@@ -1,5 +1,34 @@
import * as readline from 'node:readline'
import { stdin, stdout } from 'node:process'
import { readFileSync } from 'node:fs'
import { SchemaConverter } from './public/json-schema-to-grammar.mjs'

const args = process.argv.slice(2);
const grammarJsonSchemaFile = args.find(
    (_, index) => args[index - 1] === "--grammar-json-schema"
);
const grammarFile = args.find((_, index) => args[index - 1] === "--grammar");

// Example usage: function,arguments
const grammarJsonSchemaPropOrder = args.find(
    (_, index) => args[index - 1] === "--grammar-json-schema-prop-order"
);
const propOrder = grammarJsonSchemaPropOrder
    ? grammarJsonSchemaPropOrder
        .split(",")
        .reduce((acc, cur, index) => ({ ...acc, [cur]: index }), {})
    : {};

let grammar = null
if (grammarJsonSchemaFile) {
    const schema = JSON.parse(readFileSync(grammarJsonSchemaFile, 'utf-8'))
    const converter = new SchemaConverter(propOrder)
    converter.visit(schema, '')
    grammar = converter.formatGrammar()
}
if (grammarFile) {
    grammar = readFileSync(grammarFile, 'utf-8')
}

const API_URL = 'http://127.0.0.1:8080'

@@ -48,6 +77,7 @@ async function chat_completion(question) {
            n_keep: n_keep,
            n_predict: 256,
            stop: ["\n### Human:"], // stop completion after generating this
            grammar,
            stream: true,
        })
    })
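
// For instance (a sketch of the --grammar-json-schema-prop-order flag parsed above):
// passing "function,arguments" produces propOrder = { function: 0, arguments: 1 },
// which SchemaConverter presumably uses to fix the ordering of object properties in
// the generated grammar.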
@@ -87,289 +87,342 @@ unsigned char completion_js[] = {
// (byte-array contents omitted: this hunk updates the machine-generated hex dump of
// examples/server/public/completion.js that the server embeds)
0x20, 0x20, 0x20, 0x20, 0x20, 0x2f, 0x2f, 0x20, 0x50, 0x61, 0x72, 0x73,
|
||||
0x65, 0x20, 0x61, 0x6c, 0x6c, 0x20, 0x73, 0x73, 0x65, 0x20, 0x65, 0x76,
|
||||
0x65, 0x6e, 0x74, 0x73, 0x20, 0x61, 0x6e, 0x64, 0x20, 0x61, 0x64, 0x64,
|
||||
0x20, 0x74, 0x68, 0x65, 0x6d, 0x20, 0x74, 0x6f, 0x20, 0x72, 0x65, 0x73,
|
||||
0x75, 0x6c, 0x74, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f,
|
||||
0x6e, 0x73, 0x74, 0x20, 0x72, 0x65, 0x67, 0x65, 0x78, 0x20, 0x3d, 0x20,
|
||||
0x2f, 0x5e, 0x28, 0x5c, 0x53, 0x2b, 0x29, 0x3a, 0x5c, 0x73, 0x28, 0x2e,
|
||||
0x2a, 0x29, 0x24, 0x2f, 0x67, 0x6d, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20,
|
||||
0x20, 0x20, 0x66, 0x6f, 0x72, 0x20, 0x28, 0x63, 0x6f, 0x6e, 0x73, 0x74,
|
||||
0x20, 0x6c, 0x69, 0x6e, 0x65, 0x20, 0x6f, 0x66, 0x20, 0x6c, 0x69, 0x6e,
|
||||
0x65, 0x73, 0x29, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
|
||||
0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x6d, 0x61, 0x74, 0x63,
|
||||
0x68, 0x20, 0x3d, 0x20, 0x72, 0x65, 0x67, 0x65, 0x78, 0x2e, 0x65, 0x78,
|
||||
0x65, 0x63, 0x28, 0x6c, 0x69, 0x6e, 0x65, 0x29, 0x3b, 0x0a, 0x20, 0x20,
|
||||
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x69, 0x66, 0x20, 0x28, 0x6d, 0x61,
|
||||
0x74, 0x63, 0x68, 0x29, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20,
|
||||
0x20, 0x20, 0x20, 0x20, 0x20, 0x72, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x5b,
|
||||
0x6d, 0x61, 0x74, 0x63, 0x68, 0x5b, 0x31, 0x5d, 0x5d, 0x20, 0x3d, 0x20,
|
||||
0x6d, 0x61, 0x74, 0x63, 0x68, 0x5b, 0x32, 0x5d, 0x0a, 0x20, 0x20, 0x20,
|
||||
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x2f, 0x2f, 0x20, 0x73, 0x69,
|
||||
0x6e, 0x63, 0x65, 0x20, 0x77, 0x65, 0x20, 0x6b, 0x6e, 0x6f, 0x77, 0x20,
|
||||
0x74, 0x68, 0x69, 0x73, 0x20, 0x69, 0x73, 0x20, 0x6c, 0x6c, 0x61, 0x6d,
|
||||
0x61, 0x2e, 0x63, 0x70, 0x70, 0x2c, 0x20, 0x6c, 0x65, 0x74, 0x27, 0x73,
|
||||
0x20, 0x6a, 0x75, 0x73, 0x74, 0x20, 0x64, 0x65, 0x63, 0x6f, 0x64, 0x65,
|
||||
0x20, 0x74, 0x68, 0x65, 0x20, 0x6a, 0x73, 0x6f, 0x6e, 0x20, 0x69, 0x6e,
|
||||
0x20, 0x64, 0x61, 0x74, 0x61, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
|
||||
0x20, 0x20, 0x20, 0x20, 0x69, 0x66, 0x20, 0x28, 0x72, 0x65, 0x73, 0x75,
|
||||
0x6c, 0x74, 0x2e, 0x64, 0x61, 0x74, 0x61, 0x29, 0x20, 0x7b, 0x0a, 0x20,
|
||||
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x72,
|
||||
0x65, 0x73, 0x75, 0x6c, 0x74, 0x2e, 0x64, 0x61, 0x74, 0x61, 0x20, 0x3d,
|
||||
0x20, 0x4a, 0x53, 0x4f, 0x4e, 0x2e, 0x70, 0x61, 0x72, 0x73, 0x65, 0x28,
|
||||
0x72, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x2e, 0x64, 0x61, 0x74, 0x61, 0x29,
|
||||
0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
|
||||
0x20, 0x20, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x20, 0x2b, 0x3d,
|
||||
0x20, 0x72, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x2e, 0x64, 0x61, 0x74, 0x61,
|
||||
0x2e, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x3b, 0x0a, 0x0a, 0x20,
|
||||
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x2f,
|
||||
0x2f, 0x20, 0x79, 0x69, 0x65, 0x6c, 0x64, 0x0a, 0x20, 0x20, 0x20, 0x20,
|
||||
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x79, 0x69, 0x65, 0x6c,
|
||||
0x64, 0x20, 0x72, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x3b, 0x0a, 0x0a, 0x20,
|
||||
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x2f,
|
||||
0x2f, 0x20, 0x69, 0x66, 0x20, 0x77, 0x65, 0x20, 0x67, 0x6f, 0x74, 0x20,
|
||||
0x61, 0x20, 0x73, 0x74, 0x6f, 0x70, 0x20, 0x74, 0x6f, 0x6b, 0x65, 0x6e,
|
||||
0x20, 0x66, 0x72, 0x6f, 0x6d, 0x20, 0x73, 0x65, 0x72, 0x76, 0x65, 0x72,
|
||||
0x2c, 0x20, 0x77, 0x65, 0x20, 0x77, 0x69, 0x6c, 0x6c, 0x20, 0x62, 0x72,
|
||||
0x65, 0x61, 0x6b, 0x20, 0x68, 0x65, 0x72, 0x65, 0x0a, 0x20, 0x20, 0x20,
|
||||
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x69, 0x66, 0x20,
|
||||
0x28, 0x72, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x2e, 0x64, 0x61, 0x74, 0x61,
|
||||
0x2e, 0x73, 0x74, 0x6f, 0x70, 0x29, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20,
|
||||
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x69,
|
||||
0x66, 0x20, 0x28, 0x72, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x2e, 0x64, 0x61,
|
||||
0x74, 0x61, 0x2e, 0x67, 0x65, 0x6e, 0x65, 0x72, 0x61, 0x74, 0x69, 0x6f,
|
||||
0x6e, 0x5f, 0x73, 0x65, 0x74, 0x74, 0x69, 0x6e, 0x67, 0x73, 0x29, 0x20,
|
||||
0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
|
||||
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x67, 0x65, 0x6e, 0x65, 0x72, 0x61,
|
||||
0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x73, 0x65, 0x74, 0x74, 0x69, 0x6e, 0x67,
|
||||
0x73, 0x20, 0x3d, 0x20, 0x72, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x2e, 0x64,
|
||||
0x61, 0x74, 0x61, 0x2e, 0x67, 0x65, 0x6e, 0x65, 0x72, 0x61, 0x74, 0x69,
|
||||
0x6f, 0x6e, 0x5f, 0x73, 0x65, 0x74, 0x74, 0x69, 0x6e, 0x67, 0x73, 0x20,
|
||||
0x7d, 0x29, 0x29, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x7d,
|
||||
0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x69, 0x66, 0x20, 0x28, 0x63,
|
||||
0x68, 0x75, 0x6e, 0x6b, 0x2e, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x74, 0x69,
|
||||
0x6d, 0x69, 0x6e, 0x67, 0x73, 0x29, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20,
|
||||
0x20, 0x20, 0x20, 0x20, 0x20, 0x65, 0x76, 0x65, 0x6e, 0x74, 0x54, 0x61,
|
||||
0x72, 0x67, 0x65, 0x74, 0x2e, 0x64, 0x69, 0x73, 0x70, 0x61, 0x74, 0x63,
|
||||
0x68, 0x45, 0x76, 0x65, 0x6e, 0x74, 0x28, 0x6e, 0x65, 0x77, 0x20, 0x43,
|
||||
0x75, 0x73, 0x74, 0x6f, 0x6d, 0x45, 0x76, 0x65, 0x6e, 0x74, 0x28, 0x22,
|
||||
0x74, 0x69, 0x6d, 0x69, 0x6e, 0x67, 0x73, 0x22, 0x2c, 0x20, 0x7b, 0x20,
|
||||
0x64, 0x65, 0x74, 0x61, 0x69, 0x6c, 0x3a, 0x20, 0x63, 0x68, 0x75, 0x6e,
|
||||
0x6b, 0x2e, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x74, 0x69, 0x6d, 0x69, 0x6e,
|
||||
0x67, 0x73, 0x20, 0x7d, 0x29, 0x29, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20,
|
||||
0x20, 0x20, 0x7d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x0a, 0x20, 0x20,
|
||||
0x20, 0x20, 0x65, 0x76, 0x65, 0x6e, 0x74, 0x54, 0x61, 0x72, 0x67, 0x65,
|
||||
0x74, 0x2e, 0x64, 0x69, 0x73, 0x70, 0x61, 0x74, 0x63, 0x68, 0x45, 0x76,
|
||||
0x65, 0x6e, 0x74, 0x28, 0x6e, 0x65, 0x77, 0x20, 0x43, 0x75, 0x73, 0x74,
|
||||
0x6f, 0x6d, 0x45, 0x76, 0x65, 0x6e, 0x74, 0x28, 0x22, 0x64, 0x6f, 0x6e,
|
||||
0x65, 0x22, 0x2c, 0x20, 0x7b, 0x20, 0x64, 0x65, 0x74, 0x61, 0x69, 0x6c,
|
||||
0x3a, 0x20, 0x7b, 0x20, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x20,
|
||||
0x7d, 0x20, 0x7d, 0x29, 0x29, 0x3b, 0x0a, 0x20, 0x20, 0x7d, 0x29, 0x28,
|
||||
0x29, 0x3b, 0x0a, 0x20, 0x20, 0x72, 0x65, 0x74, 0x75, 0x72, 0x6e, 0x20,
|
||||
0x65, 0x76, 0x65, 0x6e, 0x74, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x3b,
|
||||
0x0a, 0x7d, 0x0a, 0x0a, 0x2f, 0x2f, 0x20, 0x43, 0x61, 0x6c, 0x6c, 0x20,
|
||||
0x6c, 0x6c, 0x61, 0x6d, 0x61, 0x2c, 0x20, 0x72, 0x65, 0x74, 0x75, 0x72,
|
||||
0x6e, 0x20, 0x61, 0x20, 0x70, 0x72, 0x6f, 0x6d, 0x69, 0x73, 0x65, 0x20,
|
||||
0x74, 0x68, 0x61, 0x74, 0x20, 0x72, 0x65, 0x73, 0x6f, 0x6c, 0x76, 0x65,
|
||||
0x73, 0x20, 0x74, 0x6f, 0x20, 0x74, 0x68, 0x65, 0x20, 0x63, 0x6f, 0x6d,
|
||||
0x70, 0x6c, 0x65, 0x74, 0x65, 0x64, 0x20, 0x74, 0x65, 0x78, 0x74, 0x2e,
|
||||
0x20, 0x54, 0x68, 0x69, 0x73, 0x20, 0x64, 0x6f, 0x65, 0x73, 0x20, 0x6e,
|
||||
0x6f, 0x74, 0x20, 0x73, 0x75, 0x70, 0x70, 0x6f, 0x72, 0x74, 0x20, 0x73,
|
||||
0x74, 0x72, 0x65, 0x61, 0x6d, 0x69, 0x6e, 0x67, 0x0a, 0x2f, 0x2f, 0x0a,
|
||||
0x2f, 0x2f, 0x20, 0x45, 0x78, 0x61, 0x6d, 0x70, 0x6c, 0x65, 0x3a, 0x0a,
|
||||
0x2f, 0x2f, 0x0a, 0x2f, 0x2f, 0x20, 0x20, 0x20, 0x20, 0x20, 0x6c, 0x6c,
|
||||
0x61, 0x6d, 0x61, 0x50, 0x72, 0x6f, 0x6d, 0x69, 0x73, 0x65, 0x28, 0x70,
|
||||
0x72, 0x6f, 0x6d, 0x70, 0x74, 0x29, 0x2e, 0x74, 0x68, 0x65, 0x6e, 0x28,
|
||||
0x28, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x29, 0x20, 0x3d, 0x3e,
|
||||
0x20, 0x7b, 0x0a, 0x2f, 0x2f, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
|
||||
0x64, 0x6f, 0x63, 0x75, 0x6d, 0x65, 0x6e, 0x74, 0x2e, 0x77, 0x72, 0x69,
|
||||
0x74, 0x65, 0x28, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x29, 0x0a,
|
||||
0x2f, 0x2f, 0x20, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x29, 0x0a, 0x2f, 0x2f,
|
||||
0x0a, 0x2f, 0x2f, 0x20, 0x20, 0x20, 0x20, 0x20, 0x6f, 0x72, 0x0a, 0x2f,
|
||||
0x2f, 0x0a, 0x2f, 0x2f, 0x20, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e,
|
||||
0x73, 0x74, 0x20, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x20, 0x3d,
|
||||
0x20, 0x61, 0x77, 0x61, 0x69, 0x74, 0x20, 0x6c, 0x6c, 0x61, 0x6d, 0x61,
|
||||
0x50, 0x72, 0x6f, 0x6d, 0x69, 0x73, 0x65, 0x28, 0x70, 0x72, 0x6f, 0x6d,
|
||||
0x70, 0x74, 0x29, 0x0a, 0x2f, 0x2f, 0x20, 0x20, 0x20, 0x20, 0x20, 0x64,
|
||||
0x6f, 0x63, 0x75, 0x6d, 0x65, 0x6e, 0x74, 0x2e, 0x77, 0x72, 0x69, 0x74,
|
||||
0x65, 0x28, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x29, 0x0a, 0x2f,
|
||||
0x2f, 0x0a, 0x65, 0x78, 0x70, 0x6f, 0x72, 0x74, 0x20, 0x63, 0x6f, 0x6e,
|
||||
0x73, 0x74, 0x20, 0x6c, 0x6c, 0x61, 0x6d, 0x61, 0x50, 0x72, 0x6f, 0x6d,
|
||||
0x69, 0x73, 0x65, 0x20, 0x3d, 0x20, 0x28, 0x70, 0x72, 0x6f, 0x6d, 0x70,
|
||||
0x6f, 0x6e, 0x5f, 0x73, 0x65, 0x74, 0x74, 0x69, 0x6e, 0x67, 0x73, 0x3b,
|
||||
0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
|
||||
0x20, 0x20, 0x20, 0x7d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
|
||||
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x74, 0x20,
|
||||
0x3d, 0x20, 0x66, 0x61, 0x6c, 0x73, 0x65, 0x3b, 0x0a, 0x20, 0x20, 0x20,
|
||||
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x62,
|
||||
0x72, 0x65, 0x61, 0x6b, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
|
||||
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x0a, 0x20, 0x20, 0x20, 0x20,
|
||||
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x0a, 0x20, 0x20, 0x20, 0x20,
|
||||
0x20, 0x20, 0x20, 0x20, 0x7d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
|
||||
0x7d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x0a, 0x20, 0x20, 0x7d, 0x20,
|
||||
0x63, 0x61, 0x74, 0x63, 0x68, 0x20, 0x28, 0x65, 0x29, 0x20, 0x7b, 0x0a,
|
||||
0x20, 0x20, 0x20, 0x20, 0x69, 0x66, 0x20, 0x28, 0x65, 0x2e, 0x6e, 0x61,
|
||||
0x6d, 0x65, 0x20, 0x21, 0x3d, 0x3d, 0x20, 0x27, 0x41, 0x62, 0x6f, 0x72,
|
||||
0x74, 0x45, 0x72, 0x72, 0x6f, 0x72, 0x27, 0x29, 0x20, 0x7b, 0x0a, 0x20,
|
||||
0x20, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x6f, 0x6c, 0x65,
|
||||
0x2e, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x28, 0x22, 0x6c, 0x6c, 0x61, 0x6d,
|
||||
0x61, 0x20, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x3a, 0x20, 0x22, 0x2c, 0x20,
|
||||
0x65, 0x29, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x0a, 0x20, 0x20,
|
||||
0x20, 0x20, 0x74, 0x68, 0x72, 0x6f, 0x77, 0x20, 0x65, 0x3b, 0x0a, 0x20,
|
||||
0x20, 0x7d, 0x0a, 0x20, 0x20, 0x66, 0x69, 0x6e, 0x61, 0x6c, 0x6c, 0x79,
|
||||
0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x74, 0x72,
|
||||
0x6f, 0x6c, 0x6c, 0x65, 0x72, 0x2e, 0x61, 0x62, 0x6f, 0x72, 0x74, 0x28,
|
||||
0x29, 0x3b, 0x0a, 0x20, 0x20, 0x7d, 0x0a, 0x0a, 0x20, 0x20, 0x72, 0x65,
|
||||
0x74, 0x75, 0x72, 0x6e, 0x20, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74,
|
||||
0x3b, 0x0a, 0x7d, 0x0a, 0x0a, 0x2f, 0x2f, 0x20, 0x43, 0x61, 0x6c, 0x6c,
|
||||
0x20, 0x6c, 0x6c, 0x61, 0x6d, 0x61, 0x2c, 0x20, 0x72, 0x65, 0x74, 0x75,
|
||||
0x72, 0x6e, 0x20, 0x61, 0x6e, 0x20, 0x65, 0x76, 0x65, 0x6e, 0x74, 0x20,
|
||||
0x74, 0x61, 0x72, 0x67, 0x65, 0x74, 0x20, 0x74, 0x68, 0x61, 0x74, 0x20,
|
||||
0x79, 0x6f, 0x75, 0x20, 0x63, 0x61, 0x6e, 0x20, 0x73, 0x75, 0x62, 0x63,
|
||||
0x72, 0x69, 0x62, 0x65, 0x20, 0x74, 0x6f, 0x0a, 0x2f, 0x2f, 0x0a, 0x2f,
|
||||
0x2f, 0x20, 0x45, 0x78, 0x61, 0x6d, 0x70, 0x6c, 0x65, 0x3a, 0x0a, 0x2f,
|
||||
0x2f, 0x0a, 0x2f, 0x2f, 0x20, 0x20, 0x20, 0x20, 0x69, 0x6d, 0x70, 0x6f,
|
||||
0x72, 0x74, 0x20, 0x7b, 0x20, 0x6c, 0x6c, 0x61, 0x6d, 0x61, 0x45, 0x76,
|
||||
0x65, 0x6e, 0x74, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x20, 0x7d, 0x20,
|
||||
0x66, 0x72, 0x6f, 0x6d, 0x20, 0x27, 0x2f, 0x63, 0x6f, 0x6d, 0x70, 0x6c,
|
||||
0x65, 0x74, 0x69, 0x6f, 0x6e, 0x2e, 0x6a, 0x73, 0x27, 0x0a, 0x2f, 0x2f,
|
||||
0x0a, 0x2f, 0x2f, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74,
|
||||
0x20, 0x63, 0x6f, 0x6e, 0x6e, 0x20, 0x3d, 0x20, 0x6c, 0x6c, 0x61, 0x6d,
|
||||
0x61, 0x45, 0x76, 0x65, 0x6e, 0x74, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74,
|
||||
0x28, 0x70, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x29, 0x0a, 0x2f, 0x2f, 0x20,
|
||||
0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x6e, 0x2e, 0x61, 0x64, 0x64, 0x45,
|
||||
0x76, 0x65, 0x6e, 0x74, 0x4c, 0x69, 0x73, 0x74, 0x65, 0x6e, 0x65, 0x72,
|
||||
0x28, 0x22, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x22, 0x2c, 0x20,
|
||||
0x28, 0x63, 0x68, 0x75, 0x6e, 0x6b, 0x29, 0x20, 0x3d, 0x3e, 0x20, 0x7b,
|
||||
0x0a, 0x2f, 0x2f, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x64, 0x6f, 0x63,
|
||||
0x75, 0x6d, 0x65, 0x6e, 0x74, 0x2e, 0x77, 0x72, 0x69, 0x74, 0x65, 0x28,
|
||||
0x63, 0x68, 0x75, 0x6e, 0x6b, 0x2e, 0x64, 0x65, 0x74, 0x61, 0x69, 0x6c,
|
||||
0x2e, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x29, 0x0a, 0x2f, 0x2f,
|
||||
0x20, 0x20, 0x20, 0x20, 0x7d, 0x29, 0x0a, 0x2f, 0x2f, 0x0a, 0x65, 0x78,
|
||||
0x70, 0x6f, 0x72, 0x74, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x6c,
|
||||
0x6c, 0x61, 0x6d, 0x61, 0x45, 0x76, 0x65, 0x6e, 0x74, 0x54, 0x61, 0x72,
|
||||
0x67, 0x65, 0x74, 0x20, 0x3d, 0x20, 0x28, 0x70, 0x72, 0x6f, 0x6d, 0x70,
|
||||
0x74, 0x2c, 0x20, 0x70, 0x61, 0x72, 0x61, 0x6d, 0x73, 0x20, 0x3d, 0x20,
|
||||
0x7b, 0x7d, 0x2c, 0x20, 0x63, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x20, 0x3d,
|
||||
0x20, 0x7b, 0x7d, 0x29, 0x20, 0x3d, 0x3e, 0x20, 0x7b, 0x0a, 0x20, 0x20,
|
||||
0x72, 0x65, 0x74, 0x75, 0x72, 0x6e, 0x20, 0x6e, 0x65, 0x77, 0x20, 0x50,
|
||||
0x72, 0x6f, 0x6d, 0x69, 0x73, 0x65, 0x28, 0x61, 0x73, 0x79, 0x6e, 0x63,
|
||||
0x20, 0x28, 0x72, 0x65, 0x73, 0x6f, 0x6c, 0x76, 0x65, 0x2c, 0x20, 0x72,
|
||||
0x65, 0x6a, 0x65, 0x63, 0x74, 0x29, 0x20, 0x3d, 0x3e, 0x20, 0x7b, 0x0a,
|
||||
0x20, 0x20, 0x20, 0x20, 0x6c, 0x65, 0x74, 0x20, 0x63, 0x6f, 0x6e, 0x74,
|
||||
0x65, 0x6e, 0x74, 0x20, 0x3d, 0x20, 0x22, 0x22, 0x3b, 0x0a, 0x20, 0x20,
|
||||
0x20, 0x20, 0x74, 0x72, 0x79, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20,
|
||||
0x20, 0x20, 0x66, 0x6f, 0x72, 0x20, 0x61, 0x77, 0x61, 0x69, 0x74, 0x20,
|
||||
0x28, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x63, 0x68, 0x75, 0x6e, 0x6b,
|
||||
0x20, 0x6f, 0x66, 0x20, 0x6c, 0x6c, 0x61, 0x6d, 0x61, 0x28, 0x70, 0x72,
|
||||
0x6f, 0x6d, 0x70, 0x74, 0x2c, 0x20, 0x70, 0x61, 0x72, 0x61, 0x6d, 0x73,
|
||||
0x2c, 0x20, 0x63, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x29, 0x29, 0x20, 0x7b,
|
||||
0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e,
|
||||
0x74, 0x65, 0x6e, 0x74, 0x20, 0x2b, 0x3d, 0x20, 0x63, 0x68, 0x75, 0x6e,
|
||||
0x6b, 0x2e, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x63, 0x6f, 0x6e, 0x74, 0x65,
|
||||
0x6e, 0x74, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x0a,
|
||||
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x72, 0x65, 0x73, 0x6f, 0x6c, 0x76,
|
||||
0x65, 0x28, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x29, 0x3b, 0x0a,
|
||||
0x20, 0x20, 0x20, 0x20, 0x7d, 0x20, 0x63, 0x61, 0x74, 0x63, 0x68, 0x20,
|
||||
0x28, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x29, 0x20, 0x7b, 0x0a, 0x20, 0x20,
|
||||
0x20, 0x20, 0x20, 0x20, 0x72, 0x65, 0x6a, 0x65, 0x63, 0x74, 0x28, 0x65,
|
||||
0x72, 0x72, 0x6f, 0x72, 0x29, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x7d,
|
||||
0x0a, 0x20, 0x20, 0x7d, 0x29, 0x3b, 0x0a, 0x7d, 0x3b, 0x0a, 0x0a, 0x2f,
|
||||
0x2a, 0x2a, 0x0a, 0x20, 0x2a, 0x20, 0x28, 0x64, 0x65, 0x70, 0x72, 0x65,
|
||||
0x63, 0x61, 0x74, 0x65, 0x64, 0x29, 0x0a, 0x20, 0x2a, 0x2f, 0x0a, 0x65,
|
||||
0x78, 0x70, 0x6f, 0x72, 0x74, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20,
|
||||
0x6c, 0x6c, 0x61, 0x6d, 0x61, 0x43, 0x6f, 0x6d, 0x70, 0x6c, 0x65, 0x74,
|
||||
0x65, 0x20, 0x3d, 0x20, 0x61, 0x73, 0x79, 0x6e, 0x63, 0x20, 0x28, 0x70,
|
||||
0x61, 0x72, 0x61, 0x6d, 0x73, 0x2c, 0x20, 0x63, 0x6f, 0x6e, 0x74, 0x72,
|
||||
0x6f, 0x6c, 0x6c, 0x65, 0x72, 0x2c, 0x20, 0x63, 0x61, 0x6c, 0x6c, 0x62,
|
||||
0x61, 0x63, 0x6b, 0x29, 0x20, 0x3d, 0x3e, 0x20, 0x7b, 0x0a, 0x20, 0x20,
|
||||
0x66, 0x6f, 0x72, 0x20, 0x61, 0x77, 0x61, 0x69, 0x74, 0x20, 0x28, 0x63,
|
||||
0x6f, 0x6e, 0x73, 0x74, 0x20, 0x63, 0x68, 0x75, 0x6e, 0x6b, 0x20, 0x6f,
|
||||
0x66, 0x20, 0x6c, 0x6c, 0x61, 0x6d, 0x61, 0x28, 0x70, 0x61, 0x72, 0x61,
|
||||
0x6d, 0x73, 0x2e, 0x70, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x2c, 0x20, 0x70,
|
||||
0x61, 0x72, 0x61, 0x6d, 0x73, 0x2c, 0x20, 0x7b, 0x20, 0x63, 0x6f, 0x6e,
|
||||
0x74, 0x72, 0x6f, 0x6c, 0x6c, 0x65, 0x72, 0x20, 0x7d, 0x29, 0x29, 0x20,
|
||||
0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x63, 0x61, 0x6c, 0x6c, 0x62, 0x61,
|
||||
0x63, 0x6b, 0x28, 0x63, 0x68, 0x75, 0x6e, 0x6b, 0x29, 0x3b, 0x0a, 0x20,
|
||||
0x20, 0x7d, 0x0a, 0x7d, 0x0a, 0x0a, 0x2f, 0x2f, 0x20, 0x47, 0x65, 0x74,
|
||||
0x20, 0x74, 0x68, 0x65, 0x20, 0x6d, 0x6f, 0x64, 0x65, 0x6c, 0x20, 0x69,
|
||||
0x6e, 0x66, 0x6f, 0x20, 0x66, 0x72, 0x6f, 0x6d, 0x20, 0x74, 0x68, 0x65,
|
||||
0x20, 0x73, 0x65, 0x72, 0x76, 0x65, 0x72, 0x2e, 0x20, 0x54, 0x68, 0x69,
|
||||
0x73, 0x20, 0x69, 0x73, 0x20, 0x75, 0x73, 0x65, 0x66, 0x75, 0x6c, 0x20,
|
||||
0x66, 0x6f, 0x72, 0x20, 0x67, 0x65, 0x74, 0x74, 0x69, 0x6e, 0x67, 0x20,
|
||||
0x74, 0x68, 0x65, 0x20, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x20,
|
||||
0x77, 0x69, 0x6e, 0x64, 0x6f, 0x77, 0x20, 0x61, 0x6e, 0x64, 0x20, 0x73,
|
||||
0x6f, 0x20, 0x6f, 0x6e, 0x2e, 0x0a, 0x65, 0x78, 0x70, 0x6f, 0x72, 0x74,
|
||||
0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x6c, 0x6c, 0x61, 0x6d, 0x61,
|
||||
0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x49, 0x6e, 0x66, 0x6f, 0x20, 0x3d, 0x20,
|
||||
0x61, 0x73, 0x79, 0x6e, 0x63, 0x20, 0x28, 0x29, 0x20, 0x3d, 0x3e, 0x20,
|
||||
0x7b, 0x0a, 0x20, 0x20, 0x69, 0x66, 0x20, 0x28, 0x21, 0x67, 0x65, 0x6e,
|
||||
0x65, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x73, 0x65, 0x74, 0x74,
|
||||
0x69, 0x6e, 0x67, 0x73, 0x29, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20,
|
||||
0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x65, 0x76, 0x65, 0x6e, 0x74, 0x54,
|
||||
0x61, 0x72, 0x67, 0x65, 0x74, 0x20, 0x3d, 0x20, 0x6e, 0x65, 0x77, 0x20,
|
||||
0x45, 0x76, 0x65, 0x6e, 0x74, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x28,
|
||||
0x29, 0x3b, 0x0a, 0x20, 0x20, 0x28, 0x61, 0x73, 0x79, 0x6e, 0x63, 0x20,
|
||||
0x28, 0x29, 0x20, 0x3d, 0x3e, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20,
|
||||
0x6c, 0x65, 0x74, 0x20, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x20,
|
||||
0x3d, 0x20, 0x22, 0x22, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x66, 0x6f,
|
||||
0x72, 0x20, 0x61, 0x77, 0x61, 0x69, 0x74, 0x20, 0x28, 0x63, 0x6f, 0x6e,
|
||||
0x73, 0x74, 0x20, 0x63, 0x68, 0x75, 0x6e, 0x6b, 0x20, 0x6f, 0x66, 0x20,
|
||||
0x6c, 0x6c, 0x61, 0x6d, 0x61, 0x28, 0x70, 0x72, 0x6f, 0x6d, 0x70, 0x74,
|
||||
0x2c, 0x20, 0x70, 0x61, 0x72, 0x61, 0x6d, 0x73, 0x2c, 0x20, 0x63, 0x6f,
|
||||
0x6e, 0x66, 0x69, 0x67, 0x29, 0x29, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20,
|
||||
0x20, 0x20, 0x20, 0x69, 0x66, 0x20, 0x28, 0x63, 0x68, 0x75, 0x6e, 0x6b,
|
||||
0x2e, 0x64, 0x61, 0x74, 0x61, 0x29, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20,
|
||||
0x20, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74,
|
||||
0x20, 0x2b, 0x3d, 0x20, 0x63, 0x68, 0x75, 0x6e, 0x6b, 0x2e, 0x64, 0x61,
|
||||
0x74, 0x61, 0x2e, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x3b, 0x0a,
|
||||
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x65, 0x76, 0x65, 0x6e,
|
||||
0x74, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x2e, 0x64, 0x69, 0x73, 0x70,
|
||||
0x61, 0x74, 0x63, 0x68, 0x45, 0x76, 0x65, 0x6e, 0x74, 0x28, 0x6e, 0x65,
|
||||
0x77, 0x20, 0x43, 0x75, 0x73, 0x74, 0x6f, 0x6d, 0x45, 0x76, 0x65, 0x6e,
|
||||
0x74, 0x28, 0x22, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x22, 0x2c,
|
||||
0x20, 0x7b, 0x20, 0x64, 0x65, 0x74, 0x61, 0x69, 0x6c, 0x3a, 0x20, 0x63,
|
||||
0x68, 0x75, 0x6e, 0x6b, 0x2e, 0x64, 0x61, 0x74, 0x61, 0x20, 0x7d, 0x29,
|
||||
0x29, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x0a, 0x20,
|
||||
0x20, 0x20, 0x20, 0x20, 0x20, 0x69, 0x66, 0x20, 0x28, 0x63, 0x68, 0x75,
|
||||
0x6e, 0x6b, 0x2e, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x67, 0x65, 0x6e, 0x65,
|
||||
0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x73, 0x65, 0x74, 0x74, 0x69,
|
||||
0x6e, 0x67, 0x73, 0x29, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20,
|
||||
0x20, 0x20, 0x20, 0x65, 0x76, 0x65, 0x6e, 0x74, 0x54, 0x61, 0x72, 0x67,
|
||||
0x65, 0x74, 0x2e, 0x64, 0x69, 0x73, 0x70, 0x61, 0x74, 0x63, 0x68, 0x45,
|
||||
0x76, 0x65, 0x6e, 0x74, 0x28, 0x6e, 0x65, 0x77, 0x20, 0x43, 0x75, 0x73,
|
||||
0x74, 0x6f, 0x6d, 0x45, 0x76, 0x65, 0x6e, 0x74, 0x28, 0x22, 0x67, 0x65,
|
||||
0x6e, 0x65, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x73, 0x65, 0x74,
|
||||
0x74, 0x69, 0x6e, 0x67, 0x73, 0x22, 0x2c, 0x20, 0x7b, 0x20, 0x64, 0x65,
|
||||
0x74, 0x61, 0x69, 0x6c, 0x3a, 0x20, 0x63, 0x68, 0x75, 0x6e, 0x6b, 0x2e,
|
||||
0x64, 0x61, 0x74, 0x61, 0x2e, 0x67, 0x65, 0x6e, 0x65, 0x72, 0x61, 0x74,
|
||||
0x69, 0x6f, 0x6e, 0x5f, 0x73, 0x65, 0x74, 0x74, 0x69, 0x6e, 0x67, 0x73,
|
||||
0x20, 0x7d, 0x29, 0x29, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
|
||||
0x7d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x69, 0x66, 0x20, 0x28,
|
||||
0x63, 0x68, 0x75, 0x6e, 0x6b, 0x2e, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x74,
|
||||
0x69, 0x6d, 0x69, 0x6e, 0x67, 0x73, 0x29, 0x20, 0x7b, 0x0a, 0x20, 0x20,
|
||||
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x65, 0x76, 0x65, 0x6e, 0x74, 0x54,
|
||||
0x61, 0x72, 0x67, 0x65, 0x74, 0x2e, 0x64, 0x69, 0x73, 0x70, 0x61, 0x74,
|
||||
0x63, 0x68, 0x45, 0x76, 0x65, 0x6e, 0x74, 0x28, 0x6e, 0x65, 0x77, 0x20,
|
||||
0x43, 0x75, 0x73, 0x74, 0x6f, 0x6d, 0x45, 0x76, 0x65, 0x6e, 0x74, 0x28,
|
||||
0x22, 0x74, 0x69, 0x6d, 0x69, 0x6e, 0x67, 0x73, 0x22, 0x2c, 0x20, 0x7b,
|
||||
0x20, 0x64, 0x65, 0x74, 0x61, 0x69, 0x6c, 0x3a, 0x20, 0x63, 0x68, 0x75,
|
||||
0x6e, 0x6b, 0x2e, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x74, 0x69, 0x6d, 0x69,
|
||||
0x6e, 0x67, 0x73, 0x20, 0x7d, 0x29, 0x29, 0x3b, 0x0a, 0x20, 0x20, 0x20,
|
||||
0x20, 0x20, 0x20, 0x7d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x0a, 0x20,
|
||||
0x20, 0x20, 0x20, 0x65, 0x76, 0x65, 0x6e, 0x74, 0x54, 0x61, 0x72, 0x67,
|
||||
0x65, 0x74, 0x2e, 0x64, 0x69, 0x73, 0x70, 0x61, 0x74, 0x63, 0x68, 0x45,
|
||||
0x76, 0x65, 0x6e, 0x74, 0x28, 0x6e, 0x65, 0x77, 0x20, 0x43, 0x75, 0x73,
|
||||
0x74, 0x6f, 0x6d, 0x45, 0x76, 0x65, 0x6e, 0x74, 0x28, 0x22, 0x64, 0x6f,
|
||||
0x6e, 0x65, 0x22, 0x2c, 0x20, 0x7b, 0x20, 0x64, 0x65, 0x74, 0x61, 0x69,
|
||||
0x6c, 0x3a, 0x20, 0x7b, 0x20, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74,
|
||||
0x20, 0x7d, 0x20, 0x7d, 0x29, 0x29, 0x3b, 0x0a, 0x20, 0x20, 0x7d, 0x29,
|
||||
0x28, 0x29, 0x3b, 0x0a, 0x20, 0x20, 0x72, 0x65, 0x74, 0x75, 0x72, 0x6e,
|
||||
0x20, 0x65, 0x76, 0x65, 0x6e, 0x74, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74,
|
||||
0x3b, 0x0a, 0x7d, 0x0a, 0x0a, 0x2f, 0x2f, 0x20, 0x43, 0x61, 0x6c, 0x6c,
|
||||
0x20, 0x6c, 0x6c, 0x61, 0x6d, 0x61, 0x2c, 0x20, 0x72, 0x65, 0x74, 0x75,
|
||||
0x72, 0x6e, 0x20, 0x61, 0x20, 0x70, 0x72, 0x6f, 0x6d, 0x69, 0x73, 0x65,
|
||||
0x20, 0x74, 0x68, 0x61, 0x74, 0x20, 0x72, 0x65, 0x73, 0x6f, 0x6c, 0x76,
|
||||
0x65, 0x73, 0x20, 0x74, 0x6f, 0x20, 0x74, 0x68, 0x65, 0x20, 0x63, 0x6f,
|
||||
0x6d, 0x70, 0x6c, 0x65, 0x74, 0x65, 0x64, 0x20, 0x74, 0x65, 0x78, 0x74,
|
||||
0x2e, 0x20, 0x54, 0x68, 0x69, 0x73, 0x20, 0x64, 0x6f, 0x65, 0x73, 0x20,
|
||||
0x6e, 0x6f, 0x74, 0x20, 0x73, 0x75, 0x70, 0x70, 0x6f, 0x72, 0x74, 0x20,
|
||||
0x73, 0x74, 0x72, 0x65, 0x61, 0x6d, 0x69, 0x6e, 0x67, 0x0a, 0x2f, 0x2f,
|
||||
0x0a, 0x2f, 0x2f, 0x20, 0x45, 0x78, 0x61, 0x6d, 0x70, 0x6c, 0x65, 0x3a,
|
||||
0x0a, 0x2f, 0x2f, 0x0a, 0x2f, 0x2f, 0x20, 0x20, 0x20, 0x20, 0x20, 0x6c,
|
||||
0x6c, 0x61, 0x6d, 0x61, 0x50, 0x72, 0x6f, 0x6d, 0x69, 0x73, 0x65, 0x28,
|
||||
0x70, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x29, 0x2e, 0x74, 0x68, 0x65, 0x6e,
|
||||
0x28, 0x28, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x29, 0x20, 0x3d,
|
||||
0x3e, 0x20, 0x7b, 0x0a, 0x2f, 0x2f, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
|
||||
0x20, 0x64, 0x6f, 0x63, 0x75, 0x6d, 0x65, 0x6e, 0x74, 0x2e, 0x77, 0x72,
|
||||
0x69, 0x74, 0x65, 0x28, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x29,
|
||||
0x0a, 0x2f, 0x2f, 0x20, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x29, 0x0a, 0x2f,
|
||||
0x2f, 0x0a, 0x2f, 0x2f, 0x20, 0x20, 0x20, 0x20, 0x20, 0x6f, 0x72, 0x0a,
|
||||
0x2f, 0x2f, 0x0a, 0x2f, 0x2f, 0x20, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f,
|
||||
0x6e, 0x73, 0x74, 0x20, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x20,
|
||||
0x3d, 0x20, 0x61, 0x77, 0x61, 0x69, 0x74, 0x20, 0x6c, 0x6c, 0x61, 0x6d,
|
||||
0x61, 0x50, 0x72, 0x6f, 0x6d, 0x69, 0x73, 0x65, 0x28, 0x70, 0x72, 0x6f,
|
||||
0x6d, 0x70, 0x74, 0x29, 0x0a, 0x2f, 0x2f, 0x20, 0x20, 0x20, 0x20, 0x20,
|
||||
0x64, 0x6f, 0x63, 0x75, 0x6d, 0x65, 0x6e, 0x74, 0x2e, 0x77, 0x72, 0x69,
|
||||
0x74, 0x65, 0x28, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x29, 0x0a,
|
||||
0x2f, 0x2f, 0x0a, 0x65, 0x78, 0x70, 0x6f, 0x72, 0x74, 0x20, 0x63, 0x6f,
|
||||
0x6e, 0x73, 0x74, 0x20, 0x6c, 0x6c, 0x61, 0x6d, 0x61, 0x50, 0x72, 0x6f,
|
||||
0x6d, 0x69, 0x73, 0x65, 0x20, 0x3d, 0x20, 0x28, 0x70, 0x72, 0x6f, 0x6d,
|
||||
0x70, 0x74, 0x2c, 0x20, 0x70, 0x61, 0x72, 0x61, 0x6d, 0x73, 0x20, 0x3d,
|
||||
0x20, 0x7b, 0x7d, 0x2c, 0x20, 0x63, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x20,
|
||||
0x3d, 0x20, 0x7b, 0x7d, 0x29, 0x20, 0x3d, 0x3e, 0x20, 0x7b, 0x0a, 0x20,
|
||||
0x20, 0x72, 0x65, 0x74, 0x75, 0x72, 0x6e, 0x20, 0x6e, 0x65, 0x77, 0x20,
|
||||
0x50, 0x72, 0x6f, 0x6d, 0x69, 0x73, 0x65, 0x28, 0x61, 0x73, 0x79, 0x6e,
|
||||
0x63, 0x20, 0x28, 0x72, 0x65, 0x73, 0x6f, 0x6c, 0x76, 0x65, 0x2c, 0x20,
|
||||
0x72, 0x65, 0x6a, 0x65, 0x63, 0x74, 0x29, 0x20, 0x3d, 0x3e, 0x20, 0x7b,
|
||||
0x0a, 0x20, 0x20, 0x20, 0x20, 0x6c, 0x65, 0x74, 0x20, 0x63, 0x6f, 0x6e,
|
||||
0x74, 0x65, 0x6e, 0x74, 0x20, 0x3d, 0x20, 0x22, 0x22, 0x3b, 0x0a, 0x20,
|
||||
0x20, 0x20, 0x20, 0x74, 0x72, 0x79, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20,
|
||||
0x20, 0x20, 0x20, 0x66, 0x6f, 0x72, 0x20, 0x61, 0x77, 0x61, 0x69, 0x74,
|
||||
0x20, 0x28, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x63, 0x68, 0x75, 0x6e,
|
||||
0x6b, 0x20, 0x6f, 0x66, 0x20, 0x6c, 0x6c, 0x61, 0x6d, 0x61, 0x28, 0x70,
|
||||
0x72, 0x6f, 0x6d, 0x70, 0x74, 0x2c, 0x20, 0x70, 0x61, 0x72, 0x61, 0x6d,
|
||||
0x73, 0x2c, 0x20, 0x63, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x29, 0x29, 0x20,
|
||||
0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f,
|
||||
0x6e, 0x74, 0x65, 0x6e, 0x74, 0x20, 0x2b, 0x3d, 0x20, 0x63, 0x68, 0x75,
|
||||
0x6e, 0x6b, 0x2e, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x63, 0x6f, 0x6e, 0x74,
|
||||
0x65, 0x6e, 0x74, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x7d,
|
||||
0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x72, 0x65, 0x73, 0x6f, 0x6c,
|
||||
0x76, 0x65, 0x28, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x29, 0x3b,
|
||||
0x0a, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x20, 0x63, 0x61, 0x74, 0x63, 0x68,
|
||||
0x20, 0x28, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x29, 0x20, 0x7b, 0x0a, 0x20,
|
||||
0x20, 0x20, 0x20, 0x20, 0x20, 0x72, 0x65, 0x6a, 0x65, 0x63, 0x74, 0x28,
|
||||
0x65, 0x72, 0x72, 0x6f, 0x72, 0x29, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20,
|
||||
0x7d, 0x0a, 0x20, 0x20, 0x7d, 0x29, 0x3b, 0x0a, 0x7d, 0x3b, 0x0a, 0x0a,
|
||||
0x2f, 0x2a, 0x2a, 0x0a, 0x20, 0x2a, 0x20, 0x28, 0x64, 0x65, 0x70, 0x72,
|
||||
0x65, 0x63, 0x61, 0x74, 0x65, 0x64, 0x29, 0x0a, 0x20, 0x2a, 0x2f, 0x0a,
|
||||
0x65, 0x78, 0x70, 0x6f, 0x72, 0x74, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74,
|
||||
0x20, 0x6c, 0x6c, 0x61, 0x6d, 0x61, 0x43, 0x6f, 0x6d, 0x70, 0x6c, 0x65,
|
||||
0x74, 0x65, 0x20, 0x3d, 0x20, 0x61, 0x73, 0x79, 0x6e, 0x63, 0x20, 0x28,
|
||||
0x70, 0x61, 0x72, 0x61, 0x6d, 0x73, 0x2c, 0x20, 0x63, 0x6f, 0x6e, 0x74,
|
||||
0x72, 0x6f, 0x6c, 0x6c, 0x65, 0x72, 0x2c, 0x20, 0x63, 0x61, 0x6c, 0x6c,
|
||||
0x62, 0x61, 0x63, 0x6b, 0x29, 0x20, 0x3d, 0x3e, 0x20, 0x7b, 0x0a, 0x20,
|
||||
0x20, 0x66, 0x6f, 0x72, 0x20, 0x61, 0x77, 0x61, 0x69, 0x74, 0x20, 0x28,
|
||||
0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x63, 0x68, 0x75, 0x6e, 0x6b, 0x20,
|
||||
0x6f, 0x66, 0x20, 0x6c, 0x6c, 0x61, 0x6d, 0x61, 0x28, 0x70, 0x61, 0x72,
|
||||
0x61, 0x6d, 0x73, 0x2e, 0x70, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x2c, 0x20,
|
||||
0x70, 0x61, 0x72, 0x61, 0x6d, 0x73, 0x2c, 0x20, 0x7b, 0x20, 0x63, 0x6f,
|
||||
0x6e, 0x74, 0x72, 0x6f, 0x6c, 0x6c, 0x65, 0x72, 0x20, 0x7d, 0x29, 0x29,
|
||||
0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x63, 0x61, 0x6c, 0x6c, 0x62,
|
||||
0x61, 0x63, 0x6b, 0x28, 0x63, 0x68, 0x75, 0x6e, 0x6b, 0x29, 0x3b, 0x0a,
|
||||
0x20, 0x20, 0x7d, 0x0a, 0x7d, 0x0a, 0x0a, 0x2f, 0x2f, 0x20, 0x47, 0x65,
|
||||
0x74, 0x20, 0x74, 0x68, 0x65, 0x20, 0x6d, 0x6f, 0x64, 0x65, 0x6c, 0x20,
|
||||
0x69, 0x6e, 0x66, 0x6f, 0x20, 0x66, 0x72, 0x6f, 0x6d, 0x20, 0x74, 0x68,
|
||||
0x65, 0x20, 0x73, 0x65, 0x72, 0x76, 0x65, 0x72, 0x2e, 0x20, 0x54, 0x68,
|
||||
0x69, 0x73, 0x20, 0x69, 0x73, 0x20, 0x75, 0x73, 0x65, 0x66, 0x75, 0x6c,
|
||||
0x20, 0x66, 0x6f, 0x72, 0x20, 0x67, 0x65, 0x74, 0x74, 0x69, 0x6e, 0x67,
|
||||
0x20, 0x74, 0x68, 0x65, 0x20, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74,
|
||||
0x20, 0x77, 0x69, 0x6e, 0x64, 0x6f, 0x77, 0x20, 0x61, 0x6e, 0x64, 0x20,
|
||||
0x73, 0x6f, 0x20, 0x6f, 0x6e, 0x2e, 0x0a, 0x65, 0x78, 0x70, 0x6f, 0x72,
|
||||
0x74, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x6c, 0x6c, 0x61, 0x6d,
|
||||
0x61, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x49, 0x6e, 0x66, 0x6f, 0x20, 0x3d,
|
||||
0x20, 0x61, 0x73, 0x79, 0x6e, 0x63, 0x20, 0x28, 0x29, 0x20, 0x3d, 0x3e,
|
||||
0x20, 0x7b, 0x0a, 0x20, 0x20, 0x69, 0x66, 0x20, 0x28, 0x21, 0x67, 0x65,
|
||||
0x6e, 0x65, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x73, 0x65, 0x74,
|
||||
0x74, 0x69, 0x6e, 0x67, 0x73, 0x29, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20,
|
||||
0x20, 0x67, 0x65, 0x6e, 0x65, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x5f,
|
||||
0x73, 0x65, 0x74, 0x74, 0x69, 0x6e, 0x67, 0x73, 0x20, 0x3d, 0x20, 0x61,
|
||||
0x77, 0x61, 0x69, 0x74, 0x20, 0x66, 0x65, 0x74, 0x63, 0x68, 0x28, 0x22,
|
||||
0x2f, 0x6d, 0x6f, 0x64, 0x65, 0x6c, 0x2e, 0x6a, 0x73, 0x6f, 0x6e, 0x22,
|
||||
0x29, 0x2e, 0x74, 0x68, 0x65, 0x6e, 0x28, 0x72, 0x20, 0x3d, 0x3e, 0x20,
|
||||
0x72, 0x2e, 0x6a, 0x73, 0x6f, 0x6e, 0x28, 0x29, 0x29, 0x3b, 0x0a, 0x20,
|
||||
0x20, 0x7d, 0x0a, 0x20, 0x20, 0x72, 0x65, 0x74, 0x75, 0x72, 0x6e, 0x20,
|
||||
0x67, 0x65, 0x6e, 0x65, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x73,
|
||||
0x65, 0x74, 0x74, 0x69, 0x6e, 0x67, 0x73, 0x20, 0x3d, 0x20, 0x61, 0x77,
|
||||
0x61, 0x69, 0x74, 0x20, 0x66, 0x65, 0x74, 0x63, 0x68, 0x28, 0x22, 0x2f,
|
||||
0x6d, 0x6f, 0x64, 0x65, 0x6c, 0x2e, 0x6a, 0x73, 0x6f, 0x6e, 0x22, 0x29,
|
||||
0x2e, 0x74, 0x68, 0x65, 0x6e, 0x28, 0x72, 0x20, 0x3d, 0x3e, 0x20, 0x72,
|
||||
0x2e, 0x6a, 0x73, 0x6f, 0x6e, 0x28, 0x29, 0x29, 0x3b, 0x0a, 0x20, 0x20,
|
||||
0x7d, 0x0a, 0x20, 0x20, 0x72, 0x65, 0x74, 0x75, 0x72, 0x6e, 0x20, 0x67,
|
||||
0x65, 0x6e, 0x65, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x73, 0x65,
|
||||
0x74, 0x74, 0x69, 0x6e, 0x67, 0x73, 0x3b, 0x0a, 0x7d, 0x0a
|
||||
0x65, 0x74, 0x74, 0x69, 0x6e, 0x67, 0x73, 0x3b, 0x0a, 0x7d, 0x0a
|
||||
};
|
||||
unsigned int completion_js_len = 4462;
|
||||
unsigned int completion_js_len = 5099;
|
||||
|
||||
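The array above embeds the browser-side completion.js helper that the server's web UI loads. Its encoded usage comments describe two exported entry points, llamaEventTarget (streaming, event based) and llamaPromise (non streaming), and import the module from '/completion.js'. A minimal sketch following those comments; the prompt string is only illustrative:

// Sketch based on the usage comments embedded in completion.js.
import { llamaEventTarget, llamaPromise } from '/completion.js'

const prompt = "Write a haiku about autumn."  // illustrative prompt, not from the diff

// Streaming: subscribe to "message" events as generated chunks arrive.
const conn = llamaEventTarget(prompt)
conn.addEventListener("message", (chunk) => {
  document.write(chunk.detail.content)
})

// Non-streaming: await the full completion instead.
const content = await llamaPromise(prompt)
document.write(content)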
@@ -11,8 +11,10 @@ echo >> $PUBLIC/index.js # add newline

FILES=$(ls $PUBLIC)

cd $PUBLIC
for FILE in $FILES; do
func=$(echo $FILE | tr '.' '_')
echo "generate $FILE.hpp ($func)"
xxd -n $func -i $PUBLIC/$FILE > $DIR/$FILE.hpp
echo "generate $FILE.hpp"

# use simple flag for old version of xxd
xxd -i $FILE > $DIR/$FILE.hpp
done

File diff suppressed because it is too large
File diff suppressed because it is too large
311
examples/server/json-schema-to-grammar.mjs.hpp
Normal file
@@ -0,0 +1,311 @@
unsigned char json_schema_to_grammar_mjs[] = {
  /* hex-encoded contents of json-schema-to-grammar.mjs (auto-generated array body) */
};
unsigned int json_schema_to_grammar_mjs_len = 3695;
@@ -43,6 +43,7 @@ export async function* llama(prompt, params = {}, config = {}) {
const decoder = new TextDecoder();

let content = "";
let leftover = ""; // Buffer for partially read lines

try {
let cont = true;

@@ -53,29 +54,47 @@ export async function* llama(prompt, params = {}, config = {}) {
break;
}

// sse answers in the form multiple lines of: value\n with data always present as a key. in our case we
// mainly care about the data: key here, which we expect as json
const text = decoder.decode(result.value);
// Add any leftover data to the current chunk of data
const text = leftover + decoder.decode(result.value);

// parse all sse events and add them to result
const regex = /^(\S+):\s(.*)$/gm;
for (const match of text.matchAll(regex)) {
result[match[1]] = match[2]
// Check if the last character is a line break
const endsWithLineBreak = text.endsWith('\n');

// Split the text into lines
let lines = text.split('\n');

// If the text doesn't end with a line break, then the last line is incomplete
// Store it in leftover to be added to the next chunk of data
if (!endsWithLineBreak) {
leftover = lines.pop();
} else {
leftover = ""; // Reset leftover if we have a line break at the end
}

// since we know this is llama.cpp, let's just decode the json in data
result.data = JSON.parse(result.data);
content += result.data.content;
// Parse all sse events and add them to result
const regex = /^(\S+):\s(.*)$/gm;
for (const line of lines) {
const match = regex.exec(line);
if (match) {
result[match[1]] = match[2]
// since we know this is llama.cpp, let's just decode the json in data
if (result.data) {
result.data = JSON.parse(result.data);
content += result.data.content;

// yield
yield result;
// yield
yield result;

// if we got a stop token from server, we will break here
if (result.data.stop) {
if (result.data.generation_settings) {
generation_settings = result.data.generation_settings;
// if we got a stop token from server, we will break here
if (result.data.stop) {
if (result.data.generation_settings) {
generation_settings = result.data.generation_settings;
}
cont = false;
break;
}
}
}
break;
}
}
} catch (e) {
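A quick illustrative sketch (not part of the diff) of why the leftover buffer above is needed: a streamed read can end mid-line, so an SSE `data:` line may arrive split across two chunks. Under that assumption, the buffering reassembles the line before parsing:

// hypothetical chunk boundary splitting one SSE line across two reads
let leftover = "";
const chunks = ['data: {"content":', ' " world"}\n'];
for (const chunk of chunks) {
    const text = leftover + chunk;                     // prepend what was left over
    const lines = text.split('\n');
    leftover = text.endsWith('\n') ? "" : lines.pop(); // keep the incomplete tail
    for (const line of lines) {
        const match = /^(\S+):\s(.*)$/.exec(line);
        if (match) {
            console.log(JSON.parse(match[2]).content); // prints " world" exactly once
        }
    }
}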
@@ -141,14 +141,15 @@
} from '/index.js';

import { llama } from '/completion.js';
import { SchemaConverter } from '/json-schema-to-grammar.mjs';

const session = signal({
prompt: "This is a conversation between user and llama, a friendly chatbot. respond in simple markdown.",
prompt: "This is a conversation between User and Llama, a friendly chatbot. Llama is helpful, kind, honest, good at writing, and never fails to answer any requests immediately and with precision.",
template: "{{prompt}}\n\n{{history}}\n{{char}}:",
historyTemplate: "{{name}}: {{message}}",
transcript: [],
type: "chat",
char: "llama",
char: "Llama",
user: "User",
})

@@ -166,8 +167,139 @@
mirostat: 0, // 0/1/2
mirostat_tau: 5, // target entropy
mirostat_eta: 0.1, // learning rate
grammar: '',
})

/* START: Support for storing prompt templates and parameters in browser LocalStorage */

const local_storage_storageKey = "llamacpp_server_local_storage";

function local_storage_setDataFromObject(tag, content) {
localStorage.setItem(local_storage_storageKey + '/' + tag, JSON.stringify(content));
}

function local_storage_setDataFromRawText(tag, content) {
localStorage.setItem(local_storage_storageKey + '/' + tag, content);
}

function local_storage_getDataAsObject(tag) {
const item = localStorage.getItem(local_storage_storageKey + '/' + tag);
if (!item) {
return null;
} else {
return JSON.parse(item);
}
}

function local_storage_getDataAsRawText(tag) {
const item = localStorage.getItem(local_storage_storageKey + '/' + tag);
if (!item) {
return null;
} else {
return item;
}
}

// create a container for user templates and settings

const savedUserTemplates = signal({})
const selectedUserTemplate = signal({ name: '', template: { session: {}, params: {} } })

// let's import locally saved templates and settings if there are any
// user templates and settings are stored in one object
// in the form { "templatename": "templatedata" } and { "settingstemplatename": "settingsdata" }

console.log('Importing saved templates')

let importedTemplates = local_storage_getDataAsObject('user_templates')

if (importedTemplates) {
// saved templates were successfully imported.

console.log('Processing saved templates and updating default template')

//console.log(importedTemplates);
savedUserTemplates.value = importedTemplates;

//override default template
savedUserTemplates.value.default = { session: session.value, params: params.value }
local_storage_setDataFromObject('user_templates', savedUserTemplates.value)
} else {
// no saved templates detected.

console.log('Initializing LocalStorage and saving default template')

savedUserTemplates.value = { "default": { session: session.value, params: params.value } }
local_storage_setDataFromObject('user_templates', savedUserTemplates.value)
}

function userTemplateResetToDefault() {
console.log('Resetting template to default')
selectedUserTemplate.value.name = 'default';
selectedUserTemplate.value.data = savedUserTemplates.value['default'];
}

function userTemplateApply(t) {
session.value = t.data.session;
params.value = t.data.params;
}

function userTemplateResetToDefaultAndApply() {
userTemplateResetToDefault()
userTemplateApply(selectedUserTemplate.value)
}

function userTemplateLoadAndApplyAutosaved() {
// get autosaved last used template
let lastUsedTemplate = local_storage_getDataAsObject('user_templates_last')

if (lastUsedTemplate) {

console.log('Autosaved template found, restoring')

selectedUserTemplate.value = lastUsedTemplate
}
else {

console.log('No autosaved template found, using default template')
// no autosaved last used template was found, so load from default.

userTemplateResetToDefault()
}

console.log('Applying template')
// and update internal data from templates

userTemplateApply(selectedUserTemplate.value)
}

//console.log(savedUserTemplates.value)
//console.log(selectedUserTemplate.value)

function userTemplateAutosave() {
console.log('Template Autosave...')
if (selectedUserTemplate.value.name == 'default') {
// we don't want to save over default template, so let's create a new one
let newTemplateName = 'UserTemplate-' + Date.now().toString()
let newTemplate = { 'name': newTemplateName, 'data': { 'session': session.value, 'params': params.value } }

console.log('Saving as ' + newTemplateName)

// save in the autosave slot
local_storage_setDataFromObject('user_templates_last', newTemplate)

// and load it back and apply
userTemplateLoadAndApplyAutosaved()
} else {
local_storage_setDataFromObject('user_templates_last', { 'name': selectedUserTemplate.value.name, 'data': { 'session': session.value, 'params': params.value } })
}
}

console.log('Checking for autosaved last used template')
userTemplateLoadAndApplyAutosaved()

/* END: Support for storing prompt templates and parameters in browser LocalStorage */

const llamaStats = signal(null)
const controller = signal(null)
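For orientation, a sketch of what the code above ends up persisting (key names taken from the code; the sample values are illustrative only):

// stored under "llamacpp_server_local_storage/user_templates":
// { "default": { "session": { ... }, "params": { ... } }, ...other saved template names... }
//
// stored under "llamacpp_server_local_storage/user_templates_last" (the autosave slot):
// { "name": "UserTemplate-1691234567890", "data": { "session": { ... }, "params": { ... } } }
// where "UserTemplate-<timestamp>" is the name userTemplateAutosave() generates when the
// currently selected template is 'default'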
@@ -282,8 +414,9 @@

useEffect(() => {
// scroll to bottom (if needed)
if (container.current && container.current.scrollHeight <= container.current.scrollTop + container.current.offsetHeight + 300) {
container.current.scrollTo(0, container.current.scrollHeight)
const parent = container.current.parentElement;
if (parent && parent.scrollHeight <= parent.scrollTop + parent.offsetHeight + 300) {
parent.scrollTo(0, parent.scrollHeight)
}
}, [messages])

@@ -303,6 +436,26 @@
const updateParamsFloat = (el) => params.value = { ...params.value, [el.target.name]: parseFloat(el.target.value) }
const updateParamsInt = (el) => params.value = { ...params.value, [el.target.name]: Math.floor(parseFloat(el.target.value)) }

const grammarJsonSchemaPropOrder = signal('')
const updateGrammarJsonSchemaPropOrder = (el) => grammarJsonSchemaPropOrder.value = el.target.value
const convertJSONSchemaGrammar = () => {
try {
const schema = JSON.parse(params.value.grammar)
const converter = new SchemaConverter(
grammarJsonSchemaPropOrder.value
.split(',')
.reduce((acc, cur, i) => ({...acc, [cur.trim()]: i}), {})
)
converter.visit(schema, '')
params.value = {
...params.value,
grammar: converter.formatGrammar(),
}
} catch (e) {
alert(`Convert failed: ${e.message}`)
}
}
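To make the reduce above concrete, a throwaway example of how the prop-order field is turned into an index map:

// 'name,age,email' -> { name: 0, age: 1, email: 2 }
const order = 'name,age,email'
    .split(',')
    .reduce((acc, cur, i) => ({ ...acc, [cur.trim()]: i }), {});
// SchemaConverter then sorts object properties by this index (missing keys sort last)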
const FloatField = ({label, max, min, name, step, value}) => {
return html`
<div>
@@ -323,8 +476,34 @@
`
};

const userTemplateReset = (e) => {
e.preventDefault();
userTemplateResetToDefaultAndApply()
}

const UserTemplateResetButton = () => {
if (selectedUserTemplate.value.name == 'default') {
return html`
<button disabled>Using default template</button>
`
}

return html`
<button onclick=${userTemplateReset}>Reset all to default</button>
`
};

useEffect(() => {
// autosave template on every change
userTemplateAutosave()
}, [session.value, params.value])

return html`
<form>
<fieldset>
<${UserTemplateResetButton}/>
</fieldset>

<fieldset>
<div>
<label for="prompt">Prompt</label>
@@ -354,6 +533,13 @@
<label for="template">Chat history template</label>
<textarea id="template" name="historyTemplate" value="${session.value.historyTemplate}" rows=1 oninput=${updateSession}/>
</div>

<div>
<label for="template">Grammar</label>
<textarea id="grammar" name="grammar" placeholder="Use gbnf or JSON Schema+convert" value="${params.value.grammar}" rows=4 oninput=${updateParams}/>
<input type="text" name="prop-order" placeholder="order: prop1,prop2,prop3" oninput=${updateGrammarJsonSchemaPropOrder} />
<button type="button" onclick=${convertJSONSchemaGrammar}>Convert JSON Schema</button>
</div>
</fieldset>

<fieldset class="two">
File diff suppressed because one or more lines are too long

examples/server/public/json-schema-to-grammar.mjs (new file, 112 lines)
@@ -0,0 +1,112 @@
const SPACE_RULE = '" "?';

const PRIMITIVE_RULES = {
boolean: '("true" | "false") space',
number: '("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? space',
integer: '("-"? ([0-9] | [1-9] [0-9]*)) space',
string: ` "\\"" (
[^"\\\\] |
"\\\\" (["\\\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
)* "\\"" space`,
null: '"null" space',
};

const INVALID_RULE_CHARS_RE = /[^\dA-Za-z-]+/g;
const GRAMMAR_LITERAL_ESCAPE_RE = /[\n\r"]/g;
const GRAMMAR_LITERAL_ESCAPES = {'\r': '\\r', '\n': '\\n', '"': '\\"'};

export class SchemaConverter {
constructor(propOrder) {
this._propOrder = propOrder || {};
this._rules = new Map();
this._rules.set('space', SPACE_RULE);
}

_formatLiteral(literal) {
const escaped = JSON.stringify(literal).replace(
GRAMMAR_LITERAL_ESCAPE_RE,
m => GRAMMAR_LITERAL_ESCAPES[m]
);
return `"${escaped}"`;
}

_addRule(name, rule) {
let escName = name.replace(INVALID_RULE_CHARS_RE, '-');
let key = escName;

if (this._rules.has(escName)) {
if (this._rules.get(escName) === rule) {
return key;
}

let i = 0;
while (this._rules.has(`${escName}${i}`)) {
i += 1;
}
key = `${escName}${i}`;
}

this._rules.set(key, rule);
return key;
}

visit(schema, name) {
const schemaType = schema.type;
const ruleName = name || 'root';

if (schema.oneOf || schema.anyOf) {
const rule = (schema.oneOf || schema.anyOf).map((altSchema, i) =>
this.visit(altSchema, `${name}${name ? "-" : ""}${i}`)
).join(' | ');

return this._addRule(ruleName, rule);
} else if ('const' in schema) {
return this._addRule(ruleName, this._formatLiteral(schema.const));
} else if ('enum' in schema) {
const rule = schema.enum.map(v => this._formatLiteral(v)).join(' | ');
return this._addRule(ruleName, rule);
} else if (schemaType === 'object' && 'properties' in schema) {
// TODO: `required` keyword (from python implementation)
const propOrder = this._propOrder;
const propPairs = Object.entries(schema.properties).sort((a, b) => {
// sort by position in prop_order (if specified) then by key
const orderA = typeof propOrder[a[0]] === 'number' ? propOrder[a[0]] : Infinity;
const orderB = typeof propOrder[b[0]] === 'number' ? propOrder[b[0]] : Infinity;
return orderA - orderB || a[0].localeCompare(b[0]);
});

let rule = '"{" space';
propPairs.forEach(([propName, propSchema], i) => {
const propRuleName = this.visit(propSchema, `${name}${name ? "-" : ""}${propName}`);
if (i > 0) {
rule += ' "," space';
}
rule += ` ${this._formatLiteral(propName)} space ":" space ${propRuleName}`;
});
rule += ' "}" space';

return this._addRule(ruleName, rule);
} else if (schemaType === 'array' && 'items' in schema) {
// TODO `prefixItems` keyword (from python implementation)
const itemRuleName = this.visit(schema.items, `${name}${name ? "-" : ""}item`);
const rule = `"[" space (${itemRuleName} ("," space ${itemRuleName})*)? "]" space`;
return this._addRule(ruleName, rule);
} else {
if (!PRIMITIVE_RULES[schemaType]) {
throw new Error(`Unrecognized schema: ${JSON.stringify(schema)}`);
}
return this._addRule(
ruleName === 'root' ? 'root' : schemaType,
PRIMITIVE_RULES[schemaType]
);
}
}

formatGrammar() {
let grammar = '';
this._rules.forEach((rule, name) => {
grammar += `${name} ::= ${rule}\n`;
});
return grammar;
}
}
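A usage sketch for the converter above (the constructor, visit() and formatGrammar() calls match the file; the schema itself is a made-up example):

const converter = new SchemaConverter({ name: 0, age: 1 }); // optional prop order
converter.visit({
    type: 'object',
    properties: {
        age:  { type: 'integer' },
        name: { type: 'string' },
    },
}, ''); // an empty name registers the rule as 'root'
console.log(converter.formatGrammar());
// prints one rule per line, roughly:
//   space ::= " "?
//   string ::= ...
//   integer ::= ...
//   root ::= "{" space "\"name\"" space ":" space string "," space "\"age\"" space ":" space integer "}" space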
@@ -1,6 +1,7 @@
#include "common.h"
#include "llama.h"
#include "build-info.h"
#include "grammar-parser.h"

#ifndef NDEBUG
// crash the server in debug mode, otherwise send an http 500 error
@@ -14,6 +15,7 @@
#include "index.html.hpp"
#include "index.js.hpp"
#include "completion.js.hpp"
#include "json-schema-to-grammar.mjs.hpp"

#ifndef SERVER_VERBOSE
#define SERVER_VERBOSE 1
@@ -195,6 +197,9 @@ struct llama_server_context
llama_context *ctx = nullptr;
gpt_params params;

grammar_parser::parse_state parsed_grammar;
llama_grammar *grammar = nullptr;

bool truncated = false;
bool stopped_eos = false;
bool stopped_word = false;
@@ -226,6 +231,7 @@ struct llama_server_context
void rewind()
{
params.antiprompt.clear();
params.grammar.clear();
num_prompt_tokens = 0;
num_tokens_predicted = 0;
generated_text = "";
@@ -237,9 +243,13 @@ struct llama_server_context
stopped_limit = false;
stopping_word = "";
multibyte_pending = 0;

n_remain = 0;
n_past = 0;

if (grammar != nullptr) {
llama_grammar_free(grammar);
grammar = nullptr;
}
}

bool loadModel(const gpt_params &params_)
@@ -257,6 +267,31 @@ struct llama_server_context
return true;
}

bool loadGrammar()
{
if (!params.grammar.empty()) {
parsed_grammar = grammar_parser::parse(params.grammar.c_str());
// will be empty (default) if there are parse errors
if (parsed_grammar.rules.empty()) {
LOG_ERROR("grammar parse error", {{"grammar", params.grammar}});
return false;
}
grammar_parser::print_grammar(stderr, parsed_grammar);

{
auto it = params.logit_bias.find(llama_token_eos(ctx));
if (it != params.logit_bias.end() && it->second == -INFINITY) {
LOG_WARNING("EOS token is disabled, which will cause most grammars to fail", {});
}
}

std::vector<const llama_grammar_element *> grammar_rules(parsed_grammar.c_rules());
grammar = llama_grammar_init(
grammar_rules.data(), grammar_rules.size(), parsed_grammar.symbol_ids.at("root"));
}
return true;
}

void loadPrompt()
{
params.prompt.insert(0, 1, ' '); // always add a first space
@@ -367,7 +402,7 @@ struct llama_server_context
if (params.n_predict == 0)
{
has_next_token = false;
result.tok = llama_token_eos();
result.tok = llama_token_eos(ctx);
return result;
}

@@ -407,7 +442,7 @@ struct llama_server_context
llama_token_data_array candidates_p = {candidates.data(), candidates.size(), false};

// Apply penalties
float nl_logit = logits[llama_token_nl()];
float nl_logit = logits[llama_token_nl(ctx)];
auto last_n_repeat = std::min(std::min((int)last_n_tokens.size(), repeat_last_n), params.n_ctx);
llama_sample_repetition_penalty(ctx, &candidates_p,
last_n_tokens.data() + last_n_tokens.size() - last_n_repeat,
@@ -417,7 +452,11 @@ struct llama_server_context
last_n_repeat, alpha_frequency, alpha_presence);
if (!penalize_nl)
{
logits[llama_token_nl()] = nl_logit;
logits[llama_token_nl(ctx)] = nl_logit;
}

if (grammar != nullptr) {
llama_sample_grammar(ctx, &candidates_p, grammar);
}

if (temp <= 0)
@@ -457,10 +496,15 @@ struct llama_server_context
}
}

if (grammar != nullptr) {
llama_grammar_accept_token(ctx, grammar, result.tok);
}

for (size_t i = 0; i < std::min(candidates_p.size, (size_t)n_probs); ++i)
{
result.probs.push_back({candidates_p.data[i].id, candidates_p.data[i].p});
}

last_n_tokens.erase(last_n_tokens.begin());
last_n_tokens.push_back(result.tok);
num_tokens_predicted++;
@@ -471,7 +515,7 @@ struct llama_server_context
// decrement remaining sampling budget
--n_remain;

if (!embd.empty() && embd.back() == llama_token_eos())
if (!embd.empty() && embd.back() == llama_token_eos(ctx))
{
// stopping_word = llama_token_to_str(ctx, embd.back());
has_next_token = false;
@@ -608,8 +652,6 @@ static void server_print_usage(const char *argv0, const gpt_params &params,
fprintf(stdout, " -v, --verbose verbose output (default: %s)\n", server_verbose ? "enabled" : "disabled");
fprintf(stdout, " -t N, --threads N number of threads to use during computation (default: %d)\n", params.n_threads);
fprintf(stdout, " -c N, --ctx-size N size of the prompt context (default: %d)\n", params.n_ctx);
fprintf(stdout, " -gqa N, --gqa N grouped-query attention factor (TEMP!!! use 8 for LLaMAv2 70B) (default: %d)\n", params.n_gqa);
fprintf(stdout, " -eps N, --rms-norm-eps N rms norm eps (TEMP!!! use 1e-5 for LLaMAv2) (default: %.1e)\n", params.rms_norm_eps);
fprintf(stdout, " --rope-freq-base N RoPE base frequency (default: %.1f)\n", params.rope_freq_base);
fprintf(stdout, " --rope-freq-scale N RoPE frequency scaling factor (default: %g)\n", params.rope_freq_scale);
fprintf(stdout, " -b N, --batch-size N batch size for prompt processing (default: %d)\n", params.n_batch);
@@ -623,17 +665,17 @@ static void server_print_usage(const char *argv0, const gpt_params &params,
{
fprintf(stdout, " --no-mmap do not memory-map model (slower load but may reduce pageouts if not using mlock)\n");
}
fprintf(stdout, " --numa attempt optimizations that help on some NUMA systems\n");
#ifdef LLAMA_SUPPORTS_GPU_OFFLOAD
fprintf(stdout, " -ngl N, --n-gpu-layers N\n");
fprintf(stdout, " number of layers to store in VRAM\n");
fprintf(stdout, " -ts SPLIT --tensor-split SPLIT\n");
fprintf(stdout, " how to split tensors across multiple GPUs, comma-separated list of proportions, e.g. 3,1\n");
fprintf(stdout, " how to split tensors across multiple GPUs, comma-separated list of proportions, e.g. 3,1\n");
fprintf(stdout, " -mg i, --main-gpu i the GPU to use for scratch and small tensors\n");
fprintf(stdout, " -lv, --low-vram don't allocate VRAM scratch buffer\n");
fprintf(stdout, " -mmq, --mul-mat-q use experimental mul_mat_q CUDA kernels instead of cuBLAS. TEMP!!!\n" );
fprintf(stdout, " Reduces VRAM usage by 700/970/1430 MiB for 7b/13b/33b but prompt processing speed\n" );
fprintf(stdout, " is still suboptimal, especially q2_K, q3_K, q5_K, and q6_K.\n" );
fprintf(stdout, " -nommq, --no-mul-mat-q\n");
fprintf(stdout, " use cuBLAS instead of custom mul_mat_q CUDA kernels.\n");
fprintf(stdout, " Not recommended since this is both slower and uses more VRAM.\n");
#endif
fprintf(stdout, " -m FNAME, --model FNAME\n");
fprintf(stdout, " model path (default: %s)\n", params.model.c_str());
@@ -729,23 +771,6 @@ static void server_params_parse(int argc, char **argv, server_params &sparams,
}
params.n_ctx = std::stoi(argv[i]);
}
else if (arg == "-gqa" || arg == "--gqa")
{
if (++i >= argc)
{
invalid_param = true;
break;
}
params.n_gqa = std::stoi(argv[i]);
}
else if (arg == "-eps" || arg == "--rms-norm-eps") {
if (++i >= argc)
{
invalid_param = true;
break;
}
params.rms_norm_eps = std::stof(argv[i]);
}
else if (arg == "--rope-freq-base")
{
if (++i >= argc)
@@ -841,12 +866,12 @@ static void server_params_parse(int argc, char **argv, server_params &sparams,
LOG_WARNING("warning: llama.cpp was compiled without cuBLAS. It is not possible to set lower vram usage.\n", {});
#endif // GGML_USE_CUBLAS
}
else if (arg == "--mul-mat-q" || arg == "-mmq")
else if (arg == "--no-mul-mat-q" || arg == "-nommq")
{
#ifdef GGML_USE_CUBLAS
params.mul_mat_q = true;
params.mul_mat_q = false;
#else
LOG_WARNING("warning: llama.cpp was compiled without cuBLAS. It is not possible to use mul_mat_q kernels.\n", {});
LOG_WARNING("warning: llama.cpp was compiled without cuBLAS. Disabling mul_mat_q kernels has no effect.\n", {});
#endif // GGML_USE_CUBLAS
}
else if (arg == "--main-gpu" || arg == "-mg")
@@ -897,6 +922,10 @@ static void server_params_parse(int argc, char **argv, server_params &sparams,
{
params.use_mmap = false;
}
else if (arg == "--numa")
{
params.numa = true;
}
else if (arg == "--embedding")
{
params.embedding = true;
@@ -919,7 +948,7 @@ static void server_params_parse(int argc, char **argv, server_params &sparams,

static json format_generation_settings(llama_server_context &llama)
{
const auto eos_bias = llama.params.logit_bias.find(llama_token_eos());
const auto eos_bias = llama.params.logit_bias.find(llama_token_eos(llama.ctx));
const bool ignore_eos = eos_bias != llama.params.logit_bias.end() &&
eos_bias->second < 0.0f && std::isinf(eos_bias->second);

@@ -947,6 +976,7 @@ static json format_generation_settings(llama_server_context &llama)
{"stream", llama.stream},
{"logit_bias", llama.params.logit_bias},
{"n_probs", llama.params.n_probs},
{"grammar", llama.params.grammar},
};
}

@@ -964,7 +994,7 @@ static json format_timings(llama_server_context &llama)
assert(timings.n_eval == llama.num_tokens_predicted);

return json{
{"prompt_n", timings.n_eval},
{"prompt_n", timings.n_p_eval},
{"prompt_ms", timings.t_p_eval_ms},
{"prompt_per_token_ms", timings.t_p_eval_ms / timings.n_p_eval},
{"prompt_per_second", 1e3 / timings.t_p_eval_ms * timings.n_p_eval},
@@ -993,7 +1023,6 @@ static json format_final_response(llama_server_context &llama, const std::string
{"stopped_limit", llama.stopped_limit},
{"stopping_word", llama.stopping_word},
{"tokens_cached", llama.n_past},
{"tokens_predicted", llama.num_tokens_predicted},
{"timings", format_timings(llama)},
};

@@ -1026,34 +1055,44 @@ static json format_tokenizer_response(const std::vector<llama_token> &tokens)
{"tokens", tokens}};
}

template <typename T>
static T json_value(const json &body, const std::string &key, const T &default_value)
{
// Fallback null to default value
return body.contains(key) && !body.at(key).is_null()
? body.value(key, default_value)
: default_value;
}

static void parse_options_completion(const json &body, llama_server_context &llama)
{
gpt_params default_params;

llama.stream = body.value("stream", false);
llama.params.n_predict = body.value("n_predict", default_params.n_predict);
llama.params.top_k = body.value("top_k", default_params.top_k);
llama.params.top_p = body.value("top_p", default_params.top_p);
llama.params.tfs_z = body.value("tfs_z", default_params.tfs_z);
llama.params.typical_p = body.value("typical_p", default_params.typical_p);
llama.params.repeat_last_n = body.value("repeat_last_n", default_params.repeat_last_n);
llama.params.temp = body.value("temperature", default_params.temp);
llama.params.repeat_penalty = body.value("repeat_penalty", default_params.repeat_penalty);
llama.params.presence_penalty = body.value("presence_penalty", default_params.presence_penalty);
llama.params.frequency_penalty = body.value("frequency_penalty", default_params.frequency_penalty);
llama.params.mirostat = body.value("mirostat", default_params.mirostat);
llama.params.mirostat_tau = body.value("mirostat_tau", default_params.mirostat_tau);
llama.params.mirostat_eta = body.value("mirostat_eta", default_params.mirostat_eta);
llama.params.penalize_nl = body.value("penalize_nl", default_params.penalize_nl);
llama.params.n_keep = body.value("n_keep", default_params.n_keep);
llama.params.seed = body.value("seed", default_params.seed);
llama.params.prompt = body.value("prompt", default_params.prompt);
llama.params.n_probs = body.value("n_probs", default_params.n_probs);
llama.stream = json_value(body, "stream", false);
llama.params.n_predict = json_value(body, "n_predict", default_params.n_predict);
llama.params.top_k = json_value(body, "top_k", default_params.top_k);
llama.params.top_p = json_value(body, "top_p", default_params.top_p);
llama.params.tfs_z = json_value(body, "tfs_z", default_params.tfs_z);
llama.params.typical_p = json_value(body, "typical_p", default_params.typical_p);
llama.params.repeat_last_n = json_value(body, "repeat_last_n", default_params.repeat_last_n);
llama.params.temp = json_value(body, "temperature", default_params.temp);
llama.params.repeat_penalty = json_value(body, "repeat_penalty", default_params.repeat_penalty);
llama.params.presence_penalty = json_value(body, "presence_penalty", default_params.presence_penalty);
llama.params.frequency_penalty = json_value(body, "frequency_penalty", default_params.frequency_penalty);
llama.params.mirostat = json_value(body, "mirostat", default_params.mirostat);
llama.params.mirostat_tau = json_value(body, "mirostat_tau", default_params.mirostat_tau);
llama.params.mirostat_eta = json_value(body, "mirostat_eta", default_params.mirostat_eta);
llama.params.penalize_nl = json_value(body, "penalize_nl", default_params.penalize_nl);
llama.params.n_keep = json_value(body, "n_keep", default_params.n_keep);
llama.params.seed = json_value(body, "seed", default_params.seed);
llama.params.prompt = json_value(body, "prompt", default_params.prompt);
llama.params.grammar = json_value(body, "grammar", default_params.grammar);
llama.params.n_probs = json_value(body, "n_probs", default_params.n_probs);

llama.params.logit_bias.clear();
if (body.value("ignore_eos", false))
if (json_value(body, "ignore_eos", false))
{
llama.params.logit_bias[llama_token_eos()] = -INFINITY;
llama.params.logit_bias[llama_token_eos(llama.ctx)] = -INFINITY;
}

const auto &logit_bias = body.find("logit_bias");
@@ -1169,6 +1208,12 @@ int main(int argc, char **argv)
res.set_content(reinterpret_cast<const char*>(&completion_js), completion_js_len, "application/javascript");
return false; });

// this is only called if no index.html is found in the public --path
svr.Get("/json-schema-to-grammar.mjs", [](const Request &, Response &res)
{
res.set_content(reinterpret_cast<const char*>(&json_schema_to_grammar_mjs), json_schema_to_grammar_mjs_len, "application/javascript");
return false; });

svr.Post("/completion", [&llama](const Request &req, Response &res)
{
auto lock = llama.lock();
@@ -1179,6 +1224,12 @@ int main(int argc, char **argv)

parse_options_completion(json::parse(req.body), llama);

if (!llama.loadGrammar())
{
res.status = 400;
return;
}

llama.loadPrompt();
llama.beginCompletion();
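A client-side sketch of the new grammar path through this endpoint (the field names and the 400-on-parse-error behaviour are from the diff; the grammar string itself is a made-up example):

const response = await fetch('/completion', {
    method: 'POST',
    body: JSON.stringify({
        prompt: 'Answer yes or no: is the sky blue?',
        n_predict: 8,
        grammar: 'root ::= "yes" | "no"', // parsed by loadGrammar(); parse errors yield HTTP 400
    }),
});
const data = await response.json();
console.log(data.content); // sampled output, constrained by the grammar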
@@ -1274,7 +1325,11 @@ int main(int argc, char **argv)
sink.done();
return true;
};
res.set_chunked_content_provider("text/event-stream", chunked_content_provider);
const auto on_complete = [&](bool) {
llama.mutex.unlock();
};
lock.release();
res.set_chunked_content_provider("text/event-stream", chunked_content_provider, on_complete);
} });

svr.Get("/model.json", [&llama](const Request &, Response &res)
@@ -1290,7 +1345,7 @@ int main(int argc, char **argv)
auto lock = llama.lock();

const json body = json::parse(req.body);
const std::string content = body.value("content", "");
const std::string content = json_value<std::string>(body, "content", "");
const std::vector<llama_token> tokens = llama_tokenize(llama.ctx, content, false);
const json data = format_tokenizer_response(tokens);
return res.set_content(data.dump(), "application/json"); });
@@ -1303,7 +1358,7 @@ int main(int argc, char **argv)

llama.rewind();
llama_reset_timings(llama.ctx);
llama.params.prompt = body.value("content", "");
llama.params.prompt = json_value<std::string>(body, "content", "");
llama.params.n_predict = 0;
llama.loadPrompt();
llama.beginCompletion();
@@ -1330,8 +1385,12 @@ int main(int argc, char **argv)

svr.set_error_handler([](const Request &, Response &res)
{
res.set_content("File Not Found", "text/plain");
res.status = 404; });
if (res.status == 400) {
res.set_content("Invalid request", "text/plain");
} else if (res.status != 500) {
res.set_content("File Not Found", "text/plain");
res.status = 404;
} });

// set timeouts and change hostname and port
svr.set_read_timeout(sparams.read_timeout);
@@ -1359,6 +1418,9 @@ int main(int argc, char **argv)
return 1;
}

if (llama.grammar != nullptr) {
llama_grammar_free(llama.grammar);
}
llama_backend_free();

return 0;
@@ -2,180 +2,129 @@
#define _GNU_SOURCE
#endif

#include "common.h"
#include "llama.h"
#include "build-info.h"

#include <cassert>
#include <cinttypes>
#include "common.h"
#include "llama.h"

#include <cmath>
#include <cstdio>
#include <cstring>
#include <ctime>
#include <fstream>
#include <iostream>
#include <string>
#include <vector>

#if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__))
#include <signal.h>
#include <unistd.h>
#elif defined (_WIN32)
#define WIN32_LEAN_AND_MEAN
#define NOMINMAX
#include <windows.h>
#include <signal.h>
#endif

int main(int argc, char ** argv)
{
int main(int argc, char ** argv) {
gpt_params params;

//---------------------------------
// Print help :
//---------------------------------

if ( argc == 1 || argv[1][0] == '-' )
{
printf( "usage: %s MODEL_PATH [PROMPT]\n" , argv[0] );
if (argc == 1 || argv[1][0] == '-') {
printf("usage: %s MODEL_PATH [PROMPT]\n" , argv[0]);
return 1 ;
}

//---------------------------------
// Load parameters :
//---------------------------------

if ( argc >= 2 )
{
if (argc >= 2) {
params.model = argv[1];
}

if ( argc >= 3 )
{
if (argc >= 3) {
params.prompt = argv[2];
}

if ( params.prompt.empty() )
{
if (params.prompt.empty()) {
params.prompt = "Hello my name is";
}

//---------------------------------
// Init LLM :
//---------------------------------
// init LLM

llama_backend_init(params.numa);

llama_model * model;
llama_context * ctx;
llama_context_params ctx_params = llama_context_default_params();

std::tie(model, ctx) = llama_init_from_gpt_params( params );
llama_model * model = llama_load_model_from_file(params.model.c_str(), ctx_params);

if ( model == NULL )
{
fprintf( stderr , "%s: error: unable to load model\n" , __func__ );
if (model == NULL) {
fprintf(stderr , "%s: error: unable to load model\n" , __func__);
return 1;
}

//---------------------------------
// Tokenize the prompt :
//---------------------------------
llama_context * ctx = llama_new_context_with_model(model, ctx_params);

// tokenize the prompt

std::vector<llama_token> tokens_list;
tokens_list = ::llama_tokenize( ctx , params.prompt , true );
tokens_list = ::llama_tokenize(ctx, params.prompt, true);

const int max_context_size = llama_n_ctx( ctx );
const int max_tokens_list_size = max_context_size - 4 ;
const int max_context_size = llama_n_ctx(ctx);
const int max_tokens_list_size = max_context_size - 4;

if ( (int)tokens_list.size() > max_tokens_list_size )
{
fprintf( stderr , "%s: error: prompt too long (%d tokens, max %d)\n" ,
__func__ , (int)tokens_list.size() , max_tokens_list_size );
if ((int) tokens_list.size() > max_tokens_list_size) {
fprintf(stderr, "%s: error: prompt too long (%d tokens, max %d)\n", __func__, (int) tokens_list.size(), max_tokens_list_size);
return 1;
}

fprintf( stderr, "\n\n" );
fprintf(stderr, "\n\n");

// Print the tokens from the prompt :

for( auto id : tokens_list )
{
printf( "%s" , llama_token_to_str( ctx , id ) );
for (auto id : tokens_list) {
fprintf(stderr, "%s", llama_token_to_str(ctx, id).c_str());
}

fflush(stdout);
fflush(stderr);

//---------------------------------
// Main prediction loop :
//---------------------------------
// main loop

// The LLM keeps a contextual cache memory of previous token evaluation.
// Usually, once this cache is full, it is required to recompute a compressed context based on previous
// tokens (see "infinite text generation via context swapping" in the main example), but in this minimalist
// example, we will just stop the loop once this cache is full or once an end of stream is detected.

while ( llama_get_kv_cache_token_count( ctx ) < max_context_size )
{
//---------------------------------
// Evaluate the tokens :
//---------------------------------
const int n_gen = std::min(32, max_context_size);

if ( llama_eval( ctx , tokens_list.data() , tokens_list.size() , llama_get_kv_cache_token_count( ctx ) , params.n_threads ) )
{
fprintf( stderr, "%s : failed to eval\n" , __func__ );
while (llama_get_kv_cache_token_count(ctx) < n_gen) {
// evaluate the transformer

if (llama_eval(ctx, tokens_list.data(), int(tokens_list.size()), llama_get_kv_cache_token_count(ctx), params.n_threads)) {
fprintf(stderr, "%s : failed to eval\n", __func__);
return 1;
}

tokens_list.clear();

//---------------------------------
// Select the best prediction :
//---------------------------------
// sample the next token

llama_token new_token_id = 0;

auto logits = llama_get_logits( ctx );
auto n_vocab = llama_n_vocab( ctx ); // the size of the LLM vocabulary (in tokens)
auto logits = llama_get_logits(ctx);
auto n_vocab = llama_n_vocab(ctx);

std::vector<llama_token_data> candidates;
candidates.reserve( n_vocab );
candidates.reserve(n_vocab);

for( llama_token token_id = 0 ; token_id < n_vocab ; token_id++ )
{
candidates.emplace_back( llama_token_data{ token_id , logits[ token_id ] , 0.0f } );
for (llama_token token_id = 0; token_id < n_vocab; token_id++) {
candidates.emplace_back(llama_token_data{ token_id, logits[token_id], 0.0f });
}

llama_token_data_array candidates_p = { candidates.data(), candidates.size(), false };

// Select it using the "Greedy sampling" method :
new_token_id = llama_sample_token_greedy( ctx , &candidates_p );

new_token_id = llama_sample_token_greedy(ctx , &candidates_p);

// is it an end of stream ?
if ( new_token_id == llama_token_eos() )
{
if (new_token_id == llama_token_eos(ctx)) {
fprintf(stderr, " [end of text]\n");
break;
}

// Print the new token :
printf( "%s" , llama_token_to_str( ctx , new_token_id ) );
fflush( stdout );
// print the new token :
printf("%s", llama_token_to_str(ctx, new_token_id).c_str());
fflush(stdout);

// Push this new token for next evaluation :
tokens_list.push_back( new_token_id );
// push this new token for next evaluation
tokens_list.push_back(new_token_id);
}

} // wend of main loop

llama_free( ctx );
llama_free_model( model );
llama_free(ctx);
llama_free_model(model);

llama_backend_free();

fprintf(stderr, "\n\n");

return 0;
}

// EOF
@@ -1,4 +1,5 @@
#include "ggml.h"
#include "common.h"
#include "llama.h"
#include <unordered_map>
#include <vector>
@@ -16,7 +17,7 @@
#pragma warning(disable: 4244 4267) // possible loss of data
#endif

static const float rms_norm_eps = LLAMA_DEFAULT_RMS_EPS;
static const float rms_norm_eps = 1e-5f;

struct random_normal_distribution {
std::mt19937 gen;
@@ -169,14 +170,16 @@ struct ggml_tensor * randomize_tensor_uniform(struct ggml_tensor * tensor, struc
struct llama_vocab {
using id = int32_t;
using token = std::string;
using ttype = llama_token_type;

struct token_score {
token tok;
struct token_data {
token text;
float score;
ttype type;
};

std::unordered_map<token, id> token_to_id;
std::vector<token_score> id_to_token;
std::vector<token_data> id_to_token;
};

struct my_llama_hparams {
@@ -1865,10 +1868,10 @@ struct ggml_tensor * forward_batch_wo_cache_flash_attn_train(
t12->grad = expand(gb, ggml_permute(ctx0, t15->grad, 0, 2, 3, 1)); assert_shape_4d(t12->grad, N, n_batch, n_embd/n_head, n_head);
t11->grad = expand(gb, ggml_reshape_2d(ctx0, ggml_cont(ctx0, t12->grad), N*n_batch, n_embd)); assert_shape_2d(t11->grad, N*n_batch, n_embd);
t10->grad = expand(gb, ggml_permute(ctx0, t14->grad, 0, 2, 1, 3)); assert_shape_4d(t10->grad, n_embd/n_head, n_head, N, n_batch);
t09->grad = expand(gb, ggml_rope_back(ctx0, t10->grad, n_past, n_rot, rope_mode, n_ctx)); assert_shape_4d(t09->grad, n_embd/n_head, n_head, N, n_batch);
t09->grad = expand(gb, ggml_rope_back(ctx0, t10->grad, n_past, n_rot, rope_mode, n_ctx, 10000.0f, 1.0f, 0.0f, false)); assert_shape_4d(t09->grad, n_embd/n_head, n_head, N, n_batch);
t08->grad = expand(gb, ggml_reshape_2d(ctx0, t09->grad, n_embd, N*n_batch)); assert_shape_2d(t08->grad, n_embd, N*n_batch);
t07->grad = expand(gb, ggml_permute(ctx0, t13->grad, 0, 2, 1, 3)); assert_shape_4d(t07->grad, n_embd/n_head, n_head, N, n_batch);
t06->grad = expand(gb, ggml_rope_back(ctx0, t07->grad, n_past, n_rot, rope_mode, n_ctx)); assert_shape_4d(t06->grad, n_embd/n_head, n_head, N, n_batch);
t06->grad = expand(gb, ggml_rope_back(ctx0, t07->grad, n_past, n_rot, rope_mode, n_ctx, 10000.0f, 1.0f, 0.0f, false)); assert_shape_4d(t06->grad, n_embd/n_head, n_head, N, n_batch);
t05->grad = expand(gb, ggml_reshape_2d(ctx0, t06->grad, n_embd, N*n_batch)); assert_shape_2d(t05->grad, n_embd, N*n_batch);
t04->grad = expand(gb, ggml_add_inplace(ctx0,
ggml_add_inplace(ctx0,
@@ -1961,7 +1964,7 @@ void print_matrix(struct ggml_tensor * probs) {

void print_token(struct llama_context * ctx, llama_token token) {
printf("%s", llama_token_to_str(ctx, token));
printf("%s", llama_token_to_str(ctx, token).c_str());
}

void print_tokens(struct llama_context* ctx, struct ggml_tensor * tokens) {
@@ -1995,7 +1998,7 @@ void print_tokens_batch(struct llama_context* ctx, struct ggml_tensor * tokens)
}
}

void get_example_targets(const int * train_samples, size_t n_train_samples, const llama_token * train_data, size_t n_train_data, int example_id, struct ggml_tensor * tokens_input, struct ggml_tensor * target_logits, struct ggml_tensor * target_probs) {
void get_example_targets(struct llama_context * lctx, const int * train_samples, size_t n_train_samples, const llama_token * train_data, size_t n_train_data, int example_id, struct ggml_tensor * tokens_input, struct ggml_tensor * target_logits, struct ggml_tensor * target_probs) {
int n_tokens = tokens_input->ne[0];
int n_vocab = target_logits->ne[0];

@@ -2004,7 +2007,7 @@ void get_example_targets(const int * train_samples, size_t n_train_samples, cons

ggml_set_f32(target_logits, -1.0f/n_vocab);
ggml_set_f32(target_probs, 0.0f);
ggml_set_i32_1d(tokens_input, 0, llama_token_bos());
ggml_set_i32_1d(tokens_input, 0, llama_token_bos(lctx));
for (int i=1; i<n_tokens+1; ++i) {
int token = clamp(train_data[sample+i-1], 0, n_vocab-1);
set_f32_2d(target_logits, token, i-1, +1.0f);
@@ -2015,7 +2018,7 @@ void get_example_targets(const int * train_samples, size_t n_train_samples, cons
}
}

void get_example_targets_batch(struct llama_context * /*lctx*/, const int * train_samples, size_t n_train_samples, const llama_token * train_data, size_t n_train_data, int example_id, struct ggml_tensor * tokens_input, struct ggml_tensor * target_logits, struct ggml_tensor * target_probs) {
void get_example_targets_batch(struct llama_context * lctx, const int * train_samples, size_t n_train_samples, const llama_token * train_data, size_t n_train_data, int example_id, struct ggml_tensor * tokens_input, struct ggml_tensor * target_logits, struct ggml_tensor * target_probs) {
GGML_ASSERT(tokens_input->n_dims == 2);
GGML_ASSERT(target_logits->n_dims == 3);
GGML_ASSERT(target_probs->n_dims == 3);
@@ -2035,7 +2038,7 @@ void get_example_targets_batch(struct llama_context * /*lctx*/, const int * trai
size_t sample = train_samples[(example_id*n_batch + k) % n_train_samples];
GGML_ASSERT(sample+n_tokens-1 < n_train_data);

set_i32_2d(tokens_input, 0, k, llama_token_bos());
set_i32_2d(tokens_input, 0, k, llama_token_bos(lctx));
for (int i=1; i<n_tokens+1; ++i) {
int token = clamp(train_data[sample+i-1], 0, n_vocab-1);
// print_token(lctx, token);
@@ -2188,11 +2191,10 @@ int tokenize_file(struct llama_context * lctx, const char * filename, std::vecto
f.read_raw(buf.data(), f.size);
buf[f.size] = '\0';

out.resize(buf.size());

int n_tokens = llama_tokenize(lctx, buf.data(), out.data(), buf.size(), false);
if (n_tokens >= 0) {
out.resize(n_tokens);
int n_tokens = llama_tokenize(lctx, buf.data(), out.data(), out.size(), false);
if (n_tokens < 0) {
out.resize(-n_tokens);
llama_tokenize(lctx, buf.data(), out.data(), out.size(), false);
}

bool verify = false;
@@ -2200,17 +2202,17 @@ int tokenize_file(struct llama_context * lctx, const char * filename, std::vecto
const char * in = buf.data();
const char * end = buf.data() + buf.size();
for (int i = 0; i < (int) out.size(); ++i) {
const char * s = llama_token_to_str(lctx, out[i]);
int len = strlen(s);
std::string s = llama_token_to_str(lctx, out[i]);
int len = s.length();
if (in >= end) {
printf("%s: unexpected end of original text.\n", __func__);
break;
}
const bool matches = (strncmp(in, s, len) == 0);
const bool matches = (strncmp(in, s.c_str(), len) == 0);
if (matches) {
in += len;
} else {
printf("%s: mismatch: expected '%s', but got '%s'\n", __func__, std::string(in, len).c_str(), s);
printf("%s: mismatch: expected '%s', but got '%s'\n", __func__, std::string(in, len).c_str(), s.c_str());
}
}
}
@@ -2294,7 +2296,7 @@ llama_token sample(struct my_llama_sampler * sampler, float * logits, const llam
const auto params = sampler->params;

// Apply penalties
const float nl_logit = logits[llama_token_nl()];
const float nl_logit = logits[llama_token_nl(ctx)];

const int n_last = std::min(std::min(n_last_tokens, params.repeat_last_n), sampler->n_ctx);

@@ -2313,7 +2315,7 @@ llama_token sample(struct my_llama_sampler * sampler, float * logits, const llam
params.alpha_presence);

if (!params.penalize_nl) {
logits[llama_token_nl()] = nl_logit;
logits[llama_token_nl(ctx)] = nl_logit;
}

llama_token token = 0;
@@ -2612,42 +2614,45 @@ void save_as_llama_model(struct llama_vocab * vocab, struct my_llama_model * mod
return;
}

// write_magic
file.write_u32(LLAMA_FILE_MAGIC); // magic
file.write_u32(LLAMA_FILE_VERSION); // version
// write_hparams
file.write_u32(model->hparams.n_vocab);
file.write_u32(model->hparams.n_embd);
file.write_u32(model->hparams.n_mult);
file.write_u32(model->hparams.n_head);
file.write_u32(model->hparams.n_layer);
file.write_u32(model->hparams.n_rot);
file.write_u32(LLAMA_FTYPE_ALL_F32);
// write_vocab
uint32_t n_vocab = model->hparams.n_vocab;
for (uint32_t i = 0; i < n_vocab; i++) {
const auto & token_score = vocab->id_to_token.at(i);
file.write_u32((uint32_t) token_score.tok.size());
file.write_raw(token_score.tok.data(), token_score.tok.size());
file.write_raw(&token_score.score, sizeof(token_score.score));
}
// write tensors
write_tensor(&file, model->tok_embeddings);
write_tensor(&file, model->norm);
write_tensor(&file, model->output);
for (uint32_t i = 0; i < model->hparams.n_layer; ++i) {
auto & layer = model->layers[i];

write_tensor(&file, layer.attention_norm);
write_tensor(&file, layer.wq);
write_tensor(&file, layer.wk);
write_tensor(&file, layer.wv);
write_tensor(&file, layer.wo);
write_tensor(&file, layer.ffn_norm);
write_tensor(&file, layer.w1);
write_tensor(&file, layer.w2);
write_tensor(&file, layer.w3);
}
#pragma message("TODO: implement file saving using gguf")
(void) vocab;
(void) model;
// // write_magic
// file.write_u32(LLAMA_FILE_MAGIC); // magic
// file.write_u32(LLAMA_FILE_VERSION); // version
// // write_hparams
// file.write_u32(model->hparams.n_vocab);
// file.write_u32(model->hparams.n_embd);
// file.write_u32(model->hparams.n_mult);
// file.write_u32(model->hparams.n_head);
// file.write_u32(model->hparams.n_layer);
// file.write_u32(model->hparams.n_rot);
// file.write_u32(LLAMA_FTYPE_ALL_F32);
// // write_vocab
// uint32_t n_vocab = model->hparams.n_vocab;
// for (uint32_t i = 0; i < n_vocab; i++) {
// const auto & token_data = vocab->id_to_token.at(i);
// file.write_u32((uint32_t) token_data.tok.size());
// file.write_raw(token_data.tok.data(), token_data.tok.size());
// file.write_raw(&token_data.score, sizeof(token_data.score));
// }
// // write tensors
// write_tensor(&file, model->tok_embeddings);
// write_tensor(&file, model->norm);
// write_tensor(&file, model->output);
// for (uint32_t i = 0; i < model->hparams.n_layer; ++i) {
// auto & layer = model->layers[i];
//
// write_tensor(&file, layer.attention_norm);
// write_tensor(&file, layer.wq);
// write_tensor(&file, layer.wk);
// write_tensor(&file, layer.wv);
// write_tensor(&file, layer.wo);
// write_tensor(&file, layer.ffn_norm);
// write_tensor(&file, layer.w1);
// write_tensor(&file, layer.w2);
// write_tensor(&file, layer.w3);
// }
}

float cosine_decay(const int decay_steps, const float alpha, int step) {
@@ -3052,20 +3057,13 @@ int main(int argc, char ** argv) {

struct llama_vocab vocab;
{
std::vector<const char *> strings;
std::vector<float> scores;
int n_vocab = llama_n_vocab(lctx);
strings.resize(n_vocab, NULL);
scores.resize(n_vocab, 0);
n_vocab = llama_get_vocab(lctx, strings.data(), scores.data(), n_vocab);
GGML_ASSERT(n_vocab == llama_n_vocab(lctx));
const int n_vocab = llama_n_vocab(lctx);
vocab.id_to_token.resize(n_vocab);
for (int i=0; i<n_vocab; ++i) {
std::string tok = std::string(strings[i]);
float score = scores[i];
vocab.id_to_token[i].tok = tok;
vocab.id_to_token[i].score = score;
vocab.token_to_id.emplace(tok, i);
vocab.id_to_token[i].text = llama_token_get_text(lctx, i);
vocab.id_to_token[i].score = llama_token_get_score(lctx, i);
vocab.id_to_token[i].type = llama_token_get_type(lctx, i);
vocab.token_to_id.emplace(vocab.id_to_token[i].text, i);
}
}

@@ -3178,7 +3176,7 @@ int main(int argc, char ** argv) {
std::vector<int> train_samples;
train_samples.push_back(0);
for (int i = 1; i < (int) train_tokens.size() - n_tokens; ++i) {
if (!params.samples_start_after_nl || (train_tokens[i-1] == llama_token_nl())) {
if (!params.samples_start_after_nl || (train_tokens[i-1] == llama_token_nl(lctx))) {
train_samples.push_back(i);
}
}
@@ -3338,7 +3336,7 @@ int main(int argc, char ** argv) {
struct ggml_tensor * target_logits = ggml_new_tensor_2d(model.ctx, GGML_TYPE_F32, n_vocab, n_tokens);
struct ggml_tensor * target_probs = ggml_new_tensor_2d(model.ctx, GGML_TYPE_F32, n_vocab, n_tokens);

get_example_targets(train_samples.data(), train_samples.size(), train_tokens.data(), train_tokens.size(), rand()%train_samples.size(), tokens_input, target_logits, target_probs);
get_example_targets(lctx, train_samples.data(), train_samples.size(), train_tokens.data(), train_tokens.size(), rand()%train_samples.size(), tokens_input, target_logits, target_probs);
for (int i=sample_ctx; i<n_tokens; ++i) {
ggml_set_i32_1d(tokens_input, i, n_vocab/2);
}

@@ -14,8 +14,6 @@
with pkgs.darwin.apple_sdk_11_0.frameworks; [
Accelerate
MetalKit
MetalPerformanceShaders
MetalPerformanceShadersGraph
]
else if isAarch32 && isDarwin then
with pkgs.darwin.apple_sdk.frameworks; [
54 ggml-alloc.c
@@ -67,6 +67,8 @@ struct ggml_allocr {
struct hash_node hash_table[GGML_GRAPH_HASHTABLE_SIZE];
size_t max_size;
bool measure;
int parse_seq[GGML_MAX_NODES];
bool has_parse_seq;

#ifdef GGML_ALLOCATOR_DEBUG
struct ggml_tensor * allocated_tensors[1024];
@@ -74,7 +76,7 @@ struct ggml_allocr {
};

#ifdef GGML_ALLOCATOR_DEBUG
static void add_allocated_tensor(struct ggml_allocator * alloc, struct ggml_tensor * tensor) {
static void add_allocated_tensor(struct ggml_allocr * alloc, struct ggml_tensor * tensor) {
for (int i = 0; i < 1024; i++) {
if (alloc->allocated_tensors[i] == NULL) {
alloc->allocated_tensors[i] = tensor;
@@ -83,7 +85,7 @@ static void add_allocated_tensor(struct ggml_allocator * alloc, struct ggml_tens
}
GGML_ASSERT(!"out of allocated_tensors");
}
static void remove_allocated_tensor(struct ggml_allocator * alloc, struct ggml_tensor * tensor) {
static void remove_allocated_tensor(struct ggml_allocr * alloc, struct ggml_tensor * tensor) {
for (int i = 0; i < 1024; i++) {
if (alloc->allocated_tensors[i] == tensor ||
(alloc->allocated_tensors[i] != NULL && alloc->allocated_tensors[i]->data == tensor->data)) {
@@ -111,10 +113,10 @@ void ggml_allocr_alloc(struct ggml_allocr * alloc, struct ggml_tensor * tensor)

size_t max_avail = 0;

// find the best fitting free block
// find the best fitting free block besides the last block
int best_fit_block = -1;
size_t best_fit_size = SIZE_MAX;
for (int i = 0; i < alloc->n_free_blocks; i++) {
for (int i = 0; i < alloc->n_free_blocks - 1; i++) {
struct free_block * block = &alloc->free_blocks[i];
max_avail = MAX(max_avail, block->size);
if (block->size >= size && block->size <= best_fit_size) {
@@ -126,10 +128,17 @@ void ggml_allocr_alloc(struct ggml_allocr * alloc, struct ggml_tensor * tensor)
AT_PRINTF("block %d\n", best_fit_block);

if (best_fit_block == -1) {
fprintf(stderr, "%s: not enough space in the buffer (needed %zu, largest block available %zu)\n",
__func__, size, max_avail);
GGML_ASSERT(!"not enough space in the buffer");
// the last block is our last resort
struct free_block * block = &alloc->free_blocks[alloc->n_free_blocks - 1];
if (block->size >= size) {
best_fit_block = alloc->n_free_blocks - 1;
max_avail = MAX(max_avail, block->size);
} else {
fprintf(stderr, "%s: not enough space in the buffer (needed %zu, largest block available %zu)\n",
__func__, size, max_avail);
GGML_ASSERT(!"not enough space in the buffer");
return;
}
}
struct free_block * block = &alloc->free_blocks[best_fit_block];
void * addr = block->addr;
@@ -229,6 +238,17 @@ static void ggml_allocator_free_tensor(struct ggml_allocr * alloc, struct ggml_t
alloc->n_free_blocks++;
}

void ggml_allocr_set_parse_seq(struct ggml_allocr * alloc, int * list, int n) {
int pos = 0;
for (int i = 0; i < n; i++) {
if (list[i] != -1) {
alloc->parse_seq[pos] = list[i];
pos++;
}
}
alloc->has_parse_seq = true;
}

void ggml_allocr_reset(struct ggml_allocr * alloc) {
alloc->n_free_blocks = 1;
size_t align_offset = aligned_offset(alloc->data, 0, alloc->alignment);
@@ -248,6 +268,8 @@ struct ggml_allocr * ggml_allocr_new(void * data, size_t size, size_t alignment)
/*.hash_table    = */ {{0}},
/*.max_size      = */ 0,
/*.measure       = */ false,
/*.parse_seq     = */ {0},
/*.has_parse_seq = */ false,
#ifdef GGML_ALLOCATOR_DEBUG
/*.allocated_tensors = */ = {0},
#endif
@@ -275,6 +297,8 @@ struct ggml_allocr * ggml_allocr_new_measure(size_t alignment) {
/*.hash_table    = */ {{0}},
/*.max_size      = */ 0,
/*.measure       = */ true,
/*.parse_seq     = */ {0},
/*.has_parse_seq = */ false,
#ifdef GGML_ALLOCATOR_DEBUG
/*.allocated_tensors = */ = {0},
#endif
@@ -394,6 +418,14 @@ static void allocate_node(struct ggml_allocr * alloc, struct ggml_tensor * node)
if (parent == NULL) {
break;
}

// if the node's data is external, then we cannot re-use it
if ((char *) parent->data < (char *) alloc->data ||
(char *) parent->data >= ((char *) alloc->data + alloc->size)) {
AT_PRINTF("not reusing parent %s for %s as %p is external\n", parent->name, node->name, parent->data);
continue;
}

struct hash_node * p_hn = hash_get(ht, parent);
if (parent->data != NULL && p_hn->n_children == 1 && p_hn->n_views == 0 && ggml_are_same_layout(node, parent)) {
if (ggml_is_view(parent)) {
@@ -465,7 +497,13 @@ static size_t ggml_allocator_alloc_graph_tensors_n(
allocate_node(alloc, input);
}
}
for (int i = 0; i < gf->n_nodes; i++) {
for (int ind = 0; ind < gf->n_nodes; ind++) {
int i;
if (alloc->has_parse_seq) {
i = alloc->parse_seq[ind];
} else {
i = ind;
}
struct ggml_tensor * node = gf->nodes[i];

// allocate parents (leafs)

ggml-alloc.h
@@ -10,6 +10,10 @@ extern "C" {
GGML_API struct ggml_allocr * ggml_allocr_new(void * data, size_t size, size_t alignment);
GGML_API struct ggml_allocr * ggml_allocr_new_measure(size_t alignment);

// tell the allocator to parse nodes following the order described in the list
// you should call this if your graph are optimized to execute out-of-order
GGML_API void ggml_allocr_set_parse_seq(struct ggml_allocr * alloc, int * list, int n);

GGML_API void ggml_allocr_free(struct ggml_allocr * alloc);
GGML_API bool ggml_allocr_is_measure(struct ggml_allocr * alloc);
GGML_API void ggml_allocr_reset(struct ggml_allocr * alloc);
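A minimal sketch of the new parse-order hook, assuming an allocator alloc and a graph gf already exist; the order array is illustrative, and ggml_allocr_alloc_graph is the graph-allocation entry point from the same header:

    int order[] = { 0, 2, 1, -1, 3 };      // illustrative order; -1 entries are skipped by set_parse_seq
    ggml_allocr_set_parse_seq(alloc, order, 5);
    ggml_allocr_alloc_graph(alloc, gf);    // allocation now visits nodes in the given order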
2305 ggml-cuda.cu (file diff suppressed because it is too large)
39 ggml-cuda.h
@@ -8,29 +8,30 @@ extern "C" {

#define GGML_CUDA_MAX_DEVICES 16

void   ggml_init_cublas(void);
void   ggml_cuda_set_tensor_split(const float * tensor_split);
GGML_API void   ggml_init_cublas(void);
GGML_API void * ggml_cuda_host_malloc(size_t size);
GGML_API void   ggml_cuda_host_free(void * ptr);

void   ggml_cuda_mul(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst);
bool   ggml_cuda_can_mul_mat(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst);
size_t ggml_cuda_mul_mat_get_wsize(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst);
void   ggml_cuda_mul_mat(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst, void * wdata, size_t wsize);
GGML_API bool   ggml_cuda_can_mul_mat(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst);
GGML_API void   ggml_cuda_set_tensor_split(const float * tensor_split);
GGML_API void   ggml_cuda_transform_tensor(void * data, struct ggml_tensor * tensor);
GGML_API void   ggml_cuda_free_data(struct ggml_tensor * tensor);

// TODO: export these with GGML_API
void * ggml_cuda_host_malloc(size_t size);
void   ggml_cuda_host_free(void * ptr);
GGML_API void   ggml_cuda_assign_buffers(struct ggml_tensor * tensor);
GGML_API void   ggml_cuda_assign_buffers_no_scratch(struct ggml_tensor * tensor);
GGML_API void   ggml_cuda_assign_buffers_force_inplace(struct ggml_tensor * tensor);

void ggml_cuda_transform_tensor(void * data, struct ggml_tensor * tensor);
GGML_API void   ggml_cuda_assign_buffers_no_alloc(struct ggml_tensor * tensor);
GGML_API void   ggml_cuda_assign_scratch_offset(struct ggml_tensor * tensor, size_t offset);

void ggml_cuda_free_data(struct ggml_tensor * tensor);
void ggml_cuda_assign_buffers(struct ggml_tensor * tensor);
void ggml_cuda_assign_buffers_no_scratch(struct ggml_tensor * tensor);
void ggml_cuda_assign_buffers_force_inplace(struct ggml_tensor * tensor);
void ggml_cuda_set_main_device(int main_device);
void ggml_cuda_set_mul_mat_q(bool mul_mat_q);
void ggml_cuda_set_scratch_size(size_t scratch_size);
void ggml_cuda_free_scratch(void);
bool ggml_cuda_compute_forward(struct ggml_compute_params * params, struct ggml_tensor * tensor);
GGML_API void   ggml_cuda_set_main_device(int main_device);
GGML_API void   ggml_cuda_set_mul_mat_q(bool mul_mat_q);
GGML_API void   ggml_cuda_set_scratch_size(size_t scratch_size);
GGML_API void   ggml_cuda_free_scratch(void);
GGML_API bool   ggml_cuda_compute_forward(struct ggml_compute_params * params, struct ggml_tensor * tensor);

GGML_API int    ggml_cuda_get_device_count(void);
GGML_API void   ggml_cuda_get_device_description(int device, char * description, size_t description_size);

#ifdef __cplusplus
}
12 ggml-metal.h
@@ -38,6 +38,9 @@ struct ggml_metal_context;
struct ggml_metal_context * ggml_metal_init(int n_cb);
void ggml_metal_free(struct ggml_metal_context * ctx);

void * ggml_metal_host_malloc(size_t n);
void   ggml_metal_host_free  (void * data);

// set the number of command buffers to use
void ggml_metal_set_n_cb(struct ggml_metal_context * ctx, int n_cb);

@@ -63,10 +66,13 @@ void ggml_metal_get_tensor(struct ggml_metal_context * ctx, struct ggml_tensor *

// try to find operations that can be run concurrently in the graph
// you should run it again if the topology of your graph changes
void ggml_metal_graph_find_concurrency(struct ggml_metal_context * ctx, struct ggml_cgraph * gf);
void ggml_metal_graph_find_concurrency(struct ggml_metal_context * ctx, struct ggml_cgraph * gf, bool check_mem);

// if the graph has been optimized for concurrently dispatch
bool ggml_metal_if_optimized(struct ggml_metal_context * ctx);
// if the graph has been optimized for concurrently dispatch, return length of the concur_list if optimized
int ggml_metal_if_optimized(struct ggml_metal_context * ctx);

// output the concur_list for ggml_alloc
int * ggml_metal_get_concur_list(struct ggml_metal_context * ctx);

// same as ggml_graph_compute but uses Metal
// creates gf->n_threads command buffers in parallel
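With ggml_metal_if_optimized now returning the concur_list length instead of a bool, the typical flow is to find concurrency once and feed the resulting list to the allocator. A sketch, assuming ctx_metal, alloc and gf already exist:

    ggml_metal_graph_find_concurrency(ctx_metal, gf, false); // skip the memory-overlap check here
    if (ggml_metal_if_optimized(ctx_metal)) {                // non-zero length => optimized
        ggml_allocr_set_parse_seq(alloc, ggml_metal_get_concur_list(ctx_metal),
                                  ggml_metal_if_optimized(ctx_metal));
    }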
273 ggml-metal.m
@@ -5,7 +5,11 @@
#import <Foundation/Foundation.h>

#import <Metal/Metal.h>
#import <MetalPerformanceShaders/MetalPerformanceShaders.h>

#undef MIN
#undef MAX
#define MIN(a, b) ((a) < (b) ? (a) : (b))
#define MAX(a, b) ((a) > (b) ? (a) : (b))

#ifdef GGML_METAL_NDEBUG
#define metal_printf(...)
@@ -15,6 +19,8 @@

#define UNUSED(x) (void)(x)

#define GGML_MAX_CONCUR (2*GGML_MAX_NODES)

struct ggml_metal_buffer {
const char * name;

@@ -36,7 +42,7 @@ struct ggml_metal_context {
int n_buffers;
struct ggml_metal_buffer buffers[GGML_METAL_MAX_BUFFERS];

int concur_list[GGML_MAX_NODES];
int concur_list[GGML_MAX_CONCUR];
int concur_list_len;

// custom kernels
@@ -72,6 +78,14 @@ struct ggml_metal_context {
GGML_METAL_DECL_KERNEL(mul_mat_q4_K_f32);
GGML_METAL_DECL_KERNEL(mul_mat_q5_K_f32);
GGML_METAL_DECL_KERNEL(mul_mat_q6_K_f32);
GGML_METAL_DECL_KERNEL(mul_mm_f16_f32);
GGML_METAL_DECL_KERNEL(mul_mm_q4_0_f32);
GGML_METAL_DECL_KERNEL(mul_mm_q4_1_f32);
GGML_METAL_DECL_KERNEL(mul_mm_q2_K_f32);
GGML_METAL_DECL_KERNEL(mul_mm_q3_K_f32);
GGML_METAL_DECL_KERNEL(mul_mm_q4_K_f32);
GGML_METAL_DECL_KERNEL(mul_mm_q5_K_f32);
GGML_METAL_DECL_KERNEL(mul_mm_q6_K_f32);
GGML_METAL_DECL_KERNEL(rope);
GGML_METAL_DECL_KERNEL(alibi_f32);
GGML_METAL_DECL_KERNEL(cpy_f32_f16);
@@ -103,13 +117,6 @@ struct ggml_metal_context * ggml_metal_init(int n_cb) {
ctx->n_buffers = 0;
ctx->concur_list_len = 0;

// determine if we can use MPS
if (MPSSupportsMTLDevice(ctx->device)) {
fprintf(stderr, "%s: using MPS\n", __func__);
} else {
fprintf(stderr, "%s: not using MPS\n", __func__);
GGML_ASSERT(false && "MPS not supported");
}

#if 0
// compile from source string and show compile log
@@ -119,7 +126,7 @@ struct ggml_metal_context * ggml_metal_init(int n_cb) {
ctx->library = [ctx->device newLibraryWithSource:msl_library_source options:nil error:&error];
if (error) {
fprintf(stderr, "%s: error: %s\n", __func__, [[error description] UTF8String]);
exit(1);
return NULL;
}
}
#else
@@ -137,7 +144,7 @@ struct ggml_metal_context * ggml_metal_init(int n_cb) {
NSString * src = [NSString stringWithContentsOfFile:path encoding:NSUTF8StringEncoding error:&error];
if (error) {
fprintf(stderr, "%s: error: %s\n", __func__, [[error description] UTF8String]);
exit(1);
return NULL;
}

#ifdef GGML_QKK_64
@@ -149,17 +156,22 @@ struct ggml_metal_context * ggml_metal_init(int n_cb) {
#endif
if (error) {
fprintf(stderr, "%s: error: %s\n", __func__, [[error description] UTF8String]);
exit(1);
return NULL;
}
}
#endif

// load kernels
{
NSError * error = nil;
#define GGML_METAL_ADD_KERNEL(name) \
ctx->function_##name = [ctx->library newFunctionWithName:@"kernel_"#name]; \
ctx->pipeline_##name = [ctx->device newComputePipelineStateWithFunction:ctx->function_##name error:nil]; \
fprintf(stderr, "%s: loaded %-32s %16p\n", __func__, "kernel_"#name, (void *) ctx->pipeline_##name);
ctx->pipeline_##name = [ctx->device newComputePipelineStateWithFunction:ctx->function_##name error:&error]; \
fprintf(stderr, "%s: loaded %-32s %16p\n", __func__, "kernel_"#name, (void *) ctx->pipeline_##name); \
if (error) { \
fprintf(stderr, "%s: load pipeline error: %s\n", __func__, [[error description] UTF8String]); \
return NULL; \
}

GGML_METAL_ADD_KERNEL(add);
GGML_METAL_ADD_KERNEL(add_row);
@@ -189,6 +201,14 @@ struct ggml_metal_context * ggml_metal_init(int n_cb) {
GGML_METAL_ADD_KERNEL(mul_mat_q4_K_f32);
GGML_METAL_ADD_KERNEL(mul_mat_q5_K_f32);
GGML_METAL_ADD_KERNEL(mul_mat_q6_K_f32);
GGML_METAL_ADD_KERNEL(mul_mm_f16_f32);
GGML_METAL_ADD_KERNEL(mul_mm_q4_0_f32);
GGML_METAL_ADD_KERNEL(mul_mm_q4_1_f32);
GGML_METAL_ADD_KERNEL(mul_mm_q2_K_f32);
GGML_METAL_ADD_KERNEL(mul_mm_q3_K_f32);
GGML_METAL_ADD_KERNEL(mul_mm_q4_K_f32);
GGML_METAL_ADD_KERNEL(mul_mm_q5_K_f32);
GGML_METAL_ADD_KERNEL(mul_mm_q6_K_f32);
GGML_METAL_ADD_KERNEL(rope);
GGML_METAL_ADD_KERNEL(alibi_f32);
GGML_METAL_ADD_KERNEL(cpy_f32_f16);
@@ -217,15 +237,31 @@ void ggml_metal_free(struct ggml_metal_context * ctx) {
free(ctx);
}

void * ggml_metal_host_malloc(size_t n) {
void * data = NULL;
const int result = posix_memalign((void **) &data, getpagesize(), n);
if (result != 0) {
fprintf(stderr, "%s: error: posix_memalign failed\n", __func__);
return NULL;
}

return data;
}

void ggml_metal_host_free(void * data) {
free(data);
}

void ggml_metal_set_n_cb(struct ggml_metal_context * ctx, int n_cb) {
ctx->n_cb = n_cb;
}

bool ggml_metal_if_optimized(struct ggml_metal_context * ctx) {
if (ctx->concur_list_len) {
return true;
}
return false;
int ggml_metal_if_optimized(struct ggml_metal_context * ctx) {
return ctx->concur_list_len;
}

int * ggml_metal_get_concur_list(struct ggml_metal_context * ctx) {
return ctx->concur_list;
}

// finds the Metal buffer that contains the tensor data on the GPU device
@@ -368,17 +404,17 @@ void ggml_metal_get_tensor(

void ggml_metal_graph_find_concurrency(
struct ggml_metal_context * ctx,
struct ggml_cgraph * gf) {
struct ggml_cgraph * gf, bool check_mem) {
int search_depth = gf->n_nodes; //we only find concurrency in this range to avoid wasting too much time
int nodes_unused[GGML_MAX_NODES];
int nodes_unused[GGML_MAX_CONCUR];

for (int i = 0; i < GGML_MAX_NODES; i++) {ctx->concur_list[i] = 0;}
for (int i = 0; i < gf->n_nodes; i++) {nodes_unused[i] = 1;}
for (int i = 0; i < GGML_MAX_CONCUR; i++) { ctx->concur_list[i] = 0; }
for (int i = 0; i < gf->n_nodes; i++) { nodes_unused[i] = 1; }
ctx->concur_list_len = 0;

int n_left = gf->n_nodes;
int n_start = 0; // all nodes before n_start at nodes_unused array have been sorted and store back to ctx->concur_list
int level_pos = 0; // at ctx->concur_list, the last layer (level) ends at level_pos
int n_left = gf->n_nodes;
int n_start = 0; // all nodes before n_start at nodes_unused array have been sorted and store back to ctx->concur_list
int level_pos = 0; // at ctx->concur_list, the last layer (level) ends at level_pos

while (n_left > 0) {
// number of nodes at a layer (that can be issued concurrently)
@@ -386,28 +422,40 @@ void ggml_metal_graph_find_concurrency(
for (int i = n_start; i < ((n_start + search_depth > gf->n_nodes) ? gf->n_nodes : n_start + search_depth); i++) {
if (nodes_unused[i]) {
// if the requirements for gf->nodes[i] are satisfied
int exe_flag=1;
int exe_flag = 1;

// scan all srcs
for (int src_ind = 0; src_ind < GGML_MAX_SRC; src_ind++) {
struct ggml_tensor * src_cur = gf->nodes[i]->src[src_ind];
if (src_cur) {
// if is leaf nodes it's satisfied.
if (src_cur->op == GGML_OP_NONE && src_cur->grad == NULL) {continue;}
// TODO: ggml_is_leaf()
if (src_cur->op == GGML_OP_NONE && src_cur->grad == NULL) {
continue;
}

// otherwise this src should be the output from previous nodes.
int is_found = 0;

// scan 2*search_depth back because we inserted barrier.
for (int j = ((level_pos - 2*search_depth) < 0 ? 0 : (level_pos - 2*search_depth)); j < level_pos; j++) {
if (gf->nodes[ctx->concur_list[j]] == src_cur) {is_found = 1; break;}
//for (int j = ((level_pos - 2*search_depth) < 0 ? 0 : (level_pos - 2*search_depth)); j < level_pos; j++) {
for (int j = MAX(0, level_pos - 2*search_depth); j < level_pos; j++) {
if (ctx->concur_list[j] >= 0 && gf->nodes[ctx->concur_list[j]] == src_cur) {
is_found = 1;
break;
}
}
if (is_found == 0) {
exe_flag = 0;
break;
}
if (is_found == 0) {exe_flag = 0; break;}
}
}
if (exe_flag) {
if (exe_flag && check_mem) {
// check if nodes[i]'s data will be overwritten by a node before nodes[i].
// if node[5] and node[3] write to the same memory region, then we can't issue node[5] before node[3]
int64_t data_start = (int64_t) gf->nodes[i]->data;
int64_t length = (int64_t) ggml_nbytes(gf->nodes[i]);
int64_t length     = (int64_t) ggml_nbytes(gf->nodes[i]);
for (int j = n_start; j < i; j++) {
if (nodes_unused[j] && gf->nodes[j]->op != GGML_OP_RESHAPE \
&& gf->nodes[j]->op != GGML_OP_VIEW \
@@ -416,9 +464,9 @@ void ggml_metal_graph_find_concurrency(
if (((int64_t)gf->nodes[j]->data) >= data_start + length || \
((int64_t)gf->nodes[j]->data) + (int64_t) ggml_nbytes(gf->nodes[j]) <= data_start) {
continue;
} else {
exe_flag = 0;
}

exe_flag = 0;
}
}
}
@@ -435,11 +483,13 @@ void ggml_metal_graph_find_concurrency(
ctx->concur_list[level_pos + concurrency] = -1;
ctx->concur_list_len++;
// jump all sorted nodes at nodes_bak
while (!nodes_unused[n_start]) {n_start++;}
while (!nodes_unused[n_start]) {
n_start++;
}
level_pos += concurrency + 1;
}

if (ctx->concur_list_len > GGML_MAX_NODES) {
if (ctx->concur_list_len > GGML_MAX_CONCUR) {
fprintf(stderr, "%s: too many elements for metal ctx->concur_list!\n", __func__);
}
}
@@ -453,7 +503,7 @@ void ggml_metal_graph_compute(
// else fallback to serial dispatch
MTLComputePassDescriptor * edesc = MTLComputePassDescriptor.computePassDescriptor;

const bool has_concur = ctx->concur_list_len && ctx->concur_list_len <= GGML_MAX_NODES;
const bool has_concur = ctx->concur_list_len && ctx->concur_list_len <= GGML_MAX_CONCUR;

const int n_nodes = has_concur ? ctx->concur_list_len : gf->n_nodes;
edesc.dispatchType = has_concur ? MTLDispatchTypeConcurrent : MTLDispatchTypeSerial;
@@ -485,7 +535,7 @@ void ggml_metal_graph_compute(

id<MTLCommandBuffer> command_buffer = command_buffers[cb_idx];

id<MTLComputeCommandEncoder> encoder = nil;
id<MTLComputeCommandEncoder> encoder = [command_buffer computeCommandEncoderWithDescriptor: edesc];

const int node_start = (cb_idx + 0) * n_nodes_per_cb;
const int node_end = (cb_idx == n_cb - 1) ? n_nodes : (cb_idx + 1) * n_nodes_per_cb;
@@ -494,10 +544,6 @@ void ggml_metal_graph_compute(
const int i = has_concur ? ctx->concur_list[ind] : ind;

if (i == -1) {
if (encoder == nil) {
encoder = [command_buffer computeCommandEncoderWithDescriptor: edesc];
continue;
}
[encoder memoryBarrierWithScope:MTLBarrierScopeBuffers];
continue;
}
@@ -571,10 +617,6 @@ void ggml_metal_graph_compute(
} break;
case GGML_OP_ADD:
{
if (encoder == nil) {
encoder = [command_buffer computeCommandEncoderWithDescriptor: edesc];
}

if (ggml_nelements(src1) == ne10) {
// src1 is a row
[encoder setComputePipelineState:ctx->pipeline_add_row];
@@ -592,10 +634,6 @@ void ggml_metal_graph_compute(
} break;
case GGML_OP_MUL:
{
if (encoder == nil) {
encoder = [command_buffer computeCommandEncoderWithDescriptor: edesc];
}

if (ggml_nelements(src1) == ne10) {
// src1 is a row
[encoder setComputePipelineState:ctx->pipeline_mul_row];
@@ -613,10 +651,6 @@ void ggml_metal_graph_compute(
} break;
case GGML_OP_SCALE:
{
if (encoder == nil) {
encoder = [command_buffer computeCommandEncoderWithDescriptor: edesc];
}

const float scale = *(const float *) src1->data;

[encoder setComputePipelineState:ctx->pipeline_scale];
@@ -632,10 +666,6 @@ void ggml_metal_graph_compute(
switch (ggml_get_unary_op(gf->nodes[i])) {
case GGML_UNARY_OP_SILU:
{
if (encoder == nil) {
encoder = [command_buffer computeCommandEncoderWithDescriptor: edesc];
}

[encoder setComputePipelineState:ctx->pipeline_silu];
[encoder setBuffer:id_src0 offset:offs_src0 atIndex:0];
[encoder setBuffer:id_dst offset:offs_dst atIndex:1];
@@ -646,10 +676,6 @@ void ggml_metal_graph_compute(
} break;
case GGML_UNARY_OP_RELU:
{
if (encoder == nil) {
encoder = [command_buffer computeCommandEncoderWithDescriptor: edesc];
}

[encoder setComputePipelineState:ctx->pipeline_relu];
[encoder setBuffer:id_src0 offset:offs_src0 atIndex:0];
[encoder setBuffer:id_dst offset:offs_dst atIndex:1];
@@ -660,10 +686,6 @@ void ggml_metal_graph_compute(
} break;
case GGML_UNARY_OP_GELU:
{
if (encoder == nil) {
encoder = [command_buffer computeCommandEncoderWithDescriptor: edesc];
}

[encoder setComputePipelineState:ctx->pipeline_gelu];
[encoder setBuffer:id_src0 offset:offs_src0 atIndex:0];
[encoder setBuffer:id_dst offset:offs_dst atIndex:1];
@@ -680,10 +702,6 @@ void ggml_metal_graph_compute(
} break;
case GGML_OP_SOFT_MAX:
{
if (encoder == nil) {
encoder = [command_buffer computeCommandEncoderWithDescriptor: edesc];
}

const int nth = 32;

[encoder setComputePipelineState:ctx->pipeline_soft_max];
@@ -698,10 +716,6 @@ void ggml_metal_graph_compute(
} break;
case GGML_OP_DIAG_MASK_INF:
{
if (encoder == nil) {
encoder = [command_buffer computeCommandEncoderWithDescriptor: edesc];
}

const int n_past = ((int32_t *)(dst->op_params))[0];

[encoder setComputePipelineState:ctx->pipeline_diag_mask_inf];
@@ -719,53 +733,43 @@ void ggml_metal_graph_compute(

GGML_ASSERT(ne00 == ne10);
// GGML_ASSERT(ne02 == ne12); // Should be checked on individual data types until broadcast is implemented everywhere
uint gqa = ne12/ne02;
GGML_ASSERT(ne03 == ne13);

// for now the matrix-matrix multiplication kernel only works on A14+/M1+ SoCs
// AMD GPU and older A-chips will reuse matrix-vector multiplication kernel
if (ggml_is_contiguous(src0) &&
ggml_is_contiguous(src1) &&
(src0t == GGML_TYPE_F32 || src0t == GGML_TYPE_F16) && ne11 > 1) {

if (encoder != nil) {
[encoder endEncoding];
encoder = nil;
src1t == GGML_TYPE_F32 &&
[ctx->device supportsFamily:MTLGPUFamilyApple7] &&
ne00%32 == 0 &&
ne11 > 1) {
switch (src0->type) {
case GGML_TYPE_F16:  [encoder setComputePipelineState:ctx->pipeline_mul_mm_f16_f32];  break;
case GGML_TYPE_Q4_0: [encoder setComputePipelineState:ctx->pipeline_mul_mm_q4_0_f32]; break;
case GGML_TYPE_Q4_1: [encoder setComputePipelineState:ctx->pipeline_mul_mm_q4_1_f32]; break;
case GGML_TYPE_Q2_K: [encoder setComputePipelineState:ctx->pipeline_mul_mm_q2_K_f32]; break;
case GGML_TYPE_Q3_K: [encoder setComputePipelineState:ctx->pipeline_mul_mm_q3_K_f32]; break;
case GGML_TYPE_Q4_K: [encoder setComputePipelineState:ctx->pipeline_mul_mm_q4_K_f32]; break;
case GGML_TYPE_Q5_K: [encoder setComputePipelineState:ctx->pipeline_mul_mm_q5_K_f32]; break;
case GGML_TYPE_Q6_K: [encoder setComputePipelineState:ctx->pipeline_mul_mm_q6_K_f32]; break;
default: GGML_ASSERT(false && "MUL MAT-MAT not implemented");
}
[encoder setBuffer:id_src0 offset:offs_src0 atIndex:0];
[encoder setBuffer:id_src1 offset:offs_src1 atIndex:1];
[encoder setBuffer:id_dst offset:offs_dst atIndex:2];
[encoder setBytes:&ne00 length:sizeof(ne00) atIndex:3];
[encoder setBytes:&ne02 length:sizeof(ne02) atIndex:4];
[encoder setBytes:&nb01 length:sizeof(nb01) atIndex:5];
[encoder setBytes:&nb02 length:sizeof(nb02) atIndex:6];
[encoder setBytes:&ne12 length:sizeof(ne12) atIndex:7];
[encoder setBytes:&ne0 length:sizeof(ne0) atIndex:8];
[encoder setBytes:&ne1 length:sizeof(ne1) atIndex:9];
[encoder setBytes:&gqa length:sizeof(gqa) atIndex:10];
[encoder setThreadgroupMemoryLength:8192 atIndex:0];
[encoder dispatchThreadgroups:MTLSizeMake( (ne11+31)/32, (ne01+63) / 64, ne12) threadsPerThreadgroup:MTLSizeMake(128, 1, 1)];
}

MPSDataType src0dt = src0t == GGML_TYPE_F32 ? MPSDataTypeFloat32 : MPSDataTypeFloat16;
MPSDataType src1dt = src1t == GGML_TYPE_F32 ? MPSDataTypeFloat32 : MPSDataTypeFloat16;

// for F32 x F32 we use MPS
MPSMatrixDescriptor * desc0 = [MPSMatrixDescriptor
matrixDescriptorWithRows:ne01 columns:ne00 rowBytes:src0->nb[1] dataType:src0dt];

MPSMatrixDescriptor * desc1 = [MPSMatrixDescriptor
matrixDescriptorWithRows:ne11 columns:ne10 rowBytes:src1->nb[1] dataType:src1dt];

MPSMatrixDescriptor * desc = [MPSMatrixDescriptor
matrixDescriptorWithRows:ne1 columns:ne0 rowBytes:dst->nb[1] dataType:MPSDataTypeFloat32];

MPSMatrixMultiplication * mul = [[MPSMatrixMultiplication alloc]
initWithDevice:ctx->device transposeLeft:false transposeRight:true
resultRows:ne11 resultColumns:ne01 interiorColumns:ne00 alpha:1.0 beta:0.0];

// we need to do ne12 multiplications
// TODO: is there a way to do this in parallel - currently very slow ..
// TODO: might be possible to offload part of the computation to ANE using Accelerate's CBLAS
for (int64_t i02 = 0; i02 < ne12; ++i02) {
size_t offs_src0_cur = offs_src0 + i02/(ne12/ne02)*nb02; // gqa not used for now
size_t offs_src1_cur = offs_src1 + i02*nb12;
size_t offs_dst_cur = offs_dst + i02*nb2;

MPSMatrix * mat_src0 = [[MPSMatrix alloc] initWithBuffer:id_src0 offset:offs_src0_cur descriptor:desc0];
MPSMatrix * mat_src1 = [[MPSMatrix alloc] initWithBuffer:id_src1 offset:offs_src1_cur descriptor:desc1];
MPSMatrix * mat_dst  = [[MPSMatrix alloc] initWithBuffer:id_dst  offset:offs_dst_cur  descriptor:desc ];

[mul encodeToCommandBuffer:command_buffer leftMatrix:mat_src1 rightMatrix:mat_src0 resultMatrix:mat_dst];
}
} else {
if (encoder == nil) {
encoder = [command_buffer computeCommandEncoderWithDescriptor: edesc];
}

else {
int nth0 = 32;
int nth1 = 1;

@@ -864,23 +868,24 @@ void ggml_metal_graph_compute(
[encoder setBytes:&nb12 length:sizeof(nb12) atIndex:14];
[encoder setBytes:&ne0 length:sizeof(ne0) atIndex:15];
[encoder setBytes:&ne1 length:sizeof(ne1) atIndex:16];
[encoder setBytes:&gqa length:sizeof(gqa) atIndex:17];

if (src0t == GGML_TYPE_Q4_0 || src0t == GGML_TYPE_Q4_1 ||
src0t == GGML_TYPE_Q2_K || src0t == GGML_TYPE_Q4_K) {
[encoder dispatchThreadgroups:MTLSizeMake((ne01 + 7) / 8, ne11, 1) threadsPerThreadgroup:MTLSizeMake(nth0, nth1, 1)];
[encoder dispatchThreadgroups:MTLSizeMake((ne01 + 7) / 8, ne11, ne12) threadsPerThreadgroup:MTLSizeMake(nth0, nth1, 1)];
}
else if (src0t == GGML_TYPE_Q3_K) {
#ifdef GGML_QKK_64
[encoder dispatchThreadgroups:MTLSizeMake((ne01+1)/2, ne11, 1) threadsPerThreadgroup:MTLSizeMake(nth0, nth1, 1)];
[encoder dispatchThreadgroups:MTLSizeMake((ne01+1)/2, ne11, ne12) threadsPerThreadgroup:MTLSizeMake(nth0, nth1, 1)];
#else
[encoder dispatchThreadgroups:MTLSizeMake((ne01+3)/4, ne11, 1) threadsPerThreadgroup:MTLSizeMake(nth0, nth1, 1)];
[encoder dispatchThreadgroups:MTLSizeMake((ne01+3)/4, ne11, ne12) threadsPerThreadgroup:MTLSizeMake(nth0, nth1, 1)];
#endif
}
else if (src0t == GGML_TYPE_Q5_K) {
[encoder dispatchThreadgroups:MTLSizeMake((ne01 + 3) / 4, ne11, 1) threadsPerThreadgroup:MTLSizeMake(nth0, nth1, 1)];
[encoder dispatchThreadgroups:MTLSizeMake((ne01 + 3) / 4, ne11, ne12) threadsPerThreadgroup:MTLSizeMake(nth0, nth1, 1)];
}
else if (src0t == GGML_TYPE_Q6_K) {
[encoder dispatchThreadgroups:MTLSizeMake((ne01+1)/2, ne11, 1) threadsPerThreadgroup:MTLSizeMake(nth0, nth1, 1)];
[encoder dispatchThreadgroups:MTLSizeMake((ne01+1)/2, ne11, ne12) threadsPerThreadgroup:MTLSizeMake(nth0, nth1, 1)];
} else {
[encoder setThreadgroupMemoryLength:nth0*sizeof(float) atIndex:0];
[encoder dispatchThreadgroups:MTLSizeMake(ne01, ne11, ne12) threadsPerThreadgroup:MTLSizeMake(nth0, nth1, 1)];
@@ -889,10 +894,6 @@ void ggml_metal_graph_compute(
} break;
case GGML_OP_GET_ROWS:
{
if (encoder == nil) {
encoder = [command_buffer computeCommandEncoderWithDescriptor: edesc];
}

switch (src0->type) {
case GGML_TYPE_F16:  [encoder setComputePipelineState:ctx->pipeline_get_rows_f16];  break;
case GGML_TYPE_Q4_0: [encoder setComputePipelineState:ctx->pipeline_get_rows_q4_0]; break;
@@ -918,10 +919,6 @@ void ggml_metal_graph_compute(
} break;
case GGML_OP_RMS_NORM:
{
if (encoder == nil) {
encoder = [command_buffer computeCommandEncoderWithDescriptor: edesc];
}

float eps;
memcpy(&eps, dst->op_params, sizeof(float));

@@ -941,10 +938,6 @@ void ggml_metal_graph_compute(
} break;
case GGML_OP_NORM:
{
if (encoder == nil) {
encoder = [command_buffer computeCommandEncoderWithDescriptor: edesc];
}

const float eps = 1e-5f;

const int nth = 256;
@@ -963,10 +956,6 @@ void ggml_metal_graph_compute(
} break;
case GGML_OP_ALIBI:
{
if (encoder == nil) {
encoder = [command_buffer computeCommandEncoderWithDescriptor: edesc];
}

GGML_ASSERT((src0t == GGML_TYPE_F32));

const int n_past = ((int32_t *) dst->op_params)[0]; UNUSED(n_past);
@@ -1006,10 +995,6 @@ void ggml_metal_graph_compute(
} break;
case GGML_OP_ROPE:
{
if (encoder == nil) {
encoder = [command_buffer computeCommandEncoderWithDescriptor: edesc];
}

const int n_past = ((int32_t *) dst->op_params)[0];
const int n_dims = ((int32_t *) dst->op_params)[1];
const int mode   = ((int32_t *) dst->op_params)[2];
@@ -1050,10 +1035,6 @@ void ggml_metal_graph_compute(
case GGML_OP_CPY:
case GGML_OP_CONT:
{
if (encoder == nil) {
encoder = [command_buffer computeCommandEncoderWithDescriptor: edesc];
}

const int nth = 32;

switch (src0t) {
971 ggml-metal.metal (file diff suppressed because it is too large)
387 ggml.h
@@ -183,6 +183,15 @@
#    define GGML_API
#endif

// TODO: support for clang
#ifdef __GNUC__
#    define GGML_DEPRECATED(func, hint) func __attribute__((deprecated(hint)))
#elif defined(_MSC_VER)
#    define GGML_DEPRECATED(func, hint) __declspec(deprecated(hint)) func
#else
#    define GGML_DEPRECATED(func, hint) func
#endif

#include <stdint.h>
#include <stddef.h>
#include <stdbool.h>
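The GGML_DEPRECATED macro added above lets a header keep an old entry point while steering callers to its replacement at compile time. A minimal sketch of applying it (foo_old and foo_new are hypothetical names, not part of ggml):

    // hypothetical declarations, for illustration only
    GGML_DEPRECATED(GGML_API void foo_old(int n), "use foo_new instead");
    GGML_API void foo_new(int n, int stride);
    // a call to foo_old() now emits a deprecation warning carrying the hint text (on GCC/Clang/MSVC)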
@@ -198,7 +207,7 @@
#define GGML_MAX_PARAMS        256
#define GGML_MAX_CONTEXTS      64
#define GGML_MAX_SRC           6
#define GGML_MAX_NAME          48
#define GGML_MAX_NAME          64
#define GGML_MAX_OP_PARAMS     32
#define GGML_DEFAULT_N_THREADS 4

@@ -206,6 +215,11 @@
#define GGML_EXIT_SUCCESS 0
#define GGML_EXIT_ABORTED 1

#define GGUF_MAGIC     0x46554747 // "GGUF"
#define GGUF_VERSION   1

#define GGUF_DEFAULT_ALIGNMENT 32

#define GGML_UNUSED(x) (void)(x)

#define GGML_PAD(x, n) (((x) + (n) - 1) & ~((n) - 1))
@@ -246,8 +260,9 @@
extern "C" {
#endif

#ifdef __ARM_NEON
// we use the built-in 16-bit float type
#if defined(__ARM_NEON) && defined(__CUDACC__)
typedef half ggml_fp16_t;
#elif defined(__ARM_NEON)
typedef __fp16 ggml_fp16_t;
#else
typedef uint16_t ggml_fp16_t;
@@ -331,10 +346,12 @@ extern "C" {
GGML_OP_ARGMAX,
GGML_OP_REPEAT,
GGML_OP_REPEAT_BACK,
GGML_OP_CONCAT,
GGML_OP_SILU_BACK,
GGML_OP_NORM, // normalize
GGML_OP_RMS_NORM,
GGML_OP_RMS_NORM_BACK,
GGML_OP_GROUP_NORM,

GGML_OP_MUL_MAT,
GGML_OP_OUT_PROD,
@@ -360,20 +377,29 @@ extern "C" {
GGML_OP_CLAMP,
GGML_OP_CONV_1D,
GGML_OP_CONV_2D,
GGML_OP_CONV_TRANSPOSE_2D,
GGML_OP_POOL_1D,
GGML_OP_POOL_2D,

GGML_OP_UPSCALE, // nearest interpolate

GGML_OP_FLASH_ATTN,
GGML_OP_FLASH_FF,
GGML_OP_FLASH_ATTN_BACK,
GGML_OP_WIN_PART,
GGML_OP_WIN_UNPART,
GGML_OP_GET_REL_POS,
GGML_OP_ADD_REL_POS,

GGML_OP_UNARY,

GGML_OP_MAP_UNARY,
GGML_OP_MAP_BINARY,

GGML_OP_MAP_CUSTOM1_F32,
GGML_OP_MAP_CUSTOM2_F32,
GGML_OP_MAP_CUSTOM3_F32,

GGML_OP_MAP_CUSTOM1,
GGML_OP_MAP_CUSTOM2,
GGML_OP_MAP_CUSTOM3,
@@ -549,6 +575,7 @@ extern "C" {
GGML_API int64_t ggml_nelements   (const struct ggml_tensor * tensor);
GGML_API int64_t ggml_nrows       (const struct ggml_tensor * tensor);
GGML_API size_t  ggml_nbytes      (const struct ggml_tensor * tensor);
GGML_API size_t  ggml_nbytes_pad  (const struct ggml_tensor * tensor); // same as ggml_nbytes() but padded to GGML_MEM_ALIGN
GGML_API size_t  ggml_nbytes_split(const struct ggml_tensor * tensor, int nrows_split);

GGML_API int     ggml_blck_size (enum ggml_type type);
@@ -570,6 +597,8 @@ extern "C" {
GGML_API bool ggml_is_contiguous(const struct ggml_tensor * tensor);
GGML_API bool ggml_is_permuted  (const struct ggml_tensor * tensor);

GGML_API bool ggml_are_same_shape(const struct ggml_tensor * t0, const struct ggml_tensor * t1);

// use this to compute the memory overhead of a tensor
GGML_API size_t ggml_tensor_overhead(void);

@@ -784,6 +813,13 @@ extern "C" {
struct ggml_tensor  * a,
struct ggml_tensor  * b);

// concat a and b on dim 2
// used in stable-diffusion
GGML_API struct ggml_tensor * ggml_concat(
struct ggml_context * ctx,
struct ggml_tensor  * a,
struct ggml_tensor  * b);

GGML_API struct ggml_tensor * ggml_abs(
struct ggml_context * ctx,
struct ggml_tensor  * a);
@@ -892,6 +928,19 @@ extern "C" {
struct ggml_tensor  * a,
float                 eps);

// group normalize along ne0*ne1*n_groups
// used in stable-diffusion
// TODO: eps is hardcoded to 1e-6 for now
GGML_API struct ggml_tensor * ggml_group_norm(
struct ggml_context * ctx,
struct ggml_tensor  * a,
int                   n_groups);

GGML_API struct ggml_tensor * ggml_group_norm_inplace(
struct ggml_context * ctx,
struct ggml_tensor  * a,
int                   n_groups);

// a - x
// b - dy
// TODO: update with configurable eps
@@ -1192,6 +1241,15 @@ extern "C" {
float                 freq_base,
float                 freq_scale);

// xPos RoPE, in-place, returns view(a)
GGML_API struct ggml_tensor * ggml_rope_xpos_inplace(
struct ggml_context * ctx,
struct ggml_tensor  * a,
int                   n_past,
int                   n_dims,
float                 base,
bool                  down);

// rotary position embedding backward, i.e compute dx from dy
// a - dy
GGML_API struct ggml_tensor * ggml_rope_back(
@@ -1200,7 +1258,11 @@ extern "C" {
int                   n_past,
int                   n_dims,
int                   mode,
int                   n_ctx);
int                   n_ctx,
float                 freq_base,
float                 freq_scale,
float                 xpos_base,
bool                  xpos_down);

// alibi position embedding
// in-place, returns view(a)
@@ -1227,6 +1289,15 @@ extern "C" {
int                   p0, // padding
int                   d0); // dilation

// conv_1d with padding = half
// alias for ggml_conv_1d(a, b, s, a->ne[0]/2, d)
GGML_API struct ggml_tensor* ggml_conv_1d_ph(
struct ggml_context * ctx,
struct ggml_tensor  * a,
struct ggml_tensor  * b,
int                   s,
int                   d);

GGML_API struct ggml_tensor * ggml_conv_2d(
struct ggml_context * ctx,
struct ggml_tensor  * a,
@@ -1238,14 +1309,38 @@ extern "C" {
int                   d0,
int                   d1);

// conv_1d with padding = half
// alias for ggml_conv_1d(a, b, s, a->ne[0]/2, d)
GGML_API struct ggml_tensor* ggml_conv_1d_ph(

// kernel size is a->ne[0] x a->ne[1]
// stride is equal to kernel size
// padding is zero
// example:
// a:     16   16    3  768
// b:   1024 1024    3    1
// res:   64   64  768    1
// used in sam
GGML_API struct ggml_tensor * ggml_conv_2d_sk_p0(
struct ggml_context * ctx,
struct ggml_tensor  * a,
struct ggml_tensor  * b);

// kernel size is a->ne[0] x a->ne[1]
// stride is 1
// padding is half
// example:
// a:      3    3    256  256
// b:     64   64    256    1
// res:   64   64    256    1
// used in sam
GGML_API struct ggml_tensor * ggml_conv_2d_s1_ph(
struct ggml_context * ctx,
struct ggml_tensor  * a,
struct ggml_tensor  * b);

GGML_API struct ggml_tensor * ggml_conv_transpose_2d_p0(
struct ggml_context * ctx,
struct ggml_tensor  * a,
struct ggml_tensor  * b,
int                   s,
int                   d);
int                   stride);

enum ggml_op_pool {
GGML_OP_POOL_MAX,
@@ -1253,7 +1348,7 @@ extern "C" {
GGML_OP_POOL_COUNT,
};

GGML_API struct ggml_tensor* ggml_pool_1d(
GGML_API struct ggml_tensor * ggml_pool_1d(
struct ggml_context * ctx,
struct ggml_tensor  * a,
enum ggml_op_pool     op,
@@ -1261,7 +1356,7 @@ extern "C" {
int                   s0, // stride
int                   p0); // padding

GGML_API struct ggml_tensor* ggml_pool_2d(
GGML_API struct ggml_tensor * ggml_pool_2d(
struct ggml_context * ctx,
struct ggml_tensor  * a,
enum ggml_op_pool     op,
@@ -1272,6 +1367,13 @@ extern "C" {
int                   p0,
int                   p1);

// nearest interpolate
// used in stable-diffusion
GGML_API struct ggml_tensor * ggml_upscale(
struct ggml_context * ctx,
struct ggml_tensor  * a,
int                   scale_factor);

GGML_API struct ggml_tensor * ggml_flash_attn(
struct ggml_context * ctx,
struct ggml_tensor  * q,
@@ -1315,15 +1417,6 @@ extern "C" {
int                   h0,
int                   w);

// custom operators

typedef void (*ggml_unary_op_f32_t) (const int, float *, const float *);
typedef void (*ggml_binary_op_f32_t)(const int, float *, const float *, const float *);

typedef void (*ggml_custom1_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *);
typedef void (*ggml_custom2_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *);
typedef void (*ggml_custom3_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *);

GGML_API struct ggml_tensor * ggml_unary(
struct ggml_context * ctx,
struct ggml_tensor  * a,
@@ -1334,63 +1427,159 @@ extern "C" {
struct ggml_tensor  * a,
enum ggml_unary_op    op);

GGML_API struct ggml_tensor * ggml_map_unary_f32(
// used in sam
GGML_API struct ggml_tensor * ggml_get_rel_pos(
struct ggml_context * ctx,
struct ggml_tensor  * a,
int                   qh,
int                   kh);

// used in sam

GGML_API struct ggml_tensor * ggml_add_rel_pos(
struct ggml_context * ctx,
struct ggml_tensor  * a,
struct ggml_tensor  * pw,
struct ggml_tensor  * ph);

GGML_API struct ggml_tensor * ggml_add_rel_pos_inplace(
struct ggml_context * ctx,
struct ggml_tensor  * a,
struct ggml_tensor  * pw,
struct ggml_tensor  * ph);

// custom operators

typedef void (*ggml_unary_op_f32_t) (const int, float *, const float *);
typedef void (*ggml_binary_op_f32_t)(const int, float *, const float *, const float *);

typedef void (*ggml_custom1_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *);
typedef void (*ggml_custom2_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *);
typedef void (*ggml_custom3_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *);

GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_unary_f32(
struct ggml_context        * ctx,
struct ggml_tensor         * a,
ggml_unary_op_f32_t        fun);
ggml_unary_op_f32_t        fun),
"use ggml_map_custom1 instead");

GGML_API struct ggml_tensor * ggml_map_unary_inplace_f32(
GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_unary_inplace_f32(
struct ggml_context        * ctx,
struct ggml_tensor         * a,
ggml_unary_op_f32_t        fun);
ggml_unary_op_f32_t        fun),
"use ggml_map_custom1_inplace instead");

GGML_API struct ggml_tensor * ggml_map_binary_f32(
GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_binary_f32(
struct ggml_context        * ctx,
struct ggml_tensor         * a,
struct ggml_tensor         * b,
ggml_binary_op_f32_t       fun);
ggml_binary_op_f32_t       fun),
"use ggml_map_custom2 instead");

GGML_API struct ggml_tensor * ggml_map_binary_inplace_f32(
GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_binary_inplace_f32(
struct ggml_context        * ctx,
struct ggml_tensor         * a,
struct ggml_tensor         * b,
ggml_binary_op_f32_t       fun);
ggml_binary_op_f32_t       fun),
"use ggml_map_custom2_inplace instead");

GGML_API struct ggml_tensor * ggml_map_custom1_f32(
GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_custom1_f32(
struct ggml_context        * ctx,
struct ggml_tensor         * a,
ggml_custom1_op_f32_t      fun);
ggml_custom1_op_f32_t      fun),
"use ggml_map_custom1 instead");

GGML_API struct ggml_tensor * ggml_map_custom1_inplace_f32(
GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_custom1_inplace_f32(
struct ggml_context        * ctx,
struct ggml_tensor         * a,
ggml_custom1_op_f32_t      fun);
ggml_custom1_op_f32_t      fun),
"use ggml_map_custom1_inplace instead");

GGML_API struct ggml_tensor * ggml_map_custom2_f32(
GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_custom2_f32(
struct ggml_context        * ctx,
struct ggml_tensor         * a,
struct ggml_tensor         * b,
ggml_custom2_op_f32_t      fun);
ggml_custom2_op_f32_t      fun),
"use ggml_map_custom2 instead");

GGML_API struct ggml_tensor * ggml_map_custom2_inplace_f32(
GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_custom2_inplace_f32(
struct ggml_context        * ctx,
struct ggml_tensor         * a,
struct ggml_tensor         * b,
ggml_custom2_op_f32_t      fun);
ggml_custom2_op_f32_t      fun),
"use ggml_map_custom2_inplace instead");

GGML_API struct ggml_tensor * ggml_map_custom3_f32(
GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_custom3_f32(
struct ggml_context        * ctx,
struct ggml_tensor         * a,
struct ggml_tensor         * b,
struct ggml_tensor         * c,
ggml_custom3_op_f32_t      fun);
ggml_custom3_op_f32_t      fun),
"use ggml_map_custom3 instead");

GGML_API struct ggml_tensor * ggml_map_custom3_inplace_f32(
GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_custom3_inplace_f32(
struct ggml_context        * ctx,
struct ggml_tensor         * a,
struct ggml_tensor         * b,
struct ggml_tensor         * c,
ggml_custom3_op_f32_t      fun);
ggml_custom3_op_f32_t      fun),
"use ggml_map_custom3_inplace instead");

// custom operators v2

typedef void (*ggml_custom1_op_t)(struct ggml_tensor * dst , const struct ggml_tensor * a, int ith, int nth, void * userdata);
typedef void (*ggml_custom2_op_t)(struct ggml_tensor * dst , const struct ggml_tensor * a, const struct ggml_tensor * b, int ith, int nth, void * userdata);
typedef void (*ggml_custom3_op_t)(struct ggml_tensor * dst , const struct ggml_tensor * a, const struct ggml_tensor * b, const struct ggml_tensor * c, int ith, int nth, void * userdata);

#define GGML_N_TASKS_MAX -1

GGML_API struct ggml_tensor * ggml_map_custom1(
struct ggml_context   * ctx,
struct ggml_tensor    * a,
ggml_custom1_op_t       fun,
int                     n_tasks,
void                  * userdata);

GGML_API struct ggml_tensor * ggml_map_custom1_inplace(
struct ggml_context   * ctx,
struct ggml_tensor    * a,
ggml_custom1_op_t       fun,
int                     n_tasks,
void                  * userdata);

GGML_API struct ggml_tensor * ggml_map_custom2(
struct ggml_context   * ctx,
struct ggml_tensor    * a,
struct ggml_tensor    * b,
ggml_custom2_op_t       fun,
int                     n_tasks,
void                  * userdata);

GGML_API struct ggml_tensor * ggml_map_custom2_inplace(
struct ggml_context   * ctx,
struct ggml_tensor    * a,
struct ggml_tensor    * b,
ggml_custom2_op_t       fun,
int                     n_tasks,
void                  * userdata);

GGML_API struct ggml_tensor * ggml_map_custom3(
struct ggml_context   * ctx,
struct ggml_tensor    * a,
struct ggml_tensor    * b,
struct ggml_tensor    * c,
ggml_custom3_op_t       fun,
int                     n_tasks,
void                  * userdata);

GGML_API struct ggml_tensor * ggml_map_custom3_inplace(
struct ggml_context   * ctx,
struct ggml_tensor    * a,
struct ggml_tensor    * b,
struct ggml_tensor    * c,
ggml_custom3_op_t       fun,
int                     n_tasks,
void                  * userdata);
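The v2 custom-operator API above passes a thread index and count to the callback, so the work can be split across threads without the deprecated *_f32 plumbing. A minimal sketch, assuming a context ctx and an F32 tensor a already exist (clamp01 is an illustrative callback, not part of the API):

    // illustrative: clamp every element of `a` to [0, 1]
    static void clamp01(struct ggml_tensor * dst, const struct ggml_tensor * a,
                        int ith, int nth, void * userdata) {
        const int64_t n = ggml_nelements(dst);
        const float * x = (const float *) a->data;
        float       * y = (float *)       dst->data;
        for (int64_t i = ith; i < n; i += nth) { // each of the nth threads takes a strided slice
            y[i] = x[i] < 0.0f ? 0.0f : (x[i] > 1.0f ? 1.0f : x[i]);
        }
        (void) userdata;
    }

    struct ggml_tensor * out = ggml_map_custom1(ctx, a, clamp01, GGML_N_TASKS_MAX, NULL);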
// loss function
@@ -1622,6 +1811,118 @@ extern "C" {
|
||||
|
||||
GGML_API size_t ggml_quantize_chunk(enum ggml_type type, const float * src, void * dst, int start, int n, int64_t * hist);
|
||||
|
||||
//
|
||||
// gguf
|
||||
//
|
||||
|
||||
enum gguf_type {
|
||||
GGUF_TYPE_UINT8 = 0,
|
||||
GGUF_TYPE_INT8 = 1,
|
||||
GGUF_TYPE_UINT16 = 2,
|
||||
GGUF_TYPE_INT16 = 3,
|
||||
GGUF_TYPE_UINT32 = 4,
|
||||
GGUF_TYPE_INT32 = 5,
|
||||
GGUF_TYPE_FLOAT32 = 6,
|
||||
GGUF_TYPE_BOOL = 7,
|
||||
GGUF_TYPE_STRING = 8,
|
||||
GGUF_TYPE_ARRAY = 9,
|
||||
GGUF_TYPE_COUNT, // marks the end of the enum
|
||||
};
|
||||
|
||||
struct gguf_context;
|
||||
|
||||
struct gguf_init_params {
|
||||
bool no_alloc;
|
||||
|
||||
// if not NULL, create a ggml_context and allocate the tensor data in it
|
||||
struct ggml_context ** ctx;
|
||||
};
|
||||
|
||||
GGML_API struct gguf_context * gguf_init_empty(void);
|
||||
GGML_API struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_params params);
|
||||
//GGML_API struct gguf_context * gguf_init_from_buffer(..);
|
||||
|
||||
GGML_API void gguf_free(struct gguf_context * ctx);
|
||||
|
||||
GGML_API const char * gguf_type_name(enum gguf_type type);
|
||||
|
||||
GGML_API int gguf_get_version (struct gguf_context * ctx);
|
||||
GGML_API size_t gguf_get_alignment (struct gguf_context * ctx);
|
||||
GGML_API size_t gguf_get_data_offset(struct gguf_context * ctx);
|
||||
GGML_API void * gguf_get_data (struct gguf_context * ctx);
|
||||
|
||||
GGML_API int gguf_get_n_kv(struct gguf_context * ctx);
|
||||
GGML_API int gguf_find_key(struct gguf_context * ctx, const char * key);
|
||||
GGML_API const char * gguf_get_key (struct gguf_context * ctx, int i);
|
||||
|
||||
GGML_API enum gguf_type gguf_get_kv_type (struct gguf_context * ctx, int i);
|
||||
GGML_API enum gguf_type gguf_get_arr_type(struct gguf_context * ctx, int i);
|
||||
|
||||
// results are undefined if the wrong type is used for the key
|
||||
GGML_API uint8_t gguf_get_val_u8 (struct gguf_context * ctx, int i);
|
||||
GGML_API int8_t gguf_get_val_i8 (struct gguf_context * ctx, int i);
|
||||
GGML_API uint16_t gguf_get_val_u16 (struct gguf_context * ctx, int i);
|
||||
GGML_API int16_t gguf_get_val_i16 (struct gguf_context * ctx, int i);
|
||||
GGML_API uint32_t gguf_get_val_u32 (struct gguf_context * ctx, int i);
|
||||
GGML_API int32_t gguf_get_val_i32 (struct gguf_context * ctx, int i);
|
||||
GGML_API float gguf_get_val_f32 (struct gguf_context * ctx, int i);
|
||||
GGML_API bool gguf_get_val_bool(struct gguf_context * ctx, int i);
|
||||
GGML_API const char * gguf_get_val_str (struct gguf_context * ctx, int i);
|
||||
GGML_API int gguf_get_arr_n (struct gguf_context * ctx, int i);
|
||||
GGML_API const void * gguf_get_arr_data(struct gguf_context * ctx, int i);
|
||||
GGML_API const char * gguf_get_arr_str (struct gguf_context * ctx, int key_id, int i);
|
||||
|
||||
GGML_API int gguf_get_n_tensors (struct gguf_context * ctx);
|
||||
GGML_API int gguf_find_tensor (struct gguf_context * ctx, const char * name);
|
||||
GGML_API size_t gguf_get_tensor_offset(struct gguf_context * ctx, int i);
|
||||
GGML_API char * gguf_get_tensor_name (struct gguf_context * ctx, int i);
|
||||
|
||||
// overrides existing values or adds a new one
|
||||
GGML_API void gguf_set_val_u8 (struct gguf_context * ctx, const char * key, uint8_t val);
|
||||
GGML_API void gguf_set_val_i8 (struct gguf_context * ctx, const char * key, int8_t val);
|
||||
GGML_API void gguf_set_val_u16 (struct gguf_context * ctx, const char * key, uint16_t val);
|
||||
GGML_API void gguf_set_val_i16 (struct gguf_context * ctx, const char * key, int16_t val);
|
||||
GGML_API void gguf_set_val_u32 (struct gguf_context * ctx, const char * key, uint32_t val);
|
||||
GGML_API void gguf_set_val_i32 (struct gguf_context * ctx, const char * key, int32_t val);
|
||||
GGML_API void gguf_set_val_f32 (struct gguf_context * ctx, const char * key, float val);
|
||||
GGML_API void gguf_set_val_bool(struct gguf_context * ctx, const char * key, bool val);
|
||||
GGML_API void gguf_set_val_str (struct gguf_context * ctx, const char * key, const char * val);
|
||||
GGML_API void gguf_set_arr_data(struct gguf_context * ctx, const char * key, enum gguf_type type, const void * data, int n);
|
||||
GGML_API void gguf_set_arr_str (struct gguf_context * ctx, const char * key, const char ** data, int n);
|
||||
|
||||
// set or add KV pairs from another context
|
||||
GGML_API void gguf_set_kv(struct gguf_context * ctx, struct gguf_context * src);
|
||||
|
||||
// manage tensor info
|
||||
GGML_API void gguf_add_tensor(struct gguf_context * ctx, const struct ggml_tensor * tensor);
|
||||
GGML_API void gguf_set_tensor_type(struct gguf_context * ctx, const char * name, enum ggml_type type);
|
||||
GGML_API void gguf_set_tensor_data(struct gguf_context * ctx, const char * name, const void * data, size_t size);
|
||||
|
||||
// writing gguf files can be done in 2 ways:
//
// - write the entire gguf_context to a binary file in a single pass:
//
//   gguf_write_to_file(ctx, fname);
//
// - first prepare a file with a placeholder for the meta data, write the tensor data, then write the meta data:
//
//   FILE * f = fopen(fname, "wb");
//   fseek(f, gguf_get_meta_size(ctx), SEEK_SET);
//   fwrite(tensor_data, 1, tensor_data_size, f); // write the tensor data
//   void * data = malloc(gguf_get_meta_size(ctx));
//   gguf_get_meta_data(ctx, data);
//   fseek(f, 0, SEEK_SET);
//   fwrite(data, 1, gguf_get_meta_size(ctx), f);
//   free(data);
//   fclose(f);
//

// write the entire context to a binary file
GGML_API void gguf_write_to_file(struct gguf_context * ctx, const char * fname, bool only_meta);

// get the size in bytes of the meta data (header, kv pairs, tensor info) including padding
GGML_API size_t gguf_get_meta_size(struct gguf_context * ctx);
GGML_API void   gguf_get_meta_data(struct gguf_context * ctx, void * data);

//
// system info
//
@@ -1659,6 +1960,10 @@ extern "C" {
|
||||
typedef void (*ggml_vec_dot_t) (const int n, float * GGML_RESTRICT s, const void * GGML_RESTRICT x, const void * GGML_RESTRICT y);
|
||||
|
||||
typedef struct {
|
||||
const char * type_name;
|
||||
int blck_size;
|
||||
size_t type_size;
|
||||
bool is_quantized;
|
||||
ggml_to_float_t to_float;
|
||||
ggml_from_float_t from_float;
|
||||
ggml_from_float_t from_float_reference;
|
||||
@@ -1666,7 +1971,7 @@ extern "C" {
|
||||
enum ggml_type vec_dot_type;
|
||||
} ggml_type_traits_t;
|
||||
|
||||
ggml_type_traits_t ggml_internal_get_type_traits(enum ggml_type i);
|
||||
ggml_type_traits_t ggml_internal_get_type_traits(enum ggml_type type);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
||||
gguf.py (new file, 722 lines)
@@ -0,0 +1,722 @@
import shutil
import sys
import struct
import tempfile
import numpy as np

from enum import IntEnum, auto
from typing import Any, IO, List, Optional

#
# constants
#

GGUF_MAGIC = 0x46554747  # b"GGUF" when read as a little-endian uint32
GGUF_VERSION = 1
GGUF_DEFAULT_ALIGNMENT = 32

# general
KEY_GENERAL_ARCHITECTURE = "general.architecture"
KEY_GENERAL_QUANTIZATION_VERSION = "general.quantization_version"
KEY_GENERAL_ALIGNMENT = "general.alignment"
KEY_GENERAL_NAME = "general.name"
KEY_GENERAL_AUTHOR = "general.author"
KEY_GENERAL_URL = "general.url"
KEY_GENERAL_DESCRIPTION = "general.description"
KEY_GENERAL_LICENSE = "general.license"
KEY_GENERAL_SOURCE_URL = "general.source.url"
KEY_GENERAL_SOURCE_HF_REPO = "general.source.hugginface.repository"  # (sic) the spelling is part of the on-disk key
KEY_GENERAL_FILE_TYPE = "general.file_type"

# LLM
KEY_LLM_CONTEXT_LENGTH = "{arch}.context_length"
KEY_LLM_EMBEDDING_LENGTH = "{arch}.embedding_length"
KEY_LLM_BLOCK_COUNT = "{arch}.block_count"
KEY_LLM_FEED_FORWARD_LENGTH = "{arch}.feed_forward_length"
KEY_LLM_USE_PARALLEL_RESIDUAL = "{arch}.use_parallel_residual"
KEY_LLM_TENSOR_DATA_LAYOUT = "{arch}.tensor_data_layout"

# attention
KEY_ATTENTION_HEAD_COUNT = "{arch}.attention.head_count"
KEY_ATTENTION_HEAD_COUNT_KV = "{arch}.attention.head_count_kv"
KEY_ATTENTION_MAX_ALIBI_BIAS = "{arch}.attention.max_alibi_bias"
KEY_ATTENTION_CLAMP_KQV = "{arch}.attention.clamp_kqv"
KEY_ATTENTION_LAYERNORM_EPS = "{arch}.attention.layer_norm_epsilon"
KEY_ATTENTION_LAYERNORM_RMS_EPS = "{arch}.attention.layer_norm_rms_epsilon"

# RoPE
KEY_ROPE_DIMENSION_COUNT = "{arch}.rope.dimension_count"
KEY_ROPE_SCALE_LINEAR = "{arch}.rope.scale_linear"

# tokenization
KEY_TOKENIZER_MODEL = "tokenizer.ggml.model"
KEY_TOKENIZER_LIST = "tokenizer.ggml.tokens"
KEY_TOKENIZER_TOKEN_TYPE = "tokenizer.ggml.token_type"
KEY_TOKENIZER_SCORES = "tokenizer.ggml.scores"
KEY_TOKENIZER_MERGES = "tokenizer.ggml.merges"
KEY_TOKENIZER_BOS_ID = "tokenizer.ggml.bos_token_id"
KEY_TOKENIZER_EOS_ID = "tokenizer.ggml.eos_token_id"
KEY_TOKENIZER_UNK_ID = "tokenizer.ggml.unknown_token_id"
KEY_TOKENIZER_SEP_ID = "tokenizer.ggml.seperator_token_id"  # (sic) the spelling is part of the on-disk key
KEY_TOKENIZER_PAD_ID = "tokenizer.ggml.padding_token_id"
KEY_TOKENIZER_HF_JSON = "tokenizer.huggingface.json"
KEY_TOKENIZER_RWKV = "tokenizer.rwkv.world"

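# Illustrative aside (not part of the original file): the "{arch}" placeholders
# above are filled in with str.format by the GGUFWriter helpers further down, e.g.
#
#     KEY_LLM_CONTEXT_LENGTH.format(arch="llama")  # -> "llama.context_length"
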
#
# recommended mapping of model tensor names for storage in gguf
#


class MODEL_ARCH(IntEnum):
    LLAMA = auto()
    FALCON = auto()
    GPT2 = auto()
    GPTJ = auto()
    GPTNEOX = auto()
    MPT = auto()


class MODEL_TENSOR(IntEnum):
    TOKEN_EMBD = auto()
    POS_EMBD = auto()
    OUTPUT = auto()
    OUTPUT_NORM = auto()
    ROPE_FREQS = auto()
    ATTN_Q = auto()
    ATTN_K = auto()
    ATTN_V = auto()
    ATTN_QKV = auto()
    ATTN_OUT = auto()
    ATTN_NORM = auto()
    ATTN_NORM_2 = auto()
    ATTN_ROT_EMBD = auto()
    FFN_GATE = auto()
    FFN_DOWN = auto()
    FFN_UP = auto()
    FFN_NORM = auto()


MODEL_ARCH_NAMES = {
    MODEL_ARCH.LLAMA: "llama",
    MODEL_ARCH.FALCON: "falcon",
    MODEL_ARCH.GPT2: "gpt2",
    MODEL_ARCH.GPTJ: "gptj",
    MODEL_ARCH.GPTNEOX: "gptneox",
    MODEL_ARCH.MPT: "mpt",
}

MODEL_TENSOR_NAMES = {
    MODEL_ARCH.LLAMA: {
        MODEL_TENSOR.TOKEN_EMBD: "token_embd",
        MODEL_TENSOR.OUTPUT_NORM: "output_norm",
        MODEL_TENSOR.OUTPUT: "output",
        MODEL_TENSOR.ROPE_FREQS: "rope_freqs",
        MODEL_TENSOR.ATTN_NORM: "blk.{bid}.attn_norm",
        MODEL_TENSOR.ATTN_Q: "blk.{bid}.attn_q",
        MODEL_TENSOR.ATTN_K: "blk.{bid}.attn_k",
        MODEL_TENSOR.ATTN_V: "blk.{bid}.attn_v",
        MODEL_TENSOR.ATTN_OUT: "blk.{bid}.attn_output",
        MODEL_TENSOR.ATTN_ROT_EMBD: "blk.{bid}.attn_rot_embd",
        MODEL_TENSOR.FFN_NORM: "blk.{bid}.ffn_norm",
        MODEL_TENSOR.FFN_GATE: "blk.{bid}.ffn_gate",
        MODEL_TENSOR.FFN_DOWN: "blk.{bid}.ffn_down",
        MODEL_TENSOR.FFN_UP: "blk.{bid}.ffn_up",
    },
    MODEL_ARCH.GPTNEOX: {
        MODEL_TENSOR.TOKEN_EMBD: "token_embd",
        MODEL_TENSOR.OUTPUT_NORM: "output_norm",
        MODEL_TENSOR.OUTPUT: "output",
        MODEL_TENSOR.ATTN_NORM: "blk.{bid}.attn_norm",
        MODEL_TENSOR.ATTN_QKV: "blk.{bid}.attn_qkv",
        MODEL_TENSOR.ATTN_OUT: "blk.{bid}.attn_output",
        MODEL_TENSOR.FFN_NORM: "blk.{bid}.ffn_norm",
        MODEL_TENSOR.FFN_DOWN: "blk.{bid}.ffn_down",
        MODEL_TENSOR.FFN_UP: "blk.{bid}.ffn_up",
    },
    MODEL_ARCH.FALCON: {
        MODEL_TENSOR.TOKEN_EMBD: "token_embd",
        MODEL_TENSOR.OUTPUT_NORM: "output_norm",
        MODEL_TENSOR.OUTPUT: "output",
        MODEL_TENSOR.ATTN_NORM: "blk.{bid}.attn_norm",
        MODEL_TENSOR.ATTN_NORM_2: "blk.{bid}.attn_norm_2",
        MODEL_TENSOR.ATTN_QKV: "blk.{bid}.attn_qkv",
        MODEL_TENSOR.ATTN_OUT: "blk.{bid}.attn_output",
        MODEL_TENSOR.FFN_DOWN: "blk.{bid}.ffn_down",
        MODEL_TENSOR.FFN_UP: "blk.{bid}.ffn_up",
    },
    MODEL_ARCH.GPT2: {
        # TODO
    },
    # TODO
}

# tensors that will not be serialized
MODEL_TENSOR_SKIP = {
    MODEL_ARCH.LLAMA: [
        MODEL_TENSOR.ROPE_FREQS,
        MODEL_TENSOR.ATTN_ROT_EMBD,
    ],
}

# TODO: the following helper functions should be removed
#       instead, get_tensor_name_map should return tuples of (name, MODEL_TENSOR)
#       however, my Python is very bad, and I couldn't figure out how to do this, hence these functions
# REMOVE
def should_skip_tensor_TMP(arch: MODEL_ARCH, n_blocks: int, name: str) -> bool:
    for skip in MODEL_TENSOR_SKIP.get(arch, []):
        for i in range(n_blocks):
            if name == MODEL_TENSOR_NAMES[arch][skip].format(bid=i):
                return True

    return False


def get_tensor_name_map(arch: MODEL_ARCH, n_blocks: int) -> dict:
    tensor_map = {}

    # Token embeddings
    mapped_to = MODEL_TENSOR_NAMES[arch].get(MODEL_TENSOR.TOKEN_EMBD, None)

    tensor_map["gpt_neox.embed_in"] = mapped_to            # gptneox
    tensor_map["transformer.wte"] = mapped_to              # gpt2 mpt
    tensor_map["transformer.word_embeddings"] = mapped_to  # falcon
    tensor_map["model.embed_tokens"] = mapped_to           # llama-hf
    tensor_map["tok_embeddings"] = mapped_to               # llama-pth

    # Position embeddings
    mapped_to = MODEL_TENSOR_NAMES[arch].get(MODEL_TENSOR.POS_EMBD, None)

    tensor_map["transformer.wpe"] = mapped_to  # gpt2

    # Output
    mapped_to = MODEL_TENSOR_NAMES[arch].get(MODEL_TENSOR.OUTPUT, None)

    tensor_map["embed_out"] = mapped_to  # gptneox
    tensor_map["lm_head"] = mapped_to    # gpt2 mpt falcon llama-hf
    tensor_map["output"] = mapped_to     # llama-pth

    # Output norm
    mapped_to = MODEL_TENSOR_NAMES[arch].get(MODEL_TENSOR.OUTPUT_NORM, None)

    tensor_map["gpt_neox.final_layer_norm"] = mapped_to  # gptneox
    tensor_map["transformer.ln_f"] = mapped_to           # gpt2 falcon
    tensor_map["transformer.norm_f"] = mapped_to         # mpt
    tensor_map["model.norm"] = mapped_to                 # llama-hf
    tensor_map["norm"] = mapped_to                       # llama-pth

    # Rope frequencies
    mapped_to = MODEL_TENSOR_NAMES[arch].get(MODEL_TENSOR.ROPE_FREQS, None)

    tensor_map["rope.freqs"] = mapped_to  # llama-pth

    # Attention and feed-forward blocks
    for i in range(0, n_blocks):
        # Attention norm
        # TODO: is there a simpler way to write these 2 lines in Python?
        mapped_to = MODEL_TENSOR_NAMES[arch].get(MODEL_TENSOR.ATTN_NORM, None)
        mapped_to = mapped_to.format(bid=i) if mapped_to is not None else None

        tensor_map["gpt_neox.layers."+str(i)+".input_layernorm"] = mapped_to  # gptneox
        tensor_map["transformer.h."+str(i)+".ln_1"] = mapped_to              # gpt2
        tensor_map["transformer.blocks."+str(i)+".norm_1"] = mapped_to       # mpt
        tensor_map["transformer.h."+str(i)+".input_layernorm"] = mapped_to   # falcon7b
        tensor_map["transformer.h."+str(i)+".ln_mlp"] = mapped_to            # falcon40b
        tensor_map["model.layers."+str(i)+".input_layernorm"] = mapped_to    # llama-hf
        tensor_map["layers."+str(i)+".attention_norm"] = mapped_to           # llama-pth

        # Attention norm 2
        mapped_to = MODEL_TENSOR_NAMES[arch].get(MODEL_TENSOR.ATTN_NORM_2, None)
        mapped_to = mapped_to.format(bid=i) if mapped_to is not None else None

        tensor_map["transformer.h."+str(i)+".ln_attn"] = mapped_to  # falcon40b

        # Attention query-key-value
        mapped_to = MODEL_TENSOR_NAMES[arch].get(MODEL_TENSOR.ATTN_QKV, None)
        mapped_to = mapped_to.format(bid=i) if mapped_to is not None else None

        tensor_map["gpt_neox.layers."+str(i)+".attention.query_key_value"] = mapped_to     # gptneox
        tensor_map["transformer.h."+str(i)+".attn.c_attn"] = mapped_to                     # gpt2
        tensor_map["transformer.blocks."+str(i)+".attn.Wqkv"] = mapped_to                  # mpt
        tensor_map["transformer.h."+str(i)+".self_attention.query_key_value"] = mapped_to  # falcon

        # Attention query
        mapped_to = MODEL_TENSOR_NAMES[arch].get(MODEL_TENSOR.ATTN_Q, None)
        mapped_to = mapped_to.format(bid=i) if mapped_to is not None else None

        tensor_map["model.layers."+str(i)+".self_attn.q_proj"] = mapped_to  # llama-hf
        tensor_map["layers."+str(i)+".attention.wq"] = mapped_to            # llama-pth

        # Attention key
        mapped_to = MODEL_TENSOR_NAMES[arch].get(MODEL_TENSOR.ATTN_K, None)
        mapped_to = mapped_to.format(bid=i) if mapped_to is not None else None

        tensor_map["model.layers."+str(i)+".self_attn.k_proj"] = mapped_to  # llama-hf
        tensor_map["layers."+str(i)+".attention.wk"] = mapped_to            # llama-pth

        # Attention value
        mapped_to = MODEL_TENSOR_NAMES[arch].get(MODEL_TENSOR.ATTN_V, None)
        mapped_to = mapped_to.format(bid=i) if mapped_to is not None else None

        tensor_map["model.layers."+str(i)+".self_attn.v_proj"] = mapped_to  # llama-hf
        tensor_map["layers."+str(i)+".attention.wv"] = mapped_to            # llama-pth

        # Attention output
        mapped_to = MODEL_TENSOR_NAMES[arch].get(MODEL_TENSOR.ATTN_OUT, None)
        mapped_to = mapped_to.format(bid=i) if mapped_to is not None else None

        tensor_map["gpt_neox.layers."+str(i)+".attention.dense"] = mapped_to     # gptneox
        tensor_map["transformer.h."+str(i)+".attn.c_proj"] = mapped_to           # gpt2
        tensor_map["transformer.blocks."+str(i)+".attn.out_proj"] = mapped_to    # mpt
        tensor_map["transformer.h."+str(i)+".self_attention.dense"] = mapped_to  # falcon
        tensor_map["model.layers."+str(i)+".self_attn.o_proj"] = mapped_to       # llama-hf
        tensor_map["layers."+str(i)+".attention.wo"] = mapped_to                 # llama-pth

        # Rotary embeddings
        mapped_to = MODEL_TENSOR_NAMES[arch].get(MODEL_TENSOR.ATTN_ROT_EMBD, None)
        mapped_to = mapped_to.format(bid=i) if mapped_to is not None else None

        tensor_map["model.layers."+str(i)+".self_attn.rotary_emb.inv_freq"] = mapped_to   # llama-hf
        tensor_map["layers."+str(i)+".attention.inner_attention.rope.freqs"] = mapped_to  # llama-pth

        # Feed-forward norm
        mapped_to = MODEL_TENSOR_NAMES[arch].get(MODEL_TENSOR.FFN_NORM, None)
        mapped_to = mapped_to.format(bid=i) if mapped_to is not None else None

        tensor_map["gpt_neox.layers."+str(i)+".post_attention_layernorm"] = mapped_to  # gptneox
        tensor_map["transformer.h."+str(i)+".ln_2"] = mapped_to                        # gpt2
        tensor_map["transformer.blocks."+str(i)+".norm_2"] = mapped_to                 # mpt
        tensor_map["model.layers."+str(i)+".post_attention_layernorm"] = mapped_to     # llama-hf
        tensor_map["layers."+str(i)+".ffn_norm"] = mapped_to                           # llama-pth

        # Feed-forward up
        mapped_to = MODEL_TENSOR_NAMES[arch].get(MODEL_TENSOR.FFN_UP, None)
        mapped_to = mapped_to.format(bid=i) if mapped_to is not None else None

        tensor_map["gpt_neox.layers."+str(i)+".mlp.dense_h_to_4h"] = mapped_to  # gptneox
        tensor_map["transformer.h."+str(i)+".mlp.c_fc"] = mapped_to             # gpt2
        tensor_map["transformer.blocks."+str(i)+".ffn.up_proj"] = mapped_to     # mpt
        tensor_map["transformer.h."+str(i)+".mlp.dense_h_to_4h"] = mapped_to    # falcon
        tensor_map["model.layers."+str(i)+".mlp.up_proj"] = mapped_to           # llama-hf
        tensor_map["layers."+str(i)+".feed_forward.w3"] = mapped_to             # llama-pth

        # Feed-forward gate
        mapped_to = MODEL_TENSOR_NAMES[arch].get(MODEL_TENSOR.FFN_GATE, None)
        mapped_to = mapped_to.format(bid=i) if mapped_to is not None else None

        tensor_map["model.layers."+str(i)+".mlp.gate_proj"] = mapped_to  # llama-hf
        tensor_map["layers."+str(i)+".feed_forward.w1"] = mapped_to      # llama-pth

        # Feed-forward down
        mapped_to = MODEL_TENSOR_NAMES[arch].get(MODEL_TENSOR.FFN_DOWN, None)
        mapped_to = mapped_to.format(bid=i) if mapped_to is not None else None

        tensor_map["gpt_neox.layers."+str(i)+".mlp.dense_4h_to_h"] = mapped_to  # gptneox
        tensor_map["transformer.h."+str(i)+".mlp.c_proj"] = mapped_to           # gpt2
        tensor_map["transformer.blocks."+str(i)+".ffn.down_proj"] = mapped_to   # mpt
        tensor_map["transformer.h."+str(i)+".mlp.dense_4h_to_h"] = mapped_to    # falcon
        tensor_map["model.layers."+str(i)+".mlp.down_proj"] = mapped_to         # llama-hf
        tensor_map["layers."+str(i)+".feed_forward.w2"] = mapped_to             # llama-pth

    return tensor_map

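# Illustrative aside (not part of the original file): with the table built above,
# checkpoint tensor names resolve to their GGUF storage names, e.g.
#
#     tmap = get_tensor_name_map(MODEL_ARCH.LLAMA, 32)
#     tmap["model.embed_tokens"]               # -> "token_embd"   (llama-hf)
#     tmap["model.layers.0.self_attn.q_proj"]  # -> "blk.0.attn_q" (llama-hf)
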
class TokenType(IntEnum):
    NORMAL = 1
    UNKNOWN = 2
    CONTROL = 3
    USER_DEFINED = 4
    UNUSED = 5
    BYTE = 6

#
# implementation
#


class GGMLQuantizationType(IntEnum):
    F32 = 0
    F16 = 1
    Q4_0 = 2
    Q4_1 = 3
    Q5_0 = 6
    Q5_1 = 7
    Q8_0 = 8
    Q8_1 = 9
    Q2_K = 10
    Q3_K = 11
    Q4_K = 12
    Q5_K = 13
    Q6_K = 14
    Q8_K = 15


class GGUFValueType(IntEnum):
    UINT8 = 0
    INT8 = 1
    UINT16 = 2
    INT16 = 3
    UINT32 = 4
    INT32 = 5
    FLOAT32 = 6
    BOOL = 7
    STRING = 8
    ARRAY = 9

    @staticmethod
    def get_type(val):
        if isinstance(val, str) or isinstance(val, bytes) or isinstance(val, bytearray):
            return GGUFValueType.STRING
        elif isinstance(val, list):
            return GGUFValueType.ARRAY
        elif isinstance(val, float):
            return GGUFValueType.FLOAT32
        elif isinstance(val, bool):
            return GGUFValueType.BOOL
        elif isinstance(val, int):
            return GGUFValueType.INT32
        else:
            print("Unknown type: "+str(type(val)))
            sys.exit()

class GGUFWriter:
    def __init__(self, path: str, arch: str, use_temp_file = True):
        self.fout = open(path, "wb")
        self.arch = arch
        self.offset_tensor = 0
        self.data_alignment = GGUF_DEFAULT_ALIGNMENT
        self.kv_data = b""
        self.kv_data_count = 0
        self.ti_data = b""
        self.ti_data_count = 0
        self.add_architecture()
        self.use_temp_file = use_temp_file
        self.tensors = []

    def write_header_to_file(self):
        self.fout.write(struct.pack("<I", GGUF_MAGIC))
        self.fout.write(struct.pack("<I", GGUF_VERSION))
        self.fout.write(struct.pack("<I", self.ti_data_count))
        self.fout.write(struct.pack("<I", self.kv_data_count))
        self.flush()
        # print("tensors " + str(self.ti_data_count) + " kv " + str(self.kv_data_count))

    def write_kv_data_to_file(self):
        self.fout.write(self.kv_data)
        self.flush()

    def write_ti_data_to_file(self):
        self.fout.write(self.ti_data)
        self.flush()

    def add_key(self, key: str):
        self.add_val(key, GGUFValueType.STRING, add_vtype=False)

    def add_uint8(self, key: str, val: int):
        self.add_key(key)
        self.add_val(val, GGUFValueType.UINT8)

    def add_int8(self, key: str, val: int):
        self.add_key(key)
        self.add_val(val, GGUFValueType.INT8)

    def add_uint16(self, key: str, val: int):
        self.add_key(key)
        self.add_val(val, GGUFValueType.UINT16)

    def add_int16(self, key: str, val: int):
        self.add_key(key)
        self.add_val(val, GGUFValueType.INT16)

    def add_uint32(self, key: str, val: int):
        self.add_key(key)
        self.add_val(val, GGUFValueType.UINT32)

    def add_int32(self, key: str, val: int):
        self.add_key(key)
        self.add_val(val, GGUFValueType.INT32)

    def add_float32(self, key: str, val: float):
        self.add_key(key)
        self.add_val(val, GGUFValueType.FLOAT32)

    def add_bool(self, key: str, val: bool):
        self.add_key(key)
        self.add_val(val, GGUFValueType.BOOL)

    def add_string(self, key: str, val: str):
        if len(val) == 0:
            return
        self.add_key(key)
        self.add_val(val, GGUFValueType.STRING)

    def add_array(self, key: str, val: list):
        if not isinstance(val, list):
            raise ValueError("Value must be a list for array type")

        self.add_key(key)
        self.add_val(val, GGUFValueType.ARRAY)

    def add_val(self, val: Any, vtype: Optional[GGUFValueType] = None, add_vtype: bool = True):
        if vtype is None:
            vtype = GGUFValueType.get_type(val)

        if add_vtype:
            self.kv_data += struct.pack("<I", vtype)
            self.kv_data_count += 1

        if vtype == GGUFValueType.UINT8:
            self.kv_data += struct.pack("<B", val)
        elif vtype == GGUFValueType.INT8:
            self.kv_data += struct.pack("<b", val)
        elif vtype == GGUFValueType.UINT16:
            self.kv_data += struct.pack("<H", val)
        elif vtype == GGUFValueType.INT16:
            self.kv_data += struct.pack("<h", val)
        elif vtype == GGUFValueType.UINT32:
            self.kv_data += struct.pack("<I", val)
        elif vtype == GGUFValueType.INT32:
            self.kv_data += struct.pack("<i", val)
        elif vtype == GGUFValueType.FLOAT32:
            self.kv_data += struct.pack("<f", val)
        elif vtype == GGUFValueType.BOOL:
            self.kv_data += struct.pack("?", val)
        elif vtype == GGUFValueType.STRING:
            encoded_val = val.encode("utf8") if isinstance(val, str) else val
            self.kv_data += struct.pack("<I", len(encoded_val))
            self.kv_data += encoded_val
        elif vtype == GGUFValueType.ARRAY:
            ltype = set([GGUFValueType.get_type(item) for item in val])
            assert len(ltype) == 1, "All items in a GGUF array should be of the same type"
            self.kv_data += struct.pack("<I", list(ltype)[0])
            self.kv_data += struct.pack("<I", len(val))
            for item in val:
                self.add_val(item, add_vtype=False)
        else:
            raise ValueError("Invalid GGUF metadata value type")

    @staticmethod
    def ggml_pad(x: int, n: int) -> int:
        # round x up to the next multiple of n
        return ((x + n - 1) // n) * n

    def add_tensor_info(self, name: str, tensor_shape: np.ndarray, tensor_dtype: np.dtype, tensor_nbytes: int, raw_dtype: Optional[GGMLQuantizationType] = None):
        assert raw_dtype is not None or tensor_dtype in (np.float32, np.float16), "Only F32 and F16 tensors are supported for now"

        encoded_name = name.encode("utf8")
        self.ti_data += struct.pack("<I", len(encoded_name))
        self.ti_data += encoded_name
        n_dims = len(tensor_shape)
        self.ti_data += struct.pack("<I", n_dims)
        for i in range(n_dims):
            # dimensions are serialized in reverse order
            self.ti_data += struct.pack("<I", tensor_shape[n_dims - 1 - i])
        if raw_dtype is None:
            dtype = GGMLQuantizationType.F32 if tensor_dtype == np.float32 else GGMLQuantizationType.F16
        else:
            dtype = raw_dtype
        self.ti_data += struct.pack("<I", dtype)
        self.ti_data += struct.pack("<Q", self.offset_tensor)
        self.offset_tensor += GGUFWriter.ggml_pad(tensor_nbytes, self.data_alignment)
        self.ti_data_count += 1

    def add_tensor(self, name: str, tensor: np.ndarray, raw_shape: Optional[np.ndarray] = None, raw_dtype: Optional[GGMLQuantizationType] = None):
        if self.use_temp_file and not hasattr(self, "temp_file"):
            self.temp_file = tempfile.SpooledTemporaryFile(mode="w+b", max_size=256*1024*1024)
            self.temp_file.seek(0)

        self.add_tensor_info(name, raw_shape if raw_shape is not None else tensor.shape, tensor.dtype, tensor.nbytes, raw_dtype = raw_dtype)

        pad = GGUFWriter.ggml_pad(tensor.nbytes, self.data_alignment) - tensor.nbytes

        if not self.use_temp_file:
            self.tensors.append((tensor, pad))
            return

        tensor.tofile(self.temp_file)

        if pad != 0:
            self.temp_file.write(bytes([0] * pad))

    def write_tensor_data(self, tensor: np.ndarray):
        pad = GGUFWriter.ggml_pad(self.fout.tell(), self.data_alignment) - self.fout.tell()
        if pad != 0:
            self.fout.write(bytes([0] * pad))

        tensor.tofile(self.fout)

        pad = GGUFWriter.ggml_pad(tensor.nbytes, self.data_alignment) - tensor.nbytes
        if pad != 0:
            self.fout.write(bytes([0] * pad))

    def write_tensors_to_file(self):
        self.write_ti_data_to_file()

        pad = GGUFWriter.ggml_pad(self.fout.tell(), self.data_alignment) - self.fout.tell()
        if pad != 0:
            self.fout.write(bytes([0] * pad))

        if not self.use_temp_file:
            for (currtensor, currpad) in self.tensors:
                currtensor.tofile(self.fout)
                if currpad != 0:
                    self.fout.write(bytes([0] * currpad))
            return

        self.temp_file.seek(0)

        shutil.copyfileobj(self.temp_file, self.fout)
        self.flush()
        self.temp_file.close()

    def flush(self):
        self.fout.flush()

    def close(self):
        self.fout.close()

    def add_architecture(self):
        self.add_string(KEY_GENERAL_ARCHITECTURE, self.arch)

    def add_author(self, author: str):
        self.add_string(KEY_GENERAL_AUTHOR, author)

    def add_url(self, url: str):
        self.add_string(KEY_GENERAL_URL, url)

    def add_description(self, description: str):
        self.add_string(KEY_GENERAL_DESCRIPTION, description)

    def add_source_url(self, url: str):
        self.add_string(KEY_GENERAL_SOURCE_URL, url)

    def add_source_hf_repo(self, repo: str):
        self.add_string(KEY_GENERAL_SOURCE_HF_REPO, repo)

    def add_file_type(self, ftype: int):
        self.add_uint32(KEY_GENERAL_FILE_TYPE, ftype)

    def add_name(self, name: str):
        self.add_string(KEY_GENERAL_NAME, name)

    def add_quantization_version(self, quantization_version: GGMLQuantizationType):
        self.add_uint32(
            KEY_GENERAL_QUANTIZATION_VERSION, quantization_version)

    def add_custom_alignment(self, alignment: int):
        self.data_alignment = alignment
        self.add_uint32(KEY_GENERAL_ALIGNMENT, alignment)

    def add_context_length(self, length: int):
        self.add_uint32(
            KEY_LLM_CONTEXT_LENGTH.format(arch=self.arch), length)

    def add_embedding_length(self, length: int):
        self.add_uint32(
            KEY_LLM_EMBEDDING_LENGTH.format(arch=self.arch), length)

    def add_block_count(self, length: int):
        self.add_uint32(
            KEY_LLM_BLOCK_COUNT.format(arch=self.arch), length)

    def add_feed_forward_length(self, length: int):
        self.add_uint32(
            KEY_LLM_FEED_FORWARD_LENGTH.format(arch=self.arch), length)

    def add_parallel_residual(self, use: bool):
        self.add_bool(
            KEY_LLM_USE_PARALLEL_RESIDUAL.format(arch=self.arch), use)

    def add_tensor_data_layout(self, layout: str):
        self.add_string(
            KEY_LLM_TENSOR_DATA_LAYOUT.format(arch=self.arch), layout)

    def add_head_count(self, count: int):
        self.add_uint32(
            KEY_ATTENTION_HEAD_COUNT.format(arch=self.arch), count)

    def add_head_count_kv(self, count: int):
        self.add_uint32(
            KEY_ATTENTION_HEAD_COUNT_KV.format(arch=self.arch), count)

    def add_max_alibi_bias(self, bias: float):
        self.add_float32(
            KEY_ATTENTION_MAX_ALIBI_BIAS.format(arch=self.arch), bias)

    def add_clamp_kqv(self, value: float):
        self.add_float32(
            KEY_ATTENTION_CLAMP_KQV.format(arch=self.arch), value)

    def add_layer_norm_eps(self, value: float):
        self.add_float32(
            KEY_ATTENTION_LAYERNORM_EPS.format(arch=self.arch), value)

    def add_layer_norm_rms_eps(self, value: float):
        self.add_float32(
            KEY_ATTENTION_LAYERNORM_RMS_EPS.format(arch=self.arch), value)

    def add_rope_dimension_count(self, count: int):
        self.add_uint32(
            KEY_ROPE_DIMENSION_COUNT.format(arch=self.arch), count)

    def add_rope_scale_linear(self, value: float):
        self.add_float32(KEY_ROPE_SCALE_LINEAR.format(arch=self.arch), value)

    def add_tokenizer_model(self, model: str):
        self.add_string(KEY_TOKENIZER_MODEL, model)

    def add_token_list(self, tokens: List):
        self.add_array(KEY_TOKENIZER_LIST, tokens)

    def add_token_merges(self, merges: List):
        self.add_array(KEY_TOKENIZER_MERGES, merges)

    def add_token_types(self, types: List[int]):
        self.add_array(KEY_TOKENIZER_TOKEN_TYPE, types)

    def add_token_scores(self, scores: List[float]):
        self.add_array(KEY_TOKENIZER_SCORES, scores)

    def add_bos_token_id(self, id: int):
        self.add_uint32(KEY_TOKENIZER_BOS_ID, id)

    def add_eos_token_id(self, id: int):
        self.add_uint32(KEY_TOKENIZER_EOS_ID, id)

    def add_unk_token_id(self, id: int):
        self.add_uint32(KEY_TOKENIZER_UNK_ID, id)

    def add_sep_token_id(self, id: int):
        self.add_uint32(KEY_TOKENIZER_SEP_ID, id)

    def add_pad_token_id(self, id: int):
        self.add_uint32(KEY_TOKENIZER_PAD_ID, id)

# Example usage:
if __name__ == "__main__":
    # Example usage with a file
    gguf_writer = GGUFWriter("example.gguf", "llama")

    # note: the constructor has already written the general.architecture KV pair
    gguf_writer.add_block_count(12)
    gguf_writer.add_uint32("answer", 42)              # Write a 32-bit integer
    gguf_writer.add_float32("answer_in_float", 42.0)  # Write a 32-bit float
    gguf_writer.add_custom_alignment(64)

    tensor1 = np.ones((32,), dtype=np.float32) * 100.0
    tensor2 = np.ones((64,), dtype=np.float32) * 101.0
    tensor3 = np.ones((96,), dtype=np.float32) * 102.0

    gguf_writer.add_tensor("tensor1", tensor1)
    gguf_writer.add_tensor("tensor2", tensor2)
    gguf_writer.add_tensor("tensor3", tensor3)

    gguf_writer.write_header_to_file()
    gguf_writer.write_kv_data_to_file()
    gguf_writer.write_tensors_to_file()

    gguf_writer.close()

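Since write_header_to_file emits exactly four little-endian uint32 fields (magic, version, tensor count, KV count), the example output can be sanity-checked with a few lines of Python (a sketch, not part of the original file; it assumes example.gguf was produced by the block above):

    import struct

    with open("example.gguf", "rb") as f:
        magic, version, n_tensors, n_kv = struct.unpack("<IIII", f.read(16))

    assert magic == 0x46554747  # GGUF_MAGIC, b"GGUF" in little-endian
    assert version == 1         # GGUF_VERSION
    assert n_tensors == 3

Tensor offsets in the file are always multiples of the data alignment, computed via ggml_pad; e.g. GGUFWriter.ggml_pad(100, 32) == 128.
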
grammars/json.gbnf
@@ -1,29 +1,25 @@
# Grammar for subset of JSON - doesn't support full string or number syntax

-root  ::= object
-value ::= object | array | string | number | boolean | "null"
+root  ::= object
+value ::= object | array | string | number | ("true" | "false" | "null") ws

object ::=
  "{" ws (
    string ":" ws value
    ("," ws string ":" ws value)*
-  )? "}"
+  )? "}" ws

array ::=
  "[" ws (
    value
    ("," ws value)*
-  )? "]"
+  )? "]" ws

-string ::=
+string ::=
  "\"" (
    [^"\\] |
    "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]) # escapes
  )* "\"" ws

-# Only plain integers currently
-number ::= "-"? [0-9]+ ws
-boolean ::= ("true" | "false") ws
+number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? ws

# Optional space: by convention, applied in this grammar after literal chars when allowed
ws ::= ([ \t\n] ws)?

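An example added for illustration: the revised number rule accepts fractions and exponents, so a document such as

    {"temperature": -1.25e-2, "top_k": 40, "stream": true}

now parses, whereas the old integer-only rule rejected -1.25e-2 (and the separate boolean rule has been folded into value).
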
k_quants.c (164 lines changed)
@@ -77,6 +77,11 @@ static float make_qx_quants(int n, int nmax, const float * restrict x, int8_t *
        }
        return 1/iscale;
    }
+   bool return_early = false;
+   if (rmse_type < 0) {
+       rmse_type = -rmse_type;
+       return_early = true;
+   }
    int weight_type = rmse_type%2;
    float sumlx = 0;
    float suml2 = 0;
@@ -89,56 +94,9 @@ static float make_qx_quants(int n, int nmax, const float * restrict x, int8_t *
        suml2 += w*l*l;
    }
    float scale = sumlx/suml2;
+   if (return_early) return suml2 > 0 ? 0.5f*(scale + 1/iscale) : 1/iscale;
    float best = scale * sumlx;
-   for (int itry = 0; itry < 3; ++itry) {
-       iscale = 1/scale;
-       float slx = 0;
-       float sl2 = 0;
-       bool changed = false;
-       for (int i = 0; i < n; ++i) {
-           int l = nearest_int(iscale * x[i]);
-           l = MAX(-nmax, MIN(nmax-1, l));
-           if (l + nmax != L[i]) { changed = true; }
-           float w = weight_type == 1 ? x[i] * x[i] : 1.f;
-           slx += w*x[i]*l;
-           sl2 += w*l*l;
-       }
-       if (!changed || sl2 == 0 || slx*slx <= best*sl2) { break; }
-       for (int i = 0; i < n; ++i) {
-           int l = nearest_int(iscale * x[i]);
-           L[i] = nmax + MAX(-nmax, MIN(nmax-1, l));
-       }
-       sumlx = slx; suml2 = sl2;
-       scale = sumlx/suml2;
-       best = scale * sumlx;
-   }
-   for (int itry = 0; itry < 5; ++itry) {
-       int n_changed = 0;
-       for (int i = 0; i < n; ++i) {
-           float w = weight_type == 1 ? x[i]*x[i] : 1;
-           int l = L[i] - nmax;
-           float slx = sumlx - w*x[i]*l;
-           if (slx > 0) {
-               float sl2 = suml2 - w*l*l;
-               int new_l = nearest_int(x[i] * sl2 / slx);
-               new_l = MAX(-nmax, MIN(nmax-1, new_l));
-               if (new_l != l) {
-                   slx += w*x[i]*new_l;
-                   sl2 += w*new_l*new_l;
-                   if (sl2 > 0 && slx*slx*suml2 > sumlx*sumlx*sl2) {
-                       L[i] = nmax + new_l; sumlx = slx; suml2 = sl2;
-                       scale = sumlx / suml2; best = scale * sumlx;
-                       ++n_changed;
-                   }
-               }
-           }
-       }
-       if (!n_changed) { break; }
-   }
-   if (rmse_type < 3) {
-       return scale;
-   }
-   for (int is = -4; is <= 4; ++is) {
+   for (int is = -9; is <= 9; ++is) {
        if (is == 0) {
            continue;
        }
@@ -221,12 +179,17 @@ static float make_q3_quants(int n, int nmax, const float * restrict x, int8_t *
    return 1/iscale;
}

-static float make_qkx1_quants(int n, int nmax, const float * restrict x, uint8_t * restrict L, float * restrict the_min, int ntry) {
+static float make_qkx1_quants(int n, int nmax, const float * restrict x, uint8_t * restrict L, float * restrict the_min,
+                              int ntry, float alpha) {
    float min = x[0];
    float max = x[0];
+   float sum_x = 0;
+   float sum_x2 = 0;
    for (int i = 1; i < n; ++i) {
        if (x[i] < min) min = x[i];
        if (x[i] > max) max = x[i];
+       sum_x += x[i];
+       sum_x2 += x[i]*x[i];
    }
    if (max == min) {
        for (int i = 0; i < n; ++i) L[i] = 0;
@@ -254,7 +217,7 @@ static float make_qkx1_quants(int n, int nmax, const float * restrict x, uint8_t
        for (int i = 0; i < n; ++i) {
            sum += x[i] - scale*L[i];
        }
-       min = sum/n;
+       min = alpha*min + (1 - alpha)*sum/n;
        if (min > 0) min = 0;
        iscale = 1/scale;
        if (!did_change) break;
@@ -263,6 +226,82 @@ static float make_qkx1_quants(int n, int nmax, const float * restrict x, uint8_t
    return scale;
}

+static float make_qkx2_quants(int n, int nmax, const float * restrict x, const float * restrict weights,
+                              uint8_t * restrict L, float * restrict the_min, uint8_t * restrict Laux,
+                              float rmin, float rdelta, int nstep, bool use_mad) {
+   float min = x[0];
+   float max = x[0];
+   float sum_w = weights[0];
+   float sum_x = sum_w * x[0];
+   for (int i = 1; i < n; ++i) {
+       if (x[i] < min) min = x[i];
+       if (x[i] > max) max = x[i];
+       float w = weights[i];
+       sum_w += w;
+       sum_x += w * x[i];
+   }
+   if (min > 0) min = 0;
+   if (max == min) {
+       for (int i = 0; i < n; ++i) L[i] = 0;
+       *the_min = -min;
+       return 0.f;
+   }
+   float iscale = nmax/(max - min);
+   float scale = 1/iscale;
+   float best_mad = 0;
+   for (int i = 0; i < n; ++i) {
+       int l = nearest_int(iscale*(x[i] - min));
+       L[i] = MAX(0, MIN(nmax, l));
+       float diff = scale * L[i] + min - x[i];
+       diff = use_mad ? fabsf(diff) : diff * diff;
+       float w = weights[i];
+       best_mad += w * diff;
+   }
+   if (nstep < 1) {
+       *the_min = -min;
+       return scale;
+   }
+   for (int is = 0; is <= nstep; ++is) {
+       iscale = (rmin + rdelta*is + nmax)/(max - min);
+       float sum_l = 0, sum_l2 = 0, sum_xl = 0;
+       for (int i = 0; i < n; ++i) {
+           int l = nearest_int(iscale*(x[i] - min));
+           l = MAX(0, MIN(nmax, l));
+           Laux[i] = l;
+           float w = weights[i];
+           sum_l += w*l;
+           sum_l2 += w*l*l;
+           sum_xl += w*l*x[i];
+       }
+       float D = sum_w * sum_l2 - sum_l * sum_l;
+       if (D > 0) {
+           float this_scale = (sum_w * sum_xl - sum_x * sum_l)/D;
+           float this_min = (sum_l2 * sum_x - sum_l * sum_xl)/D;
+           if (this_min > 0) {
+               this_min = 0;
+               this_scale = sum_xl / sum_l2;
+           }
+           float mad = 0;
+           for (int i = 0; i < n; ++i) {
+               float diff = this_scale * Laux[i] + this_min - x[i];
+               diff = use_mad ? fabsf(diff) : diff * diff;
+               float w = weights[i];
+               mad += w * diff;
+           }
+           if (mad < best_mad) {
+               for (int i = 0; i < n; ++i) {
+                   L[i] = Laux[i];
+               }
+               best_mad = mad;
+               scale = this_scale;
+               min = this_min;
+           }
+       }
+   }
+   *the_min = -min;
+   return scale;
+}

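For reference, the candidate (scale, min) pair that make_qkx2_quants computes inside its grid search is the closed-form solution of the weighted least-squares fit over the fixed quantization l_i = Laux[i] (when use_mad is set, the same formulas still propose the pair, but candidates are scored by weighted absolute error instead). The normal equations of minimizing \(\sum_i w_i (\text{scale}\cdot l_i + \text{min} - x_i)^2\) give exactly the D, this_scale and this_min expressions in the code:

\[
D = \Big(\sum_i w_i\Big)\Big(\sum_i w_i l_i^2\Big) - \Big(\sum_i w_i l_i\Big)^2,
\qquad
\text{scale} = \frac{\big(\sum_i w_i\big)\big(\sum_i w_i x_i l_i\big) - \big(\sum_i w_i x_i\big)\big(\sum_i w_i l_i\big)}{D},
\qquad
\text{min} = \frac{\big(\sum_i w_i l_i^2\big)\big(\sum_i w_i x_i\big) - \big(\sum_i w_i l_i\big)\big(\sum_i w_i x_i l_i\big)}{D}.
\]
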
#if QK_K == 256
static inline void get_scale_min_k4(int j, const uint8_t * restrict q, uint8_t * restrict d, uint8_t * restrict m) {
    if (j < 4) {
@@ -281,6 +320,8 @@ void quantize_row_q2_K_reference(const float * restrict x, block_q2_K * restrict
    const int nb = k / QK_K;

    uint8_t L[QK_K];
+   uint8_t Laux[16];
+   float weights[16];
    float mins[QK_K/16];
    float scales[QK_K/16];

@@ -291,7 +332,8 @@ void quantize_row_q2_K_reference(const float * restrict x, block_q2_K * restrict
    float max_scale = 0; // as we are deducting the min, scales are always positive
    float max_min = 0;
    for (int j = 0; j < QK_K/16; ++j) {
-       scales[j] = make_qkx1_quants(16, 3, x + 16*j, L + 16*j, &mins[j], 5);
+       for (int l = 0; l < 16; ++l) weights[l] = fabsf(x[16*j + l]);
+       scales[j] = make_qkx2_quants(16, 3, x + 16*j, weights, L + 16*j, &mins[j], Laux, -0.5f, 0.1f, 15, true);
        float scale = scales[j];
        if (scale > max_scale) {
            max_scale = scale;
@@ -637,6 +679,8 @@ void quantize_row_q4_K_reference(const float * restrict x, block_q4_K * restrict
    const int nb = k / QK_K;

    uint8_t L[QK_K];
+   uint8_t Laux[32];
+   float weights[32];
    float mins[QK_K/32];
    float scales[QK_K/32];

@@ -645,7 +689,12 @@ void quantize_row_q4_K_reference(const float * restrict x, block_q4_K * restrict
    float max_scale = 0; // as we are deducting the min, scales are always positive
    float max_min = 0;
    for (int j = 0; j < QK_K/32; ++j) {
-       scales[j] = make_qkx1_quants(32, 15, x + 32*j, L + 32*j, &mins[j], 5);
+       //scales[j] = make_qkx1_quants(32, 15, x + 32*j, L + 32*j, &mins[j], 9, 0.5f);
+       float sum_x2 = 0;
+       for (int l = 0; l < 32; ++l) sum_x2 += x[32*j + l] * x[32*j + l];
+       float av_x = sqrtf(sum_x2/32);
+       for (int l = 0; l < 32; ++l) weights[l] = av_x + fabsf(x[32*j + l]);
+       scales[j] = make_qkx2_quants(32, 15, x + 32*j, weights, L + 32*j, &mins[j], Laux, -1.f, 0.1f, 20, false);
        float scale = scales[j];
        if (scale > max_scale) {
            max_scale = scale;
@@ -798,6 +847,8 @@ void quantize_row_q5_K_reference(const float * restrict x, block_q5_K * restrict
    uint8_t L[QK_K];
    float mins[QK_K/32];
    float scales[QK_K/32];
+   float weights[32];
+   uint8_t Laux[32];
#else
    int8_t L[QK_K];
    float scales[QK_K/16];
@@ -810,7 +861,12 @@ void quantize_row_q5_K_reference(const float * restrict x, block_q5_K * restrict
    float max_scale = 0; // as we are deducting the min, scales are always positive
    float max_min = 0;
    for (int j = 0; j < QK_K/32; ++j) {
-       scales[j] = make_qkx1_quants(32, 31, x + 32*j, L + 32*j, &mins[j], 5);
+       //scales[j] = make_qkx1_quants(32, 31, x + 32*j, L + 32*j, &mins[j], 9, 0.5f);
+       float sum_x2 = 0;
+       for (int l = 0; l < 32; ++l) sum_x2 += x[32*j + l] * x[32*j + l];
+       float av_x = sqrtf(sum_x2/32);
+       for (int l = 0; l < 32; ++l) weights[l] = av_x + fabsf(x[32*j + l]);
+       scales[j] = make_qkx2_quants(32, 31, x + 32*j, weights, L + 32*j, &mins[j], Laux, -0.5f, 0.1f, 15, false);
        float scale = scales[j];
        if (scale > max_scale) {
            max_scale = scale;

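Note the different importance weights the three callers above feed to make_qkx2_quants: Q2_K weights each element by its own magnitude, while Q4_K and Q5_K add the block's RMS value so that small elements are not ignored entirely:

\[
w_i^{\mathrm{Q2\_K}} = |x_i|, \qquad
w_i^{\mathrm{Q4\_K},\,\mathrm{Q5\_K}} = \sqrt{\tfrac{1}{32}\textstyle\sum_{l} x_l^2} + |x_i|.
\]
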
llama-util.h (deleted file, 504 lines)
@@ -1,504 +0,0 @@
// Internal header to be included only by llama.cpp.
// Contains wrappers around OS interfaces.

#ifndef LLAMA_UTIL_H
#define LLAMA_UTIL_H

#include <cstdio>
#include <cstdint>
#include <cerrno>
#include <cstring>
#include <cstdarg>
#include <cstdlib>
#include <climits>

#include <string>
#include <vector>
#include <stdexcept>

#ifdef __has_include
    #if __has_include(<unistd.h>)
        #include <unistd.h>
        #if defined(_POSIX_MAPPED_FILES)
            #include <sys/mman.h>
        #endif
        #if defined(_POSIX_MEMLOCK_RANGE)
            #include <sys/resource.h>
        #endif
    #endif
#endif

#if defined(_WIN32)
    #define WIN32_LEAN_AND_MEAN
    #ifndef NOMINMAX
        #define NOMINMAX
    #endif
    #include <windows.h>
    #include <io.h>
    #include <stdio.h> // for _fseeki64
#endif

#define LLAMA_ASSERT(x) \
    do { \
        if (!(x)) { \
            fprintf(stderr, "LLAMA_ASSERT: %s:%d: %s\n", __FILE__, __LINE__, #x); \
            abort(); \
        } \
    } while (0)

#ifdef __GNUC__
#ifdef __MINGW32__
__attribute__((format(gnu_printf, 1, 2)))
#else
__attribute__((format(printf, 1, 2)))
#endif
#endif
static std::string format(const char * fmt, ...) {
    va_list ap, ap2;
    va_start(ap, fmt);
    va_copy(ap2, ap);
    int size = vsnprintf(NULL, 0, fmt, ap);
    LLAMA_ASSERT(size >= 0 && size < INT_MAX);
    std::vector<char> buf(size + 1);
    int size2 = vsnprintf(buf.data(), size + 1, fmt, ap2);
    LLAMA_ASSERT(size2 == size);
    va_end(ap2);
    va_end(ap);
    return std::string(buf.data(), size);
}

struct llama_file {
    // use FILE * so we don't have to re-open the file to mmap
    FILE * fp;
    size_t size;

    llama_file(const char * fname, const char * mode) {
        fp = std::fopen(fname, mode);
        if (fp == NULL) {
            throw std::runtime_error(format("failed to open %s: %s", fname, strerror(errno)));
        }
        seek(0, SEEK_END);
        size = tell();
        seek(0, SEEK_SET);
    }

    size_t tell() const {
#ifdef _WIN32
        __int64 ret = _ftelli64(fp);
#else
        long ret = std::ftell(fp);
#endif
        LLAMA_ASSERT(ret != -1); // this really shouldn't fail
        return (size_t) ret;
    }

    void seek(size_t offset, int whence) {
#ifdef _WIN32
        int ret = _fseeki64(fp, (__int64) offset, whence);
#else
        int ret = std::fseek(fp, (long) offset, whence);
#endif
        LLAMA_ASSERT(ret == 0); // same
    }

    void read_raw(void * ptr, size_t len) const {
        if (len == 0) {
            return;
        }
        errno = 0;
        std::size_t ret = std::fread(ptr, len, 1, fp);
        if (ferror(fp)) {
            throw std::runtime_error(format("read error: %s", strerror(errno)));
        }
        if (ret != 1) {
            throw std::runtime_error(std::string("unexpectedly reached end of file"));
        }
    }

    std::uint32_t read_u32() {
        std::uint32_t ret;
        read_raw(&ret, sizeof(ret));
        return ret;
    }

    std::string read_string(std::uint32_t len) {
        std::vector<char> chars(len);
        read_raw(chars.data(), len);
        return std::string(chars.data(), len);
    }

    void write_raw(const void * ptr, size_t len) const {
        if (len == 0) {
            return;
        }
        errno = 0;
        size_t ret = std::fwrite(ptr, len, 1, fp);
        if (ret != 1) {
            throw std::runtime_error(format("write error: %s", strerror(errno)));
        }
    }

    void write_u32(std::uint32_t val) {
        write_raw(&val, sizeof(val));
    }

    ~llama_file() {
        if (fp) {
            std::fclose(fp);
        }
    }
};

#if defined(_WIN32)
static std::string llama_format_win_err(DWORD err) {
    LPSTR buf;
    size_t size = FormatMessageA(FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS,
                                 NULL, err, MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), (LPSTR)&buf, 0, NULL);
    if (!size) {
        return "FormatMessageA failed";
    }
    std::string ret(buf, size);
    LocalFree(buf);
    return ret;
}
#endif

struct llama_mmap {
    void * addr;
    size_t size;

    llama_mmap(const llama_mmap &) = delete;

#ifdef _POSIX_MAPPED_FILES
    static constexpr bool SUPPORTED = true;

    llama_mmap(struct llama_file * file, size_t prefetch = (size_t) -1 /* -1 = max value */, bool numa = false) {
        size = file->size;
        int fd = fileno(file->fp);
        int flags = MAP_SHARED;
        // prefetch/readahead impairs performance on NUMA systems
        if (numa) { prefetch = 0; }
#ifdef __linux__
        if (prefetch) { flags |= MAP_POPULATE; }
#endif
        addr = mmap(NULL, file->size, PROT_READ, flags, fd, 0);
        if (addr == MAP_FAILED) {
            throw std::runtime_error(format("mmap failed: %s", strerror(errno)));
        }

        if (prefetch > 0) {
            // Advise the kernel to preload the mapped memory
            if (madvise(addr, std::min(file->size, prefetch), MADV_WILLNEED)) {
                fprintf(stderr, "warning: madvise(.., MADV_WILLNEED) failed: %s\n",
                        strerror(errno));
            }
        }
        if (numa) {
            // advise the kernel not to use readahead
            // (because the next page might not belong on the same node)
            if (madvise(addr, file->size, MADV_RANDOM)) {
                fprintf(stderr, "warning: madvise(.., MADV_RANDOM) failed: %s\n",
                        strerror(errno));
            }
        }
    }

    ~llama_mmap() {
        munmap(addr, size);
    }
#elif defined(_WIN32)
    static constexpr bool SUPPORTED = true;

    llama_mmap(struct llama_file * file, bool prefetch = true, bool numa = false) {
        (void) numa;

        size = file->size;

        HANDLE hFile = (HANDLE) _get_osfhandle(_fileno(file->fp));

        HANDLE hMapping = CreateFileMappingA(hFile, NULL, PAGE_READONLY, 0, 0, NULL);
        DWORD error = GetLastError();

        if (hMapping == NULL) {
            throw std::runtime_error(format("CreateFileMappingA failed: %s", llama_format_win_err(error).c_str()));
        }

        addr = MapViewOfFile(hMapping, FILE_MAP_READ, 0, 0, 0);
        error = GetLastError();
        CloseHandle(hMapping);

        if (addr == NULL) {
            throw std::runtime_error(format("MapViewOfFile failed: %s", llama_format_win_err(error).c_str()));
        }

#if _WIN32_WINNT >= _WIN32_WINNT_WIN8
        if (prefetch) {
            // Advise the kernel to preload the mapped memory
            WIN32_MEMORY_RANGE_ENTRY range;
            range.VirtualAddress = addr;
            range.NumberOfBytes = (SIZE_T)size;
            if (!PrefetchVirtualMemory(GetCurrentProcess(), 1, &range, 0)) {
                fprintf(stderr, "warning: PrefetchVirtualMemory failed: %s\n",
                        llama_format_win_err(GetLastError()).c_str());
            }
        }
#else
#pragma message("warning: You are building for pre-Windows 8; prefetch not supported")
#endif // _WIN32_WINNT >= _WIN32_WINNT_WIN8
    }

    ~llama_mmap() {
        if (!UnmapViewOfFile(addr)) {
            fprintf(stderr, "warning: UnmapViewOfFile failed: %s\n",
                    llama_format_win_err(GetLastError()).c_str());
        }
    }
#else
    static constexpr bool SUPPORTED = false;

    llama_mmap(struct llama_file *, bool prefetch = true, bool numa = false) {
        (void) prefetch;
        (void) numa;

        throw std::runtime_error(std::string("mmap not supported"));
    }
#endif
};

// Represents some region of memory being locked using mlock or VirtualLock;
// will automatically unlock on destruction.
struct llama_mlock {
    void * addr = NULL;
    size_t size = 0;
    bool failed_already = false;

    llama_mlock() {}
    llama_mlock(const llama_mlock &) = delete;

    ~llama_mlock() {
        if (size) {
            raw_unlock(addr, size);
        }
    }

    void init(void * ptr) {
        LLAMA_ASSERT(addr == NULL && size == 0);
        addr = ptr;
    }

    void grow_to(size_t target_size) {
        LLAMA_ASSERT(addr);
        if (failed_already) {
            return;
        }
        size_t granularity = lock_granularity();
        target_size = (target_size + granularity - 1) & ~(granularity - 1);
        if (target_size > size) {
            if (raw_lock((uint8_t *) addr + size, target_size - size)) {
                size = target_size;
            } else {
                failed_already = true;
            }
        }
    }

#ifdef _POSIX_MEMLOCK_RANGE
    static constexpr bool SUPPORTED = true;

    size_t lock_granularity() {
        return (size_t) sysconf(_SC_PAGESIZE);
    }

    #ifdef __APPLE__
        #define MLOCK_SUGGESTION \
            "Try increasing the sysctl values 'vm.user_wire_limit' and 'vm.global_user_wire_limit' and/or " \
            "decreasing 'vm.global_no_user_wire_amount'. Also try increasing RLIMIT_MLOCK (ulimit -l).\n"
    #else
        #define MLOCK_SUGGESTION \
            "Try increasing RLIMIT_MLOCK ('ulimit -l' as root).\n"
    #endif

    bool raw_lock(const void * addr, size_t size) {
        if (!mlock(addr, size)) {
            return true;
        } else {
            char* errmsg = std::strerror(errno);
            bool suggest = (errno == ENOMEM);

            // Check if the resource limit is fine after all
            struct rlimit lock_limit;
            if (suggest && getrlimit(RLIMIT_MEMLOCK, &lock_limit))
                suggest = false;
            if (suggest && (lock_limit.rlim_max > lock_limit.rlim_cur + size))
                suggest = false;

            fprintf(stderr, "warning: failed to mlock %zu-byte buffer (after previously locking %zu bytes): %s\n%s",
                    size, this->size, errmsg, suggest ? MLOCK_SUGGESTION : "");
            return false;
        }
    }

    #undef MLOCK_SUGGESTION

    void raw_unlock(void * addr, size_t size) {
        if (munlock(addr, size)) {
            fprintf(stderr, "warning: failed to munlock buffer: %s\n", std::strerror(errno));
        }
    }
#elif defined(_WIN32)
    static constexpr bool SUPPORTED = true;

    size_t lock_granularity() {
        SYSTEM_INFO si;
        GetSystemInfo(&si);
        return (size_t) si.dwPageSize;
    }

    bool raw_lock(void * ptr, size_t len) {
        for (int tries = 1; ; tries++) {
            if (VirtualLock(ptr, len)) {
                return true;
            }
            if (tries == 2) {
                fprintf(stderr, "warning: failed to VirtualLock %zu-byte buffer (after previously locking %zu bytes): %s\n",
                        len, size, llama_format_win_err(GetLastError()).c_str());
                return false;
            }

            // It failed but this was only the first try; increase the working
            // set size and try again.
            SIZE_T min_ws_size, max_ws_size;
            if (!GetProcessWorkingSetSize(GetCurrentProcess(), &min_ws_size, &max_ws_size)) {
                fprintf(stderr, "warning: GetProcessWorkingSetSize failed: %s\n",
                        llama_format_win_err(GetLastError()).c_str());
                return false;
            }
            // Per MSDN: "The maximum number of pages that a process can lock
            // is equal to the number of pages in its minimum working set minus
            // a small overhead."
            // Hopefully a megabyte is enough overhead:
            size_t increment = len + 1048576;
            // The minimum must be <= the maximum, so we need to increase both:
            min_ws_size += increment;
            max_ws_size += increment;
            if (!SetProcessWorkingSetSize(GetCurrentProcess(), min_ws_size, max_ws_size)) {
                fprintf(stderr, "warning: SetProcessWorkingSetSize failed: %s\n",
                        llama_format_win_err(GetLastError()).c_str());
                return false;
            }
        }
    }

    void raw_unlock(void * ptr, size_t len) {
        if (!VirtualUnlock(ptr, len)) {
            fprintf(stderr, "warning: failed to VirtualUnlock buffer: %s\n",
                    llama_format_win_err(GetLastError()).c_str());
        }
    }
#else
    static constexpr bool SUPPORTED = false;

    size_t lock_granularity() {
        return (size_t) 65536;
    }

    bool raw_lock(const void * addr, size_t len) {
        fprintf(stderr, "warning: mlock not supported on this system\n");
        return false;
    }

    void raw_unlock(const void * addr, size_t len) {}
#endif
};

// Replacement for std::vector<uint8_t> that doesn't require zero-initialization.
struct llama_buffer {
    uint8_t * addr = NULL;
    size_t size = 0;

    llama_buffer() = default;

    void resize(size_t len) {
#ifdef GGML_USE_METAL
        free(addr);
        int result = posix_memalign((void **) &addr, getpagesize(), len);
        if (result == 0) {
            memset(addr, 0, len);
        }
        else {
            addr = NULL;
        }
#else
        delete[] addr;
        addr = new uint8_t[len];
#endif
        size = len;
    }

    ~llama_buffer() {
#ifdef GGML_USE_METAL
        free(addr);
#else
        delete[] addr;
#endif
        addr = NULL;
    }

    // disable copy and move
    llama_buffer(const llama_buffer&) = delete;
    llama_buffer(llama_buffer&&) = delete;
    llama_buffer& operator=(const llama_buffer&) = delete;
    llama_buffer& operator=(llama_buffer&&) = delete;
};

#ifdef GGML_USE_CUBLAS
#include "ggml-cuda.h"
struct llama_ctx_buffer {
    uint8_t * addr = NULL;
    bool is_cuda;
    size_t size = 0;

    llama_ctx_buffer() = default;

    void resize(size_t size) {
        free();

        addr = (uint8_t *) ggml_cuda_host_malloc(size);
        if (addr) {
            is_cuda = true;
        }
        else {
            // fall back to pageable memory
            addr = new uint8_t[size];
            is_cuda = false;
        }
        this->size = size;
    }

    void free() {
        if (addr) {
            if (is_cuda) {
                ggml_cuda_host_free(addr);
            }
            else {
                delete[] addr;
            }
        }
        addr = NULL;
    }

    ~llama_ctx_buffer() {
        free();
    }

    // disable copy and move
    llama_ctx_buffer(const llama_ctx_buffer&) = delete;
    llama_ctx_buffer(llama_ctx_buffer&&) = delete;
    llama_ctx_buffer& operator=(const llama_ctx_buffer&) = delete;
    llama_ctx_buffer& operator=(llama_ctx_buffer&&) = delete;
};
#else
typedef llama_buffer llama_ctx_buffer;
#endif

#endif

llama.h (252 lines changed)
@@ -34,29 +34,18 @@
# define DEPRECATED(func, hint) func
#endif

-#define LLAMA_FILE_MAGIC_GGJT        0x67676a74u // 'ggjt'
-#define LLAMA_FILE_MAGIC_GGLA        0x67676c61u // 'ggla'
-#define LLAMA_FILE_MAGIC_GGMF        0x67676d66u // 'ggmf'
-#define LLAMA_FILE_MAGIC_GGML        0x67676d6cu // 'ggml'
-#define LLAMA_FILE_MAGIC_GGSN        0x6767736eu // 'ggsn'
-#define LLAMA_DEFAULT_SEED           0xFFFFFFFF
-
-#define LLAMA_FILE_VERSION           3
-#define LLAMA_FILE_MAGIC             LLAMA_FILE_MAGIC_GGJT
-#define LLAMA_FILE_MAGIC_UNVERSIONED LLAMA_FILE_MAGIC_GGML
-#define LLAMA_SESSION_MAGIC          LLAMA_FILE_MAGIC_GGSN
-#define LLAMA_SESSION_VERSION        1
+#define LLAMA_FILE_MAGIC_GGSN 0x6767736eu // 'ggsn'
+
+#define LLAMA_DEFAULT_SEED    0xFFFFFFFF
+#define LLAMA_SESSION_MAGIC   LLAMA_FILE_MAGIC_GGSN
+#define LLAMA_SESSION_VERSION 1

#if defined(GGML_USE_CUBLAS) || defined(GGML_USE_CLBLAST) || defined(GGML_USE_METAL)
// Defined when llama.cpp is compiled with support for offloading model layers to GPU.
#define LLAMA_SUPPORTS_GPU_OFFLOAD
#endif

#ifndef LLAMA_DEFAULT_RMS_EPS
#define LLAMA_DEFAULT_RMS_EPS 5e-6f
#endif

#ifdef __cplusplus
extern "C" {
#endif
@@ -72,6 +61,52 @@ extern "C" {

    typedef int llama_token;

+   enum llama_log_level {
+       LLAMA_LOG_LEVEL_ERROR = 2,
+       LLAMA_LOG_LEVEL_WARN = 3,
+       LLAMA_LOG_LEVEL_INFO = 4
+   };
+
+   enum llama_vocab_type {
+       LLAMA_VOCAB_TYPE_SPM = 0, // SentencePiece
+       LLAMA_VOCAB_TYPE_BPE = 1, // Byte Pair Encoding
+   };
+
+   enum llama_token_type {
+       LLAMA_TOKEN_TYPE_UNDEFINED = 0,
+       LLAMA_TOKEN_TYPE_NORMAL = 1,
+       LLAMA_TOKEN_TYPE_UNKNOWN = 2,
+       LLAMA_TOKEN_TYPE_CONTROL = 3,
+       LLAMA_TOKEN_TYPE_USER_DEFINED = 4,
+       LLAMA_TOKEN_TYPE_UNUSED = 5,
+       LLAMA_TOKEN_TYPE_BYTE = 6,
+   };
+
+   // model file types
+   enum llama_ftype {
+       LLAMA_FTYPE_ALL_F32 = 0,
+       LLAMA_FTYPE_MOSTLY_F16 = 1,  // except 1d tensors
+       LLAMA_FTYPE_MOSTLY_Q4_0 = 2, // except 1d tensors
+       LLAMA_FTYPE_MOSTLY_Q4_1 = 3, // except 1d tensors
+       LLAMA_FTYPE_MOSTLY_Q4_1_SOME_F16 = 4, // tok_embeddings.weight and output.weight are F16
+       // LLAMA_FTYPE_MOSTLY_Q4_2 = 5, // support has been removed
+       // LLAMA_FTYPE_MOSTLY_Q4_3 = 6, // support has been removed
+       LLAMA_FTYPE_MOSTLY_Q8_0 = 7,  // except 1d tensors
+       LLAMA_FTYPE_MOSTLY_Q5_0 = 8,  // except 1d tensors
+       LLAMA_FTYPE_MOSTLY_Q5_1 = 9,  // except 1d tensors
+       LLAMA_FTYPE_MOSTLY_Q2_K = 10, // except 1d tensors
+       LLAMA_FTYPE_MOSTLY_Q3_K_S = 11, // except 1d tensors
+       LLAMA_FTYPE_MOSTLY_Q3_K_M = 12, // except 1d tensors
+       LLAMA_FTYPE_MOSTLY_Q3_K_L = 13, // except 1d tensors
+       LLAMA_FTYPE_MOSTLY_Q4_K_S = 14, // except 1d tensors
+       LLAMA_FTYPE_MOSTLY_Q4_K_M = 15, // except 1d tensors
+       LLAMA_FTYPE_MOSTLY_Q5_K_S = 16, // except 1d tensors
+       LLAMA_FTYPE_MOSTLY_Q5_K_M = 17, // except 1d tensors
+       LLAMA_FTYPE_MOSTLY_Q6_K = 18, // except 1d tensors
+
+       LLAMA_FTYPE_GUESSED = 1024, // not specified in the model file
+   };
+
    typedef struct llama_token_data {
        llama_token id; // token id
        float logit;    // log-odds of the token
@@ -86,12 +121,10 @@ extern "C" {

    typedef void (*llama_progress_callback)(float progress, void *ctx);

-   struct llama_context_params {
+   struct llama_context_params {
        uint32_t seed;        // RNG seed, -1 for random
        int32_t n_ctx;        // text context
        int32_t n_batch;      // prompt processing batch size
-       int32_t n_gqa;        // grouped-query attention (TEMP - will be moved to model hparams)
-       float rms_norm_eps;   // rms norm epsilon (TEMP - will be moved to model hparams)
        int32_t n_gpu_layers; // number of layers to store in VRAM
        int32_t main_gpu;     // the GPU that is used for scratch and small tensors
@@ -116,33 +149,18 @@ extern "C" {
        bool use_mlock; // force system to keep model in RAM
        bool embedding; // embedding mode only
    };
-   // model file types
-   enum llama_ftype {
-       LLAMA_FTYPE_ALL_F32 = 0,
-       LLAMA_FTYPE_MOSTLY_F16 = 1,  // except 1d tensors
-       LLAMA_FTYPE_MOSTLY_Q4_0 = 2, // except 1d tensors
-       LLAMA_FTYPE_MOSTLY_Q4_1 = 3, // except 1d tensors
-       LLAMA_FTYPE_MOSTLY_Q4_1_SOME_F16 = 4, // tok_embeddings.weight and output.weight are F16
-       // LLAMA_FTYPE_MOSTLY_Q4_2 = 5, // support has been removed
-       // LLAMA_FTYPE_MOSTLY_Q4_3 = 6, // support has been removed
-       LLAMA_FTYPE_MOSTLY_Q8_0 = 7,  // except 1d tensors
-       LLAMA_FTYPE_MOSTLY_Q5_0 = 8,  // except 1d tensors
-       LLAMA_FTYPE_MOSTLY_Q5_1 = 9,  // except 1d tensors
-       LLAMA_FTYPE_MOSTLY_Q2_K = 10, // except 1d tensors
-       LLAMA_FTYPE_MOSTLY_Q3_K_S = 11, // except 1d tensors
-       LLAMA_FTYPE_MOSTLY_Q3_K_M = 12, // except 1d tensors
-       LLAMA_FTYPE_MOSTLY_Q3_K_L = 13, // except 1d tensors
-       LLAMA_FTYPE_MOSTLY_Q4_K_S = 14, // except 1d tensors
-       LLAMA_FTYPE_MOSTLY_Q4_K_M = 15, // except 1d tensors
-       LLAMA_FTYPE_MOSTLY_Q5_K_S = 16, // except 1d tensors
-       LLAMA_FTYPE_MOSTLY_Q5_K_M = 17, // except 1d tensors
-       LLAMA_FTYPE_MOSTLY_Q6_K = 18, // except 1d tensors
-   };
||||
|
||||
// Signature for logging events
|
||||
// Note that text includes the new line character at the end for most events.
|
||||
// If your logging mechanism cannot handle that, check if the last character is '\n' and strip it
|
||||
// if it exists.
|
||||
// It might not exist for progress report where '.' is output repeatedly.
|
||||
typedef void (*llama_log_callback)(enum llama_log_level level, const char * text, void * user_data);
|
||||
|
||||
// model quantization parameters
|
||||
typedef struct llama_model_quantize_params {
|
||||
int nthread; // number of threads to use for quantizing, if <=0 will use std::thread::hardware_concurrency()
|
||||
enum llama_ftype ftype; // quantize to this llama_ftype
|
||||
enum llama_ftype ftype; // quantize to this llama_ftype
|
||||
bool allow_requantize; // allow quantizing non-f32/f16 tensors
|
||||
bool quantize_output_tensor; // quantize output.weight
|
||||
} llama_model_quantize_params;
|
||||
@@ -195,23 +213,16 @@ extern "C" {
|
||||
int32_t n_eval;
|
||||
};
|
||||
|
||||
LLAMA_API int llama_max_devices();
|
||||
LLAMA_API struct llama_context_params llama_context_default_params(void);
|
||||
LLAMA_API struct llama_model_quantize_params llama_model_quantize_default_params(void);
|
||||
|
||||
LLAMA_API struct llama_context_params llama_context_default_params();
|
||||
LLAMA_API struct llama_model_quantize_params llama_model_quantize_default_params();
|
||||
|
||||
LLAMA_API bool llama_mmap_supported();
|
||||
LLAMA_API bool llama_mlock_supported();
|
||||
|
||||
// TODO: not great API - very likely to change
|
||||
// Initialize the llama + ggml backend
|
||||
// If numa is true, use NUMA optimizations
|
||||
// Call once at the start of the program
|
||||
LLAMA_API void llama_backend_init(bool numa);
|
||||
// Call once at the end of the program - currently only used for MPI
|
||||
LLAMA_API void llama_backend_free();
|
||||
|
||||
LLAMA_API int64_t llama_time_us();
|
||||
// Call once at the end of the program - currently only used for MPI
|
||||
LLAMA_API void llama_backend_free(void);
|
||||
|
||||
LLAMA_API struct llama_model * llama_load_model_from_file(
|
||||
const char * path_model,
|
||||
@@ -223,17 +234,26 @@ extern "C" {
|
||||
struct llama_model * model,
|
||||
struct llama_context_params params);
|
||||
|
||||
// Various functions for loading a ggml llama model.
|
||||
// Allocate (almost) all memory needed for the model.
|
||||
// Return NULL on failure
|
||||
LLAMA_API DEPRECATED(struct llama_context * llama_init_from_file(
|
||||
const char * path_model,
|
||||
struct llama_context_params params),
|
||||
"please use llama_load_model_from_file combined with llama_new_context_with_model instead");
|
||||
|
||||
// Frees all allocated memory
|
||||
LLAMA_API void llama_free(struct llama_context * ctx);
|
||||
|
||||
LLAMA_API int64_t llama_time_us(void);
|
||||
|
||||
LLAMA_API int llama_max_devices (void);
|
||||
LLAMA_API bool llama_mmap_supported (void);
|
||||
LLAMA_API bool llama_mlock_supported(void);
|
||||
|
||||
LLAMA_API int llama_n_vocab(const struct llama_context * ctx);
|
||||
LLAMA_API int llama_n_ctx (const struct llama_context * ctx);
|
||||
LLAMA_API int llama_n_embd (const struct llama_context * ctx);
|
||||
|
||||
LLAMA_API int llama_model_n_vocab(const struct llama_model * model);
|
||||
LLAMA_API int llama_model_n_ctx (const struct llama_model * model);
|
||||
LLAMA_API int llama_model_n_embd (const struct llama_model * model);
|
||||
|
||||
// Get a string describing the model type
|
||||
LLAMA_API int llama_model_type(const struct llama_model * model, char * buf, size_t buf_size);
|
||||
|
||||
// Returns 0 on success
|
||||
LLAMA_API int llama_model_quantize(
|
||||
const char * fname_inp,
|
||||
@@ -255,9 +275,9 @@ extern "C" {
|
||||
|
||||
LLAMA_API int llama_model_apply_lora_from_file(
|
||||
const struct llama_model * model,
|
||||
const char * path_lora,
|
||||
const char * path_base_model,
|
||||
int n_threads);
|
||||
const char * path_lora,
|
||||
const char * path_base_model,
|
||||
int n_threads);
|
||||
|
||||
// Returns the number of tokens in the KV cache
|
||||
LLAMA_API int llama_get_kv_cache_token_count(const struct llama_context * ctx);
|
||||
@@ -307,11 +327,40 @@ extern "C" {
|
||||
// IMPORTANT: do not use for anything else other than debugging and testing!
|
||||
LLAMA_API int llama_eval_export(struct llama_context * ctx, const char * fname);
|
||||
|
||||
// Token logits obtained from the last call to llama_eval()
|
||||
// The logits for the last token are stored in the last row
|
||||
// Can be mutated in order to change the probabilities of the next token
|
||||
// Rows: n_tokens
|
||||
// Cols: n_vocab
|
||||
LLAMA_API float * llama_get_logits(struct llama_context * ctx);
|
||||
|
||||
// Get the embeddings for the input
|
||||
// shape: [n_embd] (1-dimensional)
|
||||
LLAMA_API float * llama_get_embeddings(struct llama_context * ctx);
|
||||
|
||||
//
|
||||
// Vocab
|
||||
//
|
||||
|
||||
LLAMA_API const char * llama_token_get_text(const struct llama_context * ctx, llama_token token);
|
||||
|
||||
LLAMA_API float llama_token_get_score(const struct llama_context * ctx, llama_token token);
|
||||
|
||||
LLAMA_API llama_token_type llama_token_get_type(const struct llama_context * ctx, llama_token token);
|
||||
|
||||
// Special tokens
|
||||
LLAMA_API llama_token llama_token_bos(const struct llama_context * ctx); // beginning-of-sentence
|
||||
LLAMA_API llama_token llama_token_eos(const struct llama_context * ctx); // end-of-sentence
|
||||
LLAMA_API llama_token llama_token_nl (const struct llama_context * ctx); // next-line
|
||||
|
||||
//
|
||||
// Tokenization
|
||||
//
|
||||
|
||||
// Convert the provided text into tokens.
|
||||
// The tokens pointer must be large enough to hold the resulting tokens.
|
||||
// Returns the number of tokens on success, no more than n_max_tokens
|
||||
// Returns a negative number on failure - the number of tokens that would have been returned
|
||||
// TODO: not sure if correct
|
||||
LLAMA_API int llama_tokenize(
|
||||
struct llama_context * ctx,
|
||||
const char * text,
|
||||
@@ -319,6 +368,13 @@ extern "C" {
|
||||
int n_max_tokens,
|
||||
bool add_bos);
|
||||
|
||||
LLAMA_API int llama_tokenize_bpe(
|
||||
struct llama_context * ctx,
|
||||
const char * text,
|
||||
llama_token * tokens,
|
||||
int n_max_tokens,
|
||||
bool add_bos);
|
||||
|
||||
LLAMA_API int llama_tokenize_with_model(
|
||||
const struct llama_model * model,
|
||||
const char * text,
|
||||
@@ -326,55 +382,30 @@ extern "C" {
|
||||
int n_max_tokens,
|
||||
bool add_bos);
|
||||
|
||||
LLAMA_API int llama_n_vocab(const struct llama_context * ctx);
|
||||
LLAMA_API int llama_n_ctx (const struct llama_context * ctx);
|
||||
LLAMA_API int llama_n_embd (const struct llama_context * ctx);
|
||||
|
||||
LLAMA_API int llama_n_vocab_from_model(const struct llama_model * model);
|
||||
LLAMA_API int llama_n_ctx_from_model (const struct llama_model * model);
|
||||
LLAMA_API int llama_n_embd_from_model (const struct llama_model * model);
|
||||
|
||||
// Get the vocabulary as output parameters.
|
||||
// Returns number of results.
|
||||
LLAMA_API int llama_get_vocab(
|
||||
const struct llama_context * ctx,
|
||||
const char * * strings,
|
||||
float * scores,
|
||||
int capacity);
|
||||
|
||||
LLAMA_API int llama_get_vocab_from_model(
|
||||
const struct llama_model * model,
|
||||
const char * * strings,
|
||||
float * scores,
|
||||
int capacity);
|
||||
|
||||
// Token logits obtained from the last call to llama_eval()
|
||||
// The logits for the last token are stored in the last row
|
||||
// Can be mutated in order to change the probabilities of the next token
|
||||
// Rows: n_tokens
|
||||
// Cols: n_vocab
|
||||
LLAMA_API float * llama_get_logits(struct llama_context * ctx);
|
||||
|
||||
// Get the embeddings for the input
|
||||
// shape: [n_embd] (1-dimensional)
|
||||
LLAMA_API float * llama_get_embeddings(struct llama_context * ctx);
|
||||
|
||||
// Token Id -> String. Uses the vocabulary in the provided context
|
||||
LLAMA_API const char * llama_token_to_str(
|
||||
// Does not write null terminator to the buffer
|
||||
LLAMA_API int llama_token_to_str(
|
||||
const struct llama_context * ctx,
|
||||
llama_token token);
|
||||
llama_token token,
|
||||
char * buf,
|
||||
int length);
|
||||
|
||||
LLAMA_API const char * llama_token_to_str_with_model(
|
||||
LLAMA_API int llama_token_to_str_bpe(
|
||||
const struct llama_context * ctx,
|
||||
llama_token token,
|
||||
char * buf,
|
||||
int length);
|
||||
|
||||
LLAMA_API int llama_token_to_str_with_model(
|
||||
const struct llama_model * model,
|
||||
llama_token token);
|
||||
|
||||
// Special tokens
|
||||
LLAMA_API llama_token llama_token_bos(); // beginning-of-sentence
|
||||
LLAMA_API llama_token llama_token_eos(); // end-of-sentence
|
||||
LLAMA_API llama_token llama_token_nl(); // next-line
|
||||
llama_token token,
|
||||
char * buf,
|
||||
int length);
|
||||
|
||||
//
|
||||
// Grammar
|
||||
//
|
||||
|
||||
LLAMA_API struct llama_grammar * llama_grammar_init(
|
||||
const llama_grammar_element ** rules,
|
||||
size_t n_rules,
|
||||
@@ -382,7 +413,9 @@ extern "C" {
|
||||
|
||||
LLAMA_API void llama_grammar_free(struct llama_grammar * grammar);
|
||||
|
||||
//
|
||||
// Sampling functions
|
||||
//
|
||||
|
||||
/// @details Repetition penalty described in CTRL academic paper https://arxiv.org/abs/1909.05858, with negative logit fix.
|
||||
LLAMA_API void llama_sample_repetition_penalty(struct llama_context * ctx, llama_token_data_array * candidates, const llama_token * last_tokens, size_t last_tokens_size, float penalty);
|
||||
@@ -451,6 +484,10 @@ extern "C" {
|
||||
// Print system information
|
||||
LLAMA_API const char * llama_print_system_info(void);
|
||||
|
||||
// Set callback for all future logging events.
|
||||
// If this is not called, or NULL is supplied, everything is output on stderr.
|
||||
LLAMA_API void llama_log_set(llama_log_callback log_callback, void * user_data);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
@@ -460,10 +497,11 @@ extern "C" {
|
||||
|
||||
#include <vector>
|
||||
#include <string>
|
||||
|
||||
struct ggml_tensor;
|
||||
|
||||
const std::vector<std::pair<std::string, struct ggml_tensor *>>& llama_internal_get_tensor_map(struct llama_context * ctx);
|
||||
|
||||
#endif
|
||||
#endif // LLAMA_API_INTERNAL
|
||||
|
||||
#endif // LLAMA_H
|
||||
|
||||
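For reference, a minimal sketch of how the reworked API above fits together end to end (illustrative only: the model path, buffer sizes, and prompt are placeholders, and error handling is mostly omitted):

#include "llama.h"

#include <cstdio>
#include <vector>

int main() {
    llama_backend_init(false); // numa = false

    const llama_context_params params = llama_context_default_params();

    llama_model   * model = llama_load_model_from_file("model.gguf", params); // placeholder path
    llama_context * ctx   = llama_new_context_with_model(model, params);

    // tokenization now goes through the context-based API
    std::vector<llama_token> tokens(64);
    int n = llama_tokenize(ctx, "Hello World", tokens.data(), (int) tokens.size(), true /*add_bos*/);
    if (n < 0) { n = 0; } // a negative return means the output buffer was too small
    tokens.resize(n);

    // llama_token_to_str() now fills a caller-provided buffer and returns the length
    // (no null terminator is written)
    char buf[64];
    for (llama_token t : tokens) {
        const int len = llama_token_to_str(ctx, t, buf, (int) sizeof(buf));
        if (len > 0) {
            fprintf(stderr, "%d -> '%.*s'\n", t, len, buf);
        }
    }

    llama_free(ctx);
    llama_free_model(model);
    llama_backend_free();
    return 0;
}

Note that llama_init_from_file() remains only as a deprecated wrapper around the two-step load shown above.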
models/.editorconfig (new file, 1 line)
@@ -0,0 +1 @@
root = true
models/ggml-vocab-llama.gguf (new binary file; contents not shown)
scripts/get-wikitext-2.sh (new file, 3 lines)
@@ -0,0 +1,3 @@
#!/bin/bash

wget https://s3.amazonaws.com/research.metamind.io/wikitext/wikitext-2-raw-v1.zip
scripts/sync-ggml.sh
@@ -1,14 +1,16 @@
#!/bin/bash

cp -rpv ../ggml/src/ggml.c ./ggml.c
cp -rpv ../ggml/src/ggml-cuda.h ./ggml-cuda.h
cp -rpv ../ggml/src/ggml-cuda.cu ./ggml-cuda.cu
cp -rpv ../ggml/src/ggml-opencl.h ./ggml-opencl.h
cp -rpv ../ggml/src/ggml-opencl.cpp ./ggml-opencl.cpp
cp -rpv ../ggml/src/ggml-metal.h ./ggml-metal.h
cp -rpv ../ggml/src/ggml-metal.m ./ggml-metal.m
cp -rpv ../ggml/src/ggml-metal.metal ./ggml-metal.metal
cp -rpv ../ggml/include/ggml/ggml.h ./ggml.h
cp -rpv ../ggml/src/ggml.c ./ggml.c
cp -rpv ../ggml/src/ggml-alloc.c ./ggml-alloc.c
cp -rpv ../ggml/src/ggml-cuda.h ./ggml-cuda.h
cp -rpv ../ggml/src/ggml-cuda.cu ./ggml-cuda.cu
cp -rpv ../ggml/src/ggml-opencl.h ./ggml-opencl.h
cp -rpv ../ggml/src/ggml-opencl.cpp ./ggml-opencl.cpp
cp -rpv ../ggml/src/ggml-metal.h ./ggml-metal.h
cp -rpv ../ggml/src/ggml-metal.m ./ggml-metal.m
cp -rpv ../ggml/src/ggml-metal.metal ./ggml-metal.metal
cp -rpv ../ggml/include/ggml/ggml.h ./ggml.h
cp -rpv ../ggml/include/ggml/ggml-alloc.h ./ggml-alloc.h

cp -rpv ../ggml/tests/test-opt.cpp ./tests/test-opt.cpp
cp -rpv ../ggml/tests/test-grad0.cpp ./tests/test-grad0.cpp
tests/CMakeLists.txt
@@ -1,15 +1,36 @@
function(llama_add_test source)
function(llama_build_executable source)
    get_filename_component(TEST_TARGET ${source} NAME_WE)
    add_executable(${TEST_TARGET} ${source})
    install(TARGETS ${TEST_TARGET} RUNTIME)
    target_link_libraries(${TEST_TARGET} PRIVATE llama)
    target_link_libraries(${TEST_TARGET} PRIVATE llama common)
endfunction()

function(llama_test_executable name source)
    get_filename_component(TEST_TARGET ${source} NAME_WE)
    # add_executable(${TEST_TARGET} ${source})
    # install(TARGETS ${TEST_TARGET} RUNTIME)
    # target_link_libraries(${TEST_TARGET} PRIVATE llama)
    add_test(NAME ${name} COMMAND $<TARGET_FILE:${TEST_TARGET}> ${ARGN})
endfunction()

function(llama_build_and_test_executable source)
    get_filename_component(TEST_TARGET ${source} NAME_WE)
    add_executable(${TEST_TARGET} ${source})
    install(TARGETS ${TEST_TARGET} RUNTIME)
    target_link_libraries(${TEST_TARGET} PRIVATE llama common)
    add_test(NAME ${TEST_TARGET} COMMAND $<TARGET_FILE:${TEST_TARGET}> ${ARGN})
endfunction()

# llama_add_test(test-double-float.cpp) # SLOW
llama_add_test(test-quantize-fns.cpp)
llama_add_test(test-quantize-perf.cpp)
llama_add_test(test-sampling.cpp)
llama_add_test(test-tokenizer-0.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab.bin)
llama_add_test(test-grad0.cpp) # SLOW
# llama_add_test(test-opt.cpp) # SLOW
# llama_build_and_test_executable(test-double-float.cpp) # SLOW
llama_build_and_test_executable(test-quantize-fns.cpp)
llama_build_and_test_executable(test-quantize-perf.cpp)
llama_build_and_test_executable(test-sampling.cpp)
llama_build_executable(test-tokenizer-0.cpp)
llama_test_executable (test-tokenizer-0.llama test-tokenizer-0.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-llama.gguf)
llama_build_executable(test-tokenizer-1.cpp)
llama_test_executable (test-tokenizer-1.llama test-tokenizer-1.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-llama.gguf)
#llama_test_executable(test-tokenizer-1.aquila test-tokenizer-1.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-aquila.gguf)
llama_build_and_test_executable(test-grammar-parser.cpp)
llama_build_and_test_executable(test-llama-grammar.cpp)
llama_build_and_test_executable(test-grad0.cpp) # SLOW
# llama_build_and_test_executable(test-opt.cpp) # SLOW
tests/test-grammar-parser.cpp (new file, 250 lines)
@@ -0,0 +1,250 @@
|
||||
#ifdef NDEBUG
|
||||
#undef NDEBUG
|
||||
#endif
|
||||
|
||||
#include "llama.h"
|
||||
#include "grammar-parser.h"
|
||||
|
||||
#include <cassert>
|
||||
|
||||
int main()
|
||||
{
|
||||
grammar_parser::parse_state parsed_grammar;
|
||||
|
||||
const char *grammar_bytes = R"""(root ::= (expr "=" term "\n")+
|
||||
expr ::= term ([-+*/] term)*
|
||||
term ::= [0-9]+)""";
|
||||
|
||||
parsed_grammar = grammar_parser::parse(grammar_bytes);
|
||||
|
||||
std::vector<std::pair<std::string, uint32_t>> expected = {
|
||||
{"expr", 2},
|
||||
{"expr_5", 5},
|
||||
{"expr_6", 6},
|
||||
{"root", 0},
|
||||
{"root_1", 1},
|
||||
{"root_4", 4},
|
||||
{"term", 3},
|
||||
{"term_7", 7},
|
||||
};
|
||||
|
||||
uint32_t index = 0;
|
||||
for (auto it = parsed_grammar.symbol_ids.begin(); it != parsed_grammar.symbol_ids.end(); ++it)
|
||||
{
|
||||
std::string key = it->first;
|
||||
uint32_t value = it->second;
|
||||
std::pair<std::string, uint32_t> expected_pair = expected[index];
|
||||
|
||||
// pretty print error message before asserting
|
||||
if (expected_pair.first != key || expected_pair.second != value)
|
||||
{
|
||||
fprintf(stderr, "expected_pair: %s, %d\n", expected_pair.first.c_str(), expected_pair.second);
|
||||
fprintf(stderr, "actual_pair: %s, %d\n", key.c_str(), value);
|
||||
fprintf(stderr, "expected_pair != actual_pair\n");
|
||||
}
|
||||
|
||||
assert(expected_pair.first == key && expected_pair.second == value);
|
||||
|
||||
index++;
|
||||
}
|
||||
std::vector<llama_grammar_element> expected_rules = {
|
||||
{LLAMA_GRETYPE_RULE_REF, 4},
|
||||
{LLAMA_GRETYPE_END, 0},
|
||||
{LLAMA_GRETYPE_RULE_REF, 2},
|
||||
{LLAMA_GRETYPE_CHAR, 61},
|
||||
{LLAMA_GRETYPE_RULE_REF, 3},
|
||||
{LLAMA_GRETYPE_CHAR, 10},
|
||||
{LLAMA_GRETYPE_END, 0},
|
||||
{LLAMA_GRETYPE_RULE_REF, 3},
|
||||
{LLAMA_GRETYPE_RULE_REF, 6},
|
||||
{LLAMA_GRETYPE_END, 0},
|
||||
{LLAMA_GRETYPE_RULE_REF, 7},
|
||||
{LLAMA_GRETYPE_END, 0},
|
||||
{LLAMA_GRETYPE_RULE_REF, 1},
|
||||
{LLAMA_GRETYPE_RULE_REF, 4},
|
||||
{LLAMA_GRETYPE_ALT, 0},
|
||||
{LLAMA_GRETYPE_RULE_REF, 1},
|
||||
{LLAMA_GRETYPE_END, 0},
|
||||
{LLAMA_GRETYPE_CHAR, 45},
|
||||
{LLAMA_GRETYPE_CHAR_ALT, 43},
|
||||
{LLAMA_GRETYPE_CHAR_ALT, 42},
|
||||
{LLAMA_GRETYPE_CHAR_ALT, 47},
|
||||
{LLAMA_GRETYPE_RULE_REF, 3},
|
||||
{LLAMA_GRETYPE_END, 0},
|
||||
{LLAMA_GRETYPE_RULE_REF, 5},
|
||||
{LLAMA_GRETYPE_RULE_REF, 6},
|
||||
{LLAMA_GRETYPE_ALT, 0},
|
||||
{LLAMA_GRETYPE_END, 0},
|
||||
{LLAMA_GRETYPE_CHAR, 48},
|
||||
{LLAMA_GRETYPE_CHAR_RNG_UPPER, 57},
|
||||
{LLAMA_GRETYPE_RULE_REF, 7},
|
||||
{LLAMA_GRETYPE_ALT, 0},
|
||||
{LLAMA_GRETYPE_CHAR, 48},
|
||||
{LLAMA_GRETYPE_CHAR_RNG_UPPER, 57},
|
||||
{LLAMA_GRETYPE_END, 0},
|
||||
};
|
||||
|
||||
index = 0;
|
||||
for (auto rule : parsed_grammar.rules)
|
||||
{
|
||||
// compare rule to expected rule
|
||||
for (uint32_t i = 0; i < rule.size(); i++)
|
||||
{
|
||||
llama_grammar_element element = rule[i];
|
||||
llama_grammar_element expected_element = expected_rules[index];
|
||||
|
||||
// pretty print error message before asserting
|
||||
if (expected_element.type != element.type || expected_element.value != element.value)
|
||||
{
|
||||
fprintf(stderr, "index: %d\n", index);
|
||||
fprintf(stderr, "expected_element: %d, %d\n", expected_element.type, expected_element.value);
|
||||
fprintf(stderr, "actual_element: %d, %d\n", element.type, element.value);
|
||||
fprintf(stderr, "expected_element != actual_element\n");
|
||||
}
|
||||
|
||||
assert(expected_element.type == element.type && expected_element.value == element.value);
|
||||
index++;
|
||||
}
|
||||
}
|
||||
|
||||
const char *longer_grammar_bytes = R"""(
|
||||
root ::= (expr "=" ws term "\n")+
|
||||
expr ::= term ([-+*/] term)*
|
||||
term ::= ident | num | "(" ws expr ")" ws
|
||||
ident ::= [a-z] [a-z0-9_]* ws
|
||||
num ::= [0-9]+ ws
|
||||
ws ::= [ \t\n]*
|
||||
)""";
|
||||
|
||||
parsed_grammar = grammar_parser::parse(longer_grammar_bytes);
|
||||
|
||||
expected = {
|
||||
{"expr", 2},
|
||||
{"expr_6", 6},
|
||||
{"expr_7", 7},
|
||||
{"ident", 8},
|
||||
{"ident_10", 10},
|
||||
{"num", 9},
|
||||
{"num_11", 11},
|
||||
{"root", 0},
|
||||
{"root_1", 1},
|
||||
{"root_5", 5},
|
||||
{"term", 4},
|
||||
{"ws", 3},
|
||||
{"ws_12", 12},
|
||||
};
|
||||
|
||||
index = 0;
|
||||
for (auto it = parsed_grammar.symbol_ids.begin(); it != parsed_grammar.symbol_ids.end(); ++it)
|
||||
{
|
||||
std::string key = it->first;
|
||||
uint32_t value = it->second;
|
||||
std::pair<std::string, uint32_t> expected_pair = expected[index];
|
||||
|
||||
// pretty print error message before asserting
|
||||
if (expected_pair.first != key || expected_pair.second != value)
|
||||
{
|
||||
fprintf(stderr, "expected_pair: %s, %d\n", expected_pair.first.c_str(), expected_pair.second);
|
||||
fprintf(stderr, "actual_pair: %s, %d\n", key.c_str(), value);
|
||||
fprintf(stderr, "expected_pair != actual_pair\n");
|
||||
}
|
||||
|
||||
assert(expected_pair.first == key && expected_pair.second == value);
|
||||
|
||||
index++;
|
||||
}
|
||||
expected_rules = {
|
||||
{LLAMA_GRETYPE_RULE_REF, 5},
|
||||
{LLAMA_GRETYPE_END, 0},
|
||||
{LLAMA_GRETYPE_RULE_REF, 2},
|
||||
{LLAMA_GRETYPE_CHAR, 61},
|
||||
{LLAMA_GRETYPE_RULE_REF, 3},
|
||||
{LLAMA_GRETYPE_RULE_REF, 4},
|
||||
{LLAMA_GRETYPE_CHAR, 10},
|
||||
{LLAMA_GRETYPE_END, 0},
|
||||
{LLAMA_GRETYPE_RULE_REF, 4},
|
||||
{LLAMA_GRETYPE_RULE_REF, 7},
|
||||
{LLAMA_GRETYPE_END, 0},
|
||||
{LLAMA_GRETYPE_RULE_REF, 12},
|
||||
{LLAMA_GRETYPE_END, 0},
|
||||
{LLAMA_GRETYPE_RULE_REF, 8},
|
||||
{LLAMA_GRETYPE_ALT, 0},
|
||||
{LLAMA_GRETYPE_RULE_REF, 9},
|
||||
{LLAMA_GRETYPE_ALT, 0},
|
||||
{LLAMA_GRETYPE_CHAR, 40},
|
||||
{LLAMA_GRETYPE_RULE_REF, 3},
|
||||
{LLAMA_GRETYPE_RULE_REF, 2},
|
||||
{LLAMA_GRETYPE_CHAR, 41},
|
||||
{LLAMA_GRETYPE_RULE_REF, 3},
|
||||
{LLAMA_GRETYPE_END, 0},
|
||||
{LLAMA_GRETYPE_RULE_REF, 1},
|
||||
{LLAMA_GRETYPE_RULE_REF, 5},
|
||||
{LLAMA_GRETYPE_ALT, 0},
|
||||
{LLAMA_GRETYPE_RULE_REF, 1},
|
||||
{LLAMA_GRETYPE_END, 0},
|
||||
{LLAMA_GRETYPE_CHAR, 45},
|
||||
{LLAMA_GRETYPE_CHAR_ALT, 43},
|
||||
{LLAMA_GRETYPE_CHAR_ALT, 42},
|
||||
{LLAMA_GRETYPE_CHAR_ALT, 47},
|
||||
{LLAMA_GRETYPE_RULE_REF, 4},
|
||||
{LLAMA_GRETYPE_END, 0},
|
||||
{LLAMA_GRETYPE_RULE_REF, 6},
|
||||
{LLAMA_GRETYPE_RULE_REF, 7},
|
||||
{LLAMA_GRETYPE_ALT, 0},
|
||||
{LLAMA_GRETYPE_END, 0},
|
||||
{LLAMA_GRETYPE_CHAR, 97},
|
||||
{LLAMA_GRETYPE_CHAR_RNG_UPPER, 122},
|
||||
{LLAMA_GRETYPE_RULE_REF, 10},
|
||||
{LLAMA_GRETYPE_RULE_REF, 3},
|
||||
{LLAMA_GRETYPE_END, 0},
|
||||
{LLAMA_GRETYPE_RULE_REF, 11},
|
||||
{LLAMA_GRETYPE_RULE_REF, 3},
|
||||
{LLAMA_GRETYPE_END, 0},
|
||||
{LLAMA_GRETYPE_CHAR, 97},
|
||||
{LLAMA_GRETYPE_CHAR_RNG_UPPER, 122},
|
||||
{LLAMA_GRETYPE_CHAR_ALT, 48},
|
||||
{LLAMA_GRETYPE_CHAR_RNG_UPPER, 57},
|
||||
{LLAMA_GRETYPE_CHAR_ALT, 95},
|
||||
{LLAMA_GRETYPE_RULE_REF, 10},
|
||||
{LLAMA_GRETYPE_ALT, 0},
|
||||
{LLAMA_GRETYPE_END, 0},
|
||||
{LLAMA_GRETYPE_CHAR, 48},
|
||||
{LLAMA_GRETYPE_CHAR_RNG_UPPER, 57},
|
||||
{LLAMA_GRETYPE_RULE_REF, 11},
|
||||
{LLAMA_GRETYPE_ALT, 0},
|
||||
{LLAMA_GRETYPE_CHAR, 48},
|
||||
{LLAMA_GRETYPE_CHAR_RNG_UPPER, 57},
|
||||
{LLAMA_GRETYPE_END, 0},
|
||||
{LLAMA_GRETYPE_CHAR, 32},
|
||||
{LLAMA_GRETYPE_CHAR_ALT, 9},
|
||||
{LLAMA_GRETYPE_CHAR_ALT, 10},
|
||||
{LLAMA_GRETYPE_RULE_REF, 12},
|
||||
{LLAMA_GRETYPE_ALT, 0},
|
||||
{LLAMA_GRETYPE_END, 0},
|
||||
};
|
||||
|
||||
index = 0;
|
||||
for (auto rule : parsed_grammar.rules)
|
||||
{
|
||||
// compare rule to expected rule
|
||||
for (uint32_t i = 0; i < rule.size(); i++)
|
||||
{
|
||||
llama_grammar_element element = rule[i];
|
||||
llama_grammar_element expected_element = expected_rules[index];
|
||||
|
||||
// pretty print error message before asserting
|
||||
if (expected_element.type != element.type || expected_element.value != element.value)
|
||||
{
|
||||
fprintf(stderr, "index: %d\n", index);
|
||||
fprintf(stderr, "expected_element: %d, %d\n", expected_element.type, expected_element.value);
|
||||
fprintf(stderr, "actual_element: %d, %d\n", element.type, element.value);
|
||||
fprintf(stderr, "expected_element != actual_element\n");
|
||||
}
|
||||
|
||||
assert(expected_element.type == element.type && expected_element.value == element.value);
|
||||
index++;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
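A condensed sketch of the flow this test exercises — parsing a GBNF grammar and turning it into a llama_grammar (illustrative only; the helper name and example grammar are placeholders):

#include "llama.h"
#include "grammar-parser.h"

#include <vector>

// Hypothetical helper: parse a GBNF string and build the corresponding llama_grammar.
static llama_grammar * build_grammar(const char * gbnf) {
    grammar_parser::parse_state parsed = grammar_parser::parse(gbnf);

    // c_rules() exposes the parsed rules in the array-of-element-pointers
    // form that llama_grammar_init() expects
    std::vector<const llama_grammar_element *> rules(parsed.c_rules());

    return llama_grammar_init(rules.data(), rules.size(), parsed.symbol_ids.at("root"));
}

// usage (placeholder grammar); the result is released with llama_grammar_free():
//     llama_grammar * g = build_grammar("root ::= [0-9]+");
//     ...
//     llama_grammar_free(g);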
tests/test-llama-grammar.cpp (new file, 403 lines)
@@ -0,0 +1,403 @@
|
||||
#ifdef NDEBUG
|
||||
#undef NDEBUG
|
||||
#endif
|
||||
|
||||
#include "llama.cpp" // TODO: not great
|
||||
#include "grammar-parser.h"
|
||||
|
||||
#include <cassert>
|
||||
|
||||
int main()
|
||||
{
|
||||
grammar_parser::parse_state parsed_grammar;
|
||||
|
||||
std::vector<std::pair<std::string, uint32_t>> expected = {
|
||||
{"expr", 2},
|
||||
{"expr_6", 6},
|
||||
{"expr_7", 7},
|
||||
{"ident", 8},
|
||||
{"ident_10", 10},
|
||||
{"num", 9},
|
||||
{"num_11", 11},
|
||||
{"root", 0},
|
||||
{"root_1", 1},
|
||||
{"root_5", 5},
|
||||
{"term", 4},
|
||||
{"ws", 3},
|
||||
{"ws_12", 12},
|
||||
};
|
||||
|
||||
std::vector<std::vector<llama_grammar_element>> expected_rules = {
|
||||
{{LLAMA_GRETYPE_RULE_REF, 5}, {LLAMA_GRETYPE_END, 0}},
|
||||
{
|
||||
{LLAMA_GRETYPE_RULE_REF, 2},
|
||||
{LLAMA_GRETYPE_CHAR, 61},
|
||||
{LLAMA_GRETYPE_RULE_REF, 3},
|
||||
{LLAMA_GRETYPE_RULE_REF, 4},
|
||||
{LLAMA_GRETYPE_CHAR, 10},
|
||||
{LLAMA_GRETYPE_END, 0},
|
||||
},
|
||||
{{LLAMA_GRETYPE_RULE_REF, 4}, {LLAMA_GRETYPE_RULE_REF, 7}, {LLAMA_GRETYPE_END, 0}},
|
||||
{{LLAMA_GRETYPE_RULE_REF, 12}, {LLAMA_GRETYPE_END, 0}},
|
||||
{
|
||||
{LLAMA_GRETYPE_RULE_REF, 8},
|
||||
{LLAMA_GRETYPE_ALT, 0},
|
||||
{LLAMA_GRETYPE_RULE_REF, 9},
|
||||
{LLAMA_GRETYPE_ALT, 0},
|
||||
{LLAMA_GRETYPE_CHAR, 40},
|
||||
{LLAMA_GRETYPE_RULE_REF, 3},
|
||||
{LLAMA_GRETYPE_RULE_REF, 2},
|
||||
{LLAMA_GRETYPE_CHAR, 41},
|
||||
{LLAMA_GRETYPE_RULE_REF, 3},
|
||||
{LLAMA_GRETYPE_END, 0},
|
||||
},
|
||||
{{LLAMA_GRETYPE_RULE_REF, 1}, {LLAMA_GRETYPE_RULE_REF, 5}, {LLAMA_GRETYPE_ALT, 0}, {LLAMA_GRETYPE_RULE_REF, 1}, {LLAMA_GRETYPE_END, 0}},
|
||||
{
|
||||
{LLAMA_GRETYPE_CHAR, 45},
|
||||
{LLAMA_GRETYPE_CHAR_ALT, 43},
|
||||
{LLAMA_GRETYPE_CHAR_ALT, 42},
|
||||
{LLAMA_GRETYPE_CHAR_ALT, 47},
|
||||
{LLAMA_GRETYPE_RULE_REF, 4},
|
||||
{LLAMA_GRETYPE_END, 0},
|
||||
},
|
||||
{{LLAMA_GRETYPE_RULE_REF, 6}, {LLAMA_GRETYPE_RULE_REF, 7}, {LLAMA_GRETYPE_ALT, 0}, {LLAMA_GRETYPE_END, 0}},
|
||||
{
|
||||
{LLAMA_GRETYPE_CHAR, 97},
|
||||
{LLAMA_GRETYPE_CHAR_RNG_UPPER, 122},
|
||||
{LLAMA_GRETYPE_RULE_REF, 10},
|
||||
{LLAMA_GRETYPE_RULE_REF, 3},
|
||||
{LLAMA_GRETYPE_END, 0},
|
||||
},
|
||||
{{LLAMA_GRETYPE_RULE_REF, 11}, {LLAMA_GRETYPE_RULE_REF, 3}, {LLAMA_GRETYPE_END, 0}},
|
||||
{
|
||||
{LLAMA_GRETYPE_CHAR, 97},
|
||||
{LLAMA_GRETYPE_CHAR_RNG_UPPER, 122},
|
||||
{LLAMA_GRETYPE_CHAR_ALT, 48},
|
||||
{LLAMA_GRETYPE_CHAR_RNG_UPPER, 57},
|
||||
{LLAMA_GRETYPE_CHAR_ALT, 95},
|
||||
{LLAMA_GRETYPE_RULE_REF, 10},
|
||||
{LLAMA_GRETYPE_ALT, 0},
|
||||
{LLAMA_GRETYPE_END, 0},
|
||||
},
|
||||
{
|
||||
{LLAMA_GRETYPE_CHAR, 48},
|
||||
{LLAMA_GRETYPE_CHAR_RNG_UPPER, 57},
|
||||
{LLAMA_GRETYPE_RULE_REF, 11},
|
||||
{LLAMA_GRETYPE_ALT, 0},
|
||||
{LLAMA_GRETYPE_CHAR, 48},
|
||||
{LLAMA_GRETYPE_CHAR_RNG_UPPER, 57},
|
||||
{LLAMA_GRETYPE_END, 0},
|
||||
},
|
||||
{
|
||||
{LLAMA_GRETYPE_CHAR, 32},
|
||||
{LLAMA_GRETYPE_CHAR_ALT, 9},
|
||||
{LLAMA_GRETYPE_CHAR_ALT, 10},
|
||||
{LLAMA_GRETYPE_RULE_REF, 12},
|
||||
{LLAMA_GRETYPE_ALT, 0},
|
||||
{LLAMA_GRETYPE_END, 0},
|
||||
},
|
||||
};
|
||||
|
||||
for (auto pair : expected)
|
||||
{
|
||||
parsed_grammar.symbol_ids[pair.first] = pair.second;
|
||||
}
|
||||
|
||||
for (auto rule : expected_rules)
|
||||
{
|
||||
parsed_grammar.rules.push_back({});
|
||||
for (auto element : rule)
|
||||
{
|
||||
parsed_grammar.rules.back().push_back(element);
|
||||
}
|
||||
}
|
||||
|
||||
llama_grammar *grammar = NULL;
|
||||
std::vector<const llama_grammar_element *> grammar_rules(parsed_grammar.c_rules());
|
||||
grammar = llama_grammar_init(
|
||||
grammar_rules.data(), grammar_rules.size(), parsed_grammar.symbol_ids.at("root"));
|
||||
|
||||
std::vector<std::vector<llama_grammar_element>> expected_stacks = {
|
||||
{
|
||||
{LLAMA_GRETYPE_RULE_REF, 5},
|
||||
{LLAMA_GRETYPE_CHAR, 61},
|
||||
{LLAMA_GRETYPE_RULE_REF, 7},
|
||||
{LLAMA_GRETYPE_CHAR, 97},
|
||||
},
|
||||
{
|
||||
{LLAMA_GRETYPE_RULE_REF, 5},
|
||||
{LLAMA_GRETYPE_CHAR, 61},
|
||||
{LLAMA_GRETYPE_RULE_REF, 7},
|
||||
{LLAMA_GRETYPE_RULE_REF, 3},
|
||||
{LLAMA_GRETYPE_CHAR, 48},
|
||||
},
|
||||
{
|
||||
{LLAMA_GRETYPE_RULE_REF, 5},
|
||||
{LLAMA_GRETYPE_CHAR, 61},
|
||||
{LLAMA_GRETYPE_RULE_REF, 7},
|
||||
{LLAMA_GRETYPE_RULE_REF, 3},
|
||||
{LLAMA_GRETYPE_CHAR, 48},
|
||||
},
|
||||
{
|
||||
{LLAMA_GRETYPE_RULE_REF, 5},
|
||||
{LLAMA_GRETYPE_CHAR, 61},
|
||||
{LLAMA_GRETYPE_RULE_REF, 7},
|
||||
{LLAMA_GRETYPE_CHAR, 40},
|
||||
},
|
||||
{
|
||||
{LLAMA_GRETYPE_CHAR, 61},
|
||||
{LLAMA_GRETYPE_RULE_REF, 7},
|
||||
{LLAMA_GRETYPE_CHAR, 97},
|
||||
},
|
||||
{
|
||||
{LLAMA_GRETYPE_CHAR, 61},
|
||||
{LLAMA_GRETYPE_RULE_REF, 7},
|
||||
{LLAMA_GRETYPE_RULE_REF, 3},
|
||||
{LLAMA_GRETYPE_CHAR, 48},
|
||||
},
|
||||
{
|
||||
{LLAMA_GRETYPE_CHAR, 61},
|
||||
{LLAMA_GRETYPE_RULE_REF, 7},
|
||||
{LLAMA_GRETYPE_RULE_REF, 3},
|
||||
{LLAMA_GRETYPE_CHAR, 48},
|
||||
},
|
||||
{
|
||||
{LLAMA_GRETYPE_CHAR, 61},
|
||||
{LLAMA_GRETYPE_RULE_REF, 7},
|
||||
{LLAMA_GRETYPE_CHAR, 40},
|
||||
}};
|
||||
|
||||
auto index = 0;
|
||||
for (auto stack : grammar->stacks)
|
||||
{
|
||||
// compare stack to expected_stack
|
||||
for (uint32_t i = 0; i < stack.size(); i++)
|
||||
{
|
||||
auto element = stack[i];
|
||||
auto expected_element = expected_stacks[index][i];
|
||||
|
||||
// pretty print error message before asserting
|
||||
if (expected_element.type != element->type || expected_element.value != element->value)
|
||||
{
|
||||
fprintf(stderr, "index: %d\n", index);
|
||||
fprintf(stderr, "expected_element: %d, %d\n", expected_element.type, expected_element.value);
|
||||
fprintf(stderr, "actual_element: %d, %d\n", element->type, element->value);
|
||||
fprintf(stderr, "expected_element != actual_element\n");
|
||||
}
|
||||
|
||||
assert(expected_element.type == element->type && expected_element.value == element->value);
|
||||
}
|
||||
index++;
|
||||
}
|
||||
|
||||
std::vector<std::vector<const llama_grammar_element *>> next_stacks;
|
||||
std::vector<llama_grammar_candidate> next_candidates;
|
||||
next_candidates.resize(24);
|
||||
|
||||
for (size_t i = 0; i < 24; ++i)
|
||||
{
|
||||
uint32_t *cp = new uint32_t[2]; // dynamically allocate memory for code_point
|
||||
cp[0] = 37 + i;
|
||||
cp[1] = 0;
|
||||
next_candidates[i] = {i, cp, {}};
|
||||
}
|
||||
|
||||
std::vector<std::vector<std::pair<uint32_t, uint16_t>>> expected_reject = {
|
||||
{
|
||||
{0, 37},
|
||||
{1, 38},
|
||||
{2, 39},
|
||||
{3, 40},
|
||||
{4, 41},
|
||||
{5, 42},
|
||||
{6, 43},
|
||||
{7, 44},
|
||||
{8, 45},
|
||||
{9, 46},
|
||||
{10, 47},
|
||||
{11, 48},
|
||||
{12, 49},
|
||||
{13, 50},
|
||||
{14, 51},
|
||||
{15, 52},
|
||||
{16, 53},
|
||||
{17, 54},
|
||||
{18, 55},
|
||||
{19, 56},
|
||||
{20, 57},
|
||||
{21, 58},
|
||||
{22, 59},
|
||||
{23, 60},
|
||||
},
|
||||
{
|
||||
{0, 37},
|
||||
{1, 38},
|
||||
{2, 39},
|
||||
{3, 40},
|
||||
{4, 41},
|
||||
{5, 42},
|
||||
{6, 43},
|
||||
{7, 44},
|
||||
{8, 45},
|
||||
{9, 46},
|
||||
{10, 47},
|
||||
{21, 58},
|
||||
{22, 59},
|
||||
{23, 60},
|
||||
},
|
||||
{
|
||||
{0, 37},
|
||||
{1, 38},
|
||||
{2, 39},
|
||||
{3, 40},
|
||||
{4, 41},
|
||||
{5, 42},
|
||||
{6, 43},
|
||||
{7, 44},
|
||||
{8, 45},
|
||||
{9, 46},
|
||||
{10, 47},
|
||||
{21, 58},
|
||||
{22, 59},
|
||||
{23, 60},
|
||||
},
|
||||
{
|
||||
{0, 37},
|
||||
{1, 38},
|
||||
{2, 39},
|
||||
{4, 41},
|
||||
{5, 42},
|
||||
{6, 43},
|
||||
{7, 44},
|
||||
{8, 45},
|
||||
{9, 46},
|
||||
{10, 47},
|
||||
{11, 48},
|
||||
{12, 49},
|
||||
{13, 50},
|
||||
{14, 51},
|
||||
{15, 52},
|
||||
{16, 53},
|
||||
{17, 54},
|
||||
{18, 55},
|
||||
{19, 56},
|
||||
{20, 57},
|
||||
{21, 58},
|
||||
{22, 59},
|
||||
{23, 60},
|
||||
},
|
||||
{
|
||||
{0, 37},
|
||||
{1, 38},
|
||||
{2, 39},
|
||||
{3, 40},
|
||||
{4, 41},
|
||||
{5, 42},
|
||||
{6, 43},
|
||||
{7, 44},
|
||||
{8, 45},
|
||||
{9, 46},
|
||||
{10, 47},
|
||||
{11, 48},
|
||||
{12, 49},
|
||||
{13, 50},
|
||||
{14, 51},
|
||||
{15, 52},
|
||||
{16, 53},
|
||||
{17, 54},
|
||||
{18, 55},
|
||||
{19, 56},
|
||||
{20, 57},
|
||||
{21, 58},
|
||||
{22, 59},
|
||||
{23, 60},
|
||||
},
|
||||
{
|
||||
{0, 37},
|
||||
{1, 38},
|
||||
{2, 39},
|
||||
{3, 40},
|
||||
{4, 41},
|
||||
{5, 42},
|
||||
{6, 43},
|
||||
{7, 44},
|
||||
{8, 45},
|
||||
{9, 46},
|
||||
{10, 47},
|
||||
{21, 58},
|
||||
{22, 59},
|
||||
{23, 60},
|
||||
},
|
||||
{
|
||||
{0, 37},
|
||||
{1, 38},
|
||||
{2, 39},
|
||||
{3, 40},
|
||||
{4, 41},
|
||||
{5, 42},
|
||||
{6, 43},
|
||||
{7, 44},
|
||||
{8, 45},
|
||||
{9, 46},
|
||||
{10, 47},
|
||||
{21, 58},
|
||||
{22, 59},
|
||||
{23, 60},
|
||||
},
|
||||
{
|
||||
{0, 37},
|
||||
{1, 38},
|
||||
{2, 39},
|
||||
{4, 41},
|
||||
{5, 42},
|
||||
{6, 43},
|
||||
{7, 44},
|
||||
{8, 45},
|
||||
{9, 46},
|
||||
{10, 47},
|
||||
{11, 48},
|
||||
{12, 49},
|
||||
{13, 50},
|
||||
{14, 51},
|
||||
{15, 52},
|
||||
{16, 53},
|
||||
{17, 54},
|
||||
{18, 55},
|
||||
{19, 56},
|
||||
{20, 57},
|
||||
{21, 58},
|
||||
{22, 59},
|
||||
{23, 60},
|
||||
},
|
||||
};
|
||||
|
||||
std::vector<llama_grammar_candidate> rejects = llama_grammar_reject_candidates_for_stack(grammar->rules, grammar->stacks[0], next_candidates);
|
||||
|
||||
std::vector<std::vector<llama_grammar_candidate>> all_rejects;
|
||||
|
||||
for (std::size_t count = 0; count < grammar->stacks.size(); ++count)
|
||||
{
|
||||
rejects = llama_grammar_reject_candidates_for_stack(grammar->rules, grammar->stacks[count], next_candidates);
|
||||
all_rejects.push_back(rejects);
|
||||
}
|
||||
|
||||
index = 0;
|
||||
for (auto rej : all_rejects)
|
||||
{
|
||||
for (uint32_t i = 0; i < rej.size(); i++)
|
||||
{
|
||||
auto element = rej[i];
|
||||
auto expected_element = expected_reject[index][i];
|
||||
assert(element.index == expected_element.first && *element.code_points == expected_element.second);
|
||||
}
|
||||
index++;
|
||||
}
|
||||
|
||||
for (auto &candidate : next_candidates)
|
||||
{
|
||||
delete[] candidate.code_points;
|
||||
candidate.code_points = nullptr;
|
||||
}
|
||||
delete grammar;
|
||||
return 0;
|
||||
}
|
||||
tests/test-tokenizer-0.cpp
@@ -1,22 +1,55 @@
|
||||
#include "llama.h"
|
||||
#include "common.h"
|
||||
|
||||
#include <cstdio>
|
||||
#include <string>
|
||||
#include <map>
|
||||
#include <vector>
|
||||
|
||||
static const std::map<std::string, std::vector<llama_token>> & k_tests()
|
||||
{
|
||||
static std::string unescape_whitespace(llama_context* ctx, const std::vector<llama_token>& tokens) {
|
||||
std::string result;
|
||||
for (size_t i = 0; i < tokens.size(); ++i) {
|
||||
result += llama_token_to_str(ctx, tokens[i]);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
static const std::map<std::string, std::vector<llama_token>> & k_tests() {
|
||||
static std::map<std::string, std::vector<llama_token>> _k_tests = {
|
||||
{ "Hello World", { 1, 10994, 2787, }, },
|
||||
{ " Hello World", { 1, 15043, 2787, }, },
|
||||
{ " Hello World!", { 1, 15043, 2787, 29991, }, },
|
||||
{ " this is 🦙.cpp", { 1, 445, 338, 29871, 243, 162, 169, 156, 29889, 8223, }, },
|
||||
{ "w048 7tuijk dsdfhu", { 1, 29893, 29900, 29946, 29947, 29871, 29955, 9161, 13535, 18031, 2176, 6905, }, },
|
||||
{ "нещо на Български", { 1, 821, 4851, 665, 1386, 29713, 1305, }, },
|
||||
{ " ", {1, 259, }, },
|
||||
{ " ", { 1, 1678, }, },
|
||||
{ " ", { 1, 268, }, },
|
||||
{ "\t", { 1, 29871, 12, }, },
|
||||
{ "\n", { 1, 29871, 13, }, },
|
||||
{ "\t\n", { 1, 29871, 12, 13, }, },
|
||||
{ "Hello world", { 1, 15043, 3186, }, },
|
||||
{ " Hello world", { 1, 29871, 15043, 3186, }, },
|
||||
{ "Hello World", { 1, 15043, 2787, }, },
|
||||
{ " Hello World", { 1, 29871, 15043, 2787, }, },
|
||||
{ " Hello World!", { 1, 29871, 15043, 2787, 29991, }, },
|
||||
{ " this is 🦙.cpp", { 1, 29871, 445, 338, 29871, 243, 162, 169, 156, 29889, 8223, }, },
|
||||
{ "w048 7tuijk dsdfhu", { 1, 281, 29900, 29946, 29947, 29871, 29955, 9161, 13535, 18031, 2176, 6905, }, },
|
||||
{ "нещо на Български", { 1, 1538, 4851, 665, 1386, 29713, 1305, }, },
|
||||
{ "កាន់តែពិសេសអាចខលចេញ", { 1, 29871, 31849, 31324, 31934, 228, 162, 142, 228, 161,
|
||||
146, 228, 162, 133, 228, 161, 153, 228, 161, 186,
|
||||
31708, 228, 162, 132, 31708, 228, 161, 165, 31324, 228,
|
||||
161, 136, 228, 161, 132, 228, 161, 158, 228, 161,
|
||||
136, 228, 162, 132, 228, 161, 140, }, },
|
||||
{ "🚀 (normal) 😶🌫️ (multiple emojis concatenated) ✅ (only emoji that has its own token)",
|
||||
{ 1, 29871, 243, 162, 157, 131, 313, 8945, 29897, 29871,
|
||||
243, 162, 155, 185, 30722, 243, 162, 143, 174, 30598,
|
||||
313, 20787, 953, 3848, 275, 16125, 630, 29897, 29871, 31681,
|
||||
313, 6194, 953, 29877, 2397, 393, 756, 967, 1914, 5993, 29897, }, },
|
||||
{ "Hello", { 1, 15043 }, },
|
||||
{ " Hello", { 1, 29871, 15043 }, },
|
||||
{ " Hello", { 1, 259, 15043 }, },
|
||||
{ " Hello", { 1, 1678, 15043 }, },
|
||||
{ " Hello", { 1, 268, 15043 }, },
|
||||
{ " Hello\n Hello", { 1, 268, 15043, 13, 1678, 15043 }, },
|
||||
};
|
||||
|
||||
return _k_tests;
|
||||
};
|
||||
}
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
if (argc < 2) {
|
||||
@@ -64,10 +97,12 @@ int main(int argc, char **argv) {
|
||||
return 2;
|
||||
}
|
||||
|
||||
bool success = true;
|
||||
|
||||
for (const auto & test_kv : k_tests()) {
|
||||
std::vector<llama_token> res(test_kv.first.size());
|
||||
const int n = llama_tokenize(ctx, test_kv.first.c_str(), res.data(), int(res.size()), true);
|
||||
res.resize(n);
|
||||
std::vector<llama_token> res = llama_tokenize(ctx, test_kv.first, true);
|
||||
fprintf(stderr, "%s : '%s' tokenized to '%s'\n",
|
||||
__func__, test_kv.first.c_str(), unescape_whitespace(ctx, res).c_str());
|
||||
|
||||
bool correct = res.size() == test_kv.second.size();
|
||||
|
||||
@@ -78,7 +113,9 @@ int main(int argc, char **argv) {
|
||||
}
|
||||
|
||||
if (!correct) {
|
||||
fprintf(stderr, "%s : failed test: '%s'\n", __func__, test_kv.first.c_str());
|
||||
fprintf(stderr, "%s : failed test: '%s'\n", __func__, test_kv.first.c_str());
|
||||
fprintf(stderr, "%s : detokenized to: '%s' instead of '%s'\n", __func__,
|
||||
unescape_whitespace(ctx, res).c_str(), unescape_whitespace(ctx, test_kv.second).c_str());
|
||||
fprintf(stderr, "%s : expected tokens: ", __func__);
|
||||
for (const auto & t : test_kv.second) {
|
||||
fprintf(stderr, "%6d, ", t);
|
||||
@@ -90,9 +127,7 @@ int main(int argc, char **argv) {
|
||||
}
|
||||
fprintf(stderr, "\n");
|
||||
|
||||
llama_free_model(model);
|
||||
llama_free(ctx);
|
||||
return 3;
|
||||
success = false;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -101,5 +136,5 @@ int main(int argc, char **argv) {
|
||||
|
||||
llama_backend_free();
|
||||
|
||||
return 0;
|
||||
return success ? 0 : 3;
|
||||
}
|
||||
|
||||
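The updated test relies on C++ convenience wrappers (a std::vector-returning llama_tokenize and a std::string-returning llama_token_to_str) pulled in via common.h. A hedged sketch of what such wrappers can look like on top of the C API; the actual helpers in common.h may differ in detail:

#include "llama.h"

#include <string>
#include <vector>

// Sketch of a vector-returning tokenizer wrapper (names are illustrative).
static std::vector<llama_token> tokenize_to_vector(llama_context * ctx, const std::string & text, bool add_bos) {
    // upper bound: one token per byte of input, plus a slot for BOS
    std::vector<llama_token> tokens(text.size() + (add_bos ? 2 : 1));
    int n = llama_tokenize(ctx, text.c_str(), tokens.data(), (int) tokens.size(), add_bos);
    if (n < 0) { n = 0; } // a negative return signals a too-small buffer
    tokens.resize(n);
    return tokens;
}

// Sketch of a string-returning detokenizer wrapper.
static std::string token_to_string(llama_context * ctx, llama_token token) {
    char buf[64]; // assumption: a single token piece fits in 64 bytes
    const int len = llama_token_to_str(ctx, token, buf, (int) sizeof(buf));
    return std::string(buf, len > 0 ? (size_t) len : 0); // no null terminator is written
}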
tests/test-tokenizer-1.cpp (new file, 124 lines)
@@ -0,0 +1,124 @@
|
||||
#include "llama.h"
|
||||
#include "common.h"
|
||||
|
||||
#include <cassert>
|
||||
#include <cstdio>
|
||||
#include <cstring>
|
||||
#include <string>
|
||||
#include <codecvt>
|
||||
#include <map>
|
||||
#include <vector>
|
||||
#include <locale>
|
||||
|
||||
static std::string escape_whitespace(const std::string& text) {
|
||||
std::string result = "\xe2\x96\x81";
|
||||
for (size_t offs = 0; offs < text.length(); ++offs) {
|
||||
if (text[offs] == ' ') {
|
||||
result += "\xe2\x96\x81";
|
||||
} else {
|
||||
result += text[offs];
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
static std::string unescape_whitespace(llama_context * ctx, const std::vector<llama_token> & tokens) {
|
||||
std::string result;
|
||||
for (size_t i = 0; i < tokens.size(); ++i) {
|
||||
result += llama_token_to_str(ctx, tokens[i]);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
if (argc < 2) {
|
||||
fprintf(stderr, "Usage: %s <vocab-file>\n", argv[0]);
|
||||
return 1;
|
||||
}
|
||||
|
||||
const std::string fname = argv[1];
|
||||
|
||||
fprintf(stderr, "%s : reading vocab from: '%s'\n", __func__, fname.c_str());
|
||||
|
||||
llama_model * model;
|
||||
llama_context * ctx;
|
||||
|
||||
llama_backend_init(false);
|
||||
|
||||
// load the vocab
|
||||
{
|
||||
auto lparams = llama_context_default_params();
|
||||
|
||||
lparams.vocab_only = true;
|
||||
|
||||
model = llama_load_model_from_file(fname.c_str(), lparams);
|
||||
|
||||
if (model == NULL) {
|
||||
fprintf(stderr, "%s: error: failed to load vocab '%s'\n", __func__, fname.c_str());
|
||||
return 1;
|
||||
}
|
||||
|
||||
ctx = llama_new_context_with_model(model, lparams);
|
||||
|
||||
if (ctx == NULL) {
|
||||
fprintf(stderr, "%s: error: failed to load vocab '%s'\n", __func__, fname.c_str());
|
||||
llama_free_model(model);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
const int n_vocab = llama_n_vocab(ctx);
|
||||
|
||||
for (int i = 0; i < n_vocab; ++i) {
|
||||
std::string forward = llama_token_to_str_bpe(ctx, i);
|
||||
std::vector<llama_token> tokens = llama_tokenize_bpe(ctx, forward, false);
|
||||
if (tokens.size() == 1) {
|
||||
if (i != tokens[0]) {
|
||||
std::string backward = llama_token_to_str(ctx, tokens[0]);
|
||||
fprintf(stderr, "%s : error: token %d is string %s but bpe returns token %d %s\n",
|
||||
__func__, i, llama_token_to_str(ctx, i).c_str(), tokens[0], backward.c_str());
|
||||
return 2;
|
||||
}
|
||||
} else {
|
||||
llama_token_type type = llama_token_get_type(ctx, i);
|
||||
if (type == LLAMA_TOKEN_TYPE_UNKNOWN || type == LLAMA_TOKEN_TYPE_CONTROL || type == LLAMA_TOKEN_TYPE_BYTE) {
|
||||
fprintf(stderr, "%s : info: token %d is string %s and bpe returns tokens %s\n",
|
||||
__func__, i, llama_token_to_str(ctx, i).c_str(), unescape_whitespace(ctx, tokens).c_str());
|
||||
} else {
|
||||
fprintf(stderr, "%s : error: token %d is string %s but bpe returns tokens %s\n",
|
||||
__func__, i, llama_token_to_str(ctx, i).c_str(), unescape_whitespace(ctx, tokens).c_str());
|
||||
return 2;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef _WIN32
|
||||
std::wstring_convert<typename std::codecvt_utf8<char16_t>, char16_t> u16converter;
|
||||
for (char16_t ch = 0x0000; ch < 0xffff; ++ch) {
|
||||
std::u16string u16str(1, ch);
|
||||
std::string str = u16converter.to_bytes(u16str);
|
||||
std::vector<llama_token> tokens = llama_tokenize(ctx, escape_whitespace(str).c_str(), false);
|
||||
if (tokens.size() == 1) {
|
||||
fprintf(stderr, "%s : info: %s tokenized to %d \n",
|
||||
__func__, str.c_str(), tokens[0]);
|
||||
}
|
||||
}
|
||||
|
||||
std::wstring_convert<typename std::codecvt_utf8<char32_t>, char32_t> u32converter;
|
||||
for (char32_t ch = 0x0000; ch < 0x0010ffff; ++ch) {
|
||||
std::u32string u32str(1, ch);
|
||||
std::string str = u32converter.to_bytes(u32str);
|
||||
std::vector<llama_token> tokens = llama_tokenize(ctx, escape_whitespace(str).c_str(), false);
|
||||
if (tokens.size() == 1) {
|
||||
fprintf(stderr, "%s : info: %s tokenized to %d \n", __func__, str.c_str(), tokens[0]);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
llama_free_model(model);
|
||||
llama_free(ctx);
|
||||
|
||||
llama_backend_free();
|
||||
|
||||
return 0;
|
||||
}