mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2026-02-19 14:13:22 +02:00
Compare commits
7 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
0fc560fe96 | ||
|
|
db5c2ad30e | ||
|
|
97e40df5d6 | ||
|
|
837f426f19 | ||
|
|
9d13776f34 | ||
|
|
2c7ff2c7ae | ||
|
|
0dc0e9aa42 |
1
.gitattributes
vendored
Normal file
1
.gitattributes
vendored
Normal file
@@ -0,0 +1 @@
|
||||
models/ggml-vocab-*.gguf filter=lfs diff=lfs merge=lfs -text
|
||||
57
.github/workflows/build.yml
vendored
57
.github/workflows/build.yml
vendored
@@ -33,6 +33,7 @@ jobs:
|
||||
id: checkout
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
lfs: true
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Dependencies
|
||||
@@ -91,6 +92,7 @@ jobs:
|
||||
id: checkout
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
lfs: true
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Dependencies
|
||||
@@ -153,6 +155,8 @@ jobs:
|
||||
- name: Clone
|
||||
id: checkout
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
lfs: true
|
||||
|
||||
- name: Dependencies
|
||||
id: depends
|
||||
@@ -188,6 +192,8 @@ jobs:
|
||||
- name: Clone
|
||||
id: checkout
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
lfs: true
|
||||
|
||||
- name: Dependencies
|
||||
id: depends
|
||||
@@ -211,6 +217,7 @@ jobs:
|
||||
id: checkout
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
lfs: true
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Dependencies
|
||||
@@ -285,6 +292,8 @@ jobs:
|
||||
# - name: Clone
|
||||
# id: checkout
|
||||
# uses: actions/checkout@v4
|
||||
# with:
|
||||
# lfs: true
|
||||
#
|
||||
# - name: Dependencies
|
||||
# id: depends
|
||||
@@ -319,6 +328,8 @@ jobs:
|
||||
- name: Clone
|
||||
id: checkout
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
lfs: true
|
||||
|
||||
- name: Dependencies
|
||||
id: depends
|
||||
@@ -347,6 +358,8 @@ jobs:
|
||||
- name: Clone
|
||||
id: checkout
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
lfs: true
|
||||
|
||||
- name: Dependencies
|
||||
id: depends
|
||||
@@ -369,6 +382,8 @@ jobs:
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
with:
|
||||
lfs: true
|
||||
|
||||
- name: add oneAPI to apt
|
||||
shell: bash
|
||||
@@ -393,6 +408,8 @@ jobs:
|
||||
- name: Clone
|
||||
id: checkout
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
lfs: true
|
||||
|
||||
- name: Build
|
||||
id: cmake_build
|
||||
@@ -410,6 +427,8 @@ jobs:
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
with:
|
||||
lfs: true
|
||||
|
||||
- name: add oneAPI to apt
|
||||
shell: bash
|
||||
@@ -434,6 +453,8 @@ jobs:
|
||||
- name: Clone
|
||||
id: checkout
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
lfs: true
|
||||
|
||||
- name: Build
|
||||
id: cmake_build
|
||||
@@ -454,6 +475,8 @@ jobs:
|
||||
- name: Clone
|
||||
id: checkout
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
lfs: true
|
||||
|
||||
- name: Dependencies
|
||||
id: depends
|
||||
@@ -485,6 +508,8 @@ jobs:
|
||||
- name: Clone
|
||||
id: checkout
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
lfs: true
|
||||
|
||||
- name: Dependencies
|
||||
id: depends
|
||||
@@ -514,6 +539,8 @@ jobs:
|
||||
- name: Clone
|
||||
id: checkout
|
||||
uses: actions/checkout@v1
|
||||
with:
|
||||
lfs: true
|
||||
|
||||
- name: Dependencies
|
||||
id: depends
|
||||
@@ -543,6 +570,8 @@ jobs:
|
||||
- name: Clone
|
||||
id: checkout
|
||||
uses: actions/checkout@v1
|
||||
with:
|
||||
lfs: true
|
||||
|
||||
- name: Dependencies
|
||||
id: depends
|
||||
@@ -576,6 +605,8 @@ jobs:
|
||||
- name: Clone
|
||||
id: checkout
|
||||
uses: actions/checkout@v1
|
||||
with:
|
||||
lfs: true
|
||||
|
||||
- name: Dependencies
|
||||
id: depends
|
||||
@@ -606,6 +637,8 @@ jobs:
|
||||
steps:
|
||||
- name: Clone
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
lfs: true
|
||||
|
||||
- name: Setup ${{ matrix.sys }}
|
||||
uses: msys2/setup-msys2@v2
|
||||
@@ -687,6 +720,7 @@ jobs:
|
||||
id: checkout
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
lfs: true
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Clone Kompute submodule
|
||||
@@ -833,6 +867,7 @@ jobs:
|
||||
id: checkout
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
lfs: true
|
||||
fetch-depth: 0
|
||||
|
||||
- uses: Jimver/cuda-toolkit@v0.2.11
|
||||
@@ -906,6 +941,7 @@ jobs:
|
||||
id: checkout
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
lfs: true
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Install
|
||||
@@ -947,6 +983,8 @@ jobs:
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
lfs: true
|
||||
|
||||
- name: Build Xcode project
|
||||
run: xcodebuild -project examples/llama.swiftui/llama.swiftui.xcodeproj -scheme llama.swiftui -sdk iphoneos CODE_SIGNING_REQUIRED=NO CODE_SIGN_IDENTITY= -destination 'generic/platform=iOS' build
|
||||
@@ -957,6 +995,8 @@ jobs:
|
||||
steps:
|
||||
- name: Clone
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
lfs: true
|
||||
|
||||
- name: Set up JDK
|
||||
uses: actions/setup-java@v3
|
||||
@@ -979,7 +1019,9 @@ jobs:
|
||||
# runs-on: macos-12
|
||||
# steps:
|
||||
# - name: Clone
|
||||
# uses: actions/checkout@v4
|
||||
# uses: actions/checkout@#v4
|
||||
# with:
|
||||
# lfs: true
|
||||
#
|
||||
# - name: Build
|
||||
# uses: cross-platform-actions/action@v0.19.0
|
||||
@@ -1012,6 +1054,7 @@ jobs:
|
||||
id: checkout
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
lfs: true
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Determine tag name
|
||||
@@ -1077,6 +1120,8 @@ jobs:
|
||||
# steps:
|
||||
# - name: Clone
|
||||
# uses: actions/checkout@v4
|
||||
# with:
|
||||
# lfs: true
|
||||
#
|
||||
# - name: Dependencies
|
||||
# run: |
|
||||
@@ -1101,6 +1146,8 @@ jobs:
|
||||
# steps:
|
||||
# - name: Clone
|
||||
# uses: actions/checkout@v4
|
||||
# with:
|
||||
# lfs: true
|
||||
#
|
||||
# - name: Dependencies
|
||||
# run: |
|
||||
@@ -1125,6 +1172,8 @@ jobs:
|
||||
# steps:
|
||||
# - name: Clone
|
||||
# uses: actions/checkout@v4
|
||||
# with:
|
||||
# lfs: true
|
||||
#
|
||||
# - name: Dependencies
|
||||
# run: |
|
||||
@@ -1155,6 +1204,8 @@ jobs:
|
||||
# steps:
|
||||
# - name: Clone
|
||||
# uses: actions/checkout@v4
|
||||
# with:
|
||||
# lfs: true
|
||||
#
|
||||
# - name: Add msbuild to PATH
|
||||
# uses: microsoft/setup-msbuild@v1
|
||||
@@ -1194,6 +1245,8 @@ jobs:
|
||||
# steps:
|
||||
# - name: Clone
|
||||
# uses: actions/checkout@v4
|
||||
# with:
|
||||
# lfs: true
|
||||
#
|
||||
# - name: Add msbuild to PATH
|
||||
# uses: microsoft/setup-msbuild@v1
|
||||
@@ -1240,6 +1293,8 @@ jobs:
|
||||
# steps:
|
||||
# - name: Clone
|
||||
# uses: actions/checkout@v4
|
||||
# with:
|
||||
# lfs: true
|
||||
#
|
||||
# - name: Dependencies
|
||||
# run: |
|
||||
|
||||
8
.github/workflows/code-coverage.yml
vendored
8
.github/workflows/code-coverage.yml
vendored
@@ -13,14 +13,16 @@ jobs:
|
||||
run:
|
||||
runs-on: ubuntu-20.04
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Dependencies
|
||||
run: |
|
||||
sudo apt-get update
|
||||
sudo apt-get install build-essential gcc-8 lcov
|
||||
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
lfs: true
|
||||
|
||||
- name: Build
|
||||
run: CC=gcc-8 make -j LLAMA_CODE_COVERAGE=1 tests
|
||||
|
||||
|
||||
@@ -67,9 +67,7 @@ models = [
|
||||
{"name": "gpt-2", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/openai-community/gpt2", },
|
||||
{"name": "refact", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/smallcloudai/Refact-1_6-base", },
|
||||
{"name": "command-r", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/CohereForAI/c4ai-command-r-v01", },
|
||||
{"name": "qwen2", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/Qwen/Qwen1.5-7B", },
|
||||
{"name": "olmo", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/allenai/OLMo-1.7-7B-hf", },
|
||||
{"name": "dbrx", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/databricks/dbrx-base", },
|
||||
]
|
||||
|
||||
# make directory "models/tokenizers" if it doesn't exist
|
||||
@@ -153,8 +151,6 @@ for model in models:
|
||||
# print the "pre_tokenizer" content from the tokenizer.json
|
||||
with open(f"models/tokenizers/{name}/tokenizer.json", "r", encoding="utf-8") as f:
|
||||
cfg = json.load(f)
|
||||
normalizer = cfg["normalizer"]
|
||||
logger.info("normalizer: " + json.dumps(normalizer, indent=4))
|
||||
pre_tokenizer = cfg["pre_tokenizer"]
|
||||
logger.info("pre_tokenizer: " + json.dumps(pre_tokenizer, indent=4))
|
||||
|
||||
|
||||
@@ -314,15 +314,9 @@ class Model(ABC):
|
||||
if chkhsh == "9c2227e4dd922002fb81bde4fc02b0483ca4f12911410dee2255e4987644e3f8":
|
||||
# ref: https://huggingface.co/CohereForAI/c4ai-command-r-v01
|
||||
res = "command-r"
|
||||
if chkhsh == "e636dc30a262dcc0d8c323492e32ae2b70728f4df7dfe9737d9f920a282b8aea":
|
||||
# ref: https://huggingface.co/Qwen/Qwen1.5-7B
|
||||
res = "qwen2"
|
||||
if chkhsh == "b6dc8df998e1cfbdc4eac8243701a65afe638679230920b50d6f17d81c098166":
|
||||
# ref: https://huggingface.co/allenai/OLMo-1.7-7B-hf
|
||||
res = "olmo"
|
||||
if chkhsh == "a8594e3edff7c29c003940395316294b2c623e09894deebbc65f33f1515df79e":
|
||||
# ref: https://huggingface.co/databricks/dbrx-instruct
|
||||
res = "dbrx"
|
||||
|
||||
if res is None:
|
||||
logger.warning("\n")
|
||||
|
||||
47
convert.py
47
convert.py
@@ -1508,27 +1508,25 @@ def main(args_in: list[str] | None = None) -> None:
|
||||
if args.big_endian:
|
||||
endianess = gguf.GGUFEndian.BIG
|
||||
|
||||
params = None
|
||||
if args.pad_vocab or not args.vocab_only:
|
||||
params = Params.load(model_plus)
|
||||
if params.n_ctx == -1:
|
||||
if args.ctx is None:
|
||||
msg = """\
|
||||
The model doesn't have a context size, and you didn't specify one with --ctx
|
||||
Please specify one with --ctx:
|
||||
- LLaMA v1: --ctx 2048
|
||||
- LLaMA v2: --ctx 4096"""
|
||||
parser.error(textwrap.dedent(msg))
|
||||
params.n_ctx = args.ctx
|
||||
params = Params.load(model_plus)
|
||||
if params.n_ctx == -1:
|
||||
if args.ctx is None:
|
||||
msg = """\
|
||||
The model doesn't have a context size, and you didn't specify one with --ctx
|
||||
Please specify one with --ctx:
|
||||
- LLaMA v1: --ctx 2048
|
||||
- LLaMA v2: --ctx 4096"""
|
||||
parser.error(textwrap.dedent(msg))
|
||||
params.n_ctx = args.ctx
|
||||
|
||||
if args.outtype:
|
||||
params.ftype = {
|
||||
"f32": GGMLFileType.AllF32,
|
||||
"f16": GGMLFileType.MostlyF16,
|
||||
"q8_0": GGMLFileType.MostlyQ8_0,
|
||||
}[args.outtype]
|
||||
if args.outtype:
|
||||
params.ftype = {
|
||||
"f32": GGMLFileType.AllF32,
|
||||
"f16": GGMLFileType.MostlyF16,
|
||||
"q8_0": GGMLFileType.MostlyQ8_0,
|
||||
}[args.outtype]
|
||||
|
||||
logger.info(f"params = {params}")
|
||||
logger.info(f"params = {params}")
|
||||
|
||||
model_parent_path = model_plus.paths[0].parent
|
||||
vocab_path = Path(args.vocab_dir or args.model or model_parent_path)
|
||||
@@ -1541,17 +1539,6 @@ def main(args_in: list[str] | None = None) -> None:
|
||||
if not args.outfile:
|
||||
raise ValueError("need --outfile if using --vocab-only")
|
||||
outfile = args.outfile
|
||||
if params is None:
|
||||
params = Params(
|
||||
n_vocab = vocab.vocab_size,
|
||||
n_embd = 1,
|
||||
n_layer = 1,
|
||||
n_ctx = 1,
|
||||
n_ff = 1,
|
||||
n_head = 1,
|
||||
n_head_kv = 1,
|
||||
f_norm_eps = 1e-5,
|
||||
)
|
||||
OutputFile.write_vocab_only(outfile, params, vocab, special_vocab,
|
||||
endianess=endianess, pad_vocab=args.pad_vocab)
|
||||
logger.info(f"Wrote {outfile}")
|
||||
|
||||
@@ -331,7 +331,7 @@ Notice that each `probs` is an array of length `n_probs`.
|
||||
|
||||
`content`: Set the text to tokenize.
|
||||
|
||||
`add_special`: Boolean indicating if special tokens, i.e. `BOS`, should be inserted. Default: `false`
|
||||
Note that a special `BOS` token is never inserted.
|
||||
|
||||
- **POST** `/detokenize`: Convert tokens to text.
|
||||
|
||||
|
||||
@@ -3647,8 +3647,7 @@ int main(int argc, char ** argv) {
|
||||
|
||||
std::vector<llama_token> tokens;
|
||||
if (body.count("content") != 0) {
|
||||
const bool add_special = json_value(body, "add_special", false);
|
||||
tokens = ctx_server.tokenize(body["content"], add_special);
|
||||
tokens = ctx_server.tokenize(body["content"], false);
|
||||
}
|
||||
const json data = format_tokenizer_response(tokens);
|
||||
return res.set_content(data.dump(), "application/json; charset=utf-8");
|
||||
|
||||
@@ -7,7 +7,6 @@ Feature: llama.cpp server
|
||||
And a model file tinyllamas/stories260K.gguf from HF repo ggml-org/models
|
||||
And a model file test-model.gguf
|
||||
And a model alias tinyllama-2
|
||||
And BOS token is 1
|
||||
And 42 as server seed
|
||||
# KV Cache corresponds to the total amount of tokens
|
||||
# that can be stored across all independent sequences: #4130
|
||||
@@ -92,18 +91,7 @@ Feature: llama.cpp server
|
||||
"""
|
||||
What is the capital of France ?
|
||||
"""
|
||||
Then tokens can be detokenized
|
||||
And tokens do not begin with BOS
|
||||
|
||||
Scenario: Tokenize w/ BOS
|
||||
Given adding special tokens
|
||||
When tokenizing:
|
||||
"""
|
||||
What is the capital of Germany?
|
||||
"""
|
||||
Then tokens begin with BOS
|
||||
Given first token is removed
|
||||
Then tokens can be detokenized
|
||||
Then tokens can be detokenize
|
||||
|
||||
Scenario: Models available
|
||||
Given available models
|
||||
|
||||
@@ -376,11 +376,6 @@ def step_seed(context, seed):
|
||||
context.seed.append(seed)
|
||||
|
||||
|
||||
@step('BOS token is {bos:d}')
|
||||
def step_bos_token(context, bos):
|
||||
context.bos = bos
|
||||
|
||||
|
||||
@step('a prefix prompt')
|
||||
def step_prompt_prefix(context):
|
||||
context.prompt_prefix = context_text(context)
|
||||
@@ -661,29 +656,21 @@ async def all_embeddings_are_generated(context):
|
||||
assert_embeddings(context.tasks_result.pop().pop())
|
||||
|
||||
|
||||
@step('adding special tokens')
|
||||
def step_tokenize_set_add_special(context):
|
||||
context.tokenize_add_special = True
|
||||
|
||||
|
||||
@step('tokenizing')
|
||||
@async_run_until_complete
|
||||
async def step_tokenize(context):
|
||||
context.tokenized_text = context_text(context)
|
||||
async with aiohttp.ClientSession() as session:
|
||||
tokenize_args = {
|
||||
"content": context.tokenized_text,
|
||||
}
|
||||
if getattr(context, 'tokenize_add_special', None) is not None:
|
||||
tokenize_args['add_special'] = context.tokenize_add_special
|
||||
async with session.post(f'{context.base_url}/tokenize',
|
||||
json=tokenize_args) as response:
|
||||
json={
|
||||
"content": context.tokenized_text,
|
||||
}) as response:
|
||||
assert response.status == 200
|
||||
tokenize_json = await response.json()
|
||||
context.tokens = tokenize_json['tokens']
|
||||
|
||||
|
||||
@step('tokens can be detokenized')
|
||||
@step('tokens can be detokenize')
|
||||
@async_run_until_complete
|
||||
async def step_detokenize(context):
|
||||
assert len(context.tokens) > 0
|
||||
@@ -698,21 +685,6 @@ async def step_detokenize(context):
|
||||
assert context.tokenized_text == detokenize_json['content'].strip()
|
||||
|
||||
|
||||
@step('tokens begin with BOS')
|
||||
def step_strings_for_tokenization(context):
|
||||
assert context.tokens[0] == context.bos
|
||||
|
||||
|
||||
@step('tokens do not begin with BOS')
|
||||
def step_strings_for_tokenization(context):
|
||||
assert context.tokens[0] != context.bos
|
||||
|
||||
|
||||
@step('first token is removed')
|
||||
def step_strings_for_tokenization(context):
|
||||
context.tokens = context.tokens[1:]
|
||||
|
||||
|
||||
@step('an OPTIONS request is sent from {origin}')
|
||||
@async_run_until_complete
|
||||
async def step_options_request(context, origin):
|
||||
|
||||
@@ -49,18 +49,18 @@ extern bool server_log_json;
|
||||
#define LOG_WARNING(MSG, ...) server_log("WARN", __func__, __LINE__, MSG, __VA_ARGS__)
|
||||
#define LOG_INFO( MSG, ...) server_log("INFO", __func__, __LINE__, MSG, __VA_ARGS__)
|
||||
|
||||
static inline void server_log(const char * level, const char * function, int line, const char * message, const json & extra);
|
||||
static inline void server_log(const char *level, const char *function, int line, const char *message, const nlohmann::ordered_json &extra);
|
||||
|
||||
template <typename T>
|
||||
static T json_value(const json & body, const std::string & key, const T & default_value) {
|
||||
static T json_value(const json &body, const std::string &key, const T &default_value) {
|
||||
// Fallback null to default value
|
||||
if (body.contains(key) && !body.at(key).is_null()) {
|
||||
if (body.contains(key) && !body.at(key).is_null()){
|
||||
try {
|
||||
return body.at(key);
|
||||
} catch (NLOHMANN_JSON_NAMESPACE::detail::type_error const &) {
|
||||
std::stringstream ss;
|
||||
ss << "Wrong type supplied for parameter '" << key << "'. Expected '" << json(default_value).type_name() << "', using default value.";
|
||||
LOG_WARNING(ss.str().c_str(), body);
|
||||
return body.value(key, default_value);
|
||||
}
|
||||
catch (nlohmann::json_abi_v3_11_3::detail::type_error const&){
|
||||
std::string message = "Wrong type supplied for parameter '" + key + "'. Expected '" + typeid(default_value).name() + "', using default value.";
|
||||
server_log("WARN", __func__, __LINE__, message.c_str(), body);
|
||||
return default_value;
|
||||
}
|
||||
} else {
|
||||
@@ -68,16 +68,16 @@ static T json_value(const json & body, const std::string & key, const T & defaul
|
||||
}
|
||||
}
|
||||
|
||||
static inline void server_log(const char * level, const char * function, int line, const char * message, const json & extra) {
|
||||
static inline void server_log(const char *level, const char *function, int line, const char *message, const nlohmann::ordered_json &extra) {
|
||||
std::stringstream ss_tid;
|
||||
ss_tid << std::this_thread::get_id();
|
||||
json log = json{
|
||||
json log = nlohmann::ordered_json{
|
||||
{"tid", ss_tid.str()},
|
||||
{"timestamp", time(nullptr)},
|
||||
};
|
||||
|
||||
if (server_log_json) {
|
||||
log.merge_patch({
|
||||
log.merge_patch( {
|
||||
{"level", level},
|
||||
{"function", function},
|
||||
{"line", line},
|
||||
@@ -98,7 +98,7 @@ static inline void server_log(const char * level, const char * function, int lin
|
||||
}
|
||||
std::stringstream ss;
|
||||
ss << buf << " |";
|
||||
for (const auto & el : log.items())
|
||||
for (const auto& el : log.items())
|
||||
{
|
||||
const std::string value = el.value().dump(-1, ' ', false, json::error_handler_t::replace);
|
||||
ss << " " << el.key() << "=" << value;
|
||||
|
||||
14
llama.cpp
14
llama.cpp
@@ -4391,15 +4391,9 @@ static void llm_load_vocab(
|
||||
} else if (
|
||||
tokenizer_pre == "command-r") {
|
||||
vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_COMMAND_R;
|
||||
} else if (
|
||||
tokenizer_pre == "qwen2") {
|
||||
vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_QWEN2;
|
||||
} else if (
|
||||
tokenizer_pre == "olmo") {
|
||||
vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_OLMO;
|
||||
} else if (
|
||||
tokenizer_pre == "dbrx") {
|
||||
vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_DBRX;
|
||||
} else {
|
||||
throw std::runtime_error(format("unknown pre-tokenizer type: '%s'", tokenizer_pre.c_str()));
|
||||
}
|
||||
@@ -12206,7 +12200,6 @@ struct llm_tokenizer_bpe {
|
||||
case LLAMA_VOCAB_TYPE_BPE:
|
||||
switch (vocab.type_pre) {
|
||||
case LLAMA_VOCAB_PRE_TYPE_LLAMA3:
|
||||
case LLAMA_VOCAB_PRE_TYPE_DBRX:
|
||||
word_collection = unicode_regex_split(text, {
|
||||
// original regex from tokenizer.json
|
||||
//"(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}{1,3}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+",
|
||||
@@ -12266,13 +12259,6 @@ struct llm_tokenizer_bpe {
|
||||
"'s|'t|'re|'ve|'m|'ll|'d| ?\\p{L}+| ?\\p{N}+| ?[^\\s\\p{L}\\p{N}]+|\\s+(?!\\S)",
|
||||
});
|
||||
break;
|
||||
case LLAMA_VOCAB_PRE_TYPE_QWEN2:
|
||||
word_collection = unicode_regex_split(text, {
|
||||
// original regex from tokenizer.json
|
||||
// "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+"
|
||||
"(?:'[sS]|'[tT]|'[rR][eE]|'[vV][eE]|'[mM]|'[lL][lL]|'[dD])|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+",
|
||||
});
|
||||
break;
|
||||
default:
|
||||
// default regex for BPE tokenization pre-processing
|
||||
word_collection = unicode_regex_split(text, {
|
||||
|
||||
4
llama.h
4
llama.h
@@ -81,9 +81,7 @@ extern "C" {
|
||||
LLAMA_VOCAB_PRE_TYPE_GPT2 = 7,
|
||||
LLAMA_VOCAB_PRE_TYPE_REFACT = 8,
|
||||
LLAMA_VOCAB_PRE_TYPE_COMMAND_R = 9,
|
||||
LLAMA_VOCAB_PRE_TYPE_QWEN2 = 10,
|
||||
LLAMA_VOCAB_PRE_TYPE_OLMO = 11,
|
||||
LLAMA_VOCAB_PRE_TYPE_DBRX = 12,
|
||||
LLAMA_VOCAB_PRE_TYPE_OLMO = 10,
|
||||
};
|
||||
|
||||
// note: these values should be synchronized with ggml_rope
|
||||
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -1,106 +0,0 @@
|
||||
ied 4 ½ months
|
||||
__ggml_vocab_test__
|
||||
Führer
|
||||
__ggml_vocab_test__
|
||||
|
||||
__ggml_vocab_test__
|
||||
|
||||
__ggml_vocab_test__
|
||||
|
||||
__ggml_vocab_test__
|
||||
|
||||
__ggml_vocab_test__
|
||||
|
||||
__ggml_vocab_test__
|
||||
|
||||
|
||||
__ggml_vocab_test__
|
||||
|
||||
|
||||
|
||||
__ggml_vocab_test__
|
||||
|
||||
|
||||
|
||||
|
||||
__ggml_vocab_test__
|
||||
|
||||
|
||||
__ggml_vocab_test__
|
||||
Hello world
|
||||
__ggml_vocab_test__
|
||||
Hello world
|
||||
__ggml_vocab_test__
|
||||
Hello World
|
||||
__ggml_vocab_test__
|
||||
Hello World
|
||||
__ggml_vocab_test__
|
||||
Hello World!
|
||||
__ggml_vocab_test__
|
||||
Hello, world!
|
||||
__ggml_vocab_test__
|
||||
Hello, world!
|
||||
__ggml_vocab_test__
|
||||
this is 🦙.cpp
|
||||
__ggml_vocab_test__
|
||||
w048 7tuijk dsdfhu
|
||||
__ggml_vocab_test__
|
||||
нещо на Български
|
||||
__ggml_vocab_test__
|
||||
កាន់តែពិសេសអាចខលចេញ
|
||||
__ggml_vocab_test__
|
||||
🚀 (normal) 😶🌫️ (multiple emojis concatenated) ✅ (only emoji that has its own token)
|
||||
__ggml_vocab_test__
|
||||
Hello
|
||||
__ggml_vocab_test__
|
||||
Hello
|
||||
__ggml_vocab_test__
|
||||
Hello
|
||||
__ggml_vocab_test__
|
||||
Hello
|
||||
__ggml_vocab_test__
|
||||
Hello
|
||||
__ggml_vocab_test__
|
||||
Hello
|
||||
Hello
|
||||
__ggml_vocab_test__
|
||||
(
|
||||
__ggml_vocab_test__
|
||||
|
||||
=
|
||||
__ggml_vocab_test__
|
||||
' era
|
||||
__ggml_vocab_test__
|
||||
Hello, y'all! How are you 😁 ?我想在apple工作1314151天~
|
||||
__ggml_vocab_test__
|
||||
3
|
||||
__ggml_vocab_test__
|
||||
33
|
||||
__ggml_vocab_test__
|
||||
333
|
||||
__ggml_vocab_test__
|
||||
3333
|
||||
__ggml_vocab_test__
|
||||
33333
|
||||
__ggml_vocab_test__
|
||||
333333
|
||||
__ggml_vocab_test__
|
||||
3333333
|
||||
__ggml_vocab_test__
|
||||
33333333
|
||||
__ggml_vocab_test__
|
||||
333333333
|
||||
__ggml_vocab_test__
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
🚀 (normal) 😶🌫️ (multiple emojis concatenated) ✅ 🦙🦙 3 33 333 3333 33333 333333 3333333 33333333 3.3 3..3 3...3 កាន់តែពិសេសអាច😁 ?我想在apple工作1314151天~ ------======= нещо на Български ''''''```````""""......!!!!!!?????? I've been 'told he's there, 'RE you sure? 'M not sure I'll make it, 'D you like some tea? We'Ve a'lL
|
||||
__ggml_vocab_test__
|
||||
@@ -1,43 +0,0 @@
|
||||
1122 220 19 220 26062 3951
|
||||
37 50753 261
|
||||
|
||||
220
|
||||
256
|
||||
262
|
||||
197
|
||||
198
|
||||
271
|
||||
1406
|
||||
1572
|
||||
9707 1879
|
||||
21927 1879
|
||||
9707 4337
|
||||
21927 4337
|
||||
21927 4337 0
|
||||
9707 11 1879 0
|
||||
21927 11 1879 0
|
||||
419 374 11162 99 247 13 10821
|
||||
86 15 19 23 220 22 83 1963 41808 11472 2940 16739
|
||||
78762 14144 1456 13073 63471 33594 3038 133178 79012
|
||||
146394 97529 241 44258 233 146568 44258 224 147603 20879 115 146280 44258 223 146280 147272 97529 227 147805 148301 147270 44258 223 146848
|
||||
145836 320 8252 8 26525 114 378 235 149921 30543 320 35673 99066 97534 8 25521 227 320 3243 42365 429 702 1181 1828 3950 8
|
||||
9707
|
||||
21927
|
||||
220 21927
|
||||
256 21927
|
||||
262 21927
|
||||
262 21927 198 262 21927
|
||||
320
|
||||
198 284
|
||||
6 11385
|
||||
9707 11 379 64848 0 2585 525 498 26525 223 937 104100 18493 22377 99257 16 18 16 19 16 20 16 35727 21216
|
||||
18
|
||||
18 18
|
||||
18 18 18
|
||||
18 18 18 18
|
||||
18 18 18 18 18
|
||||
18 18 18 18 18 18
|
||||
18 18 18 18 18 18 18
|
||||
18 18 18 18 18 18 18 18
|
||||
18 18 18 18 18 18 18 18 18
|
||||
198 4710 14731 65497 7847 1572 2303 78672 10947 145836 320 8252 8 26525 114 378 235 149921 30543 320 35673 99066 97534 8 25521 227 11162 99 247 149955 220 18 220 18 18 220 18 18 18 220 18 18 18 18 220 18 18 18 18 18 220 18 18 18 18 18 18 220 18 18 18 18 18 18 18 220 18 18 18 18 18 18 18 18 220 18 13 18 220 18 496 18 220 18 1112 18 220 146394 97529 241 44258 233 146568 44258 224 147603 20879 115 146280 44258 223 146280 147272 97529 227 144534 937 104100 18493 22377 99257 16 18 16 19 16 20 16 35727 21216 55460 53237 18658 14144 1456 13073 63471 33594 3038 133178 79012 3355 4605 4605 13874 13874 73594 3014 3014 28149 17085 2928 26610 7646 358 3003 1012 364 83 813 566 594 1052 11 364 787 498 2704 30 364 44 537 2704 358 3278 1281 432 11 364 35 498 1075 1045 15243 30 1205 6 42612 264 63866 43
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -93,14 +93,11 @@ help_s = (
|
||||
"specified values are averaged WITHOUT weighing by the --repetitions parameter of llama-bench."
|
||||
)
|
||||
parser.add_argument("-s", "--show", help=help_s)
|
||||
parser.add_argument("--verbose", action="store_true", help="increase output verbosity")
|
||||
|
||||
known_args, unknown_args = parser.parse_known_args()
|
||||
|
||||
logging.basicConfig(level=logging.DEBUG if known_args.verbose else logging.INFO)
|
||||
|
||||
if unknown_args:
|
||||
logger.error(f"Received unknown args: {unknown_args}.\n")
|
||||
logger.error(f"Received unknown args: {unknown_args}.")
|
||||
parser.print_help()
|
||||
sys.exit(1)
|
||||
|
||||
@@ -113,7 +110,7 @@ if input_file is None:
|
||||
input_file = sqlite_files[0]
|
||||
|
||||
if input_file is None:
|
||||
logger.error("Cannot find a suitable input file, please provide one.\n")
|
||||
logger.error("Cannot find a suitable input file, please provide one.")
|
||||
parser.print_help()
|
||||
sys.exit(1)
|
||||
|
||||
@@ -205,12 +202,12 @@ elif repo is not None:
|
||||
hexsha8_baseline = find_parent_in_data(repo.heads.master.commit)
|
||||
|
||||
if hexsha8_baseline is None:
|
||||
logger.error("No baseline was provided and did not find data for any master branch commits.\n")
|
||||
logger.error("No baseline was provided and did not find data for any master branch commits.")
|
||||
parser.print_help()
|
||||
sys.exit(1)
|
||||
else:
|
||||
logger.error("No baseline was provided and the current working directory "
|
||||
"is not part of a git repository from which a baseline could be inferred.\n")
|
||||
"is not part of a git repository from which a baseline could be inferred.")
|
||||
parser.print_help()
|
||||
sys.exit(1)
|
||||
|
||||
@@ -241,7 +238,7 @@ elif repo is not None:
|
||||
break
|
||||
|
||||
if hexsha8_compare is None:
|
||||
logger.error("No compare target was provided and did not find data for any non-master commits.\n")
|
||||
logger.error("No compare target was provided and did not find data for any non-master commits.")
|
||||
parser.print_help()
|
||||
sys.exit(1)
|
||||
else:
|
||||
@@ -364,7 +361,7 @@ if "gpu_info" in show:
|
||||
headers = [PRETTY_NAMES[p] for p in show]
|
||||
headers += ["Test", f"t/s {name_baseline}", f"t/s {name_compare}", "Speedup"]
|
||||
|
||||
print(tabulate( # noqa: NP100
|
||||
logger.info(tabulate(
|
||||
table,
|
||||
headers=headers,
|
||||
floatfmt=".2f",
|
||||
|
||||
77
sgemm.cpp
77
sgemm.cpp
@@ -1,3 +1,6 @@
|
||||
// -*- mode:c++;indent-tabs-mode:nil;c-basic-offset:4;coding:utf-8 -*-
|
||||
// vi: set et ft=c++ ts=4 sts=4 sw=4 fenc=utf-8 :vi
|
||||
//
|
||||
// Copyright 2024 Mozilla Foundation
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining
|
||||
@@ -582,15 +585,15 @@ class tinyBLAS_Q0_ARM {
|
||||
};
|
||||
#endif // __ARM_FEATURE_DOTPROD
|
||||
|
||||
#if defined(__AVX2__) || defined(__AVX512F__) || defined(__AVX__)
|
||||
#if defined(__AVX2__) || defined(__AVX512F__)
|
||||
template <typename TA, typename TB, typename TC>
|
||||
class tinyBLAS_Q0_AVX {
|
||||
class tinyBLAS_Q0_AVX2 {
|
||||
public:
|
||||
tinyBLAS_Q0_AVX(int64_t k,
|
||||
const TA *A, int64_t lda,
|
||||
const TB *B, int64_t ldb,
|
||||
TC *C, int64_t ldc,
|
||||
int ith, int nth)
|
||||
tinyBLAS_Q0_AVX2(int64_t k,
|
||||
const TA *A, int64_t lda,
|
||||
const TB *B, int64_t ldb,
|
||||
TC *C, int64_t ldc,
|
||||
int ith, int nth)
|
||||
: A(A), B(B), C(C), k(k), lda(lda), ldb(ldb), ldc(ldc), ith(ith), nth(nth) {
|
||||
}
|
||||
|
||||
@@ -725,34 +728,14 @@ class tinyBLAS_Q0_AVX {
|
||||
__m256 Cv[RN][RM] = {};
|
||||
for (int64_t l = 0; l < k; ++l)
|
||||
for (int64_t j = 0; j < RN; ++j)
|
||||
for (int64_t i = 0; i < RM; ++i) {
|
||||
#if defined(__AVX2__)
|
||||
__m256 udTmp = updot(_mm256_sign_epi8(load(A + lda * (ii + i) + l),
|
||||
load(A + lda * (ii + i) + l)),
|
||||
_mm256_sign_epi8(load(B + ldb * (jj + j) + l),
|
||||
load(A + lda * (ii + i) + l)));
|
||||
#else
|
||||
__m128i ali0 = load0(A + lda * (ii + i) + l);
|
||||
__m128i ali1 = load1(A + lda * (ii + i) + l);
|
||||
__m128i blj0 = load0(B + ldb * (jj + j) + l);
|
||||
__m128i blj1 = load1(B + ldb * (jj + j) + l);
|
||||
|
||||
__m128i sepAA0 = _mm_sign_epi8(ali0, ali0);
|
||||
__m128i sepAA1 = _mm_sign_epi8(ali1, ali1);
|
||||
__m128i sepBA0 = _mm_sign_epi8(blj0, ali0);
|
||||
__m128i sepBA1 = _mm_sign_epi8(blj1, ali1);
|
||||
|
||||
// updot
|
||||
const __m128i oneFill = _mm_set1_epi16(1);
|
||||
__m128i mad0 = _mm_maddubs_epi16(sepAA0, sepBA0);
|
||||
__m128i mad1 = _mm_maddubs_epi16(sepAA1, sepBA1);
|
||||
__m256 udTmp = _mm256_cvtepi32_ps(MM256_SET_M128I(_mm_madd_epi16(oneFill, mad1), _mm_madd_epi16(oneFill, mad0)));
|
||||
#endif
|
||||
for (int64_t i = 0; i < RM; ++i)
|
||||
Cv[j][i] = madd(_mm256_set1_ps(unhalf(A[lda * (ii + i) + l].d) *
|
||||
unhalf(B[ldb * (jj + j) + l].d)),
|
||||
udTmp,
|
||||
Cv[j][i]);
|
||||
}
|
||||
updot(_mm256_sign_epi8(load(A + lda * (ii + i) + l),
|
||||
load(A + lda * (ii + i) + l)),
|
||||
_mm256_sign_epi8(load(B + ldb * (jj + j) + l),
|
||||
load(A + lda * (ii + i) + l))),
|
||||
Cv[j][i]);
|
||||
for (int64_t j = 0; j < RN; ++j)
|
||||
for (int64_t i = 0; i < RM; ++i)
|
||||
C[ldc * (jj + j) + (ii + i)] = hsum(Cv[j][i]);
|
||||
@@ -763,28 +746,10 @@ class tinyBLAS_Q0_AVX {
|
||||
return _mm256_loadu_si256((const __m256i *)b->qs);
|
||||
}
|
||||
|
||||
inline __m128i load0(const block_q8_0 *b) {
|
||||
return _mm_loadu_si128((const __m128i *)b->qs);
|
||||
}
|
||||
|
||||
inline __m128i load1(const block_q8_0 *b) {
|
||||
return _mm_loadu_si128(((const __m128i *)b->qs) + 1);
|
||||
}
|
||||
|
||||
inline __m256i load(const block_q4_0 *b) {
|
||||
return _mm256_sub_epi8(denibble(b->qs), _mm256_set1_epi8(8));
|
||||
}
|
||||
|
||||
inline __m128i load0(const block_q4_0 *b) {
|
||||
const __m128i x = _mm_loadu_si128((const __m128i *)(b->qs));
|
||||
return _mm_sub_epi8(_mm_and_si128(_mm_set1_epi8(15), x), _mm_set1_epi8(8));
|
||||
}
|
||||
|
||||
inline __m128i load1(const block_q4_0 *b) {
|
||||
const __m128i x = _mm_loadu_si128((const __m128i *)(b->qs));
|
||||
return _mm_sub_epi8(_mm_and_si128(_mm_set1_epi8(15), _mm_srli_epi16(x, 4)), _mm_set1_epi8(8));
|
||||
}
|
||||
|
||||
inline __m256 updot(__m256i u, __m256i s) {
|
||||
__m256i res;
|
||||
#if defined(__AVXVNNI__) || (defined(__AVX512VNNI__) && defined(__AVX512VL__))
|
||||
@@ -812,7 +777,7 @@ class tinyBLAS_Q0_AVX {
|
||||
const int ith;
|
||||
const int nth;
|
||||
};
|
||||
#endif // __AVX__
|
||||
#endif // __AVX2__
|
||||
|
||||
} // namespace
|
||||
|
||||
@@ -963,8 +928,8 @@ bool llamafile_sgemm(int64_t m, int64_t n, int64_t k, const void *A, int64_t lda
|
||||
case GGML_TYPE_Q8_0: {
|
||||
if (Btype != GGML_TYPE_Q8_0)
|
||||
return false;
|
||||
#if defined(__AVX2__) || defined(__AVX512F__) || defined(__AVX__)
|
||||
tinyBLAS_Q0_AVX<block_q8_0, block_q8_0, float> tb{
|
||||
#if defined(__AVX2__) || defined(__AVX512F__)
|
||||
tinyBLAS_Q0_AVX2<block_q8_0, block_q8_0, float> tb{
|
||||
k, (const block_q8_0 *)A, lda,
|
||||
(const block_q8_0 *)B, ldb,
|
||||
(float *)C, ldc,
|
||||
@@ -987,8 +952,8 @@ bool llamafile_sgemm(int64_t m, int64_t n, int64_t k, const void *A, int64_t lda
|
||||
case GGML_TYPE_Q4_0: {
|
||||
if (Btype != GGML_TYPE_Q8_0)
|
||||
return false;
|
||||
#if defined(__AVX2__) || defined(__AVX512F__) || defined(__AVX__)
|
||||
tinyBLAS_Q0_AVX<block_q4_0, block_q8_0, float> tb{
|
||||
#if defined(__AVX2__) || defined(__AVX512F__)
|
||||
tinyBLAS_Q0_AVX2<block_q4_0, block_q8_0, float> tb{
|
||||
k, (const block_q4_0 *)A, lda,
|
||||
(const block_q8_0 *)B, ldb,
|
||||
(float *)C, ldc,
|
||||
|
||||
@@ -84,7 +84,6 @@ llama_test(test-tokenizer-0 NAME test-tokenizer-0-starcoder ARGS ${CMAKE
|
||||
llama_test(test-tokenizer-0 NAME test-tokenizer-0-gpt-2 ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-gpt-2.gguf)
|
||||
llama_test(test-tokenizer-0 NAME test-tokenizer-0-refact ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-refact.gguf)
|
||||
llama_test(test-tokenizer-0 NAME test-tokenizer-0-command-r ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-command-r.gguf)
|
||||
llama_test(test-tokenizer-0 NAME test-tokenizer-0-qwen2 ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-qwen2.gguf)
|
||||
|
||||
# build test-tokenizer-1-bpe target once and add many tests
|
||||
add_executable(test-tokenizer-1-bpe test-tokenizer-1-bpe.cpp)
|
||||
|
||||
Reference in New Issue
Block a user