Compare commits

..

7 Commits

Author SHA1 Message Date
Georgi Gerganov
0fc560fe96 ci : enable git lfs for build.yml 2024-05-08 10:53:02 +03:00
Georgi Gerganov
db5c2ad30e Revert "tmp : dummy change to trigger ci"
This reverts commit 97e40df5d6.
2024-05-08 10:42:25 +03:00
Georgi Gerganov
97e40df5d6 tmp : dummy change to trigger ci 2024-05-08 10:42:11 +03:00
Georgi Gerganov
837f426f19 ci : try lfs true 2024-05-08 10:30:25 +03:00
Georgi Gerganov
9d13776f34 ci : deps before checkout 2024-05-08 10:24:53 +03:00
Georgi Gerganov
2c7ff2c7ae ci : add git-lfs
ggml-ci
2024-05-08 10:18:47 +03:00
Georgi Gerganov
0dc0e9aa42 models : convert vocab files to LFS
ggml-ci
2024-05-08 09:54:38 +03:00
36 changed files with 126 additions and 336 deletions

1
.gitattributes vendored Normal file
View File

@@ -0,0 +1 @@
models/ggml-vocab-*.gguf filter=lfs diff=lfs merge=lfs -text

View File

@@ -33,6 +33,7 @@ jobs:
id: checkout
uses: actions/checkout@v4
with:
lfs: true
fetch-depth: 0
- name: Dependencies
@@ -91,6 +92,7 @@ jobs:
id: checkout
uses: actions/checkout@v4
with:
lfs: true
fetch-depth: 0
- name: Dependencies
@@ -153,6 +155,8 @@ jobs:
- name: Clone
id: checkout
uses: actions/checkout@v4
with:
lfs: true
- name: Dependencies
id: depends
@@ -188,6 +192,8 @@ jobs:
- name: Clone
id: checkout
uses: actions/checkout@v4
with:
lfs: true
- name: Dependencies
id: depends
@@ -211,6 +217,7 @@ jobs:
id: checkout
uses: actions/checkout@v4
with:
lfs: true
fetch-depth: 0
- name: Dependencies
@@ -285,6 +292,8 @@ jobs:
# - name: Clone
# id: checkout
# uses: actions/checkout@v4
# with:
# lfs: true
#
# - name: Dependencies
# id: depends
@@ -319,6 +328,8 @@ jobs:
- name: Clone
id: checkout
uses: actions/checkout@v4
with:
lfs: true
- name: Dependencies
id: depends
@@ -347,6 +358,8 @@ jobs:
- name: Clone
id: checkout
uses: actions/checkout@v4
with:
lfs: true
- name: Dependencies
id: depends
@@ -369,6 +382,8 @@ jobs:
steps:
- uses: actions/checkout@v2
with:
lfs: true
- name: add oneAPI to apt
shell: bash
@@ -393,6 +408,8 @@ jobs:
- name: Clone
id: checkout
uses: actions/checkout@v4
with:
lfs: true
- name: Build
id: cmake_build
@@ -410,6 +427,8 @@ jobs:
steps:
- uses: actions/checkout@v2
with:
lfs: true
- name: add oneAPI to apt
shell: bash
@@ -434,6 +453,8 @@ jobs:
- name: Clone
id: checkout
uses: actions/checkout@v4
with:
lfs: true
- name: Build
id: cmake_build
@@ -454,6 +475,8 @@ jobs:
- name: Clone
id: checkout
uses: actions/checkout@v4
with:
lfs: true
- name: Dependencies
id: depends
@@ -485,6 +508,8 @@ jobs:
- name: Clone
id: checkout
uses: actions/checkout@v4
with:
lfs: true
- name: Dependencies
id: depends
@@ -514,6 +539,8 @@ jobs:
- name: Clone
id: checkout
uses: actions/checkout@v1
with:
lfs: true
- name: Dependencies
id: depends
@@ -543,6 +570,8 @@ jobs:
- name: Clone
id: checkout
uses: actions/checkout@v1
with:
lfs: true
- name: Dependencies
id: depends
@@ -576,6 +605,8 @@ jobs:
- name: Clone
id: checkout
uses: actions/checkout@v1
with:
lfs: true
- name: Dependencies
id: depends
@@ -606,6 +637,8 @@ jobs:
steps:
- name: Clone
uses: actions/checkout@v4
with:
lfs: true
- name: Setup ${{ matrix.sys }}
uses: msys2/setup-msys2@v2
@@ -687,6 +720,7 @@ jobs:
id: checkout
uses: actions/checkout@v4
with:
lfs: true
fetch-depth: 0
- name: Clone Kompute submodule
@@ -833,6 +867,7 @@ jobs:
id: checkout
uses: actions/checkout@v4
with:
lfs: true
fetch-depth: 0
- uses: Jimver/cuda-toolkit@v0.2.11
@@ -906,6 +941,7 @@ jobs:
id: checkout
uses: actions/checkout@v4
with:
lfs: true
fetch-depth: 0
- name: Install
@@ -947,6 +983,8 @@ jobs:
steps:
- name: Checkout code
uses: actions/checkout@v4
with:
lfs: true
- name: Build Xcode project
run: xcodebuild -project examples/llama.swiftui/llama.swiftui.xcodeproj -scheme llama.swiftui -sdk iphoneos CODE_SIGNING_REQUIRED=NO CODE_SIGN_IDENTITY= -destination 'generic/platform=iOS' build
@@ -957,6 +995,8 @@ jobs:
steps:
- name: Clone
uses: actions/checkout@v4
with:
lfs: true
- name: Set up JDK
uses: actions/setup-java@v3
@@ -979,7 +1019,9 @@ jobs:
# runs-on: macos-12
# steps:
# - name: Clone
# uses: actions/checkout@v4
# uses: actions/checkout@#v4
# with:
# lfs: true
#
# - name: Build
# uses: cross-platform-actions/action@v0.19.0
@@ -1012,6 +1054,7 @@ jobs:
id: checkout
uses: actions/checkout@v4
with:
lfs: true
fetch-depth: 0
- name: Determine tag name
@@ -1077,6 +1120,8 @@ jobs:
# steps:
# - name: Clone
# uses: actions/checkout@v4
# with:
# lfs: true
#
# - name: Dependencies
# run: |
@@ -1101,6 +1146,8 @@ jobs:
# steps:
# - name: Clone
# uses: actions/checkout@v4
# with:
# lfs: true
#
# - name: Dependencies
# run: |
@@ -1125,6 +1172,8 @@ jobs:
# steps:
# - name: Clone
# uses: actions/checkout@v4
# with:
# lfs: true
#
# - name: Dependencies
# run: |
@@ -1155,6 +1204,8 @@ jobs:
# steps:
# - name: Clone
# uses: actions/checkout@v4
# with:
# lfs: true
#
# - name: Add msbuild to PATH
# uses: microsoft/setup-msbuild@v1
@@ -1194,6 +1245,8 @@ jobs:
# steps:
# - name: Clone
# uses: actions/checkout@v4
# with:
# lfs: true
#
# - name: Add msbuild to PATH
# uses: microsoft/setup-msbuild@v1
@@ -1240,6 +1293,8 @@ jobs:
# steps:
# - name: Clone
# uses: actions/checkout@v4
# with:
# lfs: true
#
# - name: Dependencies
# run: |

View File

@@ -13,14 +13,16 @@ jobs:
run:
runs-on: ubuntu-20.04
steps:
- name: Checkout
uses: actions/checkout@v4
- name: Dependencies
run: |
sudo apt-get update
sudo apt-get install build-essential gcc-8 lcov
- name: Checkout
uses: actions/checkout@v4
with:
lfs: true
- name: Build
run: CC=gcc-8 make -j LLAMA_CODE_COVERAGE=1 tests

View File

@@ -67,9 +67,7 @@ models = [
{"name": "gpt-2", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/openai-community/gpt2", },
{"name": "refact", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/smallcloudai/Refact-1_6-base", },
{"name": "command-r", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/CohereForAI/c4ai-command-r-v01", },
{"name": "qwen2", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/Qwen/Qwen1.5-7B", },
{"name": "olmo", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/allenai/OLMo-1.7-7B-hf", },
{"name": "dbrx", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/databricks/dbrx-base", },
]
# make directory "models/tokenizers" if it doesn't exist
@@ -153,8 +151,6 @@ for model in models:
# print the "pre_tokenizer" content from the tokenizer.json
with open(f"models/tokenizers/{name}/tokenizer.json", "r", encoding="utf-8") as f:
cfg = json.load(f)
normalizer = cfg["normalizer"]
logger.info("normalizer: " + json.dumps(normalizer, indent=4))
pre_tokenizer = cfg["pre_tokenizer"]
logger.info("pre_tokenizer: " + json.dumps(pre_tokenizer, indent=4))

View File

@@ -314,15 +314,9 @@ class Model(ABC):
if chkhsh == "9c2227e4dd922002fb81bde4fc02b0483ca4f12911410dee2255e4987644e3f8":
# ref: https://huggingface.co/CohereForAI/c4ai-command-r-v01
res = "command-r"
if chkhsh == "e636dc30a262dcc0d8c323492e32ae2b70728f4df7dfe9737d9f920a282b8aea":
# ref: https://huggingface.co/Qwen/Qwen1.5-7B
res = "qwen2"
if chkhsh == "b6dc8df998e1cfbdc4eac8243701a65afe638679230920b50d6f17d81c098166":
# ref: https://huggingface.co/allenai/OLMo-1.7-7B-hf
res = "olmo"
if chkhsh == "a8594e3edff7c29c003940395316294b2c623e09894deebbc65f33f1515df79e":
# ref: https://huggingface.co/databricks/dbrx-instruct
res = "dbrx"
if res is None:
logger.warning("\n")

View File

@@ -1508,27 +1508,25 @@ def main(args_in: list[str] | None = None) -> None:
if args.big_endian:
endianess = gguf.GGUFEndian.BIG
params = None
if args.pad_vocab or not args.vocab_only:
params = Params.load(model_plus)
if params.n_ctx == -1:
if args.ctx is None:
msg = """\
The model doesn't have a context size, and you didn't specify one with --ctx
Please specify one with --ctx:
- LLaMA v1: --ctx 2048
- LLaMA v2: --ctx 4096"""
parser.error(textwrap.dedent(msg))
params.n_ctx = args.ctx
params = Params.load(model_plus)
if params.n_ctx == -1:
if args.ctx is None:
msg = """\
The model doesn't have a context size, and you didn't specify one with --ctx
Please specify one with --ctx:
- LLaMA v1: --ctx 2048
- LLaMA v2: --ctx 4096"""
parser.error(textwrap.dedent(msg))
params.n_ctx = args.ctx
if args.outtype:
params.ftype = {
"f32": GGMLFileType.AllF32,
"f16": GGMLFileType.MostlyF16,
"q8_0": GGMLFileType.MostlyQ8_0,
}[args.outtype]
if args.outtype:
params.ftype = {
"f32": GGMLFileType.AllF32,
"f16": GGMLFileType.MostlyF16,
"q8_0": GGMLFileType.MostlyQ8_0,
}[args.outtype]
logger.info(f"params = {params}")
logger.info(f"params = {params}")
model_parent_path = model_plus.paths[0].parent
vocab_path = Path(args.vocab_dir or args.model or model_parent_path)
@@ -1541,17 +1539,6 @@ def main(args_in: list[str] | None = None) -> None:
if not args.outfile:
raise ValueError("need --outfile if using --vocab-only")
outfile = args.outfile
if params is None:
params = Params(
n_vocab = vocab.vocab_size,
n_embd = 1,
n_layer = 1,
n_ctx = 1,
n_ff = 1,
n_head = 1,
n_head_kv = 1,
f_norm_eps = 1e-5,
)
OutputFile.write_vocab_only(outfile, params, vocab, special_vocab,
endianess=endianess, pad_vocab=args.pad_vocab)
logger.info(f"Wrote {outfile}")

View File

@@ -331,7 +331,7 @@ Notice that each `probs` is an array of length `n_probs`.
`content`: Set the text to tokenize.
`add_special`: Boolean indicating if special tokens, i.e. `BOS`, should be inserted. Default: `false`
Note that a special `BOS` token is never inserted.
- **POST** `/detokenize`: Convert tokens to text.

View File

@@ -3647,8 +3647,7 @@ int main(int argc, char ** argv) {
std::vector<llama_token> tokens;
if (body.count("content") != 0) {
const bool add_special = json_value(body, "add_special", false);
tokens = ctx_server.tokenize(body["content"], add_special);
tokens = ctx_server.tokenize(body["content"], false);
}
const json data = format_tokenizer_response(tokens);
return res.set_content(data.dump(), "application/json; charset=utf-8");

View File

@@ -7,7 +7,6 @@ Feature: llama.cpp server
And a model file tinyllamas/stories260K.gguf from HF repo ggml-org/models
And a model file test-model.gguf
And a model alias tinyllama-2
And BOS token is 1
And 42 as server seed
# KV Cache corresponds to the total amount of tokens
# that can be stored across all independent sequences: #4130
@@ -92,18 +91,7 @@ Feature: llama.cpp server
"""
What is the capital of France ?
"""
Then tokens can be detokenized
And tokens do not begin with BOS
Scenario: Tokenize w/ BOS
Given adding special tokens
When tokenizing:
"""
What is the capital of Germany?
"""
Then tokens begin with BOS
Given first token is removed
Then tokens can be detokenized
Then tokens can be detokenize
Scenario: Models available
Given available models

View File

@@ -376,11 +376,6 @@ def step_seed(context, seed):
context.seed.append(seed)
@step('BOS token is {bos:d}')
def step_bos_token(context, bos):
context.bos = bos
@step('a prefix prompt')
def step_prompt_prefix(context):
context.prompt_prefix = context_text(context)
@@ -661,29 +656,21 @@ async def all_embeddings_are_generated(context):
assert_embeddings(context.tasks_result.pop().pop())
@step('adding special tokens')
def step_tokenize_set_add_special(context):
context.tokenize_add_special = True
@step('tokenizing')
@async_run_until_complete
async def step_tokenize(context):
context.tokenized_text = context_text(context)
async with aiohttp.ClientSession() as session:
tokenize_args = {
"content": context.tokenized_text,
}
if getattr(context, 'tokenize_add_special', None) is not None:
tokenize_args['add_special'] = context.tokenize_add_special
async with session.post(f'{context.base_url}/tokenize',
json=tokenize_args) as response:
json={
"content": context.tokenized_text,
}) as response:
assert response.status == 200
tokenize_json = await response.json()
context.tokens = tokenize_json['tokens']
@step('tokens can be detokenized')
@step('tokens can be detokenize')
@async_run_until_complete
async def step_detokenize(context):
assert len(context.tokens) > 0
@@ -698,21 +685,6 @@ async def step_detokenize(context):
assert context.tokenized_text == detokenize_json['content'].strip()
@step('tokens begin with BOS')
def step_strings_for_tokenization(context):
assert context.tokens[0] == context.bos
@step('tokens do not begin with BOS')
def step_strings_for_tokenization(context):
assert context.tokens[0] != context.bos
@step('first token is removed')
def step_strings_for_tokenization(context):
context.tokens = context.tokens[1:]
@step('an OPTIONS request is sent from {origin}')
@async_run_until_complete
async def step_options_request(context, origin):

View File

@@ -49,18 +49,18 @@ extern bool server_log_json;
#define LOG_WARNING(MSG, ...) server_log("WARN", __func__, __LINE__, MSG, __VA_ARGS__)
#define LOG_INFO( MSG, ...) server_log("INFO", __func__, __LINE__, MSG, __VA_ARGS__)
static inline void server_log(const char * level, const char * function, int line, const char * message, const json & extra);
static inline void server_log(const char *level, const char *function, int line, const char *message, const nlohmann::ordered_json &extra);
template <typename T>
static T json_value(const json & body, const std::string & key, const T & default_value) {
static T json_value(const json &body, const std::string &key, const T &default_value) {
// Fallback null to default value
if (body.contains(key) && !body.at(key).is_null()) {
if (body.contains(key) && !body.at(key).is_null()){
try {
return body.at(key);
} catch (NLOHMANN_JSON_NAMESPACE::detail::type_error const &) {
std::stringstream ss;
ss << "Wrong type supplied for parameter '" << key << "'. Expected '" << json(default_value).type_name() << "', using default value.";
LOG_WARNING(ss.str().c_str(), body);
return body.value(key, default_value);
}
catch (nlohmann::json_abi_v3_11_3::detail::type_error const&){
std::string message = "Wrong type supplied for parameter '" + key + "'. Expected '" + typeid(default_value).name() + "', using default value.";
server_log("WARN", __func__, __LINE__, message.c_str(), body);
return default_value;
}
} else {
@@ -68,16 +68,16 @@ static T json_value(const json & body, const std::string & key, const T & defaul
}
}
static inline void server_log(const char * level, const char * function, int line, const char * message, const json & extra) {
static inline void server_log(const char *level, const char *function, int line, const char *message, const nlohmann::ordered_json &extra) {
std::stringstream ss_tid;
ss_tid << std::this_thread::get_id();
json log = json{
json log = nlohmann::ordered_json{
{"tid", ss_tid.str()},
{"timestamp", time(nullptr)},
};
if (server_log_json) {
log.merge_patch({
log.merge_patch( {
{"level", level},
{"function", function},
{"line", line},
@@ -98,7 +98,7 @@ static inline void server_log(const char * level, const char * function, int lin
}
std::stringstream ss;
ss << buf << " |";
for (const auto & el : log.items())
for (const auto& el : log.items())
{
const std::string value = el.value().dump(-1, ' ', false, json::error_handler_t::replace);
ss << " " << el.key() << "=" << value;

View File

@@ -4391,15 +4391,9 @@ static void llm_load_vocab(
} else if (
tokenizer_pre == "command-r") {
vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_COMMAND_R;
} else if (
tokenizer_pre == "qwen2") {
vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_QWEN2;
} else if (
tokenizer_pre == "olmo") {
vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_OLMO;
} else if (
tokenizer_pre == "dbrx") {
vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_DBRX;
} else {
throw std::runtime_error(format("unknown pre-tokenizer type: '%s'", tokenizer_pre.c_str()));
}
@@ -12206,7 +12200,6 @@ struct llm_tokenizer_bpe {
case LLAMA_VOCAB_TYPE_BPE:
switch (vocab.type_pre) {
case LLAMA_VOCAB_PRE_TYPE_LLAMA3:
case LLAMA_VOCAB_PRE_TYPE_DBRX:
word_collection = unicode_regex_split(text, {
// original regex from tokenizer.json
//"(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}{1,3}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+",
@@ -12266,13 +12259,6 @@ struct llm_tokenizer_bpe {
"'s|'t|'re|'ve|'m|'ll|'d| ?\\p{L}+| ?\\p{N}+| ?[^\\s\\p{L}\\p{N}]+|\\s+(?!\\S)",
});
break;
case LLAMA_VOCAB_PRE_TYPE_QWEN2:
word_collection = unicode_regex_split(text, {
// original regex from tokenizer.json
// "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+"
"(?:'[sS]|'[tT]|'[rR][eE]|'[vV][eE]|'[mM]|'[lL][lL]|'[dD])|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+",
});
break;
default:
// default regex for BPE tokenization pre-processing
word_collection = unicode_regex_split(text, {

View File

@@ -81,9 +81,7 @@ extern "C" {
LLAMA_VOCAB_PRE_TYPE_GPT2 = 7,
LLAMA_VOCAB_PRE_TYPE_REFACT = 8,
LLAMA_VOCAB_PRE_TYPE_COMMAND_R = 9,
LLAMA_VOCAB_PRE_TYPE_QWEN2 = 10,
LLAMA_VOCAB_PRE_TYPE_OLMO = 11,
LLAMA_VOCAB_PRE_TYPE_DBRX = 12,
LLAMA_VOCAB_PRE_TYPE_OLMO = 10,
};
// note: these values should be synchronized with ggml_rope

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@@ -1,106 +0,0 @@
ied 4 ½ months
__ggml_vocab_test__
Führer
__ggml_vocab_test__
__ggml_vocab_test__
__ggml_vocab_test__
__ggml_vocab_test__
__ggml_vocab_test__
__ggml_vocab_test__
__ggml_vocab_test__
__ggml_vocab_test__
__ggml_vocab_test__
__ggml_vocab_test__
Hello world
__ggml_vocab_test__
Hello world
__ggml_vocab_test__
Hello World
__ggml_vocab_test__
Hello World
__ggml_vocab_test__
Hello World!
__ggml_vocab_test__
Hello, world!
__ggml_vocab_test__
Hello, world!
__ggml_vocab_test__
this is 🦙.cpp
__ggml_vocab_test__
w048 7tuijk dsdfhu
__ggml_vocab_test__
нещо на Български
__ggml_vocab_test__
កាន់តែពិសេសអាចខលចេញ
__ggml_vocab_test__
🚀 (normal) 😶‍🌫️ (multiple emojis concatenated) ✅ (only emoji that has its own token)
__ggml_vocab_test__
Hello
__ggml_vocab_test__
Hello
__ggml_vocab_test__
Hello
__ggml_vocab_test__
Hello
__ggml_vocab_test__
Hello
__ggml_vocab_test__
Hello
Hello
__ggml_vocab_test__
(
__ggml_vocab_test__
=
__ggml_vocab_test__
' era
__ggml_vocab_test__
Hello, y'all! How are you 😁 ?我想在apple工作1314151天
__ggml_vocab_test__
3
__ggml_vocab_test__
33
__ggml_vocab_test__
333
__ggml_vocab_test__
3333
__ggml_vocab_test__
33333
__ggml_vocab_test__
333333
__ggml_vocab_test__
3333333
__ggml_vocab_test__
33333333
__ggml_vocab_test__
333333333
__ggml_vocab_test__
🚀 (normal) 😶‍🌫️ (multiple emojis concatenated) ✅ 🦙🦙 3 33 333 3333 33333 333333 3333333 33333333 3.3 3..3 3...3 កាន់តែពិសេសអាច😁 ?我想在apple工作1314151天 ------======= нещо на Български ''''''```````""""......!!!!!!?????? I've been 'told he's there, 'RE you sure? 'M not sure I'll make it, 'D you like some tea? We'Ve a'lL
__ggml_vocab_test__

View File

@@ -1,43 +0,0 @@
1122 220 19 220 26062 3951
37 50753 261
220
256
262
197
198
271
1406
1572
9707 1879
21927 1879
9707 4337
21927 4337
21927 4337 0
9707 11 1879 0
21927 11 1879 0
419 374 11162 99 247 13 10821
86 15 19 23 220 22 83 1963 41808 11472 2940 16739
78762 14144 1456 13073 63471 33594 3038 133178 79012
146394 97529 241 44258 233 146568 44258 224 147603 20879 115 146280 44258 223 146280 147272 97529 227 147805 148301 147270 44258 223 146848
145836 320 8252 8 26525 114 378 235 149921 30543 320 35673 99066 97534 8 25521 227 320 3243 42365 429 702 1181 1828 3950 8
9707
21927
220 21927
256 21927
262 21927
262 21927 198 262 21927
320
198 284
6 11385
9707 11 379 64848 0 2585 525 498 26525 223 937 104100 18493 22377 99257 16 18 16 19 16 20 16 35727 21216
18
18 18
18 18 18
18 18 18 18
18 18 18 18 18
18 18 18 18 18 18
18 18 18 18 18 18 18
18 18 18 18 18 18 18 18
18 18 18 18 18 18 18 18 18
198 4710 14731 65497 7847 1572 2303 78672 10947 145836 320 8252 8 26525 114 378 235 149921 30543 320 35673 99066 97534 8 25521 227 11162 99 247 149955 220 18 220 18 18 220 18 18 18 220 18 18 18 18 220 18 18 18 18 18 220 18 18 18 18 18 18 220 18 18 18 18 18 18 18 220 18 18 18 18 18 18 18 18 220 18 13 18 220 18 496 18 220 18 1112 18 220 146394 97529 241 44258 233 146568 44258 224 147603 20879 115 146280 44258 223 146280 147272 97529 227 144534 937 104100 18493 22377 99257 16 18 16 19 16 20 16 35727 21216 55460 53237 18658 14144 1456 13073 63471 33594 3038 133178 79012 3355 4605 4605 13874 13874 73594 3014 3014 28149 17085 2928 26610 7646 358 3003 1012 364 83 813 566 594 1052 11 364 787 498 2704 30 364 44 537 2704 358 3278 1281 432 11 364 35 498 1075 1045 15243 30 1205 6 42612 264 63866 43

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@@ -93,14 +93,11 @@ help_s = (
"specified values are averaged WITHOUT weighing by the --repetitions parameter of llama-bench."
)
parser.add_argument("-s", "--show", help=help_s)
parser.add_argument("--verbose", action="store_true", help="increase output verbosity")
known_args, unknown_args = parser.parse_known_args()
logging.basicConfig(level=logging.DEBUG if known_args.verbose else logging.INFO)
if unknown_args:
logger.error(f"Received unknown args: {unknown_args}.\n")
logger.error(f"Received unknown args: {unknown_args}.")
parser.print_help()
sys.exit(1)
@@ -113,7 +110,7 @@ if input_file is None:
input_file = sqlite_files[0]
if input_file is None:
logger.error("Cannot find a suitable input file, please provide one.\n")
logger.error("Cannot find a suitable input file, please provide one.")
parser.print_help()
sys.exit(1)
@@ -205,12 +202,12 @@ elif repo is not None:
hexsha8_baseline = find_parent_in_data(repo.heads.master.commit)
if hexsha8_baseline is None:
logger.error("No baseline was provided and did not find data for any master branch commits.\n")
logger.error("No baseline was provided and did not find data for any master branch commits.")
parser.print_help()
sys.exit(1)
else:
logger.error("No baseline was provided and the current working directory "
"is not part of a git repository from which a baseline could be inferred.\n")
"is not part of a git repository from which a baseline could be inferred.")
parser.print_help()
sys.exit(1)
@@ -241,7 +238,7 @@ elif repo is not None:
break
if hexsha8_compare is None:
logger.error("No compare target was provided and did not find data for any non-master commits.\n")
logger.error("No compare target was provided and did not find data for any non-master commits.")
parser.print_help()
sys.exit(1)
else:
@@ -364,7 +361,7 @@ if "gpu_info" in show:
headers = [PRETTY_NAMES[p] for p in show]
headers += ["Test", f"t/s {name_baseline}", f"t/s {name_compare}", "Speedup"]
print(tabulate( # noqa: NP100
logger.info(tabulate(
table,
headers=headers,
floatfmt=".2f",

View File

@@ -1,3 +1,6 @@
// -*- mode:c++;indent-tabs-mode:nil;c-basic-offset:4;coding:utf-8 -*-
// vi: set et ft=c++ ts=4 sts=4 sw=4 fenc=utf-8 :vi
//
// Copyright 2024 Mozilla Foundation
//
// Permission is hereby granted, free of charge, to any person obtaining
@@ -582,15 +585,15 @@ class tinyBLAS_Q0_ARM {
};
#endif // __ARM_FEATURE_DOTPROD
#if defined(__AVX2__) || defined(__AVX512F__) || defined(__AVX__)
#if defined(__AVX2__) || defined(__AVX512F__)
template <typename TA, typename TB, typename TC>
class tinyBLAS_Q0_AVX {
class tinyBLAS_Q0_AVX2 {
public:
tinyBLAS_Q0_AVX(int64_t k,
const TA *A, int64_t lda,
const TB *B, int64_t ldb,
TC *C, int64_t ldc,
int ith, int nth)
tinyBLAS_Q0_AVX2(int64_t k,
const TA *A, int64_t lda,
const TB *B, int64_t ldb,
TC *C, int64_t ldc,
int ith, int nth)
: A(A), B(B), C(C), k(k), lda(lda), ldb(ldb), ldc(ldc), ith(ith), nth(nth) {
}
@@ -725,34 +728,14 @@ class tinyBLAS_Q0_AVX {
__m256 Cv[RN][RM] = {};
for (int64_t l = 0; l < k; ++l)
for (int64_t j = 0; j < RN; ++j)
for (int64_t i = 0; i < RM; ++i) {
#if defined(__AVX2__)
__m256 udTmp = updot(_mm256_sign_epi8(load(A + lda * (ii + i) + l),
load(A + lda * (ii + i) + l)),
_mm256_sign_epi8(load(B + ldb * (jj + j) + l),
load(A + lda * (ii + i) + l)));
#else
__m128i ali0 = load0(A + lda * (ii + i) + l);
__m128i ali1 = load1(A + lda * (ii + i) + l);
__m128i blj0 = load0(B + ldb * (jj + j) + l);
__m128i blj1 = load1(B + ldb * (jj + j) + l);
__m128i sepAA0 = _mm_sign_epi8(ali0, ali0);
__m128i sepAA1 = _mm_sign_epi8(ali1, ali1);
__m128i sepBA0 = _mm_sign_epi8(blj0, ali0);
__m128i sepBA1 = _mm_sign_epi8(blj1, ali1);
// updot
const __m128i oneFill = _mm_set1_epi16(1);
__m128i mad0 = _mm_maddubs_epi16(sepAA0, sepBA0);
__m128i mad1 = _mm_maddubs_epi16(sepAA1, sepBA1);
__m256 udTmp = _mm256_cvtepi32_ps(MM256_SET_M128I(_mm_madd_epi16(oneFill, mad1), _mm_madd_epi16(oneFill, mad0)));
#endif
for (int64_t i = 0; i < RM; ++i)
Cv[j][i] = madd(_mm256_set1_ps(unhalf(A[lda * (ii + i) + l].d) *
unhalf(B[ldb * (jj + j) + l].d)),
udTmp,
Cv[j][i]);
}
updot(_mm256_sign_epi8(load(A + lda * (ii + i) + l),
load(A + lda * (ii + i) + l)),
_mm256_sign_epi8(load(B + ldb * (jj + j) + l),
load(A + lda * (ii + i) + l))),
Cv[j][i]);
for (int64_t j = 0; j < RN; ++j)
for (int64_t i = 0; i < RM; ++i)
C[ldc * (jj + j) + (ii + i)] = hsum(Cv[j][i]);
@@ -763,28 +746,10 @@ class tinyBLAS_Q0_AVX {
return _mm256_loadu_si256((const __m256i *)b->qs);
}
inline __m128i load0(const block_q8_0 *b) {
return _mm_loadu_si128((const __m128i *)b->qs);
}
inline __m128i load1(const block_q8_0 *b) {
return _mm_loadu_si128(((const __m128i *)b->qs) + 1);
}
inline __m256i load(const block_q4_0 *b) {
return _mm256_sub_epi8(denibble(b->qs), _mm256_set1_epi8(8));
}
inline __m128i load0(const block_q4_0 *b) {
const __m128i x = _mm_loadu_si128((const __m128i *)(b->qs));
return _mm_sub_epi8(_mm_and_si128(_mm_set1_epi8(15), x), _mm_set1_epi8(8));
}
inline __m128i load1(const block_q4_0 *b) {
const __m128i x = _mm_loadu_si128((const __m128i *)(b->qs));
return _mm_sub_epi8(_mm_and_si128(_mm_set1_epi8(15), _mm_srli_epi16(x, 4)), _mm_set1_epi8(8));
}
inline __m256 updot(__m256i u, __m256i s) {
__m256i res;
#if defined(__AVXVNNI__) || (defined(__AVX512VNNI__) && defined(__AVX512VL__))
@@ -812,7 +777,7 @@ class tinyBLAS_Q0_AVX {
const int ith;
const int nth;
};
#endif // __AVX__
#endif // __AVX2__
} // namespace
@@ -963,8 +928,8 @@ bool llamafile_sgemm(int64_t m, int64_t n, int64_t k, const void *A, int64_t lda
case GGML_TYPE_Q8_0: {
if (Btype != GGML_TYPE_Q8_0)
return false;
#if defined(__AVX2__) || defined(__AVX512F__) || defined(__AVX__)
tinyBLAS_Q0_AVX<block_q8_0, block_q8_0, float> tb{
#if defined(__AVX2__) || defined(__AVX512F__)
tinyBLAS_Q0_AVX2<block_q8_0, block_q8_0, float> tb{
k, (const block_q8_0 *)A, lda,
(const block_q8_0 *)B, ldb,
(float *)C, ldc,
@@ -987,8 +952,8 @@ bool llamafile_sgemm(int64_t m, int64_t n, int64_t k, const void *A, int64_t lda
case GGML_TYPE_Q4_0: {
if (Btype != GGML_TYPE_Q8_0)
return false;
#if defined(__AVX2__) || defined(__AVX512F__) || defined(__AVX__)
tinyBLAS_Q0_AVX<block_q4_0, block_q8_0, float> tb{
#if defined(__AVX2__) || defined(__AVX512F__)
tinyBLAS_Q0_AVX2<block_q4_0, block_q8_0, float> tb{
k, (const block_q4_0 *)A, lda,
(const block_q8_0 *)B, ldb,
(float *)C, ldc,

View File

@@ -84,7 +84,6 @@ llama_test(test-tokenizer-0 NAME test-tokenizer-0-starcoder ARGS ${CMAKE
llama_test(test-tokenizer-0 NAME test-tokenizer-0-gpt-2 ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-gpt-2.gguf)
llama_test(test-tokenizer-0 NAME test-tokenizer-0-refact ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-refact.gguf)
llama_test(test-tokenizer-0 NAME test-tokenizer-0-command-r ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-command-r.gguf)
llama_test(test-tokenizer-0 NAME test-tokenizer-0-qwen2 ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-qwen2.gguf)
# build test-tokenizer-1-bpe target once and add many tests
add_executable(test-tokenizer-1-bpe test-tokenizer-1-bpe.cpp)