ci : enable git lfs for build.yml

Revert "tmp : dummy change to trigger ci"
This reverts commit 97e40df5d6.
2026-02-19 14:13:22 +02:00 · 2024-05-08 10:53:02 +03:00 · 2024-05-08 10:42:25 +03:00 · 2024-05-08 10:42:11 +03:00 · 2024-05-08 10:30:25 +03:00 · 2024-05-08 10:24:53 +03:00
36 changed files with 126 additions and 336 deletions
--- a/.gitattributes
+++ b/.gitattributes
@@ -0,0 +1 @@
+models/ggml-vocab-*.gguf filter=lfs diff=lfs merge=lfs -text
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -33,6 +33,7 @@ jobs:
        id: checkout
        uses: actions/checkout@v4
        with:
+          lfs: true
          fetch-depth: 0

      - name: Dependencies
@@ -91,6 +92,7 @@ jobs:
        id: checkout
        uses: actions/checkout@v4
        with:
+          lfs: true
          fetch-depth: 0

      - name: Dependencies
@@ -153,6 +155,8 @@ jobs:
      - name: Clone
        id: checkout
        uses: actions/checkout@v4
+        with:
+          lfs: true

      - name: Dependencies
        id: depends
@@ -188,6 +192,8 @@ jobs:
      - name: Clone
        id: checkout
        uses: actions/checkout@v4
+        with:
+          lfs: true

      - name: Dependencies
        id: depends
@@ -211,6 +217,7 @@ jobs:
        id: checkout
        uses: actions/checkout@v4
        with:
+          lfs: true
          fetch-depth: 0

      - name: Dependencies
@@ -285,6 +292,8 @@ jobs:
 #      - name: Clone
 #        id: checkout
 #        uses: actions/checkout@v4
+#        with:
+#          lfs: true
 #
 #      - name: Dependencies
 #        id: depends
@@ -319,6 +328,8 @@ jobs:
      - name: Clone
        id: checkout
        uses: actions/checkout@v4
+        with:
+          lfs: true

      - name: Dependencies
        id: depends
@@ -347,6 +358,8 @@ jobs:
      - name: Clone
        id: checkout
        uses: actions/checkout@v4
+        with:
+          lfs: true

      - name: Dependencies
        id: depends
@@ -369,6 +382,8 @@ jobs:

    steps:
      - uses: actions/checkout@v2
+        with:
+          lfs: true

      - name: add oneAPI to apt
        shell: bash
@@ -393,6 +408,8 @@ jobs:
      - name: Clone
        id: checkout
        uses: actions/checkout@v4
+        with:
+          lfs: true

      - name: Build
        id: cmake_build
@@ -410,6 +427,8 @@ jobs:

    steps:
      - uses: actions/checkout@v2
+        with:
+          lfs: true

      - name: add oneAPI to apt
        shell: bash
@@ -434,6 +453,8 @@ jobs:
      - name: Clone
        id: checkout
        uses: actions/checkout@v4
+        with:
+          lfs: true

      - name: Build
        id: cmake_build
@@ -454,6 +475,8 @@ jobs:
      - name: Clone
        id: checkout
        uses: actions/checkout@v4
+        with:
+          lfs: true

      - name: Dependencies
        id: depends
@@ -485,6 +508,8 @@ jobs:
      - name: Clone
        id: checkout
        uses: actions/checkout@v4
+        with:
+          lfs: true

      - name: Dependencies
        id: depends
@@ -514,6 +539,8 @@ jobs:
      - name: Clone
        id: checkout
        uses: actions/checkout@v1
+        with:
+          lfs: true

      - name: Dependencies
        id: depends
@@ -543,6 +570,8 @@ jobs:
      - name: Clone
        id: checkout
        uses: actions/checkout@v1
+        with:
+          lfs: true

      - name: Dependencies
        id: depends
@@ -576,6 +605,8 @@ jobs:
      - name: Clone
        id: checkout
        uses: actions/checkout@v1
+        with:
+          lfs: true

      - name: Dependencies
        id: depends
@@ -606,6 +637,8 @@ jobs:
    steps:
      - name: Clone
        uses: actions/checkout@v4
+        with:
+          lfs: true

      - name: Setup ${{ matrix.sys }}
        uses: msys2/setup-msys2@v2
@@ -687,6 +720,7 @@ jobs:
        id: checkout
        uses: actions/checkout@v4
        with:
+          lfs: true
          fetch-depth: 0

      - name: Clone Kompute submodule
@@ -833,6 +867,7 @@ jobs:
        id: checkout
        uses: actions/checkout@v4
        with:
+          lfs: true
          fetch-depth: 0

      - uses: Jimver/cuda-toolkit@v0.2.11
@@ -906,6 +941,7 @@ jobs:
        id: checkout
        uses: actions/checkout@v4
        with:
+          lfs: true
          fetch-depth: 0

      - name: Install
@@ -947,6 +983,8 @@ jobs:
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
+        with:
+          lfs: true

      - name: Build Xcode project
        run: xcodebuild -project examples/llama.swiftui/llama.swiftui.xcodeproj -scheme llama.swiftui -sdk iphoneos CODE_SIGNING_REQUIRED=NO CODE_SIGN_IDENTITY= -destination 'generic/platform=iOS' build
@@ -957,6 +995,8 @@ jobs:
    steps:
      - name: Clone
        uses: actions/checkout@v4
+        with:
+          lfs: true

      - name: Set up JDK
        uses: actions/setup-java@v3
@@ -979,7 +1019,9 @@ jobs:
 #    runs-on: macos-12
 #    steps:
 #    - name: Clone
 #      uses: actions/checkout@v4
+#      with:
+#        lfs: true
 #
 #    - name: Build
 #      uses: cross-platform-actions/action@v0.19.0
@@ -1012,6 +1054,7 @@ jobs:
        id: checkout
        uses: actions/checkout@v4
        with:
+          lfs: true
          fetch-depth: 0

      - name: Determine tag name
@@ -1077,6 +1120,8 @@ jobs:
 #    steps:
 #      - name: Clone
 #        uses: actions/checkout@v4
+#        with:
+#          lfs: true
 #
 #      - name: Dependencies
 #        run: |
@@ -1101,6 +1146,8 @@ jobs:
 #    steps:
 #      - name: Clone
 #        uses: actions/checkout@v4
+#        with:
+#          lfs: true
 #
 #      - name: Dependencies
 #        run: |
@@ -1125,6 +1172,8 @@ jobs:
 #    steps:
 #      - name: Clone
 #        uses: actions/checkout@v4
+#        with:
+#          lfs: true
 #
 #      - name: Dependencies
 #        run: |
@@ -1155,6 +1204,8 @@ jobs:
 #    steps:
 #      - name: Clone
 #        uses: actions/checkout@v4
+#        with:
+#          lfs: true
 #
 #      - name: Add msbuild to PATH
 #        uses: microsoft/setup-msbuild@v1
@@ -1194,6 +1245,8 @@ jobs:
 #    steps:
 #      - name: Clone
 #        uses: actions/checkout@v4
+#        with:
+#          lfs: true
 #
 #      - name: Add msbuild to PATH
 #        uses: microsoft/setup-msbuild@v1
@@ -1240,6 +1293,8 @@ jobs:
 #    steps:
 #      - name: Clone
 #        uses: actions/checkout@v4
+#        with:
+#          lfs: true
 #
 #      - name: Dependencies
 #        run: |
--- a/.github/workflows/code-coverage.yml
+++ b/.github/workflows/code-coverage.yml
@@ -13,14 +13,16 @@ jobs:
  run:
    runs-on: ubuntu-20.04
    steps:
-      - name: Checkout
-        uses: actions/checkout@v4
-
      - name: Dependencies
        run: |
          sudo apt-get update
          sudo apt-get install build-essential gcc-8 lcov

+      - name: Checkout
+        uses: actions/checkout@v4
+        with:
+          lfs: true
+
      - name: Build
        run: CC=gcc-8 make -j LLAMA_CODE_COVERAGE=1 tests

--- a/convert-hf-to-gguf-update.py
+++ b/convert-hf-to-gguf-update.py
@@ -67,9 +67,7 @@ models = [
    {"name": "gpt-2",          "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/openai-community/gpt2", },
    {"name": "refact",         "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/smallcloudai/Refact-1_6-base", },
    {"name": "command-r",      "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/CohereForAI/c4ai-command-r-v01", },
-    {"name": "qwen2",          "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/Qwen/Qwen1.5-7B", },
    {"name": "olmo",           "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/allenai/OLMo-1.7-7B-hf", },
-    {"name": "dbrx",           "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/databricks/dbrx-base", },
 ]

 # make directory "models/tokenizers" if it doesn't exist
@@ -153,8 +151,6 @@ for model in models:
    # print the "pre_tokenizer" content from the tokenizer.json
    with open(f"models/tokenizers/{name}/tokenizer.json", "r", encoding="utf-8") as f:
        cfg = json.load(f)
-        normalizer = cfg["normalizer"]
-        logger.info("normalizer: " + json.dumps(normalizer, indent=4))
        pre_tokenizer = cfg["pre_tokenizer"]
        logger.info("pre_tokenizer: " + json.dumps(pre_tokenizer, indent=4))

--- a/convert-hf-to-gguf.py
+++ b/convert-hf-to-gguf.py
@@ -314,15 +314,9 @@ class Model(ABC):
        if chkhsh == "9c2227e4dd922002fb81bde4fc02b0483ca4f12911410dee2255e4987644e3f8":
            # ref: https://huggingface.co/CohereForAI/c4ai-command-r-v01
            res = "command-r"
-        if chkhsh == "e636dc30a262dcc0d8c323492e32ae2b70728f4df7dfe9737d9f920a282b8aea":
-            # ref: https://huggingface.co/Qwen/Qwen1.5-7B
-            res = "qwen2"
        if chkhsh == "b6dc8df998e1cfbdc4eac8243701a65afe638679230920b50d6f17d81c098166":
            # ref: https://huggingface.co/allenai/OLMo-1.7-7B-hf
            res = "olmo"
-        if chkhsh == "a8594e3edff7c29c003940395316294b2c623e09894deebbc65f33f1515df79e":
-            # ref: https://huggingface.co/databricks/dbrx-instruct
-            res = "dbrx"

        if res is None:
            logger.warning("\n")
--- a/convert.py
+++ b/convert.py
@@ -1508,27 +1508,25 @@ def main(args_in: list[str] | None = None) -> None:
    if args.big_endian:
        endianess = gguf.GGUFEndian.BIG

-    params = None
-    if args.pad_vocab or not args.vocab_only:
-        params = Params.load(model_plus)
-        if params.n_ctx == -1:
-            if args.ctx is None:
-                msg = """\
-                    The model doesn't have a context size, and you didn't specify one with --ctx
-                    Please specify one with --ctx:
-                     - LLaMA v1: --ctx 2048
-                     - LLaMA v2: --ctx 4096"""
-                parser.error(textwrap.dedent(msg))
-            params.n_ctx = args.ctx
+    params = Params.load(model_plus)
+    if params.n_ctx == -1:
+        if args.ctx is None:
+            msg = """\
+                The model doesn't have a context size, and you didn't specify one with --ctx
+                Please specify one with --ctx:
+                 - LLaMA v1: --ctx 2048
+                 - LLaMA v2: --ctx 4096"""
+            parser.error(textwrap.dedent(msg))
+        params.n_ctx = args.ctx

-        if args.outtype:
-            params.ftype = {
-                "f32": GGMLFileType.AllF32,
-                "f16": GGMLFileType.MostlyF16,
-                "q8_0": GGMLFileType.MostlyQ8_0,
-            }[args.outtype]
+    if args.outtype:
+        params.ftype = {
+            "f32": GGMLFileType.AllF32,
+            "f16": GGMLFileType.MostlyF16,
+            "q8_0": GGMLFileType.MostlyQ8_0,
+        }[args.outtype]

-        logger.info(f"params = {params}")
+    logger.info(f"params = {params}")

    model_parent_path = model_plus.paths[0].parent
    vocab_path = Path(args.vocab_dir or args.model or model_parent_path)
@@ -1541,17 +1539,6 @@ def main(args_in: list[str] | None = None) -> None:
        if not args.outfile:
            raise ValueError("need --outfile if using --vocab-only")
        outfile = args.outfile
-        if params is None:
-            params = Params(
-                n_vocab    = vocab.vocab_size,
-                n_embd     = 1,
-                n_layer    = 1,
-                n_ctx      = 1,
-                n_ff       = 1,
-                n_head     = 1,
-                n_head_kv  = 1,
-                f_norm_eps = 1e-5,
-            )
        OutputFile.write_vocab_only(outfile, params, vocab, special_vocab,
                                    endianess=endianess, pad_vocab=args.pad_vocab)
        logger.info(f"Wrote {outfile}")
--- a/examples/server/README.md
+++ b/examples/server/README.md
@@ -331,7 +331,7 @@ Notice that each `probs` is an array of length `n_probs`.

    `content`: Set the text to tokenize.

-    `add_special`: Boolean indicating if special tokens, i.e. `BOS`, should be inserted.  Default: `false`
+    Note that a special `BOS` token is never inserted.

 - **POST** `/detokenize`: Convert tokens to text.

--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -3647,8 +3647,7 @@ int main(int argc, char ** argv) {

        std::vector<llama_token> tokens;
        if (body.count("content") != 0) {
-            const bool add_special = json_value(body, "add_special", false);
-            tokens = ctx_server.tokenize(body["content"], add_special);
+            tokens = ctx_server.tokenize(body["content"], false);
        }
        const json data = format_tokenizer_response(tokens);
        return res.set_content(data.dump(), "application/json; charset=utf-8");
--- a/examples/server/tests/features/server.feature
+++ b/examples/server/tests/features/server.feature
@@ -7,7 +7,6 @@ Feature: llama.cpp server
    And   a model file tinyllamas/stories260K.gguf from HF repo ggml-org/models
    And   a model file test-model.gguf
    And   a model alias tinyllama-2
-    And   BOS token is 1
    And   42 as server seed
      # KV Cache corresponds to the total amount of tokens
      # that can be stored across all independent sequences: #4130
@@ -92,18 +91,7 @@ Feature: llama.cpp server
    """
    What is the capital of France ?
    """
-    Then tokens can be detokenized
-    And  tokens do not begin with BOS
-
-  Scenario: Tokenize w/ BOS
-    Given adding special tokens
-    When  tokenizing:
-    """
-    What is the capital of Germany?
-    """
-    Then  tokens begin with BOS
-    Given first token is removed
-    Then  tokens can be detokenized
+    Then tokens can be detokenize

  Scenario: Models available
    Given available models
--- a/examples/server/tests/features/steps/steps.py
+++ b/examples/server/tests/features/steps/steps.py
@@ -376,11 +376,6 @@ def step_seed(context, seed):
        context.seed.append(seed)


-@step('BOS token is {bos:d}')
-def step_bos_token(context, bos):
-    context.bos = bos
-
-
@step('a prefix prompt')
 def step_prompt_prefix(context):
    context.prompt_prefix = context_text(context)
@@ -661,29 +656,21 @@ async def all_embeddings_are_generated(context):
        assert_embeddings(context.tasks_result.pop().pop())


-@step('adding special tokens')
-def step_tokenize_set_add_special(context):
-    context.tokenize_add_special = True
-
-
@step('tokenizing')
@async_run_until_complete
 async def step_tokenize(context):
    context.tokenized_text = context_text(context)
    async with aiohttp.ClientSession() as session:
-        tokenize_args = {
-            "content": context.tokenized_text,
-        }
-        if getattr(context, 'tokenize_add_special', None) is not None:
-            tokenize_args['add_special'] = context.tokenize_add_special
        async with session.post(f'{context.base_url}/tokenize',
-                                json=tokenize_args) as response:
+                                json={
+                                    "content": context.tokenized_text,
+                                }) as response:
            assert response.status == 200
            tokenize_json = await response.json()
            context.tokens = tokenize_json['tokens']


-@step('tokens can be detokenized')
+@step('tokens can be detokenize')
@async_run_until_complete
 async def step_detokenize(context):
    assert len(context.tokens) > 0
@@ -698,21 +685,6 @@ async def step_detokenize(context):
            assert context.tokenized_text == detokenize_json['content'].strip()


-@step('tokens begin with BOS')
-def step_strings_for_tokenization(context):
-    assert context.tokens[0] == context.bos
-
-
-@step('tokens do not begin with BOS')
-def step_strings_for_tokenization(context):
-    assert context.tokens[0] != context.bos
-
-
-@step('first token is removed')
-def step_strings_for_tokenization(context):
-    context.tokens = context.tokens[1:]
-
-
@step('an OPTIONS request is sent from {origin}')
@async_run_until_complete
 async def step_options_request(context, origin):
--- a/examples/server/utils.hpp
+++ b/examples/server/utils.hpp
@@ -49,18 +49,18 @@ extern bool server_log_json;
 #define LOG_WARNING(MSG, ...) server_log("WARN", __func__, __LINE__, MSG, __VA_ARGS__)
 #define LOG_INFO(   MSG, ...) server_log("INFO", __func__, __LINE__, MSG, __VA_ARGS__)

-static inline void server_log(const char * level, const char * function, int line, const char * message, const json & extra);
+static inline void server_log(const char *level, const char *function, int line, const char *message, const nlohmann::ordered_json &extra);

 template <typename T>
-static T json_value(const json & body, const std::string & key, const T & default_value) {
+static T json_value(const json &body, const std::string &key, const T &default_value) {
    // Fallback null to default value
-    if (body.contains(key) && !body.at(key).is_null()) {
+    if (body.contains(key) && !body.at(key).is_null()){
        try {
-            return body.at(key);
-        } catch (NLOHMANN_JSON_NAMESPACE::detail::type_error const &) {
-            std::stringstream ss;
-            ss << "Wrong type supplied for parameter '" << key << "'. Expected '" << json(default_value).type_name() << "', using default value.";
-            LOG_WARNING(ss.str().c_str(), body);
+            return body.value(key, default_value);
+        }
+        catch (nlohmann::json_abi_v3_11_3::detail::type_error const&){
+            std::string message = "Wrong type supplied for parameter '" + key + "'. Expected '" + typeid(default_value).name() + "', using default value.";
+            server_log("WARN", __func__, __LINE__, message.c_str(), body);
            return default_value;
        }
    } else {
@@ -68,16 +68,16 @@ static T json_value(const json & body, const std::string & key, const T & defaul
    }
 }

-static inline void server_log(const char * level, const char * function, int line, const char * message, const json & extra) {
+static inline void server_log(const char *level, const char *function, int line, const char *message, const nlohmann::ordered_json &extra) {
    std::stringstream ss_tid;
    ss_tid << std::this_thread::get_id();
-    json log = json{
+    json log = nlohmann::ordered_json{
        {"tid",       ss_tid.str()},
        {"timestamp", time(nullptr)},
    };

    if (server_log_json) {
-        log.merge_patch({
+        log.merge_patch( {
            {"level",    level},
            {"function", function},
            {"line",     line},
@@ -98,7 +98,7 @@ static inline void server_log(const char * level, const char * function, int lin
        }
        std::stringstream ss;
        ss << buf << " |";
-        for (const auto & el : log.items())
+        for (const auto& el : log.items())
        {
            const std::string value = el.value().dump(-1, ' ', false, json::error_handler_t::replace);
            ss << " " << el.key() << "=" << value;
--- a/llama.cpp
+++ b/llama.cpp
@@ -4391,15 +4391,9 @@ static void llm_load_vocab(
            } else if (
                tokenizer_pre == "command-r") {
                vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_COMMAND_R;
-            } else if (
-                tokenizer_pre == "qwen2") {
-                vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_QWEN2;
            } else if (
                tokenizer_pre == "olmo") {
                vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_OLMO;
-            } else if (
-                tokenizer_pre == "dbrx") {
-                vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_DBRX;
            } else {
                throw std::runtime_error(format("unknown pre-tokenizer type: '%s'", tokenizer_pre.c_str()));
            }
@@ -12206,7 +12200,6 @@ struct llm_tokenizer_bpe {
            case LLAMA_VOCAB_TYPE_BPE:
                switch (vocab.type_pre) {
                    case LLAMA_VOCAB_PRE_TYPE_LLAMA3:
-                    case LLAMA_VOCAB_PRE_TYPE_DBRX:
                        word_collection = unicode_regex_split(text, {
                            // original regex from tokenizer.json
                            //"(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}{1,3}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+",
@@ -12266,13 +12259,6 @@ struct llm_tokenizer_bpe {
                            "'s|'t|'re|'ve|'m|'ll|'d| ?\\p{L}+| ?\\p{N}+| ?[^\\s\\p{L}\\p{N}]+|\\s+(?!\\S)",
                        });
                        break;
-                    case LLAMA_VOCAB_PRE_TYPE_QWEN2:
-                        word_collection = unicode_regex_split(text, {
-                            // original regex from tokenizer.json
-                            // "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+"
-                            "(?:'[sS]|'[tT]|'[rR][eE]|'[vV][eE]|'[mM]|'[lL][lL]|'[dD])|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+",
-                        });
-                        break;
                    default:
                        // default regex for BPE tokenization pre-processing
                        word_collection = unicode_regex_split(text, {
--- a/llama.h
+++ b/llama.h
@@ -81,9 +81,7 @@ extern "C" {
        LLAMA_VOCAB_PRE_TYPE_GPT2           = 7,
        LLAMA_VOCAB_PRE_TYPE_REFACT         = 8,
        LLAMA_VOCAB_PRE_TYPE_COMMAND_R      = 9,
-        LLAMA_VOCAB_PRE_TYPE_QWEN2          = 10,
-        LLAMA_VOCAB_PRE_TYPE_OLMO           = 11,
-        LLAMA_VOCAB_PRE_TYPE_DBRX           = 12,
+        LLAMA_VOCAB_PRE_TYPE_OLMO           = 10,
    };

    // note: these values should be synchronized with ggml_rope
--- a/models/ggml-vocab-aquila.gguf
+++ b/models/ggml-vocab-aquila.gguf
--- a/models/ggml-vocab-baichuan.gguf
+++ b/models/ggml-vocab-baichuan.gguf
--- a/models/ggml-vocab-bert-bge.gguf
+++ b/models/ggml-vocab-bert-bge.gguf
--- a/models/ggml-vocab-command-r.gguf
+++ b/models/ggml-vocab-command-r.gguf
--- a/models/ggml-vocab-deepseek-coder.gguf
+++ b/models/ggml-vocab-deepseek-coder.gguf
--- a/models/ggml-vocab-deepseek-llm.gguf
+++ b/models/ggml-vocab-deepseek-llm.gguf
--- a/models/ggml-vocab-falcon.gguf
+++ b/models/ggml-vocab-falcon.gguf
--- a/models/ggml-vocab-gpt-2.gguf
+++ b/models/ggml-vocab-gpt-2.gguf
--- a/models/ggml-vocab-gpt-neox.gguf
+++ b/models/ggml-vocab-gpt-neox.gguf
--- a/models/ggml-vocab-gpt2.gguf
+++ b/models/ggml-vocab-gpt2.gguf
--- a/models/ggml-vocab-llama-bpe.gguf
+++ b/models/ggml-vocab-llama-bpe.gguf
--- a/models/ggml-vocab-llama-spm.gguf
+++ b/models/ggml-vocab-llama-spm.gguf
--- a/models/ggml-vocab-mpt.gguf
+++ b/models/ggml-vocab-mpt.gguf
--- a/models/ggml-vocab-phi-3.gguf
+++ b/models/ggml-vocab-phi-3.gguf
--- a/models/ggml-vocab-qwen2.gguf
+++ b/models/ggml-vocab-qwen2.gguf
--- a/models/ggml-vocab-qwen2.gguf.inp
+++ b/models/ggml-vocab-qwen2.gguf.inp
@@ -1,106 +0,0 @@
-ied 4 ½ months
-__ggml_vocab_test__
-Führer
-__ggml_vocab_test__
-
-__ggml_vocab_test__
- 
-__ggml_vocab_test__
-  
-__ggml_vocab_test__
-   
-__ggml_vocab_test__
-	
-__ggml_vocab_test__
-
-
-__ggml_vocab_test__
-
-
-
-__ggml_vocab_test__
-
-
-
-
-__ggml_vocab_test__
-	
-
-__ggml_vocab_test__
-Hello world
-__ggml_vocab_test__
- Hello world
-__ggml_vocab_test__
-Hello World
-__ggml_vocab_test__
- Hello World
-__ggml_vocab_test__
- Hello World!
-__ggml_vocab_test__
-Hello, world!
-__ggml_vocab_test__
- Hello, world!
-__ggml_vocab_test__
- this is 🦙.cpp
-__ggml_vocab_test__
-w048 7tuijk dsdfhu
-__ggml_vocab_test__
-нещо на Български
-__ggml_vocab_test__
-កាន់តែពិសេសអាចខលចេញ
-__ggml_vocab_test__
-🚀 (normal) 😶‍🌫️ (multiple emojis concatenated) ✅ (only emoji that has its own token)
-__ggml_vocab_test__
-Hello
-__ggml_vocab_test__
- Hello
-__ggml_vocab_test__
-  Hello
-__ggml_vocab_test__
-   Hello
-__ggml_vocab_test__
-    Hello
-__ggml_vocab_test__
-    Hello
-    Hello
-__ggml_vocab_test__
- (
-__ggml_vocab_test__
-
- =
-__ggml_vocab_test__
-' era
-__ggml_vocab_test__
-Hello, y'all! How are you 😁 ?我想在apple工作1314151天～
-__ggml_vocab_test__
-3
-__ggml_vocab_test__
-33
-__ggml_vocab_test__
-333
-__ggml_vocab_test__
-3333
-__ggml_vocab_test__
-33333
-__ggml_vocab_test__
-333333
-__ggml_vocab_test__
-3333333
-__ggml_vocab_test__
-33333333
-__ggml_vocab_test__
-333333333
-__ggml_vocab_test__
-
- 
-
- 
-
-
- 	 		 	
-  
-   
-    
-     
-🚀 (normal) 😶‍🌫️ (multiple emojis concatenated) ✅ 🦙🦙 3 33 333 3333 33333 333333 3333333 33333333 3.3 3..3 3...3 កាន់តែពិសេសអាច😁 ?我想在apple工作1314151天～ ------======= нещо на Български ''''''```````""""......!!!!!!?????? I've been 'told he's there, 'RE you sure? 'M not sure I'll make it, 'D you like some tea? We'Ve a'lL
-__ggml_vocab_test__
--- a/models/ggml-vocab-qwen2.gguf.out
+++ b/models/ggml-vocab-qwen2.gguf.out
@@ -1,43 +0,0 @@
- 1122 220 19 220 26062 3951
- 37 50753 261
-
- 220
- 256
- 262
- 197
- 198
- 271
- 1406
- 1572
- 9707 1879
- 21927 1879
- 9707 4337
- 21927 4337
- 21927 4337 0
- 9707 11 1879 0
- 21927 11 1879 0
- 419 374 11162 99 247 13 10821
- 86 15 19 23 220 22 83 1963 41808 11472 2940 16739
- 78762 14144 1456 13073 63471 33594 3038 133178 79012
- 146394 97529 241 44258 233 146568 44258 224 147603 20879 115 146280 44258 223 146280 147272 97529 227 147805 148301 147270 44258 223 146848
- 145836 320 8252 8 26525 114 378 235 149921 30543 320 35673 99066 97534 8 25521 227 320 3243 42365 429 702 1181 1828 3950 8
- 9707
- 21927
- 220 21927
- 256 21927
- 262 21927
- 262 21927 198 262 21927
- 320
- 198 284
- 6 11385
- 9707 11 379 64848 0 2585 525 498 26525 223 937 104100 18493 22377 99257 16 18 16 19 16 20 16 35727 21216
- 18
- 18 18
- 18 18 18
- 18 18 18 18
- 18 18 18 18 18
- 18 18 18 18 18 18
- 18 18 18 18 18 18 18
- 18 18 18 18 18 18 18 18
- 18 18 18 18 18 18 18 18 18
- 198 4710 14731 65497 7847 1572 2303 78672 10947 145836 320 8252 8 26525 114 378 235 149921 30543 320 35673 99066 97534 8 25521 227 11162 99 247 149955 220 18 220 18 18 220 18 18 18 220 18 18 18 18 220 18 18 18 18 18 220 18 18 18 18 18 18 220 18 18 18 18 18 18 18 220 18 18 18 18 18 18 18 18 220 18 13 18 220 18 496 18 220 18 1112 18 220 146394 97529 241 44258 233 146568 44258 224 147603 20879 115 146280 44258 223 146280 147272 97529 227 144534 937 104100 18493 22377 99257 16 18 16 19 16 20 16 35727 21216 55460 53237 18658 14144 1456 13073 63471 33594 3038 133178 79012 3355 4605 4605 13874 13874 73594 3014 3014 28149 17085 2928 26610 7646 358 3003 1012 364 83 813 566 594 1052 11 364 787 498 2704 30 364 44 537 2704 358 3278 1281 432 11 364 35 498 1075 1045 15243 30 1205 6 42612 264 63866 43
--- a/models/ggml-vocab-refact.gguf
+++ b/models/ggml-vocab-refact.gguf
--- a/models/ggml-vocab-stablelm.gguf
+++ b/models/ggml-vocab-stablelm.gguf
--- a/models/ggml-vocab-starcoder.gguf
+++ b/models/ggml-vocab-starcoder.gguf
--- a/scripts/compare-llama-bench.py
+++ b/scripts/compare-llama-bench.py
@@ -93,14 +93,11 @@ help_s = (
    "specified values are averaged WITHOUT weighing by the --repetitions parameter of llama-bench."
 )
 parser.add_argument("-s", "--show", help=help_s)
-parser.add_argument("--verbose", action="store_true", help="increase output verbosity")

 known_args, unknown_args = parser.parse_known_args()

-logging.basicConfig(level=logging.DEBUG if known_args.verbose else logging.INFO)
-
 if unknown_args:
-    logger.error(f"Received unknown args: {unknown_args}.\n")
+    logger.error(f"Received unknown args: {unknown_args}.")
    parser.print_help()
    sys.exit(1)

@@ -113,7 +110,7 @@ if input_file is None:
        input_file = sqlite_files[0]

 if input_file is None:
-    logger.error("Cannot find a suitable input file, please provide one.\n")
+    logger.error("Cannot find a suitable input file, please provide one.")
    parser.print_help()
    sys.exit(1)

@@ -205,12 +202,12 @@ elif repo is not None:
    hexsha8_baseline = find_parent_in_data(repo.heads.master.commit)

    if hexsha8_baseline is None:
-        logger.error("No baseline was provided and did not find data for any master branch commits.\n")
+        logger.error("No baseline was provided and did not find data for any master branch commits.")
        parser.print_help()
        sys.exit(1)
 else:
    logger.error("No baseline was provided and the current working directory "
-                 "is not part of a git repository from which a baseline could be inferred.\n")
+                 "is not part of a git repository from which a baseline could be inferred.")
    parser.print_help()
    sys.exit(1)

@@ -241,7 +238,7 @@ elif repo is not None:
            break

    if hexsha8_compare is None:
-        logger.error("No compare target was provided and did not find data for any non-master commits.\n")
+        logger.error("No compare target was provided and did not find data for any non-master commits.")
        parser.print_help()
        sys.exit(1)
 else:
@@ -364,7 +361,7 @@ if "gpu_info" in show:
 headers  = [PRETTY_NAMES[p] for p in show]
 headers += ["Test", f"t/s {name_baseline}", f"t/s {name_compare}", "Speedup"]

-print(tabulate( # noqa: NP100
+logger.info(tabulate(
    table,
    headers=headers,
    floatfmt=".2f",
--- a/sgemm.cpp
+++ b/sgemm.cpp
@@ -1,3 +1,6 @@
+// -*- mode:c++;indent-tabs-mode:nil;c-basic-offset:4;coding:utf-8 -*-
+// vi: set et ft=c++ ts=4 sts=4 sw=4 fenc=utf-8 :vi
+//
 // Copyright 2024 Mozilla Foundation
 //
 // Permission is hereby granted, free of charge, to any person obtaining
@@ -582,15 +585,15 @@ class tinyBLAS_Q0_ARM {
 };
 #endif // __ARM_FEATURE_DOTPROD

-#if defined(__AVX2__) || defined(__AVX512F__) || defined(__AVX__)
+#if defined(__AVX2__) || defined(__AVX512F__)
 template <typename TA, typename TB, typename TC>
-class tinyBLAS_Q0_AVX {
+class tinyBLAS_Q0_AVX2 {
  public:
-    tinyBLAS_Q0_AVX(int64_t k,
-                    const TA *A, int64_t lda,
-                    const TB *B, int64_t ldb,
-                    TC *C, int64_t ldc,
-                    int ith, int nth)
+    tinyBLAS_Q0_AVX2(int64_t k,
+                     const TA *A, int64_t lda,
+                     const TB *B, int64_t ldb,
+                     TC *C, int64_t ldc,
+                     int ith, int nth)
        : A(A), B(B), C(C), k(k), lda(lda), ldb(ldb), ldc(ldc), ith(ith), nth(nth) {
    }

@@ -725,34 +728,14 @@ class tinyBLAS_Q0_AVX {
            __m256 Cv[RN][RM] = {};
            for (int64_t l = 0; l < k; ++l)
                for (int64_t j = 0; j < RN; ++j)
-                    for (int64_t i = 0; i < RM; ++i) {
-#if defined(__AVX2__)
-                        __m256 udTmp = updot(_mm256_sign_epi8(load(A + lda * (ii + i) + l),
-                                                              load(A + lda * (ii + i) + l)),
-                                             _mm256_sign_epi8(load(B + ldb * (jj + j) + l),
-                                                              load(A + lda * (ii + i) + l)));
-#else
-                        __m128i ali0 = load0(A + lda * (ii + i) + l);
-                        __m128i ali1 = load1(A + lda * (ii + i) + l);
-                        __m128i blj0 = load0(B + ldb * (jj + j) + l);
-                        __m128i blj1 = load1(B + ldb * (jj + j) + l);
-
-                        __m128i sepAA0 = _mm_sign_epi8(ali0, ali0);
-                        __m128i sepAA1 = _mm_sign_epi8(ali1, ali1);
-                        __m128i sepBA0 = _mm_sign_epi8(blj0, ali0);
-                        __m128i sepBA1 = _mm_sign_epi8(blj1, ali1);
-
-                        // updot
-                        const __m128i oneFill = _mm_set1_epi16(1);
-                        __m128i mad0 = _mm_maddubs_epi16(sepAA0, sepBA0);
-                        __m128i mad1 = _mm_maddubs_epi16(sepAA1, sepBA1);
-                        __m256 udTmp = _mm256_cvtepi32_ps(MM256_SET_M128I(_mm_madd_epi16(oneFill, mad1), _mm_madd_epi16(oneFill, mad0)));
-#endif
+                    for (int64_t i = 0; i < RM; ++i)
                        Cv[j][i] = madd(_mm256_set1_ps(unhalf(A[lda * (ii + i) + l].d) *
                                                       unhalf(B[ldb * (jj + j) + l].d)),
-                                                       udTmp,
-                                                       Cv[j][i]);
-                    }
+                                        updot(_mm256_sign_epi8(load(A + lda * (ii + i) + l),
+                                                               load(A + lda * (ii + i) + l)),
+                                              _mm256_sign_epi8(load(B + ldb * (jj + j) + l),
+                                                               load(A + lda * (ii + i) + l))),
+                                        Cv[j][i]);
            for (int64_t j = 0; j < RN; ++j)
                for (int64_t i = 0; i < RM; ++i)
                    C[ldc * (jj + j) + (ii + i)] = hsum(Cv[j][i]);
@@ -763,28 +746,10 @@ class tinyBLAS_Q0_AVX {
        return _mm256_loadu_si256((const __m256i *)b->qs);
    }

-    inline __m128i load0(const block_q8_0 *b) {
-        return _mm_loadu_si128((const __m128i *)b->qs);
-    }
-
-    inline __m128i load1(const block_q8_0 *b) {
-        return _mm_loadu_si128(((const __m128i *)b->qs) + 1);
-    }
-
    inline __m256i load(const block_q4_0 *b) {
        return _mm256_sub_epi8(denibble(b->qs), _mm256_set1_epi8(8));
    }

-    inline __m128i load0(const block_q4_0 *b) {
-        const __m128i x = _mm_loadu_si128((const __m128i *)(b->qs));
-        return _mm_sub_epi8(_mm_and_si128(_mm_set1_epi8(15), x), _mm_set1_epi8(8));
-    }
-
-    inline __m128i load1(const block_q4_0 *b) {
-        const __m128i x = _mm_loadu_si128((const __m128i *)(b->qs));
-        return _mm_sub_epi8(_mm_and_si128(_mm_set1_epi8(15), _mm_srli_epi16(x, 4)), _mm_set1_epi8(8));
-    }
-
    inline __m256 updot(__m256i u, __m256i s) {
        __m256i res;
 #if defined(__AVXVNNI__) || (defined(__AVX512VNNI__) && defined(__AVX512VL__))
@@ -812,7 +777,7 @@ class tinyBLAS_Q0_AVX {
    const int ith;
    const int nth;
 };
-#endif // __AVX__
+#endif // __AVX2__

 } // namespace

@@ -963,8 +928,8 @@ bool llamafile_sgemm(int64_t m, int64_t n, int64_t k, const void *A, int64_t lda
    case GGML_TYPE_Q8_0: {
        if (Btype != GGML_TYPE_Q8_0)
           return false;
-#if defined(__AVX2__) || defined(__AVX512F__) || defined(__AVX__)
-        tinyBLAS_Q0_AVX<block_q8_0, block_q8_0, float> tb{
+#if defined(__AVX2__) || defined(__AVX512F__)
+        tinyBLAS_Q0_AVX2<block_q8_0, block_q8_0, float> tb{
            k, (const block_q8_0 *)A, lda,
            (const block_q8_0 *)B, ldb,
            (float *)C, ldc,
@@ -987,8 +952,8 @@ bool llamafile_sgemm(int64_t m, int64_t n, int64_t k, const void *A, int64_t lda
    case GGML_TYPE_Q4_0: {
        if (Btype != GGML_TYPE_Q8_0)
            return false;
-#if defined(__AVX2__) || defined(__AVX512F__) || defined(__AVX__)
-        tinyBLAS_Q0_AVX<block_q4_0, block_q8_0, float> tb{
+#if defined(__AVX2__) || defined(__AVX512F__)
+        tinyBLAS_Q0_AVX2<block_q4_0, block_q8_0, float> tb{
            k, (const block_q4_0 *)A, lda,
            (const block_q8_0 *)B, ldb,
            (float *)C, ldc,
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -84,7 +84,6 @@ llama_test(test-tokenizer-0 NAME test-tokenizer-0-starcoder         ARGS ${CMAKE
 llama_test(test-tokenizer-0 NAME test-tokenizer-0-gpt-2             ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-gpt-2.gguf)
 llama_test(test-tokenizer-0 NAME test-tokenizer-0-refact            ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-refact.gguf)
 llama_test(test-tokenizer-0 NAME test-tokenizer-0-command-r         ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-command-r.gguf)
-llama_test(test-tokenizer-0 NAME test-tokenizer-0-qwen2             ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-qwen2.gguf)

 # build test-tokenizer-1-bpe target once and add many tests
 add_executable(test-tokenizer-1-bpe test-tokenizer-1-bpe.cpp)
Author	SHA1	Message	Date
Georgi Gerganov	0fc560fe96	ci : enable git lfs for build.yml	2024-05-08 10:53:02 +03:00
Georgi Gerganov	db5c2ad30e	Revert "tmp : dummy change to trigger ci" This reverts commit `97e40df5d6`.	2024-05-08 10:42:25 +03:00
Georgi Gerganov	97e40df5d6	tmp : dummy change to trigger ci	2024-05-08 10:42:11 +03:00
Georgi Gerganov	837f426f19	ci : try lfs true	2024-05-08 10:30:25 +03:00
Georgi Gerganov	9d13776f34	ci : deps before checkout	2024-05-08 10:24:53 +03:00
Georgi Gerganov	2c7ff2c7ae	ci : add git-lfs ggml-ci	2024-05-08 10:18:47 +03:00
Georgi Gerganov	0dc0e9aa42	models : convert vocab files to LFS ggml-ci	2024-05-08 09:54:38 +03:00
				`@@ -0,0 +1 @@`
				`models/ggml-vocab-*.gguf filter=lfs diff=lfs merge=lfs -text`