mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2026-04-16 16:27:32 +03:00
Compare commits
19 Commits
b1634
...
gg/swiftui
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
865066621b | ||
|
|
5c5bdba605 | ||
|
|
262fd466f3 | ||
|
|
4ed98b90bc | ||
|
|
9629448716 | ||
|
|
d36ca171b6 | ||
|
|
da44d45265 | ||
|
|
a520e87ed6 | ||
|
|
ce1df8124a | ||
|
|
ff87313db8 | ||
|
|
6a8680204c | ||
|
|
afd336f7a6 | ||
|
|
6744dbe924 | ||
|
|
cafcd4f895 | ||
|
|
c50e400163 | ||
|
|
20a68a7030 | ||
|
|
55e87c3749 | ||
|
|
873637afc7 | ||
|
|
0353a18401 |
@@ -23,3 +23,6 @@ insert_final_newline = unset
|
||||
|
||||
[examples/server/public/*]
|
||||
indent_size = 2
|
||||
|
||||
[examples/llama.swiftui/llama.swiftui.xcodeproj/*]
|
||||
indent_style = tab
|
||||
|
||||
11
README.md
11
README.md
@@ -97,7 +97,18 @@ as the main playground for developing new features for the [ggml](https://github
|
||||
- [X] [Persimmon 8B](https://github.com/ggerganov/llama.cpp/pull/3410)
|
||||
- [X] [MPT](https://github.com/ggerganov/llama.cpp/pull/3417)
|
||||
- [X] [Bloom](https://github.com/ggerganov/llama.cpp/pull/3553)
|
||||
- [x] [Yi models](https://huggingface.co/models?search=01-ai/Yi)
|
||||
- [X] [StableLM-3b-4e1t](https://github.com/ggerganov/llama.cpp/pull/3586)
|
||||
- [x] [Deepseek models](https://huggingface.co/models?search=deepseek-ai/deepseek)
|
||||
- [x] [Qwen models](https://huggingface.co/models?search=Qwen/Qwen)
|
||||
- [x] [Mixtral MoE](https://huggingface.co/models?search=mistral-ai/Mixtral)
|
||||
|
||||
**Multimodal models:**
|
||||
|
||||
- [x] [Llava 1.5 models](https://huggingface.co/collections/liuhaotian/llava-15-653aac15d994e992e2677a7e)
|
||||
- [x] [Bakllava](https://huggingface.co/models?search=SkunkworksAI/Bakllava)
|
||||
- [x] [Obsidian](https://huggingface.co/NousResearch/Obsidian-3B-V0.5)
|
||||
- [x] [ShareGPT4V](https://huggingface.co/models?search=Lin-Chen/ShareGPT4V)
|
||||
|
||||
|
||||
**Bindings:**
|
||||
|
||||
@@ -71,7 +71,7 @@ void free_random_uniform_distribution(struct random_uniform_distribution * rnd)
|
||||
|
||||
struct ggml_tensor * randomize_tensor_normal(struct ggml_tensor * tensor, struct random_normal_distribution * rnd) {
|
||||
float scale = 1.0f; // xavier
|
||||
switch (tensor->n_dims) {
|
||||
switch (ggml_n_dims(tensor)) {
|
||||
case 1:
|
||||
scale /= sqrtf((float) tensor->ne[0]);
|
||||
for (int i0 = 0; i0 < tensor->ne[0]; i0++) {
|
||||
@@ -119,7 +119,7 @@ struct ggml_tensor * randomize_tensor_normal(struct ggml_tensor * tensor, struct
|
||||
}
|
||||
|
||||
struct ggml_tensor * randomize_tensor_uniform(struct ggml_tensor * tensor, struct random_uniform_distribution * rnd) {
|
||||
switch (tensor->n_dims) {
|
||||
switch (ggml_n_dims(tensor)) {
|
||||
case 1:
|
||||
for (int i0 = 0; i0 < tensor->ne[0]; i0++) {
|
||||
float * dst = (float *) ((char *) tensor->data + i0*tensor->nb[0]);
|
||||
@@ -183,25 +183,27 @@ float fclamp(const float v, const float min, const float max) {
|
||||
}
|
||||
|
||||
void assert_shape_1d(struct ggml_tensor * tensor, int64_t ne0) {
|
||||
GGML_ASSERT(tensor->n_dims == 1);
|
||||
GGML_ASSERT(tensor->ne[0] == ne0);
|
||||
GGML_ASSERT(tensor->ne[1] == 1);
|
||||
GGML_ASSERT(tensor->ne[2] == 1);
|
||||
GGML_ASSERT(tensor->ne[3] == 1);
|
||||
}
|
||||
|
||||
void assert_shape_2d(struct ggml_tensor * tensor, int64_t ne0, int64_t ne1) {
|
||||
GGML_ASSERT(tensor->n_dims == 2);
|
||||
GGML_ASSERT(tensor->ne[0] == ne0);
|
||||
GGML_ASSERT(tensor->ne[1] == ne1);
|
||||
GGML_ASSERT(tensor->ne[2] == 1);
|
||||
GGML_ASSERT(tensor->ne[3] == 1);
|
||||
}
|
||||
|
||||
void assert_shape_3d(struct ggml_tensor * tensor, int64_t ne0, int64_t ne1, int64_t ne2) {
|
||||
GGML_ASSERT(tensor->n_dims == 3);
|
||||
GGML_ASSERT(tensor->ne[0] == ne0);
|
||||
GGML_ASSERT(tensor->ne[1] == ne1);
|
||||
GGML_ASSERT(tensor->ne[2] == ne2);
|
||||
GGML_ASSERT(tensor->ne[3] == 1);
|
||||
}
|
||||
|
||||
void assert_shape_4d(struct ggml_tensor * tensor, int64_t ne0, int64_t ne1, int64_t ne2, int64_t ne3) {
|
||||
GGML_ASSERT(tensor->n_dims == 4);
|
||||
GGML_ASSERT(tensor->ne[0] == ne0);
|
||||
GGML_ASSERT(tensor->ne[1] == ne1);
|
||||
GGML_ASSERT(tensor->ne[2] == ne2);
|
||||
@@ -225,8 +227,8 @@ int64_t get_example_targets_batch(
|
||||
bool sample_random_offsets
|
||||
) {
|
||||
GGML_ASSERT(samples_count > 0);
|
||||
GGML_ASSERT(tokens_input->n_dims == 2);
|
||||
GGML_ASSERT(target_probs->n_dims == 3);
|
||||
GGML_ASSERT(ggml_is_matrix(tokens_input));
|
||||
GGML_ASSERT(ggml_is_3d(target_probs));
|
||||
int64_t n_vocab = target_probs->ne[0];
|
||||
int64_t n_tokens = tokens_input->ne[0];
|
||||
int64_t n_batch = tokens_input->ne[1];
|
||||
|
||||
323
convert.py
323
convert.py
@@ -10,6 +10,7 @@ import itertools
|
||||
import json
|
||||
import math
|
||||
import mmap
|
||||
import os
|
||||
import pickle
|
||||
import re
|
||||
import signal
|
||||
@@ -18,15 +19,15 @@ import sys
|
||||
import time
|
||||
import zipfile
|
||||
from abc import ABCMeta, abstractmethod
|
||||
from collections import OrderedDict
|
||||
from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
from typing import IO, TYPE_CHECKING, Any, Callable, Iterable, Literal, TypeVar
|
||||
from typing import IO, TYPE_CHECKING, Any, Callable, Iterable, Literal, Optional, TypeVar, cast
|
||||
|
||||
import numpy as np
|
||||
from sentencepiece import SentencePieceProcessor
|
||||
|
||||
import os
|
||||
if 'NO_LOCAL_GGUF' not in os.environ:
|
||||
sys.path.insert(1, str(Path(__file__).parent / 'gguf-py'))
|
||||
import gguf
|
||||
@@ -327,127 +328,138 @@ class Params:
|
||||
return params
|
||||
|
||||
|
||||
#
|
||||
# vocab
|
||||
#
|
||||
class VocabLoader:
|
||||
def __init__(self, params: Params, fname_tokenizer: Path) -> None:
|
||||
try:
|
||||
from transformers import AutoTokenizer
|
||||
except ImportError as e:
|
||||
raise ImportError(
|
||||
"To use VocabLoader, please install the `transformers` package. "
|
||||
"You can install it with `pip install transformers`."
|
||||
) from e
|
||||
|
||||
class BpeVocab:
|
||||
def __init__(self, fname_tokenizer: Path, fname_added_tokens: Path | None) -> None:
|
||||
self.bpe_tokenizer = json.loads(open(str(fname_tokenizer), encoding="utf-8").read())
|
||||
added_tokens: dict[str, int]
|
||||
if fname_added_tokens is not None:
|
||||
# FIXME: Verify that added tokens here _cannot_ overlap with the main vocab.
|
||||
added_tokens = json.load(open(fname_added_tokens, encoding="utf-8"))
|
||||
try:
|
||||
self.tokenizer = AutoTokenizer.from_pretrained(str(fname_tokenizer), trust_remote_code=True)
|
||||
except ValueError:
|
||||
self.tokenizer = AutoTokenizer.from_pretrained(str(fname_tokenizer), use_fast=False, trust_remote_code=True)
|
||||
|
||||
self.added_tokens_dict: OrderedDict[str, int] = OrderedDict()
|
||||
|
||||
for tok, tokidx in sorted(self.tokenizer.get_added_vocab().items(), key=lambda x: x[1]):
|
||||
if tokidx >= params.n_vocab or tokidx < self.tokenizer.vocab_size:
|
||||
continue
|
||||
|
||||
self.added_tokens_dict[tok] = tokidx
|
||||
|
||||
self.unk_token_id: int = self.tokenizer.unk_token_id
|
||||
self.specials: dict[str, int] = {
|
||||
tok: self.tokenizer.get_vocab()[tok]
|
||||
for tok in self.tokenizer.all_special_tokens
|
||||
}
|
||||
self.special_ids: set[int] = set(self.tokenizer.all_special_ids)
|
||||
self.vocab_size_base: int = self.tokenizer.vocab_size
|
||||
self.vocab_size: int = self.vocab_size_base + len(self.added_tokens_dict)
|
||||
self.fname_tokenizer: Path = fname_tokenizer
|
||||
|
||||
vocab_file = "tokenizer.model"
|
||||
path_candidate = find_vocab_file_path(self.fname_tokenizer, vocab_file)
|
||||
if path_candidate is not None:
|
||||
self.spm = SentencePieceProcessor(str(path_candidate))
|
||||
print(self.spm.vocab_size(), self.vocab_size_base)
|
||||
else:
|
||||
# Fall back to trying to find the added tokens in tokenizer.json
|
||||
tokenizer_json_file = fname_tokenizer.parent / 'tokenizer.json'
|
||||
if not tokenizer_json_file.is_file():
|
||||
added_tokens = {}
|
||||
else:
|
||||
tokenizer_json = json.load(open(tokenizer_json_file, encoding="utf-8"))
|
||||
added_tokens = dict(
|
||||
(item['content'], item['id'])
|
||||
for item in tokenizer_json.get('added_tokens', [])
|
||||
# Added tokens here can be duplicates of the main vocabulary.
|
||||
if item['content'] not in self.bpe_tokenizer)
|
||||
self.spm = None
|
||||
|
||||
vocab_size: int = len(self.bpe_tokenizer)
|
||||
expected_ids = list(range(vocab_size, vocab_size + len(added_tokens)))
|
||||
actual_ids = sorted(added_tokens.values())
|
||||
if expected_ids != actual_ids:
|
||||
expected_end_id = vocab_size + len(actual_ids) - 1
|
||||
raise Exception(f"Expected the {len(actual_ids)} added token ID(s) to be sequential in the range {vocab_size} - {expected_end_id}; got {actual_ids}")
|
||||
def hf_tokens(self) -> Iterable[tuple[bytes, float, gguf.TokenType]]:
|
||||
tokenizer = self.tokenizer
|
||||
reverse_vocab = {id: encoded_tok for encoded_tok, id in tokenizer.get_vocab().items()}
|
||||
added_tokens_ids = set(self.added_tokens_dict.values())
|
||||
|
||||
items = sorted(added_tokens.items(), key=lambda text_idx: text_idx[1])
|
||||
self.added_tokens_list = [text for (text, idx) in items]
|
||||
self.vocab_size_base: int = vocab_size
|
||||
self.vocab_size: int = self.vocab_size_base + len(self.added_tokens_list)
|
||||
self.fname_tokenizer = fname_tokenizer
|
||||
self.fname_added_tokens = fname_added_tokens
|
||||
for i in range(self.vocab_size_base):
|
||||
if i in added_tokens_ids:
|
||||
continue
|
||||
|
||||
def bpe_tokens(self) -> Iterable[tuple[bytes, float, gguf.TokenType]]:
|
||||
tokenizer = self.bpe_tokenizer
|
||||
reverse_vocab = {id: encoded_tok for encoded_tok, id in tokenizer.items()}
|
||||
text = reverse_vocab[i].encode("utf-8")
|
||||
yield text, self.get_token_score(i), self.get_token_type(i)
|
||||
|
||||
for i, _ in enumerate(tokenizer):
|
||||
yield reverse_vocab[i], 0.0, gguf.TokenType.NORMAL
|
||||
def get_token_type(self, token_id: int) -> gguf.TokenType:
|
||||
toktype = gguf.TokenType.NORMAL
|
||||
|
||||
def added_tokens(self) -> Iterable[tuple[bytes, float, gguf.TokenType]]:
|
||||
for text in self.added_tokens_list:
|
||||
score = -1000.0
|
||||
yield text.encode("utf-8"), score, gguf.TokenType.CONTROL
|
||||
|
||||
def all_tokens(self) -> Iterable[tuple[bytes, float, gguf.TokenType]]:
|
||||
yield from self.bpe_tokens()
|
||||
yield from self.added_tokens()
|
||||
|
||||
def __repr__(self) -> str:
|
||||
return f"<BpeVocab with {self.vocab_size_base} base tokens and {len(self.added_tokens_list)} added tokens>"
|
||||
|
||||
|
||||
class SentencePieceVocab:
|
||||
def __init__(self, fname_tokenizer: Path, fname_added_tokens: Path | None) -> None:
|
||||
self.sentencepiece_tokenizer = SentencePieceProcessor(str(fname_tokenizer))
|
||||
added_tokens: dict[str, int]
|
||||
if fname_added_tokens is not None:
|
||||
added_tokens = json.load(open(fname_added_tokens, encoding="utf-8"))
|
||||
else:
|
||||
added_tokens = {}
|
||||
|
||||
vocab_size: int = self.sentencepiece_tokenizer.vocab_size()
|
||||
|
||||
new_tokens = {id: piece for piece, id in added_tokens.items() if id >= vocab_size}
|
||||
expected_new_ids = list(range(vocab_size, vocab_size + len(new_tokens)))
|
||||
actual_new_ids = sorted(new_tokens.keys())
|
||||
|
||||
if expected_new_ids != actual_new_ids:
|
||||
raise ValueError(f"Expected new token IDs {expected_new_ids} to be sequential; got {actual_new_ids}")
|
||||
|
||||
# Token pieces that were added to the base vocabulary.
|
||||
self.added_tokens_list = [new_tokens[id] for id in actual_new_ids]
|
||||
self.vocab_size_base = vocab_size
|
||||
self.vocab_size = self.vocab_size_base + len(self.added_tokens_list)
|
||||
self.fname_tokenizer = fname_tokenizer
|
||||
self.fname_added_tokens = fname_added_tokens
|
||||
|
||||
def sentencepiece_tokens(self) -> Iterable[tuple[bytes, float, gguf.TokenType]]:
|
||||
tokenizer = self.sentencepiece_tokenizer
|
||||
for i in range(tokenizer.vocab_size()):
|
||||
piece = tokenizer.id_to_piece(i)
|
||||
text: bytes = piece.encode("utf-8")
|
||||
score: float = tokenizer.get_score(i)
|
||||
|
||||
toktype = gguf.TokenType.NORMAL
|
||||
if tokenizer.is_unknown(i):
|
||||
if self.spm is not None and token_id < self.spm.vocab_size():
|
||||
if self.spm.is_unknown(token_id):
|
||||
toktype = gguf.TokenType.UNKNOWN
|
||||
if tokenizer.is_control(i):
|
||||
if self.spm.is_control(token_id):
|
||||
toktype = gguf.TokenType.CONTROL
|
||||
if self.spm.is_unused(token_id):
|
||||
toktype = gguf.TokenType.UNUSED
|
||||
if self.spm.is_byte(token_id):
|
||||
toktype = gguf.TokenType.BYTE
|
||||
else:
|
||||
if token_id == self.unk_token_id:
|
||||
toktype = gguf.TokenType.UNKNOWN
|
||||
if token_id in self.special_ids:
|
||||
toktype = gguf.TokenType.CONTROL
|
||||
|
||||
# NOTE: I think added_tokens are user defined.
|
||||
# ref: https://github.com/google/sentencepiece/blob/master/src/sentencepiece_model.proto
|
||||
# if tokenizer.is_user_defined(i): toktype = gguf.TokenType.USER_DEFINED
|
||||
return toktype
|
||||
|
||||
if tokenizer.is_unused(i):
|
||||
toktype = gguf.TokenType.UNUSED
|
||||
if tokenizer.is_byte(i):
|
||||
toktype = gguf.TokenType.BYTE
|
||||
|
||||
yield text, score, toktype
|
||||
def get_token_score(self, token_id: int) -> float:
|
||||
if self.spm is not None and token_id < self.spm.vocab_size():
|
||||
return cast(float, self.spm.get_score(token_id))
|
||||
return 0.0
|
||||
|
||||
def added_tokens(self) -> Iterable[tuple[bytes, float, gguf.TokenType]]:
|
||||
for text in self.added_tokens_list:
|
||||
score = -1000.0
|
||||
yield text.encode("utf-8"), score, gguf.TokenType.USER_DEFINED
|
||||
|
||||
for text in self.added_tokens_dict:
|
||||
if text in self.specials:
|
||||
|
||||
toktype = self.get_token_type(self.specials[text])
|
||||
score = self.get_token_score(self.specials[text])
|
||||
|
||||
else:
|
||||
toktype = gguf.TokenType.USER_DEFINED
|
||||
score = -1000.0
|
||||
|
||||
yield text.encode("utf-8"), score, toktype
|
||||
|
||||
def has_newline_token(self) -> bool:
|
||||
return '<0x0A>' in self.tokenizer.vocab or '\n' in self.tokenizer.vocab
|
||||
|
||||
def all_tokens(self) -> Iterable[tuple[bytes, float, gguf.TokenType]]:
|
||||
yield from self.sentencepiece_tokens()
|
||||
yield from self.hf_tokens()
|
||||
yield from self.added_tokens()
|
||||
|
||||
def get_vocab_type(self) -> str:
|
||||
path_candidates = []
|
||||
vocab_file = "tokenizer.model"
|
||||
path_candidates.append(vocab_file)
|
||||
path_candidate = find_vocab_file_path(self.fname_tokenizer, vocab_file)
|
||||
if path_candidate is not None:
|
||||
return "llama"
|
||||
|
||||
vocab_file = "vocab.json"
|
||||
path_candidates.append(vocab_file)
|
||||
path_candidate = find_vocab_file_path(self.fname_tokenizer, vocab_file)
|
||||
if path_candidate is not None:
|
||||
return "gpt2"
|
||||
|
||||
vocab_file = "tokenizer.json"
|
||||
path_candidates.append(vocab_file)
|
||||
path_candidate = find_vocab_file_path(self.fname_tokenizer, vocab_file)
|
||||
if path_candidate:
|
||||
if not self.has_newline_token():
|
||||
return "gpt2"
|
||||
return "llama"
|
||||
|
||||
raise FileNotFoundError(
|
||||
f"Could not find {path_candidates} in {self.fname_tokenizer} or its parent; "
|
||||
"if it's in another directory, pass the directory as --vocab-dir"
|
||||
)
|
||||
|
||||
def __repr__(self) -> str:
|
||||
return f"<SentencePieceVocab with {self.vocab_size_base} base tokens and {len(self.added_tokens_list)} added tokens>"
|
||||
return f"<VocabLoader with {self.vocab_size_base} base tokens and {len(self.added_tokens_dict)} added tokens>"
|
||||
|
||||
|
||||
Vocab: TypeAlias = 'BpeVocab | SentencePieceVocab'
|
||||
Vocab: TypeAlias = 'VocabLoader'
|
||||
|
||||
|
||||
#
|
||||
# data loading
|
||||
@@ -824,20 +836,27 @@ def bounded_parallel_map(func: Callable[[In], Out], iterable: Iterable[In], conc
|
||||
yield result
|
||||
|
||||
|
||||
def check_vocab_size(params: Params, vocab: Vocab) -> None:
|
||||
def check_vocab_size(params: Params, vocab: Vocab, pad_vocab: bool = False) -> None:
|
||||
if params.n_vocab != vocab.vocab_size:
|
||||
assert isinstance(vocab, BpeVocab) or isinstance(vocab, SentencePieceVocab)
|
||||
if params.n_vocab == vocab.vocab_size_base:
|
||||
if params.n_vocab == vocab.vocab_size:
|
||||
print("Ignoring added_tokens.json since model matches vocab size without it.")
|
||||
vocab.added_tokens_list = []
|
||||
vocab.vocab_size = vocab.vocab_size_base
|
||||
vocab.added_tokens_dict = OrderedDict()
|
||||
vocab.vocab_size = vocab.vocab_size
|
||||
return
|
||||
|
||||
if pad_vocab and params.n_vocab > vocab.vocab_size:
|
||||
pad_count = params.n_vocab - vocab.vocab_size
|
||||
print(f'Padding vocab with {pad_count} token(s) - <dummy00001> through <dummy{pad_count:05}>')
|
||||
for i in range(1, (params.n_vocab - vocab.vocab_size) + 1):
|
||||
vocab.added_tokens_dict[f'<dummy{i:05}>'] = -1
|
||||
vocab.vocab_size = params.n_vocab
|
||||
return
|
||||
msg = f"Vocab size mismatch (model has {params.n_vocab}, but {vocab.fname_tokenizer}"
|
||||
if vocab.fname_added_tokens is not None:
|
||||
msg += f" combined with {vocab.fname_added_tokens}"
|
||||
msg += f" has {vocab.vocab_size})."
|
||||
if vocab.vocab_size < params.n_vocab < vocab.vocab_size + 20 and vocab.fname_added_tokens is None:
|
||||
if vocab.vocab_size < params.n_vocab < vocab.vocab_size + 20:
|
||||
msg += f" Most likely you are missing added_tokens.json (should be in {vocab.fname_tokenizer.parent})."
|
||||
if vocab.vocab_size < params.n_vocab:
|
||||
msg += " Possibly try using the --padvocab option."
|
||||
raise Exception(msg)
|
||||
|
||||
|
||||
@@ -901,12 +920,8 @@ class OutputFile:
|
||||
scores.append(score)
|
||||
toktypes.append(toktype)
|
||||
|
||||
if isinstance(vocab, SentencePieceVocab):
|
||||
self.gguf.add_tokenizer_model("llama")
|
||||
elif isinstance(vocab, BpeVocab):
|
||||
self.gguf.add_tokenizer_model("gpt2")
|
||||
else:
|
||||
raise ValueError('Unknown vocab type: Not BpeVocab or SentencePieceVocab')
|
||||
vocab_type = vocab.get_vocab_type()
|
||||
self.gguf.add_tokenizer_model(vocab_type)
|
||||
self.gguf.add_token_list(tokens)
|
||||
self.gguf.add_token_scores(scores)
|
||||
self.gguf.add_token_types(toktypes)
|
||||
@@ -932,8 +947,12 @@ class OutputFile:
|
||||
self.gguf.close()
|
||||
|
||||
@staticmethod
|
||||
def write_vocab_only(fname_out: Path, params: Params, vocab: Vocab, svocab: gguf.SpecialVocab, endianess:gguf.GGUFEndian = gguf.GGUFEndian.LITTLE) -> None:
|
||||
check_vocab_size(params, vocab)
|
||||
def write_vocab_only(
|
||||
fname_out: Path, params: Params, vocab: Vocab, svocab: gguf.SpecialVocab,
|
||||
endianess: gguf.GGUFEndian = gguf.GGUFEndian.LITTLE,
|
||||
pad_vocab: bool = False,
|
||||
) -> None:
|
||||
check_vocab_size(params, vocab, pad_vocab = pad_vocab)
|
||||
|
||||
of = OutputFile(fname_out, endianess=endianess)
|
||||
|
||||
@@ -960,8 +979,13 @@ class OutputFile:
|
||||
return dt.quantize(arr)
|
||||
|
||||
@staticmethod
|
||||
def write_all(fname_out: Path, ftype: GGMLFileType, params: Params, model: LazyModel, vocab: Vocab, svocab: gguf.SpecialVocab, concurrency: int = DEFAULT_CONCURRENCY, endianess: gguf.GGUFEndian = gguf.GGUFEndian.LITTLE) -> None:
|
||||
check_vocab_size(params, vocab)
|
||||
def write_all(
|
||||
fname_out: Path, ftype: GGMLFileType, params: Params, model: LazyModel, vocab: Vocab, svocab: gguf.SpecialVocab,
|
||||
concurrency: int = DEFAULT_CONCURRENCY,
|
||||
endianess: gguf.GGUFEndian = gguf.GGUFEndian.LITTLE,
|
||||
pad_vocab: bool = False,
|
||||
) -> None:
|
||||
check_vocab_size(params, vocab, pad_vocab = pad_vocab)
|
||||
|
||||
of = OutputFile(fname_out, endianess=endianess)
|
||||
|
||||
@@ -1119,35 +1143,17 @@ def load_some_model(path: Path) -> ModelPlus:
|
||||
return model_plus
|
||||
|
||||
|
||||
def load_vocab(path: Path, vocabtype: str | None) -> Vocab:
|
||||
# Be extra-friendly and accept either a file or a directory. Also, if it's
|
||||
# a directory, it might be the model directory, and tokenizer.model might
|
||||
# be in the parent of that.
|
||||
if path.is_dir():
|
||||
vocab_file = "tokenizer.model"
|
||||
if vocabtype == 'bpe':
|
||||
vocab_file = "vocab.json"
|
||||
path2 = path / vocab_file
|
||||
# Use `.parent` instead of /.. to handle the symlink case better.
|
||||
path3 = path.parent / vocab_file
|
||||
if path2.exists():
|
||||
path = path2
|
||||
elif path3.exists():
|
||||
path = path3
|
||||
else:
|
||||
raise FileNotFoundError(
|
||||
f"Could not find {vocab_file} in {path} or its parent; "
|
||||
"if it's in another directory, pass the directory as --vocab-dir")
|
||||
def find_vocab_file_path(path: Path, vocab_file: str) -> Optional[Path]:
|
||||
path2 = path / vocab_file
|
||||
# Use `.parent` instead of /.. to handle the symlink case better.
|
||||
path3 = path.parent / vocab_file
|
||||
|
||||
print(f"Loading vocab file '{path}', type '{vocabtype}'")
|
||||
if path2.exists():
|
||||
return path2
|
||||
if path3.exists():
|
||||
return path3
|
||||
|
||||
added_tokens_path = path.parent / "added_tokens.json"
|
||||
if vocabtype == "bpe":
|
||||
return BpeVocab(path, added_tokens_path if added_tokens_path.exists() else None)
|
||||
elif vocabtype == "spm":
|
||||
return SentencePieceVocab(path, added_tokens_path if added_tokens_path.exists() else None)
|
||||
else:
|
||||
raise ValueError(f"Unsupported vocabulary type {vocabtype}")
|
||||
return None
|
||||
|
||||
|
||||
def default_outfile(model_paths: list[Path], file_type: GGMLFileType) -> Path:
|
||||
@@ -1185,11 +1191,11 @@ def main(args_in: list[str] | None = None) -> None:
|
||||
parser.add_argument("--outtype", choices=output_choices, help="output format - note: q8_0 may be very slow (default: f16 or f32 based on input)")
|
||||
parser.add_argument("--vocab-dir", type=Path, help="directory containing tokenizer.model, if separate from model file")
|
||||
parser.add_argument("--outfile", type=Path, help="path to write to; default: based on input")
|
||||
parser.add_argument("model", type=Path, help="directory containing model file, or model file itself (*.pth, *.pt, *.bin, *.safetensors)")
|
||||
parser.add_argument("--vocabtype", choices=["spm", "bpe"], help="vocab format (default: spm)", default="spm")
|
||||
parser.add_argument("model", type=Path, help="directory containing model file, or model file itself (*.pth, *.pt, *.bin)")
|
||||
parser.add_argument("--ctx", type=int, help="model training context (default: based on input)")
|
||||
parser.add_argument("--concurrency", type=int, help=f"concurrency used for conversion (default: {DEFAULT_CONCURRENCY})", default = DEFAULT_CONCURRENCY)
|
||||
parser.add_argument("--bigendian", action="store_true", help="model is executed on big endian machine")
|
||||
parser.add_argument("--padvocab", action="store_true", help="add pad tokens when model vocab expects more than tokenizer metadata provides")
|
||||
|
||||
args = parser.parse_args(args_in)
|
||||
if args.dump_single:
|
||||
@@ -1232,12 +1238,13 @@ def main(args_in: list[str] | None = None) -> None:
|
||||
if not args.outfile:
|
||||
raise ValueError("need --outfile if using --vocab-only")
|
||||
# FIXME: Try to respect vocab_dir somehow?
|
||||
vocab = load_vocab(args.vocab_dir or args.model, args.vocabtype)
|
||||
vocab = VocabLoader(params, args.vocab_dir or args.model)
|
||||
special_vocab = gguf.SpecialVocab(model_plus.paths[0].parent,
|
||||
load_merges = args.vocabtype == 'bpe',
|
||||
load_merges = True,
|
||||
n_vocab = vocab.vocab_size)
|
||||
outfile = args.outfile
|
||||
OutputFile.write_vocab_only(outfile, params, vocab, special_vocab)
|
||||
OutputFile.write_vocab_only(outfile, params, vocab, special_vocab,
|
||||
endianess = endianess, pad_vocab = args.padvocab)
|
||||
print(f"Wrote {outfile}")
|
||||
return
|
||||
|
||||
@@ -1245,12 +1252,15 @@ def main(args_in: list[str] | None = None) -> None:
|
||||
vocab = model_plus.vocab
|
||||
else:
|
||||
vocab_dir = args.vocab_dir if args.vocab_dir else model_plus.paths[0].parent
|
||||
vocab = load_vocab(vocab_dir, args.vocabtype)
|
||||
vocab = VocabLoader(params, vocab_dir)
|
||||
|
||||
# FIXME: Try to respect vocab_dir somehow?
|
||||
print(f"Vocab info: {vocab}")
|
||||
special_vocab = gguf.SpecialVocab(model_plus.paths[0].parent,
|
||||
load_merges = args.vocabtype == 'bpe',
|
||||
load_merges = True,
|
||||
n_vocab = vocab.vocab_size)
|
||||
|
||||
print(f"Special vocab info: {special_vocab}")
|
||||
model = model_plus.model
|
||||
model = convert_model_names(model, params)
|
||||
ftype = pick_output_type(model, args.outtype)
|
||||
@@ -1260,7 +1270,8 @@ def main(args_in: list[str] | None = None) -> None:
|
||||
params.ftype = ftype
|
||||
print(f"Writing {outfile}, format {ftype}")
|
||||
|
||||
OutputFile.write_all(outfile, ftype, params, model, vocab, special_vocab, concurrency = args.concurrency, endianess=endianess)
|
||||
OutputFile.write_all(outfile, ftype, params, model, vocab, special_vocab,
|
||||
concurrency = args.concurrency, endianess = endianess, pad_vocab = args.padvocab)
|
||||
print(f"Wrote {outfile}")
|
||||
|
||||
|
||||
|
||||
@@ -1258,9 +1258,9 @@ static struct ggml_tensor * forward_lora(
|
||||
}
|
||||
|
||||
static void sample_softmax(struct ggml_tensor * logits, struct ggml_tensor * probs, struct ggml_tensor * best_samples) {
|
||||
assert(logits->n_dims == 2);
|
||||
assert(probs->n_dims == 2);
|
||||
assert(best_samples->n_dims == 1);
|
||||
assert(ggml_is_matrix(logits));
|
||||
assert(ggml_is_matrix(probs));
|
||||
assert(ggml_is_vector(best_samples));
|
||||
assert(logits->ne[1] == best_samples->ne[0]);
|
||||
assert(logits->ne[0] == probs->ne[0]);
|
||||
assert(logits->ne[1] == probs->ne[1]);
|
||||
@@ -1292,9 +1292,9 @@ static void sample_softmax_batch(
|
||||
struct ggml_context * ctx, struct ggml_tensor * logits, struct ggml_tensor * probs,
|
||||
struct ggml_tensor * best_samples
|
||||
) {
|
||||
GGML_ASSERT(best_samples->n_dims == 2);
|
||||
GGML_ASSERT(logits->n_dims == 3);
|
||||
GGML_ASSERT(probs->n_dims == 3);
|
||||
GGML_ASSERT(ggml_is_matrix(best_samples));
|
||||
GGML_ASSERT(ggml_is_3d(logits));
|
||||
GGML_ASSERT(ggml_is_3d(probs));
|
||||
int n_tokens = best_samples->ne[0];
|
||||
int n_batch = best_samples->ne[1];
|
||||
int n_vocab = logits->ne[0];
|
||||
@@ -1334,7 +1334,7 @@ static void print_row(struct ggml_tensor * probs, int i) {
|
||||
}
|
||||
|
||||
static void print_matrix(struct ggml_tensor * probs) {
|
||||
assert(probs->n_dims == 2);
|
||||
assert(ggml_is_matrix(probs));
|
||||
for (int i = 0; i < probs->ne[1]; ++i) {
|
||||
for (int k = 0; k < probs->ne[0]; ++k) {
|
||||
float p = ggml_get_f32_1d(probs, i*probs->ne[0] + k);
|
||||
@@ -1386,8 +1386,8 @@ static void get_example_targets(int example_id, struct ggml_tensor * tokens_inpu
|
||||
static void get_example_targets_batch(
|
||||
struct ggml_context * ctx, int example_id, struct ggml_tensor * tokens_input, struct ggml_tensor * targets
|
||||
) {
|
||||
GGML_ASSERT(tokens_input->n_dims == 2);
|
||||
GGML_ASSERT( targets->n_dims == 3);
|
||||
GGML_ASSERT(ggml_is_matrix(tokens_input));
|
||||
GGML_ASSERT(ggml_is_3d(targets));
|
||||
int n_tokens = tokens_input->ne[0];
|
||||
int n_batch = tokens_input->ne[1];
|
||||
GGML_ASSERT(n_tokens == targets->ne[1]);
|
||||
|
||||
@@ -129,13 +129,13 @@ int main(int argc, char ** argv) {
|
||||
const ggml_type qtype = GGML_TYPE_Q4_1;
|
||||
|
||||
size_t ctx_size = 0;
|
||||
ctx_size += sizex*sizey*ggml_type_sizef(GGML_TYPE_F32);
|
||||
ctx_size += sizex*sizey*ggml_type_sizef(GGML_TYPE_F32);
|
||||
ctx_size += sizex*sizez*ggml_type_sizef(GGML_TYPE_F32);
|
||||
ctx_size += sizex*sizey*ggml_type_sizef(qtype);
|
||||
ctx_size += sizex*sizey*ggml_type_sizef(qtype);
|
||||
ctx_size += sizex*sizey*ggml_type_sizef(GGML_TYPE_F32); // BLAS
|
||||
ctx_size += sizex*sizey*ggml_type_sizef(GGML_TYPE_F32); // BLAS
|
||||
ctx_size += ggml_row_size(GGML_TYPE_F32, sizex*sizey);
|
||||
ctx_size += ggml_row_size(GGML_TYPE_F32, sizex*sizey);
|
||||
ctx_size += ggml_row_size(GGML_TYPE_F32, sizex*sizez);
|
||||
ctx_size += ggml_row_size(qtype, sizex*sizey);
|
||||
ctx_size += ggml_row_size(qtype, sizex*sizey);
|
||||
ctx_size += ggml_row_size(GGML_TYPE_F32, sizex*sizey); // BLAS
|
||||
ctx_size += ggml_row_size(GGML_TYPE_F32, sizex*sizey); // BLAS
|
||||
ctx_size += 1024*1024*16;
|
||||
|
||||
printf("Allocating Memory of size %zi bytes, %zi MB\n",ctx_size, (ctx_size/1024/1024));
|
||||
|
||||
@@ -427,7 +427,7 @@ static void print_row(struct ggml_tensor * probs, int i) {
|
||||
}
|
||||
|
||||
static void print_matrix(struct ggml_tensor * probs) {
|
||||
assert(probs->n_dims == 2);
|
||||
assert(ggml_is_matrix(probs));
|
||||
for (int i = 0; i < probs->ne[1]; ++i) {
|
||||
for (int k = 0; k < probs->ne[0]; ++k) {
|
||||
float p = get_f32_2d(probs, k, i);
|
||||
@@ -639,7 +639,7 @@ static void load_vocab(const char *filename, Config *config, struct llama_vocab
|
||||
|
||||
static void convert_weights_ak_to_gg(struct ggml_tensor * gg_weights, const float * karpathy_weights) {
|
||||
int ct;
|
||||
switch (gg_weights->n_dims){
|
||||
switch (ggml_n_dims(gg_weights)) {
|
||||
case 1:
|
||||
ct = 0;
|
||||
for (int i0 = 0; i0 < gg_weights->ne[0]; i0++){
|
||||
|
||||
@@ -1110,7 +1110,7 @@ static void write_tensor(struct llama_file * file, struct ggml_tensor * tensor,
|
||||
name = ggml_get_name(tensor);
|
||||
}
|
||||
uint32_t name_len = strlen(name);
|
||||
uint32_t nd = tensor->n_dims;
|
||||
uint32_t nd = ggml_n_dims(tensor);
|
||||
uint32_t ne[4] = { (uint32_t)tensor->ne[0],
|
||||
(uint32_t)tensor->ne[1],
|
||||
(uint32_t)tensor->ne[2],
|
||||
|
||||
@@ -195,7 +195,7 @@ static bool gguf_ex_read_1(const std::string & fname) {
|
||||
|
||||
struct ggml_tensor * cur = ggml_get_tensor(ctx_data, name);
|
||||
|
||||
printf("%s: tensor[%d]: n_dims = %d, name = %s, data = %p\n", __func__, i, cur->n_dims, cur->name, cur->data);
|
||||
printf("%s: tensor[%d]: n_dims = %d, name = %s, data = %p\n", __func__, i, ggml_n_dims(cur), cur->name, cur->data);
|
||||
|
||||
// print first 10 elements
|
||||
const float * data = (const float *) cur->data;
|
||||
|
||||
1
examples/llama.swiftui/.gitignore
vendored
1
examples/llama.swiftui/.gitignore
vendored
@@ -1 +1,2 @@
|
||||
xcuserdata
|
||||
xcshareddata
|
||||
|
||||
@@ -6,16 +6,34 @@ enum LlamaError: Error {
|
||||
case couldNotInitializeContext
|
||||
}
|
||||
|
||||
func llama_batch_clear(_ batch: inout llama_batch) {
|
||||
batch.n_tokens = 0
|
||||
}
|
||||
|
||||
func llama_batch_add(_ batch: inout llama_batch, _ id: llama_token, _ pos: llama_pos, _ seq_ids: [llama_seq_id], _ logits: Bool) {
|
||||
batch.token [Int(batch.n_tokens)] = id
|
||||
batch.pos [Int(batch.n_tokens)] = pos
|
||||
batch.n_seq_id[Int(batch.n_tokens)] = Int32(seq_ids.count)
|
||||
for i in 0..<seq_ids.count {
|
||||
batch.seq_id[Int(batch.n_tokens)]![Int(i)] = seq_ids[i]
|
||||
}
|
||||
batch.logits [Int(batch.n_tokens)] = logits ? 1 : 0
|
||||
|
||||
batch.n_tokens += 1
|
||||
}
|
||||
|
||||
actor LlamaContext {
|
||||
private var model: OpaquePointer
|
||||
private var context: OpaquePointer
|
||||
private var batch: llama_batch
|
||||
private var tokens_list: [llama_token]
|
||||
|
||||
/// This variable is used to store temporarily invalid cchars
|
||||
private var temporary_invalid_cchars: [CChar]
|
||||
|
||||
var n_len: Int32 = 512
|
||||
var n_len: Int32 = 64
|
||||
var n_cur: Int32 = 0
|
||||
|
||||
var n_decode: Int32 = 0
|
||||
|
||||
init(model: OpaquePointer, context: OpaquePointer) {
|
||||
@@ -27,25 +45,34 @@ actor LlamaContext {
|
||||
}
|
||||
|
||||
deinit {
|
||||
llama_batch_free(batch)
|
||||
llama_free(context)
|
||||
llama_free_model(model)
|
||||
llama_backend_free()
|
||||
}
|
||||
|
||||
static func createContext(path: String) throws -> LlamaContext {
|
||||
static func create_context(path: String) throws -> LlamaContext {
|
||||
llama_backend_init(false)
|
||||
let model_params = llama_model_default_params()
|
||||
var model_params = llama_model_default_params()
|
||||
|
||||
#if targetEnvironment(simulator)
|
||||
model_params.n_gpu_layers = 0
|
||||
print("Running on simulator, force use n_gpu_layers = 0")
|
||||
#endif
|
||||
let model = llama_load_model_from_file(path, model_params)
|
||||
guard let model else {
|
||||
print("Could not load model at \(path)")
|
||||
throw LlamaError.couldNotInitializeContext
|
||||
}
|
||||
|
||||
let n_threads = max(1, min(8, ProcessInfo.processInfo.processorCount - 2))
|
||||
print("Using \(n_threads) threads")
|
||||
|
||||
var ctx_params = llama_context_default_params()
|
||||
ctx_params.seed = 1234
|
||||
ctx_params.seed = 1234
|
||||
ctx_params.n_ctx = 2048
|
||||
ctx_params.n_threads = 8
|
||||
ctx_params.n_threads_batch = 8
|
||||
ctx_params.n_threads = UInt32(n_threads)
|
||||
ctx_params.n_threads_batch = UInt32(n_threads)
|
||||
|
||||
let context = llama_new_context_with_model(model, ctx_params)
|
||||
guard let context else {
|
||||
@@ -56,6 +83,26 @@ actor LlamaContext {
|
||||
return LlamaContext(model: model, context: context)
|
||||
}
|
||||
|
||||
func model_info() -> String {
|
||||
let result = UnsafeMutablePointer<Int8>.allocate(capacity: 256)
|
||||
result.initialize(repeating: Int8(0), count: 256)
|
||||
defer {
|
||||
result.deallocate()
|
||||
}
|
||||
|
||||
// TODO: this is probably very stupid way to get the string from C
|
||||
|
||||
let nChars = llama_model_desc(model, result, 256)
|
||||
let bufferPointer = UnsafeBufferPointer(start: result, count: Int(nChars))
|
||||
|
||||
var SwiftString = ""
|
||||
for char in bufferPointer {
|
||||
SwiftString.append(Character(UnicodeScalar(UInt8(char))))
|
||||
}
|
||||
|
||||
return SwiftString
|
||||
}
|
||||
|
||||
func get_n_tokens() -> Int32 {
|
||||
return batch.n_tokens;
|
||||
}
|
||||
@@ -79,16 +126,11 @@ actor LlamaContext {
|
||||
print(String(cString: token_to_piece(token: id) + [0]))
|
||||
}
|
||||
|
||||
// batch = llama_batch_init(512, 0) // done in init()
|
||||
batch.n_tokens = Int32(tokens_list.count)
|
||||
llama_batch_clear(&batch)
|
||||
|
||||
for i1 in 0..<batch.n_tokens {
|
||||
for i1 in 0..<tokens_list.count {
|
||||
let i = Int(i1)
|
||||
batch.token[i] = tokens_list[i]
|
||||
batch.pos[i] = i1
|
||||
batch.n_seq_id[Int(i)] = 1
|
||||
batch.seq_id[Int(i)]![0] = 0
|
||||
batch.logits[i] = 0
|
||||
llama_batch_add(&batch, tokens_list[i], Int32(i), [0], false)
|
||||
}
|
||||
batch.logits[Int(batch.n_tokens) - 1] = 1 // true
|
||||
|
||||
@@ -141,18 +183,11 @@ actor LlamaContext {
|
||||
print(new_token_str)
|
||||
// tokens_list.append(new_token_id)
|
||||
|
||||
batch.n_tokens = 0
|
||||
|
||||
batch.token[Int(batch.n_tokens)] = new_token_id
|
||||
batch.pos[Int(batch.n_tokens)] = n_cur
|
||||
batch.n_seq_id[Int(batch.n_tokens)] = 1
|
||||
batch.seq_id[Int(batch.n_tokens)]![0] = 0
|
||||
batch.logits[Int(batch.n_tokens)] = 1 // true
|
||||
batch.n_tokens += 1
|
||||
llama_batch_clear(&batch)
|
||||
llama_batch_add(&batch, new_token_id, n_cur, [0], true)
|
||||
|
||||
n_decode += 1
|
||||
|
||||
n_cur += 1
|
||||
n_cur += 1
|
||||
|
||||
if llama_decode(context, batch) != 0 {
|
||||
print("failed to evaluate llama!")
|
||||
@@ -161,14 +196,111 @@ actor LlamaContext {
|
||||
return new_token_str
|
||||
}
|
||||
|
||||
func bench(pp: Int, tg: Int, pl: Int, nr: Int = 1) -> String {
|
||||
var pp_avg: Double = 0
|
||||
var tg_avg: Double = 0
|
||||
|
||||
var pp_std: Double = 0
|
||||
var tg_std: Double = 0
|
||||
|
||||
for r in 0..<nr {
|
||||
// bench prompt processing
|
||||
|
||||
llama_batch_clear(&batch)
|
||||
|
||||
let n_tokens = pp
|
||||
|
||||
for i in 0..<n_tokens {
|
||||
llama_batch_add(&batch, 0, Int32(i), [0], false)
|
||||
}
|
||||
batch.logits[Int(batch.n_tokens) - 1] = 1 // true
|
||||
|
||||
llama_kv_cache_clear(context)
|
||||
|
||||
let t_pp_start = ggml_time_us()
|
||||
|
||||
if llama_decode(context, batch) != 0 {
|
||||
print("llama_decode() failed during prompt")
|
||||
}
|
||||
|
||||
let t_pp_end = ggml_time_us()
|
||||
|
||||
// bench text generation
|
||||
|
||||
llama_kv_cache_clear(context)
|
||||
|
||||
let t_tg_start = ggml_time_us()
|
||||
|
||||
for i in 0..<tg {
|
||||
llama_batch_clear(&batch)
|
||||
|
||||
for j in 0..<pl {
|
||||
llama_batch_add(&batch, 0, Int32(i), [Int32(j)], true)
|
||||
}
|
||||
|
||||
if llama_decode(context, batch) != 0 {
|
||||
print("llama_decode() failed during text generation")
|
||||
}
|
||||
}
|
||||
|
||||
let t_tg_end = ggml_time_us()
|
||||
|
||||
llama_kv_cache_clear(context)
|
||||
|
||||
let t_pp = Double(t_pp_end - t_pp_start) / 1000000.0
|
||||
let t_tg = Double(t_tg_end - t_tg_start) / 1000000.0
|
||||
|
||||
let speed_pp = Double(pp) / t_pp
|
||||
let speed_tg = Double(pl*tg) / t_tg
|
||||
|
||||
pp_avg += speed_pp
|
||||
tg_avg += speed_tg
|
||||
|
||||
pp_std += speed_pp * speed_pp
|
||||
tg_std += speed_tg * speed_tg
|
||||
|
||||
print("pp \(speed_pp) t/s, tg \(speed_tg) t/s")
|
||||
}
|
||||
|
||||
pp_avg /= Double(nr)
|
||||
tg_avg /= Double(nr)
|
||||
|
||||
if nr > 1 {
|
||||
pp_std = sqrt(pp_std / Double(nr - 1) - pp_avg * pp_avg * Double(nr) / Double(nr - 1))
|
||||
tg_std = sqrt(tg_std / Double(nr - 1) - tg_avg * tg_avg * Double(nr) / Double(nr - 1))
|
||||
} else {
|
||||
pp_std = 0
|
||||
tg_std = 0
|
||||
}
|
||||
|
||||
let model_desc = model_info();
|
||||
let model_size = String(format: "%.2f GiB", Double(llama_model_size(model)) / 1024.0 / 1024.0 / 1024.0);
|
||||
let model_n_params = String(format: "%.2f B", Double(llama_model_n_params(model)) / 1e9);
|
||||
let backend = "Metal";
|
||||
let pp_avg_str = String(format: "%.2f", pp_avg);
|
||||
let tg_avg_str = String(format: "%.2f", tg_avg);
|
||||
let pp_std_str = String(format: "%.2f", pp_std);
|
||||
let tg_std_str = String(format: "%.2f", tg_std);
|
||||
|
||||
var result = ""
|
||||
|
||||
result += String("| model | size | params | backend | test | t/s |\n")
|
||||
result += String("| --- | --- | --- | --- | --- | --- |\n")
|
||||
result += String("| \(model_desc) | \(model_size) | \(model_n_params) | \(backend) | pp \(pp) | \(pp_avg_str) ± \(pp_std_str) |\n")
|
||||
result += String("| \(model_desc) | \(model_size) | \(model_n_params) | \(backend) | tg \(tg) | \(tg_avg_str) ± \(tg_std_str) |\n")
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
func clear() {
|
||||
tokens_list.removeAll()
|
||||
temporary_invalid_cchars.removeAll()
|
||||
llama_kv_cache_clear(context)
|
||||
}
|
||||
|
||||
private func tokenize(text: String, add_bos: Bool) -> [llama_token] {
|
||||
let utf8Count = text.utf8.count
|
||||
let n_tokens = utf8Count + (add_bos ? 1 : 0)
|
||||
let n_tokens = utf8Count + (add_bos ? 1 : 0) + 1
|
||||
let tokens = UnsafeMutablePointer<llama_token>.allocate(capacity: n_tokens)
|
||||
let tokenCount = llama_tokenize(model, text, Int32(utf8Count), tokens, Int32(n_tokens), add_bos, false)
|
||||
|
||||
|
||||
@@ -1,481 +1,483 @@
|
||||
// !$*UTF8*$!
|
||||
{
|
||||
archiveVersion = 1;
|
||||
classes = {
|
||||
};
|
||||
objectVersion = 56;
|
||||
objects = {
|
||||
archiveVersion = 1;
|
||||
classes = {
|
||||
};
|
||||
objectVersion = 56;
|
||||
objects = {
|
||||
|
||||
/* Begin PBXBuildFile section */
|
||||
542376082B0D9BFB008E6A1C /* ggml-quants.c in Sources */ = {isa = PBXBuildFile; fileRef = 542376072B0D9BFB008E6A1C /* ggml-quants.c */; };
|
||||
5423760B2B0D9C4B008E6A1C /* ggml-backend.c in Sources */ = {isa = PBXBuildFile; fileRef = 5423760A2B0D9C4B008E6A1C /* ggml-backend.c */; };
|
||||
542378792ACE3F3500834A7B /* ggml-metal.metal in Resources */ = {isa = PBXBuildFile; fileRef = 549479C82AC9E10B00E0F78B /* ggml-metal.metal */; };
|
||||
542EA09D2AC8723900A8AEE9 /* ggml.c in Sources */ = {isa = PBXBuildFile; fileRef = 542EA09B2AC8723900A8AEE9 /* ggml.c */; settings = {COMPILER_FLAGS = "-DGGML_USE_ACCELERATE -DGGML_USE_METAL -DGGML_USE_K_QUANTS -O3"; }; };
|
||||
542EA0A02AC8725700A8AEE9 /* ggml-alloc.c in Sources */ = {isa = PBXBuildFile; fileRef = 542EA09F2AC8725700A8AEE9 /* ggml-alloc.c */; };
|
||||
542EA0A32AC8729100A8AEE9 /* llama.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 542EA0A12AC8729100A8AEE9 /* llama.cpp */; settings = {COMPILER_FLAGS = "-DGGML_USE_K_QUANTS -DGGML_USE_METAL -O3"; }; };
|
||||
549479CB2AC9E16000E0F78B /* Metal.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 549479CA2AC9E16000E0F78B /* Metal.framework */; };
|
||||
549479CD2AC9E42A00E0F78B /* ggml-metal.m in Sources */ = {isa = PBXBuildFile; fileRef = 549479C52AC9E0F200E0F78B /* ggml-metal.m */; settings = {COMPILER_FLAGS = "-fno-objc-arc -DGGML_SWIFT -DGGML_USE_METAL -O3"; }; };
|
||||
8A1C83772AC328BD0096AF73 /* llama_swiftuiApp.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8A1C83762AC328BD0096AF73 /* llama_swiftuiApp.swift */; };
|
||||
8A1C83792AC328BD0096AF73 /* ContentView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8A1C83782AC328BD0096AF73 /* ContentView.swift */; };
|
||||
8A1C837B2AC328BE0096AF73 /* Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = 8A1C837A2AC328BE0096AF73 /* Assets.xcassets */; };
|
||||
8A1C837E2AC328BE0096AF73 /* Preview Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = 8A1C837D2AC328BE0096AF73 /* Preview Assets.xcassets */; };
|
||||
8A39BE0A2AC7601100BFEB40 /* Accelerate.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 8A39BE092AC7601000BFEB40 /* Accelerate.framework */; };
|
||||
8A3F84242AC4C891005E2EE8 /* models in Resources */ = {isa = PBXBuildFile; fileRef = 8A3F84232AC4C891005E2EE8 /* models */; };
|
||||
8A907F332AC7138A006146EA /* LibLlama.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8A907F322AC7134E006146EA /* LibLlama.swift */; };
|
||||
8A9F7C4D2AC332EE008AE1EA /* LlamaState.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8A9F7C4C2AC332EE008AE1EA /* LlamaState.swift */; };
|
||||
542376082B0D9BFB008E6A1C /* ggml-quants.c in Sources */ = {isa = PBXBuildFile; fileRef = 542376072B0D9BFB008E6A1C /* ggml-quants.c */; settings = {COMPILER_FLAGS = "-O3"; }; };
|
||||
5423760B2B0D9C4B008E6A1C /* ggml-backend.c in Sources */ = {isa = PBXBuildFile; fileRef = 5423760A2B0D9C4B008E6A1C /* ggml-backend.c */; settings = {COMPILER_FLAGS = "-O3"; }; };
|
||||
542378792ACE3F3500834A7B /* ggml-metal.metal in Resources */ = {isa = PBXBuildFile; fileRef = 549479C82AC9E10B00E0F78B /* ggml-metal.metal */; };
|
||||
542EA09D2AC8723900A8AEE9 /* ggml.c in Sources */ = {isa = PBXBuildFile; fileRef = 542EA09B2AC8723900A8AEE9 /* ggml.c */; settings = {COMPILER_FLAGS = "-DGGML_USE_ACCELERATE -DGGML_USE_METAL -DGGML_USE_K_QUANTS -O3"; }; };
|
||||
542EA0A02AC8725700A8AEE9 /* ggml-alloc.c in Sources */ = {isa = PBXBuildFile; fileRef = 542EA09F2AC8725700A8AEE9 /* ggml-alloc.c */; settings = {COMPILER_FLAGS = "-O3"; }; };
|
||||
542EA0A32AC8729100A8AEE9 /* llama.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 542EA0A12AC8729100A8AEE9 /* llama.cpp */; settings = {COMPILER_FLAGS = "-DGGML_USE_K_QUANTS -DGGML_USE_METAL -O3"; }; };
|
||||
549479CB2AC9E16000E0F78B /* Metal.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 549479CA2AC9E16000E0F78B /* Metal.framework */; };
|
||||
549479CD2AC9E42A00E0F78B /* ggml-metal.m in Sources */ = {isa = PBXBuildFile; fileRef = 549479C52AC9E0F200E0F78B /* ggml-metal.m */; settings = {COMPILER_FLAGS = "-fno-objc-arc -DGGML_SWIFT -DGGML_USE_METAL -O3"; }; };
|
||||
7FA3D2B32B2EA2F600543F92 /* DownloadButton.swift in Sources */ = {isa = PBXBuildFile; fileRef = 7FA3D2B22B2EA2F600543F92 /* DownloadButton.swift */; };
|
||||
8A1C83772AC328BD0096AF73 /* llama_swiftuiApp.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8A1C83762AC328BD0096AF73 /* llama_swiftuiApp.swift */; };
|
||||
8A1C83792AC328BD0096AF73 /* ContentView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8A1C83782AC328BD0096AF73 /* ContentView.swift */; };
|
||||
8A1C837B2AC328BE0096AF73 /* Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = 8A1C837A2AC328BE0096AF73 /* Assets.xcassets */; };
|
||||
8A1C837E2AC328BE0096AF73 /* Preview Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = 8A1C837D2AC328BE0096AF73 /* Preview Assets.xcassets */; };
|
||||
8A39BE0A2AC7601100BFEB40 /* Accelerate.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 8A39BE092AC7601000BFEB40 /* Accelerate.framework */; };
|
||||
8A3F84242AC4C891005E2EE8 /* models in Resources */ = {isa = PBXBuildFile; fileRef = 8A3F84232AC4C891005E2EE8 /* models */; };
|
||||
8A907F332AC7138A006146EA /* LibLlama.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8A907F322AC7134E006146EA /* LibLlama.swift */; };
|
||||
8A9F7C4D2AC332EE008AE1EA /* LlamaState.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8A9F7C4C2AC332EE008AE1EA /* LlamaState.swift */; };
|
||||
/* End PBXBuildFile section */
|
||||
|
||||
/* Begin PBXFileReference section */
|
||||
542376062B0D9BEA008E6A1C /* ggml-quants.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = "ggml-quants.h"; path = "../../ggml-quants.h"; sourceTree = "<group>"; };
|
||||
542376072B0D9BFB008E6A1C /* ggml-quants.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = "ggml-quants.c"; path = "../../ggml-quants.c"; sourceTree = "<group>"; };
|
||||
542376092B0D9C40008E6A1C /* ggml-backend.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = "ggml-backend.h"; path = "../../ggml-backend.h"; sourceTree = "<group>"; };
|
||||
5423760A2B0D9C4B008E6A1C /* ggml-backend.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = "ggml-backend.c"; path = "../../ggml-backend.c"; sourceTree = "<group>"; };
|
||||
542EA09B2AC8723900A8AEE9 /* ggml.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = ggml.c; path = ../../ggml.c; sourceTree = "<group>"; };
|
||||
542EA09C2AC8723900A8AEE9 /* ggml.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = ggml.h; path = ../../ggml.h; sourceTree = "<group>"; };
|
||||
542EA09E2AC8725700A8AEE9 /* ggml-alloc.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = "ggml-alloc.h"; path = "../../ggml-alloc.h"; sourceTree = "<group>"; };
|
||||
542EA09F2AC8725700A8AEE9 /* ggml-alloc.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = "ggml-alloc.c"; path = "../../ggml-alloc.c"; sourceTree = "<group>"; };
|
||||
542EA0A12AC8729100A8AEE9 /* llama.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = llama.cpp; path = ../../llama.cpp; sourceTree = "<group>"; };
|
||||
542EA0A22AC8729100A8AEE9 /* llama.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = llama.h; path = ../../llama.h; sourceTree = "<group>"; };
|
||||
549479C52AC9E0F200E0F78B /* ggml-metal.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; name = "ggml-metal.m"; path = "../../ggml-metal.m"; sourceTree = "<group>"; };
|
||||
549479C62AC9E0F200E0F78B /* ggml-metal.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = "ggml-metal.h"; path = "../../ggml-metal.h"; sourceTree = "<group>"; };
|
||||
549479C82AC9E10B00E0F78B /* ggml-metal.metal */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.metal; name = "ggml-metal.metal"; path = "../../ggml-metal.metal"; sourceTree = "<group>"; };
|
||||
549479CA2AC9E16000E0F78B /* Metal.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = Metal.framework; path = System/Library/Frameworks/Metal.framework; sourceTree = SDKROOT; };
|
||||
8A08D20A2AC73B1500FE6CD4 /* bridging-header.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = "bridging-header.h"; sourceTree = "<group>"; };
|
||||
8A1C83732AC328BD0096AF73 /* llama.swiftui.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = llama.swiftui.app; sourceTree = BUILT_PRODUCTS_DIR; };
|
||||
8A1C83762AC328BD0096AF73 /* llama_swiftuiApp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = llama_swiftuiApp.swift; sourceTree = "<group>"; };
|
||||
8A1C83782AC328BD0096AF73 /* ContentView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ContentView.swift; sourceTree = "<group>"; };
|
||||
8A1C837A2AC328BE0096AF73 /* Assets.xcassets */ = {isa = PBXFileReference; lastKnownFileType = folder.assetcatalog; path = Assets.xcassets; sourceTree = "<group>"; };
|
||||
8A1C837D2AC328BE0096AF73 /* Preview Assets.xcassets */ = {isa = PBXFileReference; lastKnownFileType = folder.assetcatalog; path = "Preview Assets.xcassets"; sourceTree = "<group>"; };
|
||||
8A39BE092AC7601000BFEB40 /* Accelerate.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = Accelerate.framework; path = System/Library/Frameworks/Accelerate.framework; sourceTree = SDKROOT; };
|
||||
8A3F841F2AC4C824005E2EE8 /* llama-2-7b-chat.Q2_K.gguf */ = {isa = PBXFileReference; lastKnownFileType = file; path = "llama-2-7b-chat.Q2_K.gguf"; sourceTree = "<group>"; };
|
||||
8A3F84232AC4C891005E2EE8 /* models */ = {isa = PBXFileReference; lastKnownFileType = folder; name = models; path = llama.swiftui/Resources/models; sourceTree = "<group>"; };
|
||||
8A907F322AC7134E006146EA /* LibLlama.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = LibLlama.swift; sourceTree = "<group>"; };
|
||||
8A9F7C4C2AC332EE008AE1EA /* LlamaState.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = LlamaState.swift; sourceTree = "<group>"; };
|
||||
542376062B0D9BEA008E6A1C /* ggml-quants.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = "ggml-quants.h"; path = "../../ggml-quants.h"; sourceTree = "<group>"; };
|
||||
542376072B0D9BFB008E6A1C /* ggml-quants.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = "ggml-quants.c"; path = "../../ggml-quants.c"; sourceTree = "<group>"; };
|
||||
542376092B0D9C40008E6A1C /* ggml-backend.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = "ggml-backend.h"; path = "../../ggml-backend.h"; sourceTree = "<group>"; };
|
||||
5423760A2B0D9C4B008E6A1C /* ggml-backend.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = "ggml-backend.c"; path = "../../ggml-backend.c"; sourceTree = "<group>"; };
|
||||
542EA09B2AC8723900A8AEE9 /* ggml.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = ggml.c; path = ../../ggml.c; sourceTree = "<group>"; };
|
||||
542EA09C2AC8723900A8AEE9 /* ggml.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = ggml.h; path = ../../ggml.h; sourceTree = "<group>"; };
|
||||
542EA09E2AC8725700A8AEE9 /* ggml-alloc.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = "ggml-alloc.h"; path = "../../ggml-alloc.h"; sourceTree = "<group>"; };
|
||||
542EA09F2AC8725700A8AEE9 /* ggml-alloc.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = "ggml-alloc.c"; path = "../../ggml-alloc.c"; sourceTree = "<group>"; };
|
||||
542EA0A12AC8729100A8AEE9 /* llama.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = llama.cpp; path = ../../llama.cpp; sourceTree = "<group>"; };
|
||||
542EA0A22AC8729100A8AEE9 /* llama.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = llama.h; path = ../../llama.h; sourceTree = "<group>"; };
|
||||
549479C52AC9E0F200E0F78B /* ggml-metal.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; name = "ggml-metal.m"; path = "../../ggml-metal.m"; sourceTree = "<group>"; };
|
||||
549479C62AC9E0F200E0F78B /* ggml-metal.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = "ggml-metal.h"; path = "../../ggml-metal.h"; sourceTree = "<group>"; };
|
||||
549479C82AC9E10B00E0F78B /* ggml-metal.metal */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.metal; name = "ggml-metal.metal"; path = "../../ggml-metal.metal"; sourceTree = "<group>"; };
|
||||
549479CA2AC9E16000E0F78B /* Metal.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = Metal.framework; path = System/Library/Frameworks/Metal.framework; sourceTree = SDKROOT; };
|
||||
7FA3D2B22B2EA2F600543F92 /* DownloadButton.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = DownloadButton.swift; sourceTree = "<group>"; };
|
||||
8A08D20A2AC73B1500FE6CD4 /* bridging-header.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = "bridging-header.h"; sourceTree = "<group>"; };
|
||||
8A1C83732AC328BD0096AF73 /* llama.swiftui.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = llama.swiftui.app; sourceTree = BUILT_PRODUCTS_DIR; };
|
||||
8A1C83762AC328BD0096AF73 /* llama_swiftuiApp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = llama_swiftuiApp.swift; sourceTree = "<group>"; };
|
||||
8A1C83782AC328BD0096AF73 /* ContentView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ContentView.swift; sourceTree = "<group>"; };
|
||||
8A1C837A2AC328BE0096AF73 /* Assets.xcassets */ = {isa = PBXFileReference; lastKnownFileType = folder.assetcatalog; path = Assets.xcassets; sourceTree = "<group>"; };
|
||||
8A1C837D2AC328BE0096AF73 /* Preview Assets.xcassets */ = {isa = PBXFileReference; lastKnownFileType = folder.assetcatalog; path = "Preview Assets.xcassets"; sourceTree = "<group>"; };
|
||||
8A39BE092AC7601000BFEB40 /* Accelerate.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = Accelerate.framework; path = System/Library/Frameworks/Accelerate.framework; sourceTree = SDKROOT; };
|
||||
8A3F84232AC4C891005E2EE8 /* models */ = {isa = PBXFileReference; lastKnownFileType = folder; name = models; path = llama.swiftui/Resources/models; sourceTree = "<group>"; };
|
||||
8A907F322AC7134E006146EA /* LibLlama.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = LibLlama.swift; sourceTree = "<group>"; };
|
||||
8A9F7C4C2AC332EE008AE1EA /* LlamaState.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = LlamaState.swift; sourceTree = "<group>"; };
|
||||
/* End PBXFileReference section */
|
||||
|
||||
/* Begin PBXFrameworksBuildPhase section */
|
||||
8A1C83702AC328BD0096AF73 /* Frameworks */ = {
|
||||
isa = PBXFrameworksBuildPhase;
|
||||
buildActionMask = 2147483647;
|
||||
files = (
|
||||
549479CB2AC9E16000E0F78B /* Metal.framework in Frameworks */,
|
||||
8A39BE0A2AC7601100BFEB40 /* Accelerate.framework in Frameworks */,
|
||||
);
|
||||
runOnlyForDeploymentPostprocessing = 0;
|
||||
};
|
||||
8A1C83702AC328BD0096AF73 /* Frameworks */ = {
|
||||
isa = PBXFrameworksBuildPhase;
|
||||
buildActionMask = 2147483647;
|
||||
files = (
|
||||
549479CB2AC9E16000E0F78B /* Metal.framework in Frameworks */,
|
||||
8A39BE0A2AC7601100BFEB40 /* Accelerate.framework in Frameworks */,
|
||||
);
|
||||
runOnlyForDeploymentPostprocessing = 0;
|
||||
};
|
||||
/* End PBXFrameworksBuildPhase section */
|
||||
|
||||
/* Begin PBXGroup section */
|
||||
8A08D1F62AC7383900FE6CD4 /* llama.cpp */ = {
|
||||
isa = PBXGroup;
|
||||
children = (
|
||||
5423760A2B0D9C4B008E6A1C /* ggml-backend.c */,
|
||||
542376092B0D9C40008E6A1C /* ggml-backend.h */,
|
||||
542376062B0D9BEA008E6A1C /* ggml-quants.h */,
|
||||
542376072B0D9BFB008E6A1C /* ggml-quants.c */,
|
||||
549479C82AC9E10B00E0F78B /* ggml-metal.metal */,
|
||||
549479C62AC9E0F200E0F78B /* ggml-metal.h */,
|
||||
549479C52AC9E0F200E0F78B /* ggml-metal.m */,
|
||||
542EA09B2AC8723900A8AEE9 /* ggml.c */,
|
||||
542EA09C2AC8723900A8AEE9 /* ggml.h */,
|
||||
542EA09F2AC8725700A8AEE9 /* ggml-alloc.c */,
|
||||
542EA09E2AC8725700A8AEE9 /* ggml-alloc.h */,
|
||||
542EA0A12AC8729100A8AEE9 /* llama.cpp */,
|
||||
542EA0A22AC8729100A8AEE9 /* llama.h */,
|
||||
);
|
||||
name = llama.cpp;
|
||||
sourceTree = "<group>";
|
||||
};
|
||||
8A1C836A2AC328BD0096AF73 = {
|
||||
isa = PBXGroup;
|
||||
children = (
|
||||
8A08D1F62AC7383900FE6CD4 /* llama.cpp */,
|
||||
8A907F312AC7134E006146EA /* llama.cpp.swift */,
|
||||
8A3F84232AC4C891005E2EE8 /* models */,
|
||||
8A1C83752AC328BD0096AF73 /* llama.swiftui */,
|
||||
8A1C83742AC328BD0096AF73 /* Products */,
|
||||
8A39BE082AC7601000BFEB40 /* Frameworks */,
|
||||
);
|
||||
sourceTree = "<group>";
|
||||
};
|
||||
8A1C83742AC328BD0096AF73 /* Products */ = {
|
||||
isa = PBXGroup;
|
||||
children = (
|
||||
8A1C83732AC328BD0096AF73 /* llama.swiftui.app */,
|
||||
);
|
||||
name = Products;
|
||||
sourceTree = "<group>";
|
||||
};
|
||||
8A1C83752AC328BD0096AF73 /* llama.swiftui */ = {
|
||||
isa = PBXGroup;
|
||||
children = (
|
||||
8A3F84102AC4BD85005E2EE8 /* Resources */,
|
||||
8A9F7C4B2AC332DC008AE1EA /* Models */,
|
||||
8A9F7C4A2AC332BF008AE1EA /* UI */,
|
||||
8A1C83762AC328BD0096AF73 /* llama_swiftuiApp.swift */,
|
||||
8A1C837A2AC328BE0096AF73 /* Assets.xcassets */,
|
||||
8A1C837C2AC328BE0096AF73 /* Preview Content */,
|
||||
);
|
||||
path = llama.swiftui;
|
||||
sourceTree = "<group>";
|
||||
};
|
||||
8A1C837C2AC328BE0096AF73 /* Preview Content */ = {
|
||||
isa = PBXGroup;
|
||||
children = (
|
||||
8A1C837D2AC328BE0096AF73 /* Preview Assets.xcassets */,
|
||||
);
|
||||
path = "Preview Content";
|
||||
sourceTree = "<group>";
|
||||
};
|
||||
8A39BE082AC7601000BFEB40 /* Frameworks */ = {
|
||||
isa = PBXGroup;
|
||||
children = (
|
||||
549479CA2AC9E16000E0F78B /* Metal.framework */,
|
||||
8A39BE092AC7601000BFEB40 /* Accelerate.framework */,
|
||||
);
|
||||
name = Frameworks;
|
||||
sourceTree = "<group>";
|
||||
};
|
||||
8A3F84102AC4BD85005E2EE8 /* Resources */ = {
|
||||
isa = PBXGroup;
|
||||
children = (
|
||||
8A3F84112AC4BD8C005E2EE8 /* models */,
|
||||
);
|
||||
path = Resources;
|
||||
sourceTree = "<group>";
|
||||
};
|
||||
8A3F84112AC4BD8C005E2EE8 /* models */ = {
|
||||
isa = PBXGroup;
|
||||
children = (
|
||||
8A3F841F2AC4C824005E2EE8 /* llama-2-7b-chat.Q2_K.gguf */,
|
||||
);
|
||||
path = models;
|
||||
sourceTree = "<group>";
|
||||
};
|
||||
8A907F312AC7134E006146EA /* llama.cpp.swift */ = {
|
||||
isa = PBXGroup;
|
||||
children = (
|
||||
8A08D20A2AC73B1500FE6CD4 /* bridging-header.h */,
|
||||
8A907F322AC7134E006146EA /* LibLlama.swift */,
|
||||
);
|
||||
path = llama.cpp.swift;
|
||||
sourceTree = "<group>";
|
||||
};
|
||||
8A9F7C4A2AC332BF008AE1EA /* UI */ = {
|
||||
isa = PBXGroup;
|
||||
children = (
|
||||
8A1C83782AC328BD0096AF73 /* ContentView.swift */,
|
||||
);
|
||||
path = UI;
|
||||
sourceTree = "<group>";
|
||||
};
|
||||
8A9F7C4B2AC332DC008AE1EA /* Models */ = {
|
||||
isa = PBXGroup;
|
||||
children = (
|
||||
8A9F7C4C2AC332EE008AE1EA /* LlamaState.swift */,
|
||||
);
|
||||
path = Models;
|
||||
sourceTree = "<group>";
|
||||
};
|
||||
8A08D1F62AC7383900FE6CD4 /* llama.cpp */ = {
|
||||
isa = PBXGroup;
|
||||
children = (
|
||||
5423760A2B0D9C4B008E6A1C /* ggml-backend.c */,
|
||||
542376092B0D9C40008E6A1C /* ggml-backend.h */,
|
||||
542376062B0D9BEA008E6A1C /* ggml-quants.h */,
|
||||
542376072B0D9BFB008E6A1C /* ggml-quants.c */,
|
||||
549479C82AC9E10B00E0F78B /* ggml-metal.metal */,
|
||||
549479C62AC9E0F200E0F78B /* ggml-metal.h */,
|
||||
549479C52AC9E0F200E0F78B /* ggml-metal.m */,
|
||||
542EA09B2AC8723900A8AEE9 /* ggml.c */,
|
||||
542EA09C2AC8723900A8AEE9 /* ggml.h */,
|
||||
542EA09F2AC8725700A8AEE9 /* ggml-alloc.c */,
|
||||
542EA09E2AC8725700A8AEE9 /* ggml-alloc.h */,
|
||||
542EA0A12AC8729100A8AEE9 /* llama.cpp */,
|
||||
542EA0A22AC8729100A8AEE9 /* llama.h */,
|
||||
);
|
||||
name = llama.cpp;
|
||||
sourceTree = "<group>";
|
||||
};
|
||||
8A1C836A2AC328BD0096AF73 = {
|
||||
isa = PBXGroup;
|
||||
children = (
|
||||
8A08D1F62AC7383900FE6CD4 /* llama.cpp */,
|
||||
8A907F312AC7134E006146EA /* llama.cpp.swift */,
|
||||
8A3F84232AC4C891005E2EE8 /* models */,
|
||||
8A1C83752AC328BD0096AF73 /* llama.swiftui */,
|
||||
8A1C83742AC328BD0096AF73 /* Products */,
|
||||
8A39BE082AC7601000BFEB40 /* Frameworks */,
|
||||
);
|
||||
sourceTree = "<group>";
|
||||
};
|
||||
8A1C83742AC328BD0096AF73 /* Products */ = {
|
||||
isa = PBXGroup;
|
||||
children = (
|
||||
8A1C83732AC328BD0096AF73 /* llama.swiftui.app */,
|
||||
);
|
||||
name = Products;
|
||||
sourceTree = "<group>";
|
||||
};
|
||||
8A1C83752AC328BD0096AF73 /* llama.swiftui */ = {
|
||||
isa = PBXGroup;
|
||||
children = (
|
||||
8A3F84102AC4BD85005E2EE8 /* Resources */,
|
||||
8A9F7C4B2AC332DC008AE1EA /* Models */,
|
||||
8A9F7C4A2AC332BF008AE1EA /* UI */,
|
||||
8A1C83762AC328BD0096AF73 /* llama_swiftuiApp.swift */,
|
||||
8A1C837A2AC328BE0096AF73 /* Assets.xcassets */,
|
||||
8A1C837C2AC328BE0096AF73 /* Preview Content */,
|
||||
);
|
||||
path = llama.swiftui;
|
||||
sourceTree = "<group>";
|
||||
};
|
||||
8A1C837C2AC328BE0096AF73 /* Preview Content */ = {
|
||||
isa = PBXGroup;
|
||||
children = (
|
||||
8A1C837D2AC328BE0096AF73 /* Preview Assets.xcassets */,
|
||||
);
|
||||
path = "Preview Content";
|
||||
sourceTree = "<group>";
|
||||
};
|
||||
8A39BE082AC7601000BFEB40 /* Frameworks */ = {
|
||||
isa = PBXGroup;
|
||||
children = (
|
||||
549479CA2AC9E16000E0F78B /* Metal.framework */,
|
||||
8A39BE092AC7601000BFEB40 /* Accelerate.framework */,
|
||||
);
|
||||
name = Frameworks;
|
||||
sourceTree = "<group>";
|
||||
};
|
||||
8A3F84102AC4BD85005E2EE8 /* Resources */ = {
|
||||
isa = PBXGroup;
|
||||
children = (
|
||||
8A3F84112AC4BD8C005E2EE8 /* models */,
|
||||
);
|
||||
path = Resources;
|
||||
sourceTree = "<group>";
|
||||
};
|
||||
8A3F84112AC4BD8C005E2EE8 /* models */ = {
|
||||
isa = PBXGroup;
|
||||
children = (
|
||||
);
|
||||
path = models;
|
||||
sourceTree = "<group>";
|
||||
};
|
||||
8A907F312AC7134E006146EA /* llama.cpp.swift */ = {
|
||||
isa = PBXGroup;
|
||||
children = (
|
||||
8A08D20A2AC73B1500FE6CD4 /* bridging-header.h */,
|
||||
8A907F322AC7134E006146EA /* LibLlama.swift */,
|
||||
);
|
||||
path = llama.cpp.swift;
|
||||
sourceTree = "<group>";
|
||||
};
|
||||
8A9F7C4A2AC332BF008AE1EA /* UI */ = {
|
||||
isa = PBXGroup;
|
||||
children = (
|
||||
7FA3D2B22B2EA2F600543F92 /* DownloadButton.swift */,
|
||||
8A1C83782AC328BD0096AF73 /* ContentView.swift */,
|
||||
);
|
||||
path = UI;
|
||||
sourceTree = "<group>";
|
||||
};
|
||||
8A9F7C4B2AC332DC008AE1EA /* Models */ = {
|
||||
isa = PBXGroup;
|
||||
children = (
|
||||
8A9F7C4C2AC332EE008AE1EA /* LlamaState.swift */,
|
||||
);
|
||||
path = Models;
|
||||
sourceTree = "<group>";
|
||||
};
|
||||
/* End PBXGroup section */
|
||||
|
||||
/* Begin PBXNativeTarget section */
|
||||
8A1C83722AC328BD0096AF73 /* llama.swiftui */ = {
|
||||
isa = PBXNativeTarget;
|
||||
buildConfigurationList = 8A1C83812AC328BE0096AF73 /* Build configuration list for PBXNativeTarget "llama.swiftui" */;
|
||||
buildPhases = (
|
||||
8A1C836F2AC328BD0096AF73 /* Sources */,
|
||||
8A1C83702AC328BD0096AF73 /* Frameworks */,
|
||||
8A1C83712AC328BD0096AF73 /* Resources */,
|
||||
);
|
||||
buildRules = (
|
||||
);
|
||||
dependencies = (
|
||||
);
|
||||
name = llama.swiftui;
|
||||
packageProductDependencies = (
|
||||
);
|
||||
productName = llama.swiftui;
|
||||
productReference = 8A1C83732AC328BD0096AF73 /* llama.swiftui.app */;
|
||||
productType = "com.apple.product-type.application";
|
||||
};
|
||||
8A1C83722AC328BD0096AF73 /* llama.swiftui */ = {
|
||||
isa = PBXNativeTarget;
|
||||
buildConfigurationList = 8A1C83812AC328BE0096AF73 /* Build configuration list for PBXNativeTarget "llama.swiftui" */;
|
||||
buildPhases = (
|
||||
8A1C836F2AC328BD0096AF73 /* Sources */,
|
||||
8A1C83702AC328BD0096AF73 /* Frameworks */,
|
||||
8A1C83712AC328BD0096AF73 /* Resources */,
|
||||
);
|
||||
buildRules = (
|
||||
);
|
||||
dependencies = (
|
||||
);
|
||||
name = llama.swiftui;
|
||||
packageProductDependencies = (
|
||||
);
|
||||
productName = llama.swiftui;
|
||||
productReference = 8A1C83732AC328BD0096AF73 /* llama.swiftui.app */;
|
||||
productType = "com.apple.product-type.application";
|
||||
};
|
||||
/* End PBXNativeTarget section */
|
||||
|
||||
/* Begin PBXProject section */
|
||||
8A1C836B2AC328BD0096AF73 /* Project object */ = {
|
||||
isa = PBXProject;
|
||||
attributes = {
|
||||
BuildIndependentTargetsInParallel = 1;
|
||||
LastSwiftUpdateCheck = 1500;
|
||||
LastUpgradeCheck = 1500;
|
||||
TargetAttributes = {
|
||||
8A1C83722AC328BD0096AF73 = {
|
||||
CreatedOnToolsVersion = 15.0;
|
||||
LastSwiftMigration = 1500;
|
||||
};
|
||||
};
|
||||
};
|
||||
buildConfigurationList = 8A1C836E2AC328BD0096AF73 /* Build configuration list for PBXProject "llama.swiftui" */;
|
||||
compatibilityVersion = "Xcode 14.0";
|
||||
developmentRegion = en;
|
||||
hasScannedForEncodings = 0;
|
||||
knownRegions = (
|
||||
en,
|
||||
Base,
|
||||
);
|
||||
mainGroup = 8A1C836A2AC328BD0096AF73;
|
||||
packageReferences = (
|
||||
);
|
||||
productRefGroup = 8A1C83742AC328BD0096AF73 /* Products */;
|
||||
projectDirPath = "";
|
||||
projectRoot = "";
|
||||
targets = (
|
||||
8A1C83722AC328BD0096AF73 /* llama.swiftui */,
|
||||
);
|
||||
};
|
||||
8A1C836B2AC328BD0096AF73 /* Project object */ = {
|
||||
isa = PBXProject;
|
||||
attributes = {
|
||||
BuildIndependentTargetsInParallel = 1;
|
||||
LastSwiftUpdateCheck = 1500;
|
||||
LastUpgradeCheck = 1500;
|
||||
TargetAttributes = {
|
||||
8A1C83722AC328BD0096AF73 = {
|
||||
CreatedOnToolsVersion = 15.0;
|
||||
LastSwiftMigration = 1500;
|
||||
};
|
||||
};
|
||||
};
|
||||
buildConfigurationList = 8A1C836E2AC328BD0096AF73 /* Build configuration list for PBXProject "llama.swiftui" */;
|
||||
compatibilityVersion = "Xcode 14.0";
|
||||
developmentRegion = en;
|
||||
hasScannedForEncodings = 0;
|
||||
knownRegions = (
|
||||
en,
|
||||
Base,
|
||||
);
|
||||
mainGroup = 8A1C836A2AC328BD0096AF73;
|
||||
packageReferences = (
|
||||
);
|
||||
productRefGroup = 8A1C83742AC328BD0096AF73 /* Products */;
|
||||
projectDirPath = "";
|
||||
projectRoot = "";
|
||||
targets = (
|
||||
8A1C83722AC328BD0096AF73 /* llama.swiftui */,
|
||||
);
|
||||
};
|
||||
/* End PBXProject section */
|
||||
|
||||
/* Begin PBXResourcesBuildPhase section */
|
||||
8A1C83712AC328BD0096AF73 /* Resources */ = {
|
||||
isa = PBXResourcesBuildPhase;
|
||||
buildActionMask = 2147483647;
|
||||
files = (
|
||||
542378792ACE3F3500834A7B /* ggml-metal.metal in Resources */,
|
||||
8A3F84242AC4C891005E2EE8 /* models in Resources */,
|
||||
8A1C837E2AC328BE0096AF73 /* Preview Assets.xcassets in Resources */,
|
||||
8A1C837B2AC328BE0096AF73 /* Assets.xcassets in Resources */,
|
||||
);
|
||||
runOnlyForDeploymentPostprocessing = 0;
|
||||
};
|
||||
8A1C83712AC328BD0096AF73 /* Resources */ = {
|
||||
isa = PBXResourcesBuildPhase;
|
||||
buildActionMask = 2147483647;
|
||||
files = (
|
||||
542378792ACE3F3500834A7B /* ggml-metal.metal in Resources */,
|
||||
8A3F84242AC4C891005E2EE8 /* models in Resources */,
|
||||
8A1C837E2AC328BE0096AF73 /* Preview Assets.xcassets in Resources */,
|
||||
8A1C837B2AC328BE0096AF73 /* Assets.xcassets in Resources */,
|
||||
);
|
||||
runOnlyForDeploymentPostprocessing = 0;
|
||||
};
|
||||
/* End PBXResourcesBuildPhase section */
|
||||
|
||||
/* Begin PBXSourcesBuildPhase section */
|
||||
8A1C836F2AC328BD0096AF73 /* Sources */ = {
|
||||
isa = PBXSourcesBuildPhase;
|
||||
buildActionMask = 2147483647;
|
||||
files = (
|
||||
542376082B0D9BFB008E6A1C /* ggml-quants.c in Sources */,
|
||||
549479CD2AC9E42A00E0F78B /* ggml-metal.m in Sources */,
|
||||
542EA09D2AC8723900A8AEE9 /* ggml.c in Sources */,
|
||||
8A907F332AC7138A006146EA /* LibLlama.swift in Sources */,
|
||||
542EA0A32AC8729100A8AEE9 /* llama.cpp in Sources */,
|
||||
8A9F7C4D2AC332EE008AE1EA /* LlamaState.swift in Sources */,
|
||||
8A1C83792AC328BD0096AF73 /* ContentView.swift in Sources */,
|
||||
8A1C83772AC328BD0096AF73 /* llama_swiftuiApp.swift in Sources */,
|
||||
542EA0A02AC8725700A8AEE9 /* ggml-alloc.c in Sources */,
|
||||
5423760B2B0D9C4B008E6A1C /* ggml-backend.c in Sources */,
|
||||
);
|
||||
runOnlyForDeploymentPostprocessing = 0;
|
||||
};
|
||||
8A1C836F2AC328BD0096AF73 /* Sources */ = {
|
||||
isa = PBXSourcesBuildPhase;
|
||||
buildActionMask = 2147483647;
|
||||
files = (
|
||||
542376082B0D9BFB008E6A1C /* ggml-quants.c in Sources */,
|
||||
549479CD2AC9E42A00E0F78B /* ggml-metal.m in Sources */,
|
||||
542EA09D2AC8723900A8AEE9 /* ggml.c in Sources */,
|
||||
8A907F332AC7138A006146EA /* LibLlama.swift in Sources */,
|
||||
542EA0A32AC8729100A8AEE9 /* llama.cpp in Sources */,
|
||||
8A9F7C4D2AC332EE008AE1EA /* LlamaState.swift in Sources */,
|
||||
8A1C83792AC328BD0096AF73 /* ContentView.swift in Sources */,
|
||||
8A1C83772AC328BD0096AF73 /* llama_swiftuiApp.swift in Sources */,
|
||||
7FA3D2B32B2EA2F600543F92 /* DownloadButton.swift in Sources */,
|
||||
542EA0A02AC8725700A8AEE9 /* ggml-alloc.c in Sources */,
|
||||
5423760B2B0D9C4B008E6A1C /* ggml-backend.c in Sources */,
|
||||
);
|
||||
runOnlyForDeploymentPostprocessing = 0;
|
||||
};
|
||||
/* End PBXSourcesBuildPhase section */
|
||||
|
||||
/* Begin XCBuildConfiguration section */
|
||||
8A1C837F2AC328BE0096AF73 /* Debug */ = {
|
||||
isa = XCBuildConfiguration;
|
||||
buildSettings = {
|
||||
ALWAYS_SEARCH_USER_PATHS = NO;
|
||||
ASSETCATALOG_COMPILER_GENERATE_SWIFT_ASSET_SYMBOL_EXTENSIONS = YES;
|
||||
CLANG_ANALYZER_NONNULL = YES;
|
||||
CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE;
|
||||
CLANG_CXX_LANGUAGE_STANDARD = "gnu++20";
|
||||
CLANG_ENABLE_MODULES = YES;
|
||||
CLANG_ENABLE_OBJC_ARC = YES;
|
||||
CLANG_ENABLE_OBJC_WEAK = YES;
|
||||
CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES;
|
||||
CLANG_WARN_BOOL_CONVERSION = YES;
|
||||
CLANG_WARN_COMMA = YES;
|
||||
CLANG_WARN_CONSTANT_CONVERSION = YES;
|
||||
CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES;
|
||||
CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR;
|
||||
CLANG_WARN_DOCUMENTATION_COMMENTS = YES;
|
||||
CLANG_WARN_EMPTY_BODY = YES;
|
||||
CLANG_WARN_ENUM_CONVERSION = YES;
|
||||
CLANG_WARN_INFINITE_RECURSION = YES;
|
||||
CLANG_WARN_INT_CONVERSION = YES;
|
||||
CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES;
|
||||
CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES;
|
||||
CLANG_WARN_OBJC_LITERAL_CONVERSION = YES;
|
||||
CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR;
|
||||
CLANG_WARN_QUOTED_INCLUDE_IN_FRAMEWORK_HEADER = YES;
|
||||
CLANG_WARN_RANGE_LOOP_ANALYSIS = YES;
|
||||
CLANG_WARN_STRICT_PROTOTYPES = YES;
|
||||
CLANG_WARN_SUSPICIOUS_MOVE = YES;
|
||||
CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE;
|
||||
CLANG_WARN_UNREACHABLE_CODE = YES;
|
||||
CLANG_WARN__DUPLICATE_METHOD_MATCH = YES;
|
||||
COPY_PHASE_STRIP = NO;
|
||||
DEBUG_INFORMATION_FORMAT = dwarf;
|
||||
ENABLE_STRICT_OBJC_MSGSEND = YES;
|
||||
ENABLE_TESTABILITY = YES;
|
||||
ENABLE_USER_SCRIPT_SANDBOXING = YES;
|
||||
GCC_C_LANGUAGE_STANDARD = gnu17;
|
||||
GCC_DYNAMIC_NO_PIC = NO;
|
||||
GCC_NO_COMMON_BLOCKS = YES;
|
||||
GCC_OPTIMIZATION_LEVEL = 0;
|
||||
GCC_PREPROCESSOR_DEFINITIONS = (
|
||||
"DEBUG=1",
|
||||
"$(inherited)",
|
||||
);
|
||||
GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
|
||||
GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR;
|
||||
GCC_WARN_UNDECLARED_SELECTOR = YES;
|
||||
GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE;
|
||||
GCC_WARN_UNUSED_FUNCTION = YES;
|
||||
GCC_WARN_UNUSED_VARIABLE = YES;
|
||||
IPHONEOS_DEPLOYMENT_TARGET = 17.0;
|
||||
LOCALIZATION_PREFERS_STRING_CATALOGS = YES;
|
||||
MTL_ENABLE_DEBUG_INFO = INCLUDE_SOURCE;
|
||||
MTL_FAST_MATH = YES;
|
||||
ONLY_ACTIVE_ARCH = YES;
|
||||
SDKROOT = iphoneos;
|
||||
SWIFT_ACTIVE_COMPILATION_CONDITIONS = "DEBUG $(inherited)";
|
||||
SWIFT_OPTIMIZATION_LEVEL = "-Onone";
|
||||
};
|
||||
name = Debug;
|
||||
};
|
||||
8A1C83802AC328BE0096AF73 /* Release */ = {
|
||||
isa = XCBuildConfiguration;
|
||||
buildSettings = {
|
||||
ALWAYS_SEARCH_USER_PATHS = NO;
|
||||
ASSETCATALOG_COMPILER_GENERATE_SWIFT_ASSET_SYMBOL_EXTENSIONS = YES;
|
||||
CLANG_ANALYZER_NONNULL = YES;
|
||||
CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE;
|
||||
CLANG_CXX_LANGUAGE_STANDARD = "gnu++20";
|
||||
CLANG_ENABLE_MODULES = YES;
|
||||
CLANG_ENABLE_OBJC_ARC = YES;
|
||||
CLANG_ENABLE_OBJC_WEAK = YES;
|
||||
CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES;
|
||||
CLANG_WARN_BOOL_CONVERSION = YES;
|
||||
CLANG_WARN_COMMA = YES;
|
||||
CLANG_WARN_CONSTANT_CONVERSION = YES;
|
||||
CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES;
|
||||
CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR;
|
||||
CLANG_WARN_DOCUMENTATION_COMMENTS = YES;
|
||||
CLANG_WARN_EMPTY_BODY = YES;
|
||||
CLANG_WARN_ENUM_CONVERSION = YES;
|
||||
CLANG_WARN_INFINITE_RECURSION = YES;
|
||||
CLANG_WARN_INT_CONVERSION = YES;
|
||||
CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES;
|
||||
CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES;
|
||||
CLANG_WARN_OBJC_LITERAL_CONVERSION = YES;
|
||||
CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR;
|
||||
CLANG_WARN_QUOTED_INCLUDE_IN_FRAMEWORK_HEADER = YES;
|
||||
CLANG_WARN_RANGE_LOOP_ANALYSIS = YES;
|
||||
CLANG_WARN_STRICT_PROTOTYPES = YES;
|
||||
CLANG_WARN_SUSPICIOUS_MOVE = YES;
|
||||
CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE;
|
||||
CLANG_WARN_UNREACHABLE_CODE = YES;
|
||||
CLANG_WARN__DUPLICATE_METHOD_MATCH = YES;
|
||||
COPY_PHASE_STRIP = NO;
|
||||
DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym";
|
||||
ENABLE_NS_ASSERTIONS = NO;
|
||||
ENABLE_STRICT_OBJC_MSGSEND = YES;
|
||||
ENABLE_USER_SCRIPT_SANDBOXING = YES;
|
||||
GCC_C_LANGUAGE_STANDARD = gnu17;
|
||||
GCC_NO_COMMON_BLOCKS = YES;
|
||||
GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
|
||||
GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR;
|
||||
GCC_WARN_UNDECLARED_SELECTOR = YES;
|
||||
GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE;
|
||||
GCC_WARN_UNUSED_FUNCTION = YES;
|
||||
GCC_WARN_UNUSED_VARIABLE = YES;
|
||||
IPHONEOS_DEPLOYMENT_TARGET = 17.0;
|
||||
LOCALIZATION_PREFERS_STRING_CATALOGS = YES;
|
||||
MTL_ENABLE_DEBUG_INFO = NO;
|
||||
MTL_FAST_MATH = YES;
|
||||
SDKROOT = iphoneos;
|
||||
SWIFT_COMPILATION_MODE = wholemodule;
|
||||
VALIDATE_PRODUCT = YES;
|
||||
};
|
||||
name = Release;
|
||||
};
|
||||
8A1C83822AC328BE0096AF73 /* Debug */ = {
|
||||
isa = XCBuildConfiguration;
|
||||
buildSettings = {
|
||||
ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon;
|
||||
ASSETCATALOG_COMPILER_GLOBAL_ACCENT_COLOR_NAME = AccentColor;
|
||||
CLANG_ENABLE_MODULES = YES;
|
||||
CODE_SIGN_STYLE = Automatic;
|
||||
CURRENT_PROJECT_VERSION = 1;
|
||||
DEVELOPMENT_ASSET_PATHS = "\"llama.swiftui/Preview Content\"";
|
||||
DEVELOPMENT_TEAM = STLSG3FG8Q;
|
||||
ENABLE_PREVIEWS = YES;
|
||||
GENERATE_INFOPLIST_FILE = YES;
|
||||
INFOPLIST_KEY_UIApplicationSceneManifest_Generation = YES;
|
||||
INFOPLIST_KEY_UIApplicationSupportsIndirectInputEvents = YES;
|
||||
INFOPLIST_KEY_UILaunchScreen_Generation = YES;
|
||||
INFOPLIST_KEY_UISupportedInterfaceOrientations_iPad = "UIInterfaceOrientationPortrait UIInterfaceOrientationPortraitUpsideDown UIInterfaceOrientationLandscapeLeft UIInterfaceOrientationLandscapeRight";
|
||||
INFOPLIST_KEY_UISupportedInterfaceOrientations_iPhone = "UIInterfaceOrientationPortrait UIInterfaceOrientationLandscapeLeft UIInterfaceOrientationLandscapeRight";
|
||||
IPHONEOS_DEPLOYMENT_TARGET = 16.0;
|
||||
LD_RUNPATH_SEARCH_PATHS = (
|
||||
"$(inherited)",
|
||||
"@executable_path/Frameworks",
|
||||
);
|
||||
MARKETING_VERSION = 1.0;
|
||||
PRODUCT_BUNDLE_IDENTIFIER = "com.bachittle.llama-swift";
|
||||
PRODUCT_NAME = "$(TARGET_NAME)";
|
||||
SWIFT_EMIT_LOC_STRINGS = YES;
|
||||
SWIFT_OBJC_BRIDGING_HEADER = "llama.cpp.swift/bridging-header.h";
|
||||
SWIFT_OPTIMIZATION_LEVEL = "-Onone";
|
||||
SWIFT_VERSION = 5.0;
|
||||
TARGETED_DEVICE_FAMILY = "1,2";
|
||||
};
|
||||
name = Debug;
|
||||
};
|
||||
8A1C83832AC328BE0096AF73 /* Release */ = {
|
||||
isa = XCBuildConfiguration;
|
||||
buildSettings = {
|
||||
ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon;
|
||||
ASSETCATALOG_COMPILER_GLOBAL_ACCENT_COLOR_NAME = AccentColor;
|
||||
CLANG_ENABLE_MODULES = YES;
|
||||
CODE_SIGN_STYLE = Automatic;
|
||||
CURRENT_PROJECT_VERSION = 1;
|
||||
DEVELOPMENT_ASSET_PATHS = "\"llama.swiftui/Preview Content\"";
|
||||
DEVELOPMENT_TEAM = STLSG3FG8Q;
|
||||
ENABLE_PREVIEWS = YES;
|
||||
GENERATE_INFOPLIST_FILE = YES;
|
||||
INFOPLIST_KEY_UIApplicationSceneManifest_Generation = YES;
|
||||
INFOPLIST_KEY_UIApplicationSupportsIndirectInputEvents = YES;
|
||||
INFOPLIST_KEY_UILaunchScreen_Generation = YES;
|
||||
INFOPLIST_KEY_UISupportedInterfaceOrientations_iPad = "UIInterfaceOrientationPortrait UIInterfaceOrientationPortraitUpsideDown UIInterfaceOrientationLandscapeLeft UIInterfaceOrientationLandscapeRight";
|
||||
INFOPLIST_KEY_UISupportedInterfaceOrientations_iPhone = "UIInterfaceOrientationPortrait UIInterfaceOrientationLandscapeLeft UIInterfaceOrientationLandscapeRight";
|
||||
IPHONEOS_DEPLOYMENT_TARGET = 16.0;
|
||||
LD_RUNPATH_SEARCH_PATHS = (
|
||||
"$(inherited)",
|
||||
"@executable_path/Frameworks",
|
||||
);
|
||||
MARKETING_VERSION = 1.0;
|
||||
PRODUCT_BUNDLE_IDENTIFIER = "com.bachittle.llama-swift";
|
||||
PRODUCT_NAME = "$(TARGET_NAME)";
|
||||
SWIFT_EMIT_LOC_STRINGS = YES;
|
||||
SWIFT_OBJC_BRIDGING_HEADER = "llama.cpp.swift/bridging-header.h";
|
||||
SWIFT_VERSION = 5.0;
|
||||
TARGETED_DEVICE_FAMILY = "1,2";
|
||||
};
|
||||
name = Release;
|
||||
};
|
||||
8A1C837F2AC328BE0096AF73 /* Debug */ = {
|
||||
isa = XCBuildConfiguration;
|
||||
buildSettings = {
|
||||
ALWAYS_SEARCH_USER_PATHS = NO;
|
||||
ASSETCATALOG_COMPILER_GENERATE_SWIFT_ASSET_SYMBOL_EXTENSIONS = YES;
|
||||
CLANG_ANALYZER_NONNULL = YES;
|
||||
CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE;
|
||||
CLANG_CXX_LANGUAGE_STANDARD = "gnu++20";
|
||||
CLANG_ENABLE_MODULES = YES;
|
||||
CLANG_ENABLE_OBJC_ARC = YES;
|
||||
CLANG_ENABLE_OBJC_WEAK = YES;
|
||||
CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES;
|
||||
CLANG_WARN_BOOL_CONVERSION = YES;
|
||||
CLANG_WARN_COMMA = YES;
|
||||
CLANG_WARN_CONSTANT_CONVERSION = YES;
|
||||
CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES;
|
||||
CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR;
|
||||
CLANG_WARN_DOCUMENTATION_COMMENTS = YES;
|
||||
CLANG_WARN_EMPTY_BODY = YES;
|
||||
CLANG_WARN_ENUM_CONVERSION = YES;
|
||||
CLANG_WARN_INFINITE_RECURSION = YES;
|
||||
CLANG_WARN_INT_CONVERSION = YES;
|
||||
CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES;
|
||||
CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES;
|
||||
CLANG_WARN_OBJC_LITERAL_CONVERSION = YES;
|
||||
CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR;
|
||||
CLANG_WARN_QUOTED_INCLUDE_IN_FRAMEWORK_HEADER = YES;
|
||||
CLANG_WARN_RANGE_LOOP_ANALYSIS = YES;
|
||||
CLANG_WARN_STRICT_PROTOTYPES = YES;
|
||||
CLANG_WARN_SUSPICIOUS_MOVE = YES;
|
||||
CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE;
|
||||
CLANG_WARN_UNREACHABLE_CODE = YES;
|
||||
CLANG_WARN__DUPLICATE_METHOD_MATCH = YES;
|
||||
COPY_PHASE_STRIP = NO;
|
||||
DEBUG_INFORMATION_FORMAT = dwarf;
|
||||
ENABLE_STRICT_OBJC_MSGSEND = YES;
|
||||
ENABLE_TESTABILITY = YES;
|
||||
ENABLE_USER_SCRIPT_SANDBOXING = YES;
|
||||
GCC_C_LANGUAGE_STANDARD = gnu17;
|
||||
GCC_DYNAMIC_NO_PIC = NO;
|
||||
GCC_NO_COMMON_BLOCKS = YES;
|
||||
GCC_OPTIMIZATION_LEVEL = 0;
|
||||
GCC_PREPROCESSOR_DEFINITIONS = (
|
||||
"DEBUG=1",
|
||||
"$(inherited)",
|
||||
);
|
||||
GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
|
||||
GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR;
|
||||
GCC_WARN_UNDECLARED_SELECTOR = YES;
|
||||
GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE;
|
||||
GCC_WARN_UNUSED_FUNCTION = YES;
|
||||
GCC_WARN_UNUSED_VARIABLE = YES;
|
||||
IPHONEOS_DEPLOYMENT_TARGET = 17.0;
|
||||
LOCALIZATION_PREFERS_STRING_CATALOGS = YES;
|
||||
MTL_ENABLE_DEBUG_INFO = INCLUDE_SOURCE;
|
||||
MTL_FAST_MATH = YES;
|
||||
ONLY_ACTIVE_ARCH = YES;
|
||||
SDKROOT = iphoneos;
|
||||
SWIFT_ACTIVE_COMPILATION_CONDITIONS = "DEBUG $(inherited)";
|
||||
SWIFT_OPTIMIZATION_LEVEL = "-Onone";
|
||||
};
|
||||
name = Debug;
|
||||
};
|
||||
8A1C83802AC328BE0096AF73 /* Release */ = {
|
||||
isa = XCBuildConfiguration;
|
||||
buildSettings = {
|
||||
ALWAYS_SEARCH_USER_PATHS = NO;
|
||||
ASSETCATALOG_COMPILER_GENERATE_SWIFT_ASSET_SYMBOL_EXTENSIONS = YES;
|
||||
CLANG_ANALYZER_NONNULL = YES;
|
||||
CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE;
|
||||
CLANG_CXX_LANGUAGE_STANDARD = "gnu++20";
|
||||
CLANG_ENABLE_MODULES = YES;
|
||||
CLANG_ENABLE_OBJC_ARC = YES;
|
||||
CLANG_ENABLE_OBJC_WEAK = YES;
|
||||
CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES;
|
||||
CLANG_WARN_BOOL_CONVERSION = YES;
|
||||
CLANG_WARN_COMMA = YES;
|
||||
CLANG_WARN_CONSTANT_CONVERSION = YES;
|
||||
CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES;
|
||||
CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR;
|
||||
CLANG_WARN_DOCUMENTATION_COMMENTS = YES;
|
||||
CLANG_WARN_EMPTY_BODY = YES;
|
||||
CLANG_WARN_ENUM_CONVERSION = YES;
|
||||
CLANG_WARN_INFINITE_RECURSION = YES;
|
||||
CLANG_WARN_INT_CONVERSION = YES;
|
||||
CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES;
|
||||
CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES;
|
||||
CLANG_WARN_OBJC_LITERAL_CONVERSION = YES;
|
||||
CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR;
|
||||
CLANG_WARN_QUOTED_INCLUDE_IN_FRAMEWORK_HEADER = YES;
|
||||
CLANG_WARN_RANGE_LOOP_ANALYSIS = YES;
|
||||
CLANG_WARN_STRICT_PROTOTYPES = YES;
|
||||
CLANG_WARN_SUSPICIOUS_MOVE = YES;
|
||||
CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE;
|
||||
CLANG_WARN_UNREACHABLE_CODE = YES;
|
||||
CLANG_WARN__DUPLICATE_METHOD_MATCH = YES;
|
||||
COPY_PHASE_STRIP = NO;
|
||||
DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym";
|
||||
ENABLE_NS_ASSERTIONS = NO;
|
||||
ENABLE_STRICT_OBJC_MSGSEND = YES;
|
||||
ENABLE_USER_SCRIPT_SANDBOXING = YES;
|
||||
GCC_C_LANGUAGE_STANDARD = gnu17;
|
||||
GCC_NO_COMMON_BLOCKS = YES;
|
||||
GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
|
||||
GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR;
|
||||
GCC_WARN_UNDECLARED_SELECTOR = YES;
|
||||
GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE;
|
||||
GCC_WARN_UNUSED_FUNCTION = YES;
|
||||
GCC_WARN_UNUSED_VARIABLE = YES;
|
||||
IPHONEOS_DEPLOYMENT_TARGET = 17.0;
|
||||
LOCALIZATION_PREFERS_STRING_CATALOGS = YES;
|
||||
MTL_ENABLE_DEBUG_INFO = NO;
|
||||
MTL_FAST_MATH = YES;
|
||||
SDKROOT = iphoneos;
|
||||
SWIFT_COMPILATION_MODE = wholemodule;
|
||||
VALIDATE_PRODUCT = YES;
|
||||
};
|
||||
name = Release;
|
||||
};
|
||||
8A1C83822AC328BE0096AF73 /* Debug */ = {
|
||||
isa = XCBuildConfiguration;
|
||||
buildSettings = {
|
||||
ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon;
|
||||
ASSETCATALOG_COMPILER_GLOBAL_ACCENT_COLOR_NAME = AccentColor;
|
||||
CLANG_ENABLE_MODULES = YES;
|
||||
CODE_SIGN_STYLE = Automatic;
|
||||
CURRENT_PROJECT_VERSION = 1;
|
||||
DEVELOPMENT_ASSET_PATHS = "\"llama.swiftui/Preview Content\"";
|
||||
DEVELOPMENT_TEAM = STLSG3FG8Q;
|
||||
ENABLE_PREVIEWS = YES;
|
||||
GENERATE_INFOPLIST_FILE = YES;
|
||||
INFOPLIST_KEY_UIApplicationSceneManifest_Generation = YES;
|
||||
INFOPLIST_KEY_UIApplicationSupportsIndirectInputEvents = YES;
|
||||
INFOPLIST_KEY_UILaunchScreen_Generation = YES;
|
||||
INFOPLIST_KEY_UISupportedInterfaceOrientations_iPad = "UIInterfaceOrientationPortrait UIInterfaceOrientationPortraitUpsideDown UIInterfaceOrientationLandscapeLeft UIInterfaceOrientationLandscapeRight";
|
||||
INFOPLIST_KEY_UISupportedInterfaceOrientations_iPhone = "UIInterfaceOrientationPortrait UIInterfaceOrientationLandscapeLeft UIInterfaceOrientationLandscapeRight";
|
||||
IPHONEOS_DEPLOYMENT_TARGET = 16.0;
|
||||
LD_RUNPATH_SEARCH_PATHS = (
|
||||
"$(inherited)",
|
||||
"@executable_path/Frameworks",
|
||||
);
|
||||
MARKETING_VERSION = 1.0;
|
||||
PRODUCT_BUNDLE_IDENTIFIER = "com.bachittle.llama-swift";
|
||||
PRODUCT_NAME = "$(TARGET_NAME)";
|
||||
SWIFT_EMIT_LOC_STRINGS = YES;
|
||||
SWIFT_OBJC_BRIDGING_HEADER = "llama.cpp.swift/bridging-header.h";
|
||||
SWIFT_OPTIMIZATION_LEVEL = "-Onone";
|
||||
SWIFT_VERSION = 5.0;
|
||||
TARGETED_DEVICE_FAMILY = "1,2";
|
||||
};
|
||||
name = Debug;
|
||||
};
|
||||
8A1C83832AC328BE0096AF73 /* Release */ = {
|
||||
isa = XCBuildConfiguration;
|
||||
buildSettings = {
|
||||
ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon;
|
||||
ASSETCATALOG_COMPILER_GLOBAL_ACCENT_COLOR_NAME = AccentColor;
|
||||
CLANG_ENABLE_MODULES = YES;
|
||||
CODE_SIGN_STYLE = Automatic;
|
||||
CURRENT_PROJECT_VERSION = 1;
|
||||
DEVELOPMENT_ASSET_PATHS = "\"llama.swiftui/Preview Content\"";
|
||||
DEVELOPMENT_TEAM = STLSG3FG8Q;
|
||||
ENABLE_PREVIEWS = YES;
|
||||
GENERATE_INFOPLIST_FILE = YES;
|
||||
INFOPLIST_KEY_UIApplicationSceneManifest_Generation = YES;
|
||||
INFOPLIST_KEY_UIApplicationSupportsIndirectInputEvents = YES;
|
||||
INFOPLIST_KEY_UILaunchScreen_Generation = YES;
|
||||
INFOPLIST_KEY_UISupportedInterfaceOrientations_iPad = "UIInterfaceOrientationPortrait UIInterfaceOrientationPortraitUpsideDown UIInterfaceOrientationLandscapeLeft UIInterfaceOrientationLandscapeRight";
|
||||
INFOPLIST_KEY_UISupportedInterfaceOrientations_iPhone = "UIInterfaceOrientationPortrait UIInterfaceOrientationLandscapeLeft UIInterfaceOrientationLandscapeRight";
|
||||
IPHONEOS_DEPLOYMENT_TARGET = 16.0;
|
||||
LD_RUNPATH_SEARCH_PATHS = (
|
||||
"$(inherited)",
|
||||
"@executable_path/Frameworks",
|
||||
);
|
||||
MARKETING_VERSION = 1.0;
|
||||
PRODUCT_BUNDLE_IDENTIFIER = "com.bachittle.llama-swift";
|
||||
PRODUCT_NAME = "$(TARGET_NAME)";
|
||||
SWIFT_EMIT_LOC_STRINGS = YES;
|
||||
SWIFT_OBJC_BRIDGING_HEADER = "llama.cpp.swift/bridging-header.h";
|
||||
SWIFT_VERSION = 5.0;
|
||||
TARGETED_DEVICE_FAMILY = "1,2";
|
||||
};
|
||||
name = Release;
|
||||
};
|
||||
/* End XCBuildConfiguration section */
|
||||
|
||||
/* Begin XCConfigurationList section */
|
||||
8A1C836E2AC328BD0096AF73 /* Build configuration list for PBXProject "llama.swiftui" */ = {
|
||||
isa = XCConfigurationList;
|
||||
buildConfigurations = (
|
||||
8A1C837F2AC328BE0096AF73 /* Debug */,
|
||||
8A1C83802AC328BE0096AF73 /* Release */,
|
||||
);
|
||||
defaultConfigurationIsVisible = 0;
|
||||
defaultConfigurationName = Release;
|
||||
};
|
||||
8A1C83812AC328BE0096AF73 /* Build configuration list for PBXNativeTarget "llama.swiftui" */ = {
|
||||
isa = XCConfigurationList;
|
||||
buildConfigurations = (
|
||||
8A1C83822AC328BE0096AF73 /* Debug */,
|
||||
8A1C83832AC328BE0096AF73 /* Release */,
|
||||
);
|
||||
defaultConfigurationIsVisible = 0;
|
||||
defaultConfigurationName = Release;
|
||||
};
|
||||
8A1C836E2AC328BD0096AF73 /* Build configuration list for PBXProject "llama.swiftui" */ = {
|
||||
isa = XCConfigurationList;
|
||||
buildConfigurations = (
|
||||
8A1C837F2AC328BE0096AF73 /* Debug */,
|
||||
8A1C83802AC328BE0096AF73 /* Release */,
|
||||
);
|
||||
defaultConfigurationIsVisible = 0;
|
||||
defaultConfigurationName = Release;
|
||||
};
|
||||
8A1C83812AC328BE0096AF73 /* Build configuration list for PBXNativeTarget "llama.swiftui" */ = {
|
||||
isa = XCConfigurationList;
|
||||
buildConfigurations = (
|
||||
8A1C83822AC328BE0096AF73 /* Debug */,
|
||||
8A1C83832AC328BE0096AF73 /* Release */,
|
||||
);
|
||||
defaultConfigurationIsVisible = 0;
|
||||
defaultConfigurationName = Release;
|
||||
};
|
||||
/* End XCConfigurationList section */
|
||||
};
|
||||
rootObject = 8A1C836B2AC328BD0096AF73 /* Project object */;
|
||||
};
|
||||
rootObject = 8A1C836B2AC328BD0096AF73 /* Project object */;
|
||||
}
|
||||
|
||||
@@ -3,24 +3,26 @@ import Foundation
|
||||
@MainActor
|
||||
class LlamaState: ObservableObject {
|
||||
@Published var messageLog = ""
|
||||
@Published var cacheCleared = false
|
||||
|
||||
private var llamaContext: LlamaContext?
|
||||
private var modelUrl: URL? {
|
||||
Bundle.main.url(forResource: "q8_0", withExtension: "gguf", subdirectory: "models")
|
||||
private var defaultModelUrl: URL? {
|
||||
Bundle.main.url(forResource: "ggml-model", withExtension: "gguf", subdirectory: "models")
|
||||
// Bundle.main.url(forResource: "llama-2-7b-chat", withExtension: "Q2_K.gguf", subdirectory: "models")
|
||||
}
|
||||
|
||||
init() {
|
||||
do {
|
||||
try loadModel()
|
||||
try loadModel(modelUrl: defaultModelUrl)
|
||||
} catch {
|
||||
messageLog += "Error!\n"
|
||||
}
|
||||
}
|
||||
|
||||
private func loadModel() throws {
|
||||
func loadModel(modelUrl: URL?) throws {
|
||||
messageLog += "Loading model...\n"
|
||||
if let modelUrl {
|
||||
llamaContext = try LlamaContext.createContext(path: modelUrl.path())
|
||||
llamaContext = try LlamaContext.create_context(path: modelUrl.path())
|
||||
messageLog += "Loaded model \(modelUrl.lastPathComponent)\n"
|
||||
} else {
|
||||
messageLog += "Could not locate model\n"
|
||||
@@ -31,7 +33,7 @@ class LlamaState: ObservableObject {
|
||||
guard let llamaContext else {
|
||||
return
|
||||
}
|
||||
messageLog += "Attempting to complete text...\n"
|
||||
|
||||
await llamaContext.completion_init(text: text)
|
||||
messageLog += "\(text)"
|
||||
|
||||
@@ -42,4 +44,42 @@ class LlamaState: ObservableObject {
|
||||
await llamaContext.clear()
|
||||
messageLog += "\n\ndone\n"
|
||||
}
|
||||
|
||||
func bench() async {
|
||||
guard let llamaContext else {
|
||||
return
|
||||
}
|
||||
|
||||
messageLog += "\n"
|
||||
messageLog += "Running benchmark...\n"
|
||||
messageLog += "Model info: "
|
||||
messageLog += await llamaContext.model_info() + "\n"
|
||||
|
||||
let t_start = DispatchTime.now().uptimeNanoseconds
|
||||
await llamaContext.bench(pp: 8, tg: 4, pl: 1) // heat up
|
||||
let t_end = DispatchTime.now().uptimeNanoseconds
|
||||
|
||||
let t_heat = Double(t_end - t_start) / 1_000_000_000.0
|
||||
messageLog += "Heat up time: \(t_heat) seconds, please wait...\n"
|
||||
|
||||
// if more than 5 seconds, then we're probably running on a slow device
|
||||
if t_heat > 5.0 {
|
||||
messageLog += "Heat up time is too long, aborting benchmark\n"
|
||||
return
|
||||
}
|
||||
|
||||
let result = await llamaContext.bench(pp: 512, tg: 128, pl: 1, nr: 3)
|
||||
|
||||
messageLog += "\(result)"
|
||||
messageLog += "\n"
|
||||
}
|
||||
|
||||
func clear() async {
|
||||
guard let llamaContext else {
|
||||
return
|
||||
}
|
||||
|
||||
await llamaContext.clear()
|
||||
messageLog = ""
|
||||
}
|
||||
}
|
||||
|
||||
@@ -5,24 +5,97 @@ struct ContentView: View {
|
||||
|
||||
@State private var multiLineText = ""
|
||||
|
||||
private static func cleanupModelCaches() {
|
||||
// Delete all models (*.gguf)
|
||||
let fileManager = FileManager.default
|
||||
let documentsUrl = FileManager.default.urls(for: .documentDirectory, in: .userDomainMask)[0]
|
||||
do {
|
||||
let fileURLs = try fileManager.contentsOfDirectory(at: documentsUrl, includingPropertiesForKeys: nil)
|
||||
for fileURL in fileURLs {
|
||||
if fileURL.pathExtension == "gguf" {
|
||||
try fileManager.removeItem(at: fileURL)
|
||||
}
|
||||
}
|
||||
} catch {
|
||||
print("Error while enumerating files \(documentsUrl.path): \(error.localizedDescription)")
|
||||
}
|
||||
}
|
||||
|
||||
var body: some View {
|
||||
VStack {
|
||||
ScrollView(.vertical) {
|
||||
ScrollView(.vertical, showsIndicators: true) {
|
||||
Text(llamaState.messageLog)
|
||||
.font(.system(size: 12))
|
||||
.frame(maxWidth: .infinity, alignment: .leading)
|
||||
.padding()
|
||||
.onTapGesture {
|
||||
UIApplication.shared.sendAction(#selector(UIResponder.resignFirstResponder), to: nil, from: nil, for: nil)
|
||||
}
|
||||
}
|
||||
|
||||
TextEditor(text: $multiLineText)
|
||||
.frame(height: 200)
|
||||
.frame(height: 80)
|
||||
.padding()
|
||||
.border(Color.gray, width: 0.5)
|
||||
Button(action: {
|
||||
sendText()
|
||||
}) {
|
||||
Text("Send")
|
||||
.padding()
|
||||
.background(Color.blue)
|
||||
.foregroundColor(.white)
|
||||
.cornerRadius(8)
|
||||
|
||||
HStack {
|
||||
Button("Send") {
|
||||
sendText()
|
||||
}
|
||||
.padding(8)
|
||||
.background(Color.blue)
|
||||
.foregroundColor(.white)
|
||||
.cornerRadius(8)
|
||||
|
||||
Button("Bench") {
|
||||
bench()
|
||||
}
|
||||
.padding(8)
|
||||
.background(Color.blue)
|
||||
.foregroundColor(.white)
|
||||
.cornerRadius(8)
|
||||
|
||||
Button("Clear") {
|
||||
clear()
|
||||
}
|
||||
.padding(8)
|
||||
.background(Color.blue)
|
||||
.foregroundColor(.white)
|
||||
.cornerRadius(8)
|
||||
|
||||
Button("Copy") {
|
||||
UIPasteboard.general.string = llamaState.messageLog
|
||||
}
|
||||
.padding(8)
|
||||
.background(Color.blue)
|
||||
.foregroundColor(.white)
|
||||
.cornerRadius(8)
|
||||
}
|
||||
|
||||
VStack {
|
||||
DownloadButton(
|
||||
llamaState: llamaState,
|
||||
modelName: "TinyLlama-1.1B (Q4_0)",
|
||||
modelUrl: "https://huggingface.co/TheBloke/TinyLlama-1.1B-1T-OpenOrca-GGUF/resolve/main/tinyllama-1.1b-1t-openorca.Q4_0.gguf?download=true",
|
||||
filename: "tinyllama-1.1b-1t-openorca.Q4_0.gguf"
|
||||
)
|
||||
.font(.system(size: 12))
|
||||
.padding(.top, 4)
|
||||
|
||||
DownloadButton(
|
||||
llamaState: llamaState,
|
||||
modelName: "TinyLlama-1.1B (Q8_0)",
|
||||
modelUrl: "https://huggingface.co/TheBloke/TinyLlama-1.1B-1T-OpenOrca-GGUF/resolve/main/tinyllama-1.1b-1t-openorca.Q8_0.gguf?download=true",
|
||||
filename: "tinyllama-1.1b-1t-openorca.Q8_0.gguf"
|
||||
)
|
||||
.font(.system(size: 12))
|
||||
|
||||
Button("Clear downloaded models") {
|
||||
ContentView.cleanupModelCaches()
|
||||
llamaState.cacheCleared = true
|
||||
}
|
||||
.padding(8)
|
||||
.font(.system(size: 12))
|
||||
}
|
||||
}
|
||||
.padding()
|
||||
@@ -34,9 +107,20 @@ struct ContentView: View {
|
||||
multiLineText = ""
|
||||
}
|
||||
}
|
||||
|
||||
func bench() {
|
||||
Task {
|
||||
await llamaState.bench()
|
||||
}
|
||||
}
|
||||
|
||||
func clear() {
|
||||
Task {
|
||||
await llamaState.clear()
|
||||
}
|
||||
}
|
||||
}
|
||||
/*
|
||||
#Preview {
|
||||
ContentView()
|
||||
}
|
||||
*/
|
||||
|
||||
//#Preview {
|
||||
// ContentView()
|
||||
//}
|
||||
|
||||
122
examples/llama.swiftui/llama.swiftui/UI/DownloadButton.swift
Normal file
122
examples/llama.swiftui/llama.swiftui/UI/DownloadButton.swift
Normal file
@@ -0,0 +1,122 @@
|
||||
import SwiftUI
|
||||
|
||||
struct DownloadButton: View {
|
||||
@ObservedObject private var llamaState: LlamaState
|
||||
private var modelName: String
|
||||
private var modelUrl: String
|
||||
private var filename: String
|
||||
|
||||
@State private var status: String
|
||||
|
||||
@State private var downloadTask: URLSessionDownloadTask?
|
||||
@State private var progress = 0.0
|
||||
@State private var observation: NSKeyValueObservation?
|
||||
|
||||
private static func getFileURL(filename: String) -> URL {
|
||||
FileManager.default.urls(for: .documentDirectory, in: .userDomainMask)[0].appendingPathComponent(filename)
|
||||
}
|
||||
|
||||
private func checkFileExistenceAndUpdateStatus() {
|
||||
}
|
||||
|
||||
init(llamaState: LlamaState, modelName: String, modelUrl: String, filename: String) {
|
||||
self.llamaState = llamaState
|
||||
self.modelName = modelName
|
||||
self.modelUrl = modelUrl
|
||||
self.filename = filename
|
||||
|
||||
let fileURL = DownloadButton.getFileURL(filename: filename)
|
||||
status = FileManager.default.fileExists(atPath: fileURL.path) ? "downloaded" : "download"
|
||||
}
|
||||
|
||||
private func download() {
|
||||
status = "downloading"
|
||||
print("Downloading model \(modelName) from \(modelUrl)")
|
||||
guard let url = URL(string: modelUrl) else { return }
|
||||
let fileURL = DownloadButton.getFileURL(filename: filename)
|
||||
|
||||
downloadTask = URLSession.shared.downloadTask(with: url) { temporaryURL, response, error in
|
||||
if let error = error {
|
||||
print("Error: \(error.localizedDescription)")
|
||||
return
|
||||
}
|
||||
|
||||
guard let response = response as? HTTPURLResponse, (200...299).contains(response.statusCode) else {
|
||||
print("Server error!")
|
||||
return
|
||||
}
|
||||
|
||||
do {
|
||||
if let temporaryURL = temporaryURL {
|
||||
try FileManager.default.copyItem(at: temporaryURL, to: fileURL)
|
||||
print("Writing to \(filename) completed")
|
||||
|
||||
llamaState.cacheCleared = false
|
||||
|
||||
status = "downloaded"
|
||||
}
|
||||
} catch let err {
|
||||
print("Error: \(err.localizedDescription)")
|
||||
}
|
||||
}
|
||||
|
||||
observation = downloadTask?.progress.observe(\.fractionCompleted) { progress, _ in
|
||||
self.progress = progress.fractionCompleted
|
||||
}
|
||||
|
||||
downloadTask?.resume()
|
||||
}
|
||||
|
||||
var body: some View {
|
||||
VStack {
|
||||
if status == "download" {
|
||||
Button(action: download) {
|
||||
Text("Download " + modelName)
|
||||
}
|
||||
} else if status == "downloading" {
|
||||
Button(action: {
|
||||
downloadTask?.cancel()
|
||||
status = "download"
|
||||
}) {
|
||||
Text("\(modelName) (Downloading \(Int(progress * 100))%)")
|
||||
}
|
||||
} else if status == "downloaded" {
|
||||
Button(action: {
|
||||
let fileURL = DownloadButton.getFileURL(filename: filename)
|
||||
if !FileManager.default.fileExists(atPath: fileURL.path) {
|
||||
download()
|
||||
return
|
||||
}
|
||||
do {
|
||||
try llamaState.loadModel(modelUrl: fileURL)
|
||||
} catch let err {
|
||||
print("Error: \(err.localizedDescription)")
|
||||
}
|
||||
}) {
|
||||
Text("\(modelName) (Downloaded)")
|
||||
}
|
||||
} else {
|
||||
Text("Unknown status")
|
||||
}
|
||||
}
|
||||
.onDisappear() {
|
||||
downloadTask?.cancel()
|
||||
}
|
||||
.onChange(of: llamaState.cacheCleared) { newValue in
|
||||
if newValue {
|
||||
downloadTask?.cancel()
|
||||
let fileURL = DownloadButton.getFileURL(filename: filename)
|
||||
status = FileManager.default.fileExists(atPath: fileURL.path) ? "downloaded" : "download"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// #Preview {
|
||||
// DownloadButton(
|
||||
// llamaState: LlamaState(),
|
||||
// modelName: "TheBloke / TinyLlama-1.1B-1T-OpenOrca-GGUF (Q4_0)",
|
||||
// modelUrl: "https://huggingface.co/TheBloke/TinyLlama-1.1B-1T-OpenOrca-GGUF/resolve/main/tinyllama-1.1b-1t-openorca.Q4_0.gguf?download=true",
|
||||
// filename: "tinyllama-1.1b-1t-openorca.Q4_0.gguf"
|
||||
// )
|
||||
// }
|
||||
@@ -514,7 +514,7 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) {
|
||||
ctx_size += padded_size;
|
||||
if (verbosity >= 3) {
|
||||
printf("%s: tensor[%d]: n_dims = %d, name = %s, tensor_size=%zu, padded_size=%zu, offset=%zu\n", __func__, i,
|
||||
cur->n_dims, cur->name, tensor_size, padded_size, offset);
|
||||
ggml_n_dims(cur), cur->name, tensor_size, padded_size, offset);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -962,7 +962,7 @@ bool clip_model_quantize(const char * fname_inp, const char * fname_out, const i
|
||||
}
|
||||
|
||||
// quantize only 2D tensors
|
||||
quantize &= (cur->n_dims == 2);
|
||||
quantize &= (ggml_n_dims(cur) == 2);
|
||||
|
||||
if (quantize) {
|
||||
new_type = type;
|
||||
@@ -1035,7 +1035,7 @@ bool clip_model_quantize(const char * fname_inp, const char * fname_out, const i
|
||||
fout.put(0);
|
||||
}
|
||||
|
||||
printf("%s: n_dims = %d | quantize=%d | size = %f MB -> %f MB\n", name.c_str(), cur->n_dims, quantize,
|
||||
printf("%s: n_dims = %d | quantize=%d | size = %f MB -> %f MB\n", name.c_str(), ggml_n_dims(cur), quantize,
|
||||
orig_size / 1024.0 / 1024.0, new_size / 1024.0 / 1024.0);
|
||||
}
|
||||
|
||||
|
||||
12
ggml-cuda.cu
12
ggml-cuda.cu
@@ -8898,6 +8898,12 @@ static void ggml_cuda_nop(const ggml_tensor * src0, const ggml_tensor * src1, gg
|
||||
(void) dst;
|
||||
}
|
||||
|
||||
static size_t ggml_nbytes_split(const struct ggml_tensor * tensor, int nrows_split) {
|
||||
static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
|
||||
|
||||
return nrows_split*ggml_row_size(tensor->type, tensor->ne[0]);
|
||||
}
|
||||
|
||||
void ggml_cuda_transform_tensor(void * data, struct ggml_tensor * tensor) {
|
||||
const int64_t nrows = ggml_nrows(tensor);
|
||||
|
||||
@@ -8947,8 +8953,7 @@ void ggml_cuda_transform_tensor(void * data, struct ggml_tensor * tensor) {
|
||||
|
||||
// pad last row to a multiple of 512 elements to avoid out-of-bounds memory accesses
|
||||
if (ne0 % MATRIX_ROW_PADDING != 0) {
|
||||
size += (MATRIX_ROW_PADDING - ne0 % MATRIX_ROW_PADDING)
|
||||
* ggml_type_size(tensor->type)/ggml_blck_size(tensor->type);
|
||||
size += ggml_row_size(tensor->type, MATRIX_ROW_PADDING - ne0 % MATRIX_ROW_PADDING);
|
||||
}
|
||||
|
||||
char * buf;
|
||||
@@ -9485,8 +9490,7 @@ static size_t ggml_backend_cuda_buffer_type_get_alloc_size(ggml_backend_buffer_t
|
||||
|
||||
if (ggml_is_quantized(tensor->type)) {
|
||||
if (ne0 % MATRIX_ROW_PADDING != 0) {
|
||||
size += (MATRIX_ROW_PADDING - ne0 % MATRIX_ROW_PADDING)
|
||||
* ggml_type_size(tensor->type)/ggml_blck_size(tensor->type);
|
||||
size += ggml_row_size(tensor->type, MATRIX_ROW_PADDING - ne0 % MATRIX_ROW_PADDING);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
125
ggml.c
125
ggml.c
@@ -1997,12 +1997,6 @@ size_t ggml_nbytes_pad(const struct ggml_tensor * tensor) {
|
||||
return GGML_PAD(ggml_nbytes(tensor), GGML_MEM_ALIGN);
|
||||
}
|
||||
|
||||
size_t ggml_nbytes_split(const struct ggml_tensor * tensor, int nrows_split) {
|
||||
static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
|
||||
|
||||
return (nrows_split*tensor->ne[0]*ggml_type_size(tensor->type))/ggml_blck_size(tensor->type);
|
||||
}
|
||||
|
||||
int ggml_blck_size(enum ggml_type type) {
|
||||
return type_traits[type].blck_size;
|
||||
}
|
||||
@@ -2011,8 +2005,13 @@ size_t ggml_type_size(enum ggml_type type) {
|
||||
return type_traits[type].type_size;
|
||||
}
|
||||
|
||||
float ggml_type_sizef(enum ggml_type type) {
|
||||
return ((float)(type_traits[type].type_size))/type_traits[type].blck_size;
|
||||
size_t ggml_row_size(enum ggml_type type, int64_t ne) {
|
||||
assert(ne % ggml_blck_size(type) == 0);
|
||||
return ggml_type_size(type)*ne/ggml_blck_size(type);
|
||||
}
|
||||
|
||||
double ggml_type_sizef(enum ggml_type type) {
|
||||
return ((double)(type_traits[type].type_size))/type_traits[type].blck_size;
|
||||
}
|
||||
|
||||
const char * ggml_type_name(enum ggml_type type) {
|
||||
@@ -2049,24 +2048,37 @@ size_t ggml_element_size(const struct ggml_tensor * tensor) {
|
||||
return ggml_type_size(tensor->type);
|
||||
}
|
||||
|
||||
static inline bool ggml_is_scalar(const struct ggml_tensor * tensor) {
|
||||
bool ggml_is_scalar(const struct ggml_tensor * tensor) {
|
||||
static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
|
||||
|
||||
return tensor->ne[0] == 1 && tensor->ne[1] == 1 && tensor->ne[2] == 1 && tensor->ne[3] == 1;
|
||||
}
|
||||
|
||||
static inline bool ggml_is_vector(const struct ggml_tensor * tensor) {
|
||||
bool ggml_is_vector(const struct ggml_tensor * tensor) {
|
||||
static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
|
||||
|
||||
return tensor->ne[1] == 1 && tensor->ne[2] == 1 && tensor->ne[3] == 1;
|
||||
}
|
||||
|
||||
static inline bool ggml_is_matrix(const struct ggml_tensor * tensor) {
|
||||
bool ggml_is_matrix(const struct ggml_tensor * tensor) {
|
||||
static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
|
||||
|
||||
return tensor->ne[2] == 1 && tensor->ne[3] == 1;
|
||||
}
|
||||
|
||||
bool ggml_is_3d(const struct ggml_tensor * tensor) {
|
||||
return tensor->ne[3] == 1;
|
||||
}
|
||||
|
||||
int ggml_n_dims(const struct ggml_tensor * tensor) {
|
||||
for (int i = GGML_MAX_DIMS - 1; i >= 1; --i) {
|
||||
if (tensor->ne[i] > 1) {
|
||||
return i + 1;
|
||||
}
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
static inline bool ggml_can_mul_mat(const struct ggml_tensor * t0, const struct ggml_tensor * t1) {
|
||||
static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
|
||||
|
||||
@@ -2473,7 +2485,7 @@ static struct ggml_tensor * ggml_new_tensor_impl(
|
||||
view_src = view_src->view_src;
|
||||
}
|
||||
|
||||
size_t data_size = ggml_type_size(type)*(ne[0]/ggml_blck_size(type));
|
||||
size_t data_size = ggml_row_size(type, ne[0]);
|
||||
for (int i = 1; i < n_dims; i++) {
|
||||
data_size *= ne[i];
|
||||
}
|
||||
@@ -2516,7 +2528,6 @@ static struct ggml_tensor * ggml_new_tensor_impl(
|
||||
/*.type =*/ type,
|
||||
/*.backend =*/ GGML_BACKEND_CPU,
|
||||
/*.buffer =*/ NULL,
|
||||
/*.n_dims =*/ n_dims,
|
||||
/*.ne =*/ { 1, 1, 1, 1 },
|
||||
/*.nb =*/ { 0, 0, 0, 0 },
|
||||
/*.op =*/ GGML_OP_NONE,
|
||||
@@ -2623,7 +2634,7 @@ struct ggml_tensor * ggml_new_f32(struct ggml_context * ctx, float value) {
|
||||
}
|
||||
|
||||
struct ggml_tensor * ggml_dup_tensor(struct ggml_context * ctx, const struct ggml_tensor * src) {
|
||||
return ggml_new_tensor(ctx, src->type, src->n_dims, src->ne);
|
||||
return ggml_new_tensor(ctx, src->type, GGML_MAX_DIMS, src->ne);
|
||||
}
|
||||
|
||||
static void ggml_set_op_params(struct ggml_tensor * tensor, const void * params, size_t params_size) {
|
||||
@@ -3072,7 +3083,7 @@ struct ggml_tensor * ggml_format_name(struct ggml_tensor * tensor, const char *
|
||||
struct ggml_tensor * ggml_view_tensor(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * src) {
|
||||
struct ggml_tensor * result = ggml_new_tensor_impl(ctx, src->type, src->n_dims, src->ne, src, 0);
|
||||
struct ggml_tensor * result = ggml_new_tensor_impl(ctx, src->type, GGML_MAX_DIMS, src->ne, src, 0);
|
||||
ggml_format_name(result, "%s (view)", src->name);
|
||||
|
||||
for (int i = 0; i < GGML_MAX_DIMS; i++) {
|
||||
@@ -3230,10 +3241,10 @@ static struct ggml_tensor * ggml_add_cast_impl(
|
||||
is_node = true;
|
||||
}
|
||||
|
||||
struct ggml_tensor * result = ggml_new_tensor(ctx, type, a->n_dims, a->ne);
|
||||
struct ggml_tensor * result = ggml_new_tensor(ctx, type, GGML_MAX_DIMS, a->ne);
|
||||
|
||||
result->op = GGML_OP_ADD;
|
||||
result->grad = is_node ? ggml_new_tensor(ctx, GGML_TYPE_F32, a->n_dims, a->ne) : NULL;
|
||||
result->grad = is_node ? ggml_new_tensor(ctx, GGML_TYPE_F32, GGML_MAX_DIMS, a->ne) : NULL;
|
||||
result->src[0] = a;
|
||||
result->src[1] = b;
|
||||
|
||||
@@ -3602,12 +3613,12 @@ struct ggml_tensor * ggml_sum_rows(
|
||||
is_node = true;
|
||||
}
|
||||
|
||||
int64_t ne[4] = {1,1,1,1};
|
||||
for (int i=1; i<a->n_dims; ++i) {
|
||||
int64_t ne[GGML_MAX_DIMS] = { 1 };
|
||||
for (int i = 1; i < GGML_MAX_DIMS; ++i) {
|
||||
ne[i] = a->ne[i];
|
||||
}
|
||||
|
||||
struct ggml_tensor * result = ggml_new_tensor(ctx, a->type, a->n_dims, ne);
|
||||
struct ggml_tensor * result = ggml_new_tensor(ctx, a->type, GGML_MAX_DIMS, ne);
|
||||
|
||||
result->op = GGML_OP_SUM_ROWS;
|
||||
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
||||
@@ -3628,8 +3639,8 @@ struct ggml_tensor * ggml_mean(
|
||||
is_node = true;
|
||||
}
|
||||
|
||||
int64_t ne[GGML_MAX_DIMS] = { 1, a->ne[1], a->ne[2], a->ne[3] };
|
||||
struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, a->n_dims, ne);
|
||||
int64_t ne[4] = { 1, a->ne[1], a->ne[2], a->ne[3] };
|
||||
struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
|
||||
|
||||
result->op = GGML_OP_MEAN;
|
||||
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
||||
@@ -3651,8 +3662,7 @@ struct ggml_tensor * ggml_argmax(
|
||||
is_node = true;
|
||||
}
|
||||
|
||||
int64_t ne[GGML_MAX_DIMS] = { a->ne[1], 1, 1, 1 };
|
||||
struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_I32, a->n_dims, ne);
|
||||
struct ggml_tensor * result = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, a->ne[1]);
|
||||
|
||||
result->op = GGML_OP_ARGMAX;
|
||||
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
||||
@@ -3675,7 +3685,7 @@ struct ggml_tensor * ggml_repeat(
|
||||
is_node = true;
|
||||
}
|
||||
|
||||
struct ggml_tensor * result = ggml_new_tensor(ctx, a->type, b->n_dims, b->ne);
|
||||
struct ggml_tensor * result = ggml_new_tensor(ctx, a->type, GGML_MAX_DIMS, b->ne);
|
||||
|
||||
result->op = GGML_OP_REPEAT;
|
||||
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
||||
@@ -3702,7 +3712,7 @@ struct ggml_tensor * ggml_repeat_back(
|
||||
return a;
|
||||
}
|
||||
|
||||
struct ggml_tensor * result = ggml_new_tensor(ctx, a->type, b->n_dims, b->ne);
|
||||
struct ggml_tensor * result = ggml_new_tensor(ctx, a->type, GGML_MAX_DIMS, b->ne);
|
||||
|
||||
result->op = GGML_OP_REPEAT_BACK;
|
||||
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
||||
@@ -4078,7 +4088,7 @@ struct ggml_tensor * ggml_mul_mat(
|
||||
}
|
||||
|
||||
const int64_t ne[4] = { a->ne[1], b->ne[1], b->ne[2], b->ne[3] };
|
||||
struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, MAX(a->n_dims, b->n_dims), ne);
|
||||
struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
|
||||
|
||||
result->op = GGML_OP_MUL_MAT;
|
||||
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
||||
@@ -4112,7 +4122,7 @@ struct ggml_tensor * ggml_mul_mat_id(
|
||||
}
|
||||
|
||||
const int64_t ne[4] = { as[0]->ne[1], b->ne[1], b->ne[2], b->ne[3] };
|
||||
struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, MAX(as[0]->n_dims, b->n_dims), ne);
|
||||
struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
|
||||
|
||||
ggml_set_op_params_i32(result, 0, id);
|
||||
ggml_set_op_params_i32(result, 1, n_as);
|
||||
@@ -4150,7 +4160,7 @@ struct ggml_tensor * ggml_out_prod(
|
||||
|
||||
// a is broadcastable to b for ne[2] and ne[3] -> use b->ne[2] and b->ne[3]
|
||||
const int64_t ne[4] = { a->ne[0], b->ne[0], b->ne[2], b->ne[3] };
|
||||
struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, MAX(a->n_dims, b->n_dims), ne);
|
||||
struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
|
||||
|
||||
result->op = GGML_OP_OUT_PROD;
|
||||
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
||||
@@ -4435,7 +4445,7 @@ struct ggml_tensor * ggml_reshape(
|
||||
//GGML_ASSERT(false);
|
||||
}
|
||||
|
||||
struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, b->n_dims, b->ne, a, 0);
|
||||
struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, GGML_MAX_DIMS, b->ne, a, 0);
|
||||
ggml_format_name(result, "%s (reshaped)", a->name);
|
||||
|
||||
result->op = GGML_OP_RESHAPE;
|
||||
@@ -4813,7 +4823,7 @@ struct ggml_tensor * ggml_diag(
|
||||
}
|
||||
|
||||
const int64_t ne[4] = { a->ne[0], a->ne[0], a->ne[2], a->ne[3] };
|
||||
struct ggml_tensor * result = ggml_new_tensor(ctx, a->type, MAX(a->n_dims, 2), ne);
|
||||
struct ggml_tensor * result = ggml_new_tensor(ctx, a->type, 4, ne);
|
||||
|
||||
result->op = GGML_OP_DIAG;
|
||||
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
||||
@@ -5460,7 +5470,7 @@ struct ggml_tensor * ggml_pool_1d(
|
||||
is_node = true;
|
||||
}
|
||||
|
||||
const int64_t ne[3] = {
|
||||
const int64_t ne[2] = {
|
||||
ggml_calc_pool_output_size(a->ne[0], k0, s0, p0),
|
||||
a->ne[1],
|
||||
};
|
||||
@@ -5579,7 +5589,7 @@ struct ggml_tensor * ggml_argsort(
|
||||
enum ggml_sort_order order) {
|
||||
bool is_node = false;
|
||||
|
||||
struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_I32, a->n_dims, a->ne);
|
||||
struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_I32, GGML_MAX_DIMS, a->ne);
|
||||
|
||||
ggml_set_op_params_i32(result, 0, (int32_t) order);
|
||||
|
||||
@@ -5626,7 +5636,7 @@ struct ggml_tensor * ggml_flash_attn(
|
||||
}
|
||||
|
||||
//struct ggml_tensor * result = ggml_dup_tensor(ctx, q);
|
||||
struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, q->n_dims, q->ne);
|
||||
struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, GGML_MAX_DIMS, q->ne);
|
||||
|
||||
int32_t t = masked ? 1 : 0;
|
||||
ggml_set_op_params(result, &t, sizeof(t));
|
||||
@@ -5659,7 +5669,7 @@ struct ggml_tensor * ggml_flash_ff(
|
||||
}
|
||||
|
||||
//struct ggml_tensor * result = ggml_dup_tensor(ctx, a);
|
||||
struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, a->n_dims, a->ne);
|
||||
struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, GGML_MAX_DIMS, a->ne);
|
||||
|
||||
result->op = GGML_OP_FLASH_FF;
|
||||
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
||||
@@ -5775,7 +5785,6 @@ struct ggml_tensor * ggml_win_part(
|
||||
const int np = npx*npy;
|
||||
|
||||
const int64_t ne[4] = { a->ne[0], w, w, np, };
|
||||
|
||||
struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
|
||||
|
||||
int32_t params[] = { npx, npy, w };
|
||||
@@ -7759,10 +7768,10 @@ static void ggml_compute_forward_mul_f32(
|
||||
const int ith = params->ith;
|
||||
const int nth = params->nth;
|
||||
|
||||
// TODO: OpenCL kernel support broadcast
|
||||
#ifdef GGML_USE_CLBLAST
|
||||
if (src1->backend == GGML_BACKEND_GPU) {
|
||||
GGML_ASSERT(ggml_are_same_shape(src0, src1));
|
||||
// TODO: OpenCL kernel support full broadcast
|
||||
GGML_ASSERT(ggml_can_repeat_rows(src1, src0));
|
||||
if (ith == 0) {
|
||||
ggml_cl_mul(src0, src1, dst);
|
||||
}
|
||||
@@ -9683,7 +9692,7 @@ static void ggml_compute_forward_mul_mat(
|
||||
if (params->type == GGML_TASK_INIT) {
|
||||
if (src1->type != vec_dot_type) {
|
||||
char * wdata = params->wdata;
|
||||
const size_t row_size = ne10*ggml_type_size(vec_dot_type)/ggml_blck_size(vec_dot_type);
|
||||
const size_t row_size = ggml_row_size(vec_dot_type, ne10);
|
||||
|
||||
assert(params->wsize >= ne11*ne12*ne13*row_size);
|
||||
assert(src1->type == GGML_TYPE_F32);
|
||||
@@ -9706,7 +9715,7 @@ static void ggml_compute_forward_mul_mat(
|
||||
}
|
||||
|
||||
const void * wdata = (src1->type == vec_dot_type) ? src1->data : params->wdata;
|
||||
const size_t row_size = ne10*ggml_type_size(vec_dot_type)/ggml_blck_size(vec_dot_type);
|
||||
const size_t row_size = ggml_row_size(vec_dot_type, ne10);
|
||||
|
||||
const int64_t nr0 = ne01; // src0 rows
|
||||
const int64_t nr1 = cne1*ne12*ne13; // src1 rows
|
||||
@@ -14558,7 +14567,7 @@ static struct ggml_tensor * ggml_recompute_graph_node(
|
||||
return replacements->vals[i];
|
||||
}
|
||||
|
||||
struct ggml_tensor * clone = ggml_new_tensor(ctx, node->type, node->n_dims, node->ne);
|
||||
struct ggml_tensor * clone = ggml_new_tensor(ctx, node->type, GGML_MAX_DIMS, node->ne);
|
||||
|
||||
// insert clone into replacements
|
||||
GGML_ASSERT(replacements->set.keys[i] == NULL); // assert that we don't overwrite
|
||||
@@ -16311,7 +16320,7 @@ struct ggml_cplan ggml_graph_plan(struct ggml_cgraph * cgraph, int n_threads) {
|
||||
} else
|
||||
#endif
|
||||
if (node->src[1]->type != vec_dot_type) {
|
||||
cur = ggml_type_size(vec_dot_type)*ggml_nelements(node->src[1])/ggml_blck_size(vec_dot_type);
|
||||
cur = ggml_row_size(vec_dot_type, ggml_nelements(node->src[1]));
|
||||
}
|
||||
} break;
|
||||
case GGML_OP_MUL_MAT_ID:
|
||||
@@ -16328,7 +16337,7 @@ struct ggml_cplan ggml_graph_plan(struct ggml_cgraph * cgraph, int n_threads) {
|
||||
} else
|
||||
#endif
|
||||
if (b->type != vec_dot_type) {
|
||||
cur = ggml_type_size(vec_dot_type)*ggml_nelements(b)/ggml_blck_size(vec_dot_type);
|
||||
cur = ggml_row_size(vec_dot_type, ggml_nelements(b));
|
||||
}
|
||||
} break;
|
||||
case GGML_OP_OUT_PROD:
|
||||
@@ -16559,7 +16568,7 @@ static void ggml_graph_export_leaf(const struct ggml_tensor * tensor, FILE * fou
|
||||
fprintf(fout, "%-6s %-12s %8d %" PRId64 " %" PRId64 " %" PRId64 " %" PRId64 " %16zu %16zu %16zu %16zu %16p %32s\n",
|
||||
ggml_type_name(tensor->type),
|
||||
ggml_op_name (tensor->op),
|
||||
tensor->n_dims,
|
||||
ggml_n_dims(tensor),
|
||||
ne[0], ne[1], ne[2], ne[3],
|
||||
nb[0], nb[1], nb[2], nb[3],
|
||||
tensor->data,
|
||||
@@ -16574,7 +16583,7 @@ static void ggml_graph_export_node(const struct ggml_tensor * tensor, const char
|
||||
arg,
|
||||
ggml_type_name(tensor->type),
|
||||
ggml_op_name (tensor->op),
|
||||
tensor->n_dims,
|
||||
ggml_n_dims(tensor),
|
||||
ne[0], ne[1], ne[2], ne[3],
|
||||
nb[0], nb[1], nb[2], nb[3],
|
||||
tensor->data,
|
||||
@@ -16664,11 +16673,9 @@ void ggml_graph_export(const struct ggml_cgraph * cgraph, const char * fname) {
|
||||
|
||||
const uint32_t type = tensor->type;
|
||||
const uint32_t op = tensor->op;
|
||||
const uint32_t n_dims = tensor->n_dims;
|
||||
|
||||
fwrite(&type, sizeof(uint32_t), 1, fout);
|
||||
fwrite(&op, sizeof(uint32_t), 1, fout);
|
||||
fwrite(&n_dims, sizeof(uint32_t), 1, fout);
|
||||
|
||||
for (int j = 0; j < GGML_MAX_DIMS; ++j) {
|
||||
const uint64_t ne = tensor->ne[j];
|
||||
@@ -16698,11 +16705,9 @@ void ggml_graph_export(const struct ggml_cgraph * cgraph, const char * fname) {
|
||||
|
||||
const uint32_t type = tensor->type;
|
||||
const uint32_t op = tensor->op;
|
||||
const uint32_t n_dims = tensor->n_dims;
|
||||
|
||||
fwrite(&type, sizeof(uint32_t), 1, fout);
|
||||
fwrite(&op, sizeof(uint32_t), 1, fout);
|
||||
fwrite(&n_dims, sizeof(uint32_t), 1, fout);
|
||||
|
||||
for (int j = 0; j < GGML_MAX_DIMS; ++j) {
|
||||
const uint64_t ne = tensor->ne[j];
|
||||
@@ -16874,12 +16879,10 @@ struct ggml_cgraph * ggml_graph_import(const char * fname, struct ggml_context *
|
||||
{
|
||||
uint32_t type;
|
||||
uint32_t op;
|
||||
uint32_t n_dims;
|
||||
|
||||
for (uint32_t i = 0; i < n_leafs; ++i) {
|
||||
type = *(const uint32_t *) ptr; ptr += sizeof(type);
|
||||
op = *(const uint32_t *) ptr; ptr += sizeof(op);
|
||||
n_dims = *(const uint32_t *) ptr; ptr += sizeof(n_dims);
|
||||
|
||||
int64_t ne[GGML_MAX_DIMS];
|
||||
size_t nb[GGML_MAX_DIMS];
|
||||
@@ -16895,7 +16898,7 @@ struct ggml_cgraph * ggml_graph_import(const char * fname, struct ggml_context *
|
||||
nb[j] = nb_cur;
|
||||
}
|
||||
|
||||
struct ggml_tensor * tensor = ggml_new_tensor(*ctx_eval, (enum ggml_type) type, n_dims, ne);
|
||||
struct ggml_tensor * tensor = ggml_new_tensor(*ctx_eval, (enum ggml_type) type, GGML_MAX_DIMS, ne);
|
||||
|
||||
tensor->op = (enum ggml_op) op;
|
||||
|
||||
@@ -16912,7 +16915,7 @@ struct ggml_cgraph * ggml_graph_import(const char * fname, struct ggml_context *
|
||||
|
||||
ptr += ggml_nbytes(tensor);
|
||||
|
||||
fprintf(stderr, "%s: loaded leaf %d: '%16s', %3d dims, %9zu bytes\n", __func__, i, tensor->name, n_dims, ggml_nbytes(tensor));
|
||||
fprintf(stderr, "%s: loaded leaf %d: '%16s', %9zu bytes\n", __func__, i, tensor->name, ggml_nbytes(tensor));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -16922,12 +16925,10 @@ struct ggml_cgraph * ggml_graph_import(const char * fname, struct ggml_context *
|
||||
{
|
||||
uint32_t type;
|
||||
uint32_t op;
|
||||
uint32_t n_dims;
|
||||
|
||||
for (uint32_t i = 0; i < n_nodes; ++i) {
|
||||
type = *(const uint32_t *) ptr; ptr += sizeof(type);
|
||||
op = *(const uint32_t *) ptr; ptr += sizeof(op);
|
||||
n_dims = *(const uint32_t *) ptr; ptr += sizeof(n_dims);
|
||||
|
||||
enum ggml_op eop = (enum ggml_op) op;
|
||||
|
||||
@@ -16998,7 +16999,7 @@ struct ggml_cgraph * ggml_graph_import(const char * fname, struct ggml_context *
|
||||
} break;
|
||||
default:
|
||||
{
|
||||
tensor = ggml_new_tensor(*ctx_eval, (enum ggml_type) type, n_dims, ne);
|
||||
tensor = ggml_new_tensor(*ctx_eval, (enum ggml_type) type, GGML_MAX_DIMS, ne);
|
||||
|
||||
tensor->op = eop;
|
||||
} break;
|
||||
@@ -17017,7 +17018,7 @@ struct ggml_cgraph * ggml_graph_import(const char * fname, struct ggml_context *
|
||||
|
||||
result->nodes[i] = tensor;
|
||||
|
||||
fprintf(stderr, "%s: loaded node %d: '%16s', %3d dims, %9zu bytes\n", __func__, i, tensor->name, n_dims, ggml_nbytes(tensor));
|
||||
fprintf(stderr, "%s: loaded node %d: '%16s', %9zu bytes\n", __func__, i, tensor->name, ggml_nbytes(tensor));
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -17155,7 +17156,7 @@ void ggml_graph_dump_dot(const struct ggml_cgraph * gb, const struct ggml_cgraph
|
||||
fprintf(fp, "(%s)|", ggml_type_name(node->type));
|
||||
}
|
||||
|
||||
if (node->n_dims == 2) {
|
||||
if (ggml_is_matrix(node)) {
|
||||
fprintf(fp, "%d [%" PRId64 ", %" PRId64 "] | <x>%s", i, node->ne[0], node->ne[1], ggml_op_symbol(node->op));
|
||||
} else {
|
||||
fprintf(fp, "%d [%" PRId64 ", %" PRId64 ", %" PRId64 "] | <x>%s", i, node->ne[0], node->ne[1], node->ne[2], ggml_op_symbol(node->op));
|
||||
@@ -17422,7 +17423,7 @@ static enum ggml_opt_result ggml_opt_adam(
|
||||
int64_t i = 0;
|
||||
for (int p = 0; p < np; ++p) {
|
||||
const int64_t ne = ggml_nelements(ps[p]);
|
||||
const float p_decay = ((ps[p]->n_dims >= decay_min_ndim) ? decay : 0.0f) * sched;
|
||||
const float p_decay = ((ggml_n_dims(ps[p]) >= decay_min_ndim) ? decay : 0.0f) * sched;
|
||||
for (int64_t j = 0; j < ne; ++j) {
|
||||
float x = ggml_get_f32_1d(ps[p], j);
|
||||
float g_ = g[i]*gnorm;
|
||||
@@ -18696,7 +18697,7 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
|
||||
return NULL;
|
||||
}
|
||||
|
||||
const size_t size_cur = (ne*ggml_type_size(info->type))/ggml_blck_size(info->type);
|
||||
const size_t size_cur = ggml_row_size(info->type, ne);
|
||||
|
||||
ctx->size += GGML_PAD(size_cur, ctx->alignment);
|
||||
}
|
||||
@@ -19200,8 +19201,8 @@ void gguf_add_tensor(
|
||||
ctx->infos[idx].ne[i] = 1;
|
||||
}
|
||||
|
||||
ctx->infos[idx].n_dims = tensor->n_dims;
|
||||
for (int i = 0; i < tensor->n_dims; i++) {
|
||||
ctx->infos[idx].n_dims = ggml_n_dims(tensor);
|
||||
for (uint32_t i = 0; i < ctx->infos[idx].n_dims; i++) {
|
||||
ctx->infos[idx].ne[i] = tensor->ne[i];
|
||||
}
|
||||
|
||||
|
||||
19
ggml.h
19
ggml.h
@@ -502,7 +502,6 @@ extern "C" {
|
||||
|
||||
struct ggml_backend_buffer * buffer;
|
||||
|
||||
int n_dims;
|
||||
int64_t ne[GGML_MAX_DIMS]; // number of elements
|
||||
size_t nb[GGML_MAX_DIMS]; // stride in bytes:
|
||||
// nb[0] = ggml_type_size(type)
|
||||
@@ -534,7 +533,7 @@ extern "C" {
|
||||
|
||||
void * extra; // extra things e.g. for ggml-cuda.cu
|
||||
|
||||
char padding[12];
|
||||
char padding[8];
|
||||
};
|
||||
|
||||
static const size_t GGML_TENSOR_SIZE = sizeof(struct ggml_tensor);
|
||||
@@ -639,11 +638,14 @@ extern "C" {
|
||||
GGML_API int64_t ggml_nrows (const struct ggml_tensor * tensor);
|
||||
GGML_API size_t ggml_nbytes (const struct ggml_tensor * tensor);
|
||||
GGML_API size_t ggml_nbytes_pad (const struct ggml_tensor * tensor); // same as ggml_nbytes() but padded to GGML_MEM_ALIGN
|
||||
GGML_API size_t ggml_nbytes_split(const struct ggml_tensor * tensor, int nrows_split);
|
||||
|
||||
GGML_API int ggml_blck_size (enum ggml_type type);
|
||||
GGML_API size_t ggml_type_size (enum ggml_type type); // size in bytes for all elements in a block
|
||||
GGML_API float ggml_type_sizef(enum ggml_type type); // ggml_type_size()/ggml_blck_size() as float
|
||||
GGML_API int ggml_blck_size(enum ggml_type type);
|
||||
GGML_API size_t ggml_type_size(enum ggml_type type); // size in bytes for all elements in a block
|
||||
GGML_API size_t ggml_row_size (enum ggml_type type, int64_t ne); // size in bytes for all elements in a row
|
||||
|
||||
GGML_DEPRECATED(
|
||||
GGML_API double ggml_type_sizef(enum ggml_type type), // ggml_type_size()/ggml_blck_size() as float
|
||||
"use ggml_row_size() instead");
|
||||
|
||||
GGML_API const char * ggml_type_name(enum ggml_type type);
|
||||
GGML_API const char * ggml_op_name (enum ggml_op op);
|
||||
@@ -662,6 +664,11 @@ extern "C" {
|
||||
GGML_API bool ggml_is_transposed(const struct ggml_tensor * tensor);
|
||||
GGML_API bool ggml_is_contiguous(const struct ggml_tensor * tensor);
|
||||
GGML_API bool ggml_is_permuted (const struct ggml_tensor * tensor);
|
||||
GGML_API bool ggml_is_scalar (const struct ggml_tensor * tensor);
|
||||
GGML_API bool ggml_is_vector (const struct ggml_tensor * tensor);
|
||||
GGML_API bool ggml_is_matrix (const struct ggml_tensor * tensor);
|
||||
GGML_API bool ggml_is_3d (const struct ggml_tensor * tensor);
|
||||
GGML_API int ggml_n_dims (const struct ggml_tensor * tensor); // returns 1 for scalars
|
||||
|
||||
GGML_API bool ggml_are_same_shape(const struct ggml_tensor * t0, const struct ggml_tensor * t1);
|
||||
|
||||
|
||||
47
llama.cpp
47
llama.cpp
@@ -1555,7 +1555,7 @@ static bool llama_kv_cache_init(
|
||||
cache.cells.clear();
|
||||
cache.cells.resize(n_ctx);
|
||||
|
||||
cache.buf.resize(n_elements*(ggml_type_sizef(ktype) + ggml_type_sizef(vtype)) + 2u*n_layer*ggml_tensor_overhead());
|
||||
cache.buf.resize(ggml_row_size(ktype, n_elements) + ggml_row_size(vtype, n_elements) + 2u*n_layer*ggml_tensor_overhead());
|
||||
memset(cache.buf.data, 0, cache.buf.size);
|
||||
|
||||
struct ggml_init_params params;
|
||||
@@ -2393,25 +2393,25 @@ static std::string llama_model_ftype_name(llama_ftype ftype) {
|
||||
|
||||
switch (ftype) {
|
||||
case LLAMA_FTYPE_ALL_F32: return "all F32";
|
||||
case LLAMA_FTYPE_MOSTLY_F16: return "mostly F16";
|
||||
case LLAMA_FTYPE_MOSTLY_Q4_0: return "mostly Q4_0";
|
||||
case LLAMA_FTYPE_MOSTLY_Q4_1: return "mostly Q4_1";
|
||||
case LLAMA_FTYPE_MOSTLY_F16: return "F16";
|
||||
case LLAMA_FTYPE_MOSTLY_Q4_0: return "Q4_0";
|
||||
case LLAMA_FTYPE_MOSTLY_Q4_1: return "Q4_1";
|
||||
case LLAMA_FTYPE_MOSTLY_Q4_1_SOME_F16:
|
||||
return "mostly Q4_1, some F16";
|
||||
case LLAMA_FTYPE_MOSTLY_Q5_0: return "mostly Q5_0";
|
||||
case LLAMA_FTYPE_MOSTLY_Q5_1: return "mostly Q5_1";
|
||||
case LLAMA_FTYPE_MOSTLY_Q8_0: return "mostly Q8_0";
|
||||
return "Q4_1, some F16";
|
||||
case LLAMA_FTYPE_MOSTLY_Q5_0: return "Q5_0";
|
||||
case LLAMA_FTYPE_MOSTLY_Q5_1: return "Q5_1";
|
||||
case LLAMA_FTYPE_MOSTLY_Q8_0: return "Q8_0";
|
||||
|
||||
// K-quants
|
||||
case LLAMA_FTYPE_MOSTLY_Q2_K: return "mostly Q2_K";
|
||||
case LLAMA_FTYPE_MOSTLY_Q3_K_S: return "mostly Q3_K - Small";
|
||||
case LLAMA_FTYPE_MOSTLY_Q3_K_M: return "mostly Q3_K - Medium";
|
||||
case LLAMA_FTYPE_MOSTLY_Q3_K_L: return "mostly Q3_K - Large";
|
||||
case LLAMA_FTYPE_MOSTLY_Q4_K_S: return "mostly Q4_K - Small";
|
||||
case LLAMA_FTYPE_MOSTLY_Q4_K_M: return "mostly Q4_K - Medium";
|
||||
case LLAMA_FTYPE_MOSTLY_Q5_K_S: return "mostly Q5_K - Small";
|
||||
case LLAMA_FTYPE_MOSTLY_Q5_K_M: return "mostly Q5_K - Medium";
|
||||
case LLAMA_FTYPE_MOSTLY_Q6_K: return "mostly Q6_K";
|
||||
case LLAMA_FTYPE_MOSTLY_Q2_K: return "Q2_K";
|
||||
case LLAMA_FTYPE_MOSTLY_Q3_K_S: return "Q3_K - Small";
|
||||
case LLAMA_FTYPE_MOSTLY_Q3_K_M: return "Q3_K - Medium";
|
||||
case LLAMA_FTYPE_MOSTLY_Q3_K_L: return "Q3_K - Large";
|
||||
case LLAMA_FTYPE_MOSTLY_Q4_K_S: return "Q4_K - Small";
|
||||
case LLAMA_FTYPE_MOSTLY_Q4_K_M: return "Q4_K - Medium";
|
||||
case LLAMA_FTYPE_MOSTLY_Q5_K_S: return "Q5_K - Small";
|
||||
case LLAMA_FTYPE_MOSTLY_Q5_K_M: return "Q5_K - Medium";
|
||||
case LLAMA_FTYPE_MOSTLY_Q6_K: return "Q6_K";
|
||||
|
||||
default: return "unknown, may not work";
|
||||
}
|
||||
@@ -2529,6 +2529,7 @@ static void llm_load_hparams(
|
||||
ml.get_key(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, hparams.f_norm_rms_eps);
|
||||
|
||||
switch (hparams.n_layer) {
|
||||
case 22: model.type = e_model::MODEL_1B; break;
|
||||
case 26: model.type = e_model::MODEL_3B; break;
|
||||
case 32: model.type = e_model::MODEL_7B; break;
|
||||
case 40: model.type = e_model::MODEL_13B; break;
|
||||
@@ -3822,8 +3823,8 @@ static void llm_build_k_shift(
|
||||
ggml_rope_custom_inplace(ctx,
|
||||
ggml_view_3d(ctx, kv.k_l[il],
|
||||
n_embd_head, n_head_kv, n_ctx,
|
||||
ggml_type_sizef(kv.k_l[il]->type)*n_embd_head,
|
||||
ggml_type_sizef(kv.k_l[il]->type)*n_embd_gqa,
|
||||
ggml_row_size(kv.k_l[il]->type, n_embd_head),
|
||||
ggml_row_size(kv.k_l[il]->type, n_embd_gqa),
|
||||
0),
|
||||
K_shift, n_rot, rope_type, 0, n_orig_ctx, freq_base, freq_scale,
|
||||
ext_factor, attn_factor, beta_fast, beta_slow);
|
||||
@@ -3852,7 +3853,7 @@ static void llm_build_kv_store(
|
||||
cb(v_cur_t, "v_cur_t", il);
|
||||
|
||||
struct ggml_tensor * k_cache_view = ggml_view_1d(ctx, kv.k_l[il], n_tokens*n_embd_gqa,
|
||||
(ggml_type_sizef(kv.k_l[il]->type)*n_embd_gqa)*kv_head);
|
||||
(ggml_row_size(kv.k_l[il]->type, n_embd_gqa))*kv_head);
|
||||
cb(k_cache_view, "k_cache_view", il);
|
||||
|
||||
struct ggml_tensor * v_cache_view = ggml_view_2d(ctx, kv.v_l[il], n_tokens, n_embd_gqa,
|
||||
@@ -4011,8 +4012,8 @@ static struct ggml_tensor * llm_build_kqv(
|
||||
struct ggml_tensor * k =
|
||||
ggml_view_3d(ctx, kv.k_l[il],
|
||||
n_embd_head, n_kv, n_head_kv,
|
||||
ggml_type_sizef(kv.k_l[il]->type)*n_embd_gqa,
|
||||
ggml_type_sizef(kv.k_l[il]->type)*n_embd_head,
|
||||
ggml_row_size(kv.k_l[il]->type, n_embd_gqa),
|
||||
ggml_row_size(kv.k_l[il]->type, n_embd_head),
|
||||
0);
|
||||
cb(k, "k", il);
|
||||
|
||||
@@ -8471,7 +8472,7 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
|
||||
bool quantize = name.rfind("weight") == name.size() - 6; // ends with 'weight'?
|
||||
|
||||
// quantize only 2D tensors
|
||||
quantize &= (tensor->n_dims == 2);
|
||||
quantize &= (ggml_n_dims(tensor) == 2);
|
||||
quantize &= params->quantize_output_tensor || name != "output.weight";
|
||||
quantize &= !params->only_copy;
|
||||
|
||||
|
||||
@@ -1,3 +1,5 @@
|
||||
numpy==1.24.4
|
||||
sentencepiece==0.1.98
|
||||
transformers>=4.34.0
|
||||
gguf>=0.1.0
|
||||
protobuf>=4.21.0
|
||||
|
||||
@@ -54,7 +54,7 @@ static void init_tensor_uniform(ggml_tensor * tensor, float min = -1.0f, float m
|
||||
ggml_backend_tensor_set(tensor, data.data(), 0, size * sizeof(float));
|
||||
} else if (ggml_is_quantized(tensor->type) || tensor->type == GGML_TYPE_F16) {
|
||||
GGML_ASSERT(size % ggml_blck_size(tensor->type) == 0);
|
||||
std::vector<uint8_t> dataq(ggml_type_size(tensor->type)*size/ggml_blck_size(tensor->type));
|
||||
std::vector<uint8_t> dataq(ggml_row_size(tensor->type, size));
|
||||
int64_t hist[16];
|
||||
ggml_quantize_chunk(tensor->type, data.data(), dataq.data(), 0, size, hist);
|
||||
ggml_backend_tensor_set(tensor, dataq.data(), 0, dataq.size());
|
||||
@@ -72,6 +72,8 @@ static std::vector<float> tensor_to_float(const ggml_tensor * t) {
|
||||
|
||||
ggml_type_traits_t tt = ggml_internal_get_type_traits(t->type);
|
||||
size_t bs = ggml_blck_size(t->type);
|
||||
std::vector<float> vq(ggml_blck_size(t->type));
|
||||
bool quantized = ggml_is_quantized(t->type);
|
||||
|
||||
// access elements by index to avoid gaps in views
|
||||
for (int64_t i3 = 0; i3 < t->ne[3]; i3++) {
|
||||
@@ -85,9 +87,8 @@ static std::vector<float> tensor_to_float(const ggml_tensor * t) {
|
||||
tv.push_back(*(float *) &buf[i]);
|
||||
} else if (t->type == GGML_TYPE_I32) {
|
||||
tv.push_back((float)*(int32_t *) &buf[i]);
|
||||
} else if (ggml_is_quantized(t->type)) {
|
||||
std::vector<float> vq(ggml_blck_size(t->type));
|
||||
tt.to_float(&buf[i], vq.data(), ggml_blck_size(t->type));
|
||||
} else if (quantized) {
|
||||
tt.to_float(&buf[i], vq.data(), bs);
|
||||
tv.insert(tv.end(), vq.begin(), vq.end());
|
||||
} else {
|
||||
GGML_ASSERT(false);
|
||||
|
||||
@@ -286,7 +286,7 @@ int main(int argc, char * argv[]) {
|
||||
qfns.from_float_reference(test_data1, test_q1, size);
|
||||
return test_q1[0];
|
||||
};
|
||||
size_t quantized_size = size / ggml_blck_size(type) * ggml_type_size(type);
|
||||
size_t quantized_size = ggml_row_size(type, size);
|
||||
benchmark_function(size, quantized_size, iterations, quantize_fn);
|
||||
}
|
||||
printf("\n");
|
||||
@@ -300,7 +300,7 @@ int main(int argc, char * argv[]) {
|
||||
qfns.from_float(test_data1, test_q1, size);
|
||||
return test_q1[0];
|
||||
};
|
||||
size_t quantized_size = size / ggml_blck_size(type) * ggml_type_size(type);
|
||||
size_t quantized_size = ggml_row_size(type, size);
|
||||
benchmark_function(size, quantized_size, iterations, quantize_fn);
|
||||
}
|
||||
printf("\n");
|
||||
@@ -315,7 +315,7 @@ int main(int argc, char * argv[]) {
|
||||
qfns.to_float(test_q1, test_out, size);
|
||||
return test_out[0];
|
||||
};
|
||||
size_t quantized_size = size / ggml_blck_size(type) * ggml_type_size(type);
|
||||
size_t quantized_size = ggml_row_size(type, size);
|
||||
benchmark_function(size, quantized_size, iterations, quantize_fn);
|
||||
}
|
||||
printf("\n");
|
||||
@@ -330,7 +330,7 @@ int main(int argc, char * argv[]) {
|
||||
vdot.from_float(test_data1, test_q1, size);
|
||||
return test_q1[0];
|
||||
};
|
||||
size_t quantized_size = size / ggml_blck_size(type) * ggml_type_size(type);
|
||||
size_t quantized_size = ggml_row_size(type, size);
|
||||
benchmark_function(size, quantized_size, iterations, quantize_fn);
|
||||
}
|
||||
printf("\n");
|
||||
@@ -347,7 +347,7 @@ int main(int argc, char * argv[]) {
|
||||
qfns.vec_dot(size, &result, test_q1, test_q2);
|
||||
return result;
|
||||
};
|
||||
size_t quantized_size = size / ggml_blck_size(type) * ggml_type_size(type);
|
||||
size_t quantized_size = ggml_row_size(type, size);
|
||||
benchmark_function(size, quantized_size, iterations, quantize_fn);
|
||||
}
|
||||
printf("\n");
|
||||
|
||||
Reference in New Issue
Block a user