Compare commits

..

1 Commit

Author SHA1 Message Date
Francis Couture-Harpin
faaac59d16 llama : support NUL bytes in tokens 2024-08-11 21:00:03 -04:00
11 changed files with 38 additions and 27 deletions

View File

@@ -6,13 +6,15 @@ on:
- '.github/workflows/python-check-requirements.yml'
- 'scripts/check-requirements.sh'
- 'convert*.py'
- '**/requirements*.txt'
- 'requirements.txt'
- 'requirements/*.txt'
pull_request:
paths:
- '.github/workflows/python-check-requirements.yml'
- 'scripts/check-requirements.sh'
- 'convert*.py'
- '**/requirements*.txt'
- 'requirements.txt'
- 'requirements/*.txt'
concurrency:
group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}

View File

@@ -2224,9 +2224,8 @@ class InternLM2Model(Model):
def set_vocab(self):
# (TODO): Is there a better way?
# Copy from _set_vocab_sentencepiece, The only difference is that we will treat the character
# \x00 specially and convert it into an emoji character to prevent it from being mistakenly
# recognized as an empty string in C++.
# Copy from _set_vocab_sentencepiece, The only difference is that we find mislabeled UNUSED tokens,
# and that we set '<|im_end|>' as the eos token for chat models.
from sentencepiece import SentencePieceProcessor
from sentencepiece import sentencepiece_model_pb2 as model
@@ -2253,11 +2252,6 @@ class InternLM2Model(Model):
piece = tokenizer.IdToPiece(token_id)
text = piece.encode("utf-8")
score = tokenizer.GetScore(token_id)
if text == b"\x00":
# (TODO): fixme
# Hack here and replace the \x00 characters.
logger.warning(f"InternLM2 convert token '{text}' to '🐉'!")
text = "🐉".encode("utf-8")
toktype = SentencePieceTokenTypes.NORMAL
if tokenizer.IsUnknown(token_id):

View File

@@ -561,7 +561,7 @@ static void load_vocab(const char * filename, const Config * config, struct llam
vocab->id_to_token.resize(n_vocab);
for (uint32_t i = 0; i < n_vocab; i++) {
std::string word = gguf_get_arr_str(ctx, token_idx, i);
std::string word(gguf_get_arr_str(ctx, token_idx, i), gguf_get_arr_str_n(ctx, token_idx, i));
vocab->token_to_id[word] = i;

View File

@@ -12,7 +12,7 @@ static bool g_verbose = false;
static std::string get_kv_str(struct gguf_context * ctx_gguf, const std::string & key){
int id = gguf_find_key(ctx_gguf, key.c_str());
return id < 0 ? "" : std::string(gguf_get_val_str(ctx_gguf, id));
return id < 0 ? "" : std::string(gguf_get_val_str(ctx_gguf, id), gguf_get_val_str_n(ctx_gguf, id));
}
static float get_kv_f32(struct gguf_context * ctx_gguf, const std::string & key) {

View File

@@ -225,7 +225,7 @@ static std::string gguf_kv_to_str(const struct gguf_context * ctx_gguf, int i) {
switch (type) {
case GGUF_TYPE_STRING:
return gguf_get_val_str(ctx_gguf, i);
return std::string(gguf_get_val_str(ctx_gguf, i), gguf_get_val_str_n(ctx_gguf, i));
case GGUF_TYPE_ARRAY:
{
const enum gguf_type arr_type = gguf_get_arr_type(ctx_gguf, i);
@@ -235,7 +235,7 @@ static std::string gguf_kv_to_str(const struct gguf_context * ctx_gguf, int i) {
ss << "[";
for (int j = 0; j < arr_n; j++) {
if (arr_type == GGUF_TYPE_STRING) {
std::string val = gguf_get_arr_str(ctx_gguf, i, j);
std::string val(gguf_get_arr_str(ctx_gguf, i, j), gguf_get_arr_str_n(ctx_gguf, i, j));
// escape quotes
replace_all(val, "\\", "\\\\");
replace_all(val, "\"", "\\\"");

View File

@@ -2,4 +2,4 @@
--extra-index-url https://download.pytorch.org/whl/cpu
pillow~=10.2.0
torch~=2.2.1
torchvision~=0.17.1
torchvision~=0.17.1

View File

@@ -631,7 +631,6 @@ struct server_context {
bool clean_kv_cache = true;
bool add_bos_token = true;
bool has_eos_token = false;
int32_t n_ctx; // total context for all clients / slots
@@ -694,7 +693,7 @@ struct server_context {
n_ctx = llama_n_ctx(ctx);
add_bos_token = llama_should_add_bos_token(model);
has_eos_token = llama_add_eos_token(model) != 1;
has_eos_token = llama_add_eos_token(model) != 1;
return true;
}
@@ -1032,7 +1031,7 @@ struct server_context {
{
slot.sparams.logit_bias.clear();
if (json_value(data, "ignore_eos", false) && has_eos_token) {
if (json_value(data, "ignore_eos", false) && has_eos_token) {
slot.sparams.logit_bias[llama_token_eos(model)] = -INFINITY;
}

View File

@@ -2313,10 +2313,12 @@ extern "C" {
GGML_API double gguf_get_val_f64 (const struct gguf_context * ctx, int key_id);
GGML_API bool gguf_get_val_bool(const struct gguf_context * ctx, int key_id);
GGML_API const char * gguf_get_val_str (const struct gguf_context * ctx, int key_id);
GGML_API int gguf_get_val_str_n(const struct gguf_context * ctx, int key_id);
GGML_API const void * gguf_get_val_data(const struct gguf_context * ctx, int key_id);
GGML_API int gguf_get_arr_n (const struct gguf_context * ctx, int key_id);
GGML_API const void * gguf_get_arr_data(const struct gguf_context * ctx, int key_id);
GGML_API const char * gguf_get_arr_str (const struct gguf_context * ctx, int key_id, int i);
GGML_API int gguf_get_arr_str_n(const struct gguf_context * ctx, int key_id, int i);
GGML_API int gguf_get_n_tensors (const struct gguf_context * ctx);
GGML_API int gguf_find_tensor (const struct gguf_context * ctx, const char * name);

View File

@@ -21129,7 +21129,7 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
(int64_t) info->ne[2] *
(int64_t) info->ne[3];
if (ggml_blck_size(info->type) == 0 || ne % ggml_blck_size(info->type) != 0) {
if (ggml_blck_size(info->type) == 0 || ne % ggml_blck_size(info->type) != 0) {
fprintf(stderr, "%s: tensor '%s' of type %d (%s) number of elements (%" PRId64 ") is not a multiple of block size (%" PRId64 ")\n",
__func__, info->name.data, (int) info->type, ggml_type_name(info->type), ne, ggml_blck_size(info->type));
fclose(file);
@@ -21335,6 +21335,14 @@ const char * gguf_get_arr_str(const struct gguf_context * ctx, int key_id, int i
return str->data;
}
int gguf_get_arr_str_n(const struct gguf_context * ctx, int key_id, int i) {
GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_ARRAY);
struct gguf_kv * kv = &ctx->kv[key_id];
struct gguf_str * str = &((struct gguf_str *) kv->value.arr.data)[i];
return str->n;
}
int gguf_get_arr_n(const struct gguf_context * ctx, int key_id) {
GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_ARRAY);
@@ -21413,6 +21421,12 @@ const char * gguf_get_val_str(const struct gguf_context * ctx, int key_id) {
return ctx->kv[key_id].value.str.data;
}
int gguf_get_val_str_n(const struct gguf_context * ctx, int key_id) {
GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_STRING);
return ctx->kv[key_id].value.str.n;
}
const void * gguf_get_val_data(const struct gguf_context * ctx, int key_id) {
GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
GGML_ASSERT(ctx->kv[key_id].type != GGUF_TYPE_ARRAY);

View File

@@ -85,14 +85,14 @@ void llama_sample_top_k_impl(struct llama_sampling * smpl, llama_token_data_arra
constexpr float bucket_low = -10.0f;
constexpr float bucket_high = 10.0f;
constexpr float bucket_scale = nbuckets/(bucket_high - bucket_low);
constexpr float bucket_inter = -bucket_low * bucket_scale;
constexpr float bucket_inter = -bucket_low * bucket_scale;
std::vector<int> bucket_idx(candidates->size);
std::vector<int> histo(nbuckets, 0);
for (int i = 0; i < (int)candidates->size; ++i) {
const float val = candidates->data[i].logit;
int ib = int(bucket_scale * val + bucket_inter); //nbuckets * (val - bucket_low) / (bucket_high - bucket_low);
int ib = int(bucket_scale * val + bucket_inter); //nbuckets * (val - bucket_low) / (bucket_high - bucket_low);
ib = std::max(0, std::min(nbuckets-1, ib));
bucket_idx[i] = ib;
++histo[ib];

View File

@@ -1406,7 +1406,7 @@ static std::string gguf_kv_to_str(const struct gguf_context * ctx_gguf, int i) {
switch (type) {
case GGUF_TYPE_STRING:
return gguf_get_val_str(ctx_gguf, i);
return std::string(gguf_get_val_str(ctx_gguf, i), gguf_get_val_str_n(ctx_gguf, i));
case GGUF_TYPE_ARRAY:
{
const enum gguf_type arr_type = gguf_get_arr_type(ctx_gguf, i);
@@ -1416,7 +1416,7 @@ static std::string gguf_kv_to_str(const struct gguf_context * ctx_gguf, int i) {
ss << "[";
for (int j = 0; j < arr_n; j++) {
if (arr_type == GGUF_TYPE_STRING) {
std::string val = gguf_get_arr_str(ctx_gguf, i, j);
std::string val(gguf_get_arr_str(ctx_gguf, i, j), gguf_get_arr_str_n(ctx_gguf, i, j));
// escape quotes
replace_all(val, "\\", "\\\\");
replace_all(val, "\"", "\\\"");
@@ -3436,7 +3436,7 @@ namespace GGUFMeta {
static constexpr gguf_type gt = GGUF_TYPE_STRING;
static std::string getter(const gguf_context * ctx, const int kid) {
return gguf_get_val_str(ctx, kid);
return std::string(gguf_get_val_str(ctx, kid), gguf_get_val_str_n(ctx, kid));
}
};
@@ -5316,7 +5316,7 @@ static void llm_load_vocab(
const int n_merges = gguf_get_arr_n(ctx, merges_keyidx);
for (int i = 0; i < n_merges; i++) {
const std::string word = gguf_get_arr_str(ctx, merges_keyidx, i);
const std::string word(gguf_get_arr_str(ctx, merges_keyidx, i), gguf_get_arr_str_n(ctx, merges_keyidx, i));
GGML_ASSERT(unicode_cpts_from_utf8(word).size() > 0);
std::string first;
@@ -5521,7 +5521,7 @@ static void llm_load_vocab(
vocab.id_to_token.resize(n_vocab);
for (uint32_t i = 0; i < n_vocab; i++) {
std::string word = gguf_get_arr_str(ctx, token_idx, i);
std::string word(gguf_get_arr_str(ctx, token_idx, i), gguf_get_arr_str_n(ctx, token_idx, i));
GGML_ASSERT(unicode_cpts_from_utf8(word).size() > 0);
vocab.token_to_id[word] = i;
@@ -16207,7 +16207,7 @@ static void llama_lora_adapter_init_internal(struct llama_model * model, const c
{
auto get_kv_str = [&](const std::string & key) -> std::string {
int id = gguf_find_key(ctx_gguf, key.c_str());
return id < 0 ? "" : std::string(gguf_get_val_str(ctx_gguf, id));
return id < 0 ? "" : std::string(gguf_get_val_str(ctx_gguf, id), gguf_get_val_str_n(ctx_gguf, id));
};
auto get_kv_f32 = [&](const std::string & key) -> float {
int id = gguf_find_key(ctx_gguf, key.c_str());