Mirror of https://github.com/ggerganov/llama.cpp.git (synced 2026-02-12 14:03:20 +02:00)

Compare commits: b2501...gg/hf-args (2 commits)

| Author | SHA1 | Date |
|---|---|---|
| | 8c3d5b5a79 | |
| | 1b2f0a9ee8 | |
.github/workflows/build.yml (vendored, 11 changed lines)

@@ -135,9 +135,6 @@ jobs:
  ubuntu-focal-make:
    runs-on: ubuntu-20.04
    env:
      LLAMA_NODE_AVAILABLE: true
      LLAMA_PYTHON_AVAILABLE: true

    steps:
      - name: Clone

@@ -150,14 +147,6 @@ jobs:
          sudo apt-get update
          sudo apt-get install build-essential gcc-8

      - uses: actions/setup-node@v4
        with:
          node-version: "20"

      - uses: actions/setup-python@v4
        with:
          python-version: "3.11"

      - name: Build
        id: make_build
        env:
@@ -99,7 +99,6 @@ option(LLAMA_CUDA_F16 "llama: use 16 bit floats for some
set(LLAMA_CUDA_KQUANTS_ITER "2" CACHE STRING "llama: iters./thread per block for Q2_K/Q6_K")
set(LLAMA_CUDA_PEER_MAX_BATCH_SIZE "128" CACHE STRING
    "llama: max. batch size for using peer access")
option(LLAMA_CUDA_NO_PEER_COPY "llama: do not use peer to peer copies" OFF)
option(LLAMA_CURL "llama: use libcurl to download model from an URL" OFF)
option(LLAMA_HIPBLAS "llama: use hipBLAS" OFF)
option(LLAMA_HIP_UMA "llama: use HIP unified memory architecture" OFF)

@@ -388,9 +387,6 @@ if (LLAMA_CUBLAS)
    endif()
    add_compile_definitions(K_QUANTS_PER_ITERATION=${LLAMA_CUDA_KQUANTS_ITER})
    add_compile_definitions(GGML_CUDA_PEER_MAX_BATCH_SIZE=${LLAMA_CUDA_PEER_MAX_BATCH_SIZE})
    if (LLAMA_CUDA_NO_PEER_COPY)
        add_compile_definitions(GGML_CUDA_NO_PEER_COPY)
    endif()

    if (LLAMA_STATIC)
        if (WIN32)

@@ -535,10 +531,6 @@ if (LLAMA_HIPBLAS)
        add_compile_definitions(GGML_CUDA_FORCE_MMQ)
    endif()

    if (LLAMA_CUDA_NO_PEER_COPY)
        add_compile_definitions(GGML_CUDA_NO_PEER_COPY)
    endif()

    add_compile_definitions(GGML_CUDA_DMMV_X=${LLAMA_CUDA_DMMV_X})
    add_compile_definitions(GGML_CUDA_MMV_Y=${LLAMA_CUDA_MMV_Y})
    add_compile_definitions(K_QUANTS_PER_ITERATION=${LLAMA_CUDA_KQUANTS_ITER})
Makefile (9 changed lines)

@@ -452,9 +452,9 @@ ifdef LLAMA_CUDA_PEER_MAX_BATCH_SIZE
else
	MK_NVCCFLAGS += -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128
endif # LLAMA_CUDA_PEER_MAX_BATCH_SIZE
ifdef LLAMA_CUDA_NO_PEER_COPY
	MK_NVCCFLAGS += -DGGML_CUDA_NO_PEER_COPY
endif # LLAMA_CUDA_NO_PEER_COPY
#ifdef LLAMA_CUDA_CUBLAS
#	MK_NVCCFLAGS += -DGGML_CUDA_CUBLAS
#endif # LLAMA_CUDA_CUBLAS
ifdef LLAMA_CUDA_CCBIN
	MK_NVCCFLAGS += -ccbin $(LLAMA_CUDA_CCBIN)
endif

@@ -535,9 +535,6 @@ endif # LLAMA_HIP_UMA
ifdef LLAMA_CUDA_FORCE_DMMV
	HIPFLAGS += -DGGML_CUDA_FORCE_DMMV
endif # LLAMA_CUDA_FORCE_DMMV
ifdef LLAMA_CUDA_NO_PEER_COPY
	HIPFLAGS += -DGGML_CUDA_NO_PEER_COPY
endif # LLAMA_CUDA_NO_PEER_COPY
OBJS += ggml-cuda.o
ggml-cuda.o: ggml-cuda.cu ggml-cuda.h
	$(HIPCC) $(CXXFLAGS) $(HIPFLAGS) -x hip -c -o $@ $<
@@ -647,14 +647,6 @@ static bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg,
        params.model = argv[i];
        return true;
    }
    if (arg == "-mu" || arg == "--model-url") {
        if (++i >= argc) {
            invalid_param = true;
            return true;
        }
        params.model_url = argv[i];
        return true;
    }
    if (arg == "-md" || arg == "--model-draft") {
        if (++i >= argc) {
            invalid_param = true;

@@ -671,6 +663,30 @@ static bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg,
        params.model_alias = argv[i];
        return true;
    }
    if (arg == "-mu" || arg == "--model-url") {
        if (++i >= argc) {
            invalid_param = true;
            return true;
        }
        params.model_url = argv[i];
        return true;
    }
    if (arg == "-hfr" || arg == "--hf-repo") {
        if (++i >= argc) {
            invalid_param = true;
            return true;
        }
        params.hf_repo = argv[i];
        return true;
    }
    if (arg == "-hff" || arg == "--hf-file") {
        if (++i >= argc) {
            invalid_param = true;
            return true;
        }
        params.hf_file = argv[i];
        return true;
    }
    if (arg == "--lora") {
        if (++i >= argc) {
            invalid_param = true;

@@ -1403,10 +1419,14 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) {
    printf(" layer range to apply the control vector(s) to, start and end inclusive\n");
    printf(" -m FNAME, --model FNAME\n");
    printf(" model path (default: %s)\n", params.model.c_str());
    printf(" -mu MODEL_URL, --model-url MODEL_URL\n");
    printf(" model download url (default: %s)\n", params.model_url.c_str());
    printf(" -md FNAME, --model-draft FNAME\n");
    printf(" draft model for speculative decoding\n");
    printf(" draft model for speculative decoding (default: unused)\n");
    printf(" -mu MODEL_URL, --model-url MODEL_URL\n");
    printf(" model download url (default: unused)\n");
    printf(" -hfr REPO, --hf-repo REPO\n");
    printf(" Hugging Face model repository (default: unused)\n");
    printf(" -hff FILE, --hf-file FILE\n");
    printf(" Hugging Face model file (default: unused)\n");
    printf(" -ld LOGDIR, --logdir LOGDIR\n");
    printf(" path under which to save YAML logs (no logging if unset)\n");
    printf(" --override-kv KEY=TYPE:VALUE\n");

@@ -1655,8 +1675,10 @@ void llama_batch_add(

#ifdef LLAMA_USE_CURL

struct llama_model * llama_load_model_from_url(const char * model_url, const char * path_model,
                                               struct llama_model_params params) {
struct llama_model * llama_load_model_from_url(
        const char * model_url,
        const char * path_model,
        const struct llama_model_params & params) {
    // Basic validation of the model_url
    if (!model_url || strlen(model_url) == 0) {
        fprintf(stderr, "%s: invalid model_url\n", __func__);

@@ -1850,25 +1872,62 @@ struct llama_model * llama_load_model_from_url(const char * model_url, const char * path_model,
    return llama_load_model_from_file(path_model, params);
}

struct llama_model * llama_load_model_from_hf(
        const char * repo,
        const char * model,
        const char * path_model,
        const struct llama_model_params & params) {
    // construct hugging face model url:
    //
    //  --repo ggml-org/models --file tinyllama-1.1b/ggml-model-f16.gguf
    //    https://huggingface.co/ggml-org/models/resolve/main/tinyllama-1.1b/ggml-model-f16.gguf
    //
    //  --repo TheBloke/Mixtral-8x7B-v0.1-GGUF --file mixtral-8x7b-v0.1.Q4_K_M.gguf
    //    https://huggingface.co/TheBloke/Mixtral-8x7B-v0.1-GGUF/resolve/main/mixtral-8x7b-v0.1.Q4_K_M.gguf
    //

    std::string model_url = "https://huggingface.co/";
    model_url += repo;
    model_url += "/resolve/main/";
    model_url += model;

    return llama_load_model_from_url(model_url.c_str(), path_model, params);
}

#else

struct llama_model * llama_load_model_from_url(const char * /*model_url*/, const char * /*path_model*/,
                                               struct llama_model_params /*params*/) {
struct llama_model * llama_load_model_from_url(
        const char * /*model_url*/,
        const char * /*path_model*/,
        const struct llama_model_params & /*params*/) {
    fprintf(stderr, "%s: llama.cpp built without libcurl, downloading from an url not supported.\n", __func__);
    return nullptr;
}

struct llama_model * llama_load_model_from_hf(
        const char * /*repo*/,
        const char * /*model*/,
        const char * /*path_model*/,
        const struct llama_model_params & /*params*/) {
    fprintf(stderr, "%s: llama.cpp built without libcurl, downloading from Hugging Face not supported.\n", __func__);
    return nullptr;
}

#endif // LLAMA_USE_CURL

std::tuple<struct llama_model *, struct llama_context *> llama_init_from_gpt_params(gpt_params & params) {
    auto mparams = llama_model_params_from_gpt_params(params);

    llama_model * model = nullptr;
    if (!params.model_url.empty()) {

    if (!params.hf_repo.empty() && !params.hf_file.empty()) {
        model = llama_load_model_from_hf(params.hf_repo.c_str(), params.hf_file.c_str(), params.model.c_str(), mparams);
    } else if (!params.model_url.empty()) {
        model = llama_load_model_from_url(params.model_url.c_str(), params.model.c_str(), mparams);
    } else {
        model = llama_load_model_from_file(params.model.c_str(), mparams);
    }

    if (model == NULL) {
        fprintf(stderr, "%s: error: failed to load model '%s'\n", __func__, params.model.c_str());
        return std::make_tuple(nullptr, nullptr);

@@ -1908,7 +1967,7 @@ std::tuple<struct llama_model *, struct llama_context *> llama_init_from_gpt_params(gpt_params & params) {
    }

    for (unsigned int i = 0; i < params.lora_adapter.size(); ++i) {
        const std::string& lora_adapter = std::get<0>(params.lora_adapter[i]);
        const std::string & lora_adapter = std::get<0>(params.lora_adapter[i]);
        float lora_scale = std::get<1>(params.lora_adapter[i]);
        int err = llama_model_apply_lora_from_file(model,
                                                   lora_adapter.c_str(),
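The hunks above add the `-hfr/--hf-repo` and `-hff/--hf-file` arguments and route model loading through the new `llama_load_model_from_hf`. As a rough illustration only (not part of this diff), a minimal sketch of calling the new loader directly, assuming a build with `LLAMA_CURL` enabled; the repo and file names reuse the example from the comment above, and the local output path is hypothetical:

```cpp
#include "common.h"
#include "llama.h"

#include <cstdio>

int main() {
    llama_backend_init();

    // roughly what `-hfr ggml-org/models -hff tinyllama-1.1b/ggml-model-f16.gguf -m models/tinyllama.gguf`
    // resolves to inside llama_init_from_gpt_params
    llama_model_params mparams = llama_model_default_params();

    // downloads https://huggingface.co/ggml-org/models/resolve/main/tinyllama-1.1b/ggml-model-f16.gguf
    // to the local path given as the third argument, then loads it from there
    llama_model * model = llama_load_model_from_hf(
            "ggml-org/models",
            "tinyllama-1.1b/ggml-model-f16.gguf",
            "models/tinyllama.gguf",
            mparams);

    if (model == nullptr) {
        fprintf(stderr, "download or load failed (or llama.cpp was built without LLAMA_CURL)\n");
        llama_backend_free();
        return 1;
    }

    llama_free_model(model);
    llama_backend_free();
    return 0;
}
```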
@@ -89,9 +89,11 @@ struct gpt_params {
    struct llama_sampling_params sparams;

    std::string model = "models/7B/ggml-model-f16.gguf"; // model path
    std::string model_url = ""; // model url to download
    std::string model_draft = ""; // draft model for speculative decoding
    std::string model_draft = ""; // draft model for speculative decoding
    std::string model_alias = "unknown"; // model alias
    std::string model_url = ""; // model url to download
    std::string hf_repo = ""; // HF repo
    std::string hf_file = ""; // HF file
    std::string prompt = "";
    std::string prompt_file = ""; // store the external prompt file name
    std::string path_prompt_cache = ""; // path to file for saving/loading prompt eval state

@@ -192,8 +194,8 @@ std::tuple<struct llama_model *, struct llama_context *> llama_init_from_gpt_params
struct llama_model_params llama_model_params_from_gpt_params (const gpt_params & params);
struct llama_context_params llama_context_params_from_gpt_params(const gpt_params & params);

struct llama_model * llama_load_model_from_url(const char * model_url, const char * path_model,
                                               struct llama_model_params params);
struct llama_model * llama_load_model_from_url(const char * model_url, const char * path_model, const struct llama_model_params & params);
struct llama_model * llama_load_model_from_hf(const char * repo, const char * file, const char * path_model, const struct llama_model_params & params);

// Batch utils
@@ -9,7 +9,7 @@
#include <unordered_set>
#include <vector>

using json = nlohmann::ordered_json;
using json = nlohmann::json;

const std::string SPACE_RULE = "\" \"?";

@@ -124,7 +124,7 @@ static std::string replacePattern(const std::string & input, const std::regex &
}

static std::string format_literal(const std::string & literal) {
    std::string escaped = replacePattern(literal, GRAMMAR_LITERAL_ESCAPE_RE, [&](const std::smatch & match) {
    std::string escaped = replacePattern(json(literal).dump(), GRAMMAR_LITERAL_ESCAPE_RE, [&](const std::smatch & match) {
        char c = match.str()[0];
        return GRAMMAR_LITERAL_ESCAPES.at(c);
    });

@@ -137,7 +137,7 @@ private:
    std::function<json(const std::string &)> _fetch_json;
    bool _dotall;
    std::map<std::string, std::string> _rules;
    std::unordered_map<std::string, json> _refs;
    std::unordered_map<std::string, nlohmann::json> _refs;
    std::unordered_set<std::string> _refs_being_resolved;
    std::vector<std::string> _errors;
    std::vector<std::string> _warnings;

@@ -413,7 +413,7 @@ private:
            std::string prop_rule_name = visit(prop_schema, name + (name.empty() ? "" : "-") + prop_name);
            prop_kv_rule_names[prop_name] = _add_rule(
                name + (name.empty() ? "" : "-") + prop_name + "-kv",
                format_literal(json(prop_name).dump()) + " space \":\" space " + prop_rule_name
                format_literal(prop_name) + " space \":\" space " + prop_rule_name
            );
            if (required.find(prop_name) != required.end()) {
                required_props.push_back(prop_name);

@@ -495,7 +495,7 @@ public:
        _rules["space"] = SPACE_RULE;
    }

    void resolve_refs(json & schema, const std::string & url) {
    void resolve_refs(nlohmann::json & schema, const std::string & url) {
        /*
         * Resolves all $ref fields in the given schema, fetching any remote schemas,
         * replacing each $ref with absolute reference URL and populates _refs with the

@@ -557,7 +557,11 @@ public:
    }

    std::string _generate_constant_rule(const json & value) {
        return format_literal(value.dump());
        if (!value.is_string()) {
            _errors.push_back("Only std::string constants are supported, got " + value.dump());
            return "";
        }
        return format_literal(value.get<std::string>());
    }

    std::string visit(const json & schema, const std::string & name) {
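The two `format_literal` variants above differ in who is responsible for JSON-encoding the literal: one expects an already-dumped JSON string from the caller, the other calls `json(literal).dump()` itself. A small standalone illustration with nlohmann/json (not part of this patch) of what that dump step yields:

```cpp
#include <nlohmann/json.hpp>

#include <cstdio>
#include <string>

int main() {
    using json = nlohmann::json;

    std::string literal = "say \"hi\"";

    // json(literal).dump() wraps the value in quotes and escapes it,
    // so the string above becomes: "say \"hi\"" (quotes included)
    std::string dumped = json(literal).dump();
    printf("%s\n", dumped.c_str());

    return 0;
}
```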
@@ -1,4 +1,4 @@
#pragma once
#include "json.hpp"

std::string json_schema_to_grammar(const nlohmann::ordered_json& schema);
std::string json_schema_to_grammar(const nlohmann::json& schema);
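The only change in this header is the JSON type in the signature: `nlohmann::json` keeps object members sorted by key, while `nlohmann::ordered_json` preserves insertion order, which is what the "in original order" test expectations elsewhere in this diff depend on. A minimal sketch of the difference (plain nlohmann/json usage, not taken from this patch):

```cpp
#include <nlohmann/json.hpp>

#include <cstdio>

int main() {
    const char * text = R"({"b": 1, "a": 2})";

    // nlohmann::json stores object keys in sorted order: {"a":2,"b":1}
    auto sorted = nlohmann::json::parse(text);

    // nlohmann::ordered_json preserves the order from the input: {"b":1,"a":2}
    auto ordered = nlohmann::ordered_json::parse(text);

    printf("json:         %s\n", sorted.dump().c_str());
    printf("ordered_json: %s\n", ordered.dump().c_str());

    return 0;
}
```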
@@ -61,7 +61,7 @@ class SchemaConverter:

    def _format_literal(self, literal):
        escaped = GRAMMAR_LITERAL_ESCAPE_RE.sub(
            lambda m: GRAMMAR_LITERAL_ESCAPES.get(m.group(0)), literal
            lambda m: GRAMMAR_LITERAL_ESCAPES.get(m.group(0)), json.dumps(literal)
        )
        return f'"{escaped}"'

@@ -308,7 +308,8 @@ class SchemaConverter:
        return ref_name

    def _generate_constant_rule(self, value):
        return self._format_literal(json.dumps(value))
        assert isinstance(value, str), f'Only string constants are supported, got {value}'
        return self._format_literal(value)

    def visit(self, schema, name):
        schema_type = schema.get('type')

@@ -427,7 +428,7 @@ class SchemaConverter:
            prop_rule_name = self.visit(prop_schema, f'{name}{"-" if name else ""}{prop_name}')
            prop_kv_rule_names[prop_name] = self._add_rule(
                f'{name}{"-" if name else ""}{prop_name}-kv',
                fr'{self._format_literal(json.dumps(prop_name))} space ":" space {prop_rule_name}'
                fr'{self._format_literal(prop_name)} space ":" space {prop_rule_name}'
            )
        required_props = [k for k in sorted_props if k in required]
        optional_props = [k for k in sorted_props if k not in required]

File diff suppressed because it is too large.
@@ -48,7 +48,7 @@ export class SchemaConverter {
  }

  _formatLiteral(literal) {
    const escaped = literal.replace(
    const escaped = JSON.stringify(literal).replace(
      GRAMMAR_LITERAL_ESCAPE_RE,
      m => GRAMMAR_LITERAL_ESCAPES[m]
    );

@@ -327,7 +327,10 @@ export class SchemaConverter {
  }

  _generateConstantRule(value) {
    return this._formatLiteral(JSON.stringify(value));
    if (typeof value !== 'string') {
      throw new Error('Only string constants are supported, got ' + JSON.stringify(value));
    }
    return this._formatLiteral(value);
  }

  visit(schema, name) {

@@ -343,6 +346,9 @@ export class SchemaConverter {
    } else if (Array.isArray(schemaType)) {
      return this._addRule(ruleName, this._generateUnionRule(name, schemaType.map(t => ({ type: t }))));
    } else if ('const' in schema) {
      if (typeof schema.const !== 'string') {
        throw new Error('Only string constants are supported, got ' + JSON.stringify(schema.const));
      }
      return this._addRule(ruleName, this._generateConstantRule(schema.const));
    } else if ('enum' in schema) {
      const rule = schema.enum.map(v => this._generateConstantRule(v)).join(' | ');

@@ -451,7 +457,7 @@ export class SchemaConverter {
      const propRuleName = this.visit(propSchema, `${name ?? ''}${name ? '-' : ''}${propName}`);
      propKvRuleNames[propName] = this._addRule(
        `${name ?? ''}${name ? '-' : ''}${propName}-kv`,
        `${this._formatLiteral(JSON.stringify(propName))} space ":" space ${propRuleName}`
        `${this._formatLiteral(propName)} space ":" space ${propRuleName}`
      );
    }
    const requiredProps = sortedProps.filter(k => required.has(k));
@@ -30,7 +30,7 @@
#include <signal.h>
#include <memory>

using json = nlohmann::ordered_json;
using json = nlohmann::json;

bool server_verbose = false;
bool server_log_json = true;

@@ -12,7 +12,7 @@

#define DEFAULT_OAICOMPAT_MODEL "gpt-3.5-turbo-0613"

using json = nlohmann::ordered_json;
using json = nlohmann::json;

// https://community.openai.com/t/openai-chat-list-of-error-codes-and-types/357791/11
enum error_type {
ggml-cuda.cu (28 changed lines)

@@ -771,11 +771,7 @@ GGML_CALL static bool ggml_backend_cuda_buffer_cpy_tensor(ggml_backend_buffer_t
        if (src_ctx->device == dst_ctx->device) {
            CUDA_CHECK(cudaMemcpyAsync(dst->data, src->data, ggml_nbytes(src), cudaMemcpyDeviceToDevice, cudaStreamPerThread));
        } else {
#ifdef GGML_CUDA_NO_PEER_COPY
            return false;
#else
            CUDA_CHECK(cudaMemcpyPeerAsync(dst->data, dst_ctx->device, src->data, src_ctx->device, ggml_nbytes(src), cudaStreamPerThread));
#endif
        }
        CUDA_CHECK(cudaStreamSynchronize(cudaStreamPerThread));
        return true;

@@ -11326,23 +11322,19 @@ GGML_CALL static bool ggml_backend_cuda_cpy_tensor_async(ggml_backend_t backend_
        GGML_ASSERT(cuda_ctx_src->device == buf_ctx_src->device);
        GGML_ASSERT(cuda_ctx_dst->device == buf_ctx_dst->device);

        // copy on src stream
        if (cuda_ctx_src->device == cuda_ctx_dst->device) {
            CUDA_CHECK(cudaMemcpyAsync(dst->data, src->data, ggml_nbytes(dst), cudaMemcpyDeviceToDevice, cuda_ctx_dst->stream()));
        } else {
#ifdef GGML_CUDA_NO_PEER_COPY
            return false;
#else
            CUDA_CHECK(cudaMemcpyPeerAsync(dst->data, cuda_ctx_dst->device, src->data, cuda_ctx_src->device, ggml_nbytes(dst), cuda_ctx_src->stream()));
#endif
        }

        // record event on src stream
        if (!cuda_ctx_src->copy_event) {
            ggml_cuda_set_device(cuda_ctx_src->device);
            CUDA_CHECK(cudaEventCreateWithFlags(&cuda_ctx_src->copy_event, cudaEventDisableTiming));
        }

        // copy on src stream
        if (cuda_ctx_src->device == cuda_ctx_dst->device) {
            CUDA_CHECK(cudaMemcpyAsync(dst->data, src->data, ggml_nbytes(dst), cudaMemcpyDeviceToDevice, cuda_ctx_dst->stream()));
        } else {
            CUDA_CHECK(cudaMemcpyPeerAsync(dst->data, cuda_ctx_dst->device, src->data, cuda_ctx_src->device, ggml_nbytes(dst), cuda_ctx_src->stream()));
        }

        // record event on src stream
        CUDA_CHECK(cudaEventRecord(cuda_ctx_src->copy_event, cuda_ctx_src->stream()));

        // wait on dst stream for the copy to complete

@@ -11538,9 +11530,6 @@ GGML_CALL static bool ggml_backend_cuda_offload_op(ggml_backend_t backend, const
}

static ggml_backend_event_t ggml_backend_cuda_event_new(ggml_backend_t backend) {
#ifdef GGML_CUDA_NO_PEER_COPY
    return nullptr;
#else
    ggml_backend_cuda_context * cuda_ctx = (ggml_backend_cuda_context *)backend->context;

    ggml_cuda_set_device(cuda_ctx->device);

@@ -11552,7 +11541,6 @@ static ggml_backend_event_t ggml_backend_cuda_event_new(ggml_backend_t backend)
        /* .backend = */ backend,
        /* .context = */ event,
    };
#endif
}

static void ggml_backend_cuda_event_free(ggml_backend_event_t event) {
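Both versions of `ggml_backend_cuda_cpy_tensor_async` above follow the same cross-device pattern: issue the (peer) copy on the source device's stream, record an event on that stream, and make the destination stream wait on the event before running dependent work. A standalone sketch of that synchronization pattern using only the CUDA runtime API (illustrative only; the device ids, buffer size, and error handling are assumptions, and it expects at least two visible CUDA devices):

```cpp
#include <cuda_runtime.h>
#include <cstdio>

#define CUDA_OK(expr) do { cudaError_t err_ = (expr); if (err_ != cudaSuccess) { \
    fprintf(stderr, "CUDA error: %s (%s:%d)\n", cudaGetErrorString(err_), __FILE__, __LINE__); \
    return 1; } } while (0)

int main() {
    const size_t nbytes = 1 << 20;

    cudaStream_t stream_src, stream_dst;
    void * buf_src = nullptr;
    void * buf_dst = nullptr;

    // one stream and one buffer per device
    CUDA_OK(cudaSetDevice(0));
    CUDA_OK(cudaStreamCreate(&stream_src));
    CUDA_OK(cudaMalloc(&buf_src, nbytes));

    CUDA_OK(cudaSetDevice(1));
    CUDA_OK(cudaStreamCreate(&stream_dst));
    CUDA_OK(cudaMalloc(&buf_dst, nbytes));

    // copy on the source device's stream (device 0 -> device 1)
    CUDA_OK(cudaSetDevice(0));
    cudaEvent_t copy_event;
    CUDA_OK(cudaEventCreateWithFlags(&copy_event, cudaEventDisableTiming));
    CUDA_OK(cudaMemcpyPeerAsync(buf_dst, 1, buf_src, 0, nbytes, stream_src));

    // record event on the source stream ...
    CUDA_OK(cudaEventRecord(copy_event, stream_src));

    // ... and make the destination stream wait for it before any dependent work
    CUDA_OK(cudaStreamWaitEvent(stream_dst, copy_event, 0));

    CUDA_OK(cudaStreamSynchronize(stream_dst));
    printf("peer copy synchronized via event\n");
    return 0;
}
```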
@@ -90,7 +90,7 @@ static void test_all(const std::string & lang, std::function<void(const TestCase

    test({
        FAILURE,
        "invalid type",
        "invalid type type",
        R"""({
            "type": 123
        })""",

@@ -193,27 +193,21 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
    });

    test({
        SUCCESS,
        FAILURE,
        "non-string const",
        R"""({
            "const": 123
        })""",
        R"""(
            root ::= "123"
            space ::= " "?
        )"""
        ""
    });

    test({
        SUCCESS,
        FAILURE,
        "non-string enum",
        R"""({
            "enum": ["red", "amber", "green", null, 42, ["foo"]]
            "enum": [123]
        })""",
        R"""(
            root ::= "\"red\"" | "\"amber\"" | "\"green\"" | "null" | "42" | "[\"foo\"]"
            space ::= " "?
        )"""
        ""
    });

    test({

@@ -384,18 +378,20 @@ static void test_all(const std::string & lang, std::function<void(const TestCase

    test({
        SUCCESS,
        "required props in original order",
        "required props",
        R"""({
            "type": "object",
            "properties": {
                "b": {"type": "string"},
                "c": {"type": "string"},
                "a": {"type": "string"}
                "a": {
                    "type": "string"
                },
                "b": {
                    "type": "string"
                }
            },
            "required": [
                "a",
                "b",
                "c"
                "b"
            ],
            "additionalProperties": false,
            "definitions": {}

@@ -403,8 +399,7 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
        R"""(
            a-kv ::= "\"a\"" space ":" space string
            b-kv ::= "\"b\"" space ":" space string
            c-kv ::= "\"c\"" space ":" space string
            root ::= "{" space b-kv "," space c-kv "," space a-kv "}" space
            root ::= "{" space a-kv "," space b-kv "}" space
            space ::= " "?
            string ::= "\"" (
                [^"\\] |

@@ -463,13 +458,13 @@ static void test_all(const std::string & lang, std::function<void(const TestCase

    test({
        SUCCESS,
        "required + optional props each in original order",
        "required + optional props",
        R"""({
            "properties": {
                "b": {"type": "string"},
                "a": {"type": "string"},
                "d": {"type": "string"},
                "c": {"type": "string"}
                "b": {"type": "string"},
                "c": {"type": "string"},
                "d": {"type": "string"}
            },
            "required": ["a", "b"],
            "additionalProperties": false

@@ -478,14 +473,14 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
            a-kv ::= "\"a\"" space ":" space string
            b-kv ::= "\"b\"" space ":" space string
            c-kv ::= "\"c\"" space ":" space string
            c-rest ::= ( "," space d-kv )?
            d-kv ::= "\"d\"" space ":" space string
            d-rest ::= ( "," space c-kv )?
            root ::= "{" space b-kv "," space a-kv ( "," space ( d-kv d-rest | c-kv ) )? "}" space
            root ::= "{" space a-kv "," space b-kv ( "," space ( c-kv c-rest | d-kv ) )? "}" space
            space ::= " "?
            string ::= "\"" (
                [^"\\] |
                "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
            )* "\"" space
            )* "\"" space
        )"""
    });
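The expected strings in these tests are the GBNF grammars emitted by `json_schema_to_grammar` for the given schema. A hedged sketch of producing such a grammar directly with the C++ converter, using the declaration from the header change above and mirroring the `std::runtime_error` handling of the test harness further down; the schema literal is only an example:

```cpp
#include "json-schema-to-grammar.h"

#include <cstdio>
#include <stdexcept>
#include <string>

int main() {
    // example input: a small object schema like the "required props" case above
    const char * schema_text = R"""({
        "type": "object",
        "properties": {
            "a": {"type": "string"},
            "b": {"type": "string"}
        },
        "required": ["a", "b"],
        "additionalProperties": false
    })""";

    try {
        // converts the JSON schema into a GBNF grammar string (rules such as a-kv, root, space, string)
        std::string grammar = json_schema_to_grammar(nlohmann::json::parse(schema_text));
        printf("%s\n", grammar.c_str());
    } catch (const std::runtime_error & ex) {
        // schemas the converter rejects (e.g. a non-string "const") surface as errors
        fprintf(stderr, "conversion failed: %s\n", ex.what());
        return 1;
    }

    return 0;
}
```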
@@ -653,16 +648,16 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
            "$ref": "#/definitions/MyType",
            "definitions": {
                "MyType": {
                    "type": "object",
                    "properties": {
                        "a": {
                            "type": "string"
                        }
                    },
                    "required": [
                        "a"
                    ],
                    "additionalProperties": false
                    "type": "object",
                    "properties": {
                        "a": {
                            "type": "string"
                        }
                    },
                    "required": [
                        "a"
                    ],
                    "additionalProperties": false
                }
            }
        })""",

@@ -688,10 +683,10 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
            ],
            "definitions": {
                "foo": {
                    "properties": {"a": {"type": "number"}}
                    "properties": {"a": {"type": "number"}}
                },
                "bar": {
                    "properties": {"b": {"type": "number"}}
                    "properties": {"b": {"type": "number"}}
                }
            },
            "type": "object"

@@ -725,16 +720,16 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
            ],
            "definitions": {
                "foo": {
                    "properties": {"a": {"type": "number"}}
                    "properties": {"a": {"type": "number"}}
                },
                "bar": {
                    "properties": {"b": {"type": "number"}}
                    "properties": {"b": {"type": "number"}}
                },
                "bam": {
                    "properties": {"c": {"type": "number"}}
                    "properties": {"c": {"type": "number"}}
                },
                "baz": {
                    "properties": {"d": {"type": "number"}}
                    "properties": {"d": {"type": "number"}}
                }
            },
            "type": "object"

@@ -762,15 +757,15 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
            "properties": {
                "number": {
                    "type": "object",
                    "properties": {
                        "root": {
                            "type": "number"
                        }
                    },
                    "required": [
                        "root"
                    ],
                    "additionalProperties": false
                    "properties": {
                        "root": {
                            "type": "number"
                        }
                    },
                    "required": [
                        "root"
                    ],
                    "additionalProperties": false
                }
            },
            "required": [

@@ -799,40 +794,27 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
}

int main() {
    fprintf(stderr, "LLAMA_NODE_AVAILABLE = %s\n", getenv("LLAMA_NODE_AVAILABLE") ? "true" : "false");
    fprintf(stderr, "LLAMA_PYTHON_AVAILABLE = %s\n", getenv("LLAMA_PYTHON_AVAILABLE") ? "true" : "false");

    test_all("C++", [](const TestCase & tc) {
        try {
            tc.verify(json_schema_to_grammar(nlohmann::ordered_json::parse(tc.schema)));
            tc.verify(json_schema_to_grammar(nlohmann::json::parse(tc.schema)));
            tc.verify_status(SUCCESS);
        } catch (const std::runtime_error & ex) {
            fprintf(stderr, "Error: %s\n", ex.what());
            tc.verify_status(FAILURE);
        }
    });

    if (getenv("LLAMA_PYTHON_AVAILABLE") || (std::system("python --version") == 0)) {
        test_all("Python", [](const TestCase & tc) {
            write("test-json-schema-input.tmp", tc.schema);
            tc.verify_status(std::system(
                "python ./examples/json-schema-to-grammar.py test-json-schema-input.tmp > test-grammar-output.tmp") == 0 ? SUCCESS : FAILURE);
            tc.verify(read("test-grammar-output.tmp"));
        });
    } else {
        fprintf(stderr, "\033[33mWARNING: Python not found, skipping Python JSON schema -> grammar tests.\n\033[0m");
    }

    if (getenv("LLAMA_NODE_AVAILABLE") || (std::system("node --version") == 0)) {
        test_all("JavaScript", [](const TestCase & tc) {
            write("test-json-schema-input.tmp", tc.schema);
            tc.verify_status(std::system(
                "node ./tests/run-json-schema-to-grammar.mjs test-json-schema-input.tmp > test-grammar-output.tmp") == 0 ? SUCCESS : FAILURE);
            tc.verify(read("test-grammar-output.tmp"));
        });
    } else {
        fprintf(stderr, "\033[33mWARNING: Node not found, skipping JavaScript JSON schema -> grammar tests.\n\033[0m");
    }

    //test_all("Python", [](const TestCase & tc) {
    //    write("test-json-schema-input.tmp", tc.schema);
    //    tc.verify_status(std::system(
    //        "python ./examples/json-schema-to-grammar.py test-json-schema-input.tmp > test-grammar-output.tmp") == 0 ? SUCCESS : FAILURE);
    //    tc.verify(read("test-grammar-output.tmp"));
    //});
    //test_all("JavaScript", [](const TestCase & tc) {
    //    write("test-json-schema-input.tmp", tc.schema);
    //    tc.verify_status(std::system(
    //        "node ./tests/run-json-schema-to-grammar.mjs test-json-schema-input.tmp > test-grammar-output.tmp") == 0 ? SUCCESS : FAILURE);
    //    tc.verify(read("test-grammar-output.tmp"));
    //});

    test_all("Check Expectations Validity", [](const TestCase & tc) {
        if (tc.expected_status == SUCCESS) {