mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2026-02-26 14:23:22 +02:00
Compare commits
3 Commits
master-34a
...
master-99d
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
99d29c0094 | ||
|
|
3d9a551816 | ||
|
|
f6f9896ac3 |
57
ggml-metal.m
57
ggml-metal.m
@@ -7,6 +7,11 @@
|
||||
#import <Metal/Metal.h>
|
||||
#import <MetalPerformanceShaders/MetalPerformanceShaders.h>
|
||||
|
||||
#undef MIN
|
||||
#undef MAX
|
||||
#define MIN(a, b) ((a) < (b) ? (a) : (b))
|
||||
#define MAX(a, b) ((a) > (b) ? (a) : (b))
|
||||
|
||||
#ifdef GGML_METAL_NDEBUG
|
||||
#define metal_printf(...)
|
||||
#else
|
||||
@@ -15,6 +20,8 @@
|
||||
|
||||
#define UNUSED(x) (void)(x)
|
||||
|
||||
#define GGML_MAX_CONCUR (2*GGML_MAX_NODES)
|
||||
|
||||
struct ggml_metal_buffer {
|
||||
const char * name;
|
||||
|
||||
@@ -36,7 +43,7 @@ struct ggml_metal_context {
|
||||
int n_buffers;
|
||||
struct ggml_metal_buffer buffers[GGML_METAL_MAX_BUFFERS];
|
||||
|
||||
int concur_list[GGML_MAX_NODES];
|
||||
int concur_list[GGML_MAX_CONCUR];
|
||||
int concur_list_len;
|
||||
|
||||
// custom kernels
|
||||
@@ -370,15 +377,15 @@ void ggml_metal_graph_find_concurrency(
|
||||
struct ggml_metal_context * ctx,
|
||||
struct ggml_cgraph * gf) {
|
||||
int search_depth = gf->n_nodes; //we only find concurrency in this range to avoid wasting too much time
|
||||
int nodes_unused[GGML_MAX_NODES];
|
||||
int nodes_unused[GGML_MAX_CONCUR];
|
||||
|
||||
for (int i = 0; i < GGML_MAX_NODES; i++) {ctx->concur_list[i] = 0;}
|
||||
for (int i = 0; i < gf->n_nodes; i++) {nodes_unused[i] = 1;}
|
||||
for (int i = 0; i < GGML_MAX_CONCUR; i++) { ctx->concur_list[i] = 0; }
|
||||
for (int i = 0; i < gf->n_nodes; i++) { nodes_unused[i] = 1; }
|
||||
ctx->concur_list_len = 0;
|
||||
|
||||
int n_left = gf->n_nodes;
|
||||
int n_start = 0; // all nodes before n_start at nodes_unused array have been sorted and store back to ctx->concur_list
|
||||
int level_pos = 0; // at ctx->concur_list, the last layer (level) ends at level_pos
|
||||
int n_left = gf->n_nodes;
|
||||
int n_start = 0; // all nodes before n_start at nodes_unused array have been sorted and store back to ctx->concur_list
|
||||
int level_pos = 0; // at ctx->concur_list, the last layer (level) ends at level_pos
|
||||
|
||||
while (n_left > 0) {
|
||||
// number of nodes at a layer (that can be issued concurrently)
|
||||
@@ -386,28 +393,40 @@ void ggml_metal_graph_find_concurrency(
|
||||
for (int i = n_start; i < ((n_start + search_depth > gf->n_nodes) ? gf->n_nodes : n_start + search_depth); i++) {
|
||||
if (nodes_unused[i]) {
|
||||
// if the requirements for gf->nodes[i] are satisfied
|
||||
int exe_flag=1;
|
||||
int exe_flag = 1;
|
||||
|
||||
// scan all srcs
|
||||
for (int src_ind = 0; src_ind < GGML_MAX_SRC; src_ind++) {
|
||||
struct ggml_tensor * src_cur = gf->nodes[i]->src[src_ind];
|
||||
if (src_cur) {
|
||||
// if is leaf nodes it's satisfied.
|
||||
if (src_cur->op == GGML_OP_NONE && src_cur->grad == NULL) {continue;}
|
||||
// TODO: ggml_is_leaf()
|
||||
if (src_cur->op == GGML_OP_NONE && src_cur->grad == NULL) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// otherwise this src should be the output from previous nodes.
|
||||
int is_found = 0;
|
||||
|
||||
// scan 2*search_depth back because we inserted barrier.
|
||||
for (int j = ((level_pos - 2*search_depth) < 0 ? 0 : (level_pos - 2*search_depth)); j < level_pos; j++) {
|
||||
if (gf->nodes[ctx->concur_list[j]] == src_cur) {is_found = 1; break;}
|
||||
//for (int j = ((level_pos - 2*search_depth) < 0 ? 0 : (level_pos - 2*search_depth)); j < level_pos; j++) {
|
||||
for (int j = MAX(0, level_pos - 2*search_depth); j < level_pos; j++) {
|
||||
if (ctx->concur_list[j] >= 0 && gf->nodes[ctx->concur_list[j]] == src_cur) {
|
||||
is_found = 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (is_found == 0) {
|
||||
exe_flag = 0;
|
||||
break;
|
||||
}
|
||||
if (is_found == 0) {exe_flag = 0; break;}
|
||||
}
|
||||
}
|
||||
if (exe_flag) {
|
||||
// check if nodes[i]'s data will be overwritten by a node before nodes[i].
|
||||
// if node[5] and node[3] write to the same memory region, then we can't issue node[5] before node[3]
|
||||
int64_t data_start = (int64_t) gf->nodes[i]->data;
|
||||
int64_t length = (int64_t) ggml_nbytes(gf->nodes[i]);
|
||||
int64_t length = (int64_t) ggml_nbytes(gf->nodes[i]);
|
||||
for (int j = n_start; j < i; j++) {
|
||||
if (nodes_unused[j] && gf->nodes[j]->op != GGML_OP_RESHAPE \
|
||||
&& gf->nodes[j]->op != GGML_OP_VIEW \
|
||||
@@ -416,9 +435,9 @@ void ggml_metal_graph_find_concurrency(
|
||||
if (((int64_t)gf->nodes[j]->data) >= data_start + length || \
|
||||
((int64_t)gf->nodes[j]->data) + (int64_t) ggml_nbytes(gf->nodes[j]) <= data_start) {
|
||||
continue;
|
||||
} else {
|
||||
exe_flag = 0;
|
||||
}
|
||||
|
||||
exe_flag = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -435,11 +454,13 @@ void ggml_metal_graph_find_concurrency(
|
||||
ctx->concur_list[level_pos + concurrency] = -1;
|
||||
ctx->concur_list_len++;
|
||||
// jump all sorted nodes at nodes_bak
|
||||
while (!nodes_unused[n_start]) {n_start++;}
|
||||
while (!nodes_unused[n_start]) {
|
||||
n_start++;
|
||||
}
|
||||
level_pos += concurrency + 1;
|
||||
}
|
||||
|
||||
if (ctx->concur_list_len > GGML_MAX_NODES) {
|
||||
if (ctx->concur_list_len > GGML_MAX_CONCUR) {
|
||||
fprintf(stderr, "%s: too many elements for metal ctx->concur_list!\n", __func__);
|
||||
}
|
||||
}
|
||||
@@ -453,7 +474,7 @@ void ggml_metal_graph_compute(
|
||||
// else fallback to serial dispatch
|
||||
MTLComputePassDescriptor * edesc = MTLComputePassDescriptor.computePassDescriptor;
|
||||
|
||||
const bool has_concur = ctx->concur_list_len && ctx->concur_list_len <= GGML_MAX_NODES;
|
||||
const bool has_concur = ctx->concur_list_len && ctx->concur_list_len <= GGML_MAX_CONCUR;
|
||||
|
||||
const int n_nodes = has_concur ? ctx->concur_list_len : gf->n_nodes;
|
||||
edesc.dispatchType = has_concur ? MTLDispatchTypeConcurrent : MTLDispatchTypeSerial;
|
||||
|
||||
387
ggml.c
387
ggml.c
@@ -195,8 +195,8 @@ typedef void * thread_ret_t;
|
||||
#define GGML_ALIGNED_MALLOC(size) _aligned_malloc(size, GGML_MEM_ALIGN)
|
||||
#define GGML_ALIGNED_FREE(ptr) _aligned_free(ptr)
|
||||
#else
|
||||
inline static void* ggml_aligned_malloc(size_t size) {
|
||||
void* aligned_memory = NULL;
|
||||
inline static void * ggml_aligned_malloc(size_t size) {
|
||||
void * aligned_memory = NULL;
|
||||
#ifdef GGML_USE_METAL
|
||||
int result = posix_memalign(&aligned_memory, getpagesize(), size);
|
||||
#else
|
||||
@@ -3811,7 +3811,7 @@ static const char * GGML_OP_NAME[GGML_OP_COUNT] = {
|
||||
"CROSS_ENTROPY_LOSS_BACK",
|
||||
};
|
||||
|
||||
static_assert(GGML_OP_COUNT == 59, "GGML_OP_COUNT != 59");
|
||||
static_assert(GGML_OP_COUNT == 62, "GGML_OP_COUNT != 62");
|
||||
|
||||
static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
|
||||
"none",
|
||||
@@ -3883,7 +3883,7 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
|
||||
"cross_entropy_loss_back(x,y)",
|
||||
};
|
||||
|
||||
static_assert(GGML_OP_COUNT == 59, "GGML_OP_COUNT != 59");
|
||||
static_assert(GGML_OP_COUNT == 62, "GGML_OP_COUNT != 62");
|
||||
|
||||
static_assert(GGML_OP_POOL_COUNT == 2, "GGML_OP_POOL_COUNT != 2");
|
||||
|
||||
@@ -4253,7 +4253,7 @@ static inline bool ggml_is_padded_1d(const struct ggml_tensor * tensor) {
|
||||
tensor->nb[3] == tensor->nb[2]*tensor->ne[2];
|
||||
}
|
||||
|
||||
static inline bool ggml_are_same_shape(const struct ggml_tensor * t0, const struct ggml_tensor * t1) {
|
||||
bool ggml_are_same_shape(const struct ggml_tensor * t0, const struct ggml_tensor * t1) {
|
||||
static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
|
||||
|
||||
return
|
||||
@@ -6890,7 +6890,7 @@ GGML_API struct ggml_tensor * ggml_conv_1d(
|
||||
ggml_calc_conv_output_size(b->ne[0], a->ne[0], s0, p0, d0),
|
||||
a->ne[2], 1, 1,
|
||||
};
|
||||
struct ggml_tensor* result = ggml_new_tensor(ctx, GGML_TYPE_F32, 2, ne);
|
||||
struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 2, ne);
|
||||
|
||||
int32_t params[] = { s0, p0, d0 };
|
||||
ggml_set_op_params(result, ¶ms, sizeof(params));
|
||||
@@ -6905,10 +6905,10 @@ GGML_API struct ggml_tensor * ggml_conv_1d(
|
||||
|
||||
// ggml_conv_2d
|
||||
|
||||
struct ggml_tensor* ggml_conv_2d(
|
||||
struct ggml_context* ctx,
|
||||
struct ggml_tensor * a,
|
||||
struct ggml_tensor * b,
|
||||
struct ggml_tensor * ggml_conv_2d(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
struct ggml_tensor * b,
|
||||
int s0,
|
||||
int s1,
|
||||
int p0,
|
||||
@@ -6929,7 +6929,7 @@ struct ggml_tensor* ggml_conv_2d(
|
||||
ggml_calc_conv_output_size(b->ne[1], a->ne[1], s1, p1, d1),
|
||||
a->ne[3], b->ne[3],
|
||||
};
|
||||
struct ggml_tensor* result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
|
||||
struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
|
||||
|
||||
int32_t params[] = { s0, s1, p0, p1, d0, d1 };
|
||||
ggml_set_op_params(result, ¶ms, sizeof(params));
|
||||
@@ -6945,7 +6945,7 @@ struct ggml_tensor* ggml_conv_2d(
|
||||
|
||||
// ggml_conv_1d_ph
|
||||
|
||||
struct ggml_tensor* ggml_conv_1d_ph(
|
||||
struct ggml_tensor * ggml_conv_1d_ph(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
struct ggml_tensor * b,
|
||||
@@ -6963,7 +6963,7 @@ static int64_t ggml_calc_pool_output_size(int64_t ins, int ks, int s, int p) {
|
||||
|
||||
// ggml_pool_1d
|
||||
|
||||
struct ggml_tensor* ggml_pool_1d(
|
||||
struct ggml_tensor * ggml_pool_1d(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
enum ggml_op_pool op,
|
||||
@@ -6982,7 +6982,7 @@ struct ggml_tensor* ggml_pool_1d(
|
||||
ggml_calc_pool_output_size(a->ne[0], k0, s0, p0),
|
||||
a->ne[1],
|
||||
};
|
||||
struct ggml_tensor* result = ggml_new_tensor(ctx, GGML_TYPE_F32, 2, ne);
|
||||
struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 2, ne);
|
||||
|
||||
int32_t params[] = { op, k0, s0, p0 };
|
||||
ggml_set_op_params(result, ¶ms, sizeof(params));
|
||||
@@ -6996,7 +6996,7 @@ struct ggml_tensor* ggml_pool_1d(
|
||||
|
||||
// ggml_pool_2d
|
||||
|
||||
struct ggml_tensor* ggml_pool_2d(
|
||||
struct ggml_tensor * ggml_pool_2d(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
enum ggml_op_pool op,
|
||||
@@ -7019,7 +7019,7 @@ struct ggml_tensor* ggml_pool_2d(
|
||||
ggml_calc_pool_output_size(a->ne[1], k1, s1, p1),
|
||||
a->ne[2],
|
||||
};
|
||||
struct ggml_tensor* result = ggml_new_tensor(ctx, GGML_TYPE_F32, 3, ne);
|
||||
struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 3, ne);
|
||||
|
||||
int32_t params[] = { op, k0, k1, s0, s1, p0, p1 };
|
||||
ggml_set_op_params(result, ¶ms, sizeof(params));
|
||||
@@ -7349,7 +7349,7 @@ struct ggml_tensor * ggml_map_binary_inplace_f32(
|
||||
return ggml_map_binary_impl_f32(ctx, a, b, fun, true);
|
||||
}
|
||||
|
||||
// ggml_map_custom1
|
||||
// ggml_map_custom1_f32
|
||||
|
||||
static struct ggml_tensor * ggml_map_custom1_impl_f32(
|
||||
struct ggml_context * ctx,
|
||||
@@ -7366,7 +7366,7 @@ static struct ggml_tensor * ggml_map_custom1_impl_f32(
|
||||
|
||||
ggml_set_op_params(result, (const void *) &fun, sizeof(fun));
|
||||
|
||||
result->op = GGML_OP_MAP_CUSTOM1;
|
||||
result->op = GGML_OP_MAP_CUSTOM1_F32;
|
||||
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
||||
result->src[0] = a;
|
||||
|
||||
@@ -7387,7 +7387,7 @@ struct ggml_tensor * ggml_map_custom1_inplace_f32(
|
||||
return ggml_map_custom1_impl_f32(ctx, a, fun, true);
|
||||
}
|
||||
|
||||
// ggml_map_custom2
|
||||
// ggml_map_custom2_f32
|
||||
|
||||
static struct ggml_tensor * ggml_map_custom2_impl_f32(
|
||||
struct ggml_context * ctx,
|
||||
@@ -7405,7 +7405,7 @@ static struct ggml_tensor * ggml_map_custom2_impl_f32(
|
||||
|
||||
ggml_set_op_params(result, (const void *) &fun, sizeof(fun));
|
||||
|
||||
result->op = GGML_OP_MAP_CUSTOM2;
|
||||
result->op = GGML_OP_MAP_CUSTOM2_F32;
|
||||
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
||||
result->src[0] = a;
|
||||
result->src[1] = b;
|
||||
@@ -7429,7 +7429,7 @@ struct ggml_tensor * ggml_map_custom2_inplace_f32(
|
||||
return ggml_map_custom2_impl_f32(ctx, a, b, fun, true);
|
||||
}
|
||||
|
||||
// ggml_map_custom3
|
||||
// ggml_map_custom3_f32
|
||||
|
||||
static struct ggml_tensor * ggml_map_custom3_impl_f32(
|
||||
struct ggml_context * ctx,
|
||||
@@ -7448,7 +7448,7 @@ static struct ggml_tensor * ggml_map_custom3_impl_f32(
|
||||
|
||||
ggml_set_op_params(result, (const void *) &fun, sizeof(fun));
|
||||
|
||||
result->op = GGML_OP_MAP_CUSTOM3;
|
||||
result->op = GGML_OP_MAP_CUSTOM3_F32;
|
||||
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
||||
result->src[0] = a;
|
||||
result->src[1] = b;
|
||||
@@ -7475,6 +7475,190 @@ struct ggml_tensor * ggml_map_custom3_inplace_f32(
|
||||
return ggml_map_custom3_impl_f32(ctx, a, b, c, fun, true);
|
||||
}
|
||||
|
||||
// ggml_map_custom1
|
||||
struct ggml_map_custom1_op_params {
|
||||
ggml_custom1_op_t fun;
|
||||
int n_tasks;
|
||||
void * userdata;
|
||||
};
|
||||
|
||||
static struct ggml_tensor * ggml_map_custom1_impl(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
const ggml_custom1_op_t fun,
|
||||
int n_tasks,
|
||||
void * userdata,
|
||||
bool inplace) {
|
||||
GGML_ASSERT(n_tasks == GGML_N_TASKS_MAX || n_tasks > 0);
|
||||
|
||||
bool is_node = false;
|
||||
|
||||
if (!inplace && a->grad) {
|
||||
is_node = true;
|
||||
}
|
||||
|
||||
struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
|
||||
|
||||
struct ggml_map_custom1_op_params params = {
|
||||
/*.fun =*/ fun,
|
||||
/*.n_tasks =*/ n_tasks,
|
||||
/*.userdata =*/ userdata
|
||||
};
|
||||
ggml_set_op_params(result, (const void *) ¶ms, sizeof(params));
|
||||
|
||||
result->op = GGML_OP_MAP_CUSTOM1;
|
||||
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
||||
result->src[0] = a;
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
struct ggml_tensor * ggml_map_custom1(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
const ggml_custom1_op_t fun,
|
||||
int n_tasks,
|
||||
void * userdata) {
|
||||
return ggml_map_custom1_impl(ctx, a, fun, n_tasks, userdata, false);
|
||||
}
|
||||
|
||||
struct ggml_tensor * ggml_map_custom1_inplace(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
const ggml_custom1_op_t fun,
|
||||
int n_tasks,
|
||||
void * userdata) {
|
||||
return ggml_map_custom1_impl(ctx, a, fun, n_tasks, userdata, true);
|
||||
}
|
||||
|
||||
// ggml_map_custom2
|
||||
|
||||
struct ggml_map_custom2_op_params {
|
||||
ggml_custom2_op_t fun;
|
||||
int n_tasks;
|
||||
void * userdata;
|
||||
};
|
||||
|
||||
static struct ggml_tensor * ggml_map_custom2_impl(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
struct ggml_tensor * b,
|
||||
const ggml_custom2_op_t fun,
|
||||
int n_tasks,
|
||||
void * userdata,
|
||||
bool inplace) {
|
||||
GGML_ASSERT(n_tasks == GGML_N_TASKS_MAX || n_tasks > 0);
|
||||
|
||||
bool is_node = false;
|
||||
|
||||
if (!inplace && (a->grad || b->grad)) {
|
||||
is_node = true;
|
||||
}
|
||||
|
||||
struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
|
||||
|
||||
struct ggml_map_custom2_op_params params = {
|
||||
/*.fun =*/ fun,
|
||||
/*.n_tasks =*/ n_tasks,
|
||||
/*.userdata =*/ userdata
|
||||
};
|
||||
ggml_set_op_params(result, (const void *) ¶ms, sizeof(params));
|
||||
|
||||
result->op = GGML_OP_MAP_CUSTOM2;
|
||||
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
||||
result->src[0] = a;
|
||||
result->src[1] = b;
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
struct ggml_tensor * ggml_map_custom2(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
struct ggml_tensor * b,
|
||||
const ggml_custom2_op_t fun,
|
||||
int n_tasks,
|
||||
void * userdata) {
|
||||
return ggml_map_custom2_impl(ctx, a, b, fun, n_tasks, userdata, false);
|
||||
}
|
||||
|
||||
struct ggml_tensor * ggml_map_custom2_inplace(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
struct ggml_tensor * b,
|
||||
const ggml_custom2_op_t fun,
|
||||
int n_tasks,
|
||||
void * userdata) {
|
||||
return ggml_map_custom2_impl(ctx, a, b, fun, n_tasks, userdata, true);
|
||||
}
|
||||
|
||||
// ggml_map_custom3
|
||||
|
||||
struct ggml_map_custom3_op_params {
|
||||
ggml_custom3_op_t fun;
|
||||
int n_tasks;
|
||||
void * userdata;
|
||||
};
|
||||
|
||||
static struct ggml_tensor * ggml_map_custom3_impl(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
struct ggml_tensor * b,
|
||||
struct ggml_tensor * c,
|
||||
const ggml_custom3_op_t fun,
|
||||
int n_tasks,
|
||||
void * userdata,
|
||||
bool inplace) {
|
||||
GGML_ASSERT(n_tasks == GGML_N_TASKS_MAX || n_tasks > 0);
|
||||
|
||||
bool is_node = false;
|
||||
|
||||
if (!inplace && (a->grad || b->grad || c->grad)) {
|
||||
is_node = true;
|
||||
}
|
||||
|
||||
struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
|
||||
|
||||
struct ggml_map_custom3_op_params params = {
|
||||
/*.fun =*/ fun,
|
||||
/*.n_tasks =*/ n_tasks,
|
||||
/*.userdata =*/ userdata
|
||||
};
|
||||
ggml_set_op_params(result, (const void *) ¶ms, sizeof(params));
|
||||
|
||||
result->op = GGML_OP_MAP_CUSTOM3;
|
||||
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
||||
result->src[0] = a;
|
||||
result->src[1] = b;
|
||||
result->src[2] = c;
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
struct ggml_tensor * ggml_map_custom3(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
struct ggml_tensor * b,
|
||||
struct ggml_tensor * c,
|
||||
const ggml_custom3_op_t fun,
|
||||
int n_tasks,
|
||||
void * userdata) {
|
||||
return ggml_map_custom3_impl(ctx, a, b, c, fun, n_tasks, userdata, false);
|
||||
}
|
||||
|
||||
struct ggml_tensor * ggml_map_custom3_inplace(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
struct ggml_tensor * b,
|
||||
struct ggml_tensor * c,
|
||||
const ggml_custom3_op_t fun,
|
||||
int n_tasks,
|
||||
void * userdata) {
|
||||
return ggml_map_custom3_impl(ctx, a, b, c, fun, n_tasks, userdata, true);
|
||||
}
|
||||
|
||||
|
||||
|
||||
// ggml_cross_entropy_loss
|
||||
|
||||
struct ggml_tensor * ggml_cross_entropy_loss(
|
||||
@@ -9283,8 +9467,8 @@ static void ggml_compute_forward_sum_rows_f32(
|
||||
for (int64_t i3 = 0; i3 < ne03; i3++) {
|
||||
for (int64_t i2 = 0; i2 < ne02; i2++) {
|
||||
for (int64_t i1 = 0; i1 < ne01; i1++) {
|
||||
float* src_row = (float *) ((char *) src0->data + i1*nb01 + i2*nb02 + i3*nb03);
|
||||
float* dst_row = (float *) ((char *) dst->data + i1*nb1 + i2*nb2 + i3*nb3);
|
||||
float * src_row = (float *) ((char *) src0->data + i1*nb01 + i2*nb02 + i3*nb03);
|
||||
float * dst_row = (float *) ((char *) dst->data + i1*nb1 + i2*nb2 + i3*nb3);
|
||||
float row_sum = 0;
|
||||
ggml_vec_sum_f32(ne00, &row_sum, src_row);
|
||||
dst_row[0] = row_sum;
|
||||
@@ -12894,7 +13078,7 @@ static void ggml_compute_forward_pool_1d(
|
||||
const struct ggml_tensor * src0,
|
||||
struct ggml_tensor * dst) {
|
||||
|
||||
const int32_t* opts = (const int32_t*)dst->op_params;
|
||||
const int32_t * opts = (const int32_t *)dst->op_params;
|
||||
enum ggml_op_pool op = opts[0];
|
||||
const int k0 = opts[1];
|
||||
const int s0 = opts[2];
|
||||
@@ -14227,24 +14411,6 @@ static void ggml_compute_forward_map_custom1_f32(
|
||||
fun(dst, a);
|
||||
}
|
||||
|
||||
|
||||
static void ggml_compute_forward_map_custom1(
|
||||
const struct ggml_compute_params * params,
|
||||
const struct ggml_tensor * a,
|
||||
struct ggml_tensor * dst,
|
||||
const ggml_custom1_op_f32_t fun) {
|
||||
switch (a->type) {
|
||||
case GGML_TYPE_F32:
|
||||
{
|
||||
ggml_compute_forward_map_custom1_f32(params, a, dst, fun);
|
||||
} break;
|
||||
default:
|
||||
{
|
||||
GGML_ASSERT(false);
|
||||
} break;
|
||||
}
|
||||
}
|
||||
|
||||
// ggml_compute_forward_map_custom2
|
||||
|
||||
static void ggml_compute_forward_map_custom2_f32(
|
||||
@@ -14263,24 +14429,6 @@ static void ggml_compute_forward_map_custom2_f32(
|
||||
}
|
||||
|
||||
|
||||
static void ggml_compute_forward_map_custom2(
|
||||
const struct ggml_compute_params * params,
|
||||
const struct ggml_tensor * a,
|
||||
const struct ggml_tensor * b,
|
||||
struct ggml_tensor * dst,
|
||||
const ggml_custom2_op_f32_t fun) {
|
||||
switch (a->type) {
|
||||
case GGML_TYPE_F32:
|
||||
{
|
||||
ggml_compute_forward_map_custom2_f32(params, a, b, dst, fun);
|
||||
} break;
|
||||
default:
|
||||
{
|
||||
GGML_ASSERT(false);
|
||||
} break;
|
||||
}
|
||||
}
|
||||
|
||||
// ggml_compute_forward_map_custom3
|
||||
|
||||
static void ggml_compute_forward_map_custom3_f32(
|
||||
@@ -14299,24 +14447,52 @@ static void ggml_compute_forward_map_custom3_f32(
|
||||
fun(dst, a, b, c);
|
||||
}
|
||||
|
||||
// ggml_compute_forward_map_custom1
|
||||
|
||||
static void ggml_compute_forward_map_custom1(
|
||||
const struct ggml_compute_params * params,
|
||||
const struct ggml_tensor * a,
|
||||
struct ggml_tensor * dst) {
|
||||
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
||||
return;
|
||||
}
|
||||
|
||||
struct ggml_map_custom1_op_params * p = (struct ggml_map_custom1_op_params *) dst->op_params;
|
||||
|
||||
p->fun(dst, a, params->ith, params->nth, p->userdata);
|
||||
}
|
||||
|
||||
// ggml_compute_forward_map_custom2
|
||||
|
||||
static void ggml_compute_forward_map_custom2(
|
||||
const struct ggml_compute_params * params,
|
||||
const struct ggml_tensor * a,
|
||||
const struct ggml_tensor * b,
|
||||
struct ggml_tensor * dst) {
|
||||
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
||||
return;
|
||||
}
|
||||
|
||||
struct ggml_map_custom2_op_params * p = (struct ggml_map_custom2_op_params *) dst->op_params;
|
||||
|
||||
p->fun(dst, a, b, params->ith, params->nth, p->userdata);
|
||||
}
|
||||
|
||||
// ggml_compute_forward_map_custom3
|
||||
|
||||
static void ggml_compute_forward_map_custom3(
|
||||
const struct ggml_compute_params * params,
|
||||
const struct ggml_tensor * a,
|
||||
const struct ggml_tensor * b,
|
||||
const struct ggml_tensor * c,
|
||||
struct ggml_tensor * dst,
|
||||
const ggml_custom3_op_f32_t fun) {
|
||||
switch (a->type) {
|
||||
case GGML_TYPE_F32:
|
||||
{
|
||||
ggml_compute_forward_map_custom3_f32(params, a, b, c, dst, fun);
|
||||
} break;
|
||||
default:
|
||||
{
|
||||
GGML_ASSERT(false);
|
||||
} break;
|
||||
struct ggml_tensor * dst) {
|
||||
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
||||
return;
|
||||
}
|
||||
|
||||
struct ggml_map_custom3_op_params * p = (struct ggml_map_custom3_op_params *) dst->op_params;
|
||||
|
||||
p->fun(dst, a, b, c, params->ith, params->nth, p->userdata);
|
||||
}
|
||||
|
||||
// ggml_compute_forward_cross_entropy_loss
|
||||
@@ -14838,25 +15014,40 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
|
||||
ggml_compute_forward_map_binary(params, tensor->src[0], tensor->src[1], tensor, fun);
|
||||
}
|
||||
break;
|
||||
case GGML_OP_MAP_CUSTOM1:
|
||||
case GGML_OP_MAP_CUSTOM1_F32:
|
||||
{
|
||||
ggml_custom1_op_f32_t fun;
|
||||
memcpy(&fun, tensor->op_params, sizeof(fun));
|
||||
ggml_compute_forward_map_custom1(params, tensor->src[0], tensor, fun);
|
||||
ggml_compute_forward_map_custom1_f32(params, tensor->src[0], tensor, fun);
|
||||
}
|
||||
break;
|
||||
case GGML_OP_MAP_CUSTOM2_F32:
|
||||
{
|
||||
ggml_custom2_op_f32_t fun;
|
||||
memcpy(&fun, tensor->op_params, sizeof(fun));
|
||||
ggml_compute_forward_map_custom2_f32(params, tensor->src[0], tensor->src[1], tensor, fun);
|
||||
}
|
||||
break;
|
||||
case GGML_OP_MAP_CUSTOM3_F32:
|
||||
{
|
||||
ggml_custom3_op_f32_t fun;
|
||||
memcpy(&fun, tensor->op_params, sizeof(fun));
|
||||
ggml_compute_forward_map_custom3_f32(params, tensor->src[0], tensor->src[1], tensor->src[2], tensor, fun);
|
||||
}
|
||||
break;
|
||||
case GGML_OP_MAP_CUSTOM1:
|
||||
{
|
||||
ggml_compute_forward_map_custom1(params, tensor->src[0], tensor);
|
||||
}
|
||||
break;
|
||||
case GGML_OP_MAP_CUSTOM2:
|
||||
{
|
||||
ggml_custom2_op_f32_t fun;
|
||||
memcpy(&fun, tensor->op_params, sizeof(fun));
|
||||
ggml_compute_forward_map_custom2(params, tensor->src[0], tensor->src[1], tensor, fun);
|
||||
ggml_compute_forward_map_custom2(params, tensor->src[0], tensor->src[1], tensor);
|
||||
}
|
||||
break;
|
||||
case GGML_OP_MAP_CUSTOM3:
|
||||
{
|
||||
ggml_custom3_op_f32_t fun;
|
||||
memcpy(&fun, tensor->op_params, sizeof(fun));
|
||||
ggml_compute_forward_map_custom3(params, tensor->src[0], tensor->src[1], tensor->src[2], tensor, fun);
|
||||
ggml_compute_forward_map_custom3(params, tensor->src[0], tensor->src[1], tensor->src[2], tensor);
|
||||
}
|
||||
break;
|
||||
case GGML_OP_CROSS_ENTROPY_LOSS:
|
||||
@@ -15664,6 +15855,9 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
|
||||
} break;
|
||||
case GGML_OP_MAP_UNARY:
|
||||
case GGML_OP_MAP_BINARY:
|
||||
case GGML_OP_MAP_CUSTOM1_F32:
|
||||
case GGML_OP_MAP_CUSTOM2_F32:
|
||||
case GGML_OP_MAP_CUSTOM3_F32:
|
||||
case GGML_OP_MAP_CUSTOM1:
|
||||
case GGML_OP_MAP_CUSTOM2:
|
||||
case GGML_OP_MAP_CUSTOM3:
|
||||
@@ -16449,12 +16643,39 @@ struct ggml_cplan ggml_graph_plan(struct ggml_cgraph * cgraph, int n_threads) {
|
||||
case GGML_OP_WIN_UNPART:
|
||||
case GGML_OP_MAP_UNARY:
|
||||
case GGML_OP_MAP_BINARY:
|
||||
case GGML_OP_MAP_CUSTOM1:
|
||||
case GGML_OP_MAP_CUSTOM2:
|
||||
case GGML_OP_MAP_CUSTOM3:
|
||||
case GGML_OP_MAP_CUSTOM1_F32:
|
||||
case GGML_OP_MAP_CUSTOM2_F32:
|
||||
case GGML_OP_MAP_CUSTOM3_F32:
|
||||
{
|
||||
n_tasks = 1;
|
||||
} break;
|
||||
case GGML_OP_MAP_CUSTOM1:
|
||||
{
|
||||
struct ggml_map_custom1_op_params * p = (struct ggml_map_custom1_op_params *) node->op_params;
|
||||
if (p->n_tasks == GGML_N_TASKS_MAX) {
|
||||
n_tasks = n_threads;
|
||||
} else {
|
||||
n_tasks = MIN(p->n_tasks, n_threads);
|
||||
}
|
||||
} break;
|
||||
case GGML_OP_MAP_CUSTOM2:
|
||||
{
|
||||
struct ggml_map_custom2_op_params * p = (struct ggml_map_custom2_op_params *) node->op_params;
|
||||
if (p->n_tasks == GGML_N_TASKS_MAX) {
|
||||
n_tasks = n_threads;
|
||||
} else {
|
||||
n_tasks = MIN(p->n_tasks, n_threads);
|
||||
}
|
||||
} break;
|
||||
case GGML_OP_MAP_CUSTOM3:
|
||||
{
|
||||
struct ggml_map_custom3_op_params * p = (struct ggml_map_custom3_op_params *) node->op_params;
|
||||
if (p->n_tasks == GGML_N_TASKS_MAX) {
|
||||
n_tasks = n_threads;
|
||||
} else {
|
||||
n_tasks = MIN(p->n_tasks, n_threads);
|
||||
}
|
||||
} break;
|
||||
case GGML_OP_CROSS_ENTROPY_LOSS:
|
||||
{
|
||||
n_tasks = n_threads;
|
||||
|
||||
145
ggml.h
145
ggml.h
@@ -183,6 +183,15 @@
|
||||
# define GGML_API
|
||||
#endif
|
||||
|
||||
// TODO: support for clang
|
||||
#ifdef __GNUC__
|
||||
# define GGML_DEPRECATED(func, hint) func __attribute__((deprecated(hint)))
|
||||
#elif defined(_MSC_VER)
|
||||
# define GGML_DEPRECATED(func, hint) __declspec(deprecated(hint)) func
|
||||
#else
|
||||
# define GGML_DEPRECATED(func, hint) func
|
||||
#endif
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stddef.h>
|
||||
#include <stdbool.h>
|
||||
@@ -374,6 +383,10 @@ extern "C" {
|
||||
GGML_OP_MAP_UNARY,
|
||||
GGML_OP_MAP_BINARY,
|
||||
|
||||
GGML_OP_MAP_CUSTOM1_F32,
|
||||
GGML_OP_MAP_CUSTOM2_F32,
|
||||
GGML_OP_MAP_CUSTOM3_F32,
|
||||
|
||||
GGML_OP_MAP_CUSTOM1,
|
||||
GGML_OP_MAP_CUSTOM2,
|
||||
GGML_OP_MAP_CUSTOM3,
|
||||
@@ -570,6 +583,8 @@ extern "C" {
|
||||
GGML_API bool ggml_is_contiguous(const struct ggml_tensor * tensor);
|
||||
GGML_API bool ggml_is_permuted (const struct ggml_tensor * tensor);
|
||||
|
||||
GGML_API bool ggml_are_same_shape(const struct ggml_tensor * t0, const struct ggml_tensor * t1);
|
||||
|
||||
// use this to compute the memory overhead of a tensor
|
||||
GGML_API size_t ggml_tensor_overhead(void);
|
||||
|
||||
@@ -1240,7 +1255,7 @@ extern "C" {
|
||||
|
||||
// conv_1d with padding = half
|
||||
// alias for ggml_conv_1d(a, b, s, a->ne[0]/2, d)
|
||||
GGML_API struct ggml_tensor* ggml_conv_1d_ph(
|
||||
GGML_API struct ggml_tensor * ggml_conv_1d_ph(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
struct ggml_tensor * b,
|
||||
@@ -1253,7 +1268,7 @@ extern "C" {
|
||||
GGML_OP_POOL_COUNT,
|
||||
};
|
||||
|
||||
GGML_API struct ggml_tensor* ggml_pool_1d(
|
||||
GGML_API struct ggml_tensor * ggml_pool_1d(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
enum ggml_op_pool op,
|
||||
@@ -1261,7 +1276,7 @@ extern "C" {
|
||||
int s0, // stride
|
||||
int p0); // padding
|
||||
|
||||
GGML_API struct ggml_tensor* ggml_pool_2d(
|
||||
GGML_API struct ggml_tensor * ggml_pool_2d(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
enum ggml_op_pool op,
|
||||
@@ -1315,15 +1330,6 @@ extern "C" {
|
||||
int h0,
|
||||
int w);
|
||||
|
||||
// custom operators
|
||||
|
||||
typedef void (*ggml_unary_op_f32_t) (const int, float *, const float *);
|
||||
typedef void (*ggml_binary_op_f32_t)(const int, float *, const float *, const float *);
|
||||
|
||||
typedef void (*ggml_custom1_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *);
|
||||
typedef void (*ggml_custom2_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *);
|
||||
typedef void (*ggml_custom3_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *);
|
||||
|
||||
GGML_API struct ggml_tensor * ggml_unary(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
@@ -1334,63 +1340,138 @@ extern "C" {
|
||||
struct ggml_tensor * a,
|
||||
enum ggml_unary_op op);
|
||||
|
||||
GGML_API struct ggml_tensor * ggml_map_unary_f32(
|
||||
// custom operators
|
||||
|
||||
typedef void (*ggml_unary_op_f32_t) (const int, float *, const float *);
|
||||
typedef void (*ggml_binary_op_f32_t)(const int, float *, const float *, const float *);
|
||||
|
||||
typedef void (*ggml_custom1_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *);
|
||||
typedef void (*ggml_custom2_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *);
|
||||
typedef void (*ggml_custom3_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *);
|
||||
|
||||
GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_unary_f32(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
ggml_unary_op_f32_t fun);
|
||||
ggml_unary_op_f32_t fun),
|
||||
"use ggml_map_custom1 instead");
|
||||
|
||||
GGML_API struct ggml_tensor * ggml_map_unary_inplace_f32(
|
||||
GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_unary_inplace_f32(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
ggml_unary_op_f32_t fun);
|
||||
ggml_unary_op_f32_t fun),
|
||||
"use ggml_map_custom1_inplace instead");
|
||||
|
||||
GGML_API struct ggml_tensor * ggml_map_binary_f32(
|
||||
GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_binary_f32(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
struct ggml_tensor * b,
|
||||
ggml_binary_op_f32_t fun);
|
||||
ggml_binary_op_f32_t fun),
|
||||
"use ggml_map_custom2 instead");
|
||||
|
||||
GGML_API struct ggml_tensor * ggml_map_binary_inplace_f32(
|
||||
GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_binary_inplace_f32(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
struct ggml_tensor * b,
|
||||
ggml_binary_op_f32_t fun);
|
||||
ggml_binary_op_f32_t fun),
|
||||
"use ggml_map_custom2_inplace instead");
|
||||
|
||||
GGML_API struct ggml_tensor * ggml_map_custom1_f32(
|
||||
GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_custom1_f32(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
ggml_custom1_op_f32_t fun);
|
||||
ggml_custom1_op_f32_t fun),
|
||||
"use ggml_map_custom1 instead");
|
||||
|
||||
GGML_API struct ggml_tensor * ggml_map_custom1_inplace_f32(
|
||||
GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_custom1_inplace_f32(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
ggml_custom1_op_f32_t fun);
|
||||
ggml_custom1_op_f32_t fun),
|
||||
"use ggml_map_custom1_inplace instead");
|
||||
|
||||
GGML_API struct ggml_tensor * ggml_map_custom2_f32(
|
||||
GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_custom2_f32(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
struct ggml_tensor * b,
|
||||
ggml_custom2_op_f32_t fun);
|
||||
ggml_custom2_op_f32_t fun),
|
||||
"use ggml_map_custom2 instead");
|
||||
|
||||
GGML_API struct ggml_tensor * ggml_map_custom2_inplace_f32(
|
||||
GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_custom2_inplace_f32(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
struct ggml_tensor * b,
|
||||
ggml_custom2_op_f32_t fun);
|
||||
ggml_custom2_op_f32_t fun),
|
||||
"use ggml_map_custom2_inplace instead");
|
||||
|
||||
GGML_API struct ggml_tensor * ggml_map_custom3_f32(
|
||||
GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_custom3_f32(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
struct ggml_tensor * b,
|
||||
struct ggml_tensor * c,
|
||||
ggml_custom3_op_f32_t fun);
|
||||
ggml_custom3_op_f32_t fun),
|
||||
"use ggml_map_custom3 instead");
|
||||
|
||||
GGML_API struct ggml_tensor * ggml_map_custom3_inplace_f32(
|
||||
GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_custom3_inplace_f32(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
struct ggml_tensor * b,
|
||||
struct ggml_tensor * c,
|
||||
ggml_custom3_op_f32_t fun);
|
||||
ggml_custom3_op_f32_t fun),
|
||||
"use ggml_map_custom3_inplace instead");
|
||||
|
||||
// custom operators v2
|
||||
|
||||
typedef void (*ggml_custom1_op_t)(struct ggml_tensor * dst , const struct ggml_tensor * a, int ith, int nth, void * userdata);
|
||||
typedef void (*ggml_custom2_op_t)(struct ggml_tensor * dst , const struct ggml_tensor * a, const struct ggml_tensor * b, int ith, int nth, void * userdata);
|
||||
typedef void (*ggml_custom3_op_t)(struct ggml_tensor * dst , const struct ggml_tensor * a, const struct ggml_tensor * b, const struct ggml_tensor * c, int ith, int nth, void * userdata);
|
||||
|
||||
#define GGML_N_TASKS_MAX -1
|
||||
|
||||
GGML_API struct ggml_tensor * ggml_map_custom1(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
ggml_custom1_op_t fun,
|
||||
int n_tasks,
|
||||
void * userdata);
|
||||
|
||||
GGML_API struct ggml_tensor * ggml_map_custom1_inplace(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
ggml_custom1_op_t fun,
|
||||
int n_tasks,
|
||||
void * userdata);
|
||||
|
||||
GGML_API struct ggml_tensor * ggml_map_custom2(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
struct ggml_tensor * b,
|
||||
ggml_custom2_op_t fun,
|
||||
int n_tasks,
|
||||
void * userdata);
|
||||
|
||||
GGML_API struct ggml_tensor * ggml_map_custom2_inplace(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
struct ggml_tensor * b,
|
||||
ggml_custom2_op_t fun,
|
||||
int n_tasks,
|
||||
void * userdata);
|
||||
|
||||
GGML_API struct ggml_tensor * ggml_map_custom3(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
struct ggml_tensor * b,
|
||||
struct ggml_tensor * c,
|
||||
ggml_custom3_op_t fun,
|
||||
int n_tasks,
|
||||
void * userdata);
|
||||
|
||||
GGML_API struct ggml_tensor * ggml_map_custom3_inplace(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
struct ggml_tensor * b,
|
||||
struct ggml_tensor * c,
|
||||
ggml_custom3_op_t fun,
|
||||
int n_tasks,
|
||||
void * userdata);
|
||||
|
||||
// loss function
|
||||
|
||||
|
||||
@@ -219,7 +219,7 @@ struct llama_mmap {
|
||||
// prefetch/readahead impairs performance on NUMA systems
|
||||
if (numa) { prefetch = 0; }
|
||||
#ifdef __linux__
|
||||
if (prefetch) { flags |= MAP_POPULATE; }
|
||||
if (prefetch >= file->size) { flags |= MAP_POPULATE; }
|
||||
#endif
|
||||
addr = mmap(NULL, file->size, PROT_READ, flags, fd, 0);
|
||||
if (addr == MAP_FAILED) {
|
||||
|
||||
@@ -747,12 +747,12 @@ struct llama_model_loader {
|
||||
|
||||
void load_all_data(llama_progress_callback progress_callback, void * progress_callback_user_data, llama_mlock * lmlock) {
|
||||
size_t data_size = 0;
|
||||
size_t prefetch_size = 0;
|
||||
size_t prefetch_size = file_loader->file.size;
|
||||
size_t lock_size = 0;
|
||||
for (const llama_load_tensor & lt : tensors_map.tensors) {
|
||||
data_size += lt.size;
|
||||
if (lt.ggml_tensor->backend == GGML_BACKEND_CPU) {
|
||||
prefetch_size += lt.size;
|
||||
if (lt.ggml_tensor->backend != GGML_BACKEND_CPU) {
|
||||
prefetch_size -= lt.size;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user