llama : print timings on ctrl+c exit (#1021)

* print timings on ctrl+c exit
* remove redundant free memory call
* add global pointer to ctx
llama : have n_batch default to 512 (#1091)
2026-03-05 14:33:24 +02:00 · 2023-04-22 11:56:35 +03:00 · 2023-04-22 11:27:05 +03:00
2 changed files with 5 additions and 2 deletions
--- a/examples/common.h
+++ b/examples/common.h
@@ -20,7 +20,7 @@ struct gpt_params {
    int32_t repeat_last_n = 64;   // last n tokens to penalize
    int32_t n_parts       = -1;   // amount of model parts (-1 = determine from model dimensions)
    int32_t n_ctx         = 512;  // context size
-    int32_t n_batch       = 8;    // batch size for prompt processing
+    int32_t n_batch       = 512;  // batch size for prompt processing (must be >=32 to use BLAS)
    int32_t n_keep        = 0;    // number of tokens to keep from initial prompt

    // sampling parameters
--- a/examples/main/main.cpp
+++ b/examples/main/main.cpp
@@ -25,6 +25,7 @@
 #endif

 static console_state con_st;
+static llama_context ** g_ctx;

 static bool is_interacting = false;

@@ -36,6 +37,7 @@ void sigint_handler(int signo) {
        if (!is_interacting) {
            is_interacting=true;
        } else {
+            llama_print_timings(*g_ctx);
            _exit(130);
        }
    }
@@ -92,8 +94,9 @@ int main(int argc, char ** argv) {

 //    params.prompt = R"(// this function checks if the number n is prime
 //bool is_prime(int n) {)";
-
+    
    llama_context * ctx;
+    g_ctx = &ctx;

    // load the model
    {