Compare commits

..

5 Commits

Author SHA1 Message Date
Stephan Walter
81040f10aa llama : do not allocate KV cache for "vocab_only == true" (#682)
Fixes sanitizer CI
2023-04-02 10:18:53 +03:00
Fabian
c4f89d8d73 make : use -march=native -mtune=native on x86 (#609) 2023-04-02 10:17:05 +03:00
Murilo Santana
5b70e7de4c fix default params for examples/main (#697) 2023-04-02 04:41:12 +02:00
Ikko Eltociear Ashimine
a717cba844 py: huggingface -> Hugging Face (#686) 2023-04-01 18:38:18 +02:00
rimoliga
d0a7f742e7 readme: replace termux links with homepage, play store is deprecated (#680) 2023-04-01 16:57:30 +02:00
5 changed files with 11 additions and 96 deletions

View File

@@ -70,95 +70,8 @@ endif
# TODO: probably these flags need to be tweaked on some architectures
# feel free to update the Makefile for your architecture and send a pull request or issue
ifeq ($(UNAME_M),$(filter $(UNAME_M),x86_64 i686))
ifeq ($(UNAME_S),Darwin)
F16C_M := $(shell sysctl machdep.cpu.features)
ifneq (,$(findstring F16C,$(F16C_M)))
CFLAGS += -mf16c
endif
AVX1_M := $(shell sysctl machdep.cpu.features)
ifneq (,$(findstring FMA,$(AVX1_M)))
CFLAGS += -mfma
endif
ifneq (,$(findstring AVX1.0,$(AVX1_M)))
CFLAGS += -mavx
endif
AVX2_M := $(shell sysctl machdep.cpu.leaf7_features)
ifneq (,$(findstring AVX2,$(AVX2_M)))
CFLAGS += -mavx2
endif
else ifeq ($(UNAME_S),Linux)
AVX1_M := $(shell grep "avx " /proc/cpuinfo)
ifneq (,$(findstring avx,$(AVX1_M)))
CFLAGS += -mavx
endif
AVX2_M := $(shell grep "avx2 " /proc/cpuinfo)
ifneq (,$(findstring avx2,$(AVX2_M)))
CFLAGS += -mavx2
endif
FMA_M := $(shell grep "fma " /proc/cpuinfo)
ifneq (,$(findstring fma,$(FMA_M)))
CFLAGS += -mfma
endif
F16C_M := $(shell grep "f16c " /proc/cpuinfo)
ifneq (,$(findstring f16c,$(F16C_M)))
CFLAGS += -mf16c
endif
SSE3_M := $(shell grep "sse3 " /proc/cpuinfo)
ifneq (,$(findstring sse3,$(SSE3_M)))
CFLAGS += -msse3
endif
AVX512F_M := $(shell grep "avx512f " /proc/cpuinfo)
ifneq (,$(findstring avx512f,$(AVX512F_M)))
CFLAGS += -mavx512f
endif
AVX512BW_M := $(shell grep "avx512bw " /proc/cpuinfo)
ifneq (,$(findstring avx512bw,$(AVX512BW_M)))
CFLAGS += -mavx512bw
endif
AVX512DQ_M := $(shell grep "avx512dq " /proc/cpuinfo)
ifneq (,$(findstring avx512dq,$(AVX512DQ_M)))
CFLAGS += -mavx512dq
endif
AVX512VL_M := $(shell grep "avx512vl " /proc/cpuinfo)
ifneq (,$(findstring avx512vl,$(AVX512VL_M)))
CFLAGS += -mavx512vl
endif
AVX512CD_M := $(shell grep "avx512cd " /proc/cpuinfo)
ifneq (,$(findstring avx512cd,$(AVX512CD_M)))
CFLAGS += -mavx512cd
endif
AVX512ER_M := $(shell grep "avx512er " /proc/cpuinfo)
ifneq (,$(findstring avx512er,$(AVX512ER_M)))
CFLAGS += -mavx512er
endif
AVX512IFMA_M := $(shell grep "avx512ifma " /proc/cpuinfo)
ifneq (,$(findstring avx512ifma,$(AVX512IFMA_M)))
CFLAGS += -mavx512ifma
endif
AVX512PF_M := $(shell grep "avx512pf " /proc/cpuinfo)
ifneq (,$(findstring avx512pf,$(AVX512PF_M)))
CFLAGS += -mavx512pf
endif
else ifeq ($(UNAME_S),Haiku)
AVX1_M := $(shell sysinfo -cpu | grep -w "AVX")
ifneq (,$(findstring AVX,$(AVX1_M)))
CFLAGS += -mavx
endif
AVX2_M := $(shell sysinfo -cpu | grep -w "AVX2")
ifneq (,$(findstring AVX2,$(AVX2_M)))
CFLAGS += -mavx2
endif
FMA_M := $(shell sysinfo -cpu | grep -w "FMA")
ifneq (,$(findstring FMA,$(FMA_M)))
CFLAGS += -mfma
endif
F16C_M := $(shell sysinfo -cpu | grep -w "F16C")
ifneq (,$(findstring F16C,$(F16C_M)))
CFLAGS += -mf16c
endif
else
CFLAGS += -mfma -mf16c -mavx -mavx2
endif
# Use all CPU extensions that are available:
CFLAGS += -march=native -mtune=native
endif
ifneq ($(filter ppc64%,$(UNAME_M)),)
POWER9_M := $(shell grep "POWER9" /proc/cpuinfo)

View File

@@ -301,7 +301,7 @@ And after 4.45 hours, you will have the final perplexity.
### Android
You can easily run `llama.cpp` on an Android device with [termux](https://play.google.com/store/apps/details?id=com.termux).
You can easily run `llama.cpp` on an Android device with [termux](https://termux.dev/).
First, obtain the [Android NDK](https://developer.android.com/ndk) and then build with CMake:
```
$ mkdir build-android
@@ -310,7 +310,7 @@ $ export NDK=<your_ndk_directory>
$ cmake -DCMAKE_TOOLCHAIN_FILE=$NDK/build/cmake/android.toolchain.cmake -DANDROID_ABI=arm64-v8a -DANDROID_PLATFORM=android-23 -DCMAKE_C_FLAGS=-march=armv8.4a+dotprod ..
$ make
```
Install [termux](https://play.google.com/store/apps/details?id=com.termux) on your device and run `termux-setup-storage` to get access to your SD card.
Install [termux](https://termux.dev/) on your device and run `termux-setup-storage` to get access to your SD card.
Finally, copy the `llama` binary and the model files to your device storage. Here is a demo of an interactive session running on a Pixel 5 phone:
https://user-images.githubusercontent.com/271616/225014776-1d567049-ad71-4ef2-b050-55b0b3b9274c.mp4

View File

@@ -254,7 +254,7 @@ def main():
parser.add_argument(
"--hf",
action="store_true",
help="Whether to save the model in the huggingface format. (default: False)",
help="Whether to save the model in the Hugging Face format. (default: False)",
)
parser.add_argument(
"--chat", "-c", action="store_true", help="Whether to open a chat with the model. (default: False)"

View File

@@ -39,6 +39,8 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
bool invalid_param = false;
std::string arg;
gpt_params default_params;
for (int i = 1; i < argc; i++) {
arg = argv[i];
@@ -173,7 +175,7 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
}
params.n_parts = std::stoi(argv[i]);
} else if (arg == "-h" || arg == "--help") {
gpt_print_usage(argc, argv, params);
gpt_print_usage(argc, argv, default_params);
exit(0);
} else if (arg == "--random-prompt") {
params.random_prompt = true;
@@ -185,13 +187,13 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
params.input_prefix = argv[i];
} else {
fprintf(stderr, "error: unknown argument: %s\n", arg.c_str());
gpt_print_usage(argc, argv, params);
gpt_print_usage(argc, argv, default_params);
exit(1);
}
}
if (invalid_param) {
fprintf(stderr, "error: invalid parameter for argument: %s\n", arg.c_str());
gpt_print_usage(argc, argv, params);
gpt_print_usage(argc, argv, default_params);
exit(1);
}

View File

@@ -1608,7 +1608,7 @@ struct llama_context * llama_init_from_file(
}
// reserve memory for context buffers
{
if (!params.vocab_only) {
if (!kv_cache_init(ctx->model.hparams, ctx->model.kv_self, memory_type, ctx->model.hparams.n_ctx)) {
fprintf(stderr, "%s: kv_cache_init() failed for self-attention cache\n", __func__);
llama_free(ctx);