mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2026-02-05 13:53:23 +02:00
* wip * wip * fix logging, add display info * handle commands * add args * wip * move old cli to llama-completion * rm deprecation notice * move server to a shared library * move ci to llama-completion * add loading animation * add --show-timings arg * add /read command, improve LOG_ERR * add args for speculative decoding, enable show timings by default * add arg --image and --audio * fix windows build * support reasoning_content * fix llama2c workflow * color default is auto * fix merge conflicts * properly fix color problem Co-authored-by: bandoti <bandoti@users.noreply.github.com> * better loading spinner * make sure to clean color on force-exit * also clear input files on "/clear" * simplify common_log_flush * add warning in mtmd-cli * implement console writter * fix data race * add attribute * fix llama-completion and mtmd-cli * add some notes about console::log * fix compilation --------- Co-authored-by: bandoti <bandoti@users.noreply.github.com>
145 lines
5.5 KiB
Bash
Executable File
145 lines
5.5 KiB
Bash
Executable File
#!/usr/bin/env bash
#
# LoRA conversion/inference test. For every entry in `params` the script
# converts a base model and its LoRA adapter to GGUF, merges them, runs
# llama-completion on the results and compares the generated text with
# reference texts stored in the test-data repository.
set -e

# Array of models to iterate over.
# Each entry is "<model directory name> <hidden size>".
declare -a params=(
    "Gemma2ForCausalLM 64"
    "LlamaForCausalLM 64"
    "Phi3ForCausalLM 64"
)

# Test-data repository: local directory name, remote URL and the revision the
# checkout is pinned to after cloning.
MODELS_REPO=lora-tests
MODELS_REPO_URL=https://huggingface.co/ggml-org/$MODELS_REPO
COMMIT=c26d5fb85b4070a9e9c4e65d132c783b98086890

# Clone the Hugging Face repository if the directory does not exist, then pin
# the fresh clone to $COMMIT.
if [[ ! -d "$MODELS_REPO" ]]; then
    echo "Cloning the Hugging Face repository..."
    git clone --depth 1 "$MODELS_REPO_URL" "$MODELS_REPO"
    # `git -C` operates inside the clone without changing this script's
    # working directory, so no `cd` / `cd -` round trip is needed.
    git -C "$MODELS_REPO" fetch --depth=1 origin "$COMMIT"
    git -C "$MODELS_REPO" reset --hard "$COMMIT"
else
    echo "Repository already exists. Skipping clone."
    # The directory may be left over from a run that failed between the clone
    # and the reset above; it is reused as-is, so at least say so when it is
    # not at the pinned revision.
    checked_out_commit=$(git -C "$MODELS_REPO" rev-parse HEAD 2>/dev/null || true)
    if [[ "$checked_out_commit" != "$COMMIT" ]]; then
        echo "Warning: $MODELS_REPO is at '${checked_out_commit:-unknown}', expected $COMMIT." >&2
    fi
fi

# Array to store results to print.
# run_conversion_and_inference_lora appends one summary string per model; the
# entries are printed once all models have been processed.
declare -a results=()

#######################################
# Print a string with its leading whitespace removed.
# Arguments: $1 - string to trim
# Outputs:   the trimmed string followed by a newline, on stdout
#######################################
trim_leading_whitespace() {
    local input_string="$1"
    # The inner expansion yields the leading run of whitespace (everything
    # before the first non-space character); the outer one strips that prefix.
    # printf rather than echo: a result such as "-n" or "-e" must be printed
    # verbatim instead of being taken as an echo option.
    printf '%s\n' "${input_string#"${input_string%%[![:space:]]*}"}"
}

#######################################
# Print the beginning of a reference string, cut to the length of another
# string.
# Arguments: $1 - reference string to take the prefix from
#            $2 - string whose length decides how long the prefix is
# Outputs:   the prefix followed by a newline, on stdout; the whole reference
#            string when it is shorter than $2
#######################################
extract_starting_substring() {
    local reference_string="$1"
    local target_string="$2"

    local target_length=${#target_string}
    # printf rather than echo so a prefix like "-n" is not eaten as an option.
    printf '%s\n' "${reference_string:0:$target_length}"
}

#######################################
# Print the first whitespace-delimited word of a string.
# Only the first line of the input is examined, because `read` consumes a
# single line.
# Arguments: $1 - input string
# Outputs:   the first word followed by a newline, on stdout (an empty line
#            when the first input line contains no word)
#######################################
get_first_word() {
    local input_string="$1"
    # Keep the scratch variable local so calling this function directly (not
    # in a command substitution) cannot clobber a caller's `first_word`.
    local first_word
    read -r first_word _ <<< "$input_string"
    # printf rather than echo so a word like "-n" is printed verbatim.
    printf '%s\n' "$first_word"
}

# Load the expected strings.
# The *_FULL variables hold the complete reference texts from the test-data
# repository; the *_FIRST_WORD variables hold their first word, which is used
# as the prompt for the corresponding llama-completion runs.
EXPECTED_BASE_FULL=$(cat -- "$MODELS_REPO/data/pale_blue_dot.txt")
EXPECTED_LORA_FULL=$(cat -- "$MODELS_REPO/data/bohemian_rhapsody.txt")
EXPECTED_BASE_FIRST_WORD=$(get_first_word "$EXPECTED_BASE_FULL")
EXPECTED_LORA_FIRST_WORD=$(get_first_word "$EXPECTED_LORA_FULL")

#######################################
# Abort the script unless a generated text is non-empty and identical to the
# expected reference prefix.
# Arguments: $1 - model name (for the message)
#            $2 - label of the output being checked (for the message)
#            $3 - generated text
#            $4 - expected text
# Outputs:   diagnostics on stderr when the check fails
# Returns:   exits with status 1 when the check fails
#######################################
_lora_test_check_output() {
    local model_name="$1"
    local label="$2"
    local actual="$3"
    local expected="$4"

    # The expected text is cut to the length of the generated text, so an
    # empty generation would compare equal to an empty prefix and "pass".
    if [[ -z "$actual" ]]; then
        printf 'Error: %s %s is empty.\n' "$model_name" "$label" >&2
        exit 1
    fi

    if [[ "$actual" != "$expected" ]]; then
        printf 'Error: %s %s does not start with the expected string.\n' "$model_name" "$label" >&2
        # '%s' keeps any backslashes in the generated text literal.
        printf 'Out=%s\n\nExp=%s\n' "$actual" "$expected" >&2
        exit 1
    fi
}

#######################################
# Convert one model and its LoRA adapter to GGUF, merge the adapter into the
# base model, run llama-completion on the base model, on the base model with
# the adapter passed via --lora, and on the merged model, and check that each
# output is a prefix of the matching reference text.
# Globals:
#   MODELS_REPO, EXPECTED_BASE_FULL, EXPECTED_LORA_FULL,
#   EXPECTED_BASE_FIRST_WORD, EXPECTED_LORA_FIRST_WORD (read)
#   results (one summary entry appended on success)
# Arguments:
#   $1 - model directory name inside $MODELS_REPO
#   $2 - hidden size, selects the hidden_size=<n> sub-directory
# Outputs:
#   progress messages on stdout, failure diagnostics on stderr
# Returns:
#   exits the script with status 1 on the first failed check
#######################################
run_conversion_and_inference_lora() {
    local model_name=$1
    local hidden_size=$2

    local model_dir="$MODELS_REPO/$model_name/hidden_size=$hidden_size"
    local base_gguf="$model_dir/base/Base-F32.gguf"
    local merged_gguf="$model_dir/base/Base-F32-lora-merged.gguf"
    local lora_gguf="$model_dir/lora/Lora-F32-LoRA.gguf"

    # Declared separately from their assignments so that a failing command
    # substitution is not masked by the exit status of `local`.
    local OUTPUT_BASE OUTPUT_LORA_HOT OUTPUT_LORA_MERGED
    local EXPECTED_BASE EXPECTED_LORA

    echo -e "\n\n-------- RUNNING TEST FOR MODEL $model_name --------\n\n"

    # Convert safetensors to gguf
    # NOTE(review): this step invokes `python` while the LoRA conversion below
    # invokes `python3` - confirm both resolve to the intended interpreter.
    echo "Running convert_hf_to_gguf.py for $model_name with hidden_size $hidden_size..."
    python convert_hf_to_gguf.py "$model_dir/base" \
        --outfile "$base_gguf" \
        --outtype f32

    echo -e "\n\n---------------------------\n\n"
    echo "Running convert_lora_to_gguf.py for $model_name with hidden_size $hidden_size..."
    python3 convert_lora_to_gguf.py "$model_dir/lora" \
        --base "$model_dir/base" \
        --outtype f32

    echo -e "\n\n---------------------------\n\n"
    echo "Running llama-export-lora with lora for $model_name with hidden_size $hidden_size..."
    ./llama-export-lora \
        -m "$base_gguf" \
        -o "$merged_gguf" \
        --lora "$lora_gguf"

    # Run inference
    # All three runs pass the same fixed seed and temperature 0, and use the
    # first word of the matching reference text as the prompt.
    echo -e "\n\n---------------------------\n\n"
    echo "Running llama-completion without lora for $model_name with hidden_size $hidden_size..."
    OUTPUT_BASE=$(./llama-completion -no-cnv -m "$base_gguf" \
        -p "$EXPECTED_BASE_FIRST_WORD" -n 50 --seed 42 --temp 0)

    echo -e "\n\n---------------------------\n\n"
    echo "Running llama-completion with hot lora for $model_name with hidden_size $hidden_size..."
    OUTPUT_LORA_HOT=$(./llama-completion -no-cnv -m "$base_gguf" \
        --lora "$lora_gguf" \
        -p "$EXPECTED_LORA_FIRST_WORD" -n 50 --seed 42 --temp 0)

    echo -e "\n\n---------------------------\n\n"
    echo "Running llama-completion with merged lora for $model_name with hidden_size $hidden_size..."
    OUTPUT_LORA_MERGED=$(./llama-completion -no-cnv -m "$merged_gguf" \
        -p "$EXPECTED_LORA_FIRST_WORD" -n 50 --seed 42 --temp 0)

    # Remove any initial white space
    OUTPUT_BASE=$(trim_leading_whitespace "$OUTPUT_BASE")
    OUTPUT_LORA_HOT=$(trim_leading_whitespace "$OUTPUT_LORA_HOT")
    OUTPUT_LORA_MERGED=$(trim_leading_whitespace "$OUTPUT_LORA_MERGED")

    # Extract the corresponding substring from full string.
    # The LoRA reference prefix is sized by the hot-LoRA output and is also
    # what the merged-model output is compared against.
    EXPECTED_BASE=$(extract_starting_substring "$EXPECTED_BASE_FULL" "$OUTPUT_BASE")
    EXPECTED_LORA=$(extract_starting_substring "$EXPECTED_LORA_FULL" "$OUTPUT_LORA_HOT")

    # Assert output equals the expected output
    _lora_test_check_output "$model_name" "OUTPUT_BASE" "$OUTPUT_BASE" "$EXPECTED_BASE"
    _lora_test_check_output "$model_name" "OUTPUT_LORA_HOT" "$OUTPUT_LORA_HOT" "$EXPECTED_LORA"
    _lora_test_check_output "$model_name" "OUTPUT_LORA_MERGED" "$OUTPUT_LORA_MERGED" "$EXPECTED_LORA"

    # Store the results.
    # The \n and \033 sequences are kept as literal backslash escapes; they are
    # rendered when the entry is printed with `echo -e`. The continuation
    # lines start at column 0 on purpose: any indentation would become part of
    # the stored text.
    results+=("
\n\033[1mResults for $model_name with hidden_size $hidden_size:\033[0m
\n\033[32m • Base:\n$OUTPUT_BASE
\n\033[34m • Lora hot:\n$OUTPUT_LORA_HOT
\n\033[36m • Lora merged:\n$OUTPUT_LORA_MERGED
\n \033[0m
")

    echo "All tests passed for $model_name with hidden_size $hidden_size!"
}

# Run test for each model
for param in "${params[@]}"; do
    # Each entry is "<model name> <hidden size>". Split it with `read` rather
    # than an unquoted expansion, which would also subject the fields to
    # pathname expansion.
    read -r param_model_name param_hidden_size <<< "$param"
    run_conversion_and_inference_lora "$param_model_name" "$param_hidden_size"
done

# Print results
echo -e "\n\n---------------------------\n\n"
echo -e "\n\033[1mSummary of All Results:\033[0m"
for result in "${results[@]}"; do
    # `echo -e` is required: the stored entries carry \n and \033 as literal
    # backslash escapes that are only rendered at print time.
    echo -e "$result"
done