Initial commit: tuned multi-model llama.cpp stack

- 5 models: SmolLM3-3B, Gemma4-E2B/E4B, Qwen3-4B, Qwen3.5-9B
- TurboQuant image (FORCE_MMQ): +6-11% free speed on Turing GPUs
- Bigctx profiles (-nkvo KV in RAM): 2-16x context gain
- turbo2 KV: 2x smaller, benchmarked against PPL quality gate
- Per-model env files with justified parameters
- kv_quant_test.sh + cpu_ctx_test.sh benchmark scripts
- docs/FINDINGS.md: surprises, pitfalls, recommendations
- docs/ARCHITECTURE.md: compose + test script design
This commit is contained in:
2026-05-06 15:56:40 +02:00
commit 4ad296608b
22 changed files with 2530 additions and 0 deletions

116
scripts/download_models.sh Executable file
View File

@@ -0,0 +1,116 @@
#!/usr/bin/env bash
# download_models.sh — Download GGUF model files to ./models/
#
# Usage:
# bash scripts/download_models.sh # all models
# bash scripts/download_models.sh smollm3 # single model
# bash scripts/download_models.sh gemma4-e2b gemma4-e4b # multiple
#
# Requires: huggingface-cli (pip install huggingface_hub)
# Models land in: ./models/
#
# Available keys: smollm3 | gemma4-e2b | gemma4-e4b | qwen3-4b | qwen35-9b | all
# Strict mode: abort on a failing command, on use of an unset variable, and
# when any stage of a pipeline fails.
set -o errexit -o nounset -o pipefail

# Models are stored in "models/" under the parent of this script's directory;
# the directory is created up front so later steps can rely on it.
MODELS_DIR="$(cd "$(dirname "$0")/.." && pwd)"
MODELS_DIR="${MODELS_DIR}/models"
mkdir -p "$MODELS_DIR"

# ANSI colour escapes, stored unexpanded and rendered later by `echo -e`.
NC='\033[0m'
RED='\033[0;31m'
YELLOW='\033[1;33m'
GREEN='\033[0;32m'
#######################################
# Abort unless a `huggingface-cli` command can be resolved by the shell.
# Globals:   RED, NC (read) - colour escapes wrapped around the error line
# Arguments: none
# Outputs:   on failure, an error line and an install hint on stderr
# Returns:   0 when the command is found; otherwise exits the shell with 1
#######################################
check_hf_cli() {
  if ! command -v huggingface-cli &>/dev/null; then
    # Diagnostics go to stderr so they never mix with captured stdout.
    echo -e "${RED}Error: huggingface-cli not found.${NC}" >&2
    echo "Install with: pip install huggingface_hub" >&2
    exit 1
  fi
}
#######################################
# Fetch one file from a Hugging Face repo into MODELS_DIR, unless a file with
# that name is already present there.
# Globals:   MODELS_DIR (read); GREEN, YELLOW, RED, NC (read)
# Arguments: $1 - short model key, used only as a log prefix
#            $2 - Hugging Face repo id
#            $3 - file name inside the repo (also the local file name)
#            $4 - human-readable size hint shown in the progress line
# Outputs:   progress lines on stdout; failure messages on stderr
# Returns:   0 when the file already exists or was downloaded;
#            the CLI's exit status when the download command fails;
#            1 when the command succeeded but the file is not at the
#            expected path
#######################################
download() {
  local key="$1"
  local repo="$2"
  local filename="$3"
  local size_hint="$4"
  local dest="$MODELS_DIR/$filename"
  local rc=0

  if [[ -f "$dest" ]]; then
    echo -e "${YELLOW}[$key]${NC} Already exists: $filename — skipping"
    return 0
  fi

  echo -e "${GREEN}[$key]${NC} Downloading $filename (~$size_hint) from $repo ..."

  # Check the status explicitly rather than relying on the caller's `set -e`:
  # errexit is ignored inside conditionals, and a failed download must never
  # be reported as "Done".
  huggingface-cli download "$repo" "$filename" --local-dir "$MODELS_DIR" || rc=$?
  if [[ "$rc" -ne 0 ]]; then
    echo -e "${RED}[$key]${NC} Download failed (exit $rc): $filename from $repo" >&2
    return "$rc"
  fi

  # Guard against a "successful" run that did not produce the expected file.
  if [[ ! -f "$dest" ]]; then
    echo -e "${RED}[$key]${NC} Download finished but file is missing: $dest" >&2
    return 1
  fi

  echo -e "${GREEN}[$key]${NC} Done: $MODELS_DIR/$filename"
}
# Download the SmolLM3-3B Q4_K_M GGUF via download().
# Arguments are: log key, Hugging Face repo, file name, size hint.
download_smollm3() {
  local -a spec=(
    "smollm3"
    "bartowski/HuggingFaceTB_SmolLM3-3B-GGUF"
    "HuggingFaceTB_SmolLM3-3B-Q4_K_M.gguf"
    "1.9 GB"
  )
  download "${spec[@]}"
}
# Download the Gemma 4 E2B instruction-tuned Q4_K_M GGUF via download().
# Arguments are: log key, Hugging Face repo, file name, size hint.
download_gemma4_e2b() {
  local -a spec=(
    "gemma4-e2b"
    "bartowski/google_gemma-4-E2B-it-GGUF"
    "google_gemma-4-E2B-it-Q4_K_M.gguf"
    "2.9 GB"
  )
  download "${spec[@]}"
}
# Download the Gemma 4 E4B instruction-tuned Q4_K_M GGUF via download().
# Arguments are: log key, Hugging Face repo, file name, size hint.
download_gemma4_e4b() {
  local -a spec=(
    "gemma4-e4b"
    "bartowski/google_gemma-4-E4B-it-GGUF"
    "google_gemma-4-E4B-it-Q4_K_M.gguf"
    "4.7 GB"
  )
  download "${spec[@]}"
}
# Download the Qwen3-4B Q4_K_M GGUF via download().
# Arguments are: log key, Hugging Face repo, file name, size hint.
download_qwen3_4b() {
  local -a spec=(
    "qwen3-4b"
    "bartowski/Qwen3-4B-GGUF"
    "Qwen3-4B-Q4_K_M.gguf"
    "2.4 GB"
  )
  download "${spec[@]}"
}
# Download the Qwen3.5-9B Q8_0 GGUF via download().
# Arguments are: log key, Hugging Face repo, file name, size hint.
download_qwen35_9b() {
  local -a spec=(
    "qwen35-9b"
    "Jackrong/Qwen3.5-9B-Claude-4.6-Opus-Reasoning-Distilled-v2-GGUF"
    "Qwen3.5-9B.Q8_0.gguf"
    "8.9 GB"
  )
  download "${spec[@]}"
}
#######################################
# Entry point: download the requested models, then list the .gguf files
# present in the models directory.
# Globals:   MODELS_DIR (read); RED, NC (read)
# Arguments: zero or more model keys
#            (smollm3 | gemma4-e2b | gemma4-e4b | qwen3-4b | qwen35-9b | all).
#            No arguments, or "all" as the first argument, selects every model.
# Outputs:   progress and the final listing on stdout; errors on stderr
# Returns:   0 on success; exits the shell with 1 on an unknown key
#######################################
main() {
  check_hf_cli

  local -a targets=("$@")
  local target

  if [[ ${#targets[@]} -eq 0 || "${targets[0]}" == "all" ]]; then
    targets=(smollm3 gemma4-e2b gemma4-e4b qwen3-4b qwen35-9b)
  fi

  # Validate every key before starting any download, so a typo late in the
  # argument list fails fast instead of after fetching earlier models.
  for target in "${targets[@]}"; do
    case "$target" in
      smollm3 | gemma4-e2b | gemma4-e4b | qwen3-4b | qwen35-9b | all) ;;
      *)
        echo -e "${RED}Unknown model: $target${NC}" >&2
        echo "Valid keys: smollm3 | gemma4-e2b | gemma4-e4b | qwen3-4b | qwen35-9b | all" >&2
        exit 1
        ;;
    esac
  done

  for target in "${targets[@]}"; do
    case "$target" in
      smollm3) download_smollm3 ;;
      gemma4-e2b) download_gemma4_e2b ;;
      gemma4-e4b) download_gemma4_e4b ;;
      qwen3-4b) download_qwen3_4b ;;
      qwen35-9b) download_qwen35_9b ;;
      all)
        # "all" in a non-leading position still expands to every model.
        download_smollm3
        download_gemma4_e2b
        download_gemma4_e4b
        download_qwen3_4b
        download_qwen35_9b
        ;;
    esac
  done

  echo ""
  echo "Models directory:"
  # ls fails when the glob matches nothing; show a placeholder line instead.
  ls -lh "$MODELS_DIR"/*.gguf 2>/dev/null || echo "(no .gguf files found)"
}
# Run the entry point, forwarding every command-line argument unchanged.
main "$@"