Initial commit: tuned multi-model llama.cpp stack
- 5 models: SmolLM3-3B, Gemma4-E2B/E4B, Qwen3-4B, Qwen3.5-9B
- TurboQuant image (FORCE_MMQ): +6-11% free speed on Turing GPUs
- Bigctx profiles (-nkvo KV in RAM): 2-16x context gain
- turbo2 KV: 2x smaller, benchmarked against PPL quality gate
- Per-model env files with justified parameters
- kv_quant_test.sh + cpu_ctx_test.sh benchmark scripts
- docs/FINDINGS.md: surprises, pitfalls, recommendations
- docs/ARCHITECTURE.md: compose + test script design
This commit is contained in:
116
scripts/download_models.sh
Executable file
116
scripts/download_models.sh
Executable file
@@ -0,0 +1,116 @@
|
||||
#!/usr/bin/env bash
|
||||
# download_models.sh — Download GGUF model files to ./models/
#
# Usage:
#   bash scripts/download_models.sh                        # all models
#   bash scripts/download_models.sh smollm3                # single model
#   bash scripts/download_models.sh gemma4-e2b gemma4-e4b  # multiple
#
# Requires: huggingface-cli (pip install huggingface_hub)
# Models land in: ./models/
#
# Available keys: smollm3 | gemma4-e2b | gemma4-e4b | qwen3-4b | qwen35-9b | all
|
||||
|
||||
# Strict mode: abort on unhandled errors, unset variables and failed pipeline
# stages.
set -euo pipefail

# Absolute path of the "models" directory that sits next to this script's
# parent directory (i.e. <dir of script>/../models).
# CDPATH is cleared for the cd because, when CDPATH is set and a relative
# operand is resolved through it, cd prints the new directory on stdout, which
# would be captured by the command substitution and corrupt MODELS_DIR.
# The "--" guards protect against a script path that starts with a dash.
MODELS_DIR="$(CDPATH='' cd -- "$(dirname -- "$0")/.." && pwd)/models"
mkdir -p -- "$MODELS_DIR"

# ANSI colour escapes, stored unexpanded; the printing code is responsible for
# interpreting the backslash sequences.
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
RED='\033[0;31m'
NC='\033[0m'
|
||||
|
||||
#######################################
# Abort the script unless a huggingface-cli command is available.
# Globals:   RED, NC (read) - colour escapes wrapped around the error line
# Arguments: none
# Outputs:   error message and install hint on stderr when the CLI is missing
# Returns:   0 when huggingface-cli is found; otherwise exits the shell with 1
#######################################
check_hf_cli() {
  if command -v huggingface-cli >/dev/null 2>&1; then
    return 0
  fi

  # Diagnostics go to stderr so stdout stays clean for pipes and captures.
  printf '%b\n' "${RED}Error: huggingface-cli not found.${NC}" >&2
  printf '%s\n' "Install with: pip install huggingface_hub" >&2
  exit 1
}
|
||||
|
||||
#######################################
# Download a single file from a Hugging Face repo into MODELS_DIR.
# The download is skipped when the destination file already exists.
# Globals:   MODELS_DIR (read); GREEN, YELLOW, RED, NC (read)
# Arguments: $1 - short model key, used only as a log prefix
#            $2 - repository id passed to huggingface-cli
#            $3 - file name inside the repository (and under MODELS_DIR)
#            $4 - human-readable size hint shown in the progress line
# Outputs:   progress lines on stdout, failures on stderr
# Returns:   0 on success or skip; the CLI's exit status if it fails;
#            1 if the CLI succeeds but the expected file is missing
#######################################
download() {
  local key="$1"
  local repo="$2"
  local filename="$3"
  local size_hint="$4"
  local dest="$MODELS_DIR/$filename"
  local rc=0

  if [[ -f "$dest" ]]; then
    printf '%b[%s]%b Already exists: %s — skipping\n' \
      "${YELLOW}" "$key" "${NC}" "$filename"
    return 0
  fi

  printf '%b[%s]%b Downloading %s (~%s) from %s ...\n' \
    "${GREEN}" "$key" "${NC}" "$filename" "$size_hint" "$repo"

  # Capture the status explicitly so a failure is reported with context
  # instead of relying on set -e to abort silently.
  huggingface-cli download "$repo" "$filename" --local-dir "$MODELS_DIR" || rc=$?
  if ((rc != 0)); then
    printf '%b[%s]%b Download failed (exit %s): %s from %s\n' \
      "${RED}" "$key" "${NC}" "$rc" "$filename" "$repo" >&2
    return "$rc"
  fi

  # Do not claim success unless the file actually landed where later runs
  # (and the skip check above) expect it.
  if [[ ! -f "$dest" ]]; then
    printf '%b[%s]%b Download reported success but file is missing: %s\n' \
      "${RED}" "$key" "${NC}" "$dest" >&2
    return 1
  fi

  printf '%b[%s]%b Done: %s\n' "${GREEN}" "$key" "${NC}" "$dest"
}
|
||||
|
||||
# Fetch the SmolLM3-3B Q4_K_M GGUF file via download(), logged under the
# "smollm3" key.
download_smollm3() {
  local hf_repo="bartowski/HuggingFaceTB_SmolLM3-3B-GGUF"
  local gguf_file="HuggingFaceTB_SmolLM3-3B-Q4_K_M.gguf"

  download "smollm3" "$hf_repo" "$gguf_file" "1.9 GB"
}
|
||||
|
||||
# Fetch the gemma-4-E2B-it Q4_K_M GGUF file via download(), logged under the
# "gemma4-e2b" key.
download_gemma4_e2b() {
  local hf_repo="bartowski/google_gemma-4-E2B-it-GGUF"
  local gguf_file="google_gemma-4-E2B-it-Q4_K_M.gguf"

  download "gemma4-e2b" "$hf_repo" "$gguf_file" "2.9 GB"
}
|
||||
|
||||
# Fetch the gemma-4-E4B-it Q4_K_M GGUF file via download(), logged under the
# "gemma4-e4b" key.
download_gemma4_e4b() {
  local hf_repo="bartowski/google_gemma-4-E4B-it-GGUF"
  local gguf_file="google_gemma-4-E4B-it-Q4_K_M.gguf"

  download "gemma4-e4b" "$hf_repo" "$gguf_file" "4.7 GB"
}
|
||||
|
||||
# Fetch the Qwen3-4B Q4_K_M GGUF file via download(), logged under the
# "qwen3-4b" key.
download_qwen3_4b() {
  local hf_repo="bartowski/Qwen3-4B-GGUF"
  local gguf_file="Qwen3-4B-Q4_K_M.gguf"

  download "qwen3-4b" "$hf_repo" "$gguf_file" "2.4 GB"
}
|
||||
|
||||
# Fetch the Qwen3.5-9B Q8_0 GGUF file via download(), logged under the
# "qwen35-9b" key.
download_qwen35_9b() {
  local hf_repo="Jackrong/Qwen3.5-9B-Claude-4.6-Opus-Reasoning-Distilled-v2-GGUF"
  local gguf_file="Qwen3.5-9B.Q8_0.gguf"

  download "qwen35-9b" "$hf_repo" "$gguf_file" "8.9 GB"
}
|
||||
|
||||
#######################################
# Entry point: download the requested models, then list the models directory.
# Globals:   MODELS_DIR (read); RED, NC (read)
# Arguments: zero or more model keys
#            (smollm3 | gemma4-e2b | gemma4-e4b | qwen3-4b | qwen35-9b | all).
#            No arguments, or "all" as the first argument, selects every model.
# Outputs:   listing of *.gguf files on stdout; errors on stderr
# Returns:   exits 1 on an unknown key, before anything is downloaded
#######################################
main() {
  check_hf_cli

  local target
  local targets=("$@")
  if [[ ${#targets[@]} -eq 0 || "${targets[0]}" == "all" ]]; then
    targets=(smollm3 gemma4-e2b gemma4-e4b qwen3-4b qwen35-9b)
  fi

  # Validate every key up front: a typo in a later argument must not be
  # discovered only after earlier multi-gigabyte downloads have already run.
  for target in "${targets[@]}"; do
    case "$target" in
      smollm3 | gemma4-e2b | gemma4-e4b | qwen3-4b | qwen35-9b | all) ;;
      *)
        # %s keeps backslashes in user input literal; only colours use %b.
        printf '%b%s%b\n' "${RED}" "Unknown model: $target" "${NC}" >&2
        printf '%s\n' \
          "Valid keys: smollm3 | gemma4-e2b | gemma4-e4b | qwen3-4b | qwen35-9b | all" >&2
        exit 1
        ;;
    esac
  done

  for target in "${targets[@]}"; do
    case "$target" in
      smollm3) download_smollm3 ;;
      gemma4-e2b) download_gemma4_e2b ;;
      gemma4-e4b) download_gemma4_e4b ;;
      qwen3-4b) download_qwen3_4b ;;
      qwen35-9b) download_qwen35_9b ;;
      all)
        # "all" given after another key: fetch everything at this position.
        download_smollm3
        download_gemma4_e2b
        download_gemma4_e4b
        download_qwen3_4b
        download_qwen35_9b
        ;;
    esac
  done

  echo ""
  echo "Models directory:"
  # ls fails when the glob matches nothing; show a friendly note instead.
  ls -lh "$MODELS_DIR"/*.gguf 2>/dev/null || echo "(no .gguf files found)"
}
|
||||
|
||||
# Run the script: forward every command-line argument to main unchanged.
main "$@"
|
||||
Reference in New Issue
Block a user