- 5 models: SmolLM3-3B, Gemma4-E2B/E4B, Qwen3-4B, Qwen3.5-9B - TurboQuant image (FORCE_MMQ): +6-11% free speed on Turing GPUs - Bigctx profiles (-nkvo KV in RAM): 2-16x context gain - turbo2 KV: 2x smaller, benchmarked against PPL quality gate - Per-model env files with justified parameters - kv_quant_test.sh + cpu_ctx_test.sh benchmark scripts - docs/FINDINGS.md: surprises, pitfalls, recommendations - docs/ARCHITECTURE.md: compose + test script design
117 lines
3.2 KiB
Bash
Executable File
117 lines
3.2 KiB
Bash
Executable File
#!/usr/bin/env bash
# download_models.sh — Download GGUF model files to ./models/
#
# Usage:
#   bash scripts/download_models.sh                        # all models
#   bash scripts/download_models.sh smollm3                # single model
#   bash scripts/download_models.sh gemma4-e2b gemma4-e4b  # multiple
#
# Requires: huggingface-cli (pip install huggingface_hub)
# Models land in: models/ under the parent of this script's directory
#                 (resolved from the script path, not from the caller's cwd)
#
# Available keys: smollm3 | gemma4-e2b | gemma4-e4b | qwen3-4b | qwen35-9b | all

# Strict mode: abort on command failure, on use of an unset variable, and on a
# failure in any stage of a pipeline.
set -euo pipefail

# Absolute path of the models directory: <parent of this script's dir>/models.
# CDPATH is cleared for the cd so that a user-set CDPATH can neither redirect
# the relative lookup nor make cd print the directory into the captured output;
# `--` protects against a script path that begins with a dash.
MODELS_DIR="$(CDPATH='' cd -- "$(dirname -- "$0")/.." && pwd)/models"
readonly MODELS_DIR
mkdir -p -- "$MODELS_DIR"

# ANSI colour sequences, stored un-interpreted (literal backslash escapes).
# They only take effect when expanded through `echo -e` or printf's %b.
readonly GREEN='\033[0;32m' YELLOW='\033[1;33m' RED='\033[0;31m' NC='\033[0m'
#######################################
# Verify that the `huggingface-cli` command can be resolved.
# Globals:   RED, NC (read) - colour escapes wrapped around the error line
# Arguments: none
# Outputs:   on failure, an error line and an install hint on stderr
# Returns:   0 when the command is found; otherwise exits the shell with 1
#######################################
check_hf_cli() {
  if command -v huggingface-cli >/dev/null 2>&1; then
    return 0
  fi

  # Diagnostics go to stderr so they never mix with regular output.
  printf '%b%s%b\n' "$RED" "Error: huggingface-cli not found." "$NC" >&2
  printf '%s\n' "Install with: pip install huggingface_hub" >&2
  exit 1
}
#######################################
# Download one file from a Hugging Face repo into MODELS_DIR, unless a file
# with that name is already there.
# Globals:   MODELS_DIR (read); GREEN, YELLOW, RED, NC (read) - colour escapes
# Arguments: $1 - short model key, used only as a log prefix
#            $2 - Hugging Face repo id passed to `huggingface-cli download`
#            $3 - file name inside the repo; also the expected name on disk
#            $4 - human-readable size hint, used only in the progress message
# Outputs:   progress lines on stdout; failure messages on stderr
# Returns:   0 if the file already existed or was downloaded;
#            1 if the CLI failed or the file is missing afterwards
#######################################
download() {
  local key="$1"
  local repo="$2"
  local filename="$3"
  local size_hint="$4"
  local dest="$MODELS_DIR/$filename"

  if [[ -f "$dest" ]]; then
    printf '%b[%s]%b Already exists: %s — skipping\n' \
      "$YELLOW" "$key" "$NC" "$filename"
    return 0
  fi

  printf '%b[%s]%b Downloading %s (~%s) from %s ...\n' \
    "$GREEN" "$key" "$NC" "$filename" "$size_hint" "$repo"

  # Check the CLI explicitly instead of relying on the caller's `set -e`, so
  # the failure is attributed to a model key and "Done" is never printed for it.
  if ! huggingface-cli download "$repo" "$filename" --local-dir "$MODELS_DIR"; then
    printf '%b[%s]%b Download failed: %s from %s\n' \
      "$RED" "$key" "$NC" "$filename" "$repo" >&2
    return 1
  fi

  # The skip check above keys on $dest, so a "successful" run that did not
  # produce that path would be silently repeated on every invocation.
  if [[ ! -f "$dest" ]]; then
    printf '%b[%s]%b Download finished but file is missing: %s\n' \
      "$RED" "$key" "$NC" "$dest" >&2
    return 1
  fi

  printf '%b[%s]%b Done: %s\n' "$GREEN" "$key" "$NC" "$dest"
}
# Fetch the SmolLM3-3B Q4_K_M GGUF (key "smollm3", ~1.9 GB) via download().
download_smollm3() {
  download "smollm3" \
    "bartowski/HuggingFaceTB_SmolLM3-3B-GGUF" \
    "HuggingFaceTB_SmolLM3-3B-Q4_K_M.gguf" \
    "1.9 GB"
}
# Fetch the Gemma 4 E2B-it Q4_K_M GGUF (key "gemma4-e2b", ~2.9 GB) via download().
download_gemma4_e2b() {
  download "gemma4-e2b" \
    "bartowski/google_gemma-4-E2B-it-GGUF" \
    "google_gemma-4-E2B-it-Q4_K_M.gguf" \
    "2.9 GB"
}
# Fetch the Gemma 4 E4B-it Q4_K_M GGUF (key "gemma4-e4b", ~4.7 GB) via download().
download_gemma4_e4b() {
  download "gemma4-e4b" \
    "bartowski/google_gemma-4-E4B-it-GGUF" \
    "google_gemma-4-E4B-it-Q4_K_M.gguf" \
    "4.7 GB"
}
# Fetch the Qwen3-4B Q4_K_M GGUF (key "qwen3-4b", ~2.4 GB) via download().
download_qwen3_4b() {
  download "qwen3-4b" \
    "bartowski/Qwen3-4B-GGUF" \
    "Qwen3-4B-Q4_K_M.gguf" \
    "2.4 GB"
}
# Fetch the Qwen3.5-9B GGUF (key "qwen35-9b", ~8.9 GB) via download().
# Unlike the other models in this script, this file is a Q8_0 quantisation.
download_qwen35_9b() {
  download "qwen35-9b" \
    "Jackrong/Qwen3.5-9B-Claude-4.6-Opus-Reasoning-Distilled-v2-GGUF" \
    "Qwen3.5-9B.Q8_0.gguf" \
    "8.9 GB"
}
#######################################
# Entry point: resolve the requested model keys and download each of them.
# With no arguments, or when the first argument is "all", every known model
# is selected (any further arguments are then ignored). "all" appearing later
# in the list expands to every model at that position.
# Globals:   MODELS_DIR (read); RED, NC (read) - colour escapes
# Arguments: zero or more model keys:
#            smollm3 | gemma4-e2b | gemma4-e4b | qwen3-4b | qwen35-9b | all
# Outputs:   per-model progress and a final listing of *.gguf files on stdout;
#            an error plus the list of valid keys on stderr for a bad key
# Returns:   0 on success; exits 1 on an unknown key
#######################################
main() {
  local -a all_keys=(smollm3 gemma4-e2b gemma4-e4b qwen3-4b qwen35-9b)
  local -a targets=("$@")
  local -a queue=()
  local target

  if [[ ${#targets[@]} -eq 0 || "${targets[0]}" == "all" ]]; then
    targets=("${all_keys[@]}")
  fi

  # Validate every key before starting any download, so a typo in a later
  # argument cannot surface only after earlier multi-gigabyte transfers.
  for target in "${targets[@]}"; do
    case "$target" in
      all)
        queue+=("${all_keys[@]}")
        ;;
      smollm3 | gemma4-e2b | gemma4-e4b | qwen3-4b | qwen35-9b)
        queue+=("$target")
        ;;
      *)
        # %s keeps backslashes in user input from being interpreted.
        printf '%bUnknown model: %s%b\n' "$RED" "$target" "$NC" >&2
        echo "Valid keys: smollm3 | gemma4-e2b | gemma4-e4b | qwen3-4b | qwen35-9b | all" >&2
        exit 1
        ;;
    esac
  done

  check_hf_cli

  # Each validated key maps to a download_<key> function with dashes turned
  # into underscores (e.g. gemma4-e2b -> download_gemma4_e2b).
  for target in "${queue[@]}"; do
    "download_${target//-/_}"
  done

  echo ""
  echo "Models directory:"
  # Best-effort listing: an unmatched glob makes ls fail, which is reported
  # as "no files" rather than as an error.
  ls -lh "$MODELS_DIR"/*.gguf 2>/dev/null || echo "(no .gguf files found)"
}
# Run the script: forward every command-line argument, each as its own word.
main "$@"