Initial commit: tuned multi-model llama.cpp stack

- 5 models: SmolLM3-3B, Gemma4-E2B/E4B, Qwen3-4B, Qwen3.5-9B
- TurboQuant image (FORCE_MMQ): +6-11% free speed on Turing GPUs
- Bigctx profiles (-nkvo KV in RAM): 2-16x context gain
- turbo2 KV: 2x smaller, benchmarked against PPL quality gate
- Per-model env files with justified parameters
- kv_quant_test.sh + cpu_ctx_test.sh benchmark scripts
- docs/FINDINGS.md: surprises, pitfalls, recommendations
- docs/ARCHITECTURE.md: compose + test script design
2026-05-06 15:56:40 +02:00
commit 4ad296608b
22 changed files with 2530 additions and 0 deletions
--- a/scripts/download_models.sh
+++ b/scripts/download_models.sh
@@ -0,0 +1,116 @@
+#!/usr/bin/env bash
+# download_models.sh — Download GGUF model files to ./models/
+#
+# Usage:
+#   bash scripts/download_models.sh           # all models
+#   bash scripts/download_models.sh smollm3   # single model
+#   bash scripts/download_models.sh gemma4-e2b gemma4-e4b  # multiple
+#
+# Requires: huggingface-cli (pip install huggingface_hub)
+# Models land in: ./models/
+#
+# Available keys: smollm3 | gemma4-e2b | gemma4-e4b | qwen3-4b | qwen35-9b | all
+
+set -euo pipefail
+
+MODELS_DIR="$(cd "$(dirname "$0")/.." && pwd)/models"
+mkdir -p "$MODELS_DIR"
+
+GREEN='\033[0;32m'; YELLOW='\033[1;33m'; RED='\033[0;31m'; NC='\033[0m'
+
+check_hf_cli() {
+    if ! command -v huggingface-cli &>/dev/null; then
+        echo -e "${RED}Error: huggingface-cli not found.${NC}"
+        echo "Install with: pip install huggingface_hub"
+        exit 1
+    fi
+}
+
+download() {
+    local key="$1"
+    local repo="$2"
+    local filename="$3"
+    local size_hint="$4"
+
+    local dest="$MODELS_DIR/$filename"
+    if [[ -f "$dest" ]]; then
+        echo -e "${YELLOW}[$key]${NC} Already exists: $filename — skipping"
+        return
+    fi
+
+    echo -e "${GREEN}[$key]${NC} Downloading $filename (~$size_hint) from $repo ..."
+    huggingface-cli download "$repo" "$filename" --local-dir "$MODELS_DIR"
+    echo -e "${GREEN}[$key]${NC} Done: $MODELS_DIR/$filename"
+}
+
+download_smollm3() {
+    download "smollm3" \
+        "bartowski/HuggingFaceTB_SmolLM3-3B-GGUF" \
+        "HuggingFaceTB_SmolLM3-3B-Q4_K_M.gguf" \
+        "1.9 GB"
+}
+
+download_gemma4_e2b() {
+    download "gemma4-e2b" \
+        "bartowski/google_gemma-4-E2B-it-GGUF" \
+        "google_gemma-4-E2B-it-Q4_K_M.gguf" \
+        "2.9 GB"
+}
+
+download_gemma4_e4b() {
+    download "gemma4-e4b" \
+        "bartowski/google_gemma-4-E4B-it-GGUF" \
+        "google_gemma-4-E4B-it-Q4_K_M.gguf" \
+        "4.7 GB"
+}
+
+download_qwen3_4b() {
+    download "qwen3-4b" \
+        "bartowski/Qwen3-4B-GGUF" \
+        "Qwen3-4B-Q4_K_M.gguf" \
+        "2.4 GB"
+}
+
+download_qwen35_9b() {
+    download "qwen35-9b" \
+        "Jackrong/Qwen3.5-9B-Claude-4.6-Opus-Reasoning-Distilled-v2-GGUF" \
+        "Qwen3.5-9B.Q8_0.gguf" \
+        "8.9 GB"
+}
+
+# Entry point: download the requested models, then list the .gguf files
+# present in $MODELS_DIR.
+# Globals:   MODELS_DIR, RED, NC (read)
+# Arguments: zero or more model keys (smollm3 | gemma4-e2b | gemma4-e4b |
+#            qwen3-4b | qwen35-9b | all). No arguments, or "all" as the first
+#            argument, selects every model.
+# Returns:   exits 1 on an unknown key, before anything is downloaded
+main() {
+    check_hf_cli
+
+    local targets=("$@")
+    if [[ ${#targets[@]} -eq 0 || "${targets[0]}" == "all" ]]; then
+        targets=(smollm3 gemma4-e2b gemma4-e4b qwen3-4b qwen35-9b)
+    fi
+
+    # Validate every key before starting any transfer, so a typo in a later
+    # argument does not surface only after earlier multi-gigabyte downloads.
+    local target
+    for target in "${targets[@]}"; do
+        case "$target" in
+            smollm3 | gemma4-e2b | gemma4-e4b | qwen3-4b | qwen35-9b | all) ;;
+            *)
+                echo -e "${RED}Unknown model: $target${NC}" >&2
+                echo "Valid keys: smollm3 | gemma4-e2b | gemma4-e4b | qwen3-4b | qwen35-9b | all" >&2
+                exit 1
+                ;;
+        esac
+    done
+
+    for target in "${targets[@]}"; do
+        case "$target" in
+            smollm3)     download_smollm3 ;;
+            gemma4-e2b)  download_gemma4_e2b ;;
+            gemma4-e4b)  download_gemma4_e4b ;;
+            qwen3-4b)    download_qwen3_4b ;;
+            qwen35-9b)   download_qwen35_9b ;;
+            # "all" in a non-leading position still expands to every model.
+            all)
+                download_smollm3
+                download_gemma4_e2b
+                download_gemma4_e4b
+                download_qwen3_4b
+                download_qwen35_9b
+                ;;
+        esac
+    done
+
+    echo ""
+    echo "Models directory:"
+    # ls errors are silenced on purpose: an unmatched glob just means there
+    # is nothing to list, which the fallback message reports.
+    ls -lh "$MODELS_DIR"/*.gguf 2>/dev/null || echo "(no .gguf files found)"
+}
+
+main "$@"