whisper-rtx2080/src/routes/health.rs

use std::sync::atomic::Ordering;

use axum::extract::State;
use axum::Json;

use crate::{models::HealthResponse, AppState, Result};

/// Return service health, GPU info, and queue depth.
#[utoipa::path(
    get,
    path = "/health",
    tag  = "system",
    responses(
        (status = 200, description = "Service healthy", body = HealthResponse),
    )
)]
pub async fn health(State(state): State<AppState>) -> Result<Json<HealthResponse>> {
    // GPU details are best-effort: `gpu_info` yields `None` for whatever it
    // could not obtain, and the endpoint still answers 200 in that case.
    // NOTE(review): `gpu_info` runs `nvidia-smi` synchronously, so this call
    // blocks the current executor thread until the subprocess exits — consider
    // caching the result or moving the call off the async path.
    let (gpu_name, vram_total_mb) = gpu_info(state.gpu_device);

    // Keep the read guard only long enough to copy the tag out as an owned
    // string, so no lock is held while the response is assembled.
    let model_state = {
        let guard = state.model_state.read().await;
        guard.tag().to_string()
    };

    let body = HealthResponse {
        status: "ok".into(),
        gpu_name,
        vram_total_mb,
        model: state.model_name.to_string(),
        // A relaxed load is enough here: the value is an informational snapshot.
        queue_depth: state.queue_depth.load(Ordering::Relaxed),
        model_state,
    };

    Ok(Json(body))
}

/// Query NVIDIA GPU info via `nvidia-smi` for the given CUDA device index.
///
/// Runs `nvidia-smi --id=<device> --query-gpu=name,memory.total
/// --format=csv,noheader,nounits` and returns `(name, total_memory)`.
///
/// The lookup is best-effort and never fails loudly:
/// * `(None, None)` when `nvidia-smi` cannot be spawned, exits with a
///   non-zero status, or prints nothing;
/// * `(Some(name), None)` when a row is printed but its memory field is not
///   a plain unsigned integer (for example `[N/A]`).
///
/// The memory figure is returned exactly as `nvidia-smi` prints it with
/// `nounits`; the caller exposes it as `vram_total_mb`.
///
/// This call blocks the current thread until the subprocess exits.
fn gpu_info(device: u32) -> (Option<String>, Option<u64>) {
    let id_arg = format!("--id={device}");
    let output = std::process::Command::new("nvidia-smi")
        .args([
            id_arg.as_str(),
            "--query-gpu=name,memory.total",
            "--format=csv,noheader,nounits",
        ])
        .output();

    match output {
        Ok(out) if out.status.success() => {
            parse_gpu_csv_row(&String::from_utf8_lossy(&out.stdout))
        }
        // Spawn failure (binary missing, not executable, ...) or a non-zero
        // exit (unknown device, driver not loaded, ...): report nothing.
        _ => (None, None),
    }
}

/// Parse the first non-empty `name, memory.total` CSV row of `stdout`.
///
/// The row is split on its *last* comma: the memory figure is always the
/// final field, so a comma inside the product name cannot shift it. An empty
/// name is reported as `None` rather than `Some("")`.
fn parse_gpu_csv_row(stdout: &str) -> (Option<String>, Option<u64>) {
    // Only the first populated line matters; anything after it (extra rows,
    // trailing blank lines) is ignored instead of corrupting the parse.
    let Some(row) = stdout.lines().map(str::trim).find(|line| !line.is_empty()) else {
        return (None, None);
    };

    let (name, vram) = match row.rsplit_once(',') {
        Some((name, vram)) => (name.trim(), vram.trim().parse::<u64>().ok()),
        // No separator at all: treat the whole row as the name.
        None => (row, None),
    };

    let name = (!name.is_empty()).then(|| name.to_owned());
    (name, vram)
}