fix(worker): collapse incremental segments
Normalize rolling partial-hypothesis chains before final job persistence so downstream clients receive stable transcript segments instead of echoed continuations.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
18
src/main.rs
18
src/main.rs
@@ -97,10 +97,10 @@ async fn main() -> anyhow::Result<()> {
|
||||
.with(tracing_subscriber::fmt::layer().json())
|
||||
.init();
|
||||
|
||||
let data_dir = std::env::var("DATA_DIR").unwrap_or_else(|_| "/data".into());
|
||||
let model_path = std::env::var("WHISPER_MODEL_PATH")
|
||||
.unwrap_or_else(|_| "/models/ggml-large-v3.bin".into());
|
||||
let port = std::env::var("PORT").unwrap_or_else(|_| "8080".into());
|
||||
let data_dir = std::env::var("DATA_DIR").unwrap_or_else(|_| "/data".into());
|
||||
let model_path =
|
||||
std::env::var("WHISPER_MODEL_PATH").unwrap_or_else(|_| "/models/ggml-large-v3.bin".into());
|
||||
let port = std::env::var("PORT").unwrap_or_else(|_| "8080".into());
|
||||
let model_name = std::env::var("WHISPER_MODEL").unwrap_or_else(|_| "large-v3".into());
|
||||
let gpu_device: u32 = std::env::var("CUDA_DEVICE")
|
||||
.ok()
|
||||
@@ -132,7 +132,9 @@ async fn main() -> anyhow::Result<()> {
|
||||
// Model starts unloaded — lazy load on first job or POST /model/load.
|
||||
let model_state = Arc::new(RwLock::new(models::ModelState::Unloaded));
|
||||
let (model_event_tx, _) = broadcast::channel::<models::ModelEvent>(32);
|
||||
let webhook_registry = Arc::new(std::sync::Mutex::new(std::collections::HashSet::<String>::new()));
|
||||
let webhook_registry = Arc::new(std::sync::Mutex::new(
|
||||
std::collections::HashSet::<String>::new(),
|
||||
));
|
||||
|
||||
// Spawn single GPU worker; get back the SSE broadcast registry and cmd channel.
|
||||
let (progress, cmd_tx) = worker::start(
|
||||
@@ -153,13 +155,13 @@ async fn main() -> anyhow::Result<()> {
|
||||
cmd_tx,
|
||||
storage: Arc::clone(&storage),
|
||||
progress,
|
||||
model_name: model_name.as_str().into(),
|
||||
queue_depth: Arc::clone(&queue_depth),
|
||||
model_name: model_name.as_str().into(),
|
||||
queue_depth: Arc::clone(&queue_depth),
|
||||
gpu_device,
|
||||
model_state,
|
||||
model_event_tx,
|
||||
webhook_registry,
|
||||
idle_timeout: std::time::Duration::from_secs(idle_timeout_secs),
|
||||
idle_timeout: std::time::Duration::from_secs(idle_timeout_secs),
|
||||
gpu_poll_interval: std::time::Duration::from_secs(gpu_poll_interval_secs),
|
||||
};
|
||||
|
||||
|
||||
Reference in New Issue
Block a user