fix(worker): collapse incremental segments
All checks were successful
Build & Push Docker Image / test (push) Successful in 6m20s
Build & Push Docker Image / build-and-push (push) Successful in 6m29s

Normalize rolling partial-hypothesis chains before final job persistence so downstream clients receive stable transcript segments instead of echoed continuations.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
2026-05-11 22:46:38 +02:00
parent d3a67f11b3
commit cb0b07b2ff
10 changed files with 712 additions and 331 deletions

View File

@@ -16,8 +16,7 @@ use crate::{
models::{Job, JobId, JobStatus, ModelEvent, ModelState, Segment},
storage::Storage,
transcriber::Transcriber,
webhook,
AppError,
webhook, AppError,
};
/// Per-job broadcast channel for SSE subscribers.
@@ -26,7 +25,11 @@ pub type ProgressTx = broadcast::Sender<ProgressEvent>;
#[derive(Debug, Clone)]
pub enum ProgressEvent {
/// `percent` — overall 0–100; `chunk` — 1-based; `total` — total chunks.
Progress { percent: u8, chunk: usize, total: usize },
Progress {
percent: u8,
chunk: usize,
total: usize,
},
Done(Box<Job>),
Error(String),
}
@@ -50,11 +53,11 @@ pub enum WorkerCmd {
// ── Transcription request/response types ─────────────────────────────────────
pub struct TranscribeRequest {
pub pcm: Vec<f32>,
pub language: Option<String>,
pub task: String,
pub pcm: Vec<f32>,
pub language: Option<String>,
pub task: String,
pub on_progress: Box<dyn Fn(u8) + Send + 'static>,
pub reply: oneshot::Sender<crate::Result<(Vec<Segment>, String)>>,
pub reply: oneshot::Sender<crate::Result<(Vec<Segment>, String)>>,
}
impl std::fmt::Debug for TranscribeRequest {
@@ -75,15 +78,15 @@ impl std::fmt::Debug for TranscribeRequest {
/// trigger loading.
#[allow(clippy::too_many_arguments)]
pub fn start(
job_rx: mpsc::UnboundedReceiver<JobId>,
storage: Arc<Storage>,
model_path: PathBuf,
queue_depth: Arc<AtomicUsize>,
gpu_device: u32,
model_state: Arc<RwLock<ModelState>>,
model_event_tx: broadcast::Sender<ModelEvent>,
job_rx: mpsc::UnboundedReceiver<JobId>,
storage: Arc<Storage>,
model_path: PathBuf,
queue_depth: Arc<AtomicUsize>,
gpu_device: u32,
model_state: Arc<RwLock<ModelState>>,
model_event_tx: broadcast::Sender<ModelEvent>,
webhook_registry: Arc<Mutex<HashSet<String>>>,
idle_timeout: Duration,
idle_timeout: Duration,
gpu_poll_interval: Duration,
) -> (ProgressRegistry, std::sync::mpsc::SyncSender<WorkerCmd>) {
let registry: ProgressRegistry = Arc::new(dashmap::DashMap::new());
@@ -126,15 +129,15 @@ pub fn start(
/// separate thread.
#[allow(clippy::too_many_arguments)]
fn transcriber_thread(
rx: std::sync::mpsc::Receiver<WorkerCmd>,
model_path: PathBuf,
gpu_device: u32,
model_state: Arc<RwLock<ModelState>>,
model_event_tx: broadcast::Sender<ModelEvent>,
rx: std::sync::mpsc::Receiver<WorkerCmd>,
model_path: PathBuf,
gpu_device: u32,
model_state: Arc<RwLock<ModelState>>,
model_event_tx: broadcast::Sender<ModelEvent>,
webhook_registry: Arc<Mutex<HashSet<String>>>,
idle_timeout: Duration,
idle_timeout: Duration,
gpu_poll_interval: Duration,
rt: tokio::runtime::Handle,
rt: tokio::runtime::Handle,
) {
let mut transcriber: Option<Transcriber> = None;
let mut last_job = Instant::now();
@@ -162,14 +165,22 @@ fn transcriber_thread(
}
Ok(WorkerCmd::Unload) => {
do_unload(&mut transcriber, &model_state, &model_event_tx, &webhook_registry, &rt);
do_unload(
&mut transcriber,
&model_state,
&model_event_tx,
&webhook_registry,
&rt,
);
}
Ok(WorkerCmd::Transcribe(req)) => {
let t = match &mut transcriber {
Some(t) => t,
None => {
tracing::warn!("Transcribe cmd received but model is unloaded — failing job");
tracing::warn!(
"Transcribe cmd received but model is unloaded — failing job"
);
let _ = req.reply.send(Err(AppError::Internal(
"model unloaded before job could run".into(),
)));
@@ -177,12 +188,9 @@ fn transcriber_thread(
}
};
let result = t.transcribe(
&req.pcm,
req.language.as_deref(),
&req.task,
move |p| (req.on_progress)(p),
);
let result = t.transcribe(&req.pcm, req.language.as_deref(), &req.task, move |p| {
(req.on_progress)(p)
});
last_job = Instant::now();
let _ = req.reply.send(result);
}
@@ -218,14 +226,14 @@ fn transcriber_thread(
/// rejection. Returns `Some(Transcriber)` on success, `None` if cancelled.
#[allow(clippy::too_many_arguments)]
fn try_load_with_polling(
rx: &std::sync::mpsc::Receiver<WorkerCmd>,
model_path: &PathBuf,
gpu_device: u32,
model_state: &Arc<RwLock<ModelState>>,
model_event_tx: &broadcast::Sender<ModelEvent>,
rx: &std::sync::mpsc::Receiver<WorkerCmd>,
model_path: &PathBuf,
gpu_device: u32,
model_state: &Arc<RwLock<ModelState>>,
model_event_tx: &broadcast::Sender<ModelEvent>,
webhook_registry: &Arc<Mutex<HashSet<String>>>,
gpu_poll_interval: Duration,
rt: &tokio::runtime::Handle,
rt: &tokio::runtime::Handle,
) -> Option<Transcriber> {
loop {
set_state(model_state, ModelState::Loading);
@@ -253,25 +261,35 @@ fn try_load_with_polling(
"insufficient VRAM — will retry"
);
set_state(model_state, ModelState::WaitingForGpu {
vram_needed_mb,
vram_free_mb,
retry_in_secs,
});
broadcast_event(model_event_tx, ModelEvent::ModelWaitingForGpu {
vram_needed_mb,
vram_free_mb,
retry_in_secs,
});
set_state(
model_state,
ModelState::WaitingForGpu {
vram_needed_mb,
vram_free_mb,
retry_in_secs,
},
);
broadcast_event(
model_event_tx,
ModelEvent::ModelWaitingForGpu {
vram_needed_mb,
vram_free_mb,
retry_in_secs,
},
);
// Interruptible sleep: drain rx while waiting for gpu_poll_interval.
let deadline = Instant::now() + gpu_poll_interval;
loop {
let remaining = deadline.saturating_duration_since(Instant::now());
if remaining.is_zero() { break; }
if remaining.is_zero() {
break;
}
match rx.recv_timeout(remaining.min(Duration::from_secs(1))) {
Ok(WorkerCmd::Unload) => {
tracing::info!("Unload received while waiting for GPU — cancelling load");
tracing::info!(
"Unload received while waiting for GPU — cancelling load"
);
set_state(model_state, ModelState::Unloaded);
broadcast_event(model_event_tx, ModelEvent::ModelUnloaded);
fire_webhooks(webhook_registry, ModelEvent::ModelUnloaded, rt);
@@ -303,11 +321,11 @@ fn try_load_with_polling(
}
fn do_unload(
transcriber: &mut Option<Transcriber>,
model_state: &Arc<RwLock<ModelState>>,
model_event_tx: &broadcast::Sender<ModelEvent>,
transcriber: &mut Option<Transcriber>,
model_state: &Arc<RwLock<ModelState>>,
model_event_tx: &broadcast::Sender<ModelEvent>,
webhook_registry: &Arc<Mutex<HashSet<String>>>,
rt: &tokio::runtime::Handle,
rt: &tokio::runtime::Handle,
) {
*transcriber = None;
set_state(model_state, ModelState::Unloaded);
@@ -328,8 +346,8 @@ fn broadcast_event(tx: &broadcast::Sender<ModelEvent>, event: ModelEvent) {
fn fire_webhooks(
registry: &Arc<Mutex<HashSet<String>>>,
event: ModelEvent,
rt: &tokio::runtime::Handle,
event: ModelEvent,
rt: &tokio::runtime::Handle,
) {
if !event.is_webhook_event() {
return;
@@ -341,11 +359,16 @@ fn fire_webhooks(
.cloned()
.collect();
if urls.is_empty() { return; }
if urls.is_empty() {
return;
}
let payload = match serde_json::to_string(&event) {
Ok(p) => p,
Err(e) => { tracing::error!(error = %e, "failed to serialize model event"); return; }
Ok(p) => p,
Err(e) => {
tracing::error!(error = %e, "failed to serialize model event");
return;
}
};
for url in urls {
@@ -356,7 +379,8 @@ fn fire_webhooks(
.build()
.expect("http client");
for attempt in 0..3_u32 {
match http.post(&url)
match http
.post(&url)
.header("content-type", "application/json")
.body(body.clone())
.send()
@@ -405,11 +429,11 @@ fn parse_oom_vram(msg: &str, gpu_device: u32) -> (u64, u64) {
// ── Async job runner ──────────────────────────────────────────────────────────
async fn run(
mut job_rx: mpsc::UnboundedReceiver<JobId>,
storage: Arc<Storage>,
mut job_rx: mpsc::UnboundedReceiver<JobId>,
storage: Arc<Storage>,
queue_depth: Arc<AtomicUsize>,
registry: ProgressRegistry,
cmd_tx: std::sync::mpsc::SyncSender<WorkerCmd>,
registry: ProgressRegistry,
cmd_tx: std::sync::mpsc::SyncSender<WorkerCmd>,
) {
let http = Client::builder()
.timeout(Duration::from_secs(30))
@@ -420,7 +444,7 @@ async fn run(
queue_depth.fetch_sub(1, Ordering::Relaxed);
let mut job = match storage.get(&job_id).await {
Ok(j) => j,
Ok(j) => j,
Err(e) => {
tracing::warn!(job_id = %job_id, error = %e, "job vanished before processing");
registry.remove(&job_id);
@@ -461,19 +485,19 @@ async fn run(
match result {
Ok((segments, language, duration_secs)) => {
job.status = JobStatus::Done;
job.segments = segments;
job.language = Some(language);
job.status = JobStatus::Done;
job.segments = segments;
job.language = Some(language);
job.duration_secs = Some(duration_secs);
job.progress = 100;
job.completed_at = Some(Utc::now());
job.progress = 100;
job.completed_at = Some(Utc::now());
let _ = progress_tx.send(ProgressEvent::Done(Box::new(job.clone())));
}
Err(e) => {
let msg = e.to_string();
tracing::error!(job_id = %job_id, error = %msg, "transcription failed");
job.status = JobStatus::Failed;
job.error = Some(msg.clone());
job.status = JobStatus::Failed;
job.error = Some(msg.clone());
job.completed_at = Some(Utc::now());
let _ = progress_tx.send(ProgressEvent::Error(msg));
}
@@ -485,9 +509,11 @@ async fn run(
if let Some(url) = &job.webhook_url.clone() {
let http = http.clone();
let url = url.clone();
let job = job.clone();
tokio::spawn(async move { webhook::fire(&http, &url, &job).await; });
let url = url.clone();
let job = job.clone();
tokio::spawn(async move {
webhook::fire(&http, &url, &job).await;
});
}
tokio::time::sleep(Duration::from_secs(30)).await;
@@ -498,9 +524,9 @@ async fn run(
// ── Silence-based chunking ────────────────────────────────────────────────────
const TARGET_CHUNK_SECS: f32 = 60.0;
const SNAP_WINDOW_SECS: f32 = 30.0;
const SILENCE_DB: &str = "-35dB";
const SILENCE_DUR: &str = "0.4";
const SNAP_WINDOW_SECS: f32 = 30.0;
const SILENCE_DB: &str = "-35dB";
const SILENCE_DUR: &str = "0.4";
async fn detect_silence_midpoints(path: &std::path::Path) -> Vec<f32> {
use tokio::process::Command;
@@ -509,15 +535,19 @@ async fn detect_silence_midpoints(path: &std::path::Path) -> Vec<f32> {
let output = Command::new("ffmpeg")
.args([
"-nostdin",
"-i", path.to_str().unwrap_or(""),
"-af", &filter,
"-f", "null", "-",
"-i",
path.to_str().unwrap_or(""),
"-af",
&filter,
"-f",
"null",
"-",
])
.output()
.await;
let output = match output {
Ok(o) => o,
Ok(o) => o,
Err(e) => {
tracing::warn!(error = %e, "silencedetect unavailable; using hard cuts");
return Vec::new();
@@ -526,7 +556,7 @@ async fn detect_silence_midpoints(path: &std::path::Path) -> Vec<f32> {
let stderr = String::from_utf8_lossy(&output.stderr);
let mut starts: Vec<f32> = Vec::new();
let mut ends: Vec<f32> = Vec::new();
let mut ends: Vec<f32> = Vec::new();
for line in stderr.lines() {
if let Some(i) = line.find("silence_start: ") {
@@ -545,7 +575,9 @@ async fn detect_silence_midpoints(path: &std::path::Path) -> Vec<f32> {
}
}
let mids: Vec<f32> = starts.iter().zip(ends.iter())
let mids: Vec<f32> = starts
.iter()
.zip(ends.iter())
.map(|(s, e)| (s + e) / 2.0)
.collect();
@@ -553,18 +585,15 @@ async fn detect_silence_midpoints(path: &std::path::Path) -> Vec<f32> {
mids
}
fn snap_to_silence(
mids: &[f32],
total_secs: f32,
target_secs: f32,
snap_window: f32,
) -> Vec<f32> {
fn snap_to_silence(mids: &[f32], total_secs: f32, target_secs: f32, snap_window: f32) -> Vec<f32> {
let mut cuts: Vec<f32> = Vec::new();
let mut pos = target_secs;
while pos < total_secs - target_secs * 0.25 {
let prev_cut = cuts.last().copied().unwrap_or(0.0);
let best = mids.iter().copied()
let best = mids
.iter()
.copied()
.filter(|&t| t > prev_cut + 10.0 && (t - pos).abs() <= snap_window)
.min_by(|a, b| (a - pos).abs().partial_cmp(&(b - pos).abs()).unwrap());
let cut = best.unwrap_or(pos);
@@ -591,20 +620,165 @@ fn to_chunk_ranges(cuts: &[f32], total_secs: f32) -> Vec<(f32, f32)> {
ranges
}
/// Largest gap, in seconds, between the end of one segment and the start of
/// the next for `collapse_incremental_segments` to still consider folding them
/// together; a larger gap keeps the two segments separate.
const MAX_CHAIN_GAP_SECS: f32 = 0.15;
/// Minimum number of normalised words `is_meaningful_phrase` requires.
const MIN_MEANINGFUL_WORDS: usize = 2;
/// Minimum summed length of the normalised words `is_meaningful_phrase` requires.
const MIN_MEANINGFUL_CHARS: usize = 8;
/// Minimum number of words by which the tail of one segment must overlap the
/// head of the next before `collapse_incremental_segments` trims the repeated
/// leading words off the later segment.
const MIN_OVERLAP_WORDS: usize = 3;
/// Splits `text` on whitespace and reduces every token to its lowercase
/// alphanumeric/underscore characters.
///
/// Tokens that contain no such character (pure punctuation) are dropped, so
/// the result may hold fewer entries than `text` has whitespace-separated
/// tokens.
fn normalised_words(text: &str) -> Vec<String> {
    let mut words = Vec::new();
    for token in text.split_whitespace() {
        let mut cleaned = String::with_capacity(token.len());
        for ch in token.chars() {
            if ch.is_alphanumeric() || ch == '_' {
                // Lowercasing may expand to more than one char.
                cleaned.extend(ch.to_lowercase());
            }
        }
        if !cleaned.is_empty() {
            words.push(cleaned);
        }
    }
    words
}
/// Returns `true` when `prefix` is a leading run of `full` (an empty `prefix`
/// always matches).
fn starts_with_words(full: &[String], prefix: &[String]) -> bool {
    full.starts_with(prefix)
}
/// Returns `true` when `suffix` is a trailing run of `full` (an empty `suffix`
/// always matches).
fn ends_with_words(full: &[String], suffix: &[String]) -> bool {
    full.ends_with(suffix)
}
/// Length of the longest run of words that is both a suffix of `left` and a
/// prefix of `right`; `0` when the two do not overlap.
fn suffix_prefix_overlap(left: &[String], right: &[String]) -> usize {
    let longest = left.len().min(right.len());
    // Probe from the longest candidate down so the first hit is the maximum.
    (1..=longest)
        .rev()
        .find(|&len| left.ends_with(&right[..len]))
        .unwrap_or(0)
}
/// Decides whether `words` carries enough text to be trusted as evidence that
/// two segments repeat each other.
///
/// A phrase qualifies when it has at least `MIN_MEANINGFUL_WORDS` words whose
/// combined length is at least `MIN_MEANINGFUL_CHARS` characters. Length is
/// counted in Unicode scalar values rather than UTF-8 bytes, so the threshold
/// is the same for ASCII and non-ASCII (e.g. Cyrillic or accented) text.
fn is_meaningful_phrase(words: &[String]) -> bool {
    words.len() >= MIN_MEANINGFUL_WORDS
        && words
            .iter()
            .map(|word| word.chars().count())
            .sum::<usize>()
            >= MIN_MEANINGFUL_CHARS
}
/// Drops the first `count` words of `text` and returns the rest joined by
/// single spaces.
///
/// `count` is expressed in normalised words, i.e. tokens containing at least
/// one alphanumeric or `_` character. Punctuation-only tokens (such as a lone
/// "-") met while skipping are dropped as well but do not count, so the number
/// of words removed agrees with an overlap measured on normalised words.
/// Returns an empty string when `text` has `count` words or fewer.
fn trim_leading_words(text: &str, count: usize) -> String {
    let mut remaining = count;
    text.split_whitespace()
        .skip_while(|token| {
            if remaining == 0 {
                return false;
            }
            if token.chars().any(|ch| ch.is_alphanumeric() || ch == '_') {
                remaining -= 1;
            }
            true
        })
        .collect::<Vec<_>>()
        .join(" ")
}
fn merge_identical_segments(segments: Vec<Segment>) -> Vec<Segment> {
let mut out: Vec<Segment> = Vec::with_capacity(segments.len());
for seg in segments {
if let Some(last) = out.last_mut() {
if normalised_words(&last.text) == normalised_words(&seg.text) {
last.end = last.end.max(seg.end);
if !seg.words.is_empty() {
last.words = seg.words;
}
continue;
}
}
out.push(seg);
}
out
}
/// Collapses chains of rolling partial hypotheses into stable segments.
///
/// Every segment's text is trimmed and segments with empty text are dropped.
/// Each remaining segment is then offered to the most recently kept segment
/// (see `fold_into_previous`); whatever is not absorbed is appended to the
/// output.
fn collapse_incremental_segments(segments: Vec<Segment>) -> Vec<Segment> {
    let mut collapsed: Vec<Segment> = Vec::with_capacity(segments.len());

    for mut seg in segments {
        seg.text = seg.text.trim().to_string();
        if seg.text.is_empty() {
            continue;
        }

        let leftover = match collapsed.last_mut() {
            Some(prev) => fold_into_previous(prev, seg),
            None => Some(seg),
        };
        if let Some(kept) = leftover {
            collapsed.push(kept);
        }
    }

    collapsed
}

/// Tries to merge `seg` into `prev`, the segment kept immediately before it.
///
/// Returns `None` when `seg` was absorbed into `prev`, or `Some(seg)` when it
/// must be kept as its own segment (possibly with its repeated leading words
/// trimmed off).
fn fold_into_previous(prev: &mut Segment, mut seg: Segment) -> Option<Segment> {
    // A real pause between the two means they are not part of one chain.
    if seg.start - prev.end > MAX_CHAIN_GAP_SECS {
        return Some(seg);
    }

    let prev_words = normalised_words(&prev.text);
    let seg_words = normalised_words(&seg.text);
    if prev_words.is_empty() || seg_words.is_empty() {
        return Some(seg);
    }

    // `seg` repeats all of `prev` and continues it: the longer hypothesis
    // replaces the shorter one.
    let extends_prev = seg_words.len() > prev_words.len()
        && starts_with_words(&seg_words, &prev_words)
        && is_meaningful_phrase(&prev_words);
    if extends_prev {
        prev.text = seg.text;
        prev.end = seg.end;
        prev.words = seg.words;
        return None;
    }

    // `seg` merely echoes the tail of `prev`: keep `prev`, stretch its end.
    if ends_with_words(&prev_words, &seg_words) && is_meaningful_phrase(&seg_words) {
        prev.end = prev.end.max(seg.end);
        return None;
    }

    // `seg` opens with the tail of `prev` and then adds new words.
    let shared = suffix_prefix_overlap(&prev_words, &seg_words);
    if shared < MIN_OVERLAP_WORDS {
        return Some(seg);
    }

    let remainder = trim_leading_words(&seg.text, shared);
    if remainder.is_empty() {
        prev.end = prev.end.max(seg.end);
        return None;
    }

    seg.start = seg.start.max(prev.end);
    seg.text = remainder;
    // The word list still describes the untrimmed text, so it is discarded.
    seg.words.clear();
    Some(seg)
}
fn normalise_segments(segments: Vec<Segment>) -> Vec<Segment> {
let mut result = collapse_incremental_segments(segments);
result = merge_identical_segments(result);
result = collapse_incremental_segments(result);
merge_identical_segments(result)
}
// ── Job processing ────────────────────────────────────────────────────────────
async fn process_job(
job: &Job,
audio_path: &std::path::Path,
job: &Job,
audio_path: &std::path::Path,
progress_tx: &ProgressTx,
cmd_tx: &std::sync::mpsc::SyncSender<WorkerCmd>,
storage: &Arc<Storage>,
cmd_tx: &std::sync::mpsc::SyncSender<WorkerCmd>,
storage: &Arc<Storage>,
) -> crate::Result<(Vec<Segment>, String, f32)> {
let pcm = decode_audio(audio_path).await?;
let total_secs = pcm.len() as f32 / 16_000.0;
let silence_mids = detect_silence_midpoints(audio_path).await;
let cuts = snap_to_silence(&silence_mids, total_secs, TARGET_CHUNK_SECS, SNAP_WINDOW_SECS);
let cuts = snap_to_silence(
&silence_mids,
total_secs,
TARGET_CHUNK_SECS,
SNAP_WINDOW_SECS,
);
let chunks = to_chunk_ranges(&cuts, total_secs);
let n = chunks.len();
@@ -620,12 +794,12 @@ async fn process_job(
for (ci, (chunk_start, chunk_end)) in chunks.iter().enumerate() {
let s0 = (*chunk_start * 16_000.0) as usize;
let s1 = ((*chunk_end * 16_000.0) as usize).min(pcm.len());
let s1 = ((*chunk_end * 16_000.0) as usize).min(pcm.len());
let mut chunk_pcm = pcm[s0..s1].to_vec();
trim_trailing_silence(&mut chunk_pcm);
let base = (ci * 100 / n) as u8;
let span = (100usize / n).max(1) as u8;
let base = (ci * 100 / n) as u8;
let span = (100usize / n).max(1) as u8;
// Save progress to disk before emitting SSE — polling clients who respond
// immediately to the SSE event will then see consistent state.
@@ -637,49 +811,52 @@ async fn process_job(
let _ = progress_tx.send(ProgressEvent::Progress {
percent: base,
chunk: ci + 1,
total: n,
chunk: ci + 1,
total: n,
});
let tx = progress_tx.clone();
let tx = progress_tx.clone();
let chunk_num = ci + 1;
let on_progress = Box::new(move |p: u8| {
let overall = base.saturating_add(p.saturating_mul(span) / 100);
let _ = tx.send(ProgressEvent::Progress {
percent: overall,
chunk: chunk_num,
total: n,
chunk: chunk_num,
total: n,
});
});
let (reply_tx, reply_rx) = oneshot::channel();
cmd_tx.send(WorkerCmd::Transcribe(TranscribeRequest {
pcm: chunk_pcm,
language: job.language.clone(),
task: job.task.clone(),
on_progress,
reply: reply_tx,
})).map_err(|_| AppError::Internal("worker command channel closed".into()))?;
cmd_tx
.send(WorkerCmd::Transcribe(TranscribeRequest {
pcm: chunk_pcm,
language: job.language.clone(),
task: job.task.clone(),
on_progress,
reply: reply_tx,
}))
.map_err(|_| AppError::Internal("worker command channel closed".into()))?;
let (mut segs, lang) = reply_rx.await
let (mut segs, lang) = reply_rx
.await
.map_err(|_| AppError::Internal("transcriber thread dropped reply".into()))??;
let offset = *chunk_start;
for seg in &mut segs {
seg.start += offset;
seg.end += offset;
seg.end += offset;
for word in &mut seg.words {
word.start += offset;
word.end += offset;
word.end += offset;
}
}
tracing::debug!(
chunk = ci + 1,
of = n,
of = n,
start = chunk_start,
end = chunk_end,
segs = segs.len(),
end = chunk_end,
segs = segs.len(),
"chunk done"
);
@@ -689,24 +866,30 @@ async fn process_job(
}
}
all_segments = normalise_segments(all_segments);
for (i, seg) in all_segments.iter_mut().enumerate() {
seg.index = i as i32;
}
let _ = progress_tx.send(ProgressEvent::Progress { percent: 100, chunk: n, total: n });
let _ = progress_tx.send(ProgressEvent::Progress {
percent: 100,
chunk: n,
total: n,
});
Ok((all_segments, language, total_secs))
}
fn trim_trailing_silence(pcm: &mut Vec<f32>) {
const THRESHOLD: f32 = 0.017_8;
const PADDING: usize = 8_000;
const PADDING: usize = 8_000;
if let Some(last_loud) = pcm.iter().rposition(|&s| s.abs() > THRESHOLD) {
let new_len = (last_loud + 1 + PADDING).min(pcm.len());
if new_len < pcm.len() {
tracing::trace!(
original_samples = pcm.len(),
trimmed_samples = pcm.len() - new_len,
trimmed_samples = pcm.len() - new_len,
"trimmed trailing silence"
);
pcm.truncate(new_len);
@@ -719,11 +902,17 @@ async fn decode_audio(path: &std::path::Path) -> crate::Result<Vec<f32>> {
let output = Command::new("ffmpeg")
.args([
"-nostdin", "-threads", "0",
"-i", path.to_str().unwrap_or(""),
"-f", "f32le",
"-ac", "1",
"-ar", "16000",
"-nostdin",
"-threads",
"0",
"-i",
path.to_str().unwrap_or(""),
"-f",
"f32le",
"-ac",
"1",
"-ar",
"16000",
"-",
])
.output()
@@ -760,13 +949,28 @@ pub fn audio_path_for(id: &JobId) -> PathBuf {
#[cfg(test)]
mod tests {
use super::*;
use crate::models::Word;
/// Test helper: builds a `Segment` with the given index, time span and text
/// and an empty word list.
fn segment(index: i32, start: f32, end: f32, text: &str) -> Segment {
    let words: Vec<Word> = Vec::new();
    Segment {
        index,
        start,
        end,
        text: text.to_owned(),
        words,
    }
}
#[test]
fn test_snap_to_silence_uses_nearest_midpoint() {
let mids = vec![55.0, 58.0, 62.0];
let cuts = snap_to_silence(&mids, 120.0, 60.0, 30.0);
assert!(!cuts.is_empty());
assert!((cuts[0] - 58.0).abs() < 0.01, "expected ~58.0, got {}", cuts[0]);
assert!(
(cuts[0] - 58.0).abs() < 0.01,
"expected ~58.0, got {}",
cuts[0]
);
}
#[test]
@@ -801,4 +1005,53 @@ mod tests {
trim_trailing_silence(&mut pcm);
assert_eq!(pcm.len(), (10_001 + 8_000).min(32_000));
}
#[test]
fn test_normalise_segments_collapses_prefix_growth_chain() {
    // Back-to-back segments in which each one either extends its predecessor
    // or repeats the predecessor's tail.
    let chain = [
        (15.24, 16.6, "Hello everyone."),
        (16.6, 19.47, "Hello everyone. Um, welcome to this talk."),
        (19.47, 19.48, "Um, welcome to this talk."),
        (
            19.48,
            21.67,
            "Um, welcome to this talk. I'll be speaking about small model",
        ),
        (21.67, 21.68, "I'll be speaking about small model"),
        (
            21.68,
            24.59,
            "I'll be speaking about small model inference and a gap that we've",
        ),
    ];
    let input: Vec<Segment> = chain
        .iter()
        .enumerate()
        .map(|(i, &(start, end, text))| segment(i as i32, start, end, text))
        .collect();

    let result = normalise_segments(input);
    let close = |actual: f32, expected: f32| (actual - expected).abs() < 0.01;

    assert_eq!(result.len(), 2);

    assert_eq!(result[0].text, "Hello everyone. Um, welcome to this talk.");
    assert!(close(result[0].start, 15.24));
    assert!(close(result[0].end, 19.48));

    assert_eq!(
        result[1].text,
        "I'll be speaking about small model inference and a gap that we've"
    );
    assert!(close(result[1].start, 19.48));
    assert!(close(result[1].end, 24.59));
}
#[test]
fn test_normalise_segments_keeps_real_gap() {
    // The second segment starts a full second after the first one ends, well
    // beyond MAX_CHAIN_GAP_SECS, so both must survive untouched.
    let first = segment(0, 0.0, 1.0, "Hello everyone.");
    let second = segment(1, 2.0, 4.0, "Hello everyone. Welcome back.");

    let texts: Vec<String> = normalise_segments(vec![first, second])
        .into_iter()
        .map(|seg| seg.text)
        .collect();

    assert_eq!(texts, ["Hello everyone.", "Hello everyone. Welcome back."]);
}
}