-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathspeech-worker.js
More file actions
116 lines (107 loc) · 4.67 KB
/
speech-worker.js
File metadata and controls
116 lines (107 loc) · 4.67 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
// ============================================
// speech-worker.js — Whisper Large V3 Turbo ASR WebWorker (WASM fallback)
// Used when WebGPU is NOT available. WebGPU devices use voxtral-worker.js.
// Runs textagent/whisper-large-v3-turbo via @huggingface/transformers
// off the main thread for jank-free transcription.
// WER ~7.7% (batched)
// ============================================
import { pipeline, env } from '@huggingface/transformers';
// Model host — downloads ONNX models from textagent HuggingFace org
const MODEL_HOST = 'https://huggingface.co';
// Fallback HF org tried when the primary 'textagent/…' model fails to load
// (see the catch branch in the 'init' handler below).
const MODEL_ORG_FALLBACK = 'onnx-community';
// Point transformers.js model downloads at the host above.
env.remoteHost = MODEL_HOST;
// Lazily-created ASR pipeline; assigned by the 'init' message, read by 'transcribe'.
let transcriber = null;
// Worker message protocol:
//   { type: 'init' }                      → download + instantiate the ASR pipeline
//   { type: 'transcribe', audio, lang? }  → run ASR on a Float32Array of PCM samples
// Replies with { type: 'status' | 'progress' | 'progress-done' | 'result' | 'error', … }.
self.addEventListener('message', async (e) => {
  const { type, audio } = e.data;
  if (type === 'init') {
    try {
      self.postMessage({ type: 'status', status: 'loading', message: '⏳ Downloading Whisper Large V3 Turbo (WASM)…' });
      // Declared BEFORE pipelineOpts: the progress callback below closes over
      // this binding, so initializing it first removes the temporal-dead-zone
      // hazard of the original (declared after the callback that reads it).
      // Reassigned to the fallback id if the primary org fails.
      let whisperModelId = 'textagent/whisper-large-v3-turbo';
      const pipelineOpts = {
        dtype: 'q8',    // 8-bit quantized weights — keeps the WASM path fast/small
        device: 'wasm', // CPU fallback; WebGPU devices use voxtral-worker.js instead
        progress_callback: (progress) => {
          if (progress.status === 'progress') {
            self.postMessage({
              type: 'progress',
              file: progress.file,
              loaded: progress.loaded,
              total: progress.total,
              // Guard: total can be 0/undefined before the size is known —
              // report 0% instead of NaN/Infinity.
              percent: progress.total > 0 ? Math.round((progress.loaded / progress.total) * 100) : 0,
              source: whisperModelId,
            });
          } else if (progress.status === 'initiate') {
            self.postMessage({
              type: 'status',
              status: 'loading',
              message: `Loading ${progress.file || 'model'}...`,
              source: whisperModelId,
              loadingPhase: 'initiate',
            });
          } else if (progress.status === 'done') {
            self.postMessage({ type: 'progress-done', file: progress.file, source: whisperModelId, loadingPhase: 'done' });
          }
        },
      };
      // Try primary org (textagent), fall back to onnx-community
      try {
        transcriber = await pipeline(
          'automatic-speech-recognition',
          whisperModelId,
          pipelineOpts,
        );
      } catch (primaryErr) {
        console.warn(`textagent model failed: ${primaryErr.message}. Falling back to onnx-community…`);
        self.postMessage({ type: 'status', status: 'loading', message: '⚠️ Falling back to onnx-community models…' });
        whisperModelId = whisperModelId.replace('textagent/', MODEL_ORG_FALLBACK + '/');
        transcriber = await pipeline(
          'automatic-speech-recognition',
          whisperModelId,
          pipelineOpts,
        );
      }
      self.postMessage({
        type: 'status',
        status: 'ready',
        message: 'Whisper ready',
        device: 'CPU (WASM)',
        model: 'Whisper V3 Turbo',
      });
    } catch (err) {
      self.postMessage({ type: 'error', message: err.message || String(err) });
    }
    return;
  }
  if (type === 'transcribe') {
    if (!transcriber) {
      self.postMessage({ type: 'error', message: 'Model not loaded yet' });
      return;
    }
    try {
      // Normalize quiet audio toward [-1, 1] for best model accuracy:
      // boost only when the peak is below 0.5 (targets a 0.9 peak), so
      // already-loud clips pass through untouched and unclipped.
      let normalizedAudio = audio;
      let maxVal = 0;
      for (let i = 0; i < audio.length; i++) {
        const abs = Math.abs(audio[i]);
        if (abs > maxVal) maxVal = abs;
      }
      if (maxVal > 0 && maxVal < 0.5) {
        normalizedAudio = new Float32Array(audio.length);
        const gain = 0.9 / maxVal;
        for (let i = 0; i < audio.length; i++) {
          normalizedAudio[i] = audio[i] * gain;
        }
      }
      // Use language from caller, default to 'en'
      const lang = e.data.lang || 'en';
      const result = await transcriber(normalizedAudio, {
        language: lang,
        return_timestamps: false,
      });
      self.postMessage({ type: 'result', text: result.text });
    } catch (err) {
      self.postMessage({ type: 'error', message: err.message || String(err) });
    }
    return;
  }
});