-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathspeech-worker.js
More file actions
116 lines (107 loc) · 4.67 KB
/
speech-worker.js
File metadata and controls
116 lines (107 loc) · 4.67 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
// ============================================
// speech-worker.js — Whisper Large V3 Turbo ASR WebWorker (WASM fallback)
// Used when WebGPU is NOT available. WebGPU devices use voxtral-worker.js.
// Runs textagent/whisper-large-v3-turbo via @huggingface/transformers
// off the main thread for jank-free transcription.
// WER ~7.7% (batched)
// ============================================
import { pipeline, env } from '@huggingface/transformers';
// Model host — downloads ONNX models from textagent HuggingFace org
const MODEL_HOST = 'https://huggingface.co';
// Fallback HF org tried when the primary 'textagent/…' model fails to load
// (see the catch branch in the 'init' handler below).
const MODEL_ORG_FALLBACK = 'onnx-community';
// Point transformers.js model downloads at the host above.
env.remoteHost = MODEL_HOST;
// Lazily-created ASR pipeline; assigned by the 'init' message, read by 'transcribe'.
let transcriber = null;
// Worker message protocol:
//   { type: 'init' }                      → download + instantiate the ASR pipeline
//   { type: 'transcribe', audio, lang? }  → run ASR on a Float32Array of PCM samples
// Replies with { type: 'status' | 'progress' | 'progress-done' | 'result' | 'error', … }.
self.addEventListener('message', async (e) => {
  const { type, audio } = e.data;
  if (type === 'init') {
    try {
      self.postMessage({ type: 'status', status: 'loading', message: '⏳ Downloading Whisper Large V3 Turbo (WASM)…' });
      // Declared BEFORE pipelineOpts: the progress callback below closes over
      // this binding, so initializing it first removes the temporal-dead-zone
      // hazard of the original (declared after the callback that reads it).
      // Reassigned to the fallback id if the primary org fails.
      let whisperModelId = 'textagent/whisper-large-v3-turbo';
      const pipelineOpts = {
        dtype: 'q8',    // 8-bit quantized weights — keeps the WASM path fast/small
        device: 'wasm', // CPU fallback; WebGPU devices use voxtral-worker.js instead
        progress_callback: (progress) => {
          if (progress.status === 'progress') {
            self.postMessage({
              type: 'progress',
              file: progress.file,
              loaded: progress.loaded,
              total: progress.total,
              // Guard: total can be 0/undefined before the size is known —
              // report 0% instead of NaN/Infinity.
              percent: progress.total > 0 ? Math.round((progress.loaded / progress.total) * 100) : 0,
              source: whisperModelId,
            });
          } else if (progress.status === 'initiate') {
            self.postMessage({
              type: 'status',
              status: 'loading',
              message: `Loading ${progress.file || 'model'}...`,
              source: whisperModelId,
              loadingPhase: 'initiate',
            });
          } else if (progress.status === 'done') {
            self.postMessage({ type: 'progress-done', file: progress.file, source: whisperModelId, loadingPhase: 'done' });
          }
        },
      };
      // Try primary org (textagent), fall back to onnx-community
      try {
        transcriber = await pipeline(
          'automatic-speech-recognition',
          whisperModelId,
          pipelineOpts,
        );
      } catch (primaryErr) {
        console.warn(`textagent model failed: ${primaryErr.message}. Falling back to onnx-community…`);
        self.postMessage({ type: 'status', status: 'loading', message: '⚠️ Falling back to onnx-community models…' });
        whisperModelId = whisperModelId.replace('textagent/', MODEL_ORG_FALLBACK + '/');
        transcriber = await pipeline(
          'automatic-speech-recognition',
          whisperModelId,
          pipelineOpts,
        );
      }
      self.postMessage({
        type: 'status',
        status: 'ready',
        message: 'Whisper ready',
        device: 'CPU (WASM)',
        model: 'Whisper V3 Turbo',
      });
    } catch (err) {
      self.postMessage({ type: 'error', message: err.message || String(err) });
    }
    return;
  }
  if (type === 'transcribe') {
    if (!transcriber) {
      self.postMessage({ type: 'error', message: 'Model not loaded yet' });
      return;
    }
    try {
      // Normalize quiet audio toward [-1, 1] for best model accuracy:
      // boost only when the peak is below 0.5 (targets a 0.9 peak), so
      // already-loud clips pass through untouched and unclipped.
      let normalizedAudio = audio;
      let maxVal = 0;
      for (let i = 0; i < audio.length; i++) {
        const abs = Math.abs(audio[i]);
        if (abs > maxVal) maxVal = abs;
      }
      if (maxVal > 0 && maxVal < 0.5) {
        normalizedAudio = new Float32Array(audio.length);
        const gain = 0.9 / maxVal;
        for (let i = 0; i < audio.length; i++) {
          normalizedAudio[i] = audio[i] * gain;
        }
      }
      // Use language from caller, default to 'en'
      const lang = e.data.lang || 'en';
      const result = await transcriber(normalizedAudio, {
        language: lang,
        return_timestamps: false,
      });
      self.postMessage({ type: 'result', text: result.text });
    } catch (err) {
      self.postMessage({ type: 'error', message: err.message || String(err) });
    }
    return;
  }
});