10000 ✨ Text-to-speech task (#167) · huggingface/huggingface.js@40ec8e5 · GitHub
[go: up one dir, main page]

Skip to content

Commit 40ec8e5

Browse files
authored
✨ Text-to-speech task (#167)
1 parent 6883aaa commit 40ec8e5

File tree

5 files changed

+64
-0
lines changed

5 files changed

+64
-0
lines changed

docs/inference/README.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -138,6 +138,11 @@ await hf.audioClassification({
138138
data: readFileSync('test/sample1.flac')
139139
})
140140

141+
await hf.textToSpeech({
142+
model: 'espnet/kan-bayashi_ljspeech_vits',
143+
inputs: 'hello human'
144+
})
145+
141146
// Computer Vision
142147

143148
await hf.imageClassification({
@@ -234,6 +239,7 @@ const { generated_text } = await gpt2.textGeneration({inputs: 'The answer to the
234239

235240
- [x] Automatic speech recognition
236241
- [x] Audio classification
242+
- [x] Text-to-Speech
237243

238244
### Computer Vision
239245

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
import { InferenceOutputError } from "../../lib/InferenceOutputError";
2+
import type { BaseArgs, Options } from "../../types";
3+
import { request } from "../custom/request";
4+
5+
export type TextToSpeechArgs = BaseArgs & {
6+
/**
7+
* The text to generate an audio from
8+
*/
9+
inputs: string;
10+
};
11+
12+
export type TextToSpeechOutput = Blob;
13+
14+
/**
15+
* This task synthesize an audio of a voice pronouncing a given text.
16+
* Recommended model: espnet/kan-bayashi_ljspeech_vits
17+
*/
18+
export async function textToSpeech(args: TextToSpeechArgs, options?: Options): Promise<TextToSpeechOutput> {
19+
const res = await request<TextToSpeechOutput>(args, options);
20+
const isValidOutput = res && res instanceof Blob;
21+
if (!isValidOutput) {
22+
throw new InferenceOutputError("Expected Blob");
23+
}
24+
return res;
25+
}

packages/inference/src/tasks/index.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ export * from "./custom/streamingRequest";
55
// Audio tasks
66
export * from "./audio/audioClassification";
77
export * from "./audio/automaticSpeechRecognition";
8+
export * from "./audio/textToSpeech";
89

910
// Computer Vision tasks
1011
export * from "./cv/imageClassification";

packages/inference/test/HfInference.spec.ts

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -357,6 +357,14 @@ describe.concurrent(
357357
])
358358
);
359359
});
360+
it("textToSpeech", async () => {
361+
expect(
362+
await hf.textToSpeech({
363+
model: "espnet/kan-bayashi_ljspeech_vits",
364+
inputs: "hello there!",
365+
})
366+
).toBeInstanceOf(Blob);
367+
});
360368
it("imageClassification", async () => {
361369
expect(
362370
await hf.imageClassification({

packages/inference/test/tapes.json

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -604,5 +604,29 @@
604604
"vary": "Origin, Access-Control-Request-Method, Access-Control-Request-Headers"
605605
}
606606
}
607+
},
608+
"fedb289665ec1890b722566764b838adcb7516d1bd5b18c0d777b8ed21e16034": {
609+
"url": "https://api-inference.huggingface.co/models/espnet/kan-bayashi_ljspeech_vits",
610+
"init": {
611+
"headers": {
612+
"Content-Type": "application/json"
613+
},
614+
"method": "POST",
615+
"body": "{\"inputs\":\"hello there!\",\"options\":{}}"
616+
},
617+
"response": {
618+
"body": "",
619+
"status": 200,
620+
"statusText": "OK",
621+
"headers": {
622+
"access-control-allow-credentials": "true",
623+
"access-control-expose-headers": "x-compute-type, x-compute-time",
624+
"connection": "keep-alive",
625+
"content-type": "audio/flac",
626+
"server": "uvicorn",
627+
"transfer-encoding": "chunked",
628+
"vary": "Accept-Encoding, Origin, Access-Control-Request-Method, Access-Control-Request-Headers"
629+
}
630+
}
607631
}
608632
}

0 commit comments

Comments
 (0)
0