Add `--timing` option to transcribe.py; output json · magnusvmt/ffmpeg-python@87f8500 · GitHub
[go: up one dir, main page]

Skip to content

Commit 87f8500

Browse files
committed
Add --timing option to transcribe.py; output json
1 parent: de1ec94 · commit: 87f8500

File tree

1 file changed

+11
-9
lines changed

1 file changed

+11
-9
lines changed

examples/transcribe.py

Lines changed: 11 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,7 @@
11
#!/usr/bin/env python
22
from __future__ import unicode_literals
33

4+
from google.protobuf.json_format import MessageToJson
45
from google.cloud import speech
56
from google.cloud.speech import enums
67
from google.cloud.speech import types
@@ -9,6 +10,7 @@
910
import logging
1011
import subprocess
1112
import sys
13+
import IPython
1214

1315

1416
logging.basicConfig(level=logging.INFO, format='%(message)s')
@@ -18,6 +20,7 @@
1820

1921
parser = argparse.ArgumentParser(description='Convert speech audio to text using Google Speech API')
2022
parser.add_argument('in_filename', help='Input filename (`-` for stdin)')
23+
parser.add_argument('--timing', action='store_true', help='Include timing info')
2124

2225

2326
def decode_audio(in_filename, **input_kwargs):
@@ -38,25 +41,24 @@ def decode_audio(in_filename, **input_kwargs):
3841
return out[0]
3942

4043

41-
def get_transcripts(audio_data):
44+
def get_transcripts(audio_data, include_timing_info=False):
4245
client = speech.SpeechClient()
4346
audio = types.RecognitionAudio(content=audio_data)
4447
config = types.RecognitionConfig(
4548
encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
4649
sample_rate_hertz=16000,
47-
language_code='en-US'
50+
language_code='en-US',
51+
enable_word_time_offsets=include_timing_info,
4852
)
49-
response = client.recognize(config, audio)
50-
return [result.alternatives[0].transcript for result in response.results]
53+
return client.recognize(config, audio)
5154

5255

53-
def transcribe(in_filename):
56+
def transcribe(in_filename, include_timing_info=False):
5457
audio_data = decode_audio(in_filename)
55-
transcripts = get_transcripts(audio_data)
56-
for transcript in transcripts:
57-
print(repr(transcript.encode('utf-8')))
58+
response = get_transcripts(audio_data, include_timing_info)
59+
print(MessageToJson(response, sort_keys=True))
5860

5961

6062
if __name__ == '__main__':
6163
args = parser.parse_args()
62-
transcribe(args.in_filename)
64+
transcribe(args.in_filename, args.timing)

0 commit comments

Comments
 (0)
0