1
1
#!/usr/bin/env python
2
2
from __future__ import unicode_literals
3
3
4
+ from google .protobuf .json_format import MessageToJson
4
5
from google .cloud import speech
5
6
from google .cloud .speech import enums
6
7
from google .cloud .speech import types
9
10
import logging
10
11
import subprocess
11
12
import sys
13
+ import IPython
12
14
13
15
14
16
# Emit bare log messages (no level/timestamp prefix) at INFO and above.
logging.basicConfig(level=logging.INFO, format='%(message)s')


# Command-line interface: one positional input file plus an optional flag
# that asks for timing information to be included in the output.
parser = argparse.ArgumentParser(description='Convert speech audio to text using Google Speech API')
parser.add_argument('in_filename', help='Input filename (`-` for stdin)')
parser.add_argument('--timing', action='store_true', help='Include timing info')
21
24
22
25
23
26
def decode_audio (in_filename , ** input_kwargs ):
@@ -38,25 +41,24 @@ def decode_audio(in_filename, **input_kwargs):
38
41
return out [0 ]
39
42
40
43
41
def get_transcripts(audio_data, include_timing_info=False):
    """Run speech recognition on ``audio_data`` and return the API response.

    The request is configured for LINEAR16-encoded audio at 16000 Hz with
    the ``en-US`` language code.

    Args:
        audio_data: Audio content handed to ``types.RecognitionAudio``.
            Presumably the raw bytes produced by ``decode_audio`` --
            TODO confirm against the caller.
        include_timing_info: Forwarded as ``enable_word_time_offsets`` in
            the recognition config.

    Returns:
        The response object returned by ``client.recognize`` (not a list of
        transcript strings); callers are expected to extract or serialize
        what they need.
    """
    client = speech.SpeechClient()
    audio = types.RecognitionAudio(content=audio_data)
    config = types.RecognitionConfig(
        encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=16000,
        language_code='en-US',
        enable_word_time_offsets=include_timing_info,
    )
    return client.recognize(config, audio)
51
54
52
55
53
def transcribe(in_filename, include_timing_info=False):
    """Decode ``in_filename``, recognize its speech, and print the result as JSON.

    Args:
        in_filename: Input passed straight to ``decode_audio``.
        include_timing_info: Passed through to ``get_transcripts`` to
            request timing information in the response.

    The full recognition response is serialized with ``MessageToJson`` using
    sorted keys and written to stdout; nothing is returned.
    """
    audio_data = decode_audio(in_filename)
    response = get_transcripts(audio_data, include_timing_info)
    print(MessageToJson(response, sort_keys=True))
58
60
59
61
60
62
if __name__ == '__main__':
    # Script entry point: parse the CLI arguments and forward the --timing
    # flag so the printed response includes timing info when requested.
    args = parser.parse_args()
    transcribe(args.in_filename, args.timing)
0 commit comments