Add transcribe_many and make changes to other example scripts · TreHack/ffmpeg-python@d70d5a3 · GitHub
[go: up one dir, main page]

Skip to content

Commit d70d5a3

Browse files
committed
Add transcribe_many and make changes to other example scripts
1 parent f5f7ee2 commit d70d5a3

File tree

4 files changed

+93
-18
lines changed

4 files changed

+93
-18
lines changed

examples/requirements.txt

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1,4 @@
1-
ffmpeg
1+
ffmpeg-python
2+
gevent
3+
google-cloud-speech
4+
tqdm

examples/split_silence.py

Lines changed: 25 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
logger.setLevel(logging.INFO)
1717

1818
DEFAULT_DURATION = 0.3
19-
DEFAULT_THRESHOLD = -60
19+
DEFAULT_THRESHOLD = -30
2020

2121
parser = argparse.ArgumentParser(description='Split media into separate chunks wherever silence occurs')
2222
parser.add_argument('in_filename', help='Input filename (`-` for stdin)')
@@ -26,6 +26,7 @@
2626
parser.add_argument('--start-time', type=float, help='Start time (seconds)')
2727
parser.add_argument('--end-time', type=float, help='End time (seconds)')
2828
parser.add_argument('-v', dest='verbose', action='store_true', help='Verbose mode')
29+
parser.add_argument('--padding', type=float, default=0., help='Output silence padding (seconds)')
2930

3031
silence_start_re = re.compile(' silence_start: (?P<start>[0-9]+(\.?[0-9]*))$')
3132
silence_end_re = re.compile(' silence_end: (?P<end>[0-9]+(\.?[0-9]*)) ')
@@ -110,6 +111,7 @@ def split_audio(
110111
silence_duration=DEFAULT_DURATION,
111112
start_time=None,
112113
end_time=None,
114+
padding=0.,
113115
verbose=False,
114116
):
115117
chunk_times = get_chunk_times(in_filename, silence_threshold, silence_duration, start_time, end_time)
@@ -121,16 +123,30 @@ def split_audio(
121123

122124
logger.info('{}: start={:.02f}, end={:.02f}, duration={:.02f}'.format(out_filename, start_time, end_time,
123125
time))
124-
_logged_popen(
125-
(ffmpeg
126-
.input(in_filename, ss=start_time, t=time)
127-
.output(out_filename)
128-
.overwrite_output()
129-
.compile()
130-
),
126+
127+
input = ffmpeg.input(in_filename, ss=start_time, t=time)
128+
129+
if padding > 0.:
130+
silence = ffmpeg.input('aevalsrc=0:0::duration={}'.format(padding), format='lavfi')
131+
input = ffmpeg.concat(silence, input, v=0, a=1)
132+
133+
ffmpeg_cmd = (input
134+
.output(out_filename)
135+
.overwrite_output()
136+
.compile()
137+
)
138+
print ffmpeg_cmd
139+
140+
p = _logged_popen(
141+
ffmpeg_cmd,
131142
stdout=subprocess.PIPE if not verbose else None,
132143
stderr=subprocess.PIPE if not verbose else None,
133-
).communicate()
144+
)
145+
out = p.communicate()
146+
if p.returncode != 0:
147+
if not verbose:
148+
sys.stderr.write(out[1])
149+
sys.exit(1)
134150

135151

136152
if __name__ == '__main__':

examples/transcribe.py

Lines changed: 21 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,11 @@
11
#!/usr/bin/env python
22
from __future__ import unicode_literals
3+
import IPython
34

45
from google.cloud import speech
56
from google.cloud.speech import enums
67
from google.cloud.speech import types
8+
from google.protobuf.json_format import MessageToJson
79
import argparse
810
import ffmpeg
911
import logging
@@ -17,7 +19,10 @@
1719

1820

1921
parser = argparse.ArgumentParser(description='Convert speech audio to text using Google Speech API')
20-
parser.add_argument('in_filename', help='Input filename (`-` for stdin)')
22+
parser.add_argument('in_file', help='Input filename (`-` for stdin)')
23+
parser.add_argument('--out-file', type=argparse.FileType('w'), default='-',
24+
help='Output filename (defaults to stdout)')
25+
parser.add_argument('--json', action='store_true', help='Output raw JSON response')
2126

2227

2328
def decode_audio(in_filename, **input_kwargs):
@@ -38,25 +43,33 @@ def decode_audio(in_filename, **input_kwargs):
3843
return out[0]
3944

4045

41-
def get_transcripts(audio_data):
46+
def transcribe_data(audio_data):
4247
client = speech.SpeechClient()
4348
audio = types.RecognitionAudio(content=audio_data)
4449
config = types.RecognitionConfig(
4550
encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
4651
sample_rate_hertz=16000,
4752
language_code='en-US'
4853
)
49-
response = client.recognize(config, audio)
50-
return [result.alternatives[0].transcript for result in response.results]
54+
return client.recognize(config, audio)
5155

5256

5357
def transcribe(in_filename):
5458
audio_data = decode_audio(in_filename)
55-
transcripts = get_transcripts(audio_data)
56-
for transcript in transcripts:
57-
print(repr(transcript.encode('utf-8')))
59+
return transcribe_data(audio_data)
60+
61+
62+
def transcribe_to_file(in_filename, out_file=sys.stdout, as_json=False):
    """Transcribe ``in_filename`` and write the result to ``out_file``.

    With ``as_json`` set, the whole response returned by ``transcribe`` is
    serialized with ``MessageToJson`` and written in a single call.
    Otherwise the first alternative of every result is written, one
    transcript per line.  All output is UTF-8 encoded before it is written.
    """
    response = transcribe(in_filename)

    if as_json:
        payload = MessageToJson(response)
        out_file.write(payload.encode('utf-8'))
        return

    # Collect every transcript before writing anything, so a malformed result
    # fails before any partial output is produced.
    texts = [item.alternatives[0].transcript for item in response.results]
    for text in texts:
        out_file.write((text + '\n').encode('utf-8'))
5871

5972

6073
if __name__ == '__main__':
6174
args = parser.parse_args()
62-
transcribe(args.in_filename)
75+
transcribe_to_file(args.in_file, args.out_file, as_json=args.json)

examples/transcribe_many.py

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
#!/usr/bin/env python
2+
from functools import partial
3+
from multiprocessing import Pool
4+
from transcribe import transcribe_to_file
5+
import argparse
6+
import os
7+
import logging
8+
9+
10+
# Emit bare log messages (no level/name prefix) at INFO and above.
logging.basicConfig(level=logging.INFO, format='%(message)s')
logger = logging.getLogger(__file__)

# Size of the worker pool when --workers is not given on the command line.
DEFAULT_WORKER_COUNT = 20

# Command-line interface: one or more input files plus output/pool options.
parser = argparse.ArgumentParser(description='Transcribe multiple audio files in parallel using Google Speech API')
parser.add_argument('in_filenames', nargs='+', help='Input filename(s)')
parser.add_argument('--keep-suffix', action='store_true',
                    help='Don\'t strip filename suffix when generating metadata .json output filename')
# type=int keeps a user-supplied value the same type as the integer default;
# without it argparse would hand back a string such as '8'.
parser.add_argument('--workers', type=int, default=DEFAULT_WORKER_COUNT,
                    help='Number of workers (default {})'.format(DEFAULT_WORKER_COUNT))
21+
22+
23+
def transcribe_one(in_filename, keep_suffix=False):
    """Transcribe a single file and write the JSON result alongside it.

    The output name is the input name with ``.json`` appended; the input's
    own extension is dropped first unless ``keep_suffix`` is true.  A log
    line is emitted before and after the transcription.
    """
    stem = in_filename if keep_suffix else os.path.splitext(in_filename)[0]
    out_filename = '{}.json'.format(stem)
    job = (in_filename, out_filename)

    logger.info('Starting: {} -> {}'.format(*job))
    with open(out_filename, 'w') as sink:
        transcribe_to_file(in_filename, sink, as_json=True)
    logger.info('Finished: {} -> {}'.format(*job))
33+
34+
35+
def transcribe_many(in_filenames, keep_suffix=False, worker_count=DEFAULT_WORKER_COUNT):
    """Transcribe several files in parallel using a pool of worker processes.

    Each filename is handed to ``transcribe_one`` (with ``keep_suffix`` bound)
    in one of ``worker_count`` processes.  The call blocks until every file
    has been processed, and an exception raised in a worker propagates to the
    caller.  The pool is always shut down before returning or raising, so no
    worker processes are left behind.
    """
    func = partial(transcribe_one, keep_suffix=keep_suffix)
    pool = Pool(processes=worker_count)
    try:
        # NOTE(review): the huge explicit timeout is presumably there so the
        # wait stays interruptible with Ctrl-C on Python 2 -- confirm.
        pool.map_async(func, in_filenames).get(99999999)
        pool.close()
    except BaseException:
        # Includes KeyboardInterrupt: stop outstanding work immediately.
        pool.terminate()
        raise
    finally:
        # Valid after either close() or terminate(); reaps the workers.
        pool.join()
39+
40+
41+
if __name__ == '__main__':
    args = parser.parse_args()
    # Forward --workers, which was previously parsed but ignored.  int() is
    # needed because argparse yields a string when the option is given
    # without a declared type.
    transcribe_many(args.in_filenames, args.keep_suffix, worker_count=int(args.workers))

0 commit comments

Comments
 (0)
0