From 49850708d1ec412baff45668a605d2dcb3212b7e Mon Sep 17 00:00:00 2001 From: Jerjou Cheng Date: Tue, 17 May 2016 16:44:22 -0700 Subject: [PATCH] Add sample for speech api on GCS file via grpc. --- speech/api/README.md | 2 +- speech/api/grpc_auth.py | 0 ...aming.txt => requirements-speech_grpc.txt} | 0 speech/api/speech_gcs.py | 92 +++++++++++++++++++ speech/api/speech_gcs_test.py | 38 ++++++++ speech/api/speech_streaming.py | 28 ++++-- 6 files changed, 152 insertions(+), 8 deletions(-) create mode 100644 speech/api/grpc_auth.py rename speech/api/{requirements-speech_streaming.txt => requirements-speech_grpc.txt} (100%) create mode 100644 speech/api/speech_gcs.py create mode 100644 speech/api/speech_gcs_test.py diff --git a/speech/api/README.md b/speech/api/README.md index cfeb46a356c..0998e67a493 100644 --- a/speech/api/README.md +++ b/speech/api/README.md @@ -49,7 +49,7 @@ for more information. * If you're running the `speech_streaming.py` sample: ```sh - $ pip install -r requirements-speech_streaming.txt + $ pip install -r requirements-speech_grpc.txt ``` The sample uses the [PyAudio][pyaudio] library to stream audio from your diff --git a/speech/api/grpc_auth.py b/speech/api/grpc_auth.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/speech/api/requirements-speech_streaming.txt b/speech/api/requirements-speech_grpc.txt similarity index 100% rename from speech/api/requirements-speech_streaming.txt rename to speech/api/requirements-speech_grpc.txt diff --git a/speech/api/speech_gcs.py b/speech/api/speech_gcs.py new file mode 100644 index 00000000000..b25956c50ae --- /dev/null +++ b/speech/api/speech_gcs.py @@ -0,0 +1,92 @@ +#!/usr/bin/python +# Copyright (C) 2016 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Sample that transcribes a FLAC audio file stored in Google Cloud Storage,
+using GRPC."""
+
+import argparse
+
+from gcloud.credentials import get_credentials
+from google.cloud.speech.v1 import cloud_speech_pb2 as cloud_speech
+from grpc.beta import implementations
+
+# Keep the request alive for this many seconds
+DEADLINE_SECS = 10
+SPEECH_SCOPE = 'https://www.googleapis.com/auth/cloud-platform'
+
+
+def make_channel(host, port):
+    """Creates an SSL channel with auth credentials from the environment."""
+    # In order to make an https call, use an ssl channel with defaults
+    ssl_channel = implementations.ssl_channel_credentials(None, None, None)
+
+    # Grab application default credentials from the environment
+    creds = get_credentials().create_scoped([SPEECH_SCOPE])
+    # Build the header once; every call on the channel reuses this token
+    auth_header = (
+        'Authorization',
+        'Bearer ' + creds.get_access_token().access_token)
+    auth_plugin = implementations.metadata_call_credentials(
+        lambda _, cb: cb([auth_header], None),
+        name='google_creds')
+
+    # compose the two together for both ssl and google auth
+    composite_channel = implementations.composite_channel_credentials(
+        ssl_channel, auth_plugin)
+
+    return implementations.secure_channel(host, port, composite_channel)
+
+
+def main(input_uri, output_uri, encoding, sample_rate):
+    """Transcribes input_uri and writes the transcript to output_uri."""
+    service = cloud_speech.beta_create_Speech_stub(
+        make_channel('speech.googleapis.com', 443))
+    # Method and parameters follow the proto the grpc client was built from:
+    # https://github.com/googleapis/googleapis/blob/master/google/cloud/speech/v1/cloud_speech.proto
+    response = service.NonStreamingRecognize(cloud_speech.RecognizeRequest(
+        initial_request=cloud_speech.InitialRecognizeRequest(
+            encoding=encoding,
+            sample_rate=sample_rate,
+            output_uri=output_uri,
+        ),
+        audio_request=cloud_speech.AudioRequest(
+            uri=input_uri,
+        )
+    ), DEADLINE_SECS)
+    # Shouldn't actually print anything, as the transcript goes to output_uri
+    print(response.responses)
+
+
+def _gcs_uri(text):
+    """Returns text if it starts with gs://, else raises ValueError."""
+    if not text.startswith('gs://'):
+        raise ValueError(
+            'Cloud Storage uri must be of the form gs://bucket/path/')
+    return text
+
+
+PROTO_URL = ('https://github.com/googleapis/googleapis/blob/master/'
+             'google/cloud/speech/v1/cloud_speech.proto')
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser()
+    parser.add_argument('input_uri', type=_gcs_uri)
+    parser.add_argument('output_uri', type=_gcs_uri)
+    parser.add_argument(
+        '--encoding', default='FLAC', choices=[
+            'LINEAR16', 'FLAC', 'MULAW', 'AMR', 'AMR_WB'],
+        help='How the audio file is encoded. See {}#L67'.format(PROTO_URL))
+    parser.add_argument('--sample_rate', type=int, default=16000)
+
+    args = parser.parse_args()
+    main(args.input_uri, args.output_uri, args.encoding, args.sample_rate)
diff --git a/speech/api/speech_gcs_test.py b/speech/api/speech_gcs_test.py
new file mode 100644
index 00000000000..b7b4857246f
--- /dev/null
+++ b/speech/api/speech_gcs_test.py
@@ -0,0 +1,38 @@
+# Copyright 2016, Google, Inc.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import sys
+
+import pytest
+from speech_gcs import _gcs_uri
+from speech_gcs import main
+
+
+@pytest.mark.skipif(
+    sys.version_info >= (3, 0),
+    reason=("grpc doesn't yet support python3 "
+            'https://github.com/grpc/grpc/issues/282'))
+def test_main(cloud_config, capsys):
+    """Runs main() on a clip in the configured bucket and checks stdout."""
+    input_uri = 'gs://{}/speech/clip.flac'.format(cloud_config.storage_bucket)
+    output_uri = 'gs://{}/speech/clip.txt'.format(cloud_config.storage_bucket)
+    main(input_uri, output_uri, 'FLAC', 16000)
+    out, _ = capsys.readouterr()
+    assert '[]\n' == out
+
+
+def test_gcs_uri():
+    """Valid gs:// uris are returned unchanged; other paths are rejected."""
+    assert _gcs_uri('gs://bucket/path') == 'gs://bucket/path'
+    with pytest.raises(ValueError):
+        _gcs_uri('/local/path')
diff --git a/speech/api/speech_streaming.py b/speech/api/speech_streaming.py
index 606a1c732f5..ac661540ec2 100644
--- a/speech/api/speech_streaming.py
+++ b/speech/api/speech_streaming.py
@@ -1,11 +1,25 @@
 #!/usr/bin/python
+# Copyright (C) 2016 Google Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Sample that streams audio to the Google Cloud Speech API via GRPC.""" import contextlib import re import threading from gcloud.credentials import get_credentials -from google.cloud.speech.v1.cloud_speech_pb2 import * # noqa +from google.cloud.speech.v1 import cloud_speech_pb2 as cloud_speech from google.rpc import code_pb2 from grpc.beta import implementations import pyaudio @@ -70,7 +84,7 @@ def request_stream(stop_audio, channels=CHANNELS, rate=RATE, chunk=CHUNK): with record_audio(channels, rate, chunk) as audio_stream: # The initial request must contain metadata about the stream, so the # server knows how to interpret it. - metadata = InitialRecognizeRequest( + metadata = cloud_speech.InitialRecognizeRequest( encoding='LINEAR16', sample_rate=rate, # Note that setting interim_results to True means that you'll # likely get multiple results for the same bit of audio, as the @@ -80,9 +94,9 @@ def request_stream(stop_audio, channels=CHANNELS, rate=RATE, chunk=CHUNK): interim_results=True, continuous=False, ) data = audio_stream.read(chunk) - audio_request = AudioRequest(content=data) + audio_request = cloud_speech.AudioRequest(content=data) - yield RecognizeRequest( + yield cloud_speech.RecognizeRequest( initial_request=metadata, audio_request=audio_request) @@ -91,9 +105,9 @@ def request_stream(stop_audio, channels=CHANNELS, rate=RATE, chunk=CHUNK): if not data: raise StopIteration() # Subsequent requests can all just have the content - audio_request = AudioRequest(content=data) + audio_request = cloud_speech.AudioRequest(content=data) - yield RecognizeRequest(audio_request=audio_request) + yield cloud_speech.RecognizeRequest(audio_request=audio_request) def listen_print_loop(recognize_stream): @@ -116,7 +130,7 @@ def listen_print_loop(recognize_stream): def main(): stop_audio = threading.Event() - with beta_create_Speech_stub( + with cloud_speech.beta_create_Speech_stub( make_channel('speech.googleapis.com', 443)) as service: try: listen_print_loop(