8000 Updated Infinite streaming sample (#1422) · llozano100/java-docs-samples@8782eda · GitHub
[go: up one dir, main page]

Skip to content

Commit 8782eda

Browse files
author
Kristin Grace Galvin
authored
Updated Infinite streaming sample (GoogleCloudPlatform#1422)
* adding result end time and color-coded result output * updated infinite streaming sample to use result_end_time * lint fixes * more comments and formatting updates * Fix speech test. * More test adjustments.
1 parent d50c7d4 commit 8782eda

File tree

3 files changed

+137
-27
lines changed

3 files changed

+137
-27
lines changed

speech/cloud-client/src/main/java/com/example/speech/InfiniteStreamRecognize.java

Lines changed: 131 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -20,14 +20,17 @@
2020
import com.google.api.gax.rpc.ClientStream;
2121
import com.google.api.gax.rpc.ResponseObserver;
2222
import com.google.api.gax.rpc.StreamController;
23-
import com.google.cloud.speech.v1.RecognitionConfig;
24-
import com.google.cloud.speech.v1.SpeechClient;
25-
import com.google.cloud.speech.v1.SpeechRecognitionAlternative;
26-
import com.google.cloud.speech.v1.StreamingRecognitionConfig;
27-
import com.google.cloud.speech.v1.StreamingRecognitionResult;
28-
import com.google.cloud.speech.v1.StreamingRecognizeRequest;
29-
import com.google.cloud.speech.v1.StreamingRecognizeResponse;
23+
import com.google.cloud.speech.v1p1beta1.RecognitionConfig;
24+
import com.google.cloud.speech.v1p1beta1.SpeechClient;
25+
import com.google.cloud.speech.v1p1beta1.SpeechRecognitionAlternative;
26+
import com.google.cloud.speech.v1p1beta1.StreamingRecognitionConfig;
27+
import com.google.cloud.speech.v1p1beta1.StreamingRecognitionResult;
28+
import com.google.cloud.speech.v1p1beta1.StreamingRecognizeRequest;
29+
import com.google.cloud.speech.v1p1beta1.StreamingRecognizeResponse;
3030
import com.google.protobuf.ByteString;
31+
import com.google.protobuf.Duration;
32+
import java.lang.Math;
33+
import java.text.DecimalFormat;
3134
import java.util.ArrayList;
3235
import java.util.concurrent.BlockingQueue;
3336
import java.util.concurrent.LinkedBlockingQueue;
@@ -39,11 +42,29 @@
3942

4043
public class InfiniteStreamRecognize {
4144

45+
private static final int STREAMING_LIMIT = 10000; // 10 seconds
46+
47+
public static final String RED = "\033[0;31m";
48+
public static final String GREEN = "\033[0;32m";
49+
public static final String YELLOW = "\033[0;33m";
50+
4251
// Creating shared object
4352
private static volatile BlockingQueue<byte[]> sharedQueue = new LinkedBlockingQueue();
4453
private static TargetDataLine targetDataLine;
4554
private static int BYTES_PER_BUFFER = 6400; // buffer size in bytes
4655

56+
private static int restartCounter = 0;
57+
private static ArrayList<ByteString> audioInput = new ArrayList<ByteString>();
58+
private static ArrayList<ByteString> lastAudioInput = new ArrayList<ByteString>();
59+
private static int resultEndTimeInMS = 0;
60+
private static int isFinalEndTime = 0;
61+
private static int finalRequestEndTime = 0;
62+
private static boolean newStream = true;
63+
private static double bridgingOffset = 0;
64+
private static boolean lastTranscriptWasFinal = false;
65+
private static StreamController referenceToStreamController;
66+
private static ByteString tempByteString;
67+
4768
public static void main(String... args) {
4869
try {
4970
infiniteStreamingRecognize();
@@ -60,6 +81,7 @@ class MicBuffer implements Runnable {
6081

6182
@Override
6283
public void run() {
84+
System.out.println(YELLOW);
6385
System.out.println("Start speaking...Press Ctrl-C to stop");
6486
targetDataLine.start();
6587
byte[] data = new byte[BYTES_PER_BUFFER];
@@ -88,24 +110,48 @@ public void run() {
88110

89111
ArrayList<StreamingRecognizeResponse> responses = new ArrayList<>();
90112

91-
public void onStart(StreamController controller) {}
113+
public void onStart(StreamController controller) {
114+
referenceToStreamController = controller;
115+
}
92116

93117
public void onResponse(StreamingRecognizeResponse response) {
118+
94119
responses.add(response);
120+
95121
StreamingRecognitionResult result = response.getResultsList().get(0);
96-
// There can be several alternative transcripts for a given chunk of speech. Just
97-
// use the first (most likely) one here.
122+
123+
Duration resultEndTime = result.getResultEndTime();
124+
125+
resultEndTimeInMS = (int) ((resultEndTime.getSeconds() * 1000)
126+
+ (resultEndTime.getNanos() / 1000000));
127+
128+
double correctedTime = resultEndTimeInMS - bridgingOffset
129+
+ (STREAMING_LIMIT * restartCounter);
130+
DecimalFormat format = new DecimalFormat("0.#");
131+
98132
SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0);
99-
System.out.printf("Transcript : %s\n", alternative.getTranscript());
100-
}
133+
if (result.getIsFinal()) {
134+
System.out.print(GREEN);
135+
System.out.print("\033[2K\r");
136+
System.out.printf("%s: %s\n", format.format(correctedTime),
137+
alternative.getTranscript());
101138

102-
public void onComplete() {
103-
System.out.println("Done");
104-
}
139+
isFinalEndTime = resultEndTimeInMS;
140+
lastTranscriptWasFinal = true;
141+
} else {
142+
System.out.print(RED);
143+
System.out.print("\033[2K\r");
144+
System.out.printf("%s: %s", format.format(correctedTime),
145+
alternative.getTranscript());
105146

106-
public void onError(Throwable t) {
107-
System.out.println(t);
147+
lastTranscriptWasFinal = false;
148+
}
108149
}
150+
151+
public void onComplete() {}
152+
153+
public void onError(Throwable t) {}
154+
109155
};
110156

111157
clientStream = client.streamingRecognizeCallable().splitCall(responseObserver);
@@ -116,8 +162,12 @@ public void onError(Throwable t) {
116162
.setLanguageCode("en-US")
117163
.setSampleRateHertz(16000)
118164
.build();
165+
119166
StreamingRecognitionConfig streamingRecognitionConfig =
120-
StreamingRecognitionConfig.newBuilder().setConfig(recognitionConfig).build();
167+
StreamingRecognitionConfig.newBuilder()
168+
.setConfig(recognitionConfig)
169+
.setInterimResults(true)
170+
.build();
121171

122172
StreamingRecognizeRequest request =
123173
StreamingRecognizeRequest.newBuilder()
@@ -151,23 +201,84 @@ public void onError(Throwable t) {
151201

152202
long estimatedTime = System.currentTimeMillis() - startTime;
153203

154-
if (estimatedTime >= 55000) {
204+
if (estimatedTime >= STREAMING_LIMIT) {
155205

156206
clientStream.closeSend();
207+
referenceToStreamController.cancel(); // remove Observer
208+
209+
if (resultEndTimeInMS > 0) {
210+
finalRequestEndTime = isFinalEndTime;
211+
}
212+
resultEndTimeInMS = 0;
213+
214+
lastAudioInput = null;
215+
lastAudioInput = audioInput;
216+
audioInput = new ArrayList<ByteString>();
217+
218+
restartCounter++;
219+
220+
if (!lastTranscriptWasFinal) {
221+
System.out.print('\n');
222+
}
223+
224+
newStream = true;
225+
157226
clientStream = client.streamingRecognizeCallable().splitCall(responseObserver);
158227

159228
request =
160229
StreamingRecognizeRequest.newBuilder()
161230
.setStreamingConfig(streamingRecognitionConfig)
162231
.build();
163232

233+
System.out.println(YELLOW);
234+
System.out.printf("%d: RESTARTING REQUEST\n", restartCounter * STREAMING_LIMIT);
235+
164236
startTime = System.currentTimeMillis();
165237

166238
} else {
239+
240+
if ((newStream) && (lastAudioInput.size() > 0)) {
241+
// if this is the first audio from a new request
242+
// calculate amount of unfinalized audio from last request
243+
// resend the audio to the speech client before incoming audio
244+
double chunkTime = STREAMING_LIMIT / lastAudioInput.size();
245+
// ms length of each chunk in previous request audio arrayList
246+
if (chunkTime != 0) {
247+
if (bridgingOffset < 0) {
248+
// bridging Offset accounts for time of resent audio
249+
// calculated from last request
250+
bridgingOffset = 0;
251+
}
252+
if (bridgingOffset > finalRequestEndTime) {
253+
bridgingOffset = finalRequestEndTime;
254+
}
255+
int chunksFromMS = (int) Math.floor((finalRequestEndTime
256+
- bridgingOffset) / chunkTime);
257+
// chunks from MS is number of chunks to resend
258+
bridgingOffset = (int) Math.floor((lastAudioInput.size()
259+
- chunksFromMS) * chunkTime);
260+
// set bridging offset for next request
261+
for (int i = chunksFromMS; i < lastAudioInput.size(); i++) {
262+
263+
request =
264+
StreamingRecognizeRequest.newBuilder()
265+
.setAudioContent(lastAudioInput.get(i))
266+
.build();
267+
clientStream.send(request);
268+
}
269+
}
270+
newStream = false;
271+
}
272+
273+
tempByteString = ByteString.copyFrom(sharedQueue.take());
274+
167275
request =
168276
StreamingRecognizeRequest.newBuilder()
169-
.setAudioContent(ByteString.copyFrom(sharedQueue.take()))
277+
.setAudioContent(tempByteString)
170278
.build();
279+
280+
audioInput.add(tempByteString);
281+
171282
}
172283

173284
clientStream.send(request);

speech/cloud-client/src/main/java/com/example/speech/Recognize.java

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -699,7 +699,6 @@ public void onError(Throwable t) {
699699
System.out.println("Stop speaking.");
700700
targetDataLine.stop();
701701
targetDataLine.close();
702-
break;
703702
}
704703
request =
705704
StreamingRecognizeRequest.newBuilder()

speech/cloud-client/src/test/java/com/example/speech/RecognizeIT.java

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -113,22 +113,22 @@ public void testStreamRecognize() throws Exception {
113113
@Test
114114
public void testAutoPunctuation() throws Exception {
115115
Recognize.transcribeFileWithAutomaticPunctuation(audioFileName);
116-
String got = bout.toString();
117-
assertThat(got).contains("How old is the Brooklyn Bridge?");
116+
String got = bout.toString().toLowerCase();
117+
assertThat(got).contains("how old is the brooklyn bridge");
118118
}
119119

120120
@Test
121121
public void testGcsAutoPunctuation() throws Exception {
122122
Recognize.transcribeGcsWithAutomaticPunctuation(gcsAudioPath);
123-
String got = bout.toString();
124-
assertThat(got).contains("How old is the Brooklyn Bridge?");
123+
String got = bout.toString().toLowerCase();
124+
assertThat(got).contains("how old is the brooklyn bridge");
125125
}
126126

127127
@Test
128128
public void testStreamAutoPunctuation() throws Exception {
129129
Recognize.streamingTranscribeWithAutomaticPunctuation(audioFileName);
130-
String got = bout.toString();
131-
assertThat(got).contains("How old is the Brooklyn Bridge?");
130+
String got = bout.toString().toLowerCase();
131+
assertThat(got).contains("how old is the brooklyn bridge");
132132
}
133133

134134
@Test

0 commit comments

Comments
 (0)
0