blob: 717aeb6f00e5101b0bcfea06b0b0d4387c5dc16d [file] [log] [blame]
/*
 * Copyright (C) 2011 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */
16package android.speech.tts;
17
18import android.media.AudioFormat;
Przemyslaw Szczepaniak90d15d22013-06-14 12:02:53 +010019import android.speech.tts.TextToSpeechService.UtteranceProgressDispatcher;
Bjorn Bringert50e657b2011-03-08 16:00:40 +000020import android.util.Log;
21
Bjorn Bringert50e657b2011-03-08 16:00:40 +000022import java.io.IOException;
Bjorn Bringert50e657b2011-03-08 16:00:40 +000023import java.nio.ByteBuffer;
24import java.nio.ByteOrder;
Przemyslaw Szczepaniak5acb33a2013-02-08 16:36:25 +000025import java.nio.channels.FileChannel;
Bjorn Bringert50e657b2011-03-08 16:00:40 +000026
27/**
28 * Speech synthesis request that writes the audio to a WAV file.
29 */
Narayan Kamathe22b69a2011-06-08 11:41:47 +010030class FileSynthesisCallback extends AbstractSynthesisCallback {
Bjorn Bringert50e657b2011-03-08 16:00:40 +000031
32 private static final String TAG = "FileSynthesisRequest";
33 private static final boolean DBG = false;
34
Bjorn Bringert71e0b482011-04-15 14:37:05 +010035 private static final int MAX_AUDIO_BUFFER_SIZE = 8192;
36
Bjorn Bringert50e657b2011-03-08 16:00:40 +000037 private static final int WAV_HEADER_LENGTH = 44;
38 private static final short WAV_FORMAT_PCM = 0x0001;
39
40 private final Object mStateLock = new Object();
Przemyslaw Szczepaniak5acb33a2013-02-08 16:36:25 +000041
Bjorn Bringert50e657b2011-03-08 16:00:40 +000042 private int mSampleRateInHz;
43 private int mAudioFormat;
44 private int mChannelCount;
Przemyslaw Szczepaniak5acb33a2013-02-08 16:36:25 +000045
46 private FileChannel mFileChannel;
47
Przemyslaw Szczepaniak90d15d22013-06-14 12:02:53 +010048 private final UtteranceProgressDispatcher mDispatcher;
49 private final Object mCallerIdentity;
50
Przemyslaw Szczepaniak5acb33a2013-02-08 16:36:25 +000051 private boolean mStarted = false;
Bjorn Bringert360eb162011-04-19 09:20:35 +010052 private boolean mDone = false;
Bjorn Bringert50e657b2011-03-08 16:00:40 +000053
Przemyslaw Szczepaniak90d15d22013-06-14 12:02:53 +010054 /** Status code of synthesis */
55 protected int mStatusCode;
56
57 FileSynthesisCallback(FileChannel fileChannel, UtteranceProgressDispatcher dispatcher,
58 Object callerIdentity, boolean clientIsUsingV2) {
59 super(clientIsUsingV2);
Przemyslaw Szczepaniak5acb33a2013-02-08 16:36:25 +000060 mFileChannel = fileChannel;
Przemyslaw Szczepaniak90d15d22013-06-14 12:02:53 +010061 mDispatcher = dispatcher;
62 mCallerIdentity = callerIdentity;
63 mStatusCode = TextToSpeechClient.Status.SUCCESS;
Bjorn Bringert50e657b2011-03-08 16:00:40 +000064 }
65
66 @Override
67 void stop() {
68 synchronized (mStateLock) {
Przemyslaw Szczepaniak90d15d22013-06-14 12:02:53 +010069 if (mDone) {
70 return;
71 }
72 if (mStatusCode == TextToSpeechClient.Status.STOPPED) {
73 return;
74 }
75
76 mStatusCode = TextToSpeechClient.Status.STOPPED;
Bjorn Bringert50e657b2011-03-08 16:00:40 +000077 cleanUp();
Przemyslaw Szczepaniak90d15d22013-06-14 12:02:53 +010078 if (mDispatcher != null) {
79 mDispatcher.dispatchOnStop();
80 }
Bjorn Bringert50e657b2011-03-08 16:00:40 +000081 }
82 }
83
84 /**
85 * Must be called while holding the monitor on {@link #mStateLock}.
86 */
87 private void cleanUp() {
Przemyslaw Szczepaniak5acb33a2013-02-08 16:36:25 +000088 closeFile();
Bjorn Bringert50e657b2011-03-08 16:00:40 +000089 }
90
91 /**
92 * Must be called while holding the monitor on {@link #mStateLock}.
93 */
Przemyslaw Szczepaniak5acb33a2013-02-08 16:36:25 +000094 private void closeFile() {
Przemyslaw Szczepaniak90d15d22013-06-14 12:02:53 +010095 // File will be closed by the SpeechItem in the speech service.
96 mFileChannel = null;
Bjorn Bringert50e657b2011-03-08 16:00:40 +000097 }
98
99 @Override
Bjorn Bringert71e0b482011-04-15 14:37:05 +0100100 public int getMaxBufferSize() {
101 return MAX_AUDIO_BUFFER_SIZE;
102 }
103
104 @Override
Bjorn Bringert50e657b2011-03-08 16:00:40 +0000105 public int start(int sampleRateInHz, int audioFormat, int channelCount) {
106 if (DBG) {
107 Log.d(TAG, "FileSynthesisRequest.start(" + sampleRateInHz + "," + audioFormat
108 + "," + channelCount + ")");
109 }
Przemyslaw Szczepaniak90d15d22013-06-14 12:02:53 +0100110 FileChannel fileChannel = null;
Bjorn Bringert50e657b2011-03-08 16:00:40 +0000111 synchronized (mStateLock) {
Przemyslaw Szczepaniak90d15d22013-06-14 12:02:53 +0100112 if (mStatusCode == TextToSpeechClient.Status.STOPPED) {
Bjorn Bringert50e657b2011-03-08 16:00:40 +0000113 if (DBG) Log.d(TAG, "Request has been aborted.");
Przemyslaw Szczepaniak90d15d22013-06-14 12:02:53 +0100114 return errorCodeOnStop();
115 }
116 if (mStatusCode != TextToSpeechClient.Status.SUCCESS) {
117 if (DBG) Log.d(TAG, "Error was raised");
Bjorn Bringert50e657b2011-03-08 16:00:40 +0000118 return TextToSpeech.ERROR;
119 }
Przemyslaw Szczepaniak5acb33a2013-02-08 16:36:25 +0000120 if (mStarted) {
Przemyslaw Szczepaniak90d15d22013-06-14 12:02:53 +0100121 Log.e(TAG, "Start called twice");
122 return TextToSpeech.ERROR;
Bjorn Bringert50e657b2011-03-08 16:00:40 +0000123 }
Przemyslaw Szczepaniak5acb33a2013-02-08 16:36:25 +0000124 mStarted = true;
Bjorn Bringert50e657b2011-03-08 16:00:40 +0000125 mSampleRateInHz = sampleRateInHz;
126 mAudioFormat = audioFormat;
127 mChannelCount = channelCount;
Przemyslaw Szczepaniak5acb33a2013-02-08 16:36:25 +0000128
Przemyslaw Szczepaniak90d15d22013-06-14 12:02:53 +0100129 if (mDispatcher != null) {
130 mDispatcher.dispatchOnStart();
Bjorn Bringert50e657b2011-03-08 16:00:40 +0000131 }
Przemyslaw Szczepaniak90d15d22013-06-14 12:02:53 +0100132 fileChannel = mFileChannel;
133 }
134
135 try {
136 fileChannel.write(ByteBuffer.allocate(WAV_HEADER_LENGTH));
137 return TextToSpeech.SUCCESS;
138 } catch (IOException ex) {
139 Log.e(TAG, "Failed to write wav header to output file descriptor", ex);
140 synchronized (mStateLock) {
141 cleanUp();
142 mStatusCode = TextToSpeechClient.Status.ERROR_OUTPUT;
143 }
144 return TextToSpeech.ERROR;
Bjorn Bringert50e657b2011-03-08 16:00:40 +0000145 }
146 }
147
148 @Override
149 public int audioAvailable(byte[] buffer, int offset, int length) {
150 if (DBG) {
151 Log.d(TAG, "FileSynthesisRequest.audioAvailable(" + buffer + "," + offset
152 + "," + length + ")");
153 }
Przemyslaw Szczepaniak90d15d22013-06-14 12:02:53 +0100154 FileChannel fileChannel = null;
Bjorn Bringert50e657b2011-03-08 16:00:40 +0000155 synchronized (mStateLock) {
Przemyslaw Szczepaniak90d15d22013-06-14 12:02:53 +0100156 if (mStatusCode == TextToSpeechClient.Status.STOPPED) {
Bjorn Bringert50e657b2011-03-08 16:00:40 +0000157 if (DBG) Log.d(TAG, "Request has been aborted.");
Przemyslaw Szczepaniak90d15d22013-06-14 12:02:53 +0100158 return errorCodeOnStop();
159 }
160 if (mStatusCode != TextToSpeechClient.Status.SUCCESS) {
161 if (DBG) Log.d(TAG, "Error was raised");
Bjorn Bringert50e657b2011-03-08 16:00:40 +0000162 return TextToSpeech.ERROR;
163 }
Przemyslaw Szczepaniak5acb33a2013-02-08 16:36:25 +0000164 if (mFileChannel == null) {
Bjorn Bringert50e657b2011-03-08 16:00:40 +0000165 Log.e(TAG, "File not open");
Przemyslaw Szczepaniak90d15d22013-06-14 12:02:53 +0100166 mStatusCode = TextToSpeechClient.Status.ERROR_OUTPUT;
Bjorn Bringert50e657b2011-03-08 16:00:40 +0000167 return TextToSpeech.ERROR;
168 }
Przemyslaw Szczepaniak90d15d22013-06-14 12:02:53 +0100169 if (!mStarted) {
170 Log.e(TAG, "Start method was not called");
171 return TextToSpeech.ERROR;
172 }
173 fileChannel = mFileChannel;
174 }
175
176 try {
177 fileChannel.write(ByteBuffer.wrap(buffer, offset, length));
178 return TextToSpeech.SUCCESS;
179 } catch (IOException ex) {
180 Log.e(TAG, "Failed to write to output file descriptor", ex);
181 synchronized (mStateLock) {
Bjorn Bringert50e657b2011-03-08 16:00:40 +0000182 cleanUp();
Przemyslaw Szczepaniak90d15d22013-06-14 12:02:53 +0100183 mStatusCode = TextToSpeechClient.Status.ERROR_OUTPUT;
Bjorn Bringert50e657b2011-03-08 16:00:40 +0000184 }
Przemyslaw Szczepaniak90d15d22013-06-14 12:02:53 +0100185 return TextToSpeech.ERROR;
Bjorn Bringert50e657b2011-03-08 16:00:40 +0000186 }
187 }
188
189 @Override
190 public int done() {
191 if (DBG) Log.d(TAG, "FileSynthesisRequest.done()");
Przemyslaw Szczepaniak90d15d22013-06-14 12:02:53 +0100192 FileChannel fileChannel = null;
193
194 int sampleRateInHz = 0;
195 int audioFormat = 0;
196 int channelCount = 0;
197
Bjorn Bringert50e657b2011-03-08 16:00:40 +0000198 synchronized (mStateLock) {
Narayan Kamath9c3d7a82012-07-20 18:01:43 +0100199 if (mDone) {
Przemyslaw Szczepaniak90d15d22013-06-14 12:02:53 +0100200 Log.w(TAG, "Duplicate call to done()");
201 // This is not an error that would prevent synthesis. Hence no
202 // setStatusCode is set.
Narayan Kamath9c3d7a82012-07-20 18:01:43 +0100203 return TextToSpeech.ERROR;
204 }
Przemyslaw Szczepaniak90d15d22013-06-14 12:02:53 +0100205 if (mStatusCode == TextToSpeechClient.Status.STOPPED) {
Bjorn Bringert50e657b2011-03-08 16:00:40 +0000206 if (DBG) Log.d(TAG, "Request has been aborted.");
Przemyslaw Szczepaniak90d15d22013-06-14 12:02:53 +0100207 return errorCodeOnStop();
208 }
209 if (mDispatcher != null && mStatusCode != TextToSpeechClient.Status.SUCCESS &&
210 mStatusCode != TextToSpeechClient.Status.STOPPED) {
211 mDispatcher.dispatchOnError(mStatusCode);
Bjorn Bringert50e657b2011-03-08 16:00:40 +0000212 return TextToSpeech.ERROR;
213 }
Przemyslaw Szczepaniak5acb33a2013-02-08 16:36:25 +0000214 if (mFileChannel == null) {
Bjorn Bringert50e657b2011-03-08 16:00:40 +0000215 Log.e(TAG, "File not open");
216 return TextToSpeech.ERROR;
217 }
Przemyslaw Szczepaniak90d15d22013-06-14 12:02:53 +0100218 mDone = true;
219 fileChannel = mFileChannel;
220 sampleRateInHz = mSampleRateInHz;
221 audioFormat = mAudioFormat;
222 channelCount = mChannelCount;
223 }
224
225 try {
226 // Write WAV header at start of file
227 fileChannel.position(0);
228 int dataLength = (int) (fileChannel.size() - WAV_HEADER_LENGTH);
229 fileChannel.write(
230 makeWavHeader(sampleRateInHz, audioFormat, channelCount, dataLength));
231
232 synchronized (mStateLock) {
Przemyslaw Szczepaniak5acb33a2013-02-08 16:36:25 +0000233 closeFile();
Przemyslaw Szczepaniak90d15d22013-06-14 12:02:53 +0100234 if (mDispatcher != null) {
235 mDispatcher.dispatchOnSuccess();
236 }
Bjorn Bringert50e657b2011-03-08 16:00:40 +0000237 return TextToSpeech.SUCCESS;
Bjorn Bringert50e657b2011-03-08 16:00:40 +0000238 }
Przemyslaw Szczepaniak90d15d22013-06-14 12:02:53 +0100239 } catch (IOException ex) {
240 Log.e(TAG, "Failed to write to output file descriptor", ex);
241 synchronized (mStateLock) {
242 cleanUp();
243 }
244 return TextToSpeech.ERROR;
Bjorn Bringert50e657b2011-03-08 16:00:40 +0000245 }
246 }
247
Bjorn Bringert71e0b482011-04-15 14:37:05 +0100248 @Override
Bjorn Bringert360eb162011-04-19 09:20:35 +0100249 public void error() {
Przemyslaw Szczepaniak90d15d22013-06-14 12:02:53 +0100250 error(TextToSpeechClient.Status.ERROR_SYNTHESIS);
251 }
252
253 @Override
254 public void error(int errorCode) {
Bjorn Bringert360eb162011-04-19 09:20:35 +0100255 if (DBG) Log.d(TAG, "FileSynthesisRequest.error()");
256 synchronized (mStateLock) {
Przemyslaw Szczepaniak90d15d22013-06-14 12:02:53 +0100257 if (mDone) {
258 return;
259 }
Bjorn Bringert360eb162011-04-19 09:20:35 +0100260 cleanUp();
Przemyslaw Szczepaniak90d15d22013-06-14 12:02:53 +0100261 mStatusCode = errorCode;
262 }
263 }
264
265 @Override
266 public boolean hasStarted() {
267 synchronized (mStateLock) {
268 return mStarted;
269 }
270 }
271
272 @Override
273 public boolean hasFinished() {
274 synchronized (mStateLock) {
275 return mDone;
Bjorn Bringert360eb162011-04-19 09:20:35 +0100276 }
277 }
278
Przemyslaw Szczepaniak5acb33a2013-02-08 16:36:25 +0000279 private ByteBuffer makeWavHeader(int sampleRateInHz, int audioFormat, int channelCount,
Bjorn Bringert71e0b482011-04-15 14:37:05 +0100280 int dataLength) {
Bjorn Bringert50e657b2011-03-08 16:00:40 +0000281 // TODO: is AudioFormat.ENCODING_DEFAULT always the same as ENCODING_PCM_16BIT?
282 int sampleSizeInBytes = (audioFormat == AudioFormat.ENCODING_PCM_8BIT ? 1 : 2);
283 int byteRate = sampleRateInHz * sampleSizeInBytes * channelCount;
284 short blockAlign = (short) (sampleSizeInBytes * channelCount);
285 short bitsPerSample = (short) (sampleSizeInBytes * 8);
286
287 byte[] headerBuf = new byte[WAV_HEADER_LENGTH];
288 ByteBuffer header = ByteBuffer.wrap(headerBuf);
289 header.order(ByteOrder.LITTLE_ENDIAN);
290
291 header.put(new byte[]{ 'R', 'I', 'F', 'F' });
Bjorn Bringert71e0b482011-04-15 14:37:05 +0100292 header.putInt(dataLength + WAV_HEADER_LENGTH - 8); // RIFF chunk size
Bjorn Bringert50e657b2011-03-08 16:00:40 +0000293 header.put(new byte[]{ 'W', 'A', 'V', 'E' });
294 header.put(new byte[]{ 'f', 'm', 't', ' ' });
295 header.putInt(16); // size of fmt chunk
296 header.putShort(WAV_FORMAT_PCM);
297 header.putShort((short) channelCount);
298 header.putInt(sampleRateInHz);
299 header.putInt(byteRate);
300 header.putShort(blockAlign);
301 header.putShort(bitsPerSample);
302 header.put(new byte[]{ 'd', 'a', 't', 'a' });
Bjorn Bringert50e657b2011-03-08 16:00:40 +0000303 header.putInt(dataLength);
Przemyslaw Szczepaniak5acb33a2013-02-08 16:36:25 +0000304 header.flip();
Bjorn Bringert50e657b2011-03-08 16:00:40 +0000305
Przemyslaw Szczepaniak5acb33a2013-02-08 16:36:25 +0000306 return header;
Bjorn Bringert50e657b2011-03-08 16:00:40 +0000307 }
308
Przemyslaw Szczepaniak90d15d22013-06-14 12:02:53 +0100309 @Override
310 public int fallback() {
311 synchronized (mStateLock) {
312 if (hasStarted() || hasFinished()) {
313 return TextToSpeech.ERROR;
314 }
315
316 mDispatcher.dispatchOnFallback();
317 mStatusCode = TextToSpeechClient.Status.SUCCESS;
318 return TextToSpeechClient.Status.SUCCESS;
319 }
320 }
Bjorn Bringert50e657b2011-03-08 16:00:40 +0000321}