blob: dc5ff708efcdbb7af3e1e7d9e4969e91e3ec8136 [file] [log] [blame]
/*
 * Copyright (C) 2011 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */
16package android.speech.tts;
17
18import android.media.AudioFormat;
19import android.media.AudioTrack;
20import android.util.Log;
21
22/**
23 * Speech synthesis request that plays the audio as it is received.
24 */
25class PlaybackSynthesisRequest extends SynthesisRequest {
26
27 private static final String TAG = "PlaybackSynthesisRequest";
28 private static final boolean DBG = false;
29
30 private static final int MIN_AUDIO_BUFFER_SIZE = 8192;
31
32 /**
33 * Audio stream type. Must be one of the STREAM_ contants defined in
34 * {@link android.media.AudioManager}.
35 */
36 private final int mStreamType;
37
38 /**
39 * Volume, in the range [0.0f, 1.0f]. The default value is
40 * {@link TextToSpeech.Engine#DEFAULT_VOLUME} (1.0f).
41 */
42 private final float mVolume;
43
44 /**
45 * Left/right position of the audio, in the range [-1.0f, 1.0f].
46 * The default value is {@link TextToSpeech.Engine#DEFAULT_PAN} (0.0f).
47 */
48 private final float mPan;
49
50 private final Object mStateLock = new Object();
51 private AudioTrack mAudioTrack = null;
52 private boolean mStopped = false;
Bjorn Bringert360eb162011-04-19 09:20:35 +010053 private boolean mDone = false;
Bjorn Bringert50e657b2011-03-08 16:00:40 +000054
55 PlaybackSynthesisRequest(String text, int streamType, float volume, float pan) {
56 super(text);
57 mStreamType = streamType;
58 mVolume = volume;
59 mPan = pan;
60 }
61
62 @Override
63 void stop() {
64 if (DBG) Log.d(TAG, "stop()");
65 synchronized (mStateLock) {
66 mStopped = true;
67 cleanUp();
68 }
69 }
70
71 private void cleanUp() {
72 if (DBG) Log.d(TAG, "cleanUp()");
73 if (mAudioTrack != null) {
74 mAudioTrack.flush();
75 mAudioTrack.stop();
Bjorn Bringert50e657b2011-03-08 16:00:40 +000076 mAudioTrack.release();
77 mAudioTrack = null;
78 }
79 }
80
Bjorn Bringert71e0b482011-04-15 14:37:05 +010081 @Override
82 public int getMaxBufferSize() {
83 // The AudioTrack buffer will be at least MIN_AUDIO_BUFFER_SIZE, so that should always be
84 // a safe buffer size to pass in.
85 return MIN_AUDIO_BUFFER_SIZE;
86 }
87
Bjorn Bringert360eb162011-04-19 09:20:35 +010088 @Override
89 boolean isDone() {
90 return mDone;
91 }
92
Bjorn Bringert50e657b2011-03-08 16:00:40 +000093 // TODO: add a thread that writes to the AudioTrack?
94 @Override
95 public int start(int sampleRateInHz, int audioFormat, int channelCount) {
96 if (DBG) {
97 Log.d(TAG, "start(" + sampleRateInHz + "," + audioFormat
98 + "," + channelCount + ")");
99 }
100
Bjorn Bringert50e657b2011-03-08 16:00:40 +0000101 synchronized (mStateLock) {
102 if (mStopped) {
103 if (DBG) Log.d(TAG, "Request has been aborted.");
104 return TextToSpeech.ERROR;
105 }
106 if (mAudioTrack != null) {
107 Log.e(TAG, "start() called twice");
108 cleanUp();
109 return TextToSpeech.ERROR;
110 }
111
Narayan Kamath53f6f952011-04-19 16:39:20 +0100112 mAudioTrack = createStreamingAudioTrack(sampleRateInHz, audioFormat, channelCount);
Bjorn Bringert71e0b482011-04-15 14:37:05 +0100113 if (mAudioTrack == null) {
Bjorn Bringert50e657b2011-03-08 16:00:40 +0000114 return TextToSpeech.ERROR;
115 }
Bjorn Bringert50e657b2011-03-08 16:00:40 +0000116 }
117
118 return TextToSpeech.SUCCESS;
119 }
120
Bjorn Bringert71e0b482011-04-15 14:37:05 +0100121 private void setupVolume(AudioTrack audioTrack, float volume, float pan) {
122 float vol = clip(volume, 0.0f, 1.0f);
123 float panning = clip(pan, -1.0f, 1.0f);
Bjorn Bringert50e657b2011-03-08 16:00:40 +0000124 float volLeft = vol;
125 float volRight = vol;
126 if (panning > 0.0f) {
127 volLeft *= (1.0f - panning);
128 } else if (panning < 0.0f) {
129 volRight *= (1.0f + panning);
130 }
131 if (DBG) Log.d(TAG, "volLeft=" + volLeft + ",volRight=" + volRight);
Bjorn Bringert71e0b482011-04-15 14:37:05 +0100132 if (audioTrack.setStereoVolume(volLeft, volRight) != AudioTrack.SUCCESS) {
Bjorn Bringert50e657b2011-03-08 16:00:40 +0000133 Log.e(TAG, "Failed to set volume");
134 }
135 }
136
137 private float clip(float value, float min, float max) {
138 return value > max ? max : (value < min ? min : value);
139 }
140
141 @Override
142 public int audioAvailable(byte[] buffer, int offset, int length) {
143 if (DBG) {
144 Log.d(TAG, "audioAvailable(byte[" + buffer.length + "],"
Bjorn Bringert71e0b482011-04-15 14:37:05 +0100145 + offset + "," + length + ")");
146 }
147 if (length > getMaxBufferSize()) {
148 throw new IllegalArgumentException("buffer is too large (" + length + " bytes)");
Bjorn Bringert50e657b2011-03-08 16:00:40 +0000149 }
150 synchronized (mStateLock) {
151 if (mStopped) {
152 if (DBG) Log.d(TAG, "Request has been aborted.");
153 return TextToSpeech.ERROR;
154 }
155 if (mAudioTrack == null) {
156 Log.e(TAG, "audioAvailable(): Not started");
157 return TextToSpeech.ERROR;
158 }
159 int playState = mAudioTrack.getPlayState();
160 if (playState == AudioTrack.PLAYSTATE_STOPPED) {
161 if (DBG) Log.d(TAG, "AudioTrack stopped, restarting");
162 mAudioTrack.play();
163 }
164 // TODO: loop until all data is written?
165 if (DBG) Log.d(TAG, "AudioTrack.write()");
166 int count = mAudioTrack.write(buffer, offset, length);
167 if (DBG) Log.d(TAG, "AudioTrack.write() returned " + count);
168 if (count < 0) {
169 Log.e(TAG, "Writing to AudioTrack failed: " + count);
170 cleanUp();
171 return TextToSpeech.ERROR;
172 } else {
173 return TextToSpeech.SUCCESS;
174 }
175 }
176 }
177
178 @Override
179 public int done() {
180 if (DBG) Log.d(TAG, "done()");
181 synchronized (mStateLock) {
182 if (mStopped) {
183 if (DBG) Log.d(TAG, "Request has been aborted.");
184 return TextToSpeech.ERROR;
185 }
186 if (mAudioTrack == null) {
187 Log.e(TAG, "done(): Not started");
188 return TextToSpeech.ERROR;
189 }
Bjorn Bringert360eb162011-04-19 09:20:35 +0100190 mDone = true;
Bjorn Bringert50e657b2011-03-08 16:00:40 +0000191 cleanUp();
192 }
193 return TextToSpeech.SUCCESS;
194 }
Bjorn Bringert71e0b482011-04-15 14:37:05 +0100195
196 @Override
Bjorn Bringert360eb162011-04-19 09:20:35 +0100197 public void error() {
198 if (DBG) Log.d(TAG, "error()");
199 synchronized (mStateLock) {
200 cleanUp();
201 }
202 }
203
204 @Override
Bjorn Bringert71e0b482011-04-15 14:37:05 +0100205 public int completeAudioAvailable(int sampleRateInHz, int audioFormat, int channelCount,
206 byte[] buffer, int offset, int length) {
207 if (DBG) {
208 Log.d(TAG, "completeAudioAvailable(" + sampleRateInHz + "," + audioFormat
209 + "," + channelCount + "byte[" + buffer.length + "],"
210 + offset + "," + length + ")");
211 }
212
213 synchronized (mStateLock) {
214 if (mStopped) {
215 if (DBG) Log.d(TAG, "Request has been aborted.");
216 return TextToSpeech.ERROR;
217 }
218 if (mAudioTrack != null) {
219 Log.e(TAG, "start() called before completeAudioAvailable()");
220 cleanUp();
221 return TextToSpeech.ERROR;
222 }
223
Narayan Kamath53f6f952011-04-19 16:39:20 +0100224 int channelConfig = getChannelConfig(channelCount);
225 if (channelConfig < 0) {
226 Log.e(TAG, "Unsupported number of channels :" + channelCount);
227 cleanUp();
228 return TextToSpeech.ERROR;
229 }
230 int bytesPerFrame = getBytesPerFrame(audioFormat);
231 if (bytesPerFrame < 0) {
232 Log.e(TAG, "Unsupported audio format :" + audioFormat);
233 cleanUp();
234 return TextToSpeech.ERROR;
235 }
236
237 mAudioTrack = new AudioTrack(mStreamType, sampleRateInHz, channelConfig,
238 audioFormat, buffer.length, AudioTrack.MODE_STATIC);
Bjorn Bringert71e0b482011-04-15 14:37:05 +0100239 if (mAudioTrack == null) {
240 return TextToSpeech.ERROR;
241 }
242
243 try {
244 mAudioTrack.write(buffer, offset, length);
Narayan Kamath53f6f952011-04-19 16:39:20 +0100245 setupVolume(mAudioTrack, mVolume, mPan);
Bjorn Bringert71e0b482011-04-15 14:37:05 +0100246 mAudioTrack.play();
Narayan Kamath53f6f952011-04-19 16:39:20 +0100247 blockUntilDone(mAudioTrack, bytesPerFrame, length);
Bjorn Bringert360eb162011-04-19 09:20:35 +0100248 mDone = true;
Narayan Kamath53f6f952011-04-19 16:39:20 +0100249 if (DBG) Log.d(TAG, "Wrote data to audio track succesfully : " + length);
Bjorn Bringert71e0b482011-04-15 14:37:05 +0100250 } catch (IllegalStateException ex) {
251 Log.e(TAG, "Playback error", ex);
252 return TextToSpeech.ERROR;
253 } finally {
254 cleanUp();
255 }
256 }
257
258 return TextToSpeech.SUCCESS;
259 }
260
Narayan Kamath53f6f952011-04-19 16:39:20 +0100261 private void blockUntilDone(AudioTrack audioTrack, int bytesPerFrame, int length) {
262 int lengthInFrames = length / bytesPerFrame;
263 int currentPosition = 0;
264 while ((currentPosition = audioTrack.getPlaybackHeadPosition()) < lengthInFrames) {
265 long estimatedTimeMs = ((lengthInFrames - currentPosition) * 1000) /
266 audioTrack.getSampleRate();
267 if (DBG) Log.d(TAG, "About to sleep for : " + estimatedTimeMs + " ms," +
268 " Playback position : " + currentPosition);
269 try {
270 Thread.sleep(estimatedTimeMs);
271 } catch (InterruptedException ie) {
272 break;
273 }
274 }
275 }
276
277 private int getBytesPerFrame(int audioFormat) {
278 if (audioFormat == AudioFormat.ENCODING_PCM_8BIT) {
279 return 1;
280 } else if (audioFormat == AudioFormat.ENCODING_PCM_16BIT) {
281 return 2;
282 }
283
284 return -1;
285 }
286
287 private int getChannelConfig(int channelCount) {
Bjorn Bringert71e0b482011-04-15 14:37:05 +0100288 if (channelCount == 1) {
Narayan Kamath53f6f952011-04-19 16:39:20 +0100289 return AudioFormat.CHANNEL_OUT_MONO;
Bjorn Bringert71e0b482011-04-15 14:37:05 +0100290 } else if (channelCount == 2){
Narayan Kamath53f6f952011-04-19 16:39:20 +0100291 return AudioFormat.CHANNEL_OUT_STEREO;
292 }
293
294 return -1;
295 }
296
297 private AudioTrack createStreamingAudioTrack(int sampleRateInHz, int audioFormat,
298 int channelCount) {
299 int channelConfig = getChannelConfig(channelCount);
300
301 if (channelConfig < 0) {
302 Log.e(TAG, "Unsupported number of channels : " + channelCount);
Bjorn Bringert71e0b482011-04-15 14:37:05 +0100303 return null;
304 }
305
306 int minBufferSizeInBytes
307 = AudioTrack.getMinBufferSize(sampleRateInHz, channelConfig, audioFormat);
308 int bufferSizeInBytes = Math.max(MIN_AUDIO_BUFFER_SIZE, minBufferSizeInBytes);
309 AudioTrack audioTrack = new AudioTrack(mStreamType, sampleRateInHz, channelConfig,
Narayan Kamath53f6f952011-04-19 16:39:20 +0100310 audioFormat, bufferSizeInBytes, AudioTrack.MODE_STREAM);
Bjorn Bringert71e0b482011-04-15 14:37:05 +0100311 if (audioTrack == null) {
312 return null;
313 }
Narayan Kamath53f6f952011-04-19 16:39:20 +0100314
Bjorn Bringert71e0b482011-04-15 14:37:05 +0100315 if (audioTrack.getState() != AudioTrack.STATE_INITIALIZED) {
316 audioTrack.release();
317 return null;
318 }
319 setupVolume(audioTrack, mVolume, mPan);
320 return audioTrack;
321 }
Narayan Kamath53f6f952011-04-19 16:39:20 +0100322}