| /* |
| * Copyright (C) 2011 The Android Open Source Project |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); you may not |
| * use this file except in compliance with the License. You may obtain a copy of |
| * the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT |
| * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the |
| * License for the specific language governing permissions and limitations under |
| * the License. |
| */ |
| package android.speech.tts; |
| |
| import android.media.AudioFormat; |
| import android.media.AudioTrack; |
| import android.text.TextUtils; |
| import android.util.Log; |
| |
| import java.util.Iterator; |
| import java.util.concurrent.PriorityBlockingQueue; |
| import java.util.concurrent.atomic.AtomicLong; |
| |
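| /** |
| * Plays back TTS output on a dedicated thread. Callers enqueue synthesis, |
| * audio and silence messages into a priority queue; a single message loop |
| * drains the queue, which keeps all AudioTrack interaction on one thread. |
| * |
| * A typical lifecycle looks roughly like the sketch below ({@code params} |
| * is a hypothetical SynthesisMessageParams built elsewhere, shown for |
| * illustration only) : |
| * |
| * <pre> |
| * AudioPlaybackHandler handler = new AudioPlaybackHandler(); |
| * handler.start(); |
| * handler.enqueueSynthesisStart(params); |
| * handler.enqueueSynthesisDataAvailable(params); // possibly many times |
| * handler.enqueueSynthesisDone(params); |
| * handler.quit(); |
| * </pre> |
| */ |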
| class AudioPlaybackHandler { |
| private static final String TAG = "TTS.AudioPlaybackHandler"; |
| private static final boolean DBG_THREADING = false; |
| private static final boolean DBG = false; |
| |
| private static final int MIN_AUDIO_BUFFER_SIZE = 8192; |
| |
| private static final int SYNTHESIS_START = 1; |
| private static final int SYNTHESIS_DATA_AVAILABLE = 2; |
| private static final int SYNTHESIS_DONE = 3; |
| |
| private static final int PLAY_AUDIO = 5; |
| private static final int PLAY_SILENCE = 6; |
| |
| private static final int SHUTDOWN = -1; |
| |
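| // Lower values denote higher priority in the queue. Control messages |
| // (such as SYNTHESIS_DONE on stop, or SHUTDOWN) are enqueued with |
| // HIGH_PRIORITY so that they jump ahead of pending playback messages. |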
| private static final int DEFAULT_PRIORITY = 1; |
| private static final int HIGH_PRIORITY = 0; |
| |
| private final PriorityBlockingQueue<ListEntry> mQueue = |
| new PriorityBlockingQueue<ListEntry>(); |
| private final Thread mHandlerThread; |
| |
| private volatile MessageParams mCurrentParams = null; |
| // Used only for bookkeeping and error detection. |
| private volatile SynthesisMessageParams mLastSynthesisRequest = null; |
| // Used to order incoming messages in our priority queue. |
| private final AtomicLong mSequenceIdCtr = new AtomicLong(0); |
| |
| |
| AudioPlaybackHandler() { |
| mHandlerThread = new Thread(new MessageLoop(), "TTS.AudioPlaybackThread"); |
| } |
| |
| public void start() { |
| mHandlerThread.start(); |
| } |
| |
| /** |
| * Stops all processing for a given {@code token}. If the token is |
| * currently being processed, a best effort will be made to stop it, |
| * but that is not guaranteed. |
| * |
| * NOTE: This assumes that all other messages in the queue with {@code token} |
| * have already been removed. |
| * |
| * NOTE: Must be called while synchronized on {@code AudioPlaybackHandler.this}. |
| */ |
| private void stop(MessageParams token) { |
| if (token == null) { |
| return; |
| } |
| |
| if (DBG) Log.d(TAG, "Stopping token : " + token); |
| |
| if (token.getType() == MessageParams.TYPE_SYNTHESIS) { |
| AudioTrack current = ((SynthesisMessageParams) token).getAudioTrack(); |
| if (current != null) { |
| // Stop the current audio track if it's still playing. |
| // The audio track is thread safe in this regard. The current |
| // handleSynthesisDataAvailable call will return soon after this |
| // call. |
| current.stop(); |
| } |
| // This is safe because PlaybackSynthesisCallback#stop would have |
| // been called before this method, and will no longer enqueue any |
| // audio for this token. |
| // |
| // (Even if it did, all it would result in is a warning message). |
| mQueue.add(new ListEntry(SYNTHESIS_DONE, token, HIGH_PRIORITY)); |
| } else if (token.getType() == MessageParams.TYPE_AUDIO) { |
| ((AudioMessageParams) token).getPlayer().stop(); |
| // No cleanup required for audio messages. |
| } else if (token.getType() == MessageParams.TYPE_SILENCE) { |
| ((SilenceMessageParams) token).getConditionVariable().open(); |
| // No cleanup required for silence messages. |
| } |
| } |
| |
| // ----------------------------------------------------- |
| // Methods that add and remove elements from the queue. Strictly |
| // speaking, these do not need to be synchronized, but synchronization |
| // makes their behaviour a lot more predictable. |
| // ----------------------------------------------------- |
| |
| public synchronized void removePlaybackItems(String callingApp) { |
| if (DBG_THREADING) Log.d(TAG, "Removing all playback items for : " + callingApp); |
| removeMessages(callingApp); |
| |
| final MessageParams current = getCurrentParams(); |
| if (current != null && TextUtils.equals(callingApp, current.getCallingApp())) { |
| stop(current); |
| } |
| |
| final MessageParams lastSynthesis = mLastSynthesisRequest; |
| |
| if (lastSynthesis != null && lastSynthesis != current && |
| TextUtils.equals(callingApp, lastSynthesis.getCallingApp())) { |
| stop(lastSynthesis); |
| } |
| } |
| |
| public synchronized void removeAllItems() { |
| if (DBG_THREADING) Log.d(TAG, "Removing all items"); |
| removeAllMessages(); |
| |
| final MessageParams current = getCurrentParams(); |
| final MessageParams lastSynthesis = mLastSynthesisRequest; |
| stop(current); |
| |
| if (lastSynthesis != null && lastSynthesis != current) { |
| stop(lastSynthesis); |
| } |
| } |
| |
| /** |
| * @return true if the queue is non-empty or a queue item is currently |
| * being handled, false otherwise. |
| */ |
| public boolean isSpeaking() { |
| return (mQueue.peek() != null) || (mCurrentParams != null); |
| } |
| |
| /** |
| * Shut down the audio playback thread. |
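| * |
| * All queued messages are removed and the message currently being |
| * processed (if any) is stopped; a HIGH_PRIORITY SHUTDOWN entry then |
| * terminates the message loop. |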
| */ |
| public synchronized void quit() { |
| removeAllMessages(); |
| stop(getCurrentParams()); |
| mQueue.add(new ListEntry(SHUTDOWN, null, HIGH_PRIORITY)); |
| } |
| |
| synchronized void enqueueSynthesisStart(SynthesisMessageParams token) { |
| if (DBG_THREADING) Log.d(TAG, "Enqueuing synthesis start : " + token); |
| mQueue.add(new ListEntry(SYNTHESIS_START, token)); |
| } |
| |
| synchronized void enqueueSynthesisDataAvailable(SynthesisMessageParams token) { |
| if (DBG_THREADING) Log.d(TAG, "Enqueuing synthesis data available : " + token); |
| mQueue.add(new ListEntry(SYNTHESIS_DATA_AVAILABLE, token)); |
| } |
| |
| synchronized void enqueueSynthesisDone(SynthesisMessageParams token) { |
| if (DBG_THREADING) Log.d(TAG, "Enqueuing synthesis done : " + token); |
| mQueue.add(new ListEntry(SYNTHESIS_DONE, token)); |
| } |
| |
| synchronized void enqueueAudio(AudioMessageParams token) { |
| if (DBG_THREADING) Log.d(TAG, "Enqueuing audio : " + token); |
| mQueue.add(new ListEntry(PLAY_AUDIO, token)); |
| } |
| |
| synchronized void enqueueSilence(SilenceMessageParams token) { |
| if (DBG_THREADING) Log.d(TAG, "Enqueuing silence : " + token); |
| mQueue.add(new ListEntry(PLAY_SILENCE, token)); |
| } |
| |
| // ----------------------------------------- |
| // End of public API methods. |
| // ----------------------------------------- |
| |
| // ----------------------------------------- |
| // Methods for managing the message queue. |
| // ----------------------------------------- |
| |
| /* |
| * The MessageLoop is a Handler-like implementation that |
| * processes messages from a priority queue. |
| */ |
| private final class MessageLoop implements Runnable { |
| @Override |
| public void run() { |
| while (true) { |
| ListEntry entry = null; |
| try { |
| entry = mQueue.take(); |
| } catch (InterruptedException ie) { |
| return; |
| } |
| |
| if (entry.mWhat == SHUTDOWN) { |
| if (DBG) Log.d(TAG, "MessageLoop : Shutting down"); |
| return; |
| } |
| |
| if (DBG) { |
| Log.d(TAG, "MessageLoop : Handling message :" + entry.mWhat |
| + " ,seqId : " + entry.mSequenceId); |
| } |
| |
| setCurrentParams(entry.mMessage); |
| handleMessage(entry); |
| setCurrentParams(null); |
| } |
| } |
| } |
| |
| /* |
| * Atomically clear the queue of all messages. |
| */ |
| private synchronized void removeAllMessages() { |
| mQueue.clear(); |
| } |
| |
| /* |
| * Remove all messages that originate from a given calling app. |
| */ |
| private synchronized void removeMessages(String callingApp) { |
| Iterator<ListEntry> it = mQueue.iterator(); |
| |
| while (it.hasNext()) { |
| final ListEntry current = it.next(); |
| // The null check is to prevent us from removing control messages, |
| // such as a shutdown message. |
| if (current.mMessage != null && |
| callingApp.equals(current.mMessage.getCallingApp())) { |
| it.remove(); |
| } |
| } |
| } |
| |
| /* |
| * An element of our priority queue of messages. Each message has a priority, |
| * and a sequence id (defined by the order of enqueue calls). Among messages |
| * with the same priority, messages that were received earlier win out. |
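| * |
| * For example, a HIGH_PRIORITY (0) entry always dequeues before a |
| * DEFAULT_PRIORITY (1) entry regardless of enqueue order, while two |
| * DEFAULT_PRIORITY entries dequeue in the order they were created. |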
| */ |
| private final class ListEntry implements Comparable<ListEntry> { |
| final int mWhat; |
| final MessageParams mMessage; |
| final int mPriority; |
| final long mSequenceId; |
| |
| private ListEntry(int what, MessageParams message) { |
| this(what, message, DEFAULT_PRIORITY); |
| } |
| |
| private ListEntry(int what, MessageParams message, int priority) { |
| mWhat = what; |
| mMessage = message; |
| mPriority = priority; |
| mSequenceId = mSequenceIdCtr.incrementAndGet(); |
| } |
| |
| @Override |
| public int compareTo(ListEntry that) { |
| if (that == this) { |
| return 0; |
| } |
| |
| // Note that this is always 0, 1 or -1. |
| int priorityDiff = mPriority - that.mPriority; |
| if (priorityDiff == 0) { |
| // Sequence ids are unique, so the == case cannot occur. |
| return (mSequenceId < that.mSequenceId) ? -1 : 1; |
| } |
| |
| return priorityDiff; |
| } |
| } |
| |
| private void setCurrentParams(MessageParams p) { |
| if (DBG_THREADING) { |
| if (p != null) { |
| Log.d(TAG, "Started handling :" + p); |
| } else { |
| Log.d(TAG, "End handling : " + mCurrentParams); |
| } |
| } |
| mCurrentParams = p; |
| } |
| |
| private MessageParams getCurrentParams() { |
| return mCurrentParams; |
| } |
| |
| // ----------------------------------------- |
| // Methods for dealing with individual messages; the methods |
| // below do the actual work. |
| // ----------------------------------------- |
| |
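| // Dispatches a dequeued entry to the matching handler based on mWhat. |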
| private void handleMessage(ListEntry entry) { |
| final MessageParams msg = entry.mMessage; |
| if (entry.mWhat == SYNTHESIS_START) { |
| handleSynthesisStart(msg); |
| } else if (entry.mWhat == SYNTHESIS_DATA_AVAILABLE) { |
| handleSynthesisDataAvailable(msg); |
| } else if (entry.mWhat == SYNTHESIS_DONE) { |
| handleSynthesisDone(msg); |
| } else if (entry.mWhat == PLAY_AUDIO) { |
| handleAudio(msg); |
| } else if (entry.mWhat == PLAY_SILENCE) { |
| handleSilence(msg); |
| } |
| } |
| |
| // Currently implemented as blocking the audio playback thread for the |
| // specified duration. If a call to stop() is made, the thread |
| // unblocks. |
| private void handleSilence(MessageParams msg) { |
| if (DBG) Log.d(TAG, "handleSilence()"); |
| SilenceMessageParams params = (SilenceMessageParams) msg; |
| params.getDispatcher().dispatchOnStart(); |
| if (params.getSilenceDurationMs() > 0) { |
| params.getConditionVariable().block(params.getSilenceDurationMs()); |
| } |
| params.getDispatcher().dispatchOnDone(); |
| if (DBG) Log.d(TAG, "handleSilence() done."); |
| } |
| |
| // Plays back audio from a given URI. No TTS engine involvement here. |
| private void handleAudio(MessageParams msg) { |
| if (DBG) Log.d(TAG, "handleAudio()"); |
| AudioMessageParams params = (AudioMessageParams) msg; |
| params.getDispatcher().dispatchOnStart(); |
| // Note that the BlockingMediaPlayer spawns a separate thread. |
| // |
| // TODO: This can be avoided. |
| params.getPlayer().startAndWait(); |
| params.getDispatcher().dispatchOnDone(); |
| if (DBG) Log.d(TAG, "handleAudio() done."); |
| } |
| |
| // Denotes the start of a new synthesis request. We create a new |
| // audio track, and prepare it for incoming data. |
| // |
| // Note that since all TTS synthesis happens on a single thread, we |
| // should ALWAYS see the following order : |
| // |
| // handleSynthesisStart -> handleSynthesisDataAvailable(*) -> handleSynthesisDone |
| private void handleSynthesisStart(MessageParams msg) { |
| if (DBG) Log.d(TAG, "handleSynthesisStart()"); |
| final SynthesisMessageParams param = (SynthesisMessageParams) msg; |
| |
| // Oops, looks like the engine forgot to call done(). We go through |
| // extra trouble to clean up the data to prevent the AudioTrack |
| // resources from being leaked. |
| if (mLastSynthesisRequest != null) { |
| Log.e(TAG, "Error : Missing call to done() for request : " + |
| mLastSynthesisRequest); |
| handleSynthesisDone(mLastSynthesisRequest); |
| } |
| |
| mLastSynthesisRequest = param; |
| |
| // Create the audio track. Note that this may fail and return null, |
| // in which case subsequent calls to handleSynthesisDataAvailable will |
| // log a warning and drop the data. |
| final AudioTrack audioTrack = createStreamingAudioTrack(param); |
| |
| if (DBG && audioTrack != null) { |
| Log.d(TAG, "Created audio track [" + audioTrack.hashCode() + "]"); |
| } |
| |
| param.setAudioTrack(audioTrack); |
| msg.getDispatcher().dispatchOnStart(); |
| } |
| |
| // More data available to be flushed to the audio track. |
| private void handleSynthesisDataAvailable(MessageParams msg) { |
| final SynthesisMessageParams param = (SynthesisMessageParams) msg; |
| if (param.getAudioTrack() == null) { |
| Log.w(TAG, "Error : null audio track in handleDataAvailable : " + param); |
| return; |
| } |
| |
| if (param != mLastSynthesisRequest) { |
| Log.e(TAG, "Call to dataAvailable without done() / start()"); |
| return; |
| } |
| |
| final AudioTrack audioTrack = param.getAudioTrack(); |
| final SynthesisMessageParams.ListEntry bufferCopy = param.getNextBuffer(); |
| |
| if (bufferCopy == null) { |
| Log.e(TAG, "No buffers available to play."); |
| return; |
| } |
| |
| int playState = audioTrack.getPlayState(); |
| if (playState == AudioTrack.PLAYSTATE_STOPPED) { |
| if (DBG) Log.d(TAG, "AudioTrack stopped, restarting : " + audioTrack.hashCode()); |
| audioTrack.play(); |
| } |
| int count = 0; |
| while (count < bufferCopy.mBytes.length) { |
| // Note that we don't take bufferCopy.mOffset into account because |
| // it is guaranteed to be 0. The size argument must be the number of |
| // bytes remaining, not the full buffer length, otherwise a partial |
| // write would cause the next call to run past the end of the buffer. |
| int written = audioTrack.write(bufferCopy.mBytes, count, |
| bufferCopy.mBytes.length - count); |
| if (written <= 0) { |
| break; |
| } |
| count += written; |
| } |
| param.mBytesWritten += count; |
| param.mLogger.onPlaybackStart(); |
| } |
| |
| // Wait for the audio track to stop playing, and then release its resources. |
| private void handleSynthesisDone(MessageParams msg) { |
| final SynthesisMessageParams params = (SynthesisMessageParams) msg; |
| |
| if (DBG) Log.d(TAG, "handleSynthesisDone()"); |
| final AudioTrack audioTrack = params.getAudioTrack(); |
| |
| if (audioTrack == null) { |
| // There was already a call to handleSynthesisDone for |
| // this token. |
| return; |
| } |
| |
| if (params.mBytesWritten < params.mAudioBufferSize) { |
| if (DBG) Log.d(TAG, "Stopping audio track to flush audio, state was : " + |
| audioTrack.getPlayState()); |
| params.mIsShortUtterance = true; |
| audioTrack.stop(); |
| } |
| |
| if (DBG) Log.d(TAG, "Waiting for audio track to complete : " + |
| audioTrack.hashCode()); |
| blockUntilDone(params); |
| if (DBG) Log.d(TAG, "Releasing audio track [" + audioTrack.hashCode() + "]"); |
| |
| // The last call to AudioTrack.write() will return only after |
| // all data from the audioTrack has been sent to the mixer, so |
| // it's safe to release at this point. Make sure release() and the call |
| // that sets the audio track to null are performed atomically. |
| synchronized (this) { |
| // Never allow the audioTrack to be observed in a state where |
| // it is released but non null. The only case this might happen |
| // is in the various stopFoo methods that call AudioTrack#stop from |
| // different threads, but they are synchronized on AudioPlaybackHandler#this |
| // too. |
| audioTrack.release(); |
| params.setAudioTrack(null); |
| } |
| if (params.isError()) { |
| params.getDispatcher().dispatchOnError(); |
| } else { |
| params.getDispatcher().dispatchOnDone(); |
| } |
| mLastSynthesisRequest = null; |
| params.mLogger.onWriteData(); |
| } |
| |
| /** |
| * The minimum increment of time to wait for an AudioTrack to finish |
| * playing. |
| */ |
| private static final long MIN_SLEEP_TIME_MS = 20; |
| |
| /** |
| * The maximum increment of time to sleep while waiting for an AudioTrack |
| * to finish playing. |
| */ |
| private static final long MAX_SLEEP_TIME_MS = 2500; |
| |
| /** |
| * The maximum amount of time to wait for an audio track to make progress while |
| * it remains in PLAYSTATE_PLAYING. This should never happen in normal usage, but |
| * could happen in exceptional circumstances like a media_server crash. |
| */ |
| private static final long MAX_PROGRESS_WAIT_MS = MAX_SLEEP_TIME_MS; |
| |
| private static void blockUntilDone(SynthesisMessageParams params) { |
| if (params.mAudioTrack == null || params.mBytesWritten <= 0) { |
| return; |
| } |
| |
| if (params.mIsShortUtterance) { |
| // In this case we would have called AudioTrack#stop() to flush |
| // buffers to the mixer. This makes the playback head position |
| // unobservable and notification markers do not work reliably. We |
| // have no option but to wait until we think the track would finish |
| // playing and release it after. |
| // |
| // This isn't as bad as it looks because (a) We won't end up waiting |
| // for much longer than we should because even at 4kHz mono, a short |
| // utterance weighs in at about 2 seconds, and (b) such short utterances |
| // are expected to be relatively infrequent and in a stream of utterances |
| // this shows up as a slightly longer pause. |
| blockUntilEstimatedCompletion(params); |
| } else { |
| blockUntilCompletion(params); |
| } |
| } |
| |
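| // Sleeps for the estimated duration of the data written to the track, |
| // computed from the frame count and sample rate. For example, 8000 |
| // frames at 8000Hz amount to one second of audio. |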
| private static void blockUntilEstimatedCompletion(SynthesisMessageParams params) { |
| final int lengthInFrames = params.mBytesWritten / params.mBytesPerFrame; |
| final long estimatedTimeMs = (lengthInFrames * 1000 / params.mSampleRateInHz); |
| |
| if (DBG) Log.d(TAG, "About to sleep for: " + estimatedTimeMs + "ms for a short utterance"); |
| |
| try { |
| Thread.sleep(estimatedTimeMs); |
| } catch (InterruptedException ie) { |
| // Do nothing. |
| } |
| } |
| |
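| // Polls AudioTrack#getPlaybackHeadPosition until all written frames |
| // have been played out, sleeping on each iteration for an estimate of |
| // the remaining playback time, clipped to |
| // [MIN_SLEEP_TIME_MS, MAX_SLEEP_TIME_MS]. |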
| private static void blockUntilCompletion(SynthesisMessageParams params) { |
| final AudioTrack audioTrack = params.mAudioTrack; |
| final int lengthInFrames = params.mBytesWritten / params.mBytesPerFrame; |
| |
| int previousPosition = -1; |
| int currentPosition = 0; |
| long blockedTimeMs = 0; |
| |
| while ((currentPosition = audioTrack.getPlaybackHeadPosition()) < lengthInFrames && |
| audioTrack.getPlayState() == AudioTrack.PLAYSTATE_PLAYING) { |
| |
| final long estimatedTimeMs = ((lengthInFrames - currentPosition) * 1000) / |
| audioTrack.getSampleRate(); |
| final long sleepTimeMs = clip(estimatedTimeMs, MIN_SLEEP_TIME_MS, MAX_SLEEP_TIME_MS); |
| |
| // Check if the audio track has made progress since the last |
| // iteration. If it hasn't, add the time spent sleeping in the |
| // last iteration to the total blocked time. |
| if (currentPosition == previousPosition) { |
| // This works only because the sleep time calculated in the |
| // previous iteration would have been the same. |
| blockedTimeMs += sleepTimeMs; |
| // If we've taken too long to make progress, bail. |
| if (blockedTimeMs > MAX_PROGRESS_WAIT_MS) { |
| Log.w(TAG, "Waited unsuccessfully for " + MAX_PROGRESS_WAIT_MS + "ms " + |
| "for AudioTrack to make progress, aborting."); |
| break; |
| } |
| } else { |
| blockedTimeMs = 0; |
| } |
| previousPosition = currentPosition; |
| |
| if (DBG) Log.d(TAG, "About to sleep for : " + sleepTimeMs + " ms," + |
| " Playback position : " + currentPosition + ", Length in frames : " |
| + lengthInFrames); |
| try { |
| Thread.sleep(sleepTimeMs); |
| } catch (InterruptedException ie) { |
| break; |
| } |
| } |
| } |
| |
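| // Clamps value to the range [min, max]. |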
| private static long clip(long value, long min, long max) { |
| if (value < min) { |
| return min; |
| } |
| |
| if (value > max) { |
| return max; |
| } |
| |
| return value; |
| } |
| |
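| // Creates a streaming mode AudioTrack for the given synthesis request, |
| // with a buffer of at least MIN_AUDIO_BUFFER_SIZE bytes. Returns null |
| // if the track could not be initialized. |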
| private static AudioTrack createStreamingAudioTrack(SynthesisMessageParams params) { |
| final int channelConfig = getChannelConfig(params.mChannelCount); |
| final int sampleRateInHz = params.mSampleRateInHz; |
| final int audioFormat = params.mAudioFormat; |
| |
| int minBufferSizeInBytes = |
| AudioTrack.getMinBufferSize(sampleRateInHz, channelConfig, audioFormat); |
| int bufferSizeInBytes = Math.max(MIN_AUDIO_BUFFER_SIZE, minBufferSizeInBytes); |
| |
| AudioTrack audioTrack = new AudioTrack(params.mStreamType, sampleRateInHz, channelConfig, |
| audioFormat, bufferSizeInBytes, AudioTrack.MODE_STREAM); |
| if (audioTrack.getState() != AudioTrack.STATE_INITIALIZED) { |
| Log.w(TAG, "Unable to create audio track."); |
| audioTrack.release(); |
| return null; |
| } |
| params.mAudioBufferSize = bufferSizeInBytes; |
| |
| setupVolume(audioTrack, params.mVolume, params.mPan); |
| return audioTrack; |
| } |
| |
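| // Maps a channel count to an AudioFormat channel constant. Returns 0 |
| // for unsupported channel counts. |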
| static int getChannelConfig(int channelCount) { |
| if (channelCount == 1) { |
| return AudioFormat.CHANNEL_OUT_MONO; |
| } else if (channelCount == 2) { |
| return AudioFormat.CHANNEL_OUT_STEREO; |
| } |
| |
| return 0; |
| } |
| |
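| // Applies volume and pan by scaling the per-channel volumes. A positive |
| // pan attenuates the left channel and a negative pan the right one; for |
| // example, volume=1.0f and pan=0.5f yields volLeft=0.5f, volRight=1.0f. |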
| private static void setupVolume(AudioTrack audioTrack, float volume, float pan) { |
| float vol = clip(volume, 0.0f, 1.0f); |
| float panning = clip(pan, -1.0f, 1.0f); |
| float volLeft = vol; |
| float volRight = vol; |
| if (panning > 0.0f) { |
| volLeft *= (1.0f - panning); |
| } else if (panning < 0.0f) { |
| volRight *= (1.0f + panning); |
| } |
| if (DBG) Log.d(TAG, "volLeft=" + volLeft + ",volRight=" + volRight); |
| if (audioTrack.setStereoVolume(volLeft, volRight) != AudioTrack.SUCCESS) { |
| Log.e(TAG, "Failed to set volume"); |
| } |
| } |
| |
| private static float clip(float value, float min, float max) { |
| return value > max ? max : (value < min ? min : value); |
| } |
| |
| } |