| // Copyright 2011 Google Inc. All Rights Reserved. |
| |
| package android.speech.tts; |
| |
| import android.media.AudioFormat; |
| import android.media.AudioTrack; |
| import android.speech.tts.TextToSpeechService.AudioOutputParams; |
| import android.util.Log; |
| |
| /** |
| * Exposes parts of the {@link AudioTrack} API by delegating calls to an |
| * underlying {@link AudioTrack}. Additionally, provides methods like |
 * {@link #waitAndRelease()} that will block until all {@link AudioTrack}
| * data has been flushed to the mixer, and is estimated to have completed |
| * playback. |
| */ |
| class BlockingAudioTrack { |
| private static final String TAG = "TTS.BlockingAudioTrack"; |
| private static final boolean DBG = false; |
| |
| |
| /** |
| * The minimum increment of time to wait for an AudioTrack to finish |
| * playing. |
| */ |
| private static final long MIN_SLEEP_TIME_MS = 20; |
| |
| /** |
| * The maximum increment of time to sleep while waiting for an AudioTrack |
| * to finish playing. |
| */ |
| private static final long MAX_SLEEP_TIME_MS = 2500; |
| |
| /** |
| * The maximum amount of time to wait for an audio track to make progress while |
| * it remains in PLAYSTATE_PLAYING. This should never happen in normal usage, but |
| * could happen in exceptional circumstances like a media_server crash. |
| */ |
| private static final long MAX_PROGRESS_WAIT_MS = MAX_SLEEP_TIME_MS; |
| |
| /** |
| * Minimum size of the buffer of the underlying {@link android.media.AudioTrack} |
| * we create. |
| */ |
| private static final int MIN_AUDIO_BUFFER_SIZE = 8192; |
| |
| |
| private final AudioOutputParams mAudioParams; |
| private final int mSampleRateInHz; |
| private final int mAudioFormat; |
| private final int mChannelCount; |
| |
| |
| private final int mBytesPerFrame; |
| /** |
| * A "short utterance" is one that uses less bytes than the audio |
| * track buffer size (mAudioBufferSize). In this case, we need to call |
| * {@link AudioTrack#stop()} to send pending buffers to the mixer, and slightly |
| * different logic is required to wait for the track to finish. |
| * |
| * Not volatile, accessed only from the audio playback thread. |
| */ |
| private boolean mIsShortUtterance; |
| /** |
| * Will be valid after a call to {@link #init()}. |
| */ |
| private int mAudioBufferSize; |
| private int mBytesWritten = 0; |
| |
| // Need to be seen by stop() which can be called from another thread. mAudioTrack will be |
| // set to null only after waitAndRelease(). |
| private Object mAudioTrackLock = new Object(); |
| private AudioTrack mAudioTrack; |
| private volatile boolean mStopped; |
| |
| private int mSessionId; |
| |
| BlockingAudioTrack(AudioOutputParams audioParams, int sampleRate, |
| int audioFormat, int channelCount) { |
| mAudioParams = audioParams; |
| mSampleRateInHz = sampleRate; |
| mAudioFormat = audioFormat; |
| mChannelCount = channelCount; |
| |
| mBytesPerFrame = AudioFormat.getBytesPerSample(mAudioFormat) * mChannelCount; |
| mIsShortUtterance = false; |
| mAudioBufferSize = 0; |
| mBytesWritten = 0; |
| |
| mAudioTrack = null; |
| mStopped = false; |
| } |
| |
| public boolean init() { |
| AudioTrack track = createStreamingAudioTrack(); |
| synchronized (mAudioTrackLock) { |
| mAudioTrack = track; |
| } |
| |
| if (track == null) { |
| return false; |
| } else { |
| return true; |
| } |
| } |
| |
| public void stop() { |
| synchronized (mAudioTrackLock) { |
| if (mAudioTrack != null) { |
| mAudioTrack.stop(); |
| } |
| mStopped = true; |
| } |
| } |
| |
| public int write(byte[] data) { |
| AudioTrack track = null; |
| synchronized (mAudioTrackLock) { |
| track = mAudioTrack; |
| } |
| |
| if (track == null || mStopped) { |
| return -1; |
| } |
| final int bytesWritten = writeToAudioTrack(track, data); |
| |
| mBytesWritten += bytesWritten; |
| return bytesWritten; |
| } |
| |
| public void waitAndRelease() { |
| AudioTrack track = null; |
| synchronized (mAudioTrackLock) { |
| track = mAudioTrack; |
| } |
| if (track == null) { |
| if (DBG) Log.d(TAG, "Audio track null [duplicate call to waitAndRelease ?]"); |
| return; |
| } |
| |
| // For "small" audio tracks, we have to stop() them to make them mixable, |
| // else the audio subsystem will wait indefinitely for us to fill the buffer |
| // before rendering the track mixable. |
| // |
| // If mStopped is true, the track would already have been stopped, so not |
| // much point not doing that again. |
| if (mBytesWritten < mAudioBufferSize && !mStopped) { |
| if (DBG) { |
| Log.d(TAG, "Stopping audio track to flush audio, state was : " + |
| track.getPlayState() + ",stopped= " + mStopped); |
| } |
| |
| mIsShortUtterance = true; |
| track.stop(); |
| } |
| |
| // Block until the audio track is done only if we haven't stopped yet. |
| if (!mStopped) { |
| if (DBG) Log.d(TAG, "Waiting for audio track to complete : " + mAudioTrack.hashCode()); |
| blockUntilDone(mAudioTrack); |
| } |
| |
| // The last call to AudioTrack.write( ) will return only after |
| // all data from the audioTrack has been sent to the mixer, so |
| // it's safe to release at this point. |
| if (DBG) Log.d(TAG, "Releasing audio track [" + track.hashCode() + "]"); |
| synchronized(mAudioTrackLock) { |
| mAudioTrack = null; |
| } |
| track.release(); |
| } |
| |
| |
| static int getChannelConfig(int channelCount) { |
| if (channelCount == 1) { |
| return AudioFormat.CHANNEL_OUT_MONO; |
| } else if (channelCount == 2){ |
| return AudioFormat.CHANNEL_OUT_STEREO; |
| } |
| |
| return 0; |
| } |
| |
| long getAudioLengthMs(int numBytes) { |
| final int unconsumedFrames = numBytes / mBytesPerFrame; |
| final long estimatedTimeMs = unconsumedFrames * 1000 / mSampleRateInHz; |
| |
| return estimatedTimeMs; |
| } |
| |
| private static int writeToAudioTrack(AudioTrack audioTrack, byte[] bytes) { |
| if (audioTrack.getPlayState() != AudioTrack.PLAYSTATE_PLAYING) { |
| if (DBG) Log.d(TAG, "AudioTrack not playing, restarting : " + audioTrack.hashCode()); |
| audioTrack.play(); |
| } |
| |
| int count = 0; |
| while (count < bytes.length) { |
| // Note that we don't take bufferCopy.mOffset into account because |
| // it is guaranteed to be 0. |
| int written = audioTrack.write(bytes, count, bytes.length); |
| if (written <= 0) { |
| break; |
| } |
| count += written; |
| } |
| return count; |
| } |
| |
| private AudioTrack createStreamingAudioTrack() { |
| final int channelConfig = getChannelConfig(mChannelCount); |
| |
| int minBufferSizeInBytes |
| = AudioTrack.getMinBufferSize(mSampleRateInHz, channelConfig, mAudioFormat); |
| int bufferSizeInBytes = Math.max(MIN_AUDIO_BUFFER_SIZE, minBufferSizeInBytes); |
| |
| AudioFormat audioFormat = (new AudioFormat.Builder()) |
| .setChannelMask(channelConfig) |
| .setEncoding(mAudioFormat) |
| .setSampleRate(mSampleRateInHz).build(); |
| AudioTrack audioTrack = new AudioTrack(mAudioParams.mAudioAttributes, |
| audioFormat, bufferSizeInBytes, AudioTrack.MODE_STREAM, |
| mAudioParams.mSessionId); |
| |
| if (audioTrack.getState() != AudioTrack.STATE_INITIALIZED) { |
| Log.w(TAG, "Unable to create audio track."); |
| audioTrack.release(); |
| return null; |
| } |
| |
| mAudioBufferSize = bufferSizeInBytes; |
| |
| setupVolume(audioTrack, mAudioParams.mVolume, mAudioParams.mPan); |
| return audioTrack; |
| } |
| |
| private void blockUntilDone(AudioTrack audioTrack) { |
| if (mBytesWritten <= 0) { |
| return; |
| } |
| |
| if (mIsShortUtterance) { |
| // In this case we would have called AudioTrack#stop() to flush |
| // buffers to the mixer. This makes the playback head position |
| // unobservable and notification markers do not work reliably. We |
| // have no option but to wait until we think the track would finish |
| // playing and release it after. |
| // |
| // This isn't as bad as it looks because (a) We won't end up waiting |
| // for much longer than we should because even at 4khz mono, a short |
| // utterance weighs in at about 2 seconds, and (b) such short utterances |
| // are expected to be relatively infrequent and in a stream of utterances |
| // this shows up as a slightly longer pause. |
| blockUntilEstimatedCompletion(); |
| } else { |
| blockUntilCompletion(audioTrack); |
| } |
| } |
| |
| private void blockUntilEstimatedCompletion() { |
| final int lengthInFrames = mBytesWritten / mBytesPerFrame; |
| final long estimatedTimeMs = (lengthInFrames * 1000 / mSampleRateInHz); |
| |
| if (DBG) Log.d(TAG, "About to sleep for: " + estimatedTimeMs + "ms for a short utterance"); |
| |
| try { |
| Thread.sleep(estimatedTimeMs); |
| } catch (InterruptedException ie) { |
| // Do nothing. |
| } |
| } |
| |
| private void blockUntilCompletion(AudioTrack audioTrack) { |
| final int lengthInFrames = mBytesWritten / mBytesPerFrame; |
| |
| int previousPosition = -1; |
| int currentPosition = 0; |
| long blockedTimeMs = 0; |
| |
| while ((currentPosition = audioTrack.getPlaybackHeadPosition()) < lengthInFrames && |
| audioTrack.getPlayState() == AudioTrack.PLAYSTATE_PLAYING && !mStopped) { |
| |
| final long estimatedTimeMs = ((lengthInFrames - currentPosition) * 1000) / |
| audioTrack.getSampleRate(); |
| final long sleepTimeMs = clip(estimatedTimeMs, MIN_SLEEP_TIME_MS, MAX_SLEEP_TIME_MS); |
| |
| // Check if the audio track has made progress since the last loop |
| // iteration. We should then add in the amount of time that was |
| // spent sleeping in the last iteration. |
| if (currentPosition == previousPosition) { |
| // This works only because the sleep time that would have been calculated |
| // would be the same in the previous iteration too. |
| blockedTimeMs += sleepTimeMs; |
| // If we've taken too long to make progress, bail. |
| if (blockedTimeMs > MAX_PROGRESS_WAIT_MS) { |
| Log.w(TAG, "Waited unsuccessfully for " + MAX_PROGRESS_WAIT_MS + "ms " + |
| "for AudioTrack to make progress, Aborting"); |
| break; |
| } |
| } else { |
| blockedTimeMs = 0; |
| } |
| previousPosition = currentPosition; |
| |
| if (DBG) { |
| Log.d(TAG, "About to sleep for : " + sleepTimeMs + " ms," + |
| " Playback position : " + currentPosition + ", Length in frames : " |
| + lengthInFrames); |
| } |
| try { |
| Thread.sleep(sleepTimeMs); |
| } catch (InterruptedException ie) { |
| break; |
| } |
| } |
| } |
| |
| private static void setupVolume(AudioTrack audioTrack, float volume, float pan) { |
| final float vol = clip(volume, 0.0f, 1.0f); |
| final float panning = clip(pan, -1.0f, 1.0f); |
| |
| float volLeft = vol; |
| float volRight = vol; |
| if (panning > 0.0f) { |
| volLeft *= (1.0f - panning); |
| } else if (panning < 0.0f) { |
| volRight *= (1.0f + panning); |
| } |
| if (DBG) Log.d(TAG, "volLeft=" + volLeft + ",volRight=" + volRight); |
| if (audioTrack.setStereoVolume(volLeft, volRight) != AudioTrack.SUCCESS) { |
| Log.e(TAG, "Failed to set volume"); |
| } |
| } |
| |
| private static final long clip(long value, long min, long max) { |
| return value < min ? min : (value < max ? value : max); |
| } |
| |
| private static final float clip(float value, float min, float max) { |
| return value < min ? min : (value < max ? value : max); |
| } |
| |
| } |