| /* |
| * Copyright (C) 2011 The Android Open Source Project |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); you may not |
| * use this file except in compliance with the License. You may obtain a copy of |
| * the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT |
| * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the |
| * License for the specific language governing permissions and limitations under |
| * the License. |
| */ |
| package android.speech.tts; |
| |
| import android.media.AudioFormat; |
| import android.media.AudioTrack; |
| import android.util.Log; |
| |
| /** |
| * Speech synthesis request that plays the audio as it is received. |
| */ |
| class PlaybackSynthesisRequest extends SynthesisRequest { |
| |
| private static final String TAG = "PlaybackSynthesisRequest"; |
| private static final boolean DBG = false; |
| |
| private static final int MIN_AUDIO_BUFFER_SIZE = 8192; |
| |
| /** |
| * Audio stream type. Must be one of the STREAM_ contants defined in |
| * {@link android.media.AudioManager}. |
| */ |
| private final int mStreamType; |
| |
| /** |
| * Volume, in the range [0.0f, 1.0f]. The default value is |
| * {@link TextToSpeech.Engine#DEFAULT_VOLUME} (1.0f). |
| */ |
| private final float mVolume; |
| |
| /** |
| * Left/right position of the audio, in the range [-1.0f, 1.0f]. |
| * The default value is {@link TextToSpeech.Engine#DEFAULT_PAN} (0.0f). |
| */ |
| private final float mPan; |
| |
| private final Object mStateLock = new Object(); |
| private AudioTrack mAudioTrack = null; |
| private boolean mStopped = false; |
| private boolean mDone = false; |
| |
| PlaybackSynthesisRequest(String text, int streamType, float volume, float pan) { |
| super(text); |
| mStreamType = streamType; |
| mVolume = volume; |
| mPan = pan; |
| } |
| |
| @Override |
| void stop() { |
| if (DBG) Log.d(TAG, "stop()"); |
| synchronized (mStateLock) { |
| mStopped = true; |
| cleanUp(); |
| } |
| } |
| |
| private void cleanUp() { |
| if (DBG) Log.d(TAG, "cleanUp()"); |
| if (mAudioTrack != null) { |
| mAudioTrack.flush(); |
| mAudioTrack.stop(); |
| mAudioTrack.release(); |
| mAudioTrack = null; |
| } |
| } |
| |
| @Override |
| public int getMaxBufferSize() { |
| // The AudioTrack buffer will be at least MIN_AUDIO_BUFFER_SIZE, so that should always be |
| // a safe buffer size to pass in. |
| return MIN_AUDIO_BUFFER_SIZE; |
| } |
| |
| @Override |
| boolean isDone() { |
| return mDone; |
| } |
| |
| // TODO: add a thread that writes to the AudioTrack? |
| @Override |
| public int start(int sampleRateInHz, int audioFormat, int channelCount) { |
| if (DBG) { |
| Log.d(TAG, "start(" + sampleRateInHz + "," + audioFormat |
| + "," + channelCount + ")"); |
| } |
| |
| synchronized (mStateLock) { |
| if (mStopped) { |
| if (DBG) Log.d(TAG, "Request has been aborted."); |
| return TextToSpeech.ERROR; |
| } |
| if (mAudioTrack != null) { |
| Log.e(TAG, "start() called twice"); |
| cleanUp(); |
| return TextToSpeech.ERROR; |
| } |
| |
| mAudioTrack = createStreamingAudioTrack(sampleRateInHz, audioFormat, channelCount); |
| if (mAudioTrack == null) { |
| return TextToSpeech.ERROR; |
| } |
| } |
| |
| return TextToSpeech.SUCCESS; |
| } |
| |
| private void setupVolume(AudioTrack audioTrack, float volume, float pan) { |
| float vol = clip(volume, 0.0f, 1.0f); |
| float panning = clip(pan, -1.0f, 1.0f); |
| float volLeft = vol; |
| float volRight = vol; |
| if (panning > 0.0f) { |
| volLeft *= (1.0f - panning); |
| } else if (panning < 0.0f) { |
| volRight *= (1.0f + panning); |
| } |
| if (DBG) Log.d(TAG, "volLeft=" + volLeft + ",volRight=" + volRight); |
| if (audioTrack.setStereoVolume(volLeft, volRight) != AudioTrack.SUCCESS) { |
| Log.e(TAG, "Failed to set volume"); |
| } |
| } |
| |
| private float clip(float value, float min, float max) { |
| return value > max ? max : (value < min ? min : value); |
| } |
| |
| @Override |
| public int audioAvailable(byte[] buffer, int offset, int length) { |
| if (DBG) { |
| Log.d(TAG, "audioAvailable(byte[" + buffer.length + "]," |
| + offset + "," + length + ")"); |
| } |
| if (length > getMaxBufferSize()) { |
| throw new IllegalArgumentException("buffer is too large (" + length + " bytes)"); |
| } |
| synchronized (mStateLock) { |
| if (mStopped) { |
| if (DBG) Log.d(TAG, "Request has been aborted."); |
| return TextToSpeech.ERROR; |
| } |
| if (mAudioTrack == null) { |
| Log.e(TAG, "audioAvailable(): Not started"); |
| return TextToSpeech.ERROR; |
| } |
| int playState = mAudioTrack.getPlayState(); |
| if (playState == AudioTrack.PLAYSTATE_STOPPED) { |
| if (DBG) Log.d(TAG, "AudioTrack stopped, restarting"); |
| mAudioTrack.play(); |
| } |
| // TODO: loop until all data is written? |
| if (DBG) Log.d(TAG, "AudioTrack.write()"); |
| int count = mAudioTrack.write(buffer, offset, length); |
| if (DBG) Log.d(TAG, "AudioTrack.write() returned " + count); |
| if (count < 0) { |
| Log.e(TAG, "Writing to AudioTrack failed: " + count); |
| cleanUp(); |
| return TextToSpeech.ERROR; |
| } else { |
| return TextToSpeech.SUCCESS; |
| } |
| } |
| } |
| |
| @Override |
| public int done() { |
| if (DBG) Log.d(TAG, "done()"); |
| synchronized (mStateLock) { |
| if (mStopped) { |
| if (DBG) Log.d(TAG, "Request has been aborted."); |
| return TextToSpeech.ERROR; |
| } |
| if (mAudioTrack == null) { |
| Log.e(TAG, "done(): Not started"); |
| return TextToSpeech.ERROR; |
| } |
| mDone = true; |
| cleanUp(); |
| } |
| return TextToSpeech.SUCCESS; |
| } |
| |
| @Override |
| public void error() { |
| if (DBG) Log.d(TAG, "error()"); |
| synchronized (mStateLock) { |
| cleanUp(); |
| } |
| } |
| |
| @Override |
| public int completeAudioAvailable(int sampleRateInHz, int audioFormat, int channelCount, |
| byte[] buffer, int offset, int length) { |
| if (DBG) { |
| Log.d(TAG, "completeAudioAvailable(" + sampleRateInHz + "," + audioFormat |
| + "," + channelCount + "byte[" + buffer.length + "]," |
| + offset + "," + length + ")"); |
| } |
| |
| synchronized (mStateLock) { |
| if (mStopped) { |
| if (DBG) Log.d(TAG, "Request has been aborted."); |
| return TextToSpeech.ERROR; |
| } |
| if (mAudioTrack != null) { |
| Log.e(TAG, "start() called before completeAudioAvailable()"); |
| cleanUp(); |
| return TextToSpeech.ERROR; |
| } |
| |
| int channelConfig = getChannelConfig(channelCount); |
| if (channelConfig < 0) { |
| Log.e(TAG, "Unsupported number of channels :" + channelCount); |
| cleanUp(); |
| return TextToSpeech.ERROR; |
| } |
| int bytesPerFrame = getBytesPerFrame(audioFormat); |
| if (bytesPerFrame < 0) { |
| Log.e(TAG, "Unsupported audio format :" + audioFormat); |
| cleanUp(); |
| return TextToSpeech.ERROR; |
| } |
| |
| mAudioTrack = new AudioTrack(mStreamType, sampleRateInHz, channelConfig, |
| audioFormat, buffer.length, AudioTrack.MODE_STATIC); |
| if (mAudioTrack == null) { |
| return TextToSpeech.ERROR; |
| } |
| |
| try { |
| mAudioTrack.write(buffer, offset, length); |
| setupVolume(mAudioTrack, mVolume, mPan); |
| mAudioTrack.play(); |
| blockUntilDone(mAudioTrack, bytesPerFrame, length); |
| mDone = true; |
| if (DBG) Log.d(TAG, "Wrote data to audio track succesfully : " + length); |
| } catch (IllegalStateException ex) { |
| Log.e(TAG, "Playback error", ex); |
| return TextToSpeech.ERROR; |
| } finally { |
| cleanUp(); |
| } |
| } |
| |
| return TextToSpeech.SUCCESS; |
| } |
| |
| private void blockUntilDone(AudioTrack audioTrack, int bytesPerFrame, int length) { |
| int lengthInFrames = length / bytesPerFrame; |
| int currentPosition = 0; |
| while ((currentPosition = audioTrack.getPlaybackHeadPosition()) < lengthInFrames) { |
| long estimatedTimeMs = ((lengthInFrames - currentPosition) * 1000) / |
| audioTrack.getSampleRate(); |
| if (DBG) Log.d(TAG, "About to sleep for : " + estimatedTimeMs + " ms," + |
| " Playback position : " + currentPosition); |
| try { |
| Thread.sleep(estimatedTimeMs); |
| } catch (InterruptedException ie) { |
| break; |
| } |
| } |
| } |
| |
| private int getBytesPerFrame(int audioFormat) { |
| if (audioFormat == AudioFormat.ENCODING_PCM_8BIT) { |
| return 1; |
| } else if (audioFormat == AudioFormat.ENCODING_PCM_16BIT) { |
| return 2; |
| } |
| |
| return -1; |
| } |
| |
| private int getChannelConfig(int channelCount) { |
| if (channelCount == 1) { |
| return AudioFormat.CHANNEL_OUT_MONO; |
| } else if (channelCount == 2){ |
| return AudioFormat.CHANNEL_OUT_STEREO; |
| } |
| |
| return -1; |
| } |
| |
| private AudioTrack createStreamingAudioTrack(int sampleRateInHz, int audioFormat, |
| int channelCount) { |
| int channelConfig = getChannelConfig(channelCount); |
| |
| if (channelConfig < 0) { |
| Log.e(TAG, "Unsupported number of channels : " + channelCount); |
| return null; |
| } |
| |
| int minBufferSizeInBytes |
| = AudioTrack.getMinBufferSize(sampleRateInHz, channelConfig, audioFormat); |
| int bufferSizeInBytes = Math.max(MIN_AUDIO_BUFFER_SIZE, minBufferSizeInBytes); |
| AudioTrack audioTrack = new AudioTrack(mStreamType, sampleRateInHz, channelConfig, |
| audioFormat, bufferSizeInBytes, AudioTrack.MODE_STREAM); |
| if (audioTrack == null) { |
| return null; |
| } |
| |
| if (audioTrack.getState() != AudioTrack.STATE_INITIALIZED) { |
| audioTrack.release(); |
| return null; |
| } |
| setupVolume(audioTrack, mVolume, mPan); |
| return audioTrack; |
| } |
| } |