blob: ab8f82f4567d37be66292db1e018ebeb42adf033 [file] [log] [blame]
/*
 * Copyright (C) 2011 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */
16package android.speech.tts;
17
18import android.media.AudioFormat;
Narayan Kamath563fd3a2011-10-27 13:41:54 +010019import android.os.FileUtils;
Bjorn Bringert50e657b2011-03-08 16:00:40 +000020import android.util.Log;
21
22import java.io.File;
Przemyslaw Szczepaniak5acb33a2013-02-08 16:36:25 +000023import java.io.FileOutputStream;
Bjorn Bringert50e657b2011-03-08 16:00:40 +000024import java.io.IOException;
25import java.io.RandomAccessFile;
26import java.nio.ByteBuffer;
27import java.nio.ByteOrder;
Przemyslaw Szczepaniak5acb33a2013-02-08 16:36:25 +000028import java.nio.channels.FileChannel;
Bjorn Bringert50e657b2011-03-08 16:00:40 +000029
30/**
31 * Speech synthesis request that writes the audio to a WAV file.
32 */
Narayan Kamathe22b69a2011-06-08 11:41:47 +010033class FileSynthesisCallback extends AbstractSynthesisCallback {
Bjorn Bringert50e657b2011-03-08 16:00:40 +000034
35 private static final String TAG = "FileSynthesisRequest";
36 private static final boolean DBG = false;
37
Bjorn Bringert71e0b482011-04-15 14:37:05 +010038 private static final int MAX_AUDIO_BUFFER_SIZE = 8192;
39
Bjorn Bringert50e657b2011-03-08 16:00:40 +000040 private static final int WAV_HEADER_LENGTH = 44;
41 private static final short WAV_FORMAT_PCM = 0x0001;
42
43 private final Object mStateLock = new Object();
Przemyslaw Szczepaniak5acb33a2013-02-08 16:36:25 +000044
Bjorn Bringert50e657b2011-03-08 16:00:40 +000045 private int mSampleRateInHz;
46 private int mAudioFormat;
47 private int mChannelCount;
Przemyslaw Szczepaniak5acb33a2013-02-08 16:36:25 +000048
49 private FileChannel mFileChannel;
50
51 private boolean mStarted = false;
Bjorn Bringert50e657b2011-03-08 16:00:40 +000052 private boolean mStopped = false;
Bjorn Bringert360eb162011-04-19 09:20:35 +010053 private boolean mDone = false;
Bjorn Bringert50e657b2011-03-08 16:00:40 +000054
Przemyslaw Szczepaniak5acb33a2013-02-08 16:36:25 +000055 FileSynthesisCallback(FileChannel fileChannel) {
56 mFileChannel = fileChannel;
Bjorn Bringert50e657b2011-03-08 16:00:40 +000057 }
58
59 @Override
60 void stop() {
61 synchronized (mStateLock) {
62 mStopped = true;
63 cleanUp();
64 }
65 }
66
67 /**
68 * Must be called while holding the monitor on {@link #mStateLock}.
69 */
70 private void cleanUp() {
Przemyslaw Szczepaniak5acb33a2013-02-08 16:36:25 +000071 closeFile();
Bjorn Bringert50e657b2011-03-08 16:00:40 +000072 }
73
74 /**
75 * Must be called while holding the monitor on {@link #mStateLock}.
76 */
Przemyslaw Szczepaniak5acb33a2013-02-08 16:36:25 +000077 private void closeFile() {
Bjorn Bringert50e657b2011-03-08 16:00:40 +000078 try {
Przemyslaw Szczepaniak5acb33a2013-02-08 16:36:25 +000079 if (mFileChannel != null) {
80 mFileChannel.close();
81 mFileChannel = null;
Bjorn Bringert50e657b2011-03-08 16:00:40 +000082 }
83 } catch (IOException ex) {
Przemyslaw Szczepaniak5acb33a2013-02-08 16:36:25 +000084 Log.e(TAG, "Failed to close output file descriptor", ex);
Narayan Kamath563fd3a2011-10-27 13:41:54 +010085 }
Bjorn Bringert50e657b2011-03-08 16:00:40 +000086 }
87
88 @Override
Bjorn Bringert71e0b482011-04-15 14:37:05 +010089 public int getMaxBufferSize() {
90 return MAX_AUDIO_BUFFER_SIZE;
91 }
92
93 @Override
Bjorn Bringert360eb162011-04-19 09:20:35 +010094 boolean isDone() {
95 return mDone;
96 }
97
98 @Override
Bjorn Bringert50e657b2011-03-08 16:00:40 +000099 public int start(int sampleRateInHz, int audioFormat, int channelCount) {
100 if (DBG) {
101 Log.d(TAG, "FileSynthesisRequest.start(" + sampleRateInHz + "," + audioFormat
102 + "," + channelCount + ")");
103 }
104 synchronized (mStateLock) {
105 if (mStopped) {
106 if (DBG) Log.d(TAG, "Request has been aborted.");
107 return TextToSpeech.ERROR;
108 }
Przemyslaw Szczepaniak5acb33a2013-02-08 16:36:25 +0000109 if (mStarted) {
Bjorn Bringert50e657b2011-03-08 16:00:40 +0000110 cleanUp();
111 throw new IllegalArgumentException("FileSynthesisRequest.start() called twice");
112 }
Przemyslaw Szczepaniak5acb33a2013-02-08 16:36:25 +0000113 mStarted = true;
Bjorn Bringert50e657b2011-03-08 16:00:40 +0000114 mSampleRateInHz = sampleRateInHz;
115 mAudioFormat = audioFormat;
116 mChannelCount = channelCount;
Przemyslaw Szczepaniak5acb33a2013-02-08 16:36:25 +0000117
Bjorn Bringert50e657b2011-03-08 16:00:40 +0000118 try {
Przemyslaw Szczepaniak5acb33a2013-02-08 16:36:25 +0000119 mFileChannel.write(ByteBuffer.allocate(WAV_HEADER_LENGTH));
Bjorn Bringert50e657b2011-03-08 16:00:40 +0000120 return TextToSpeech.SUCCESS;
121 } catch (IOException ex) {
Przemyslaw Szczepaniak5acb33a2013-02-08 16:36:25 +0000122 Log.e(TAG, "Failed to write wav header to output file descriptor" + ex);
Bjorn Bringert50e657b2011-03-08 16:00:40 +0000123 cleanUp();
124 return TextToSpeech.ERROR;
125 }
126 }
127 }
128
129 @Override
130 public int audioAvailable(byte[] buffer, int offset, int length) {
131 if (DBG) {
132 Log.d(TAG, "FileSynthesisRequest.audioAvailable(" + buffer + "," + offset
133 + "," + length + ")");
134 }
135 synchronized (mStateLock) {
136 if (mStopped) {
137 if (DBG) Log.d(TAG, "Request has been aborted.");
138 return TextToSpeech.ERROR;
139 }
Przemyslaw Szczepaniak5acb33a2013-02-08 16:36:25 +0000140 if (mFileChannel == null) {
Bjorn Bringert50e657b2011-03-08 16:00:40 +0000141 Log.e(TAG, "File not open");
142 return TextToSpeech.ERROR;
143 }
144 try {
Przemyslaw Szczepaniak5acb33a2013-02-08 16:36:25 +0000145 mFileChannel.write(ByteBuffer.wrap(buffer, offset, length));
Bjorn Bringert50e657b2011-03-08 16:00:40 +0000146 return TextToSpeech.SUCCESS;
147 } catch (IOException ex) {
Przemyslaw Szczepaniak5acb33a2013-02-08 16:36:25 +0000148 Log.e(TAG, "Failed to write to output file descriptor", ex);
Bjorn Bringert50e657b2011-03-08 16:00:40 +0000149 cleanUp();
150 return TextToSpeech.ERROR;
151 }
152 }
153 }
154
155 @Override
156 public int done() {
157 if (DBG) Log.d(TAG, "FileSynthesisRequest.done()");
158 synchronized (mStateLock) {
Narayan Kamath9c3d7a82012-07-20 18:01:43 +0100159 if (mDone) {
160 if (DBG) Log.d(TAG, "Duplicate call to done()");
161 // This preserves existing behaviour. Earlier, if done was called twice
162 // we'd return ERROR because mFile == null and we'd add to logspam.
163 return TextToSpeech.ERROR;
164 }
Bjorn Bringert50e657b2011-03-08 16:00:40 +0000165 if (mStopped) {
166 if (DBG) Log.d(TAG, "Request has been aborted.");
167 return TextToSpeech.ERROR;
168 }
Przemyslaw Szczepaniak5acb33a2013-02-08 16:36:25 +0000169 if (mFileChannel == null) {
Bjorn Bringert50e657b2011-03-08 16:00:40 +0000170 Log.e(TAG, "File not open");
171 return TextToSpeech.ERROR;
172 }
173 try {
174 // Write WAV header at start of file
Przemyslaw Szczepaniak5acb33a2013-02-08 16:36:25 +0000175 mFileChannel.position(0);
176 int dataLength = (int) (mFileChannel.size() - WAV_HEADER_LENGTH);
177 mFileChannel.write(
Bjorn Bringert71e0b482011-04-15 14:37:05 +0100178 makeWavHeader(mSampleRateInHz, mAudioFormat, mChannelCount, dataLength));
Przemyslaw Szczepaniak5acb33a2013-02-08 16:36:25 +0000179 closeFile();
Bjorn Bringert360eb162011-04-19 09:20:35 +0100180 mDone = true;
Bjorn Bringert50e657b2011-03-08 16:00:40 +0000181 return TextToSpeech.SUCCESS;
182 } catch (IOException ex) {
Przemyslaw Szczepaniak5acb33a2013-02-08 16:36:25 +0000183 Log.e(TAG, "Failed to write to output file descriptor", ex);
Bjorn Bringert50e657b2011-03-08 16:00:40 +0000184 cleanUp();
185 return TextToSpeech.ERROR;
186 }
187 }
188 }
189
Bjorn Bringert71e0b482011-04-15 14:37:05 +0100190 @Override
Bjorn Bringert360eb162011-04-19 09:20:35 +0100191 public void error() {
192 if (DBG) Log.d(TAG, "FileSynthesisRequest.error()");
193 synchronized (mStateLock) {
194 cleanUp();
195 }
196 }
197
Przemyslaw Szczepaniak5acb33a2013-02-08 16:36:25 +0000198 private ByteBuffer makeWavHeader(int sampleRateInHz, int audioFormat, int channelCount,
Bjorn Bringert71e0b482011-04-15 14:37:05 +0100199 int dataLength) {
Bjorn Bringert50e657b2011-03-08 16:00:40 +0000200 // TODO: is AudioFormat.ENCODING_DEFAULT always the same as ENCODING_PCM_16BIT?
201 int sampleSizeInBytes = (audioFormat == AudioFormat.ENCODING_PCM_8BIT ? 1 : 2);
202 int byteRate = sampleRateInHz * sampleSizeInBytes * channelCount;
203 short blockAlign = (short) (sampleSizeInBytes * channelCount);
204 short bitsPerSample = (short) (sampleSizeInBytes * 8);
205
206 byte[] headerBuf = new byte[WAV_HEADER_LENGTH];
207 ByteBuffer header = ByteBuffer.wrap(headerBuf);
208 header.order(ByteOrder.LITTLE_ENDIAN);
209
210 header.put(new byte[]{ 'R', 'I', 'F', 'F' });
Bjorn Bringert71e0b482011-04-15 14:37:05 +0100211 header.putInt(dataLength + WAV_HEADER_LENGTH - 8); // RIFF chunk size
Bjorn Bringert50e657b2011-03-08 16:00:40 +0000212 header.put(new byte[]{ 'W', 'A', 'V', 'E' });
213 header.put(new byte[]{ 'f', 'm', 't', ' ' });
214 header.putInt(16); // size of fmt chunk
215 header.putShort(WAV_FORMAT_PCM);
216 header.putShort((short) channelCount);
217 header.putInt(sampleRateInHz);
218 header.putInt(byteRate);
219 header.putShort(blockAlign);
220 header.putShort(bitsPerSample);
221 header.put(new byte[]{ 'd', 'a', 't', 'a' });
Bjorn Bringert50e657b2011-03-08 16:00:40 +0000222 header.putInt(dataLength);
Przemyslaw Szczepaniak5acb33a2013-02-08 16:36:25 +0000223 header.flip();
Bjorn Bringert50e657b2011-03-08 16:00:40 +0000224
Przemyslaw Szczepaniak5acb33a2013-02-08 16:36:25 +0000225 return header;
Bjorn Bringert50e657b2011-03-08 16:00:40 +0000226 }
227
228}