blob: 7efc26466c8ea44df83eb6970c84c0a4c6c7532e [file] [log] [blame]
/*
 * Copyright (C) 2011 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */
16package android.speech.tts;
17
18import android.media.AudioFormat;
19import android.util.Log;
20
21import java.io.File;
Bjorn Bringert71e0b482011-04-15 14:37:05 +010022import java.io.FileOutputStream;
Bjorn Bringert50e657b2011-03-08 16:00:40 +000023import java.io.IOException;
24import java.io.RandomAccessFile;
25import java.nio.ByteBuffer;
26import java.nio.ByteOrder;
27
28/**
29 * Speech synthesis request that writes the audio to a WAV file.
30 */
31class FileSynthesisRequest extends SynthesisRequest {
32
33 private static final String TAG = "FileSynthesisRequest";
34 private static final boolean DBG = false;
35
Bjorn Bringert71e0b482011-04-15 14:37:05 +010036 private static final int MAX_AUDIO_BUFFER_SIZE = 8192;
37
Bjorn Bringert50e657b2011-03-08 16:00:40 +000038 private static final int WAV_HEADER_LENGTH = 44;
39 private static final short WAV_FORMAT_PCM = 0x0001;
40
41 private final Object mStateLock = new Object();
42 private final File mFileName;
43 private int mSampleRateInHz;
44 private int mAudioFormat;
45 private int mChannelCount;
46 private RandomAccessFile mFile;
47 private boolean mStopped = false;
Bjorn Bringert360eb162011-04-19 09:20:35 +010048 private boolean mDone = false;
Bjorn Bringert50e657b2011-03-08 16:00:40 +000049
50 FileSynthesisRequest(String text, File fileName) {
51 super(text);
52 mFileName = fileName;
53 }
54
55 @Override
56 void stop() {
57 synchronized (mStateLock) {
58 mStopped = true;
59 cleanUp();
60 }
61 }
62
63 /**
64 * Must be called while holding the monitor on {@link #mStateLock}.
65 */
66 private void cleanUp() {
67 closeFile();
68 if (mFile != null) {
69 mFileName.delete();
70 }
71 }
72
73 /**
74 * Must be called while holding the monitor on {@link #mStateLock}.
75 */
76 private void closeFile() {
77 try {
78 if (mFile != null) {
79 mFile.close();
80 mFile = null;
81 }
82 } catch (IOException ex) {
83 Log.e(TAG, "Failed to close " + mFileName + ": " + ex);
84 }
85 }
86
87 @Override
Bjorn Bringert71e0b482011-04-15 14:37:05 +010088 public int getMaxBufferSize() {
89 return MAX_AUDIO_BUFFER_SIZE;
90 }
91
92 @Override
Bjorn Bringert360eb162011-04-19 09:20:35 +010093 boolean isDone() {
94 return mDone;
95 }
96
97 @Override
Bjorn Bringert50e657b2011-03-08 16:00:40 +000098 public int start(int sampleRateInHz, int audioFormat, int channelCount) {
99 if (DBG) {
100 Log.d(TAG, "FileSynthesisRequest.start(" + sampleRateInHz + "," + audioFormat
101 + "," + channelCount + ")");
102 }
103 synchronized (mStateLock) {
104 if (mStopped) {
105 if (DBG) Log.d(TAG, "Request has been aborted.");
106 return TextToSpeech.ERROR;
107 }
108 if (mFile != null) {
109 cleanUp();
110 throw new IllegalArgumentException("FileSynthesisRequest.start() called twice");
111 }
112 mSampleRateInHz = sampleRateInHz;
113 mAudioFormat = audioFormat;
114 mChannelCount = channelCount;
115 try {
116 mFile = new RandomAccessFile(mFileName, "rw");
117 // Reserve space for WAV header
118 mFile.write(new byte[WAV_HEADER_LENGTH]);
119 return TextToSpeech.SUCCESS;
120 } catch (IOException ex) {
121 Log.e(TAG, "Failed to open " + mFileName + ": " + ex);
122 cleanUp();
123 return TextToSpeech.ERROR;
124 }
125 }
126 }
127
128 @Override
129 public int audioAvailable(byte[] buffer, int offset, int length) {
130 if (DBG) {
131 Log.d(TAG, "FileSynthesisRequest.audioAvailable(" + buffer + "," + offset
132 + "," + length + ")");
133 }
134 synchronized (mStateLock) {
135 if (mStopped) {
136 if (DBG) Log.d(TAG, "Request has been aborted.");
137 return TextToSpeech.ERROR;
138 }
139 if (mFile == null) {
140 Log.e(TAG, "File not open");
141 return TextToSpeech.ERROR;
142 }
143 try {
144 mFile.write(buffer, offset, length);
145 return TextToSpeech.SUCCESS;
146 } catch (IOException ex) {
147 Log.e(TAG, "Failed to write to " + mFileName + ": " + ex);
148 cleanUp();
149 return TextToSpeech.ERROR;
150 }
151 }
152 }
153
154 @Override
155 public int done() {
156 if (DBG) Log.d(TAG, "FileSynthesisRequest.done()");
157 synchronized (mStateLock) {
158 if (mStopped) {
159 if (DBG) Log.d(TAG, "Request has been aborted.");
160 return TextToSpeech.ERROR;
161 }
162 if (mFile == null) {
163 Log.e(TAG, "File not open");
164 return TextToSpeech.ERROR;
165 }
166 try {
167 // Write WAV header at start of file
168 mFile.seek(0);
Bjorn Bringert71e0b482011-04-15 14:37:05 +0100169 int dataLength = (int) (mFile.length() - WAV_HEADER_LENGTH);
170 mFile.write(
171 makeWavHeader(mSampleRateInHz, mAudioFormat, mChannelCount, dataLength));
Bjorn Bringert50e657b2011-03-08 16:00:40 +0000172 closeFile();
Bjorn Bringert360eb162011-04-19 09:20:35 +0100173 mDone = true;
Bjorn Bringert50e657b2011-03-08 16:00:40 +0000174 return TextToSpeech.SUCCESS;
175 } catch (IOException ex) {
176 Log.e(TAG, "Failed to write to " + mFileName + ": " + ex);
177 cleanUp();
178 return TextToSpeech.ERROR;
179 }
180 }
181 }
182
Bjorn Bringert71e0b482011-04-15 14:37:05 +0100183 @Override
Bjorn Bringert360eb162011-04-19 09:20:35 +0100184 public void error() {
185 if (DBG) Log.d(TAG, "FileSynthesisRequest.error()");
186 synchronized (mStateLock) {
187 cleanUp();
188 }
189 }
190
191 @Override
Bjorn Bringert71e0b482011-04-15 14:37:05 +0100192 public int completeAudioAvailable(int sampleRateInHz, int audioFormat, int channelCount,
193 byte[] buffer, int offset, int length) {
194 synchronized (mStateLock) {
195 if (mStopped) {
196 if (DBG) Log.d(TAG, "Request has been aborted.");
197 return TextToSpeech.ERROR;
198 }
199 }
200 FileOutputStream out = null;
201 try {
202 out = new FileOutputStream(mFileName);
203 out.write(makeWavHeader(sampleRateInHz, audioFormat, channelCount, length));
204 out.write(buffer, offset, length);
Bjorn Bringert360eb162011-04-19 09:20:35 +0100205 mDone = true;
Bjorn Bringert71e0b482011-04-15 14:37:05 +0100206 return TextToSpeech.SUCCESS;
207 } catch (IOException ex) {
208 Log.e(TAG, "Failed to write to " + mFileName + ": " + ex);
Bjorn Bringert360eb162011-04-19 09:20:35 +0100209 mFileName.delete();
Bjorn Bringert71e0b482011-04-15 14:37:05 +0100210 return TextToSpeech.ERROR;
211 } finally {
212 try {
213 if (out != null) {
214 out.close();
215 }
216 } catch (IOException ex) {
217 Log.e(TAG, "Failed to close " + mFileName + ": " + ex);
218 }
219 }
220 }
221
Bjorn Bringert50e657b2011-03-08 16:00:40 +0000222 private byte[] makeWavHeader(int sampleRateInHz, int audioFormat, int channelCount,
Bjorn Bringert71e0b482011-04-15 14:37:05 +0100223 int dataLength) {
Bjorn Bringert50e657b2011-03-08 16:00:40 +0000224 // TODO: is AudioFormat.ENCODING_DEFAULT always the same as ENCODING_PCM_16BIT?
225 int sampleSizeInBytes = (audioFormat == AudioFormat.ENCODING_PCM_8BIT ? 1 : 2);
226 int byteRate = sampleRateInHz * sampleSizeInBytes * channelCount;
227 short blockAlign = (short) (sampleSizeInBytes * channelCount);
228 short bitsPerSample = (short) (sampleSizeInBytes * 8);
229
230 byte[] headerBuf = new byte[WAV_HEADER_LENGTH];
231 ByteBuffer header = ByteBuffer.wrap(headerBuf);
232 header.order(ByteOrder.LITTLE_ENDIAN);
233
234 header.put(new byte[]{ 'R', 'I', 'F', 'F' });
Bjorn Bringert71e0b482011-04-15 14:37:05 +0100235 header.putInt(dataLength + WAV_HEADER_LENGTH - 8); // RIFF chunk size
Bjorn Bringert50e657b2011-03-08 16:00:40 +0000236 header.put(new byte[]{ 'W', 'A', 'V', 'E' });
237 header.put(new byte[]{ 'f', 'm', 't', ' ' });
238 header.putInt(16); // size of fmt chunk
239 header.putShort(WAV_FORMAT_PCM);
240 header.putShort((short) channelCount);
241 header.putInt(sampleRateInHz);
242 header.putInt(byteRate);
243 header.putShort(blockAlign);
244 header.putShort(bitsPerSample);
245 header.put(new byte[]{ 'd', 'a', 't', 'a' });
Bjorn Bringert50e657b2011-03-08 16:00:40 +0000246 header.putInt(dataLength);
247
248 return headerBuf;
249 }
250
251}