// blob: e0f985ef8173b4245fb9cbd67e0008c5c4d631dd [file] [log] [blame]
/*
 * Copyright (C) 2019 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
import android.content.Context;
import android.media.AudioAttributes;
import android.media.AudioFocusRequest;
import android.media.AudioManager;
import android.os.Handler;
import android.speech.tts.TextToSpeech;
import android.speech.tts.UtteranceProgressListener;
import android.util.Log;
import android.util.Pair;

import androidx.annotation.VisibleForTesting;

import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.TimeUnit;
import java.util.function.BiConsumer;
* Component that wraps platform TTS engine and supports play-out of batches of text.
* <p>
* It takes care of setting up TTS Engine when text is played out and shutting it down after an idle
* period with no play-out. This is desirable since the owning app is long-lived and the TTS Engine
* brings up another service-process.
* <p>
* As batches of text are played-out, they issue callbacks on the {@link Listener} provided with the
* batch.
public class TextToSpeechHelper {
* Listener interface used by clients to be notified as batch of text is played out.
public interface Listener {
* Called when play-out starts for batch. May never get called if batch has errors or
* interruptions.
void onTextToSpeechStarted(long requestId);
* Called when play-out ends for batch.
* @param error Whether play-out ended due to an error or not. Note: if it was aborted, it's
* not considered an error.
void onTextToSpeechStopped(long requestId, boolean error);
private static final String TAG = "CM#TextToSpeechHelper";
private static final String UTTERANCE_ID_SEPARATOR = ";";
private static final long DEFAULT_SHUTDOWN_DELAY_MILLIS = TimeUnit.MINUTES.toMillis(1);
private final Map<String, BatchListener> mListeners = new HashMap<>();
private final Handler mHandler = new Handler();
private final Context mContext;
private final TextToSpeechHelper.Listener mListener;
private final AudioManager.OnAudioFocusChangeListener mNoOpListener = (f) -> { /* NO-OP */ };
private final AudioManager mAudioManager;
private final AudioAttributes mAudioAttributes;
private final AudioFocusRequest mAudioFocusRequest;
private final long mShutdownDelayMillis;
private TextToSpeechEngine mTextToSpeechEngine;
private int mInitStatus;
private SpeechRequest mPendingRequest;
private String mCurrentBatchId;
private final Runnable mMaybeShutdownRunnable = new Runnable() {
public void run() {
if (mListeners.isEmpty() || mPendingRequest == null) {
} else {
mHandler.postDelayed(this, mShutdownDelayMillis);
public TextToSpeechHelper(Context context, TextToSpeechHelper.Listener listener) {
this(context, new AndroidTextToSpeechEngine(), DEFAULT_SHUTDOWN_DELAY_MILLIS, listener);
TextToSpeechHelper(Context context, TextToSpeechEngine ttsEngine, long shutdownDelayMillis,
TextToSpeechHelper.Listener listener) {
mContext = context;
mAudioManager = (AudioManager) mContext.getSystemService(Context.AUDIO_SERVICE);
mTextToSpeechEngine = ttsEngine;
mShutdownDelayMillis = shutdownDelayMillis;
// OnInitListener will only set to SUCCESS/ERROR. So we initialize to STOPPED.
mInitStatus = TextToSpeech.STOPPED;
mListener = listener;
mAudioAttributes = new AudioAttributes.Builder()
mAudioFocusRequest = new AudioFocusRequest.Builder(AudioManager.AUDIOFOCUS_GAIN_TRANSIENT)
private void maybeInitAndKeepAlive() {
if (!mTextToSpeechEngine.isInitialized()) {
if (Log.isLoggable(TAG, Log.DEBUG)) {
Log.d(TAG, "Initializing TTS Engine");
mTextToSpeechEngine.initialize(mContext, this::handleInitCompleted);
// Since we're handling a request, delay engine shutdown.
mHandler.postDelayed(mMaybeShutdownRunnable, mShutdownDelayMillis);
private void handleInitCompleted(int initStatus) {
if (Log.isLoggable(TAG, Log.DEBUG)) {
Log.d(TAG, String.format("Init completed. Status: %d", initStatus));
mInitStatus = initStatus;
if (mPendingRequest != null) {
playInternal(mPendingRequest.mTextToSpeak, mPendingRequest.mRequestId);
mPendingRequest = null;
* Plays out given batch of text. If engine is not active, it is setup and the request is stored
* until then. Only one batch is supported at a time; If a previous batch is waiting engine
* setup, that batch is dropped. If a previous batch is playing, the play-out is stopped and
* next one is passed to the TTS Engine. Callbacks are issued on the provided {@code listener}.
* Will request audio focus first, failure will trigger onAudioFocusFailed in listener.
* <p/>
* NOTE: Underlying engine may have limit on length of text in each element of the batch; it
* will reject anything longer. See {@link TextToSpeech#getMaxSpeechInputLength()}.
* @param textToSpeak Batch of text to play-out.
* @param requestId The tracking request id
* @return true if the request to play was successful
public boolean requestPlay(List<CharSequence> textToSpeak, long requestId) {
if (textToSpeak.isEmpty()) {
/* no-op */
return true;
int result = mAudioManager.requestAudioFocus(mAudioFocusRequest);
if (result != AudioManager.AUDIOFOCUS_REQUEST_GRANTED) {
return false;
// Check if its still initializing.
if (mInitStatus == TextToSpeech.STOPPED) {
// Squash any already queued request.
if (mPendingRequest != null) {
onTtsStopped(requestId, /* error= */ false);
mPendingRequest = new SpeechRequest(textToSpeak, requestId);
} else {
playInternal(textToSpeak, requestId);
return true;
/** Requests that all play-out be stopped. */
public void requestStop() {
mCurrentBatchId = null;
public boolean isSpeaking() {
return mTextToSpeechEngine.isSpeaking();
// wrap call back to listener.onTextToSpeechStopped with adandonAudioFocus.
private void onTtsStopped(long requestId, boolean error) {
mAudioManager.abandonAudioFocusRequest(mAudioFocusRequest); -> mListener.onTextToSpeechStopped(requestId, error));
private void playInternal(List<CharSequence> textToSpeak, long requestId) {
if (mInitStatus == TextToSpeech.ERROR) {
Log.e(TAG, "TTS setup failed!");
onTtsStopped(requestId, /* error= */ true);
// Abort anything currently playing and flushes queue.
// Queue up new batch. We assign id's = "batchId;index" where index increments from 0
// to batchSize - 1. If queueing fails, we abort the whole batch.
mCurrentBatchId = Long.toString(requestId);
for (int i = 0; i < textToSpeak.size(); i++) {
CharSequence text = textToSpeak.get(i);
String utteranceId =
String.format("%s%s%d", mCurrentBatchId, UTTERANCE_ID_SEPARATOR, i);
if (Log.isLoggable(TAG, Log.DEBUG)) {
Log.d(TAG, String.format("Queueing tts: '%s' [%s]", text, utteranceId));
if (mTextToSpeechEngine.speak(text, TextToSpeech.QUEUE_ADD, /* params= */ null,
utteranceId) != TextToSpeech.SUCCESS) {
mCurrentBatchId = null;
Log.e(TAG, "Queuing text failed!");
onTtsStopped(requestId, /* error= */ true);
// Register BatchListener for entire batch. Will invoke callbacks on Listener as batch
// progresses.
mListeners.put(mCurrentBatchId, new BatchListener(requestId, textToSpeak.size()));
* Releases resources and shuts down TTS Engine.
public void cleanup() {
mHandler.removeCallbacksAndMessages(/* token= */ null);
/** Returns the stream used by the TTS engine. */
public int getStream() {
return mTextToSpeechEngine.getStream();
private void shutdownEngine() {
if (mTextToSpeechEngine.isInitialized()) {
if (Log.isLoggable(TAG, Log.DEBUG)) {
Log.d(TAG, "Shutting down TTS Engine");
mInitStatus = TextToSpeech.STOPPED;
private static Pair<String, Integer> parse(String utteranceId) {
try {
String[] pair = utteranceId.split(UTTERANCE_ID_SEPARATOR);
String batchId = pair[0];
int index = Integer.valueOf(pair[1]);
return Pair.create(batchId, index);
} catch (IndexOutOfBoundsException | NumberFormatException e) {
throw new IllegalArgumentException(
String.format("Utterance ID is invalid: %s.", utteranceId)
// Handles all callbacks from TextToSpeechEngine. Possible order of callbacks:
// - onStart, onDone: successful play-out.
// - onStart, onStop: play-out starts, but interrupted.
// - onStart, onError: play-out starts and fails.
// - onStop: play-out never starts, but aborted.
// - onError: play-out never starts, but fails.
// Since the callbacks arrive on other threads, they are dispatched onto mHandler where the
// appropriate BatchListener is invoked.
private final UtteranceProgressListener mProgressListener = new UtteranceProgressListener() {
private void safeInvokeAsync(String utteranceId,
BiConsumer<BatchListener, Pair<String, Integer>> callback) { -> {
Pair<String, Integer> parsedId = parse(utteranceId);
BatchListener listener = mListeners.get(parsedId.first);
if (listener != null) {
callback.accept(listener, parsedId);
} else {
if (Log.isLoggable(TAG, Log.DEBUG)) {
Log.d(TAG, "Missing batch listener: " + utteranceId);
public void onStart(String utteranceId) {
if (Log.isLoggable(TAG, Log.DEBUG)) {
Log.d(TAG, "TTS onStart: " + utteranceId);
} -> {
Pair<String, Integer> parsedId = parse(utteranceId);
BatchListener listener = mListeners.get(parsedId.first);
if (listener != null) {
} else {
if (Log.isLoggable(TAG, Log.DEBUG)) {
Log.d(TAG, "Missing batch listener: " + utteranceId);
public void onDone(String utteranceId) {
if (Log.isLoggable(TAG, Log.DEBUG)) {
Log.d(TAG, "TTS onDone: " + utteranceId);
safeInvokeAsync(utteranceId, BatchListener::onDone);
public void onStop(String utteranceId, boolean interrupted) {
if (Log.isLoggable(TAG, Log.DEBUG)) {
Log.d(TAG, "TTS onStop: " + utteranceId);
safeInvokeAsync(utteranceId, BatchListener::onStop);
public void onError(String utteranceId) {
if (Log.isLoggable(TAG, Log.DEBUG)) {
Log.d(TAG, "TTS onError: " + utteranceId);
safeInvokeAsync(utteranceId, BatchListener::onError);
* Handles callbacks for a single batch of TTS text and issues callbacks on wrapped
* {@link Listener} that client is listening on.
private class BatchListener {
private boolean mBatchStarted;
private final long mRequestId;
private final int mUtteranceCount;
BatchListener(long requestId, int utteranceCount) {
mRequestId = requestId;
mUtteranceCount = utteranceCount;
// Issues Listener.onTextToSpeechStarted when first item of batch starts.
void onStart() {
if (!mBatchStarted) {
mBatchStarted = true;
// Issues Listener.onTextToSpeechStopped when last item of batch finishes.
void onDone(Pair<String, Integer> parsedId) {
// parseId is zero-indexed, mUtteranceCount is not.
if (parsedId.second == (mUtteranceCount - 1)) {
handleBatchFinished(parsedId, /* error= */ false);
// If any item of batch fails, abort the batch and issue Listener.onTextToSpeechStopped.
void onError(Pair<String, Integer> parsedId) {
if (parsedId.first.equals(mCurrentBatchId)) {
handleBatchFinished(parsedId, /* error= */ true);
// If any item of batch is preempted (rest should also be),
// issue Listener.onTextToSpeechStopped.
void onStop(Pair<String, Integer> parsedId) {
handleBatchFinished(parsedId, /* error= */ false);
// Handles terminal callbacks for the batch. We invoke stopped and remove ourselves.
// No further callbacks will be handled for the batch.
private void handleBatchFinished(Pair<String, Integer> parsedId, boolean error) {
onTtsStopped(mRequestId, error);
private static class SpeechRequest {
final List<CharSequence> mTextToSpeak;
final long mRequestId;
SpeechRequest(List<CharSequence> textToSpeak, long requestId) {
mTextToSpeak = textToSpeak;
mRequestId = requestId;