Merge "Implement time markers for TTS."
diff --git a/api/current.txt b/api/current.txt
index 243c78f..2fe3f44 100644
--- a/api/current.txt
+++ b/api/current.txt
@@ -36245,6 +36245,7 @@
method public abstract int getMaxBufferSize();
method public abstract boolean hasFinished();
method public abstract boolean hasStarted();
+ method public default void rangeStart(int, int, int);
method public abstract int start(int, int, int);
}
@@ -36397,6 +36398,7 @@
method public void onError(java.lang.String, int);
method public abstract void onStart(java.lang.String);
method public void onStop(java.lang.String, boolean);
+ method public void onUtteranceRangeStart(java.lang.String, int, int);
}
public class Voice implements android.os.Parcelable {
diff --git a/api/system-current.txt b/api/system-current.txt
index 063c3c3..7a6d87a 100644
--- a/api/system-current.txt
+++ b/api/system-current.txt
@@ -39235,6 +39235,7 @@
method public abstract int getMaxBufferSize();
method public abstract boolean hasFinished();
method public abstract boolean hasStarted();
+ method public default void rangeStart(int, int, int);
method public abstract int start(int, int, int);
}
@@ -39387,6 +39388,7 @@
method public void onError(java.lang.String, int);
method public abstract void onStart(java.lang.String);
method public void onStop(java.lang.String, boolean);
+ method public void onUtteranceRangeStart(java.lang.String, int, int);
}
public class Voice implements android.os.Parcelable {
diff --git a/api/test-current.txt b/api/test-current.txt
index 4fcaf69..cd71166 100644
--- a/api/test-current.txt
+++ b/api/test-current.txt
@@ -36366,6 +36366,7 @@
method public abstract int getMaxBufferSize();
method public abstract boolean hasFinished();
method public abstract boolean hasStarted();
+ method public default void rangeStart(int, int, int);
method public abstract int start(int, int, int);
}
@@ -36518,6 +36519,7 @@
method public void onError(java.lang.String, int);
method public abstract void onStart(java.lang.String);
method public void onStop(java.lang.String, boolean);
+ method public void onUtteranceRangeStart(java.lang.String, int, int);
}
public class Voice implements android.os.Parcelable {
diff --git a/core/java/android/speech/tts/BlockingAudioTrack.java b/core/java/android/speech/tts/BlockingAudioTrack.java
index 9920ea1..be5851c 100644
--- a/core/java/android/speech/tts/BlockingAudioTrack.java
+++ b/core/java/android/speech/tts/BlockingAudioTrack.java
@@ -164,7 +164,7 @@
// all data from the audioTrack has been sent to the mixer, so
// it's safe to release at this point.
if (DBG) Log.d(TAG, "Releasing audio track [" + track.hashCode() + "]");
- synchronized(mAudioTrackLock) {
+ synchronized (mAudioTrackLock) {
mAudioTrack = null;
}
track.release();
@@ -340,4 +340,25 @@
return value < min ? min : (value < max ? value : max);
}
+ /**
+ * Sets {@code listener} on the underlying audio track; does nothing if there is no track.
+ * @see AudioTrack#setPlaybackPositionUpdateListener
+ */
+ public void setPlaybackPositionUpdateListener(
+ AudioTrack.OnPlaybackPositionUpdateListener listener) {
+ synchronized (mAudioTrackLock) {
+ if (mAudioTrack != null) {
+ mAudioTrack.setPlaybackPositionUpdateListener(listener);
+ }
+ }
+ }
+
+ /** Calls {@link AudioTrack#setNotificationMarkerPosition(int)} on the underlying track, if any. */
+ public void setNotificationMarkerPosition(int frames) {
+ synchronized (mAudioTrackLock) {
+ if (mAudioTrack != null) {
+ mAudioTrack.setNotificationMarkerPosition(frames);
+ }
+ }
+ }
}
diff --git a/core/java/android/speech/tts/ITextToSpeechCallback.aidl b/core/java/android/speech/tts/ITextToSpeechCallback.aidl
index 4e3acf6..edb6e48 100644
--- a/core/java/android/speech/tts/ITextToSpeechCallback.aidl
+++ b/core/java/android/speech/tts/ITextToSpeechCallback.aidl
@@ -83,4 +83,19 @@
* callback.
*/
void onAudioAvailable(String utteranceId, in byte[] audio);
+
+ /**
+ * Tells the client that the engine is about to speak the specified range of the utterance.
+ *
+ * <p>
+ * Only called if the engine supplies timing information by calling
+ * {@link SynthesisCallback#rangeStart(int, int, int)} and only when the request is played back
+ * by the service, not when using {@link android.speech.tts.TextToSpeech#synthesizeToFile}.
+ * </p>
+ *
+ * @param utteranceId Unique id identifying the synthesis request.
+ * @param start The start character index of the range in the utterance text.
+ * @param end The end character index of the range (exclusive) in the utterance text.
+ */
+ void onUtteranceRangeStart(String utteranceId, int start, int end);
}
diff --git a/core/java/android/speech/tts/PlaybackSynthesisCallback.java b/core/java/android/speech/tts/PlaybackSynthesisCallback.java
index 778aa86..9e24b09 100644
--- a/core/java/android/speech/tts/PlaybackSynthesisCallback.java
+++ b/core/java/android/speech/tts/PlaybackSynthesisCallback.java
@@ -271,4 +271,22 @@
mStatusCode = errorCode;
}
}
+
+ /**
+ * Forwards the range marker to {@code mItem}.
+ *
+ * <p>If {@code mItem} is null the marker is dropped and an error is logged.
+ *
+ * @param markerInFrames The position in frames in the audio where this range is spoken.
+ * @param start The start index of the range in the input text.
+ * @param end The end index (exclusive) of the range in the input text.
+ */
+ @Override
+ public void rangeStart(int markerInFrames, int start, int end) {
+ if (mItem == null) {
+ Log.e(TAG, "mItem is null");
+ return;
+ }
+ mItem.rangeStart(markerInFrames, start, end);
+ }
}
diff --git a/core/java/android/speech/tts/SynthesisCallback.java b/core/java/android/speech/tts/SynthesisCallback.java
index 2fd8499..8b74ed7 100644
--- a/core/java/android/speech/tts/SynthesisCallback.java
+++ b/core/java/android/speech/tts/SynthesisCallback.java
@@ -142,4 +142,26 @@
* <p>Useful for checking if a fallback from network request is possible.
*/
boolean hasFinished();
+
+ /**
+ * The service may call this method to provide timing information about the spoken text.
+ *
+ * <p>Calling this method means that at the given audio frame, the given range of the input is
+ * about to be spoken. If this method is called the client will receive a callback on the
+ * listener ({@link UtteranceProgressListener#onUtteranceRangeStart}) at the moment that frame
+ * has been reached by the playback head.
+ *
+ * <p>The markerInFrames is a frame index into the audio for this synthesis request, i.e. into
+ * the concatenation of the audio bytes sent to audioAvailable for this synthesis request. The
+ * definition of a frame depends on the format given by {@link #start}. See {@link AudioFormat}
+ * for more information.
+ *
+ * <p>This method should only be called on the synthesis thread, while in {@link
+ * TextToSpeechService#onSynthesizeText}.
+ *
+ * @param markerInFrames The position in frames in the audio where this range is spoken.
+ * @param start The start index of the range in the input text.
+ * @param end The end index (exclusive) of the range in the input text.
+ */
+ default void rangeStart(int markerInFrames, int start, int end) {}
}
diff --git a/core/java/android/speech/tts/SynthesisPlaybackQueueItem.java b/core/java/android/speech/tts/SynthesisPlaybackQueueItem.java
index 7423933..cb5f220 100644
--- a/core/java/android/speech/tts/SynthesisPlaybackQueueItem.java
+++ b/core/java/android/speech/tts/SynthesisPlaybackQueueItem.java
@@ -17,18 +17,21 @@
import android.speech.tts.TextToSpeechService.AudioOutputParams;
import android.speech.tts.TextToSpeechService.UtteranceProgressDispatcher;
+import android.media.AudioTrack;
import android.util.Log;
import java.util.LinkedList;
import java.util.concurrent.locks.Condition;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReentrantLock;
+import java.util.concurrent.ConcurrentLinkedQueue;
/**
- * Manages the playback of a list of byte arrays representing audio data
- * that are queued by the engine to an audio track.
+ * Manages the playback of a list of byte arrays representing audio data that are queued by the
+ * engine to an audio track.
*/
-final class SynthesisPlaybackQueueItem extends PlaybackQueueItem {
+final class SynthesisPlaybackQueueItem extends PlaybackQueueItem
+ implements AudioTrack.OnPlaybackPositionUpdateListener {
private static final String TAG = "TTS.SynthQueueItem";
private static final boolean DBG = false;
@@ -63,6 +66,10 @@
private final BlockingAudioTrack mAudioTrack;
private final AbstractEventLogger mLogger;
+ // Stores a queue of markers. When the marker in front is reached the client is informed and we
+ // wait for the next one.
+ private final ConcurrentLinkedQueue<ProgressMarker> markerList = new ConcurrentLinkedQueue<>();
+
SynthesisPlaybackQueueItem(AudioOutputParams audioParams, int sampleRate,
int audioFormat, int channelCount, UtteranceProgressDispatcher dispatcher,
Object callerIdentity, AbstractEventLogger logger) {
@@ -89,6 +96,8 @@
return;
}
+ mAudioTrack.setPlaybackPositionUpdateListener(this);
+
try {
byte[] buffer = null;
@@ -172,6 +181,55 @@
}
}
+ /** Immutable holder for a TTS marker; static because it uses no state of the outer item. */
+ private static final class ProgressMarker {
+ // The index in frames of this marker.
+ public final int frames;
+ // The start index in the text of the utterance.
+ public final int start;
+ // The end index (exclusive) in the text of the utterance.
+ public final int end;
+
+ public ProgressMarker(int frames, int start, int end) {
+ this.frames = frames;
+ this.start = start;
+ this.end = end;
+ }
+ }
+
+ /** Points the audio track's notification marker at the first marker in the queue, if any. */
+ void updateMarker() {
+ ProgressMarker marker = markerList.peek();
+ if (marker != null) {
+ // Zero is used to disable the marker. The documentation recommends to use a non-zero
+ // position near zero such as 1.
+ int markerInFrames = marker.frames == 0 ? 1 : marker.frames;
+ mAudioTrack.setNotificationMarkerPosition(markerInFrames);
+ }
+ }
+
+ /** Informs us that at markerInFrames, the range between start and end is about to be spoken. */
+ void rangeStart(int markerInFrames, int start, int end) {
+ markerList.add(new ProgressMarker(markerInFrames, start, end));
+ updateMarker();
+ }
+
+ @Override
+ public void onMarkerReached(AudioTrack track) {
+ ProgressMarker marker = markerList.poll();
+ if (marker == null) {
+ Log.e(TAG, "onMarkerReached called but no marker in queue");
+ return;
+ }
+ // Tell the client which range of the utterance text starts at the marker just reached.
+ getDispatcher().dispatchOnUtteranceRangeStart(marker.start, marker.end);
+ // Listen for the next marker.
+ // It's ok if this marker is in the past; in that case onMarkerReached will be called again.
+ updateMarker();
+ }
+
+ @Override
+ public void onPeriodicNotification(AudioTrack track) {}
void put(byte[] buffer) throws InterruptedException {
try {
diff --git a/core/java/android/speech/tts/TextToSpeech.java b/core/java/android/speech/tts/TextToSpeech.java
index 24cad95..9a157b7 100644
--- a/core/java/android/speech/tts/TextToSpeech.java
+++ b/core/java/android/speech/tts/TextToSpeech.java
@@ -2103,55 +2103,69 @@
private boolean mEstablished;
- private final ITextToSpeechCallback.Stub mCallback = new ITextToSpeechCallback.Stub() {
- public void onStop(String utteranceId, boolean isStarted) throws RemoteException {
- UtteranceProgressListener listener = mUtteranceProgressListener;
- if (listener != null) {
- listener.onStop(utteranceId, isStarted);
- }
- };
+ private final ITextToSpeechCallback.Stub mCallback =
+ new ITextToSpeechCallback.Stub() {
+ public void onStop(String utteranceId, boolean isStarted)
+ throws RemoteException {
+ UtteranceProgressListener listener = mUtteranceProgressListener;
+ if (listener != null) {
+ listener.onStop(utteranceId, isStarted);
+ }
+ };
- @Override
- public void onSuccess(String utteranceId) {
- UtteranceProgressListener listener = mUtteranceProgressListener;
- if (listener != null) {
- listener.onDone(utteranceId);
- }
- }
+ @Override
+ public void onSuccess(String utteranceId) {
+ UtteranceProgressListener listener = mUtteranceProgressListener;
+ if (listener != null) {
+ listener.onDone(utteranceId);
+ }
+ }
- @Override
- public void onError(String utteranceId, int errorCode) {
- UtteranceProgressListener listener = mUtteranceProgressListener;
- if (listener != null) {
- listener.onError(utteranceId);
- }
- }
+ @Override
+ public void onError(String utteranceId, int errorCode) {
+ UtteranceProgressListener listener = mUtteranceProgressListener;
+ if (listener != null) {
+ listener.onError(utteranceId, errorCode);
+ }
+ }
- @Override
- public void onStart(String utteranceId) {
- UtteranceProgressListener listener = mUtteranceProgressListener;
- if (listener != null) {
- listener.onStart(utteranceId);
- }
- }
+ @Override
+ public void onStart(String utteranceId) {
+ UtteranceProgressListener listener = mUtteranceProgressListener;
+ if (listener != null) {
+ listener.onStart(utteranceId);
+ }
+ }
- @Override
- public void onBeginSynthesis(String utteranceId, int sampleRateInHz, int audioFormat,
- int channelCount) {
- UtteranceProgressListener listener = mUtteranceProgressListener;
- if (listener != null) {
- listener.onBeginSynthesis(utteranceId, sampleRateInHz, audioFormat, channelCount);
- }
- }
+ @Override
+ public void onBeginSynthesis(
+ String utteranceId,
+ int sampleRateInHz,
+ int audioFormat,
+ int channelCount) {
+ UtteranceProgressListener listener = mUtteranceProgressListener;
+ if (listener != null) {
+ listener.onBeginSynthesis(
+ utteranceId, sampleRateInHz, audioFormat, channelCount);
+ }
+ }
- @Override
- public void onAudioAvailable(String utteranceId, byte[] audio) {
- UtteranceProgressListener listener = mUtteranceProgressListener;
- if (listener != null) {
- listener.onAudioAvailable(utteranceId, audio);
- }
- }
- };
+ @Override
+ public void onAudioAvailable(String utteranceId, byte[] audio) {
+ UtteranceProgressListener listener = mUtteranceProgressListener;
+ if (listener != null) {
+ listener.onAudioAvailable(utteranceId, audio);
+ }
+ }
+
+ @Override
+ public void onUtteranceRangeStart(String utteranceId, int start, int end) {
+ UtteranceProgressListener listener = mUtteranceProgressListener;
+ if (listener != null) {
+ listener.onUtteranceRangeStart(utteranceId, start, end);
+ }
+ }
+ };
private class SetupConnectionAsyncTask extends AsyncTask<Void, Void, Integer> {
private final ComponentName mName;
diff --git a/core/java/android/speech/tts/TextToSpeechService.java b/core/java/android/speech/tts/TextToSpeechService.java
index 55da52b..80d3c8a 100644
--- a/core/java/android/speech/tts/TextToSpeechService.java
+++ b/core/java/android/speech/tts/TextToSpeechService.java
@@ -663,6 +663,8 @@
void dispatchOnBeginSynthesis(int sampleRateInHz, int audioFormat, int channelCount);
void dispatchOnAudioAvailable(byte[] audio);
+
+ void dispatchOnUtteranceRangeStart(int start, int end);
}
/** Set of parameters affecting audio output. */
@@ -882,6 +884,15 @@
}
}
+ @Override
+ public void dispatchOnUtteranceRangeStart(int start, int end) {
+ final String utteranceId = getUtteranceId();
+ if (utteranceId != null) {
+ mCallbacks.dispatchOnUtteranceRangeStart(
+ getCallerIdentity(), utteranceId, start, end);
+ }
+ }
+
abstract public String getUtteranceId();
String getStringParam(Bundle params, String key, String defaultValue) {
@@ -1559,6 +1570,17 @@
}
}
+ public void dispatchOnUtteranceRangeStart(
+ Object callerIdentity, String utteranceId, int start, int end) {
+ ITextToSpeechCallback cb = getCallbackFor(callerIdentity);
+ if (cb == null) return;
+ try {
+ cb.onUtteranceRangeStart(utteranceId, start, end);
+ } catch (RemoteException e) {
+ Log.e(TAG, "Callback dispatchOnUtteranceRangeStart(String, int, int) failed: " + e);
+ }
+ }
+
@Override
public void onCallbackDied(ITextToSpeechCallback callback, Object cookie) {
IBinder caller = (IBinder) cookie;
diff --git a/core/java/android/speech/tts/UtteranceProgressListener.java b/core/java/android/speech/tts/UtteranceProgressListener.java
index 72a5228..0ee3769 100644
--- a/core/java/android/speech/tts/UtteranceProgressListener.java
+++ b/core/java/android/speech/tts/UtteranceProgressListener.java
@@ -122,8 +122,24 @@
}
/**
- * Wraps an old deprecated OnUtteranceCompletedListener with a shiny new
- * progress listener.
+ * This is called when the TTS service is about to speak the specified range of the utterance
+ * with the given utteranceId.
+ *
+ * <p>This method is called when the audio is expected to start playing on the speaker. Note
+ * that this is different from {@link #onAudioAvailable} which is called as soon as the audio is
+ * generated.
+ *
+ * <p>Only called if the engine supplies timing information by calling {@link
+ * SynthesisCallback#rangeStart(int, int, int)}.
+ *
+ * @param utteranceId Unique id identifying the synthesis request.
+ * @param start The start index of the range in the utterance text.
+ * @param end The end index of the range (exclusive) in the utterance text.
+ */
+ public void onUtteranceRangeStart(String utteranceId, int start, int end) {}
+
+ /**
+ * Wraps an old deprecated OnUtteranceCompletedListener with a shiny new progress listener.
*
* @hide
*/