Merge "Implement time markers for TTS."

commit: f549a5c3f51d09e605b855fffc48a57185dc9093 [log] [tgz]
author: Niels Egberts <nielse@google.com> Tue Jan 24 17:19:30 2017 +0000
committer: Android (Google) Code Review <android-gerrit@google.com> Tue Jan 24 17:19:34 2017 +0000
tree: e7435fcaa38bd0d3fed1433a92df9a41832a6231
parent: f779f91cd2c944a2c7c6ecbae124847fc3d7a034 [diff]
parent: 65c50784564d0bae9276fde5472dd8898a781bcd [diff]
diff --git a/api/current.txt b/api/current.txt
index 243c78f..2fe3f44 100644
--- a/api/current.txt
+++ b/api/current.txt

@@ -36245,6 +36245,7 @@
     method public abstract int getMaxBufferSize();
     method public abstract boolean hasFinished();
     method public abstract boolean hasStarted();
+    method public default void rangeStart(int, int, int);
     method public abstract int start(int, int, int);
   }
 
@@ -36397,6 +36398,7 @@
     method public void onError(java.lang.String, int);
     method public abstract void onStart(java.lang.String);
     method public void onStop(java.lang.String, boolean);
+    method public void onUtteranceRangeStart(java.lang.String, int, int);
   }
 
   public class Voice implements android.os.Parcelable {

diff --git a/api/system-current.txt b/api/system-current.txt
index 063c3c3..7a6d87a 100644
--- a/api/system-current.txt
+++ b/api/system-current.txt

@@ -39235,6 +39235,7 @@
     method public abstract int getMaxBufferSize();
     method public abstract boolean hasFinished();
     method public abstract boolean hasStarted();
+    method public default void rangeStart(int, int, int);
     method public abstract int start(int, int, int);
   }
 
@@ -39387,6 +39388,7 @@
     method public void onError(java.lang.String, int);
     method public abstract void onStart(java.lang.String);
     method public void onStop(java.lang.String, boolean);
+    method public void onUtteranceRangeStart(java.lang.String, int, int);
   }
 
   public class Voice implements android.os.Parcelable {

diff --git a/api/test-current.txt b/api/test-current.txt
index 4fcaf69..cd71166 100644
--- a/api/test-current.txt
+++ b/api/test-current.txt

@@ -36366,6 +36366,7 @@
     method public abstract int getMaxBufferSize();
     method public abstract boolean hasFinished();
     method public abstract boolean hasStarted();
+    method public default void rangeStart(int, int, int);
     method public abstract int start(int, int, int);
   }
 
@@ -36518,6 +36519,7 @@
     method public void onError(java.lang.String, int);
     method public abstract void onStart(java.lang.String);
     method public void onStop(java.lang.String, boolean);
+    method public void onUtteranceRangeStart(java.lang.String, int, int);
   }
 
   public class Voice implements android.os.Parcelable {

diff --git a/core/java/android/speech/tts/BlockingAudioTrack.java b/core/java/android/speech/tts/BlockingAudioTrack.java
index 9920ea1..be5851c 100644
--- a/core/java/android/speech/tts/BlockingAudioTrack.java
+++ b/core/java/android/speech/tts/BlockingAudioTrack.java

@@ -164,7 +164,7 @@
         // all data from the audioTrack has been sent to the mixer, so
         // it's safe to release at this point.
         if (DBG) Log.d(TAG, "Releasing audio track [" + track.hashCode() + "]");
-        synchronized(mAudioTrackLock) {
+        synchronized (mAudioTrackLock) {
             mAudioTrack = null;
         }
         track.release();
@@ -340,4 +340,25 @@
         return value < min ? min : (value < max ? value : max);
     }
 
+    /**
+     * Installs {@code listener} on the wrapped track, if one is currently set. See {@link
+     * AudioTrack#setPlaybackPositionUpdateListener(AudioTrack.OnPlaybackPositionUpdateListener)}.
+     */
+    public void setPlaybackPositionUpdateListener(
+            AudioTrack.OnPlaybackPositionUpdateListener listener) {
+        synchronized (mAudioTrackLock) {
+            if (mAudioTrack != null) {
+                mAudioTrack.setPlaybackPositionUpdateListener(listener);
+            }
+        }
+    }
+
+    /** Forwards to {@link AudioTrack#setNotificationMarkerPosition(int)} if a track is set. */
+    public void setNotificationMarkerPosition(int frames) {
+        synchronized (mAudioTrackLock) {
+            if (mAudioTrack != null) {
+                mAudioTrack.setNotificationMarkerPosition(frames);
+            }
+        }
+    }
 }

diff --git a/core/java/android/speech/tts/ITextToSpeechCallback.aidl b/core/java/android/speech/tts/ITextToSpeechCallback.aidl
index 4e3acf6..edb6e48 100644
--- a/core/java/android/speech/tts/ITextToSpeechCallback.aidl
+++ b/core/java/android/speech/tts/ITextToSpeechCallback.aidl

@@ -83,4 +83,19 @@
      * callback.
      */
     void onAudioAvailable(String utteranceId, in byte[] audio);
+
+    /**
+     * Tells the client that the engine is about to speak the specified range of the utterance.
+     *
+     * <p>
+     * Only called if the engine supplies timing information by calling
+     * {@link SynthesisCallback#rangeStart(int, int, int)} and only when the request is played back
+     * by the service, not when using {@link android.speech.tts.TextToSpeech#synthesizeToFile}.
+     * </p>
+     *
+     * @param utteranceId Unique id identifying the synthesis request.
+     * @param start The start character index of the range in the utterance text.
+     * @param end The end character index of the range (exclusive) in the utterance text.
+     */
+    void onUtteranceRangeStart(String utteranceId, int start, int end);
 }

diff --git a/core/java/android/speech/tts/PlaybackSynthesisCallback.java b/core/java/android/speech/tts/PlaybackSynthesisCallback.java
index 778aa86..9e24b09 100644
--- a/core/java/android/speech/tts/PlaybackSynthesisCallback.java
+++ b/core/java/android/speech/tts/PlaybackSynthesisCallback.java

@@ -271,4 +271,14 @@
             mStatusCode = errorCode;
         }
     }
+
+    /** Forwards the range marker to {@code mItem}, or logs an error if there is no item. */
+    @Override
+    public void rangeStart(int markerInFrames, int start, int end) {
+        if (mItem == null) {
+            Log.e(TAG, "mItem is null");
+            return;
+        }
+        mItem.rangeStart(markerInFrames, start, end);
+    }
 }

diff --git a/core/java/android/speech/tts/SynthesisCallback.java b/core/java/android/speech/tts/SynthesisCallback.java
index 2fd8499..8b74ed7 100644
--- a/core/java/android/speech/tts/SynthesisCallback.java
+++ b/core/java/android/speech/tts/SynthesisCallback.java

@@ -142,4 +142,26 @@
      * <p>Useful for checking if a fallback from network request is possible.
      */
     boolean hasFinished();
+
+    /**
+     * The service may call this method to provide timing information about the spoken text.
+     *
+     * <p>Calling this method means that at the given audio frame, the given range of the input is
+     * about to be spoken. If this method is called the client will receive a callback on the
+     * listener ({@link UtteranceProgressListener#onUtteranceRangeStart}) at the moment that frame
+     * has been reached by the playback head.
+     *
+     * <p>The markerInFrames is a frame index into the audio for this synthesis request, i.e. into
+     * the concatenation of the audio bytes sent to audioAvailable for this synthesis request. The
+     * definition of a frame depends on the format given by {@link #start}. See {@link AudioFormat}
+     * for more information.
+     *
+     * <p>This method should only be called on the synthesis thread, while in {@link
+     * TextToSpeechService#onSynthesizeText}.
+     *
+     * @param markerInFrames The position in frames in the audio where this range is spoken.
+     * @param start The start index of the range in the input text.
+     * @param end The end index (exclusive) of the range in the input text.
+     */
+    default void rangeStart(int markerInFrames, int start, int end) {}
 }

diff --git a/core/java/android/speech/tts/SynthesisPlaybackQueueItem.java b/core/java/android/speech/tts/SynthesisPlaybackQueueItem.java
index 7423933..cb5f220 100644
--- a/core/java/android/speech/tts/SynthesisPlaybackQueueItem.java
+++ b/core/java/android/speech/tts/SynthesisPlaybackQueueItem.java

@@ -17,18 +17,21 @@
 
 import android.speech.tts.TextToSpeechService.AudioOutputParams;
 import android.speech.tts.TextToSpeechService.UtteranceProgressDispatcher;
+import android.media.AudioTrack;
 import android.util.Log;
 
 import java.util.LinkedList;
 import java.util.concurrent.locks.Condition;
 import java.util.concurrent.locks.Lock;
 import java.util.concurrent.locks.ReentrantLock;
+import java.util.concurrent.ConcurrentLinkedQueue;
 
 /**
- * Manages the playback of a list of byte arrays representing audio data
- * that are queued by the engine to an audio track.
+ * Manages the playback of a list of byte arrays representing audio data that are queued by the
+ * engine to an audio track.
  */
-final class SynthesisPlaybackQueueItem extends PlaybackQueueItem {
+final class SynthesisPlaybackQueueItem extends PlaybackQueueItem
+        implements AudioTrack.OnPlaybackPositionUpdateListener {
     private static final String TAG = "TTS.SynthQueueItem";
     private static final boolean DBG = false;
 
@@ -63,6 +66,10 @@
     private final BlockingAudioTrack mAudioTrack;
     private final AbstractEventLogger mLogger;
 
+    // Stores a queue of markers. When the marker in front is reached the client is informed and we
+    // wait for the next one.
+    private final ConcurrentLinkedQueue<ProgressMarker> markerList = new ConcurrentLinkedQueue<>();
+
     SynthesisPlaybackQueueItem(AudioOutputParams audioParams, int sampleRate,
             int audioFormat, int channelCount, UtteranceProgressDispatcher dispatcher,
             Object callerIdentity, AbstractEventLogger logger) {
@@ -89,6 +96,8 @@
             return;
         }
 
+        mAudioTrack.setPlaybackPositionUpdateListener(this);
+
         try {
             byte[] buffer = null;
 
@@ -172,6 +181,55 @@
         }
     }
 
+    /** Immutable holder for one TTS marker; static because it needs no outer-instance state. */
+    private static final class ProgressMarker {
+        // The index in frames of this marker.
+        public final int frames;
+        // The start index in the text of the utterance.
+        public final int start;
+        // The end index (exclusive) in the text of the utterance.
+        public final int end;
+
+        public ProgressMarker(int frames, int start, int end) {
+            this.frames = frames;
+            this.start = start;
+            this.end = end;
+        }
+    }
+
+    /** Set a callback for the first marker in the queue. */
+    void updateMarker() {
+        ProgressMarker marker = markerList.peek();
+        if (marker != null) {
+            // Zero is used to disable the marker. The documentation recommends to use a non-zero
+            // position near zero such as 1.
+            int markerInFrames = marker.frames == 0 ? 1 : marker.frames;
+            mAudioTrack.setNotificationMarkerPosition(markerInFrames);
+        }
+    }
+
+    /** Informs us that at markerInFrames, the range between start and end is about to be spoken. */
+    void rangeStart(int markerInFrames, int start, int end) {
+        markerList.add(new ProgressMarker(markerInFrames, start, end));
+        updateMarker();
+    }
+
+    /** Pops the marker that was just reached, notifies the client, and arms the next marker. */
+    @Override
+    public void onMarkerReached(AudioTrack track) {
+        ProgressMarker marker = markerList.poll();
+        if (marker == null) {
+            Log.e(TAG, "onMarkerReached called but no marker in queue");
+            return;
+        }
+        getDispatcher().dispatchOnUtteranceRangeStart(marker.start, marker.end);
+        // Listen for the next marker. It's ok if that marker is already in the past; in that
+        // case onMarkerReached will be called again.
+        updateMarker();
+    }
+
+    @Override
+    public void onPeriodicNotification(AudioTrack track) {}
 
     void put(byte[] buffer) throws InterruptedException {
         try {

diff --git a/core/java/android/speech/tts/TextToSpeech.java b/core/java/android/speech/tts/TextToSpeech.java
index 24cad95..9a157b7 100644
--- a/core/java/android/speech/tts/TextToSpeech.java
+++ b/core/java/android/speech/tts/TextToSpeech.java

@@ -2103,55 +2103,69 @@
 
         private boolean mEstablished;
 
-        private final ITextToSpeechCallback.Stub mCallback = new ITextToSpeechCallback.Stub() {
-            public void onStop(String utteranceId, boolean isStarted) throws RemoteException {
-                UtteranceProgressListener listener = mUtteranceProgressListener;
-                if (listener != null) {
-                    listener.onStop(utteranceId, isStarted);
-                }
-            };
+        private final ITextToSpeechCallback.Stub mCallback =
+                new ITextToSpeechCallback.Stub() {
+                    public void onStop(String utteranceId, boolean isStarted)
+                            throws RemoteException {
+                        UtteranceProgressListener listener = mUtteranceProgressListener;
+                        if (listener != null) {
+                            listener.onStop(utteranceId, isStarted);
+                        }
+                    };
 
-            @Override
-            public void onSuccess(String utteranceId) {
-                UtteranceProgressListener listener = mUtteranceProgressListener;
-                if (listener != null) {
-                    listener.onDone(utteranceId);
-                }
-            }
+                    @Override
+                    public void onSuccess(String utteranceId) {
+                        UtteranceProgressListener listener = mUtteranceProgressListener;
+                        if (listener != null) {
+                            listener.onDone(utteranceId);
+                        }
+                    }
 
-            @Override
-            public void onError(String utteranceId, int errorCode) {
-                UtteranceProgressListener listener = mUtteranceProgressListener;
-                if (listener != null) {
-                    listener.onError(utteranceId);
-                }
-            }
+                    @Override
+                    public void onError(String utteranceId, int errorCode) {
+                        UtteranceProgressListener listener = mUtteranceProgressListener;
+                        if (listener != null) {
+                            listener.onError(utteranceId);
+                        }
+                    }
 
-            @Override
-            public void onStart(String utteranceId) {
-                UtteranceProgressListener listener = mUtteranceProgressListener;
-                if (listener != null) {
-                    listener.onStart(utteranceId);
-                }
-            }
+                    @Override
+                    public void onStart(String utteranceId) {
+                        UtteranceProgressListener listener = mUtteranceProgressListener;
+                        if (listener != null) {
+                            listener.onStart(utteranceId);
+                        }
+                    }
 
-            @Override
-            public void onBeginSynthesis(String utteranceId, int sampleRateInHz, int audioFormat,
-                                     int channelCount) {
-                UtteranceProgressListener listener = mUtteranceProgressListener;
-                if (listener != null) {
-                    listener.onBeginSynthesis(utteranceId, sampleRateInHz, audioFormat, channelCount);
-                }
-            }
+                    @Override
+                    public void onBeginSynthesis(
+                            String utteranceId,
+                            int sampleRateInHz,
+                            int audioFormat,
+                            int channelCount) {
+                        UtteranceProgressListener listener = mUtteranceProgressListener;
+                        if (listener != null) {
+                            listener.onBeginSynthesis(
+                                    utteranceId, sampleRateInHz, audioFormat, channelCount);
+                        }
+                    }
 
-            @Override
-            public void onAudioAvailable(String utteranceId, byte[] audio) {
-                UtteranceProgressListener listener = mUtteranceProgressListener;
-                if (listener != null) {
-                    listener.onAudioAvailable(utteranceId, audio);
-                }
-            }
-        };
+                    @Override
+                    public void onAudioAvailable(String utteranceId, byte[] audio) {
+                        UtteranceProgressListener listener = mUtteranceProgressListener;
+                        if (listener != null) {
+                            listener.onAudioAvailable(utteranceId, audio);
+                        }
+                    }
+
+                    @Override
+                    public void onUtteranceRangeStart(String utteranceId, int start, int end) {
+                        UtteranceProgressListener listener = mUtteranceProgressListener;
+                        if (listener != null) {
+                            listener.onUtteranceRangeStart(utteranceId, start, end);
+                        }
+                    }
+                };
 
         private class SetupConnectionAsyncTask extends AsyncTask<Void, Void, Integer> {
             private final ComponentName mName;

diff --git a/core/java/android/speech/tts/TextToSpeechService.java b/core/java/android/speech/tts/TextToSpeechService.java
index 55da52b..80d3c8a 100644
--- a/core/java/android/speech/tts/TextToSpeechService.java
+++ b/core/java/android/speech/tts/TextToSpeechService.java

@@ -663,6 +663,8 @@
         void dispatchOnBeginSynthesis(int sampleRateInHz, int audioFormat, int channelCount);
 
         void dispatchOnAudioAvailable(byte[] audio);
+
+        void dispatchOnUtteranceRangeStart(int start, int end);
     }
 
     /** Set of parameters affecting audio output. */
@@ -882,6 +884,15 @@
             }
         }
 
+        @Override
+        public void dispatchOnUtteranceRangeStart(int start, int end) {
+            final String utteranceId = getUtteranceId();
+            if (utteranceId != null) {
+                mCallbacks.dispatchOnUtteranceRangeStart(
+                        getCallerIdentity(), utteranceId, start, end);
+            }
+        }
+
         abstract public String getUtteranceId();
 
         String getStringParam(Bundle params, String key, String defaultValue) {
@@ -1559,6 +1570,17 @@
             }
         }
 
+        public void dispatchOnUtteranceRangeStart(
+                Object callerIdentity, String utteranceId, int start, int end) {
+            ITextToSpeechCallback cb = getCallbackFor(callerIdentity);
+            if (cb == null) return;
+            try {
+                cb.onUtteranceRangeStart(utteranceId, start, end);
+            } catch (RemoteException e) {
+                Log.e(TAG, "Callback dispatchOnUtteranceRangeStart(String, int, int) failed: " + e);
+            }
+        }
+
         @Override
         public void onCallbackDied(ITextToSpeechCallback callback, Object cookie) {
             IBinder caller = (IBinder) cookie;

diff --git a/core/java/android/speech/tts/UtteranceProgressListener.java b/core/java/android/speech/tts/UtteranceProgressListener.java
index 72a5228..0ee3769 100644
--- a/core/java/android/speech/tts/UtteranceProgressListener.java
+++ b/core/java/android/speech/tts/UtteranceProgressListener.java

@@ -122,8 +122,24 @@
     }
 
     /**
-     * Wraps an old deprecated OnUtteranceCompletedListener with a shiny new
-     * progress listener.
+     * This is called when the TTS service is about to speak the specified range of the utterance
+     * with the given utteranceId.
+     *
+     * <p>This method is called when the audio is expected to start playing on the speaker. Note
+     * that this is different from {@link #onAudioAvailable} which is called as soon as the audio is
+     * generated.
+     *
+     * <p>Only called if the engine supplies timing information by calling {@link
+     * SynthesisCallback#rangeStart(int, int, int)}.
+     *
+     * @param utteranceId Unique id identifying the synthesis request.
+     * @param start The start index of the range in the utterance text.
+     * @param end The end index of the range (exclusive) in the utterance text.
+     */
+    public void onUtteranceRangeStart(String utteranceId, int start, int end) {}
+
+    /**
+     * Wraps an old deprecated OnUtteranceCompletedListener with a shiny new progress listener.
      *
      * @hide
      */
commit	f549a5c3f51d09e605b855fffc48a57185dc9093	[log] [tgz]
author	Niels Egberts <nielse@google.com>	Tue Jan 24 17:19:30 2017 +0000
committer	Android (Google) Code Review <android-gerrit@google.com>	Tue Jan 24 17:19:34 2017 +0000
tree	e7435fcaa38bd0d3fed1433a92df9a41832a6231
parent	f779f91cd2c944a2c7c6ecbae124847fc3d7a034 [diff]
parent	65c50784564d0bae9276fde5472dd8898a781bcd [diff]