/**
 * Copyright (C) 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
| 16 | |
| 17 | package android.media.soundtrigger; |
| 18 | |
Arunesh Mishra | 3fff7f5 | 2016-02-09 12:15:19 -0800 | [diff] [blame^] | 19 | import android.annotation.IntDef; |
Arunesh Mishra | a772e5f | 2016-01-25 10:33:11 -0800 | [diff] [blame] | 20 | import android.annotation.NonNull; |
| 21 | import android.annotation.Nullable; |
| 22 | import android.annotation.SystemApi; |
| 23 | import android.hardware.soundtrigger.IRecognitionStatusCallback; |
| 24 | import android.hardware.soundtrigger.SoundTrigger; |
Arunesh Mishra | 3fff7f5 | 2016-02-09 12:15:19 -0800 | [diff] [blame^] | 25 | import android.hardware.soundtrigger.SoundTrigger.RecognitionConfig; |
| 26 | import android.media.AudioFormat; |
Arunesh Mishra | a772e5f | 2016-01-25 10:33:11 -0800 | [diff] [blame] | 27 | import android.os.Handler; |
Arunesh Mishra | 3fff7f5 | 2016-02-09 12:15:19 -0800 | [diff] [blame^] | 28 | import android.os.Looper; |
| 29 | import android.os.Message; |
Arunesh Mishra | a772e5f | 2016-01-25 10:33:11 -0800 | [diff] [blame] | 30 | import android.os.ParcelUuid; |
| 31 | import android.os.RemoteException; |
| 32 | import android.util.Slog; |
| 33 | |
| 34 | import com.android.internal.app.ISoundTriggerService; |
| 35 | |
| 36 | import java.io.PrintWriter; |
Arunesh Mishra | 3fff7f5 | 2016-02-09 12:15:19 -0800 | [diff] [blame^] | 37 | import java.lang.annotation.Retention; |
| 38 | import java.lang.annotation.RetentionPolicy; |
Arunesh Mishra | a772e5f | 2016-01-25 10:33:11 -0800 | [diff] [blame] | 39 | import java.util.UUID; |
| 40 | |
| 41 | /** |
| 42 | * A class that allows interaction with the actual sound trigger detection on the system. |
| 43 | * Sound trigger detection refers to a detectors that match generic sound patterns that are |
| 44 | * not voice-based. The voice-based recognition models should utilize the {@link |
| 45 | * VoiceInteractionService} instead. Access to this class is protected by a permission |
| 46 | * granted only to system or privileged apps. |
| 47 | * |
| 48 | * @hide |
| 49 | */ |
Arunesh Mishra | 6a8fd79 | 2016-01-27 16:20:08 -0800 | [diff] [blame] | 50 | @SystemApi |
Arunesh Mishra | a772e5f | 2016-01-25 10:33:11 -0800 | [diff] [blame] | 51 | public final class SoundTriggerDetector { |
| 52 | private static final boolean DBG = false; |
| 53 | private static final String TAG = "SoundTriggerDetector"; |
| 54 | |
Arunesh Mishra | 3fff7f5 | 2016-02-09 12:15:19 -0800 | [diff] [blame^] | 55 | private static final int MSG_AVAILABILITY_CHANGED = 1; |
| 56 | private static final int MSG_SOUND_TRIGGER_DETECTED = 2; |
| 57 | private static final int MSG_DETECTION_ERROR = 3; |
| 58 | private static final int MSG_DETECTION_PAUSE = 4; |
| 59 | private static final int MSG_DETECTION_RESUME = 5; |
| 60 | |
Arunesh Mishra | a772e5f | 2016-01-25 10:33:11 -0800 | [diff] [blame] | 61 | private final Object mLock = new Object(); |
| 62 | |
| 63 | private final ISoundTriggerService mSoundTriggerService; |
| 64 | private final UUID mSoundModelId; |
| 65 | private final Callback mCallback; |
| 66 | private final Handler mHandler; |
| 67 | private final RecognitionCallback mRecognitionCallback; |
| 68 | |
Arunesh Mishra | 3fff7f5 | 2016-02-09 12:15:19 -0800 | [diff] [blame^] | 69 | /** @hide */ |
| 70 | @Retention(RetentionPolicy.SOURCE) |
| 71 | @IntDef(flag = true, |
| 72 | value = { |
| 73 | RECOGNITION_FLAG_NONE, |
| 74 | RECOGNITION_FLAG_CAPTURE_TRIGGER_AUDIO, |
| 75 | RECOGNITION_FLAG_ALLOW_MULTIPLE_TRIGGERS |
| 76 | }) |
| 77 | public @interface RecognitionFlags {} |
| 78 | |
| 79 | /** |
| 80 | * Empty flag for {@link #startRecognition(int)}. |
| 81 | * |
| 82 | * @hide |
| 83 | */ |
| 84 | public static final int RECOGNITION_FLAG_NONE = 0; |
| 85 | |
| 86 | /** |
| 87 | * Recognition flag for {@link #startRecognition(int)} that indicates |
| 88 | * whether the trigger audio for hotword needs to be captured. |
| 89 | */ |
| 90 | public static final int RECOGNITION_FLAG_CAPTURE_TRIGGER_AUDIO = 0x1; |
| 91 | |
| 92 | /** |
| 93 | * Recognition flag for {@link #startRecognition(int)} that indicates |
| 94 | * whether the recognition should keep going on even after the |
| 95 | * model triggers. |
| 96 | * If this flag is specified, it's possible to get multiple |
| 97 | * triggers after a call to {@link #startRecognition(int)}, if the model |
| 98 | * triggers multiple times. |
| 99 | * When this isn't specified, the default behavior is to stop recognition once the |
| 100 | * trigger happenss, till the caller starts recognition again. |
| 101 | */ |
| 102 | public static final int RECOGNITION_FLAG_ALLOW_MULTIPLE_TRIGGERS = 0x2; |
| 103 | |
| 104 | /** |
| 105 | * Additional payload for {@link Callback#onDetected}. |
| 106 | */ |
| 107 | public static class EventPayload { |
| 108 | private final boolean mTriggerAvailable; |
| 109 | |
| 110 | // Indicates if {@code captureSession} can be used to continue capturing more audio |
| 111 | // from the DSP hardware. |
| 112 | private final boolean mCaptureAvailable; |
| 113 | // The session to use when attempting to capture more audio from the DSP hardware. |
| 114 | private final int mCaptureSession; |
| 115 | private final AudioFormat mAudioFormat; |
| 116 | // Raw data associated with the event. |
| 117 | // This is the audio that triggered the keyphrase if {@code isTriggerAudio} is true. |
| 118 | private final byte[] mData; |
| 119 | |
| 120 | private EventPayload(boolean triggerAvailable, boolean captureAvailable, |
| 121 | AudioFormat audioFormat, int captureSession, byte[] data) { |
| 122 | mTriggerAvailable = triggerAvailable; |
| 123 | mCaptureAvailable = captureAvailable; |
| 124 | mCaptureSession = captureSession; |
| 125 | mAudioFormat = audioFormat; |
| 126 | mData = data; |
| 127 | } |
| 128 | |
| 129 | /** |
| 130 | * Gets the format of the audio obtained using {@link #getTriggerAudio()}. |
| 131 | * May be null if there's no audio present. |
| 132 | */ |
| 133 | @Nullable |
| 134 | public AudioFormat getCaptureAudioFormat() { |
| 135 | return mAudioFormat; |
| 136 | } |
| 137 | |
| 138 | /** |
| 139 | * Gets the raw audio that triggered the keyphrase. |
| 140 | * This may be null if the trigger audio isn't available. |
| 141 | * If non-null, the format of the audio can be obtained by calling |
| 142 | * {@link #getCaptureAudioFormat()}. |
| 143 | * |
| 144 | * @see AlwaysOnHotwordDetector#RECOGNITION_FLAG_CAPTURE_TRIGGER_AUDIO |
| 145 | */ |
| 146 | @Nullable |
| 147 | public byte[] getTriggerAudio() { |
| 148 | if (mTriggerAvailable) { |
| 149 | return mData; |
| 150 | } else { |
| 151 | return null; |
| 152 | } |
| 153 | } |
| 154 | |
| 155 | /** |
| 156 | * Gets the session ID to start a capture from the DSP. |
| 157 | * This may be null if streaming capture isn't possible. |
| 158 | * If non-null, the format of the audio that can be captured can be |
| 159 | * obtained using {@link #getCaptureAudioFormat()}. |
| 160 | * |
| 161 | * TODO: Candidate for Public API when the API to start capture with a session ID |
| 162 | * is made public. |
| 163 | * |
| 164 | * TODO: Add this to {@link #getCaptureAudioFormat()}: |
| 165 | * "Gets the format of the audio obtained using {@link #getTriggerAudio()} |
| 166 | * or {@link #getCaptureSession()}. May be null if no audio can be obtained |
| 167 | * for either the trigger or a streaming session." |
| 168 | * |
| 169 | * TODO: Should this return a known invalid value instead? |
| 170 | * |
| 171 | * @hide |
| 172 | */ |
| 173 | @Nullable |
| 174 | public Integer getCaptureSession() { |
| 175 | if (mCaptureAvailable) { |
| 176 | return mCaptureSession; |
| 177 | } else { |
| 178 | return null; |
| 179 | } |
| 180 | } |
| 181 | } |
| 182 | |
| 183 | public static abstract class Callback { |
Arunesh Mishra | a772e5f | 2016-01-25 10:33:11 -0800 | [diff] [blame] | 184 | /** |
| 185 | * Called when the availability of the sound model changes. |
| 186 | */ |
| 187 | public abstract void onAvailabilityChanged(int status); |
| 188 | |
| 189 | /** |
| 190 | * Called when the sound model has triggered (such as when it matched a |
| 191 | * given sound pattern). |
| 192 | */ |
Arunesh Mishra | 3fff7f5 | 2016-02-09 12:15:19 -0800 | [diff] [blame^] | 193 | public abstract void onDetected(@NonNull EventPayload eventPayload); |
Arunesh Mishra | a772e5f | 2016-01-25 10:33:11 -0800 | [diff] [blame] | 194 | |
| 195 | /** |
| 196 | * Called when the detection fails due to an error. |
| 197 | */ |
| 198 | public abstract void onError(); |
| 199 | |
| 200 | /** |
| 201 | * Called when the recognition is paused temporarily for some reason. |
| 202 | * This is an informational callback, and the clients shouldn't be doing anything here |
| 203 | * except showing an indication on their UI if they have to. |
| 204 | */ |
| 205 | public abstract void onRecognitionPaused(); |
| 206 | |
| 207 | /** |
| 208 | * Called when the recognition is resumed after it was temporarily paused. |
| 209 | * This is an informational callback, and the clients shouldn't be doing anything here |
| 210 | * except showing an indication on their UI if they have to. |
| 211 | */ |
| 212 | public abstract void onRecognitionResumed(); |
| 213 | } |
| 214 | |
| 215 | /** |
| 216 | * This class should be constructed by the {@link SoundTriggerManager}. |
| 217 | * @hide |
| 218 | */ |
| 219 | SoundTriggerDetector(ISoundTriggerService soundTriggerService, UUID soundModelId, |
| 220 | @NonNull Callback callback, @Nullable Handler handler) { |
| 221 | mSoundTriggerService = soundTriggerService; |
| 222 | mSoundModelId = soundModelId; |
| 223 | mCallback = callback; |
| 224 | if (handler == null) { |
Arunesh Mishra | 3fff7f5 | 2016-02-09 12:15:19 -0800 | [diff] [blame^] | 225 | mHandler = new MyHandler(); |
Arunesh Mishra | a772e5f | 2016-01-25 10:33:11 -0800 | [diff] [blame] | 226 | } else { |
Arunesh Mishra | 3fff7f5 | 2016-02-09 12:15:19 -0800 | [diff] [blame^] | 227 | mHandler = new MyHandler(handler.getLooper()); |
Arunesh Mishra | a772e5f | 2016-01-25 10:33:11 -0800 | [diff] [blame] | 228 | } |
| 229 | mRecognitionCallback = new RecognitionCallback(); |
| 230 | } |
| 231 | |
| 232 | /** |
| 233 | * Starts recognition on the associated sound model. Result is indicated via the |
| 234 | * {@link Callback}. |
| 235 | * @return Indicates whether the call succeeded or not. |
| 236 | */ |
Arunesh Mishra | 3fff7f5 | 2016-02-09 12:15:19 -0800 | [diff] [blame^] | 237 | public boolean startRecognition(@RecognitionFlags int recognitionFlags) { |
Arunesh Mishra | a772e5f | 2016-01-25 10:33:11 -0800 | [diff] [blame] | 238 | if (DBG) { |
| 239 | Slog.d(TAG, "startRecognition()"); |
| 240 | } |
Arunesh Mishra | 3fff7f5 | 2016-02-09 12:15:19 -0800 | [diff] [blame^] | 241 | boolean captureTriggerAudio = |
| 242 | (recognitionFlags & RECOGNITION_FLAG_CAPTURE_TRIGGER_AUDIO) != 0; |
| 243 | |
| 244 | boolean allowMultipleTriggers = |
| 245 | (recognitionFlags & RECOGNITION_FLAG_ALLOW_MULTIPLE_TRIGGERS) != 0; |
Arunesh Mishra | a772e5f | 2016-01-25 10:33:11 -0800 | [diff] [blame] | 246 | try { |
| 247 | mSoundTriggerService.startRecognition(new ParcelUuid(mSoundModelId), |
Arunesh Mishra | 3fff7f5 | 2016-02-09 12:15:19 -0800 | [diff] [blame^] | 248 | mRecognitionCallback, new RecognitionConfig(captureTriggerAudio, |
| 249 | allowMultipleTriggers, null, null)); |
Arunesh Mishra | a772e5f | 2016-01-25 10:33:11 -0800 | [diff] [blame] | 250 | } catch (RemoteException e) { |
| 251 | return false; |
| 252 | } |
| 253 | return true; |
| 254 | } |
| 255 | |
| 256 | /** |
| 257 | * Stops recognition for the associated model. |
| 258 | */ |
| 259 | public boolean stopRecognition() { |
| 260 | try { |
| 261 | mSoundTriggerService.stopRecognition(new ParcelUuid(mSoundModelId), |
| 262 | mRecognitionCallback); |
| 263 | } catch (RemoteException e) { |
| 264 | return false; |
| 265 | } |
| 266 | return true; |
| 267 | } |
| 268 | |
Arunesh Mishra | 6a8fd79 | 2016-01-27 16:20:08 -0800 | [diff] [blame] | 269 | /** |
| 270 | * @hide |
| 271 | */ |
Arunesh Mishra | a772e5f | 2016-01-25 10:33:11 -0800 | [diff] [blame] | 272 | public void dump(String prefix, PrintWriter pw) { |
| 273 | synchronized (mLock) { |
| 274 | // TODO: Dump useful debug information. |
| 275 | } |
| 276 | } |
| 277 | |
| 278 | /** |
| 279 | * Callback that handles events from the lower sound trigger layer. |
Arunesh Mishra | 3fff7f5 | 2016-02-09 12:15:19 -0800 | [diff] [blame^] | 280 | * |
| 281 | * Note that these callbacks will be called synchronously from the SoundTriggerService |
| 282 | * layer and thus should do minimal work (such as sending a message on a handler to do |
| 283 | * the real work). |
Arunesh Mishra | a772e5f | 2016-01-25 10:33:11 -0800 | [diff] [blame] | 284 | * @hide |
| 285 | */ |
Arunesh Mishra | 3fff7f5 | 2016-02-09 12:15:19 -0800 | [diff] [blame^] | 286 | private class RecognitionCallback extends IRecognitionStatusCallback.Stub { |
Arunesh Mishra | a772e5f | 2016-01-25 10:33:11 -0800 | [diff] [blame] | 287 | |
| 288 | /** |
| 289 | * @hide |
| 290 | */ |
| 291 | @Override |
| 292 | public void onDetected(SoundTrigger.RecognitionEvent event) { |
Arunesh Mishra | 3fff7f5 | 2016-02-09 12:15:19 -0800 | [diff] [blame^] | 293 | Slog.d(TAG, "onDetected()" + event); |
| 294 | Message.obtain(mHandler, |
| 295 | MSG_SOUND_TRIGGER_DETECTED, |
| 296 | new EventPayload(event.triggerInData, event.captureAvailable, |
| 297 | event.captureFormat, event.captureSession, event.data)) |
| 298 | .sendToTarget(); |
Arunesh Mishra | a772e5f | 2016-01-25 10:33:11 -0800 | [diff] [blame] | 299 | } |
| 300 | |
| 301 | /** |
| 302 | * @hide |
| 303 | */ |
| 304 | @Override |
| 305 | public void onError(int status) { |
Arunesh Mishra | 3fff7f5 | 2016-02-09 12:15:19 -0800 | [diff] [blame^] | 306 | Slog.d(TAG, "onError()" + status); |
| 307 | mHandler.sendEmptyMessage(MSG_DETECTION_ERROR); |
Arunesh Mishra | a772e5f | 2016-01-25 10:33:11 -0800 | [diff] [blame] | 308 | } |
| 309 | |
| 310 | /** |
| 311 | * @hide |
| 312 | */ |
| 313 | @Override |
| 314 | public void onRecognitionPaused() { |
Arunesh Mishra | 3fff7f5 | 2016-02-09 12:15:19 -0800 | [diff] [blame^] | 315 | Slog.d(TAG, "onRecognitionPaused()"); |
| 316 | mHandler.sendEmptyMessage(MSG_DETECTION_PAUSE); |
Arunesh Mishra | a772e5f | 2016-01-25 10:33:11 -0800 | [diff] [blame] | 317 | } |
| 318 | |
| 319 | /** |
| 320 | * @hide |
| 321 | */ |
| 322 | @Override |
| 323 | public void onRecognitionResumed() { |
Arunesh Mishra | 3fff7f5 | 2016-02-09 12:15:19 -0800 | [diff] [blame^] | 324 | Slog.d(TAG, "onRecognitionResumed()"); |
| 325 | mHandler.sendEmptyMessage(MSG_DETECTION_RESUME); |
| 326 | } |
| 327 | } |
| 328 | |
| 329 | private class MyHandler extends Handler { |
| 330 | |
| 331 | MyHandler() { |
| 332 | super(); |
| 333 | } |
| 334 | |
| 335 | MyHandler(Looper looper) { |
| 336 | super(looper); |
| 337 | } |
| 338 | |
| 339 | @Override |
| 340 | public void handleMessage(Message msg) { |
| 341 | if (mCallback == null) { |
| 342 | Slog.w(TAG, "Received message: " + msg.what + " for NULL callback."); |
| 343 | return; |
| 344 | } |
| 345 | switch (msg.what) { |
| 346 | case MSG_SOUND_TRIGGER_DETECTED: |
| 347 | mCallback.onDetected((EventPayload) msg.obj); |
| 348 | break; |
| 349 | case MSG_DETECTION_ERROR: |
| 350 | mCallback.onError(); |
| 351 | break; |
| 352 | case MSG_DETECTION_PAUSE: |
| 353 | mCallback.onRecognitionPaused(); |
| 354 | break; |
| 355 | case MSG_DETECTION_RESUME: |
| 356 | mCallback.onRecognitionResumed(); |
| 357 | break; |
| 358 | default: |
| 359 | super.handleMessage(msg); |
| 360 | |
| 361 | } |
Arunesh Mishra | a772e5f | 2016-01-25 10:33:11 -0800 | [diff] [blame] | 362 | } |
| 363 | } |
| 364 | } |