blob: 1f48a92c42bbdd8af9cbb2324499e89eb76ead74 [file] [log] [blame]
/**
* Copyright (C) 2014 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package android.hardware.soundtrigger;
import android.os.Handler;
import java.util.ArrayList;
import java.util.UUID;
/**
* The SoundTrigger class provides access via JNI to the native service managing
* the sound trigger HAL.
*
* @hide
*/
public class SoundTrigger {
/*
 * Status codes returned by the sound trigger API. The descriptions below follow
 * the return-value documentation of listModules() in this class.
 * NOTE(review): the negative values look like negated errno codes coming from
 * the native layer - confirm against the native service.
 */
/** The operation completed successfully. */
public static final int STATUS_OK = 0;
/** Unspecified error. */
public static final int STATUS_ERROR = Integer.MIN_VALUE;
/** The caller does not have the required (system) permission. */
public static final int STATUS_PERMISSION_DENIED = -1;
/** The native service cannot be reached. */
public static final int STATUS_NO_INIT = -19;
/** An argument was invalid (e.g. a null list passed to listModules()). */
public static final int STATUS_BAD_VALUE = -22;
/** The binder transaction to the native service failed. */
public static final int STATUS_DEAD_OBJECT = -32;
/**
 * NOTE(review): not described anywhere in this file; presumably the requested
 * operation is not valid in the current state - confirm.
 */
public static final int STATUS_INVALID_OPERATION = -38;
/**
 * Immutable description of one sound trigger hardware module managed by the
 * native sound trigger service.
 *
 * <p>Every module has a unique {@link #id}, which is what API calls use to
 * target that particular module. Module properties are returned by the
 * listModules() method.
 */
public static class ModuleProperties {

    // --- Identity of the module and of its voice detection engine ---

    /** Unique module ID provided by the native service. */
    public final int id;

    /** Human readable name of the voice detection engine implementor. */
    public final String implementor;

    /** Human readable description of the voice detection engine. */
    public final String description;

    /** Unique voice engine ID (changes with each version). */
    public final UUID uuid;

    /** Voice detection engine version. */
    public final int version;

    // --- Capacity limits ---

    /** Maximum number of active sound models. */
    public final int maxSoundModels;

    /** Maximum number of key phrases. */
    public final int maxKeyphrases;

    /** Maximum number of users per key phrase. */
    public final int maxUsers;

    // --- Capabilities ---

    /** Supported recognition modes (bit field, RECOGNITION_MODE_VOICE_TRIGGER ...). */
    public final int recognitionModes;

    /** Whether a seamless transition to capture mode after recognition is supported. */
    public final boolean supportsCaptureTransition;

    /** Maximum buffering capacity in ms if {@link #supportsCaptureTransition} is true. */
    public final int maxBufferMs;

    /** Whether capture by other use cases is supported while detection is active. */
    public final boolean supportsConcurrentCapture;

    /** Rated power consumption when detection is active with TBD silence/sound/speech ratio. */
    public final int powerConsumptionMw;

    /**
     * Builds a module descriptor from raw values.
     *
     * @param uuid textual form of the engine UUID; it is parsed with
     *             {@link UUID#fromString(String)}, so a malformed string makes
     *             construction fail with the exception thrown by that method
     */
    ModuleProperties(int id, String implementor, String description,
            String uuid, int version, int maxSoundModels, int maxKeyphrases,
            int maxUsers, int recognitionModes, boolean supportsCaptureTransition,
            int maxBufferMs, boolean supportsConcurrentCapture,
            int powerConsumptionMw) {
        // Parse the only non-trivial argument up front; everything else is a plain copy.
        final UUID engineUuid = UUID.fromString(uuid);

        this.id = id;
        this.implementor = implementor;
        this.description = description;
        this.uuid = engineUuid;
        this.version = version;

        this.maxSoundModels = maxSoundModels;
        this.maxKeyphrases = maxKeyphrases;
        this.maxUsers = maxUsers;

        this.recognitionModes = recognitionModes;
        this.supportsCaptureTransition = supportsCaptureTransition;
        this.maxBufferMs = maxBufferMs;
        this.supportsConcurrentCapture = supportsConcurrentCapture;
        this.powerConsumptionMw = powerConsumptionMw;
    }
}
/**
 * Attributes plus binary payload that the hardware implementation uses to
 * detect one particular sound pattern.
 *
 * <p>Key phrase models use the specialized {@link KeyphraseSoundModel}.
 */
public static class SoundModel {

    /** Undefined sound model type. */
    public static final int TYPE_UNKNOWN = -1;

    /** Keyphrase sound model. */
    public static final int TYPE_KEYPHRASE = 0;

    /** Unique sound model identifier. */
    public final UUID uuid;

    /** Sound model type (e.g. {@link #TYPE_KEYPHRASE}). */
    public final int type;

    /** Opaque data, for use by the vendor implementation and the enrollment application. */
    public final byte[] data;

    /**
     * Creates a sound model. The {@code data} array is stored by reference,
     * not copied.
     *
     * @param uuid unique identifier of the model
     * @param type model type, e.g. {@link #TYPE_KEYPHRASE}
     * @param data opaque model payload
     */
    public SoundModel(UUID uuid, int type, byte[] data) {
        this.type = type;
        this.uuid = uuid;
        this.data = data;
    }
}
/**
 * Descriptor of a single key phrase that a {@link KeyphraseSoundModel} is able
 * to detect.
 */
public static class Keyphrase {

    /** Unique identifier for this keyphrase. */
    public final int id;

    /** Recognition modes supported for this key phrase in the model. */
    public final int recognitionModes;

    /** Locale of the keyphrase, as a Java Locale string, e.g. en_US. */
    public final String locale;

    /** Key phrase text. */
    public final String text;

    /**
     * Users this key phrase has been trained for. Contains sound trigger specific
     * user IDs derived from system user IDs
     * {@link android.os.UserHandle#getIdentifier()}.
     */
    public final int[] users;

    /**
     * Creates a key phrase descriptor. The {@code users} array is stored by
     * reference, not copied.
     */
    public Keyphrase(int id, int recognitionModes, String locale, String text, int[] users) {
        // Identification first, then what is recognized and for whom.
        this.id = id;
        this.text = text;
        this.locale = locale;
        this.recognitionModes = recognitionModes;
        this.users = users;
    }
}
/**
 * A {@link SoundModel} specialized for key phrases.
 *
 * <p>Besides the data the hardware needs to detect a certain number of key
 * phrases, it carries the list of matching {@link Keyphrase} descriptors.
 */
public static class KeyphraseSoundModel extends SoundModel {

    /** Key phrases in this sound model. */
    public final Keyphrase[] keyphrases;

    /**
     * Creates a key phrase sound model; the model type is always
     * {@link SoundModel#TYPE_KEYPHRASE}.
     *
     * @param id         unique identifier of the model
     * @param data       opaque model payload
     * @param keyphrases descriptors of the key phrases in the model (stored by reference)
     */
    public KeyphraseSoundModel(UUID id, byte[] data, Keyphrase[] keyphrases) {
        super(id, SoundModel.TYPE_KEYPHRASE, data);
        this.keyphrases = keyphrases;
    }
}
/**
 * Modes for key phrase recognition.
 * Each mode has its own bit, matching the "bit field" wording used for
 * the recognitionModes fields in this class.
 */
/** Simple recognition of the key phrase */
public static final int RECOGNITION_MODE_VOICE_TRIGGER = 0x1;
/** Trigger only if one user is identified */
public static final int RECOGNITION_MODE_USER_IDENTIFICATION = 0x2;
/** Trigger only if one user is authenticated */
public static final int RECOGNITION_MODE_USER_AUTHENTICATION = 0x4;
/**
 * Status codes for {@link RecognitionEvent}
 */
/** Recognition success */
public static final int RECOGNITION_STATUS_SUCCESS = 0;
/** Recognition aborted (e.g. capture preempted by another use case) */
public static final int RECOGNITION_STATUS_ABORT = 1;
/** Recognition failure */
public static final int RECOGNITION_STATUS_FAILURE = 2;
/**
 * Event delivered through
 * {@link StatusListener#onRecognition(RecognitionEvent)} when a recognition
 * succeeds or fails.
 */
public static class RecognitionEvent {

    /** Recognition status, e.g. {@link #RECOGNITION_STATUS_SUCCESS}. */
    public final int status;

    /** Sound model corresponding to this event callback. */
    public final int soundModelHandle;

    /** True if it is possible to capture audio from this utterance buffered by the hardware. */
    public final boolean captureAvailable;

    /**
     * Audio session ID to be used when capturing the utterance with an AudioRecord
     * if {@link #captureAvailable} is true.
     */
    public final int captureSession;

    /**
     * Delay in ms between end of model detection and start of audio available for
     * capture. A negative value is possible (e.g. if keyphrase is also available
     * for capture).
     */
    public final int captureDelayMs;

    /** Duration in ms of audio captured before the start of the trigger. 0 if none. */
    public final int capturePreambleMs;

    /**
     * Opaque data for use by system applications who know about voice engine
     * internals, typically during enrollment.
     */
    public final byte[] data;

    /**
     * Creates an event from raw values. The {@code data} array is stored by
     * reference, not copied.
     */
    RecognitionEvent(int status, int soundModelHandle, boolean captureAvailable,
            int captureSession, int captureDelayMs, int capturePreambleMs, byte[] data) {
        // Outcome and originating model.
        this.status = status;
        this.soundModelHandle = soundModelHandle;

        // Capture-related information.
        this.captureAvailable = captureAvailable;
        this.captureSession = captureSession;
        this.captureDelayMs = captureDelayMs;
        this.capturePreambleMs = capturePreambleMs;

        // Engine-specific payload.
        this.data = data;
    }
}
/**
 * Configuration of a recognition request, passed to
 * {@link SoundTriggerModule#startRecognition(int, RecognitionConfig)}.
 */
public static class RecognitionConfig {

    /**
     * True if the DSP should capture the trigger sound and make it available for
     * further capture.
     */
    public final boolean captureRequested;

    /**
     * List of all keyphrases in the sound model for which recognition should be
     * performed, with options for each keyphrase.
     */
    public final KeyphraseRecognitionExtra[] keyphrases;

    /**
     * Opaque data for use by system applications who know about voice engine
     * internals, typically during enrollment.
     */
    public final byte[] data;

    /**
     * Creates a recognition configuration. Both arrays are stored by reference,
     * not copied.
     */
    public RecognitionConfig(boolean captureRequested,
            KeyphraseRecognitionExtra[] keyphrases, byte[] data) {
        this.keyphrases = keyphrases;
        this.data = data;
        this.captureRequested = captureRequested;
    }
}
/**
 * Confidence level attached to one user of a keyphrase.
 * <ul>
 *   <li>The confidence level is expressed in percent (0% - 100%). Inside a
 *       {@link KeyphraseRecognitionEvent} it is the confidence level that was
 *       detected; inside a {@link RecognitionConfig} it is the minimum confidence
 *       level that should trigger a recognition.</li>
 *   <li>The user ID is derived from the system ID
 *       {@link android.os.UserHandle#getIdentifier()}.</li>
 * </ul>
 */
public static class ConfidenceLevel {

    /** User the confidence level applies to. */
    public final int userId;

    /** Confidence level in percent. */
    public final int confidenceLevel;

    /** Pairs a user ID with a confidence level; the values are stored as given. */
    public ConfidenceLevel(int userId, int confidenceLevel) {
        this.confidenceLevel = confidenceLevel;
        this.userId = userId;
    }
}
/**
 * Per-keyphrase data. It is conveyed by a {@link KeyphraseRecognitionEvent} for
 * a key phrase detection and is also used by {@link RecognitionConfig} to list
 * the keyphrases to recognize.
 */
public static class KeyphraseRecognitionExtra {

    /** The keyphrase ID. */
    public final int id;

    /** Recognition modes matched for this event. */
    public final int recognitionModes;

    /**
     * Confidence levels for all users recognized (KeyphraseRecognitionEvent) or
     * to be recognized (RecognitionConfig).
     */
    public final ConfidenceLevel[] confidenceLevels;

    /**
     * Creates the per-keyphrase data. The {@code confidenceLevels} array is
     * stored by reference, not copied.
     */
    public KeyphraseRecognitionExtra(int id, int recognitionModes,
            ConfidenceLevel[] confidenceLevels) {
        this.confidenceLevels = confidenceLevels;
        this.recognitionModes = recognitionModes;
        this.id = id;
    }
}
/**
 * A {@link RecognitionEvent} specialized for a key phrase detection.
 */
public static class KeyphraseRecognitionEvent extends RecognitionEvent {

    /** Indicates if the key phrase is present in the buffered audio available for capture. */
    public final boolean keyphraseInCapture;

    /** Additional data available for each recognized key phrase in the model. */
    public final KeyphraseRecognitionExtra[] keyphraseExtras;

    /**
     * Creates a key phrase event. The first seven arguments are forwarded
     * unchanged to the {@link RecognitionEvent} constructor; the
     * {@code keyphraseExtras} array is stored by reference, not copied.
     */
    KeyphraseRecognitionEvent(int status, int soundModelHandle, boolean captureAvailable,
            int captureSession, int captureDelayMs, int capturePreambleMs, byte[] data,
            boolean keyphraseInCapture, KeyphraseRecognitionExtra[] keyphraseExtras) {
        super(status,
                soundModelHandle,
                captureAvailable,
                captureSession,
                captureDelayMs,
                capturePreambleMs,
                data);
        this.keyphraseExtras = keyphraseExtras;
        this.keyphraseInCapture = keyphraseInCapture;
    }
}
/**
 * Returns a list of descriptors for all hardware modules loaded.
 * This is a native method; its implementation is not part of this file.
 * @param modules A ModuleProperties array where the list will be returned.
 * @return - {@link #STATUS_OK} in case of success
 *         - {@link #STATUS_ERROR} in case of unspecified error
 *         - {@link #STATUS_PERMISSION_DENIED} if the caller does not have system permission
 *         - {@link #STATUS_NO_INIT} if the native service cannot be reached
 *         - {@link #STATUS_BAD_VALUE} if modules is null
 *         - {@link #STATUS_DEAD_OBJECT} if the binder transaction to the native service fails
 */
public static native int listModules(ArrayList <ModuleProperties> modules);
/**
 * Obtains an interface on a hardware module, used to control sound models and
 * recognition on that module.
 *
 * @param moduleId Sound module system identifier {@link ModuleProperties#id}. Mandatory.
 * @param listener {@link StatusListener} interface. Mandatory.
 * @param handler the Handler that will receive the callbacks. Can be null if the
 *                default handler is OK.
 * @return a valid sound module in case of success or null in case of error.
 */
public static SoundTriggerModule attachModule(int moduleId,
        StatusListener listener,
        Handler handler) {
    // The listener is mandatory: a missing one is the error case that yields null.
    return (listener != null)
            ? new SoundTriggerModule(moduleId, listener, handler)
            : null;
}
/**
* Interface provided by the client application when attaching to a {@link SoundTriggerModule}
* to received recognition and error notifications.
*/
public static interface StatusListener {
/**
* Called when recognition succeeds of fails
*/
public abstract void onRecognition(RecognitionEvent event);
/**
* Called when the sound trigger native service dies
*/
public abstract void onServiceDied();
}
}