Blame - audio/include/system/sound_trigger.h - fp2-dev/platform/system/media

Rom Lemarchand

9bd6157

2015-04-03 15:12:20 -0700

[diff] [blame]

/*
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/* Include guard; the matching #endif is at the end of this header. */
#ifndef ANDROID_SOUND_TRIGGER_H
#define ANDROID_SOUND_TRIGGER_H

/* bool is used by several of the descriptor structs in this header. */
#include <stdbool.h>
/* NOTE(review): presumably declares audio_config_t, audio_io_handle_t and
 * audio_devices_t, which this header uses but does not define - confirm. */
#include <system/audio.h>

/*
 * Size limits used to dimension the fixed-size arrays of the descriptor structs
 * declared in this header.
 */

/* max length of strings in properties or descriptor structs */
#define SOUND_TRIGGER_MAX_STRING_LEN 64

/* max length of locale string. e.g en_US
 * NOTE(review): "en_US" is 5 characters, so 6 presumably leaves room for the
 * terminating NUL - confirm against users of the locale field. */
#define SOUND_TRIGGER_MAX_LOCALE_LEN 6

/* max number of concurrent users */
#define SOUND_TRIGGER_MAX_USERS 10

/* max number of concurrent phrases */
#define SOUND_TRIGGER_MAX_PHRASES 10

/*
 * State of the sound trigger service.
 */
typedef enum {
    /* The sound trigger service is not initialized */
    SOUND_TRIGGER_STATE_NO_INIT = -1,
    /* The sound trigger service is enabled */
    SOUND_TRIGGER_STATE_ENABLED = 0,
    /* The sound trigger service is disabled */
    SOUND_TRIGGER_STATE_DISABLED = 1
} sound_trigger_service_state_t;

/*
 * Recognition modes. Each mode occupies its own bit, so several modes can be
 * combined in a single unsigned int (see the recognition_modes fields below).
 */

/* simple voice trigger */
#define RECOGNITION_MODE_VOICE_TRIGGER 0x1
/* trigger only if one user in model identified */
#define RECOGNITION_MODE_USER_IDENTIFICATION 0x2
/* trigger only if one user in model authenticated */
#define RECOGNITION_MODE_USER_AUTHENTICATION 0x4

/*
 * Recognition status values, reported in the status field of
 * struct sound_trigger_recognition_event.
 */
#define RECOGNITION_STATUS_SUCCESS 0
#define RECOGNITION_STATUS_ABORT 1
#define RECOGNITION_STATUS_FAILURE 2

/*
 * Sound model status values, reported in the status field of
 * struct sound_trigger_model_event.
 */
#define SOUND_MODEL_STATUS_UPDATED 0

/*
 * Type of a sound model. Also used as the type of a recognition event.
 */
typedef enum {
    /* use for unspecified sound model type */
    SOUND_MODEL_TYPE_UNKNOWN = -1,
    /* use for key phrase sound models */
    SOUND_MODEL_TYPE_KEYPHRASE = 0
} sound_trigger_sound_model_type_t;

/*
 * Unique identifier used for sound trigger implementations, sound models and
 * vendors.
 * NOTE(review): the field names match the usual UUID layout (time_low, time_mid,
 * time_hi_and_version, clock_seq, node) - presumably RFC 4122; confirm.
 */
typedef struct sound_trigger_uuid_s {
    unsigned int timeLow;
    unsigned short timeMid;
    unsigned short timeHiAndVersion;
    unsigned short clockSeq;
    unsigned char node[6];
} sound_trigger_uuid_t;

/*

59

* sound trigger implementation descriptor read by the framework via get_properties().

60

* Used by SoundTrigger service to report to applications and manage concurrency and policy.

61

*/

62

struct sound_trigger_properties {

63

char implementor[SOUND_TRIGGER_MAX_STRING_LEN]; /* implementor name */

64

char description[SOUND_TRIGGER_MAX_STRING_LEN]; /* implementation description */

65

unsigned int version; /* implementation version */

66

sound_trigger_uuid_t uuid; /* unique implementation ID.

67

Must change with version each version */

68

unsigned int max_sound_models; /* maximum number of concurrent sound models

69

loaded */

70

unsigned int max_key_phrases; /* maximum number of key phrases */

71

unsigned int max_users; /* maximum number of concurrent users detected */

72

unsigned int recognition_modes; /* all supported modes.

73

e.g RECOGNITION_MODE_VOICE_TRIGGER */

74

bool capture_transition; /* supports seamless transition from detection

75

to capture */

76

unsigned int max_buffer_ms; /* maximum buffering capacity in ms if

77

capture_transition is true*/

78

bool concurrent_capture; /* supports capture by other use cases while

79

detection is active */

80

bool trigger_in_event; /* returns the trigger capture in event */

81

unsigned int power_consumption_mw; /* Rated power consumption when detection is active

82

with TDB silence/sound/speech ratio */

83

};

84

85

/* Handle identifying a sound trigger module (see struct sound_trigger_module_descriptor). */
typedef int sound_trigger_module_handle_t;

struct sound_trigger_module_descriptor {

88

sound_trigger_module_handle_t handle;

89

struct sound_trigger_properties properties;

90

};

91

92

/* Handle identifying a loaded sound model (see the model field of the event structs). */
typedef int sound_model_handle_t;

/*

95

* Generic sound model descriptor. This struct is the header of a larger block passed to

96

* load_sound_model() and containing the binary data of the sound model.

97

* Proprietary representation of users in binary data must match information indicated

98

* by users field

99

*/

100

struct sound_trigger_sound_model {

101

sound_trigger_sound_model_type_t type; /* model type. e.g. SOUND_MODEL_TYPE_KEYPHRASE */

102

sound_trigger_uuid_t uuid; /* unique sound model ID. */

103

sound_trigger_uuid_t vendor_uuid; /* unique vendor ID. Identifies the engine the

104

sound model was build for */

105

unsigned int data_size; /* size of opaque model data */

106

unsigned int data_offset; /* offset of opaque data start from head of struct

107

(e.g sizeof struct sound_trigger_sound_model) */

108

};

109

110

/* key phrase descriptor */

111

struct sound_trigger_phrase {

112

unsigned int id; /* keyphrase ID */

113

unsigned int recognition_mode; /* recognition modes supported by this key phrase */

114

unsigned int num_users; /* number of users in the key phrase */

115

unsigned int users[SOUND_TRIGGER_MAX_USERS]; /* users ids: (not uid_t but sound trigger

116

specific IDs */

117

char locale[SOUND_TRIGGER_MAX_LOCALE_LEN]; /* locale - JAVA Locale style (e.g. en_US) */

118

char text[SOUND_TRIGGER_MAX_STRING_LEN]; /* phrase text in UTF-8 format. */

};

/*

* Specialized sound model for key phrase detection.

123

* Proprietary representation of key phrases in binary data must match information indicated

124

* by phrases field

125

*/

126

struct sound_trigger_phrase_sound_model {

127

struct sound_trigger_sound_model common;

128

unsigned int num_phrases; /* number of key phrases in model */

129

struct sound_trigger_phrase phrases[SOUND_TRIGGER_MAX_PHRASES];

};

/*

* Generic recognition event sent via recognition callback

135

*/

136

struct sound_trigger_recognition_event {

137

int status; /* recognition status e.g.

138

RECOGNITION_STATUS_SUCCESS */

139

sound_trigger_sound_model_type_t type; /* event type, same as sound model type.

140

e.g. SOUND_MODEL_TYPE_KEYPHRASE */

141

sound_model_handle_t model; /* loaded sound model that triggered the

142

event */

143

bool capture_available; /* it is possible to capture audio from this

144

utterance buffered by the

145

implementation */

146

int capture_session; /* audio session ID. framework use */

147

int capture_delay_ms; /* delay in ms between end of model

148

detection and start of audio available

149

for capture. A negative value is possible

150

(e.g. if key phrase is also available for

151

capture */

152

int capture_preamble_ms; /* duration in ms of audio captured

153

before the start of the trigger.

154

0 if none. */

155

bool trigger_in_data; /* the opaque data is the capture of

156

the trigger sound */

157

audio_config_t audio_config; /* audio format of either the trigger in

158

event data or to use for capture of the

159

rest of the utterance */

160

unsigned int data_size; /* size of opaque event data */

161

unsigned int data_offset; /* offset of opaque data start from start of

162

this struct (e.g sizeof struct

163

sound_trigger_phrase_recognition_event) */

};

/*
 * Confidence level for each user in struct sound_trigger_phrase_recognition_extra
 */
struct sound_trigger_confidence_level {
    /* user ID */
    unsigned int user_id;
    /* confidence level in percent (0 - 100).
     * - min level for recognition configuration
     * - detected level for recognition event */
    unsigned int level;
};

/*

* Specialized recognition event for key phrase detection

178

*/

179

struct sound_trigger_phrase_recognition_extra {

180

unsigned int id; /* keyphrase ID */

181

unsigned int recognition_modes; /* recognition modes used for this keyphrase */

182

unsigned int confidence_level; /* confidence level for mode RECOGNITION_MODE_VOICE_TRIGGER */

183

unsigned int num_levels; /* number of user confidence levels */

184

struct sound_trigger_confidence_level levels[SOUND_TRIGGER_MAX_USERS];

185

};

186

187

struct sound_trigger_phrase_recognition_event {

188

struct sound_trigger_recognition_event common;

189

unsigned int num_phrases;

190

struct sound_trigger_phrase_recognition_extra phrase_extras[SOUND_TRIGGER_MAX_PHRASES];

};

/*

* configuration for sound trigger capture session passed to start_recognition()

195

*/

196

struct sound_trigger_recognition_config {

197

audio_io_handle_t capture_handle; /* IO handle that will be used for capture.

198

N/A if capture_requested is false */

199

audio_devices_t capture_device; /* input device requested for detection capture */

200

bool capture_requested; /* capture and buffer audio for this recognition

201

instance */

202

unsigned int num_phrases; /* number of key phrases recognition extras */

203

struct sound_trigger_phrase_recognition_extra phrases[SOUND_TRIGGER_MAX_PHRASES];

204

/* configuration for each key phrase */

205

unsigned int data_size; /* size of opaque capture configuration data */

206

unsigned int data_offset; /* offset of opaque data start from start of this struct

207

(e.g sizeof struct sound_trigger_recognition_config) */

};

/*

* Event sent via load sound model callback

212

*/

213

struct sound_trigger_model_event {

214

int status; /* sound model status e.g. SOUND_MODEL_STATUS_UPDATED */

215

sound_model_handle_t model; /* loaded sound model that triggered the event */

216

unsigned int data_size; /* size of event data if any. Size of updated sound model if

217

status is SOUND_MODEL_STATUS_UPDATED */

218

unsigned int data_offset; /* offset of data start from start of this struct

219

(e.g sizeof struct sound_trigger_model_event) */

};

#endif // ANDROID_SOUND_TRIGGER_H