blob: 888f79a92a08a72cf55de6990a0523c340bb539b [file] [log] [blame]
/*
* ListenSoundModelLib.h
*
* This is the interface file of sound model library which provides
* the APIs to get sound model information and merge, delete functionality
* which will be used by STHAL.
*
* Copyright (c) 2019, The Linux Foundation. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials provided
* with the distribution.
* * Neither the name of The Linux Foundation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESS OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
* WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
* OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
* IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/*======================================================================
DESCRIPTION : ListenSoundModelLibrary Version 3
====================================================================*/
#ifndef __LISTEN_SOUND_MODEL_LIB_V3_H__
#define __LISTEN_SOUND_MODEL_LIB_V3_H__
#ifdef __cplusplus
extern "C" {
#endif
#include <stdint.h>
#if defined(_SML_NO_DLL)
#define DllFunc
#define CDECLARE
#elif defined(_MSC_VER) && defined(_EXPORT)
#define DllFunc __declspec( dllexport )
#define CDECLARE __cdecl
#elif defined(_MSC_VER)
#define DllFunc __declspec( dllimport )
#define CDECLARE __cdecl
#else // For other compiler like gcc
#define DllFunc
#define CDECLARE
#endif
#define LSMLIB_VERSION 2 // %%% some unique number that changes when API changes
// %%% could be == SoundModel version supported for SVA 2.0
#define MAX_STRING_LEN (100 * 2) // maximum byte size of string representing Unicode character string
// %%% NOTE: "MAX_STRING_LEN" replaces old constant "MAX_KEYWORD"
// SVA 2.0
// Keyword & User Identifier as zero terminated strings
typedef char * keywordId_t;
typedef char * userId_t;
typedef struct {
//state machine parameters
float minSnrOnset; // The minimum snr of frame that speech segment starts
float minSnrLeave; // The minimum snr of frame that speech segment ends
float snrFloor; // The minimum snr value assumed in the end point detector
float snrThresholds; // The minimum snr value of speech to verify
float forgettingFactorNoise; // The forgetting factor used for noise estimation
int numFrameTransientFrame; // the number of frames in the beginning that are used for noise estimate(valid only for online mode)
float minEnergyFrameRatio; // the number of frame are used for noise estimation = minenergyFrameRatio * #frames of input(valid only for batch mode)
//post processing parameters
//Note:
int numMinFramesInPhrase; // the minimum nubmer of samples for a speech phrase (targetted speech)
int numMinFramesInSpeech; //the minimum number of samples for a speech intereval
int numMaxFrameInSpeechGap; //the maximum allowable number of samples for a speech gap
int numFramesInHead; //the speech head
int numFramesInTail; //the speech tail
} listen_epd_params;
typedef struct {
int16_t *data; /* Audio samples ( in Raw PCM format: 16kHz, 16bit, mono ) */
uint32_t n_samples; /* number of audio samples */
} listen_user_recording;
typedef struct {
uint8_t *data; /* block of memory containing Model data */
uint32_t size; /* size of memory allocated for Model data */
} listen_language_model_type;
typedef struct {
uint8_t *data; /* block of memory containing Model data */
uint32_t size; /* size of memory allocated for Model data */
} listen_model_type;
// kwihyuk - comments will update
// %%% the numbering - names are up to you, as long as type + version is unique between SVA 1.0 and 2.0 SMs
typedef enum {
kKeywordModel = 1, /* Keyword model */
kUserKeywordModel = 2, /* Userkeyword model */
kTargetSoundModel = 3,
kMultiUserKeywordModel = 4, /* Multiple Keyword models */
kKeywordModelWithVop = 5, //kwihyuk
kSecondStageKeywordModel = 6, //kwihyuk
kSecondStageKeywordModelWithVop = 7, //kwihyuk
} listen_model_enum;
typedef enum {
kSuccess = 0,
kFailure = 1
} listen_detection_status_enum;
typedef struct {
listen_model_enum type; /* model type: Keyword, User, TargetSound */
uint32_t version; /* model version */
uint32_t size; /* total size of the model: header + payload size */
} listen_sound_model_info;
typedef struct {
listen_detection_status_enum status; // SUCCESS or FAILURE
uint32_t size; // size in bytes of payload data
// just contains result confidence level values
uint8_t *data; // block of memory containing data payload
} listen_event_payload;
// SVA 2.0
typedef struct {
uint16_t numKeywords; /* total number of keywords */
uint16_t numUsers; /* total number of users */
uint16_t numActiveUserKeywordPairs; /* total number of active user+keyword pairs in SM */
bool isStripped; /* if corresponding keyword is stripped or not */
uint16_t *langPerKw; /* Language code of each keyword */
/* number active Users per keyword - included as convenience */
uint16_t *numUsersSetPerKw;
bool *isUserDefinedKeyword;
/* Ordered 'truth' table of all possible pairs of users for each keyword.
* Active entries marked with 1, inactive 0.keywordPhrase
* 16-bit short (rather than boolean) is used to match SM model data size */
uint16_t **userKeywordPairFlags;
uint16_t model_indicator; /* for SM 3.0, indicate which models were combined */
} listen_sound_model_header;
// SVA 2.0
// %%% this should match the 'sensitivity' data structure input in VoiceWakeupParamType
typedef struct {
uint8_t size ; // number of keyword plus activePair confidence levels set d
uint8_t *pConfLevels; // level of each keyword and each active user+keyword pair
} listen_confidence_levels ;
// SVA 2.0
typedef enum {
kSingleKWDetectionEvent = 1, /* SVA 1.0 model */
kMultiKWDetectionEvent = 2, /* SVA 2.0 model */
} listen_detection_type_enum;
// duplicates existing SVA1.0 typedef
// Do not include listen_detection_entry_v1 in SVA 1.0 header if both headers included
typedef struct {
char keyword[MAX_STRING_LEN];
uint16_t keywordConfidenceLevel;
uint16_t userConfidenceLevel;
} listen_detection_event_v1;
// denotes that a particular entry in confidence level array is not active
static const uint8_t NO_CONF_LEVEL = 0;
typedef struct {
char keywordPhrase[MAX_STRING_LEN]; /* string containing phrase string of keyword with highest confidence score */
char userName[MAX_STRING_LEN]; /* string containing name of user with highest confidence score */
uint8_t highestKeywordConfidenceLevel; // set to zero if detection status is Failed
uint8_t highestUserConfidenceLevel; // set to zero if detection status is Failed
listen_confidence_levels pairConfidenceLevels; // confidence levels of ALL pair (active or not)
} listen_detection_event_v2;
// modified for SVA 2.0 - this should override SVA 1.0 typedef
typedef struct {
// %%% uint16_t version;
listen_detection_type_enum detection_data_type;
// data structure filled is based on detection_data_type
union {
listen_detection_event_v1 event_v1; // for SVA 1.0
listen_detection_event_v2 event_v2; // for SVA 2.0
} event;
} listen_detection_event_type;
typedef enum {
kSucess = 0,
kFailed = 1,
kBadParam,
kKeywordNotFound,
kUserNotFound,
kUserKwPairNotActive,
kSMVersionUnsupported,
kUserDataForKwAlreadyPresent,
kDuplicateKeyword,
kDuplicateUserKeywordPair,
kMaxKeywordsExceeded,
kMaxUsersExceeded,
kEventStructUnsupported, // payload contains event data that can not be processed, or mismatches SM version
kLastKeyword,
kNoSignal,
kLowSnr,
kRecordingTooShort,
kRecordingTooLong,
kNeedRetrain,
kUserUDKPairNotRemoved,
kCannotCreateUserUDK,
kOutputArrayTooSmall,
kTooManyAbnormalUserScores,
kWrongModel,
kWrongModelAndIndicator,
kDuplicateModel,
} listen_status_enum;
/*
* Notes:
* 1. The client code that calls getKeywordPhrases(), getUserNames() must allocate DstStr as [MAX_STRING_LEN]
* 2. The client code that calls getUserKeywordModelSize(), createUserDefinedKeywordModel(),
* createUserKeywordModel() should assign meaningful string for keywordId or userId, empty string is not recommended.
* 3. verifyUserRecording() should be called before calling createUserKeywordModel(). If pConfidenceLevel returned
* in verifyUserRecording() is below a CONFIDENCE_THRESHOLD value, the recording should be rejected and not used
* to a create user model. The client code should decide the CONFIDENCE_THRESHOLD value, a recommended value range for
* CONFIDENCE_THRESHOLD is 60~70.
*
*/
/*
* findKeywordEndPosition
*
* Returns the keyword end position of user recordings from the keyword model
* the keyword model finds the keyword end position by using keyword end finding algorithm inside SVA
*
* Param [in] pKeywordModel - pointer to keyword model data which will be used to find keyword end
* Param [in] keywordId - null terminated string contains keyword phrase string
* Param [in] pUserRecording - a single recording of user speaking keyword
other speech such as following command may follows
* Param [out] pKendPosition - returns keyword end position from the start of user recording in number of samples
*
* Return - status
* kBadParam - When any input pointer (except pEpdParameter) is NULL
* kKeywordNotFound - When keywordId not exist in the model
* kSMVersionUnsupported - When pKeywordModel is not 2.0 model
*/
DllFunc listen_status_enum CDECLARE findKeywordEndPosition(
listen_model_type *pKeywordModel,
keywordId_t keywordId,
listen_user_recording *pUserRecording,
uint32_t *pKendPosition);
/*
* verifyUserRecording
*
* Returns the confidence level ( 0 ~ 100 ) that user recording matches keyword
* User data is to be appended for a specific keyword in the model
* // will be updated or removed
* if input is SM 3.0 which combiend with GMM and other sound models,
* then parsing to GMM model and run same procedure.
*
* Param [in] pKeywordModel - pointer to user-independent keyword model data
* Param [in] keywordId - null terminated string contains keyword phrase string
* Param [in] listen_epd_params - epd parameter
if null is passing, default epd parameter will be used internally
* Param [in] pUserRecording - a single recording of user speaking keyword
* Param [out] pConfidenceLevel - returns confidence level returned by keyword detection
*
* Return - status
* kBadParam - When any input pointer (except pEpdParameter) is NULL
* kKeywordNotFound - When keywordId not exist in the model
* kSMVersionUnsupported - When pKeywordModel is not 2.0 (have to contain GMM) model
* kLowSnr - When user recording is too noisy
* kNoSignal - When user recording is non-speech
*/
DllFunc listen_status_enum CDECLARE verifyUserRecording(
listen_model_type *pKeywordModel,
keywordId_t keywordId, // add for SVA 2.0
listen_epd_params *pEpdParameter,
listen_user_recording *pUserRecording,
int16_t *pConfidenceLevel);
/*
* checkUserRecording
*
* Returns the status of user recordings that if user recording has problem with SNR(Signal Noise Ratio) and length
*
* Param [in] pLanguageModel - pointer to language model
* Param [in] pEpdParameter - pointer to EPD parameters
* Default parameter will be used if eEpdParameter is NULL
* Param [in] pUserRecording - User recording that is going to be tested
* Param [out] pOutSnr - SNR of user recording
* Param [in] maxPhonemeLength (optional parameter) - maximum phoneme length allowed for each user recording
* - It is optional parameter, whose default value is 0.
*
* Return - status
* kBadParam - When any input pointer (except pEpdParameter) is NULL
* kLowSnr - When user recording is too noisy
* kNoSignal - When user recording is non-speech
* kRecordingTooShort - When user recording is too short
*/
DllFunc listen_status_enum CDECLARE checkUserRecording(
listen_language_model_type *pLanguageModel,
listen_epd_params *pEpdParameter,
listen_user_recording *pUserRecording,
float *pOutSnr,
uint32_t maxPhonemeLength);
/*
* checkRecordingsQuality
*
* Returns the status of the last user recording in recording array that
* if user recording has problem with SNR(Signal Noise Ratio) and length
* Check the consistency of the input recordings if numUserRecording > 1
*
* Param [in] pLanguageModel - pointer to language model
* Param [in] pEpdParameter - pointer to EPD parameters
* Default parameter will be used if eEpdParameter is NULL
* Param [in] numUserRecording - number of input recordings
* Param [in] pUserRecordings - User recordings those are going to be tested
* Param [out] pOutSnr - SNR of user recording
*
* Return - status
* kBadParam - When any input pointer (except pEpdParameter) is NULL
* kLowSnr - When user recording is too noisy
* kNoSignal - When user recording is non-speech
* kRecordingTooShort - When user recording is too short
*/
DllFunc listen_status_enum CDECLARE checkRecordingsQuality(
listen_language_model_type *pLanguageModel,
listen_epd_params *pEpdParameter,
uint32_t numUserRecording,
listen_user_recording *pUserRecordings[],
float *pOutSnr);
/*
* tuneUserDefinedKeywordModelThreshold
*
* This function tunes threshold of user defined keyword.
*
* This function can be used when programmer want to make testing stage after training stage of user defined keyword
* even though threshold of user defined keyword is automatically tunned when create user defined keyword,
* this function can be useful when tune more threshold of user defined keyword
*
* Param [in] pUserDefinedKeyword - pointer to user defined keyword
* Param [in] keywordId - keyword spell
* Param [in] pUserRecording - user recording from testing stage
* Param [out] pOutputUserDefinedKeyword - tunned user defined keyword
*
* Return - listen_status_enum
* Return - status
* kBadParam - When any input pointer is NULL, or pUserDefinedKeyword is not UDK
* kKeywordNotFound - When keywordId not exist in the model
*/
DllFunc listen_status_enum CDECLARE tuneUserDefinedKeywordModelThreshold(
listen_model_type *pUserDefinedKeyword,
keywordId_t keywordId,
listen_user_recording *pUserRecording,
listen_model_type *pOutputUserDefinedKeyword);
/*
* getUserDefinedKeywordSize
*
* Get the size required to hold user defined keyword model that extends given keyword model
* with give user data
*
* Param [in] pUserDefinedKeyword - pointer to previous user defined keyword
if pUserDefinedKeyword is NULL, this will create new user defined keyword model
if pUserDefinedKeyword is not NULL, this will train incrementally ( not supported now )
* Param [in] keywordId - keyword spell of user defined keyword
* Param [in] userId - user spell of user defined keyword
* Param [in] pEpdParameter - epd parameter which is used for chopping user recording.
if eEpdParameter is NULL, default parameter will be used
* Param [in] numUserRecording - number of user recording
* Param [in] pUserRecordings[] - multiple recording of user speaking keyword
* Param [in] pLanguageModel - language model
* Param [out] pOutputSize - pointer to where output model size will be written
*
* Return - listen_status_enum
* Return - status
* kBadParam - When any input pointer (except pUserDefinedKeyword, pEpdParameter) is NULL, or pLanguageModel is fake
* kNoSignal - When user recording is non-speech
*/
DllFunc listen_status_enum CDECLARE getUserDefinedKeywordSize(
listen_model_type *pUserDefinedKeyword,
keywordId_t keywordId,
userId_t userId,
listen_epd_params *pEpdParameter,
uint32_t numUserRecording,
listen_user_recording *pUserRecordings[],
listen_language_model_type *pLanguageModel,
uint32_t *pOutputSize);
/*
* getUserDefinedKeywordApproxSize
*
* Get the size required to hold user-keyword model that extends given keyword model
* with give user data
*
* Param [in] keywordId - null terminated string containing keyword phrase string
* Param [in] userId - null terminated string containing user name string
* Param [in] pLanguageModel - pointer to language model data
* Param [out] pOutputSize - size of approximated output model
* Param [in] maxPhonemeLength (optional parameter) - maximum phoneme length allowed for each user recording
* - It is optional parameter, whose default value is 0.
*
* Return - status
* kBadParam - When any input pointer is NULL
*/
DllFunc listen_status_enum CDECLARE getUserDefinedKeywordApproxSize(
keywordId_t keywordId,
userId_t userId,
listen_language_model_type *pLanguageModel,
uint32_t *pOutputSize,
uint32_t maxPhonemeLength);
/*
* createUserDefinedKeywordModel
*
* Description : Create User Defined Keyword Model
*
* Param [in] pUserDefinedKeyword - pointer to previous user defined keyword
if pUserDefinedKeyword is NULL, this will create new user defined keyword model
if pUserDefinedKeyword is not NULL, this will train incrementally ( not supported now )
* Param [in] keywordId - keyword spell of user defined keyword
* Param [in] userId - user spell of user defined keyword
* Param [in] pEpdParameter - epd parameter which is used for chopping user recording.
if eEpdParameter is NULL, default parameter will be used
* Param [in] numUserRecording - number of user recording
* Param [in] pUserRecordings[] - multiple recording of user speaking keyword
* Param [in] pLanguageModel - language model
* Param [out] pOutputUserDefinedKeyword - pointer to where output model will be written
* Param [out] pMatchingScore - pointer to matching score
*
* Return - listen_status_enum
* Return - status
* kBadParam - When any input pointer (except pUserDefinedKeyword, pEpdParameter) is NULL, or pLanguageModel is fake
* kNoSignal - When user recording is non-speech
* kCannotCreateUserUDK - When creation process fails somewhere
* kOutputArrayTooSmall - When output size is smaller than actual udk model size
*/
DllFunc listen_status_enum CDECLARE createUserDefinedKeywordModel(
listen_model_type *pUserDefinedKeyword,
keywordId_t keywordId,
userId_t userId,
listen_epd_params *pEpdParameter,
uint32_t numUserRecording,
listen_user_recording *pUserRecordings[],
listen_language_model_type *pLanguageModel,
listen_model_type *pOutputUserDefinedKeyword,
int16_t *pMatchingScore);
/*
* getStrippedUserKeywordModelSize
*
* Return stripped model size
*
* Param[in] pModel - pointer to (user)keyword model data
* Param[out] nStrippedModelSize - return model size of stripped model
*
* Return - status
* kBadParam - When any input pointer is NULL
* kSMVersionUnsupported - When pModel is not 2.0 model
*
*/
DllFunc listen_status_enum CDECLARE getStrippedUserKeywordModelSize(
listen_model_type *pModel,
uint32_t *nStrippedModelSize);
/*
* stripUserKeywordModel
*
* Return stripped model
*
* Param[in] pModel - pointer to (user)keyword model data
* Param[out] pStrippedModel - pointer to stripped model data
*
* Return - status
* kBadParam - When any input pointer is NULL
* kSMVersionUnsupported - When pModel is not 2.0 model
*
*/
DllFunc listen_status_enum CDECLARE stripUserKeywordModel(
listen_model_type *pModel,
listen_model_type *pStrippedModel);
/*
* getUserKeywordModelSize
*
* Get the size required to hold user-keyword model that extends given keyword model
* with give user data
*
* Param [in] pKeywordModel - pointer to keyword model data
* Param [in] keywordId - null terminated string containing keyword phrase string
* Param [in] userId - null terminated string containing user name string
* Param [out] nUserKeywordModelSize - size of user keyword model
*
* Return - status
* kBadParam - When any input pointer is NULL
* kKeywordNotFound - When keywordId not exist in the model
* kSMVersionUnsupported - When pKeywordModel is not 2.0/3.0 model
*/
DllFunc listen_status_enum CDECLARE getUserKeywordModelSize(
listen_model_type *pKeywordModel,
keywordId_t keywordId, // add for SVA 2.0
userId_t userId, // add for SVA 2.0
uint32_t *nUserKeywordModelSize);
/*
* createUserKeywordModel
*
* Create a user keyword model
* Writes the user keyword model into given memory location
*
* Param [in] pKeywordModel - pointer to Keyword model or User keyword model data
if it is keyword model, create user-keyword model
if it is user keyword model, incrementally train user keyword model
* Param [in] keywordId - user data is to be appended for keyword in model with given identifier
* Param [in] userId - identifier of user data is created
* If identifier is already used, will replace existing user data with newly created data.
* The User Name is passed to this function so that if this is the first time user data is
* being added for a new user, the User's Name can be stored in the SM
* Param [in] pEpdParameter - end point detection parameter
* if eEpdParameter is NULL, default parameter will be used
* Param [in] numUserRecording - number of user recordings
* Param [in] pUserRecordings - multiple recording of user speaking keyword
* Param [out] pUserKeywordModel - pointer to where user keyword model data is to be written
* Param [out] pUserMatchingScore - pointer to user matching score
* Return - status
* kBadParam - When any input pointer (except pEpdParameter) is NULL
* kKeywordNotFound - When keywordId not exist in the model
* kSMVersionUnsupported - When pKeywordModel is not 2.0 or 3.0 model
* kLowSnr - When user recording is too noisy
* kNoSignal - When user recording is non-speech
* kCannotCreateUserUDK - When pKeywordModel is UDK model
*/
DllFunc listen_status_enum CDECLARE createUserKeywordModel(
listen_model_type *pKeywordModel,
keywordId_t keywordId, // add for SVA 2.0
userId_t userId, // add for SVA 2.0
listen_epd_params *pEpdParameter,
uint32_t numUserRecording,
listen_user_recording *pUserRecordings[],
listen_model_type *pUserKeywordModel,
int16_t *pUserMatchingScore);
// Since size of new SM after removing data will be less than or equal to size of
// input SM, this function could be optional and size of pInputModel could be used
// to allocate memory for pResultModel when deleteFromModel() called.
/*
* getSizeAfterDeleting
*
* Return the size of sound model after removing data from a given SM for either
* a keyword, a user, or a specific user+keyword pair.
*
* Param [in] pInputModel - pointer to sound model
* Param [in] keywordId - data for this keyword in model with given identifier is removed
* If userId is 'null', then all keyword-only data and all user data associated
* with the given non-null keywordId is removed.
* If userId is also non-null, then only data associated with the userId+keywordId
* pair is removed.
* Param [in] userId - all data for this user in model with given identifier is removed
* If keywordId is 'null', then all all user data for the given non-null userId
* is removed.
* If keywordId is also non-null, then only data associated with the userId+keywordId
* pair is removed.
* Param [out] nOutputModelSize - outputs size of resulting soundmodel after removing data.
* Return - status
* kBadParam - When any input pointer (except keywordId, userId) is NULL
* kLastKeyword - When pInputModel has only one keyword
* kSMVersionUnsupported - When pInputModel is not 2.0 model
* kKeywordNotFound - When keywordId not exist in the model
* kUserNotFound - When userId not exist in the model
* kUserKWPairNotActive - When <keywordId, userId> pair not exist in the model
* kUserUDKPairNotRemoved - When <keywordId, userId> pair to delete is UDK
*/
DllFunc listen_status_enum CDECLARE getSizeAfterDeleting(
listen_model_type *pInputModel,
keywordId_t keywordId, // add for SVA 2.0
userId_t userId, // add for SVA 2.0
uint32_t *nOutputModelSize);
// If getSizeAfterDeleting() supported, call it get size of new sound model after
// removing desired data from given input sound model, and
// allocate ResultModel with this size
// Otherwise, use size of input SoundModel since size of ResultModel will be
// less than or equal to size of input SoundModel.
/*
* deleteFromModel
*
* Return a new sound model after removing data from a given SM for a keyword, a user,
* or a user+keyword pair.
*
* Param [in] pInputModel - pointer to sound model
* Param [in] keywordId - data for this keyword in model with given identifier is removed
* If userId is 'null', then all keyword-only data and all user data associated
* with the given non-null keywordId is removed.
* If userId is also non-null, then only data associated with the userId+keywordId
* pair is removed.
* Param [in] userId - all data for this user in model with given identifier is removed
* If keywordId is 'null', then all all user data for the given non-null userId
* is removed.
* If keywordId is also non-null, then only data associated with the userId+keywordId
* pair is removed.
* Param [out] pResultModel - pointer to where user keyword model data is to be written
* Return - status
* kBadParam - When any input pointer (except keywordId, userId) is NULL
* kLastKeyword - When pInputModel has only one keyword
* kSMVersionUnsupported - When pInputModel is not 2.0 or 3.0 model
* kKeywordNotFound - When keywordId not exist in the model
* kUserNotFound - When userId not exist in the model
* kUserKWPairNotActive - When <keywordId, userId> pair not exist in the model
* kUserUDKPairNotRemoved - When <keywordId, userId> pair to delete is UDK
*/
DllFunc listen_status_enum CDECLARE deleteFromModel(
listen_model_type *pInputModel,
keywordId_t keywordId, // add for SVA 2.0
userId_t userId, // add for SVA 2.0
listen_model_type *pResultModel);
/*
* getMergedModelSize
*
* Return the size of sound model after merging required models
*
* Param [in] numModels - number of model files to be merged
* Param [in] pModels - array of pointers to Keyword or User keyword model data
* Param [out] nOutputModelSize - outputs size of resulting soundmodel after merging models
*
* Return - status
* kBadParam - When any input pointer is NULL
* kSMVersionUnsupported - When pInputModel is not 2.0 model
* kDuplicateKeyword - When same keywordId exists in more than 1 model
* kDuplicateUserKeywordPair
* kMaxKeywordsExceeded
* kMaxUsersExceeded,
*/
DllFunc listen_status_enum CDECLARE getMergedModelSize(
uint16_t numModels,
listen_model_type *pModels[],
uint32_t *nOutputModelSize);
/*
* mergeModels
*
* merges two or more Sound Models
*
* Writes the new merged model into given memory location
*
* Param [in] numModels - number of model files to be merged
* Param [in] pModels - array of pointers to Keyword or User keyword model data
* Param [out] pMergedModel - pointer to where merged model data is to be written
* Return - status
* kBadParam - When any input pointer is NULL
* kSMVersionUnsupported - When pInputModel is not 2.0 model
* kDuplicateKeyword - When same keywordId exists in more than 1 model
* kDuplicateUserKeywordPair - N/A to current version
* kMaxKeywordsExceeded - N/A to current version
* kMaxUsersExceeded - N/A to current version
*/
DllFunc listen_status_enum CDECLARE mergeModels(
uint16_t numModels,
listen_model_type *pModels[],
listen_model_type *pMergedModel);
/* ParseFromBigSoundModel
*
* Parse the big 3.0 sound model to individual sub models
*
* Param [in] *pSM3p0Model – The pointer to the big 3.0 sound model
* Param [out] p1stStageModel - pointer to parsed 1stStageModel
* Param [out] p2ndStageKWModel - pointer to parsed 2nd stage KW Model
* Param [out] p2stStageVoPModel - pointer to parsed 2nd stage VoP Model
* Param [out] indicator – returned indicator, indicating what types of models parsed/returned
* Return - status
* kBadParam - When any input pointer is NULL
* kSMVersionUnsupported - When pInputModel is not 3.0 model
* kDuplicateKeyword - When same keywordId exists in more than 1 model
* kWrongModel - when input is broken
*/
DllFunc listen_status_enum CDECLARE parseFromBigSoundModel(
listen_model_type *pSM3p0Model,
listen_model_type *p1stStageModel,
listen_model_type *p2ndStageKWModel,
listen_model_type *p2stStageVoPModel,
uint16_t *indicator );
/*
* parseDetectionEventData
*
* parse event payload into detection event.
*
* Version of input SM will detemine DetectionType created/returned
*
* Param [in] pUserKeywordModel - pointer to keyword or user keyword model data
* Param [in] pEventPayload - pointer to received event payload data
* Param [out] pDetectEvent - pointer to where detection event data is to be written
* Return - status
* kBadParam - When any input pointer is NULL
* kSMVersionUnsupported - When pUserKeywordModel is not 2.0 model
* kEventStructUnsupported - When pEventPayload->size != numKeywords + numActiveUsers
*/
DllFunc listen_status_enum CDECLARE parseDetectionEventData(
listen_model_type *pUserKeywordModel,
listen_event_payload *pEventPayload,
listen_detection_event_type *pDetectionEvent);
// Declared in both SVA 1.0 and SVA 2.0 versions and SML 3.0 of ListenSoundModelMLib
//
/*
* querySoundModel
*
* Returns the information about a sound model
* Sound model could be of any type: Keyword, UserKeyword, TargetSound,...
* Sound model could be any versions
*
* Param [in] pSoundModel - pointer to model data
* Param [out] pListenSoundModelInfo - returns information about the give sound model
*
* Return - status
* kBadParam - When any input pointer is NULL
* kFailed - When input model failed to be decoded
* kSMVersionUnsupported - When pSoundModel is fake model (invalid model other than 1.0 model, 2.0 model and 3.0 model)
*/
DllFunc listen_status_enum CDECLARE querySoundModel(
listen_model_type *pSoundModel,
listen_sound_model_info *pListenSoundModelInfo);
/*
* getSoundModelHeader
*
* Returns additional information about the sound model
* Sound model could be of any type: Keyword, UserKeyword, TargetSound,...
* Keyword
*
* Param [in] pSoundModel - pointer to model data
* Param [out] pListenSoundModelHeader - returns header field from sound model
*
* Return - status
* kBadParam - When any input pointer is NULL
* kSMVersionUnsupported - When pSoundModel is not 2.0 or 3.0 model
*/
DllFunc listen_status_enum CDECLARE getSoundModelHeader(
listen_model_type *pSoundModel,
listen_sound_model_header *pListenSoundModelHeader);
/*
* release sound model header
*
* deallocate sound model header
* Return - status
* kBadParam - When any input pointer is NULL
*/
DllFunc listen_status_enum CDECLARE releaseSoundModelHeader(
listen_sound_model_header *pListenSoundModelHeader);
/*
* getKeywordPhrases
*
* Get keyword phrase string for all Keywords defined in given SM 2.0 / 3.0
*
* App calling this function must allocate memory for all phrases
* by getting the number of keywords from querySoundModel() and allocating
* totalNumKeywords*MAX_STRING_LEN
* This function copies phrases into this allocated keywords array
*
* Param [in] pSoundModel - pointer to model data
* Param [in/out] numKeywords - [in] number of string entries allocated in keywords array
* [out] number of keyword phrase strings copied keyword array
* Param [out] keywords - array of keyword phrase null-terminated strings
*
* Return - status
* kBadParam - When any input pointer is NULL, or numKeywords < real keywords number
* kSMVersionUnsupported - When pSoundModel is not 2.0 or 3.0 model
*/
DllFunc listen_status_enum CDECLARE getKeywordPhrases(
listen_model_type *pSoundModel,
uint16_t *numKeywords,
keywordId_t *keywords);
/*
* getUserNames
*
* Get user names for user data associated with a given SM 2.0 / 3.0
*
* App calling this function must allocate memory for all names
* by getting the number of users from querySoundModel() and allocating
* totalNumUsers*MAX_STRING_LEN
* This function copies names into this allocated keywords array
*
* Param [in] pSoundModel - pointer to model data
* Param [in/out] numUsers - [in] number of string entries allocated in users array
* [out] number of user name strings copied users array
* Param [out] users - array of user name null-terminated strings
*
* Return - status
* kBadParam - When any input pointer is NULL, or numUsers < real users number, or pSoundModel is keyword-only model
* kSMVersionUnsupported - When pSoundModel is not 2.0 or 3.0 model
*/
DllFunc listen_status_enum CDECLARE getUserNames(
listen_model_type *pSoundModel,
uint16_t *numUsers,
userId_t *users);
/*
* loadConfParams
*
* Load configurable parameters to the sound model library
*
* Param [in] pConfData - pointer to param data
* Param [in] confDataSize - size of memory allocated for param data
*
* Return - status
* kBadParam - When any input pointer is NULL
*/
DllFunc listen_status_enum CDECLARE loadConfParams(
uint8_t *pConfData,
uint32_t confDataSize);
/*
* getBinaryModelSize
*
* Return binary model size
*
* Param[in] pListenModel - pointer to (user)keyword model data
* Param[out] nBinaryModelSize - return model size of binary model
*
* Return - status
* kBadParam - When any input pointer is NULL
* kSMVersionUnsupported - When pModel is not 2.0 or 3.0 model
*
*/
DllFunc listen_status_enum CDECLARE getBinaryModelSize(
listen_model_type *pListenModel,
uint32_t *nBinaryModelSize);
/*
* getSortedKeywordStatesUserKeywordModelSize
*
* Return sorted model size
*
* Param[in] pModel - pointer to (user)keyword model data
* Param[out] nSortedModelSize - return model size of sorted keyword states model
*
* Return - status
* kBadParam - When any input pointer is NULL
*
*/
DllFunc listen_status_enum CDECLARE getSortedKeywordStatesUserKeywordModelSize(
listen_model_type *pModel,
uint32_t *nSortedModelSize);
/*
* sortKeywordStatesOfUserKeywordModel
*
* Return sorted model
*
* Param[in] pInputModel - pointer to (user)keyword model data
* Param[out] pSortedModel - pointer to sorted keyword states model data
*
* Return - status
* kBadParam - When any input pointer is NULL
* kSMVersionUnsupported - when pModel is not 2.0 model
*
*/
DllFunc listen_status_enum CDECLARE sortKeywordStatesOfUserKeywordModel(
listen_model_type *pInputModel,
listen_model_type *pSortedModel);
#ifdef __cplusplus
};
#endif
#endif /* __LISTEN_SOUND_MODEL_LIB_V3_H__ */