/* Source: media/libstagefright/codecs/amrwbenc/src/wb_vad.c - fp2-dev/platform/frameworks/av - Gitiles */

 /*
  ** Copyright 2003-2010, VisualOn, Inc.
  **
  ** Licensed under the Apache License, Version 2.0 (the "License");
  ** you may not use this file except in compliance with the License.
  ** You may obtain a copy of the License at
  **
  **     http://www.apache.org/licenses/LICENSE-2.0
  **
  ** Unless required by applicable law or agreed to in writing, software
  ** distributed under the License is distributed on an "AS IS" BASIS,
  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  ** See the License for the specific language governing permissions and
  ** limitations under the License.
  */

 /***********************************************************************
 *      File: wb_vad.c                                                  *
 *                                                                      *
 *      Description: Voice Activity Detection                           *
 *                                                                      *
 ************************************************************************/

 #include <stdlib.h>
 #include <stdio.h>
 #include "cnst.h"
 #include "wb_vad.h"
 #include "typedef.h"
 #include "basic_op.h"
 #include "math_op.h"
 #include "wb_vad_c.h"
 #include "mem_align.h"

 /******************************************************************************
 *  Calculate Log2 and scale the signal:
 *
 *    ilog2(Word32 in) = -1024*log10(in * 2^-31)/log10(2), where in = [1, 2^31-1]
 *
 *  input   output
 *  32768   16384
 *  1       31744
 *
 * When input is in the range of [1,2^16], max error is 0.0380%.
 *********************************************************************************/

 static Word16 ilog2(                       /* return: scaled log2-style value of the input */
 		Word16 mant                        /* i: value to be converted; values <= 0 are treated as 1 */
 		)
 {
 	Word16 norm_in, norm_sq, result;
 	Word32 pass, squared;

 	/* clamp non-positive input so that the normalisation below sees at least 1 */
 	if (mant <= 0)
 	{
 		mant = 1;
 	}

 	/* normalise the input and remember the shift that was applied */
 	norm_in = norm_s(mant);
 	mant = mant << norm_in;

 	/* three 16-bit squarings followed by one 32-bit squaring */
 	pass = 0;
 	while (pass < 3)
 	{
 		mant = vo_mult(mant, mant);
 		pass++;
 	}
 	squared = vo_L_mult(mant, mant);

 	/* normalise the 32-bit result and keep its upper half */
 	norm_sq = norm_l(squared);
 	mant = extract_h(squared << norm_sq);

 	/* combine both shift counts and the top bits of the mantissa */
 	result = (norm_in + 16) << 10;
 	result = add1(result, (norm_sq << 6));
 	result = vo_sub(add1(result, 127), (mant >> 8));
 	return result;
 }

 /******************************************************************************
 *
 *     Function     : filter5
 *     Purpose      : Fifth-order half-band lowpass/highpass filter pair with
 *                    decimation.
 *
 *******************************************************************************/

 static void filter5(
 		Word16 * in0,                         /* i/o : first input sample; overwritten with the low-pass part   */
 		Word16 * in1,                         /* i/o : second input sample; overwritten with the high-pass part */
 		Word16 data[]                         /* i/o : filter memory, data[0] and data[1] are used              */
 		)
 {
 	Word16 state_a, branch_a;
 	Word16 state_b, branch_b;

 	/* first branch: fed by *in0, coefficient COEFF5_1, memory data[0] */
 	state_a = vo_sub(*in0, vo_mult(COEFF5_1, data[0]));
 	branch_a = add1(data[0], vo_mult(COEFF5_1, state_a));
 	data[0] = state_a;

 	/* second branch: fed by *in1, coefficient COEFF5_2, memory data[1] */
 	state_b = vo_sub(*in1, vo_mult(COEFF5_2, data[1]));
 	branch_b = add1(data[1], vo_mult(COEFF5_2, state_b));
 	data[1] = state_b;

 	/* sum of the branches goes back to *in0, their difference to *in1 */
 	*in0 = extract_h((vo_L_add(branch_a, branch_b) << 15));
 	*in1 = extract_h((vo_L_sub(branch_a, branch_b) << 15));
 }

 /******************************************************************************
 *
 *     Function     : filter3
 *     Purpose      : Third-order half-band lowpass/highpass filter pair with
 *                    decimation.
 *
 *******************************************************************************/

 static void filter3(
 		Word16 * in0,                         /* i/o : first input sample; overwritten with the low-pass part   */
 		Word16 * in1,                         /* i/o : second input sample; overwritten with the high-pass part */
 		Word16 * data                         /* i/o : single-word filter memory                                */
 		)
 {
 	Word16 state, filtered;

 	/* filter *in1 through the COEFF3 section and refresh the memory word */
 	state = vo_sub(*in1, vo_mult(COEFF3, *data));
 	filtered = add1(*data, vo_mult(COEFF3, state));
 	*data = state;

 	/* *in1 must be written first: both outputs are derived from the old *in0 */
 	*in1 = extract_h((vo_L_sub(*in0, filtered) << 15));
 	*in0 = extract_h((vo_L_add(*in0, filtered) << 15));
 }

 /******************************************************************************
 *
 *     Function   : level_calculation
 *     Purpose    : Calculate signal level in a sub-band. Level is calculated
 *                  by summing absolute values of the input data.
 *
 *                  Signal level calculated from the end of the frame
 *                  (samples count1 .. count2-1) is stored to (*sub_level)
 *                  and added to the level of the next frame.
 *
 ******************************************************************************/

 static Word16 level_calculation(                      /* return: signal level */
 		Word16 data[],                        /* i   : signal buffer                                     */
 		Word16 * sub_level,                   /* i   : level carried over from the previous frame        */
 		                                      /* o   : level of samples count1 .. count2-1 of this frame */
 		Word16 count1,                        /* i   : first sample index of the carried-over tail       */
 		Word16 count2,                        /* i   : one past the last sample index                    */
 		Word16 ind_m,                         /* i   : step size for the index of the data buffer        */
 		Word16 ind_a,                         /* i   : starting index of the data buffer                 */
 		Word16 scale                          /* i   : scaling for the level calculation                 */
 		)
 {
 	Word32 n, tail_sum, frame_sum;

 	/* sum of 2*|x| over the tail of the frame (samples count1 .. count2-1) */
 	tail_sum = 0L;
 	n = count1;
 	while (n < count2)
 	{
 		tail_sum += (abs_s(data[ind_m * n + ind_a])<<1);
 		n++;
 	}

 	/* add the level saved by the previous call, then save this tail for the next one */
 	frame_sum = vo_L_add(tail_sum, L_shl(*sub_level, 16 - scale));
 	*sub_level = extract_h(L_shl(tail_sum, scale));

 	/* add the head of the frame (samples 0 .. count1-1) */
 	n = 0;
 	while (n < count1)
 	{
 		frame_sum += (abs_s(data[ind_m * n + ind_a])<<1);
 		n++;
 	}

 	return extract_h(L_shl2(frame_sum, scale));
 }

 /******************************************************************************
 *
 *     Function     : filter_bank
 *     Purpose      : Divide input signal into bands and calculate level of
 *                    the signal in each band
 *
 *******************************************************************************/

 static void filter_bank(
 		VadVars * st,                         /* i/o : State struct (filter memories, sub_level) */
 		Word16 in[],                          /* i   : input frame of FRAME_LEN samples          */
 		Word16 level[]                        /* o   : signal levels for bands 0..11             */
 		)
 {
 	/* per-band arguments for level_calculation: count1, count2, ind_m, ind_a, scale */
 	static const Word16 band_cfg[12][5] = {
 		{  2,  8, 32,  0, 17 },               /*    0 -  200 Hz */
 		{  2,  8, 32, 16, 17 },               /*  200 -  400 Hz */
 		{  2,  8, 32, 24, 17 },               /*  400 -  600 Hz */
 		{  2,  8, 32,  8, 17 },               /*  600 -  800 Hz */
 		{  4, 16, 16, 12, 16 },               /*  800 - 1200 Hz */
 		{  4, 16, 16,  4, 16 },               /* 1200 - 1600 Hz */
 		{  4, 16, 16,  6, 16 },               /* 1600 - 2000 Hz */
 		{  4, 16, 16, 14, 16 },               /* 2000 - 2400 Hz */
 		{  8, 32,  8,  2, 15 },               /* 2400 - 3200 Hz */
 		{  8, 32,  8,  3, 15 },               /* 3200 - 4000 Hz */
 		{  8, 32,  8,  7, 15 },               /* 4000 - 4800 Hz */
 		{ 16, 64,  4,  1, 14 }                /* 4800 - 6400 Hz */
 	};
 	Word32 k, band;
 	Word16 work[FRAME_LEN];

 	/* work on a copy of the input, shifted 1 bit down for safe scaling */
 	for (k = 0; k < FRAME_LEN; k++)
 	{
 		work[k] = in[k] >> 1;
 	}

 	/* stage 1: split sample pairs */
 	for (k = 0; k < 128; k++)
 	{
 		filter5(&work[2 * k], &work[2 * k + 1], st->a_data5[0]);
 	}

 	/* stage 2 */
 	for (k = 0; k < 64; k++)
 	{
 		filter5(&work[4 * k], &work[4 * k + 2], st->a_data5[1]);
 		filter5(&work[4 * k + 1], &work[4 * k + 3], st->a_data5[2]);
 	}

 	/* stage 3 */
 	for (k = 0; k < 32; k++)
 	{
 		filter5(&work[8 * k], &work[8 * k + 4], st->a_data5[3]);
 		filter5(&work[8 * k + 2], &work[8 * k + 6], st->a_data5[4]);
 		filter3(&work[8 * k + 3], &work[8 * k + 7], &st->a_data3[0]);
 	}

 	/* stage 4 */
 	for (k = 0; k < 16; k++)
 	{
 		filter3(&work[16 * k + 0], &work[16 * k + 8], &st->a_data3[1]);
 		filter3(&work[16 * k + 4], &work[16 * k + 12], &st->a_data3[2]);
 		filter3(&work[16 * k + 6], &work[16 * k + 14], &st->a_data3[3]);
 	}

 	/* stage 5 */
 	for (k = 0; k < 8; k++)
 	{
 		filter3(&work[32 * k + 0], &work[32 * k + 16], &st->a_data3[4]);
 		filter3(&work[32 * k + 8], &work[32 * k + 24], &st->a_data3[5]);
 	}

 	/* calculate levels in each frequency band, highest band first */
 	for (band = 11; band >= 0; band--)
 	{
 		level[band] = level_calculation(work, &st->sub_level[band],
 				band_cfg[band][0], band_cfg[band][1],
 				band_cfg[band][2], band_cfg[band][3],
 				band_cfg[band][4]);
 	}
 }

 /******************************************************************************
 *
 *     Function   : update_cntrl
 *     Purpose    : Control update of the background noise estimate.
 *
 *******************************************************************************/

 static void update_cntrl(
 		VadVars * st,                         /* i/o : State structure (stat_count, ave_level) */
 		Word16 level[]                        /* i   : sub-band levels of the input frame      */
 		)
 {
 	Word32 band;
 	Word16 larger, smaller, quot, ratio_sum, norm;
 	Word16 alpha;

 	/* Reload stat_count when all tone_flag bits in 0x7c00 are set, or when
 	 * the 8 last vad-decisions (vadreg bits 0x7f80) have all been "0". */
 	if ((sub((Word16) (st->tone_flag & 0x7c00), 0x7c00) == 0) ||
 	    ((st->vadreg & 0x7f80) == 0))
 	{
 		st->stat_count = STAT_COUNT;
 	} else
 	{
 		/* accumulate max/min ratios of current level vs. running average */
 		ratio_sum = 0;
 		for (band = 0; band < COMPLEN; band++)
 		{
 			if(level[band] > st->ave_level[band])
 			{
 				larger = level[band];
 				smaller = st->ave_level[band];
 			} else
 			{
 				larger = st->ave_level[band];
 				smaller = level[band];
 			}

 			/* limit minimum value of both operands to STAT_THR_LEVEL */
 			if(larger < STAT_THR_LEVEL)
 			{
 				larger = STAT_THR_LEVEL;
 			}
 			if(smaller < STAT_THR_LEVEL)
 			{
 				smaller = STAT_THR_LEVEL;
 			}

 			norm = norm_s(smaller);
 			smaller = smaller << norm;

 			/* ratio_sum += larger/smaller * 64 */
 			quot = div_s(larger >> 1, smaller);
 			ratio_sum = add1(ratio_sum, shr(quot, (8 - norm)));
 		}

 		/* compare the ratio sum with a threshold and update stat_count */
 		if(ratio_sum > STAT_THR)
 		{
 			st->stat_count = STAT_COUNT;
 		} else if (((st->vadreg & 0x4000) != 0) && (st->stat_count != 0))
 		{
 			/* count down only while the latest intermediate decision is "1" */
 			st->stat_count = st->stat_count - 1;
 		}
 	}

 	/* select the smoothing factor for the average amplitude estimate */
 	if(st->stat_count == STAT_COUNT)
 	{
 		alpha = 32767;
 	} else if ((st->vadreg & 0x4000) == 0)
 	{
 		alpha = ALPHA5;
 	} else
 	{
 		alpha = ALPHA4;
 	}

 	/* update average amplitude estimate for stationarity estimation */
 	for (band = 0; band < COMPLEN; band++)
 	{
 		st->ave_level[band] = add1(st->ave_level[band], vo_mult_r(alpha, vo_sub(level[band], st->ave_level[band])));
 	}
 }

 /******************************************************************************
 *
 *     Function     : hangover_addition
 *     Purpose      : Add hangover after speech bursts
 *
 *******************************************************************************/

 static Word16 hangover_addition(                      /* return: VAD_flag indicating final VAD decision */
 		VadVars * st,                         /* i/o : State structure (vadreg, burst_count, hang_count) */
 		Word16 low_power,                     /* i   : non-zero when the input frame power is low        */
 		Word16 hang_len,                      /* i   : hangover length                                   */
 		Word16 burst_len                      /* i   : minimum burst length for hangover addition        */
 		)
 {
 	/* low input power: clear both counters and report "0" */
 	if (low_power != 0)
 	{
 		st->burst_count = 0;
 		st->hang_count = 0;
 		return 0;
 	}

 	/* latest intermediate decision is "1": extend the burst, arm the hangover */
 	if ((st->vadreg & 0x4000) != 0)
 	{
 		st->burst_count = st->burst_count + 1;
 		if(st->burst_count >= burst_len)
 		{
 			st->hang_count = hang_len;
 		}
 		return 1;
 	}

 	/* latest intermediate decision is "0": burst ends, consume hangover if any */
 	st->burst_count = 0;
 	if (st->hang_count > 0)
 	{
 		st->hang_count = st->hang_count - 1;
 		return 1;
 	}
 	return 0;
 }

 /******************************************************************************
 *
 *     Function   : noise_estimate_update
 *     Purpose    : Update of background noise estimate
 *
 *******************************************************************************/

 static void noise_estimate_update(
 		VadVars * st,                         /* i/o : State structure (bckr_est, old_level, stat_count) */
 		Word16 level[]                        /* i   : sub-band levels of the input frame                */
 		)
 {
 	Word32 band;
 	Word16 alpha_up, alpha_down, bckr_add;

 	bckr_add = 2;

 	/* Control update of bckr_est[] (refreshes stat_count and ave_level) */
 	update_cntrl(st, level);

 	/* Choose update speed */
 	if ((0x7800 & st->vadreg) == 0)
 	{
 		alpha_up = ALPHA_UP1;
 		alpha_down = ALPHA_DOWN1;
 	} else if (st->stat_count == 0)
 	{
 		alpha_up = ALPHA_UP2;
 		alpha_down = ALPHA_DOWN2;
 	} else
 	{
 		/* no upward adaptation and no upward bias in this case */
 		alpha_up = 0;
 		alpha_down = ALPHA3;
 		bckr_add = 0;
 	}

 	/* Update noise estimate (bckr_est) from the previous frame's levels */
 	for (band = 0; band < COMPLEN; band++)
 	{
 		Word16 delta;
 		delta = (st->old_level[band] - st->bckr_est[band]);

 		if (delta >= 0)
 		{                                  /* update upwards */
 			st->bckr_est[band] = add1(bckr_add, add1(st->bckr_est[band],vo_mult_r(alpha_up, delta)));

 			/* limit maximum value of the noise estimate to NOISE_MAX */
 			if(st->bckr_est[band] > NOISE_MAX)
 			{
 				st->bckr_est[band] = NOISE_MAX;
 			}
 		} else
 		{                                  /* update downwards, always biased by -2 */
 			st->bckr_est[band] = add1(-2, add(st->bckr_est[band],vo_mult_r(alpha_down, delta)));

 			/* limit minimum value of the noise estimate to NOISE_MIN */
 			if(st->bckr_est[band] < NOISE_MIN)
 			{
 				st->bckr_est[band] = NOISE_MIN;
 			}
 		}
 	}

 	/* Remember the current levels as "previous frame" levels (old_level) */
 	for (band = 0; band < COMPLEN; band++)
 	{
 		st->old_level[band] = level[band];
 	}
 }

 /******************************************************************************
 *
 *     Function     : vad_decision
 *     Purpose      : Calculates VAD_flag
 *
 *******************************************************************************/

 static Word16 vad_decision(                           /* return value : VAD_flag */
 		VadVars * st,                         /* i/o : State structure                    */
 		Word16 level[COMPLEN],                /* i   : sub-band levels of the input frame */
 		Word32 pow_sum                        /* i   : power of the input frame           */
 		)
 {
 	Word32 band;
 	Word32 snr_acc;
 	Word32 noise_acc;
 	Word16 vad_thr, scratch, noise_level;
 	Word16 low_power;
 	Word16 hang_len, burst_len;
 	Word16 log_speech, log_noise;
 	Word16 speech_term;

 	/* Sum over all bands of the squared ratio level / bckr_est */
 	snr_acc = 0;
 	for (band = 0; band < COMPLEN; band++)
 	{
 		Word16 norm;

 		norm = norm_s(st->bckr_est[band]);
 		scratch = (st->bckr_est[band] << norm);
 		scratch = div_s((level[band] >> 1), scratch);
 		scratch = shl(scratch, (norm - (UNIRSHFT - 1)));
 		snr_acc = L_mac(snr_acc, scratch, scratch);
 	}

 	/* Average level of the estimated background noise, lowest band ignored */
 	noise_acc = 0;
 	for (band = 1; band < COMPLEN; band++)
 	{
 		noise_acc = vo_L_add(noise_acc, st->bckr_est[band]);
 	}
 	noise_level = extract_h((noise_acc << 12));

 	/* Raise speech_level to at least MIN_SPEECH_SNR * noise_level */
 	scratch = vo_mult(noise_level, MIN_SPEECH_SNR) << 3;
 	if(st->speech_level < scratch)
 	{
 		st->speech_level = scratch;
 	}
 	log_noise = ilog2(noise_level);

 	/* With very poor SNR the speech level is probably corrupted by the noise
 	 * level; correct this by subtracting MIN_SPEECH_SNR * noise_level first */
 	log_speech = ilog2(st->speech_level - scratch);

 	/* noise-dependent part of the threshold */
 	scratch = add1(vo_mult(NO_SLOPE, (log_noise - NO_P1)), THR_HIGH);

 	/* speech-dependent part, limited to [SP_CH_MIN, SP_CH_MAX] */
 	speech_term = add1(SP_CH_MIN, vo_mult(SP_SLOPE, (log_speech - SP_P1)));
 	if (speech_term < SP_CH_MIN)
 	{
 		speech_term = SP_CH_MIN;
 	}
 	if (speech_term > SP_CH_MAX)
 	{
 		speech_term = SP_CH_MAX;
 	}

 	vad_thr = scratch + speech_term;
 	if(vad_thr < THR_MIN)
 	{
 		vad_thr = THR_MIN;
 	}

 	/* Shift VAD decision register and record the intermediate decision */
 	st->vadreg = (st->vadreg >> 1);
 	if(snr_acc > vo_L_mult(vad_thr, (512 * COMPLEN)))
 	{
 		st->vadreg = (Word16) (st->vadreg | 0x4000);
 	}

 	/* flag frames whose input power (pow_sum) is below VAD_POW_LOW */
 	low_power = (pow_sum < VAD_POW_LOW) ? 1 : 0;

 	/* Update background noise estimates */
 	noise_estimate_update(st, level);

 	/* Derive hang_len and burst_len from vad_thr */
 	hang_len = add1(vo_mult(HANG_SLOPE, (vad_thr - HANG_P1)), HANG_HIGH);
 	if(hang_len < HANG_LOW)
 	{
 		hang_len = HANG_LOW;
 	}
 	burst_len = add1(vo_mult(BURST_SLOPE, (vad_thr - BURST_P1)), BURST_HIGH);

 	return hangover_addition(st, low_power, hang_len, burst_len);
 }

 /******************************************************************************
 *
 *     Function : Estimate_Speech()
 *     Purpose  : Estimate speech level
 *
 * Maximum signal level is searched and stored to the variable sp_max.
 * The speech frames must locate within SP_EST_COUNT number of frames.
 * Thus, noisy frames having occasional VAD = "1" decisions will not
 * affect to the estimated speech_level.
 *
 *******************************************************************************/

 static void Estimate_Speech(
 		VadVars * st,                         /* i/o : State structure (sp_* counters, speech_level) */
 		Word16 in_level                       /* i   : level of the input frame                      */
 		)
 {
 	Word16 alpha;
 	Word16 avg_level;

 	/* if the required activity count cannot be achieved, reset counters */
 	if((st->sp_est_cnt - st->sp_max_cnt) > (SP_EST_COUNT - SP_ACTIVITY_COUNT))
 	{
 		st->sp_est_cnt = 0;
 		st->sp_max = 0;
 		st->sp_max_cnt = 0;
 	}
 	st->sp_est_cnt += 1;

 	/* the frame only counts when it is loud enough ... */
 	if (in_level <= MIN_SPEECH_LEVEL1)
 	{
 		return;
 	}
 	/* ... and either flagged in vadreg or above the current speech level */
 	if (!(st->vadreg & 0x4000) && !(in_level > st->speech_level))
 	{
 		return;
 	}

 	/* track the maximum level and the number of counted frames */
 	if(in_level > st->sp_max)
 	{
 		st->sp_max = in_level;
 	}
 	st->sp_max_cnt += 1;

 	if(st->sp_max_cnt < SP_ACTIVITY_COUNT)
 	{
 		return;
 	}

 	/* enough active frames: halve the maximum to get an "average" speech level */
 	avg_level = (st->sp_max >> 1);

 	/* select update speed */
 	alpha = (avg_level > st->speech_level) ? ALPHA_SP_UP : ALPHA_SP_DOWN;

 	if(avg_level > MIN_SPEECH_LEVEL2)
 	{
 		st->speech_level = add1(st->speech_level, vo_mult_r(alpha, vo_sub(avg_level, st->speech_level)));
 	}

 	/* clear all counters used for speech estimation */
 	st->sp_max = 0;
 	st->sp_max_cnt = 0;
 	st->sp_est_cnt = 0;
 }

 /******************************************************************************
 *
 *  Function:   wb_vad_init
 *  Purpose:    Allocates state memory and initializes state memory
 *
 *******************************************************************************/

 Word16 wb_vad_init(                        /* return: non-zero with error, zero for ok. */
 		VadVars ** state,                     /* o   : receives the new state, or NULL on failure */
 		VO_MEM_OPERATOR *pMemOP               /* i   : memory operator handed to mem_malloc       */
 		)
 {
 	VadVars *vad;

 	if (state == (VadVars **) NULL)
 	{
 		fprintf(stderr, "vad_init: invalid parameter\n");
 		return -1;
 	}

 	/* make sure the caller never sees a stale pointer if allocation fails */
 	*state = NULL;

 	/* allocate memory */
 	vad = (VadVars *) mem_malloc(pMemOP, sizeof(VadVars), 32, VO_INDEX_ENC_AMRWB);
 	if (vad == NULL)
 	{
 		fprintf(stderr, "vad_init: can not malloc state structure\n");
 		return -1;
 	}

 	/* bring the fresh state to its initial values and publish it */
 	wb_vad_reset(vad);
 	*state = vad;

 	return 0;
 }

 /******************************************************************************
 *
 *  Function:   wb_vad_reset
 *  Purpose:    Initializes state memory
 *
 *******************************************************************************/

 Word16 wb_vad_reset(                       /* return: non-zero with error, zero for ok. */
 		VadVars * state                       /* i/o : State structure to (re)initialize */
 		)
 {
 	Word32 i, j;

 	if (state == (VadVars *) NULL)
 	{
 		fprintf(stderr, "vad_reset: invalid parameter\n");
 		return -1;
 	}
 	state->tone_flag = 0;
 	state->vadreg = 0;
 	state->hang_count = 0;
 	state->burst_count = 0;
 	/* stat_count is read by update_cntrl() and noise_estimate_update(); the
 	 * original code cleared hang_count a second time here and left stat_count
 	 * untouched, so a reset of a used state kept a stale counter. */
 	state->stat_count = 0;

 	/* initialize memory used by the filter bank */
 	for (i = 0; i < F_5TH_CNT; i++)
 	{
 		for (j = 0; j < 2; j++)
 		{
 			state->a_data5[i][j] = 0;
 		}
 	}

 	for (i = 0; i < F_3TH_CNT; i++)
 	{
 		state->a_data3[i] = 0;
 	}

 	/* initialize the per-band estimates and carried-over sub-levels */
 	for (i = 0; i < COMPLEN; i++)
 	{
 		state->bckr_est[i] = NOISE_INIT;
 		state->old_level[i] = NOISE_INIT;
 		state->ave_level[i] = NOISE_INIT;
 		state->sub_level[i] = 0;
 	}

 	/* speech level estimation and frame power history */
 	state->sp_est_cnt = 0;
 	state->sp_max = 0;
 	state->sp_max_cnt = 0;
 	state->speech_level = SPEECH_LEVEL_INIT;
 	state->prev_pow_sum = 0;
 	return 0;
 }

 /******************************************************************************
 *
 *  Function:   wb_vad_exit
 *  Purpose:    The memory used for state memory is freed
 *
 *******************************************************************************/

 void wb_vad_exit(
 		VadVars ** state,                      /* i/o : State structure; *state is set to NULL after release */
 		VO_MEM_OPERATOR *pMemOP                /* i   : memory operator handed to mem_free                   */
 		)
 {
 	/* nothing to do when there is no handle or no allocated state */
 	if (state != NULL && *state != NULL)
 	{
 		/* deallocate memory and clear the caller's pointer */
 		mem_free(pMemOP, *state, VO_INDEX_ENC_AMRWB);
 		*state = NULL;
 	}
 }

 /******************************************************************************
 *
 *     Function     : wb_vad_tone_detection
 *     Purpose      : Search maximum pitch gain from a frame. Set tone flag if
 *                    pitch gain is high. This is used to detect
 *                    signaling tones and other signals with high pitch gain.
 *
 *******************************************************************************/

 void wb_vad_tone_detection(
 		VadVars * st,                         /* i/o : State struct (tone_flag is updated) */
 		Word16 p_gain                         /* i   : pitch gain                          */
 		)
 {
 	/* age the tone history by one position */
 	st->tone_flag = (st->tone_flag >> 1);

 	/* gain not above TONE_THR: leave the newest position cleared */
 	if (!(p_gain > TONE_THR))
 	{
 		return;
 	}

 	/* mark the newest position */
 	st->tone_flag = (Word16) (st->tone_flag | 0x4000);
 }

 /******************************************************************************
 *
 *     Function     : wb_vad
 *     Purpose      : Main program for Voice Activity Detection (VAD) for AMR
 *
 *******************************************************************************/

 Word16 wb_vad(                                /* Return value : VAD Decision, 1 = speech, 0 = noise */
 		VadVars * st,                         /* i/o : State structure                         */
 		Word16 in_buf[]                       /* i   : FRAME_LEN samples of the input frame    */
 	     )
 {
 	Word16 level[COMPLEN];
 	Word32 k;
 	Word16 VAD_flag, in_level;
 	Word32 frame_pow, pow_sum, level_acc;

 	/* Calculate power of the input frame. */
 	frame_pow = 0L;
 	k = 0;
 	while (k < FRAME_LEN)
 	{
 		frame_pow = L_mac(frame_pow, in_buf[k], in_buf[k]);
 		k++;
 	}

 	/* pow_sum = power of current frame and previous frame */
 	pow_sum = L_add(frame_pow, st->prev_pow_sum);

 	/* save power of current frame for next call */
 	st->prev_pow_sum = frame_pow;

 	/* If input power is very low, clear the two newest tone flag positions */
 	if (pow_sum < POW_TONE_THR)
 	{
 		st->tone_flag = (Word16) (st->tone_flag & 0x1fff);
 	}

 	/* Run the filter bank and calculate signal levels at each band */
 	filter_bank(st, in_buf, level);

 	/* compute VAD decision */
 	VAD_flag = vad_decision(st, level, pow_sum);

 	/* Calculate input level, ignoring the lowest band */
 	level_acc = 0;
 	k = 1;
 	while (k < COMPLEN)
 	{
 		level_acc = vo_L_add(level_acc, level[k]);
 		k++;
 	}
 	in_level = extract_h(level_acc << 12);

 	/* Estimate speech level */
 	Estimate_Speech(st, in_level);
 	return VAD_flag;
 }
	/*
	** Copyright 2003-2010, VisualOn, Inc.
	**
	** Licensed under the Apache License, Version 2.0 (the "License");
	** you may not use this file except in compliance with the License.
	** You may obtain a copy of the License at
	**
	** http://www.apache.org/licenses/LICENSE-2.0
	**
	** Unless required by applicable law or agreed to in writing, software
	** distributed under the License is distributed on an "AS IS" BASIS,
	** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	** See the License for the specific language governing permissions and
	** limitations under the License.
	*/

	/***********************************************************************
	* File: wb_vad.c *
	* *
	* Description: Voice Activity Detection *
	* *
	************************************************************************/

	#include <stdlib.h>
	#include <stdio.h>
	#include "cnst.h"
	#include "wb_vad.h"
	#include "typedef.h"
	#include "basic_op.h"
	#include "math_op.h"
	#include "wb_vad_c.h"
	#include "mem_align.h"

	/******************************************************************************
	* Calculate Log2 and scale the signal:
	*
	* ilog2(Word32 in) = -1024*log10(in * 2^-31)/log10(2), where in = [1, 2^31-1]
	*
	* input output
	* 32768 16384
	* 1 31744
	*
	* When input is in the range of [1,2^16], max error is 0.0380%.
	*********************************************************************************/

	static Word16 ilog2( /* return: scaled log2-style value of the input */
		Word16 mant /* i: value to be converted; values <= 0 are treated as 1 */
		)
	{
		Word16 norm_in, norm_sq, result;
		Word32 pass, squared;

		/* clamp non-positive input so that the normalisation below sees at least 1 */
		if (mant <= 0)
		{
			mant = 1;
		}

		/* normalise the input and remember the shift that was applied */
		norm_in = norm_s(mant);
		mant = mant << norm_in;

		/* three 16-bit squarings followed by one 32-bit squaring */
		pass = 0;
		while (pass < 3)
		{
			mant = vo_mult(mant, mant);
			pass++;
		}
		squared = vo_L_mult(mant, mant);

		/* normalise the 32-bit result and keep its upper half */
		norm_sq = norm_l(squared);
		mant = extract_h(squared << norm_sq);

		/* combine both shift counts and the top bits of the mantissa */
		result = (norm_in + 16) << 10;
		result = add1(result, (norm_sq << 6));
		result = vo_sub(add1(result, 127), (mant >> 8));
		return result;
	}

	/******************************************************************************
	*
	* Function : filter5
	* Purpose : Fifth-order half-band lowpass/highpass filter pair with
	* decimation.
	*
	*******************************************************************************/

	static void filter5(
		Word16 * in0, /* i/o : first input sample; overwritten with the low-pass part */
		Word16 * in1, /* i/o : second input sample; overwritten with the high-pass part */
		Word16 data[] /* i/o : filter memory, data[0] and data[1] are used */
		)
	{
		Word16 state_a, branch_a;
		Word16 state_b, branch_b;

		/* first branch: fed by *in0, coefficient COEFF5_1, memory data[0] */
		state_a = vo_sub(*in0, vo_mult(COEFF5_1, data[0]));
		branch_a = add1(data[0], vo_mult(COEFF5_1, state_a));
		data[0] = state_a;

		/* second branch: fed by *in1, coefficient COEFF5_2, memory data[1] */
		state_b = vo_sub(*in1, vo_mult(COEFF5_2, data[1]));
		branch_b = add1(data[1], vo_mult(COEFF5_2, state_b));
		data[1] = state_b;

		/* sum of the branches goes back to *in0, their difference to *in1 */
		*in0 = extract_h((vo_L_add(branch_a, branch_b) << 15));
		*in1 = extract_h((vo_L_sub(branch_a, branch_b) << 15));
	}

	/******************************************************************************
	*
	* Function : filter3
	* Purpose : Third-order half-band lowpass/highpass filter pair with
	* decimation.
	*
	*******************************************************************************/

	static void filter3(
		Word16 * in0, /* i/o : first input sample; overwritten with the low-pass part */
		Word16 * in1, /* i/o : second input sample; overwritten with the high-pass part */
		Word16 * data /* i/o : single-word filter memory */
		)
	{
		Word16 temp1, temp2;

		/* The pointers must be dereferenced: this copy of the function had lost
		 * its '*' operators, so it did arithmetic on the pointers themselves
		 * and never wrote the outputs. */
		temp1 = vo_sub(*in1, vo_mult(COEFF3, *data));
		temp2 = add1(*data, vo_mult(COEFF3, temp1));
		*data = temp1;

		/* *in1 is written first because both outputs use the old *in0 */
		*in1 = extract_h((vo_L_sub(*in0, temp2) << 15));
		*in0 = extract_h((vo_L_add(*in0, temp2) << 15));
	}

	/******************************************************************************
	*
	* Function : level_calculation
	* Purpose : Calculate signal level in a sub-band. Level is calculated
	* by summing absolute values of the input data.
	*
	* Signal level calculated from the end of the frame
	* (samples count1 .. count2-1) is stored to (*sub_level)
	* and added to the level of the next frame.
	*
	******************************************************************************/

	static Word16 level_calculation( /* return: signal level */
		Word16 data[], /* i : signal buffer */
		Word16 * sub_level, /* i : level carried over from the previous frame */
		/* o : level of samples count1 .. count2-1 of this frame */
		Word16 count1, /* i : first sample index of the carried-over tail */
		Word16 count2, /* i : one past the last sample index */
		Word16 ind_m, /* i : step size for the index of the data buffer */
		Word16 ind_a, /* i : starting index of the data buffer */
		Word16 scale /* i : scaling for the level calculation */
		)
	{
		Word32 n, tail_sum, frame_sum;

		/* sum of 2*|x| over the tail of the frame (samples count1 .. count2-1) */
		tail_sum = 0L;
		n = count1;
		while (n < count2)
		{
			tail_sum += (abs_s(data[ind_m * n + ind_a])<<1);
			n++;
		}

		/* add the level saved by the previous call, then save this tail for the next one */
		frame_sum = vo_L_add(tail_sum, L_shl(*sub_level, 16 - scale));
		*sub_level = extract_h(L_shl(tail_sum, scale));

		/* add the head of the frame (samples 0 .. count1-1) */
		n = 0;
		while (n < count1)
		{
			frame_sum += (abs_s(data[ind_m * n + ind_a])<<1);
			n++;
		}

		return extract_h(L_shl2(frame_sum, scale));
	}

	/******************************************************************************
	*
	* Function : filter_bank
	* Purpose : Divide input signal into bands and calculate level of
	* the signal in each band
	*
	*******************************************************************************/

	static void filter_bank(
		VadVars * st, /* i/o : State struct (filter memories, sub_level) */
		Word16 in[], /* i : input frame of FRAME_LEN samples */
		Word16 level[] /* o : signal levels for bands 0..11 */
		)
	{
		Word32 k;
		Word16 work[FRAME_LEN];

		/* work on a copy of the input, shifted 1 bit down for safe scaling */
		k = 0;
		while (k < FRAME_LEN)
		{
			work[k] = in[k] >> 1;
			k++;
		}

		/* stage 1: split sample pairs */
		for (k = 0; k < 128; k++)
		{
			filter5(&work[2 * k], &work[2 * k + 1], st->a_data5[0]);
		}

		/* stage 2 */
		for (k = 0; k < 64; k++)
		{
			filter5(&work[4 * k], &work[4 * k + 2], st->a_data5[1]);
			filter5(&work[4 * k + 1], &work[4 * k + 3], st->a_data5[2]);
		}

		/* stage 3 */
		for (k = 0; k < 32; k++)
		{
			filter5(&work[8 * k], &work[8 * k + 4], st->a_data5[3]);
			filter5(&work[8 * k + 2], &work[8 * k + 6], st->a_data5[4]);
			filter3(&work[8 * k + 3], &work[8 * k + 7], &st->a_data3[0]);
		}

		/* stage 4 */
		for (k = 0; k < 16; k++)
		{
			filter3(&work[16 * k + 0], &work[16 * k + 8], &st->a_data3[1]);
			filter3(&work[16 * k + 4], &work[16 * k + 12], &st->a_data3[2]);
			filter3(&work[16 * k + 6], &work[16 * k + 14], &st->a_data3[3]);
		}

		/* stage 5 */
		for (k = 0; k < 8; k++)
		{
			filter3(&work[32 * k + 0], &work[32 * k + 16], &st->a_data3[4]);
			filter3(&work[32 * k + 8], &work[32 * k + 24], &st->a_data3[5]);
		}

		/* calculate levels in each frequency band, highest band first */
		level[11] = level_calculation(work, &st->sub_level[11], 16, 64, 4, 1, 14);   /* 4800 - 6400 Hz */
		level[10] = level_calculation(work, &st->sub_level[10], 8, 32, 8, 7, 15);    /* 4000 - 4800 Hz */
		level[9] = level_calculation(work, &st->sub_level[9], 8, 32, 8, 3, 15);      /* 3200 - 4000 Hz */
		level[8] = level_calculation(work, &st->sub_level[8], 8, 32, 8, 2, 15);      /* 2400 - 3200 Hz */
		level[7] = level_calculation(work, &st->sub_level[7], 4, 16, 16, 14, 16);    /* 2000 - 2400 Hz */
		level[6] = level_calculation(work, &st->sub_level[6], 4, 16, 16, 6, 16);     /* 1600 - 2000 Hz */
		level[5] = level_calculation(work, &st->sub_level[5], 4, 16, 16, 4, 16);     /* 1200 - 1600 Hz */
		level[4] = level_calculation(work, &st->sub_level[4], 4, 16, 16, 12, 16);    /*  800 - 1200 Hz */
		level[3] = level_calculation(work, &st->sub_level[3], 2, 8, 32, 8, 17);      /*  600 -  800 Hz */
		level[2] = level_calculation(work, &st->sub_level[2], 2, 8, 32, 24, 17);     /*  400 -  600 Hz */
		level[1] = level_calculation(work, &st->sub_level[1], 2, 8, 32, 16, 17);     /*  200 -  400 Hz */
		level[0] = level_calculation(work, &st->sub_level[0], 2, 8, 32, 0, 17);      /*    0 -  200 Hz */
	}

	/******************************************************************************
	*
	* Function : update_cntrl
	* Purpose : Control update of the background noise estimate.
	*
	*******************************************************************************/

	static void update_cntrl(
			VadVars * st,                      /* i/o : State structure                       */
			Word16 level[]                     /* i   : sub-band levels of the input frame    */
			)
	{
		Word32 band;
		Word16 hi, lo, quot, ratio_sum, shift;
		Word16 alpha;

		/*
		 * Step 1: decide what happens to the stationarity counter (stat_count).
		 * It is reloaded with STAT_COUNT in three situations and otherwise
		 * counted down towards zero while the intermediate VAD decision is "1".
		 */
		if (sub((Word16) (st->tone_flag & 0x7c00), 0x7c00) == 0)
		{
			/* all tone-flag bits selected by 0x7c00 are set: reload */
			st->stat_count = STAT_COUNT;
		}
		else if ((st->vadreg & 0x7f80) == 0)
		{
			/* none of the 8 most recent VAD decisions was "1": reload */
			st->stat_count = STAT_COUNT;
		}
		else
		{
			/* accumulate, over all bands, the ratio larger/smaller of the
			 * current level and its running average */
			ratio_sum = 0;
			for (band = 0; band < COMPLEN; band++)
			{
				if (level[band] > st->ave_level[band])
				{
					hi = level[band];
					lo = st->ave_level[band];
				}
				else
				{
					hi = st->ave_level[band];
					lo = level[band];
				}

				/* limit minimum value of both operands to STAT_THR_LEVEL */
				if (hi < STAT_THR_LEVEL)
				{
					hi = STAT_THR_LEVEL;
				}
				if (lo < STAT_THR_LEVEL)
				{
					lo = STAT_THR_LEVEL;
				}

				/* normalise the divisor before the fractional division */
				shift = norm_s(lo);
				lo = lo << shift;

				/* ratio_sum += hi/lo * 64 */
				quot = div_s(hi >> 1, lo);
				ratio_sum = add1(ratio_sum, shr(quot, (8 - shift)));
			}

			if (ratio_sum > STAT_THR)
			{
				/* levels moved too far away from their averages: reload */
				st->stat_count = STAT_COUNT;
			}
			else if (((st->vadreg & 0x4000) != 0) && (st->stat_count != 0))
			{
				/* current intermediate decision is "1": count down, stop at 0 */
				st->stat_count -= 1;
			}
		}

		/*
		 * Step 2: update average amplitude estimate for stationarity estimation.
		 * The smoothing coefficient depends on the counter state and on the
		 * current intermediate VAD decision.
		 */
		if (st->stat_count == STAT_COUNT)
		{
			alpha = 32767;
		}
		else if ((st->vadreg & 0x4000) == 0)
		{
			alpha = ALPHA5;
		}
		else
		{
			alpha = ALPHA4;
		}

		for (band = 0; band < COMPLEN; band++)
		{
			st->ave_level[band] = add1(st->ave_level[band], vo_mult_r(alpha, vo_sub(level[band], st->ave_level[band])));
		}
	}

	/******************************************************************************
	*
	* Function : hangover_addition
	* Purpose : Add hangover after speech bursts
	*
	*******************************************************************************/

	static Word16 hangover_addition(           /* return: VAD_flag indicating final VAD decision */
			VadVars * st,                      /* i/o : State structure                          */
			Word16 low_power,                  /* i   : flag power of the input frame            */
			Word16 hang_len,                   /* i   : hangover length                          */
			Word16 burst_len                   /* i   : minimum burst length for hangover addition */
			)
	{
		/* if the input power (pow_sum) is lower than a threshold, clear
		 * counters and set VAD_flag to "0" */
		if (low_power != 0)
		{
			st->burst_count = 0;
			st->hang_count = 0;
			return 0;
		}

		/* update the counters (hang_count, burst_count) */
		if ((st->vadreg & 0x4000) != 0)
		{
			/*
			 * Saturating increment.  A plain "+ 1" wraps to a negative value
			 * once the counter passes 32767 during a very long burst, after
			 * which the comparison below stops refreshing hang_count.  Since
			 * burst_len is a Word16 it can never exceed 32767, so holding the
			 * counter at that value keeps the comparison true.
			 */
			if (st->burst_count < 32767)
			{
				st->burst_count = st->burst_count + 1;
			}
			if (st->burst_count >= burst_len)
			{
				/* burst was long enough: (re)arm the hangover */
				st->hang_count = hang_len;
			}
			return 1;
		}

		/* intermediate decision is "0": the burst is over */
		st->burst_count = 0;
		if (st->hang_count > 0)
		{
			/* still inside the hangover period: keep reporting speech */
			st->hang_count = st->hang_count - 1;
			return 1;
		}
		return 0;
	}

	/******************************************************************************
	*
	* Function : noise_estimate_update
	* Purpose : Update of background noise estimate
	*
	*******************************************************************************/

	static void noise_estimate_update(
			VadVars * st,                      /* i/o : State structure                       */
			Word16 level[]                     /* i   : sub-band levels of the input frame    */
			)
	{
		Word32 band;
		Word16 coef_up, coef_down;
		Word16 up_bias = 2;                    /* constant added on every upward update */

		/* let update_cntrl() refresh stat_count / ave_level first */
		update_cntrl(st, level);

		/* pick the adaptation speed for this frame */
		if ((0x7800 & st->vadreg) == 0)
		{
			/* none of the decision bits selected by 0x7800 is set */
			coef_up = ALPHA_UP1;
			coef_down = ALPHA_DOWN1;
		}
		else if (st->stat_count == 0)
		{
			/* stationarity counter has run down to zero */
			coef_up = ALPHA_UP2;
			coef_down = ALPHA_DOWN2;
		}
		else
		{
			/* otherwise: no upward adaptation at all, not even the bias */
			coef_up = 0;
			coef_down = ALPHA3;
			up_bias = 0;
		}

		/* move each band's noise estimate (bckr_est) towards the level of
		 * the previous frame (old_level) */
		for (band = 0; band < COMPLEN; band++)
		{
			Word16 delta;

			delta = (st->old_level[band] - st->bckr_est[band]);

			if (delta >= 0)
			{
				/* update upwards, then cap at NOISE_MAX */
				st->bckr_est[band] = add1(up_bias, add1(st->bckr_est[band],vo_mult_r(coef_up, delta)));

				if (st->bckr_est[band] > NOISE_MAX)
				{
					st->bckr_est[band] = NOISE_MAX;
				}
			}
			else
			{
				/* update downwards (with a fixed extra -2), then floor at NOISE_MIN */
				st->bckr_est[band] = add1(-2, add(st->bckr_est[band],vo_mult_r(coef_down, delta)));

				if (st->bckr_est[band] < NOISE_MIN)
				{
					st->bckr_est[band] = NOISE_MIN;
				}
			}
		}

		/* remember this frame's levels; they become old_level for the next call */
		for (band = 0; band < COMPLEN; band++)
		{
			st->old_level[band] = level[band];
		}
	}

	/******************************************************************************
	*
	* Function : vad_decision
	* Purpose : Calculates VAD_flag
	*
	*******************************************************************************/

	static Word16 vad_decision(                /* return value : VAD_flag                     */
			VadVars * st,                      /* i/o : State structure                       */
			Word16 level[COMPLEN],             /* i   : sub-band levels of the input frame    */
			Word32 pow_sum                     /* i   : power of the input frame              */
			)
	{
		Word32 i;
		Word32 L_snr_sum;
		Word32 L_temp;
		Word16 vad_thr, temp, noise_level;
		Word16 low_power_flag;
		Word16 hang_len, burst_len;
		Word16 ilog2_speech_level, ilog2_noise_level;
		Word16 temp2;

		/* Calculate squared sum of the input levels (level) divided by the
		 * background noise components (bckr_est). */
		L_snr_sum = 0;
		for (i = 0; i < COMPLEN; i++)
		{
			Word16 exp;

			/* normalise the noise estimate so it can be used as a divisor */
			exp = norm_s(st->bckr_est[i]);
			temp = (st->bckr_est[i] << exp);
			temp = div_s((level[i] >> 1), temp);
			temp = shl(temp, (exp - (UNIRSHFT - 1)));
			L_snr_sum = L_mac(L_snr_sum, temp, temp);
		}

		/* Calculate average level of estimated background noise */
		L_temp = 0;
		for (i = 1; i < COMPLEN; i++)          /* ignore lowest band */
		{
			L_temp = vo_L_add(L_temp, st->bckr_est[i]);
		}

		noise_level = extract_h((L_temp << 12));

		/* If the speech level is below the MIN_SPEECH_SNR-scaled noise level,
		 * raise speech_level to that value. */
		temp = vo_mult(noise_level, MIN_SPEECH_SNR) << 3;

		if (st->speech_level < temp)
		{
			st->speech_level = temp;
		}
		ilog2_noise_level = ilog2(noise_level);

		/* If SNR is very poor, speech_level is probably corrupted by noise
		 * level. This is corrected by subtracting MIN_SPEECH_SNR*noise_level
		 * (held in temp) from speech level. */
		ilog2_speech_level = ilog2(st->speech_level - temp);

		/* noise-dependent part of the threshold */
		temp = add1(vo_mult(NO_SLOPE, (ilog2_noise_level - NO_P1)), THR_HIGH);

		/* speech-dependent part of the threshold, kept inside
		 * [SP_CH_MIN, SP_CH_MAX] */
		temp2 = add1(SP_CH_MIN, vo_mult(SP_SLOPE, (ilog2_speech_level - SP_P1)));
		if (temp2 < SP_CH_MIN)
		{
			temp2 = SP_CH_MIN;
		}
		if (temp2 > SP_CH_MAX)
		{
			temp2 = SP_CH_MAX;
		}
		vad_thr = temp + temp2;

		if (vad_thr < THR_MIN)
		{
			vad_thr = THR_MIN;
		}

		/* Shift VAD decision register */
		st->vadreg = (st->vadreg >> 1);

		/* Make intermediate VAD decision: record a "1" in bit 14 when the SNR
		 * sum exceeds the threshold */
		if (L_snr_sum > vo_L_mult(vad_thr, (512 * COMPLEN)))
		{
			st->vadreg = (Word16) (st->vadreg | 0x4000);
		}

		/* check if the input power (pow_sum) is lower than a threshold */
		low_power_flag = (pow_sum < VAD_POW_LOW) ? 1 : 0;

		/* Update background noise estimates */
		noise_estimate_update(st, level);

		/* Calculate values for hang_len and burst_len based on vad_thr */
		hang_len = add1(vo_mult(HANG_SLOPE, (vad_thr - HANG_P1)), HANG_HIGH);
		if (hang_len < HANG_LOW)
		{
			hang_len = HANG_LOW;
		}
		burst_len = add1(vo_mult(BURST_SLOPE, (vad_thr - BURST_P1)), BURST_HIGH);

		return (hangover_addition(st, low_power_flag, hang_len, burst_len));
	}

	/******************************************************************************
	*
	* Function : Estimate_Speech()
	* Purpose : Estimate speech level
	*
	* Maximum signal level is searched and stored to the variable sp_max.
	* The speech frames must locate within SP_EST_COUNT number of frames.
	* Thus, noisy frames having occasional VAD = "1" decisions will not
	* affect to the estimated speech_level.
	*
	*******************************************************************************/

	static void Estimate_Speech(
			VadVars * st,                      /* i/o : State structure                       */
			Word16 in_level                    /* i   : level of the input frame              */
			)
	{
		Word16 alpha;

		/* if the required activity count cannot be achieved any more within
		 * the current estimation window, reset counters */
		if ((st->sp_est_cnt - st->sp_max_cnt) > (SP_EST_COUNT - SP_ACTIVITY_COUNT))
		{
			st->sp_est_cnt = 0;
			st->sp_max = 0;
			st->sp_max_cnt = 0;
		}
		st->sp_est_cnt += 1;

		/* A frame counts as active when the intermediate VAD decision is "1"
		 * or its level exceeds the current speech estimate, and in either case
		 * its level is above MIN_SPEECH_LEVEL1. */
		if (((st->vadreg & 0x4000) || (in_level > st->speech_level)) && (in_level > MIN_SPEECH_LEVEL1))
		{
			/* update sp_max */
			if (in_level > st->sp_max)
			{
				st->sp_max = in_level;
			}
			st->sp_max_cnt += 1;

			if (st->sp_max_cnt >= SP_ACTIVITY_COUNT)
			{
				Word16 tmp;

				/* update speech estimate */
				tmp = (st->sp_max >> 1);       /* scale to get "average" speech level */

				/* select update speed */
				if (tmp > st->speech_level)
				{
					alpha = ALPHA_SP_UP;
				}
				else
				{
					alpha = ALPHA_SP_DOWN;
				}

				/* levels at or below MIN_SPEECH_LEVEL2 do not move the estimate */
				if (tmp > MIN_SPEECH_LEVEL2)
				{
					st->speech_level = add1(st->speech_level, vo_mult_r(alpha, vo_sub(tmp, st->speech_level)));
				}

				/* clear all counters used for speech estimation */
				st->sp_max = 0;
				st->sp_max_cnt = 0;
				st->sp_est_cnt = 0;
			}
		}
	}

	/******************************************************************************
	*
	* Function: wb_vad_init
	* Purpose: Allocates state memory and initializes state memory
	*
	*******************************************************************************/

	Word16 wb_vad_init(                        /* return: non-zero with error, zero for ok.   */
			VadVars ** state,                  /* i/o : receives the newly created state      */
			VO_MEM_OPERATOR *pMemOP            /* i   : memory operator handed to mem_malloc  */
			)
	{
		VadVars *vad;

		/* the caller must give us somewhere to store the result */
		if (state == (VadVars **) NULL)
		{
			fprintf(stderr, "vad_init: invalid parameter\n");
			return -1;
		}

		/* make sure the caller sees NULL if anything below fails */
		*state = NULL;

		/* allocate the state structure (third argument 32 is presumably the
		 * requested alignment - confirm against mem_malloc) */
		vad = (VadVars *) mem_malloc(pMemOP, sizeof(VadVars), 32, VO_INDEX_ENC_AMRWB);
		if (vad == NULL)
		{
			fprintf(stderr, "vad_init: can not malloc state structure\n");
			return -1;
		}

		/* bring the fresh state to its defined start values and publish it */
		wb_vad_reset(vad);
		*state = vad;

		return 0;
	}

	/******************************************************************************
	*
	* Function: wb_vad_reset
	* Purpose: Initializes state memory
	*
	*******************************************************************************/

	Word16 wb_vad_reset(                       /* return: non-zero with error, zero for ok.   */
			VadVars * state                    /* i/o : State structure                       */
			)
	{
		Word32 i, j;

		if (state == (VadVars *) NULL)
		{
			fprintf(stderr, "vad_reset: invalid parameter\n");
			return -1;
		}

		/* decision registers and counters */
		state->tone_flag = 0;
		state->vadreg = 0;
		state->hang_count = 0;
		state->burst_count = 0;
		/*
		 * stat_count used to be left untouched here (hang_count was cleared
		 * twice instead).  update_cntrl() can compare and decrement stat_count
		 * before it has written it, so give it a defined start value.
		 */
		state->stat_count = 0;

		/* initialize memory used by the filter bank */
		for (i = 0; i < F_5TH_CNT; i++)
		{
			for (j = 0; j < 2; j++)
			{
				state->a_data5[i][j] = 0;
			}
		}

		for (i = 0; i < F_3TH_CNT; i++)
		{
			state->a_data3[i] = 0;
		}

		/* initialize the rest of the memory: per-band noise estimate, previous
		 * and averaged levels start at NOISE_INIT, sub-frame levels at zero */
		for (i = 0; i < COMPLEN; i++)
		{
			state->bckr_est[i] = NOISE_INIT;
			state->old_level[i] = NOISE_INIT;
			state->ave_level[i] = NOISE_INIT;
			state->sub_level[i] = 0;
		}

		/* speech level estimation */
		state->sp_est_cnt = 0;
		state->sp_max = 0;
		state->sp_max_cnt = 0;
		state->speech_level = SPEECH_LEVEL_INIT;

		/* no previous frame yet, so its power contribution is zero */
		state->prev_pow_sum = 0;
		return 0;
	}

	/******************************************************************************
	*
	* Function: wb_vad_exit
	* Purpose: The memory used for state memory is freed
	*
	*******************************************************************************/

	void wb_vad_exit(
			VadVars ** state,                  /* i/o : State structure; *state is set to NULL */
			VO_MEM_OPERATOR *pMemOP            /* i   : memory operator handed to mem_free     */
			)
	{
		/* nothing to release when there is no handle or no state behind it */
		if (state == NULL || *state == NULL)
		{
			return;
		}

		/* deallocate memory and clear the caller's pointer so that a second
		 * call becomes a no-op */
		mem_free(pMemOP, *state, VO_INDEX_ENC_AMRWB);
		*state = NULL;
	}

	/******************************************************************************
	*
	* Function : wb_vad_tone_detection
	* Purpose : Search maximum pitch gain from a frame. Set tone flag if
	* pitch gain is high. This is used to detect
	* signaling tones and other signals with high pitch gain.
	*
	*******************************************************************************/

	void wb_vad_tone_detection(
			VadVars * st,                      /* i/o : State struct                          */
			Word16 p_gain                      /* i   : pitch gain                            */
			)
	{
		/* update tone flag: age the history by one position */
		st->tone_flag = (st->tone_flag >> 1);

		/* if (pitch_gain > TONE_THR) set tone flag (bit 14) for this call */
		if (p_gain > TONE_THR)
		{
			st->tone_flag = (Word16) (st->tone_flag | 0x4000);
		}
	}

	/******************************************************************************
	*
	* Function : wb_vad
	* Purpose : Main program for Voice Activity Detection (VAD) for AMR
	*
	*******************************************************************************/

	Word16 wb_vad(                             /* Return value : VAD Decision, 1 = speech, 0 = noise */
			VadVars * st,                      /* i/o : State structure                       */
			Word16 in_buf[]                    /* i   : samples of the input frame            */
			)
	{
		Word16 level[COMPLEN];
		Word32 n;
		Word16 decision, in_level;
		Word32 frame_pow, level_sum, pow_sum;

		/* power of the current frame: accumulate the squared samples */
		frame_pow = 0L;
		for (n = 0; n < FRAME_LEN; n++)
		{
			frame_pow = L_mac(frame_pow, in_buf[n], in_buf[n]);
		}

		/* pow_sum covers the current and the previous frame; the current
		 * frame's power is then stored for the next call */
		pow_sum = L_add(frame_pow, st->prev_pow_sum);
		st->prev_pow_sum = frame_pow;

		/* very low input power: keep only the tone-flag bits selected by 0x1fff */
		if (pow_sum < POW_TONE_THR)
		{
			st->tone_flag = (Word16) (st->tone_flag & 0x1fff);
		}

		/* split the frame into sub-bands and measure the level of each one */
		filter_bank(st, in_buf, level);

		/* derive the VAD decision from the band levels and the frame power */
		decision = vad_decision(st, level, pow_sum);

		/* overall input level: sum of all band levels except the lowest band */
		level_sum = 0;
		for (n = 1; n < COMPLEN; n++)
		{
			level_sum = vo_L_add(level_sum, level[n]);
		}
		in_level = extract_h((level_sum << 12));

		/* feed the input level into the speech level estimator */
		Estimate_Speech(st, in_level);

		return (decision);
	}