Blame - silk/x86/main_sse.h - platform/external/libopus

flim

c91ee5b

2016-01-26 14:33:44 +0100

[diff] [blame]

/* Written by XiangMingZhu WeiZhou MinPeng YanWang

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions
   are met:

   - Redistributions of source code must retain the above copyright
   notice, this list of conditions and the following disclaimer.

   - Redistributions in binary form must reproduce the above copyright
   notice, this list of conditions and the following disclaimer in the
   documentation and/or other materials provided with the distribution.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

#ifndef MAIN_SSE_H

#define MAIN_SSE_H

#ifdef HAVE_CONFIG_H

#include "config.h"

#endif

# if defined(OPUS_X86_MAY_HAVE_SSE4_1)

36

37

# define OVERRIDE_silk_VQ_WMat_EC

38

39

void silk_VQ_WMat_EC_sse4_1(

40

opus_int8 *ind, /* O index of best codebook vector */

41

opus_int32 *rate_dist_Q14, /* O best weighted quant error + mu * rate */

42

opus_int *gain_Q7, /* O sum of absolute LTP coefficients */

43

const opus_int16 *in_Q14, /* I input vector to be quantized */

44

const opus_int32 *W_Q18, /* I weighting matrix */

45

const opus_int8 *cb_Q7, /* I codebook */

46

const opus_uint8 *cb_gain_Q7, /* I codebook effective gain */

47

const opus_uint8 *cl_Q5, /* I code length for each codebook vector */

48

const opus_int mu_Q9, /* I tradeoff betw. weighted error and rate */

49

const opus_int32 max_gain_Q7, /* I maximum sum of absolute LTP coefficients */

50

opus_int L /* I number of vectors in codebook */

51

);

52

53

#if defined OPUS_X86_PRESUME_SSE4_1

54

55

#define silk_VQ_WMat_EC(ind, rate_dist_Q14, gain_Q7, in_Q14, W_Q18, cb_Q7, cb_gain_Q7, cl_Q5, \

56

mu_Q9, max_gain_Q7, L, arch) \

57

((void)(arch),silk_VQ_WMat_EC_sse4_1(ind, rate_dist_Q14, gain_Q7, in_Q14, W_Q18, cb_Q7, cb_gain_Q7, cl_Q5, \

58

mu_Q9, max_gain_Q7, L))

#else

extern void (*const SILK_VQ_WMAT_EC_IMPL[OPUS_ARCHMASK + 1])(

63

opus_int8 *ind, /* O index of best codebook vector */

64

opus_int32 *rate_dist_Q14, /* O best weighted quant error + mu * rate */

65

opus_int *gain_Q7, /* O sum of absolute LTP coefficients */

66

const opus_int16 *in_Q14, /* I input vector to be quantized */

67

const opus_int32 *W_Q18, /* I weighting matrix */

68

const opus_int8 *cb_Q7, /* I codebook */

69

const opus_uint8 *cb_gain_Q7, /* I codebook effective gain */

70

const opus_uint8 *cl_Q5, /* I code length for each codebook vector */

71

const opus_int mu_Q9, /* I tradeoff betw. weighted error and rate */

72

const opus_int32 max_gain_Q7, /* I maximum sum of absolute LTP coefficients */

73

opus_int L /* I number of vectors in codebook */

74

);

75

76

# define silk_VQ_WMat_EC(ind, rate_dist_Q14, gain_Q7, in_Q14, W_Q18, cb_Q7, cb_gain_Q7, cl_Q5, \

77

mu_Q9, max_gain_Q7, L, arch) \

78

((*SILK_VQ_WMAT_EC_IMPL[(arch) & OPUS_ARCHMASK])(ind, rate_dist_Q14, gain_Q7, in_Q14, W_Q18, cb_Q7, cb_gain_Q7, cl_Q5, \

79

mu_Q9, max_gain_Q7, L))

#endif

# define OVERRIDE_silk_NSQ

84

85

void silk_NSQ_sse4_1(

86

const silk_encoder_state *psEncC, /* I/O Encoder State */

87

silk_nsq_state *NSQ, /* I/O NSQ state */

88

SideInfoIndices *psIndices, /* I/O Quantization Indices */

89

const opus_int32 x_Q3[], /* I Prefiltered input signal */

90

opus_int8 pulses[], /* O Quantized pulse signal */

91

const opus_int16 PredCoef_Q12[ 2 * MAX_LPC_ORDER ], /* I Short term prediction coefs */

92

const opus_int16 LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ], /* I Long term prediction coefs */

93

const opus_int16 AR2_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs */

94

const opus_int HarmShapeGain_Q14[ MAX_NB_SUBFR ], /* I Long term shaping coefs */

95

const opus_int Tilt_Q14[ MAX_NB_SUBFR ], /* I Spectral tilt */

96

const opus_int32 LF_shp_Q14[ MAX_NB_SUBFR ], /* I Low frequency shaping coefs */

97

const opus_int32 Gains_Q16[ MAX_NB_SUBFR ], /* I Quantization step sizes */

98

const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lags */

99

const opus_int Lambda_Q10, /* I Rate/distortion tradeoff */

100

const opus_int LTP_scale_Q14 /* I LTP state scaling */

101

);

102

103

#if defined OPUS_X86_PRESUME_SSE4_1

104

105

#define silk_NSQ(psEncC, NSQ, psIndices, x_Q3, pulses, PredCoef_Q12, LTPCoef_Q14, AR2_Q13, \

106

HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14, arch) \

107

((void)(arch),silk_NSQ_sse4_1(psEncC, NSQ, psIndices, x_Q3, pulses, PredCoef_Q12, LTPCoef_Q14, AR2_Q13, \

108

HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14))

#else

extern void (*const SILK_NSQ_IMPL[OPUS_ARCHMASK + 1])(

113

const silk_encoder_state *psEncC, /* I/O Encoder State */

114

silk_nsq_state *NSQ, /* I/O NSQ state */

115

SideInfoIndices *psIndices, /* I/O Quantization Indices */

116

const opus_int32 x_Q3[], /* I Prefiltered input signal */

117

opus_int8 pulses[], /* O Quantized pulse signal */

118

const opus_int16 PredCoef_Q12[ 2 * MAX_LPC_ORDER ], /* I Short term prediction coefs */

119

const opus_int16 LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ], /* I Long term prediction coefs */

120

const opus_int16 AR2_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs */

121

const opus_int HarmShapeGain_Q14[ MAX_NB_SUBFR ], /* I Long term shaping coefs */

122

const opus_int Tilt_Q14[ MAX_NB_SUBFR ], /* I Spectral tilt */

123

const opus_int32 LF_shp_Q14[ MAX_NB_SUBFR ], /* I Low frequency shaping coefs */

124

const opus_int32 Gains_Q16[ MAX_NB_SUBFR ], /* I Quantization step sizes */

125

const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lags */

126

const opus_int Lambda_Q10, /* I Rate/distortion tradeoff */

127

const opus_int LTP_scale_Q14 /* I LTP state scaling */

128

);

129

130

# define silk_NSQ(psEncC, NSQ, psIndices, x_Q3, pulses, PredCoef_Q12, LTPCoef_Q14, AR2_Q13, \

131

HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14, arch) \

132

((*SILK_NSQ_IMPL[(arch) & OPUS_ARCHMASK])(psEncC, NSQ, psIndices, x_Q3, pulses, PredCoef_Q12, LTPCoef_Q14, AR2_Q13, \

133

HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14))

#endif

# define OVERRIDE_silk_NSQ_del_dec

138

139

void silk_NSQ_del_dec_sse4_1(

140

const silk_encoder_state *psEncC, /* I/O Encoder State */

141

silk_nsq_state *NSQ, /* I/O NSQ state */

142

SideInfoIndices *psIndices, /* I/O Quantization Indices */

143

const opus_int32 x_Q3[], /* I Prefiltered input signal */

144

opus_int8 pulses[], /* O Quantized pulse signal */

145

const opus_int16 PredCoef_Q12[ 2 * MAX_LPC_ORDER ], /* I Short term prediction coefs */

146

const opus_int16 LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ], /* I Long term prediction coefs */

147

const opus_int16 AR2_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs */

148

const opus_int HarmShapeGain_Q14[ MAX_NB_SUBFR ], /* I Long term shaping coefs */

149

const opus_int Tilt_Q14[ MAX_NB_SUBFR ], /* I Spectral tilt */

150

const opus_int32 LF_shp_Q14[ MAX_NB_SUBFR ], /* I Low frequency shaping coefs */

151

const opus_int32 Gains_Q16[ MAX_NB_SUBFR ], /* I Quantization step sizes */

152

const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lags */

153

const opus_int Lambda_Q10, /* I Rate/distortion tradeoff */

154

const opus_int LTP_scale_Q14 /* I LTP state scaling */

155

);

156

157

#if defined OPUS_X86_PRESUME_SSE4_1

158

159

#define silk_NSQ_del_dec(psEncC, NSQ, psIndices, x_Q3, pulses, PredCoef_Q12, LTPCoef_Q14, AR2_Q13, \

160

HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14, arch) \

161

((void)(arch),silk_NSQ_del_dec_sse4_1(psEncC, NSQ, psIndices, x_Q3, pulses, PredCoef_Q12, LTPCoef_Q14, AR2_Q13, \

162

HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14))

#else

extern void (*const SILK_NSQ_DEL_DEC_IMPL[OPUS_ARCHMASK + 1])(

167

const silk_encoder_state *psEncC, /* I/O Encoder State */

168

silk_nsq_state *NSQ, /* I/O NSQ state */

169

SideInfoIndices *psIndices, /* I/O Quantization Indices */

170

const opus_int32 x_Q3[], /* I Prefiltered input signal */

171

opus_int8 pulses[], /* O Quantized pulse signal */

172

const opus_int16 PredCoef_Q12[ 2 * MAX_LPC_ORDER ], /* I Short term prediction coefs */

173

const opus_int16 LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ], /* I Long term prediction coefs */

174

const opus_int16 AR2_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs */

175

const opus_int HarmShapeGain_Q14[ MAX_NB_SUBFR ], /* I Long term shaping coefs */

176

const opus_int Tilt_Q14[ MAX_NB_SUBFR ], /* I Spectral tilt */

177

const opus_int32 LF_shp_Q14[ MAX_NB_SUBFR ], /* I Low frequency shaping coefs */

178

const opus_int32 Gains_Q16[ MAX_NB_SUBFR ], /* I Quantization step sizes */

179

const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lags */

180

const opus_int Lambda_Q10, /* I Rate/distortion tradeoff */

181

const opus_int LTP_scale_Q14 /* I LTP state scaling */

182

);

183

184

# define silk_NSQ_del_dec(psEncC, NSQ, psIndices, x_Q3, pulses, PredCoef_Q12, LTPCoef_Q14, AR2_Q13, \

185

HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14, arch) \

186

((*SILK_NSQ_DEL_DEC_IMPL[(arch) & OPUS_ARCHMASK])(psEncC, NSQ, psIndices, x_Q3, pulses, PredCoef_Q12, LTPCoef_Q14, AR2_Q13, \

187

HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14))

#endif

void silk_noise_shape_quantizer(

192

silk_nsq_state *NSQ, /* I/O NSQ state */

193

opus_int signalType, /* I Signal type */

194

const opus_int32 x_sc_Q10[], /* I */

195

opus_int8 pulses[], /* O */

196

opus_int16 xq[], /* O */

197

opus_int32 sLTP_Q15[], /* I/O LTP state */

198

const opus_int16 a_Q12[], /* I Short term prediction coefs */

199

const opus_int16 b_Q14[], /* I Long term prediction coefs */

200

const opus_int16 AR_shp_Q13[], /* I Noise shaping AR coefs */

201

opus_int lag, /* I Pitch lag */

202

opus_int32 HarmShapeFIRPacked_Q14, /* I */

203

opus_int Tilt_Q14, /* I Spectral tilt */

204

opus_int32 LF_shp_Q14, /* I */

205

opus_int32 Gain_Q16, /* I */

206

opus_int Lambda_Q10, /* I */

207

opus_int offset_Q10, /* I */

208

opus_int length, /* I Input length */

209

opus_int shapingLPCOrder, /* I Noise shaping AR filter order */

210

opus_int predictLPCOrder /* I Prediction filter order */

211

);

212

213

/**************************/

214

/* Noise level estimation */

215

/**************************/

216

void silk_VAD_GetNoiseLevels(

217

const opus_int32 pX[ VAD_N_BANDS ], /* I subband energies */

218

silk_VAD_state *psSilk_VAD /* I/O Pointer to Silk VAD state */

219

);

220

221

# define OVERRIDE_silk_VAD_GetSA_Q8

222

223

opus_int silk_VAD_GetSA_Q8_sse4_1(

224

silk_encoder_state *psEnC,

225

const opus_int16 pIn[]

226

);

227

228

#if defined(OPUS_X86_PRESUME_SSE4_1)

229

#define silk_VAD_GetSA_Q8(psEnC, pIn, arch) ((void)(arch),silk_VAD_GetSA_Q8_sse4_1(psEnC, pIn))

#else

# define silk_VAD_GetSA_Q8(psEnC, pIn, arch) \

234

((*SILK_VAD_GETSA_Q8_IMPL[(arch) & OPUS_ARCHMASK])(psEnC, pIn))

235

236

extern opus_int (*const SILK_VAD_GETSA_Q8_IMPL[OPUS_ARCHMASK + 1])(

237

silk_encoder_state *psEnC,

238

const opus_int16 pIn[]);

239

240

# define OVERRIDE_silk_warped_LPC_analysis_filter_FIX

#endif

void silk_warped_LPC_analysis_filter_FIX_sse4_1(

245

opus_int32 state[], /* I/O State [order + 1] */

246

opus_int32 res_Q2[], /* O Residual signal [length] */

247

const opus_int16 coef_Q13[], /* I Coefficients [order] */

248

const opus_int16 input[], /* I Input signal [length] */

249

const opus_int16 lambda_Q16, /* I Warping factor */

250

const opus_int length, /* I Length of input signal */

251

const opus_int order /* I Filter order (even) */

252

);

253

254

#if defined(OPUS_X86_PRESUME_SSE4_1)

255

#define silk_warped_LPC_analysis_filter_FIX(state, res_Q2, coef_Q13, input, lambda_Q16, length, order, arch) \

256

((void)(arch),silk_warped_LPC_analysis_filter_FIX_c(state, res_Q2, coef_Q13, input, lambda_Q16, length, order))

#else

extern void (*const SILK_WARPED_LPC_ANALYSIS_FILTER_FIX_IMPL[OPUS_ARCHMASK + 1])(

261

opus_int32 state[], /* I/O State [order + 1] */

262

opus_int32 res_Q2[], /* O Residual signal [length] */

263

const opus_int16 coef_Q13[], /* I Coefficients [order] */

264

const opus_int16 input[], /* I Input signal [length] */

265

const opus_int16 lambda_Q16, /* I Warping factor */

266

const opus_int length, /* I Length of input signal */

267

const opus_int order /* I Filter order (even) */

268

);

269

270

# define silk_warped_LPC_analysis_filter_FIX(state, res_Q2, coef_Q13, input, lambda_Q16, length, order, arch) \

271

((*SILK_WARPED_LPC_ANALYSIS_FILTER_FIX_IMPL[(arch) & OPUS_ARCHMASK])(state, res_Q2, coef_Q13, input, lambda_Q16, length, order))

#endif

# endif

#endif