/***********************************************************************
Copyright (c) 2006-2011, Skype Limited. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, (subject to the limitations in the disclaimer below)
are permitted provided that the following conditions are met:
- Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of Skype Limited, nor the names of specific
contributors, may be used to endorse or promote products derived from
this software without specific prior written permission.
NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED
BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
CONTRIBUTORS ''AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING,
BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
***********************************************************************/
27
28#ifdef HAVE_CONFIG_H
29#include "config.h"
30#endif
Jean-Marc Valin1c2f5632011-09-16 01:16:53 -070031#include "API.h"
32#include "main.h"
Gregory Maxwellae231142011-07-30 08:18:48 -040033
/************************/
/* Decoder Super Struct */
/************************/
37typedef struct {
38 silk_decoder_state channel_state[ DECODER_NUM_CHANNELS ];
39 stereo_dec_state sStereo;
40 opus_int nChannelsAPI;
41 opus_int nChannelsInternal;
Jean-Marc Valinb24e5742011-10-11 21:09:14 -040042 opus_int prev_decode_only_middle;
Gregory Maxwellae231142011-07-30 08:18:48 -040043} silk_decoder;
44
/*********************/
/* Decoder functions */
/*********************/
48
Gregory Maxwell64a35412011-09-02 10:31:17 -040049opus_int silk_Get_Decoder_Size( int *decSizeBytes )
Gregory Maxwellae231142011-07-30 08:18:48 -040050{
51 opus_int ret = SILK_NO_ERROR;
52
53 *decSizeBytes = sizeof( silk_decoder );
54
55 return ret;
56}
57
58/* Reset decoder state */
59opus_int silk_InitDecoder(
60 void* decState /* I/O: State */
61)
62{
63 opus_int n, ret = SILK_NO_ERROR;
64 silk_decoder_state *channel_state = ((silk_decoder *)decState)->channel_state;
65
66 for( n = 0; n < DECODER_NUM_CHANNELS; n++ ) {
67 ret = silk_init_decoder( &channel_state[ n ] );
68 }
69
70 return ret;
71}
72
73/* Decode a frame */
74opus_int silk_Decode(
75 void* decState, /* I/O: State */
76 silk_DecControlStruct* decControl, /* I/O: Control Structure */
77 opus_int lostFlag, /* I: 0: no loss, 1 loss, 2 decode FEC */
78 opus_int newPacketFlag, /* I: Indicates first decoder call for this packet */
79 ec_dec *psRangeDec, /* I/O Compressor data structure */
80 opus_int16 *samplesOut, /* O: Decoded output speech vector */
81 opus_int32 *nSamplesOut /* O: Number of samples decoded */
82)
83{
Jean-Marc Valinb24e5742011-10-11 21:09:14 -040084 opus_int i, n, delay, decode_only_middle = 0, ret = SILK_NO_ERROR;
Gregory Maxwellae231142011-07-30 08:18:48 -040085 opus_int32 nSamplesOutDec, LBRR_symbol;
Jean-Marc Valinb5972382011-10-07 08:38:27 -040086 opus_int16 samplesOut1_tmp[ 2 ][ MAX_FS_KHZ * MAX_FRAME_LENGTH_MS + 2 + MAX_DECODER_DELAY ];
Gregory Maxwellae231142011-07-30 08:18:48 -040087 opus_int16 samplesOut2_tmp[ MAX_API_FS_KHZ * MAX_FRAME_LENGTH_MS ];
Gregory Maxwell64a35412011-09-02 10:31:17 -040088 opus_int32 MS_pred_Q13[ 2 ] = { 0 };
Gregory Maxwellae231142011-07-30 08:18:48 -040089 opus_int16 *resample_out_ptr;
90 silk_decoder *psDec = ( silk_decoder * )decState;
91 silk_decoder_state *channel_state = psDec->channel_state;
92
93 /**********************************/
94 /* Test if first frame in payload */
95 /**********************************/
96 if( newPacketFlag ) {
97 for( n = 0; n < decControl->nChannelsInternal; n++ ) {
98 channel_state[ n ].nFramesDecoded = 0; /* Used to count frames in packet */
99 }
100 }
101
Gregory Maxwellae231142011-07-30 08:18:48 -0400102 /* If Mono -> Stereo transition in bitstream: init state of second channel */
103 if( decControl->nChannelsInternal > psDec->nChannelsInternal ) {
104 ret += silk_init_decoder( &channel_state[ 1 ] );
Gregory Maxwellae231142011-07-30 08:18:48 -0400105 }
106
Timothy B. Terriberry1e03a6e2011-10-14 16:14:36 -0700107 if( channel_state[ 0 ].nFramesDecoded == 0 ) {
108 for( n = 0; n < decControl->nChannelsInternal; n++ ) {
Gregory Maxwellae231142011-07-30 08:18:48 -0400109 opus_int fs_kHz_dec;
110 if( decControl->payloadSize_ms == 0 ) {
111 /* Assuming packet loss, use 10 ms */
112 channel_state[ n ].nFramesPerPacket = 1;
113 channel_state[ n ].nb_subfr = 2;
114 } else if( decControl->payloadSize_ms == 10 ) {
115 channel_state[ n ].nFramesPerPacket = 1;
116 channel_state[ n ].nb_subfr = 2;
117 } else if( decControl->payloadSize_ms == 20 ) {
118 channel_state[ n ].nFramesPerPacket = 1;
119 channel_state[ n ].nb_subfr = 4;
120 } else if( decControl->payloadSize_ms == 40 ) {
121 channel_state[ n ].nFramesPerPacket = 2;
122 channel_state[ n ].nb_subfr = 4;
123 } else if( decControl->payloadSize_ms == 60 ) {
124 channel_state[ n ].nFramesPerPacket = 3;
125 channel_state[ n ].nb_subfr = 4;
126 } else {
Jean-Marc Valinfb3a4372011-09-16 00:58:26 -0700127 silk_assert( 0 );
Gregory Maxwellae231142011-07-30 08:18:48 -0400128 return SILK_DEC_INVALID_FRAME_SIZE;
129 }
130 fs_kHz_dec = ( decControl->internalSampleRate >> 10 ) + 1;
131 if( fs_kHz_dec != 8 && fs_kHz_dec != 12 && fs_kHz_dec != 16 ) {
Jean-Marc Valinfb3a4372011-09-16 00:58:26 -0700132 silk_assert( 0 );
Gregory Maxwellae231142011-07-30 08:18:48 -0400133 return SILK_DEC_INVALID_SAMPLING_FREQUENCY;
134 }
Jean-Marc Valinb24e5742011-10-11 21:09:14 -0400135 ret += silk_decoder_set_fs( &channel_state[ n ], fs_kHz_dec, decControl->API_sampleRate );
Gregory Maxwellae231142011-07-30 08:18:48 -0400136 }
137 }
138
Jean-Marc Valinb24e5742011-10-11 21:09:14 -0400139 delay = channel_state[ 0 ].delay;
140
Gregory Maxwellae231142011-07-30 08:18:48 -0400141 if( decControl->nChannelsAPI == 2 && decControl->nChannelsInternal == 2 && ( psDec->nChannelsAPI == 1 || psDec->nChannelsInternal == 1 ) ) {
Jean-Marc Valinfb3a4372011-09-16 00:58:26 -0700142 silk_memset( psDec->sStereo.pred_prev_Q13, 0, sizeof( psDec->sStereo.pred_prev_Q13 ) );
143 silk_memset( psDec->sStereo.sSide, 0, sizeof( psDec->sStereo.sSide ) );
Jean-Marc Valinb24e5742011-10-11 21:09:14 -0400144 silk_memcpy( &channel_state[ 1 ].resampler_state, &channel_state[ 0 ].resampler_state, sizeof( silk_resampler_state_struct ) );
145 silk_memcpy( &channel_state[ 1 ].delayBuf, &channel_state[ 0 ].delayBuf, sizeof(channel_state[ 0 ].delayBuf));
Gregory Maxwellae231142011-07-30 08:18:48 -0400146 }
147 psDec->nChannelsAPI = decControl->nChannelsAPI;
148 psDec->nChannelsInternal = decControl->nChannelsInternal;
149
150 if( decControl->API_sampleRate > MAX_API_FS_KHZ * 1000 || decControl->API_sampleRate < 8000 ) {
151 ret = SILK_DEC_INVALID_SAMPLING_FREQUENCY;
152 return( ret );
153 }
154
155 if( lostFlag != FLAG_PACKET_LOST && channel_state[ 0 ].nFramesDecoded == 0 ) {
156 /* First decoder call for this payload */
157 /* Decode VAD flags and LBRR flag */
158 for( n = 0; n < decControl->nChannelsInternal; n++ ) {
159 for( i = 0; i < channel_state[ n ].nFramesPerPacket; i++ ) {
160 channel_state[ n ].VAD_flags[ i ] = ec_dec_bit_logp(psRangeDec, 1);
161 }
162 channel_state[ n ].LBRR_flag = ec_dec_bit_logp(psRangeDec, 1);
163 }
164 /* Decode LBRR flags */
165 for( n = 0; n < decControl->nChannelsInternal; n++ ) {
Jean-Marc Valinfb3a4372011-09-16 00:58:26 -0700166 silk_memset( channel_state[ n ].LBRR_flags, 0, sizeof( channel_state[ n ].LBRR_flags ) );
Gregory Maxwellae231142011-07-30 08:18:48 -0400167 if( channel_state[ n ].LBRR_flag ) {
168 if( channel_state[ n ].nFramesPerPacket == 1 ) {
169 channel_state[ n ].LBRR_flags[ 0 ] = 1;
170 } else {
171 LBRR_symbol = ec_dec_icdf( psRangeDec, silk_LBRR_flags_iCDF_ptr[ channel_state[ n ].nFramesPerPacket - 2 ], 8 ) + 1;
172 for( i = 0; i < channel_state[ n ].nFramesPerPacket; i++ ) {
Jean-Marc Valinfb3a4372011-09-16 00:58:26 -0700173 channel_state[ n ].LBRR_flags[ i ] = silk_RSHIFT( LBRR_symbol, i ) & 1;
Gregory Maxwellae231142011-07-30 08:18:48 -0400174 }
175 }
176 }
177 }
178
179 if( lostFlag == FLAG_DECODE_NORMAL ) {
180 /* Regular decoding: skip all LBRR data */
181 for( i = 0; i < channel_state[ 0 ].nFramesPerPacket; i++ ) {
182 for( n = 0; n < decControl->nChannelsInternal; n++ ) {
183 if( channel_state[ n ].LBRR_flags[ i ] ) {
184 opus_int pulses[ MAX_FRAME_LENGTH ];
Timothy B. Terriberry53cc1a02011-10-14 13:38:24 -0700185 opus_int condCoding;
186
Gregory Maxwellae231142011-07-30 08:18:48 -0400187 if( decControl->nChannelsInternal == 2 && n == 0 ) {
Koen Vos4e1ce382011-08-25 13:50:21 -0400188 silk_stereo_decode_pred( psRangeDec, MS_pred_Q13 );
189 if( channel_state[ 1 ].LBRR_flags[ i ] == 0 ) {
190 silk_stereo_decode_mid_only( psRangeDec, &decode_only_middle );
191 }
Gregory Maxwellae231142011-07-30 08:18:48 -0400192 }
Timothy B. Terriberry53cc1a02011-10-14 13:38:24 -0700193 /* Use conditional coding if previous frame available */
194 if( i > 0 && channel_state[ n ].LBRR_flags[ i - 1 ] ) {
195 condCoding = CODE_CONDITIONALLY;
196 } else {
197 condCoding = CODE_INDEPENDENTLY;
198 }
199 silk_decode_indices( &channel_state[ n ], psRangeDec, i, 1, condCoding );
Gregory Maxwellae231142011-07-30 08:18:48 -0400200 silk_decode_pulses( psRangeDec, pulses, channel_state[ n ].indices.signalType,
201 channel_state[ n ].indices.quantOffsetType, channel_state[ n ].frame_length );
202 }
203 }
204 }
205 }
206 }
207
208 /* Get MS predictor index */
209 if( decControl->nChannelsInternal == 2 ) {
210 if( lostFlag == FLAG_DECODE_NORMAL ||
211 ( lostFlag == FLAG_DECODE_LBRR && channel_state[ 0 ].LBRR_flags[ channel_state[ 0 ].nFramesDecoded ] == 1 ) )
212 {
Koen Vos4e1ce382011-08-25 13:50:21 -0400213 silk_stereo_decode_pred( psRangeDec, MS_pred_Q13 );
Koen Vos3195f6c2011-10-10 20:46:32 -0400214 /* For LBRR data, decode mid-only flag only if side-channel's LBRR flag is false */
Koen Vos4e1ce382011-08-25 13:50:21 -0400215 if( lostFlag == FLAG_DECODE_NORMAL ||
216 ( lostFlag == FLAG_DECODE_LBRR && channel_state[ 1 ].LBRR_flags[ channel_state[ 0 ].nFramesDecoded ] == 0 ) )
217 {
218 silk_stereo_decode_mid_only( psRangeDec, &decode_only_middle );
219 } else {
220 decode_only_middle = 0;
221 }
Gregory Maxwellae231142011-07-30 08:18:48 -0400222 } else {
Gregory Maxwell5c3d1552011-08-15 13:18:52 -0400223 for( n = 0; n < 2; n++ ) {
Jean-Marc Valinb24e5742011-10-11 21:09:14 -0400224 MS_pred_Q13[ n ] = psDec->sStereo.pred_prev_Q13[ n ];
Gregory Maxwell5c3d1552011-08-15 13:18:52 -0400225 }
Gregory Maxwellae231142011-07-30 08:18:48 -0400226 }
227 }
228
Jean-Marc Valinb24e5742011-10-11 21:09:14 -0400229 /* Reset side channel decoder prediction memory for first frame with side coding */
230 if( decControl->nChannelsInternal == 2 && decode_only_middle == 0 && psDec->prev_decode_only_middle == 1 ) {
231 silk_memset( psDec->channel_state[ 1 ].outBuf, 0, sizeof(psDec->channel_state[ 1 ].outBuf) );
232 silk_memset( psDec->channel_state[ 1 ].sLPC_Q14_buf, 0, sizeof(psDec->channel_state[ 1 ].sLPC_Q14_buf) );
233 psDec->channel_state[ 1 ].lagPrev = 100;
234 psDec->channel_state[ 1 ].LastGainIndex = 10;
235 psDec->channel_state[ 1 ].prevSignalType = TYPE_NO_VOICE_ACTIVITY;
236 }
237
Gregory Maxwellae231142011-07-30 08:18:48 -0400238 /* Call decoder for one frame */
239 for( n = 0; n < decControl->nChannelsInternal; n++ ) {
Jean-Marc Valin4f1b7da2011-10-12 15:09:13 -0400240 if( n == 0 || ( ( lostFlag != FLAG_PACKET_LOST ? decode_only_middle : psDec->prev_decode_only_middle ) == 0 ) ) {
Timothy B. Terriberry53cc1a02011-10-14 13:38:24 -0700241 opus_int FrameIndex;
242 opus_int condCoding;
243
244 FrameIndex = channel_state[ 0 ].nFramesDecoded - n;
245 /* Use independent coding if no previous frame available */
246 if( FrameIndex <= 0 ) {
247 condCoding = CODE_INDEPENDENTLY;
248 } else if( lostFlag == FLAG_DECODE_LBRR ) {
249 condCoding = channel_state[ n ].LBRR_flags[ FrameIndex - 1 ] ? CODE_CONDITIONALLY : CODE_INDEPENDENTLY;
250 } else if( n > 0 && psDec->prev_decode_only_middle ) {
251 /* If we skipped a side frame in this packet, we don't
252 need LTP scaling; the LTP state is well-defined. */
253 condCoding = CODE_INDEPENDENTLY_NO_LTP_SCALING;
254 } else {
255 condCoding = CODE_CONDITIONALLY;
256 }
257 ret += silk_decode_frame( &channel_state[ n ], psRangeDec, &samplesOut1_tmp[ n ][ 2 + delay ], &nSamplesOutDec, lostFlag, condCoding);
Gregory Maxwellae231142011-07-30 08:18:48 -0400258 } else {
Jean-Marc Valinb5972382011-10-07 08:38:27 -0400259 silk_memset( &samplesOut1_tmp[ n ][ 2 + delay ], 0, nSamplesOutDec * sizeof( opus_int16 ) );
Gregory Maxwellae231142011-07-30 08:18:48 -0400260 }
Timothy B. Terriberry1e03a6e2011-10-14 16:14:36 -0700261 channel_state[ n ].nFramesDecoded++;
Gregory Maxwellae231142011-07-30 08:18:48 -0400262 }
263
264 if( decControl->nChannelsAPI == 2 && decControl->nChannelsInternal == 2 ) {
265 /* Convert Mid/Side to Left/Right */
Jean-Marc Valinb24e5742011-10-11 21:09:14 -0400266 silk_stereo_MS_to_LR( &psDec->sStereo, &samplesOut1_tmp[ 0 ][ delay ], &samplesOut1_tmp[ 1 ][ delay ], MS_pred_Q13, channel_state[ 0 ].fs_kHz, nSamplesOutDec );
Gregory Maxwellae231142011-07-30 08:18:48 -0400267 } else {
268 /* Buffering */
Jean-Marc Valinb24e5742011-10-11 21:09:14 -0400269 silk_memcpy( &samplesOut1_tmp[ 0 ][ delay ], psDec->sStereo.sMid, 2 * sizeof( opus_int16 ) );
Jean-Marc Valinb5972382011-10-07 08:38:27 -0400270 silk_memcpy( psDec->sStereo.sMid, &samplesOut1_tmp[ 0 ][ nSamplesOutDec + delay ], 2 * sizeof( opus_int16 ) );
Gregory Maxwellae231142011-07-30 08:18:48 -0400271 }
272
273 /* Number of output samples */
Jean-Marc Valinfb3a4372011-09-16 00:58:26 -0700274 *nSamplesOut = silk_DIV32( nSamplesOutDec * decControl->API_sampleRate, silk_SMULBB( channel_state[ 0 ].fs_kHz, 1000 ) );
Gregory Maxwellae231142011-07-30 08:18:48 -0400275
276 /* Set up pointers to temp buffers */
277 if( decControl->nChannelsAPI == 2 ) {
278 resample_out_ptr = samplesOut2_tmp;
279 } else {
280 resample_out_ptr = samplesOut;
281 }
282
Jean-Marc Valinfb3a4372011-09-16 00:58:26 -0700283 for( n = 0; n < silk_min( decControl->nChannelsAPI, decControl->nChannelsInternal ); n++ ) {
Jean-Marc Valinb5972382011-10-07 08:38:27 -0400284
Jean-Marc Valinb24e5742011-10-11 21:09:14 -0400285 silk_memcpy(&samplesOut1_tmp[ n ][ 1 ], &channel_state[ n ].delayBuf[ MAX_DECODER_DELAY - delay ], delay * sizeof(opus_int16));
Gregory Maxwellae231142011-07-30 08:18:48 -0400286 /* Resample decoded signal to API_sampleRate */
287 ret += silk_resampler( &channel_state[ n ].resampler_state, resample_out_ptr, &samplesOut1_tmp[ n ][ 1 ], nSamplesOutDec );
Jean-Marc Valinb24e5742011-10-11 21:09:14 -0400288 silk_memcpy(channel_state[ n ].delayBuf, &samplesOut1_tmp[ n ][ 1 + nSamplesOutDec + delay - MAX_DECODER_DELAY ], MAX_DECODER_DELAY * sizeof(opus_int16));
Gregory Maxwellae231142011-07-30 08:18:48 -0400289
290 /* Interleave if stereo output and stereo stream */
291 if( decControl->nChannelsAPI == 2 && decControl->nChannelsInternal == 2 ) {
292 for( i = 0; i < *nSamplesOut; i++ ) {
293 samplesOut[ n + 2 * i ] = resample_out_ptr[ i ];
294 }
295 }
296 }
297
298 /* Create two channel output from mono stream */
299 if( decControl->nChannelsAPI == 2 && decControl->nChannelsInternal == 1 ) {
300 for( i = 0; i < *nSamplesOut; i++ ) {
301 samplesOut[ 0 + 2 * i ] = samplesOut[ 1 + 2 * i ] = resample_out_ptr[ i ];
302 }
303 }
304
Jean-Marc Valinb24e5742011-10-11 21:09:14 -0400305 /* Export pitch lag, measured at 48 kHz sampling rate */
306 if( channel_state[ 0 ].prevSignalType == TYPE_VOICED ) {
307 int mult_tab[ 3 ] = { 6, 4, 3 };
308 decControl->prevPitchLag = channel_state[ 0 ].lagPrev * mult_tab[ ( channel_state[ 0 ].fs_kHz - 8 ) >> 2 ];
309 } else {
310 decControl->prevPitchLag = 0;
311 }
312
Jean-Marc Valin4f1b7da2011-10-12 15:09:13 -0400313 if ( lostFlag != FLAG_PACKET_LOST ) {
314 psDec->prev_decode_only_middle = decode_only_middle;
315 }
Gregory Maxwellae231142011-07-30 08:18:48 -0400316 return ret;
317}
318
319/* Getting table of contents for a packet */
320opus_int silk_get_TOC(
321 const opus_uint8 *payload, /* I Payload data */
322 const opus_int nBytesIn, /* I: Number of input bytes */
323 const opus_int nFramesPerPayload, /* I: Number of SILK frames per payload */
324 silk_TOC_struct *Silk_TOC /* O: Type of content */
325)
326{
327 opus_int i, flags, ret = SILK_NO_ERROR;
328
329 if( nBytesIn < 1 ) {
330 return -1;
331 }
332 if( nFramesPerPayload < 0 || nFramesPerPayload > 3 ) {
333 return -1;
334 }
335
Jean-Marc Valinfb3a4372011-09-16 00:58:26 -0700336 silk_memset( Silk_TOC, 0, sizeof( Silk_TOC ) );
Gregory Maxwellae231142011-07-30 08:18:48 -0400337
338 /* For stereo, extract the flags for the mid channel */
Jean-Marc Valinfb3a4372011-09-16 00:58:26 -0700339 flags = silk_RSHIFT( payload[ 0 ], 7 - nFramesPerPayload ) & ( silk_LSHIFT( 1, nFramesPerPayload + 1 ) - 1 );
Gregory Maxwellae231142011-07-30 08:18:48 -0400340
341 Silk_TOC->inbandFECFlag = flags & 1;
342 for( i = nFramesPerPayload - 1; i >= 0 ; i-- ) {
Jean-Marc Valinfb3a4372011-09-16 00:58:26 -0700343 flags = silk_RSHIFT( flags, 1 );
Gregory Maxwellae231142011-07-30 08:18:48 -0400344 Silk_TOC->VADFlags[ i ] = flags & 1;
345 Silk_TOC->VADFlag |= flags & 1;
346 }
347
348 return ret;
349}