Use dynamic stack allocations in SILK decoder.

This allows the decoder to be compiled with
 NONTHREADSAFE_PSEUDOSTACK to move the memory for large buffers off
 the stack for devices where it is very limited.
This patch only attempts to do this for the decoder.
The encoder still requires more than 10 kB of stack.
diff --git a/silk/PLC.c b/silk/PLC.c
index 1b93d06..906ad33 100644
--- a/silk/PLC.c
+++ b/silk/PLC.c
@@ -30,6 +30,7 @@
 #endif
 
 #include "main.h"
+#include "stack_alloc.h"
 #include "PLC.h"
 
 #define NB_ATT 2
@@ -178,12 +179,17 @@
     opus_int16 rand_scale_Q14;
     opus_int16 *B_Q14, *exc_buf_ptr;
     opus_int32 *sLPC_Q14_ptr;
-    opus_int16 exc_buf[ 2 * MAX_SUB_FRAME_LENGTH ];
+    VARDECL( opus_int16, exc_buf );
     opus_int16 A_Q12[ MAX_LPC_ORDER ];
-    opus_int16 sLTP[ MAX_FRAME_LENGTH ];
-    opus_int32 sLTP_Q14[ 2 * MAX_FRAME_LENGTH ];
+    VARDECL( opus_int16, sLTP );
+    VARDECL( opus_int32, sLTP_Q14 );
     silk_PLC_struct *psPLC = &psDec->sPLC;
     opus_int32 prevGain_Q10[2];
+    SAVE_STACK;
+
+    ALLOC( exc_buf, 2*psPLC->subfr_length, opus_int16 );
+    ALLOC( sLTP, psDec->ltp_mem_length, opus_int16 );
+    ALLOC( sLTP_Q14, psDec->ltp_mem_length + psDec->frame_length, opus_int32 );
 
     prevGain_Q10[0] = silk_RSHIFT( psPLC->prevGain_Q16[ 0 ], 6);
     prevGain_Q10[1] = silk_RSHIFT( psPLC->prevGain_Q16[ 1 ], 6);
@@ -354,6 +360,7 @@
     for( i = 0; i < MAX_NB_SUBFR; i++ ) {
         psDecCtrl->pitchL[ i ] = lag;
     }
+    RESTORE_STACK;
 }
 
 /* Glues concealed frames with new good recieved frames */
diff --git a/silk/dec_API.c b/silk/dec_API.c
index 60beef6..68403b7 100644
--- a/silk/dec_API.c
+++ b/silk/dec_API.c
@@ -30,6 +30,7 @@
 #endif
 #include "API.h"
 #include "main.h"
+#include "stack_alloc.h"
 
 /************************/
 /* Decoder Super Struct */
@@ -85,14 +86,16 @@
 {
     opus_int   i, n, decode_only_middle = 0, ret = SILK_NO_ERROR;
     opus_int32 nSamplesOutDec, LBRR_symbol;
-    opus_int16 samplesOut1_tmp[ 2 ][ MAX_FS_KHZ * MAX_FRAME_LENGTH_MS + 2 ];
-    opus_int16 samplesOut2_tmp[ MAX_API_FS_KHZ * MAX_FRAME_LENGTH_MS ];
+    opus_int16 *samplesOut1_tmp[ 2 ];
+    VARDECL( opus_int16, samplesOut1_tmp_storage );
+    VARDECL( opus_int16, samplesOut2_tmp );
     opus_int32 MS_pred_Q13[ 2 ] = { 0 };
     opus_int16 *resample_out_ptr;
     silk_decoder *psDec = ( silk_decoder * )decState;
     silk_decoder_state *channel_state = psDec->channel_state;
     opus_int has_side;
     opus_int stereo_to_mono;
+    SAVE_STACK;
 
     /**********************************/
     /* Test if first frame in payload */
@@ -132,11 +135,13 @@
                 channel_state[ n ].nb_subfr = 4;
             } else {
                 silk_assert( 0 );
+                RESTORE_STACK;
                 return SILK_DEC_INVALID_FRAME_SIZE;
             }
             fs_kHz_dec = ( decControl->internalSampleRate >> 10 ) + 1;
             if( fs_kHz_dec != 8 && fs_kHz_dec != 12 && fs_kHz_dec != 16 ) {
                 silk_assert( 0 );
+                RESTORE_STACK;
                 return SILK_DEC_INVALID_SAMPLING_FREQUENCY;
             }
             ret += silk_decoder_set_fs( &channel_state[ n ], fs_kHz_dec, decControl->API_sampleRate );
@@ -153,6 +158,7 @@
 
     if( decControl->API_sampleRate > (opus_int32)MAX_API_FS_KHZ * 1000 || decControl->API_sampleRate < 8000 ) {
         ret = SILK_DEC_INVALID_SAMPLING_FREQUENCY;
+        RESTORE_STACK;
         return( ret );
     }
 
@@ -240,6 +246,14 @@
         psDec->channel_state[ 1 ].first_frame_after_reset = 1;
     }
 
+    ALLOC( samplesOut1_tmp_storage,
+           decControl->nChannelsInternal*(
+               channel_state[ 0 ].frame_length + 2 ),
+           opus_int16 );
+    samplesOut1_tmp[ 0 ] = samplesOut1_tmp_storage;
+    samplesOut1_tmp[ 1 ] = samplesOut1_tmp_storage
+                           + channel_state[ 0 ].frame_length + 2;
+
     if( lostFlag == FLAG_DECODE_NORMAL ) {
         has_side = !decode_only_middle;
     } else {
@@ -285,6 +299,8 @@
     *nSamplesOut = silk_DIV32( nSamplesOutDec * decControl->API_sampleRate, silk_SMULBB( channel_state[ 0 ].fs_kHz, 1000 ) );
 
     /* Set up pointers to temp buffers */
+    ALLOC( samplesOut2_tmp,
+           decControl->nChannelsAPI == 2 ? *nSamplesOut : 0, opus_int16 );
     if( decControl->nChannelsAPI == 2 ) {
         resample_out_ptr = samplesOut2_tmp;
     } else {
@@ -337,6 +353,7 @@
     } else {
        psDec->prev_decode_only_middle = decode_only_middle;
     }
+    RESTORE_STACK;
     return ret;
 }
 
diff --git a/silk/decode_core.c b/silk/decode_core.c
index 2142dd4..0365ffd 100644
--- a/silk/decode_core.c
+++ b/silk/decode_core.c
@@ -30,6 +30,7 @@
 #endif
 
 #include "main.h"
+#include "stack_alloc.h"
 
 /**********************************************************/
 /* Core decoder. Performs inverse NSQ operation LTP + LPC */
@@ -43,15 +44,21 @@
 {
     opus_int   i, k, lag = 0, start_idx, sLTP_buf_idx, NLSF_interpolation_flag, signalType;
     opus_int16 *A_Q12, *B_Q14, *pxq, A_Q12_tmp[ MAX_LPC_ORDER ];
-    opus_int16 sLTP[ MAX_FRAME_LENGTH ];
-    opus_int32 sLTP_Q15[ 2 * MAX_FRAME_LENGTH ];
+    VARDECL( opus_int16, sLTP );
+    VARDECL( opus_int32, sLTP_Q15 );
     opus_int32 LTP_pred_Q13, LPC_pred_Q10, Gain_Q10, inv_gain_Q31, gain_adj_Q16, rand_seed, offset_Q10;
     opus_int32 *pred_lag_ptr, *pexc_Q14, *pres_Q14;
-    opus_int32 res_Q14[ MAX_SUB_FRAME_LENGTH ];
-    opus_int32 sLPC_Q14[ MAX_SUB_FRAME_LENGTH + MAX_LPC_ORDER ];
+    VARDECL( opus_int32, res_Q14 );
+    VARDECL( opus_int32, sLPC_Q14 );
+    SAVE_STACK;
 
     silk_assert( psDec->prev_gain_Q16 != 0 );
 
+    ALLOC( sLTP, psDec->ltp_mem_length, opus_int16 );
+    ALLOC( sLTP_Q15, psDec->ltp_mem_length + psDec->frame_length, opus_int32 );
+    ALLOC( res_Q14, psDec->subfr_length, opus_int32 );
+    ALLOC( sLPC_Q14, psDec->subfr_length + MAX_LPC_ORDER, opus_int32 );
+
     offset_Q10 = silk_Quantization_Offsets_Q10[ psDec->indices.signalType >> 1 ][ psDec->indices.quantOffsetType ];
 
     if( psDec->indices.NLSFInterpCoef_Q2 < 1 << 2 ) {
@@ -227,4 +234,5 @@
 
     /* Save LPC state */
     silk_memcpy( psDec->sLPC_Q14_buf, sLPC_Q14, MAX_LPC_ORDER * sizeof( opus_int32 ) );
+    RESTORE_STACK;
 }
diff --git a/silk/decode_frame.c b/silk/decode_frame.c
index 9db93d8..3e4a6e2 100644
--- a/silk/decode_frame.c
+++ b/silk/decode_frame.c
@@ -30,6 +30,7 @@
 #endif
 
 #include "main.h"
+#include "stack_alloc.h"
 #include "PLC.h"
 
 /****************/
@@ -44,12 +45,16 @@
     opus_int                    condCoding                      /* I    The type of conditional coding to use       */
 )
 {
-    silk_decoder_control sDecCtrl;
+    VARDECL( silk_decoder_control, psDecCtrl );
     opus_int         L, mv_len, ret = 0;
-    opus_int         pulses[ MAX_FRAME_LENGTH ];
+    VARDECL( opus_int, pulses );
+    SAVE_STACK;
 
     L = psDec->frame_length;
-    sDecCtrl.LTP_scale_Q14 = 0;
+    ALLOC( psDecCtrl, 1, silk_decoder_control );
+    ALLOC( pulses, (L + SHELL_CODEC_FRAME_LENGTH - 1) &
+                   ~(SHELL_CODEC_FRAME_LENGTH - 1), opus_int );
+    psDecCtrl->LTP_scale_Q14 = 0;
 
     /* Safety checks */
     silk_assert( L > 0 && L <= MAX_FRAME_LENGTH );
@@ -71,20 +76,17 @@
         /********************************************/
         /* Decode parameters and pulse signal       */
         /********************************************/
-        silk_decode_parameters( psDec, &sDecCtrl, condCoding );
-
-        /* Update length. Sampling frequency may have changed */
-        L = psDec->frame_length;
+        silk_decode_parameters( psDec, psDecCtrl, condCoding );
 
         /********************************************************/
         /* Run inverse NSQ                                      */
         /********************************************************/
-        silk_decode_core( psDec, &sDecCtrl, pOut, pulses );
+        silk_decode_core( psDec, psDecCtrl, pOut, pulses );
 
         /********************************************************/
         /* Update PLC state                                     */
         /********************************************************/
-        silk_PLC( psDec, &sDecCtrl, pOut, 0 );
+        silk_PLC( psDec, psDecCtrl, pOut, 0 );
 
         psDec->lossCnt = 0;
         psDec->prevSignalType = psDec->indices.signalType;
@@ -94,7 +96,7 @@
         psDec->first_frame_after_reset = 0;
     } else {
         /* Handle packet loss by extrapolation */
-        silk_PLC( psDec, &sDecCtrl, pOut, 1 );
+        silk_PLC( psDec, psDecCtrl, pOut, 1 );
     }
 
     /*************************/
@@ -113,13 +115,14 @@
     /************************************************/
     /* Comfort noise generation / estimation        */
     /************************************************/
-    silk_CNG( psDec, &sDecCtrl, pOut, L );
+    silk_CNG( psDec, psDecCtrl, pOut, L );
 
     /* Update some decoder state variables */
-    psDec->lagPrev = sDecCtrl.pitchL[ psDec->nb_subfr - 1 ];
+    psDec->lagPrev = psDecCtrl->pitchL[ psDec->nb_subfr - 1 ];
 
     /* Set output frame length */
     *pN = L;
 
+    RESTORE_STACK;
     return ret;
 }