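Fix memory issues in Projection API.

mapping_matrix_get_data() added a byte offset to a MappingMatrix pointer,
so the offset was scaled by sizeof(MappingMatrix) and the returned address
was wrong; it now offsets a char pointer. Matrix and state sizes are
computed with consistent alignment, and the temporary buffer used to
convert the demixing matrix is sized in elements rather than bytes.

The projection encoder and decoder no longer run the matrix multiply
through a per-call temporary buffer. The matrix is applied one channel at
a time inside the multistream copy_channel_in/copy_channel_out callbacks,
which gain a user_data argument that carries the matrix.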

Modified by Jean-Marc Valin

Signed-off-by: Jean-Marc Valin <jmvalin@jmvalin.ca>
diff --git a/src/mapping_matrix.c b/src/mapping_matrix.c
index 5268bc6..d3d59d6 100644
--- a/src/mapping_matrix.c
+++ b/src/mapping_matrix.c
@@ -41,12 +41,12 @@
 
 int mapping_matrix_get_size(int rows, int cols)
 {
-  return align(sizeof(MappingMatrix)) + rows * cols * sizeof(opus_int16);
+  return align(sizeof(MappingMatrix)) + align(rows * cols * sizeof(opus_int16));
 }
 
 opus_int16 *mapping_matrix_get_data(const MappingMatrix *matrix)
 {
-  return (opus_int16*)(void *)(matrix + align(sizeof(MappingMatrix)));
+  return (opus_int16*)((char*)matrix + align(sizeof(MappingMatrix)));
 }
 
 void mapping_matrix_init(MappingMatrix * const matrix,
@@ -58,7 +58,7 @@
 #if !defined(ENABLE_ASSERTIONS)
   (void)data_size;
 #endif
-  celt_assert((opus_uint32)data_size == rows * cols * sizeof(opus_int16));
+  celt_assert(align(data_size) == align(rows * cols * sizeof(opus_int16)));
 
   matrix->rows = rows;
   matrix->cols = cols;
@@ -71,18 +71,18 @@
 }
 
 #ifndef DISABLE_FLOAT_API
-void mapping_matrix_multiply_float(const MappingMatrix *matrix,
-                                   const float *input, int input_rows,
-                                   float *output, int output_rows,
-                                   int frame_size)
+void mapping_matrix_multiply_channel_in_float(
+    const MappingMatrix *matrix,
+    const float *input,
+    int input_rows,
+    opus_val16 *output,
+    int output_row,
+    int output_rows,
+    int frame_size)
 {
-  /* Matrix data is ordered col-wise.
-   * Input (x) is [n x k], output (y) is [m x k], matrix (M) is [m x n]:
-   *   y = M x
-   */
+  /* Matrix data is ordered col-wise. */
   opus_int16* matrix_data;
-  int i, row, col;
-  float matrix_cell, input_sample;
+  int i, col;
 
   celt_assert(input_rows <= matrix->cols && output_rows <= matrix->rows);
 
@@ -90,31 +90,70 @@
 
   for (i = 0; i < frame_size; i++)
   {
+    float tmp = 0;
+    for (col = 0; col < input_rows; col++)
+    {
+      tmp +=
+        matrix_data[MATRIX_INDEX(matrix->rows, output_row, col)] *
+        input[MATRIX_INDEX(input_rows, col, i)];
+    }
+#if defined(FIXED_POINT)
+    output[output_rows * i] = FLOAT2INT16((1/32768.f)*tmp);
+#else
+    output[output_rows * i] = (1/32768.f)*tmp;
+#endif
+  }
+}
+
+void mapping_matrix_multiply_channel_out_float(
+    const MappingMatrix *matrix,
+    const opus_val16 *input,
+    int input_row,
+    int input_rows,
+    float *output,
+    int output_rows,
+    int frame_size
+)
+{
+  /* Matrix data is ordered col-wise. */
+  opus_int16* matrix_data;
+  int i, row;
+  float input_sample;
+
+  celt_assert(input_rows <= matrix->cols && output_rows <= matrix->rows);
+
+  matrix_data = mapping_matrix_get_data(matrix);
+
+  for (i = 0; i < frame_size; i++)
+  {
+#if defined(FIXED_POINT)
+    input_sample = (1/32768.f)*input[input_rows * i];
+#else
+    input_sample = input[input_rows * i];
+#endif
     for (row = 0; row < output_rows; row++)
     {
-      output[MATRIX_INDEX(output_rows, row, i)] = 0;
-      for (col = 0; col < input_rows; col++)
-      {
-        matrix_cell = (0.000030518f)*(float)matrix_data[MATRIX_INDEX(matrix->rows, row, col)];
-        input_sample = input[MATRIX_INDEX(input_rows, col, i)];
-        output[MATRIX_INDEX(output_rows, row, i)] += matrix_cell * input_sample;
-      }
+      float tmp =
+        (1/32768.f)*matrix_data[MATRIX_INDEX(matrix->rows, row, input_row)] *
+        input_sample;
+      output[MATRIX_INDEX(output_rows, row, i)] += tmp;
     }
   }
 }
 #endif /* DISABLE_FLOAT_API */
 
-void mapping_matrix_multiply_short(const MappingMatrix *matrix,
-                                   const opus_int16 *input, int input_rows,
-                                   opus_int16 *output, int output_rows,
-                                   int frame_size)
+void mapping_matrix_multiply_channel_in_short(
+    const MappingMatrix *matrix,
+    const opus_int16 *input,
+    int input_rows,
+    opus_val16 *output,
+    int output_row,
+    int output_rows,
+    int frame_size)
 {
-  /* Matrix data is ordered col-wise.
-   * Input (x) is [n x k], output (y) is [m x k], matrix (M) is [m x n]:
-   *   y = M x
-   */
+  /* Matrix data is ordered col-wise. */
   opus_int16* matrix_data;
-  int i, row, col;
+  int i, col;
 
   celt_assert(input_rows <= matrix->cols && output_rows <= matrix->rows);
 
@@ -122,16 +161,58 @@
 
   for (i = 0; i < frame_size; i++)
   {
+    opus_val32 tmp = 0;
+    for (col = 0; col < input_rows; col++)
+    {
+#if defined(FIXED_POINT)
+      tmp +=
+        ((opus_int32)matrix_data[MATRIX_INDEX(matrix->rows, output_row, col)] *
+        (opus_int32)input[MATRIX_INDEX(input_rows, col, i)]) >> 8;
+#else
+      tmp +=
+        matrix_data[MATRIX_INDEX(matrix->rows, output_row, col)] *
+        input[MATRIX_INDEX(input_rows, col, i)];
+#endif
+    }
+#if defined(FIXED_POINT)
+    output[output_rows * i] = (opus_int16)((tmp + 64) >> 7);
+#else
+    output[output_rows * i] = (1/(32768.f*32768.f))*tmp;
+#endif
+  }
+}
+
+void mapping_matrix_multiply_channel_out_short(
+    const MappingMatrix *matrix,
+    const opus_val16 *input,
+    int input_row,
+    int input_rows,
+    opus_int16 *output,
+    int output_rows,
+    int frame_size)
+{
+  /* Matrix data is ordered col-wise. */
+  opus_int16* matrix_data;
+  int i, row;
+  opus_int32 input_sample;
+
+  celt_assert(input_rows <= matrix->cols && output_rows <= matrix->rows);
+
+  matrix_data = mapping_matrix_get_data(matrix);
+
+  for (i = 0; i < frame_size; i++)
+  {
+#if defined(FIXED_POINT)
+    input_sample = (opus_int32)input[input_rows * i];
+#else
+    input_sample = (opus_int32)FLOAT2INT16(input[input_rows * i]);
+#endif
     for (row = 0; row < output_rows; row++)
     {
-      opus_int32 tmp = 0;
-      for (col = 0; col < input_rows; col++)
-      {
-        tmp +=
-          (matrix_data[MATRIX_INDEX(matrix->rows, row, col)] *
-          input[MATRIX_INDEX(input_rows, col, i)]) >> 8;
-      }
-      output[MATRIX_INDEX(output_rows, row, i)] = (tmp + 64)>>7;
+      opus_int32 tmp =
+        (opus_int32)matrix_data[MATRIX_INDEX(matrix->rows, row, input_row)] *
+        input_sample;
+      output[MATRIX_INDEX(output_rows, row, i)] += (tmp + 16384) >> 15;
     }
   }
 }
diff --git a/src/mapping_matrix.h b/src/mapping_matrix.h
index 8fe82ea..381c8e1 100644
--- a/src/mapping_matrix.h
+++ b/src/mapping_matrix.h
@@ -44,8 +44,8 @@
 
 typedef struct MappingMatrix
 {
-    int rows;
-    int cols;
+    int rows; /* number of output channels (matrix rows). */
+    int cols; /* number of input channels (matrix columns). */
     int gain; /* in dB. S7.8-format. */
     /* Matrix cell data goes here using col-wise ordering. */
 } MappingMatrix;
@@ -64,20 +64,42 @@
 );
 
 #ifndef DISABLE_FLOAT_API
-void mapping_matrix_multiply_float(
+void mapping_matrix_multiply_channel_in_float(
     const MappingMatrix *matrix,
     const float *input,
     int input_rows,
+    opus_val16 *output,
+    int output_row,
+    int output_rows,
+    int frame_size
+);
+
+void mapping_matrix_multiply_channel_out_float(
+    const MappingMatrix *matrix,
+    const opus_val16 *input,
+    int input_row,
+    int input_rows,
     float *output,
     int output_rows,
     int frame_size
 );
 #endif /* DISABLE_FLOAT_API */
 
-void mapping_matrix_multiply_short(
+void mapping_matrix_multiply_channel_in_short(
     const MappingMatrix *matrix,
     const opus_int16 *input,
     int input_rows,
+    opus_val16 *output,
+    int output_row,
+    int output_rows,
+    int frame_size
+);
+
+void mapping_matrix_multiply_channel_out_short(
+    const MappingMatrix *matrix,
+    const opus_val16 *input,
+    int input_row,
+    int input_rows,
     opus_int16 *output,
     int output_rows,
     int frame_size
diff --git a/src/opus_multistream_decoder.c b/src/opus_multistream_decoder.c
index 6ab5d4c..d5dbf4f 100644
--- a/src/opus_multistream_decoder.c
+++ b/src/opus_multistream_decoder.c
@@ -135,15 +135,6 @@
    return st;
 }
 
-typedef void (*opus_copy_channel_out_func)(
-  void *dst,
-  int dst_stride,
-  int dst_channel,
-  const opus_val16 *src,
-  int src_stride,
-  int frame_size
-);
-
 static int opus_multistream_packet_validate(const unsigned char *data,
       opus_int32 len, int nb_streams, opus_int32 Fs)
 {
@@ -173,7 +164,7 @@
    return samples;
 }
 
-static int opus_multistream_decode_native(
+int opus_multistream_decode_native(
       OpusMSDecoder *st,
       const unsigned char *data,
       opus_int32 len,
@@ -181,7 +172,8 @@
       opus_copy_channel_out_func copy_channel_out,
       int frame_size,
       int decode_fec,
-      int soft_clip
+      int soft_clip,
+      void *user_data
 )
 {
    opus_int32 Fs;
@@ -258,7 +250,7 @@
          while ( (chan = get_left_channel(&st->layout, s, prev)) != -1)
          {
             (*copy_channel_out)(pcm, st->layout.nb_channels, chan,
-               buf, 2, frame_size);
+               buf, 2, frame_size, user_data);
             prev = chan;
          }
          prev = -1;
@@ -266,7 +258,7 @@
          while ( (chan = get_right_channel(&st->layout, s, prev)) != -1)
          {
             (*copy_channel_out)(pcm, st->layout.nb_channels, chan,
-               buf+1, 2, frame_size);
+               buf+1, 2, frame_size, user_data);
             prev = chan;
          }
       } else {
@@ -276,7 +268,7 @@
          while ( (chan = get_mono_channel(&st->layout, s, prev)) != -1)
          {
             (*copy_channel_out)(pcm, st->layout.nb_channels, chan,
-               buf, 1, frame_size);
+               buf, 1, frame_size, user_data);
             prev = chan;
          }
       }
@@ -287,7 +279,7 @@
       if (st->layout.mapping[c] == 255)
       {
          (*copy_channel_out)(pcm, st->layout.nb_channels, c,
-            NULL, 0, frame_size);
+            NULL, 0, frame_size, user_data);
       }
    }
    RESTORE_STACK;
@@ -301,11 +293,13 @@
   int dst_channel,
   const opus_val16 *src,
   int src_stride,
-  int frame_size
+  int frame_size,
+  void *user_data
 )
 {
    float *float_dst;
    opus_int32 i;
+   (void)user_data;
    float_dst = (float*)dst;
    if (src != NULL)
    {
@@ -330,11 +324,13 @@
   int dst_channel,
   const opus_val16 *src,
   int src_stride,
-  int frame_size
+  int frame_size,
+  void *user_data
 )
 {
    opus_int16 *short_dst;
    opus_int32 i;
+   (void)user_data;
    short_dst = (opus_int16*)dst;
    if (src != NULL)
    {
@@ -365,7 +361,7 @@
 )
 {
    return opus_multistream_decode_native(st, data, len,
-       pcm, opus_copy_channel_out_short, frame_size, decode_fec, 0);
+       pcm, opus_copy_channel_out_short, frame_size, decode_fec, 0, NULL);
 }
 
 #ifndef DISABLE_FLOAT_API
@@ -373,7 +369,7 @@
       opus_int32 len, float *pcm, int frame_size, int decode_fec)
 {
    return opus_multistream_decode_native(st, data, len,
-       pcm, opus_copy_channel_out_float, frame_size, decode_fec, 0);
+       pcm, opus_copy_channel_out_float, frame_size, decode_fec, 0, NULL);
 }
 #endif
 
@@ -383,20 +379,20 @@
       opus_int32 len, opus_int16 *pcm, int frame_size, int decode_fec)
 {
    return opus_multistream_decode_native(st, data, len,
-       pcm, opus_copy_channel_out_short, frame_size, decode_fec, 1);
+       pcm, opus_copy_channel_out_short, frame_size, decode_fec, 1, NULL);
 }
 
 int opus_multistream_decode_float(
       OpusMSDecoder *st,
       const unsigned char *data,
       opus_int32 len,
-      float *pcm,
+      opus_val16 *pcm,
       int frame_size,
       int decode_fec
 )
 {
    return opus_multistream_decode_native(st, data, len,
-       pcm, opus_copy_channel_out_float, frame_size, decode_fec, 0);
+       pcm, opus_copy_channel_out_float, frame_size, decode_fec, 0, NULL);
 }
 #endif
 
diff --git a/src/opus_multistream_encoder.c b/src/opus_multistream_encoder.c
index 07de6cc..1df9e7d 100644
--- a/src/opus_multistream_encoder.c
+++ b/src/opus_multistream_encoder.c
@@ -61,15 +61,6 @@
       {5, 3, {0, 6, 1, 2, 3, 4, 5, 7}}, /* 8: 7.1 surround */
 };
 
-typedef void (*opus_copy_channel_in_func)(
-  opus_val16 *dst,
-  int dst_stride,
-  const void *src,
-  int src_stride,
-  int src_channel,
-  int frame_size
-);
-
 static opus_val32 *ms_get_preemph_mem(OpusMSEncoder *st)
 {
    int s;
@@ -274,7 +265,7 @@
       int nb_frames = frame_size/freq_size;
       celt_assert(nb_frames*freq_size == frame_size);
       OPUS_COPY(in, mem+c*overlap, overlap);
-      (*copy_channel_in)(x, 1, pcm, channels, c, len);
+      (*copy_channel_in)(x, 1, pcm, channels, c, len, NULL);
       celt_preemphasis(x, in+overlap, frame_size, 1, upsample, celt_mode->preemph, preemph_mem+c, 0);
 #ifndef FIXED_POINT
       {
@@ -862,7 +853,7 @@
 
 /* Max size in case the encoder decides to return six frames (6 x 20 ms = 120 ms) */
 #define MS_FRAME_TMP (6*1275+12)
-static int opus_multistream_encode_native
+int opus_multistream_encode_native
 (
     OpusMSEncoder *st,
     opus_copy_channel_in_func copy_channel_in,
@@ -872,7 +863,8 @@
     opus_int32 max_data_bytes,
     int lsb_depth,
     downmix_func downmix,
-    int float_api
+    int float_api,
+    void *user_data
 )
 {
    opus_int32 Fs;
@@ -1006,9 +998,9 @@
          left = get_left_channel(&st->layout, s, -1);
          right = get_right_channel(&st->layout, s, -1);
          (*copy_channel_in)(buf, 2,
-            pcm, st->layout.nb_channels, left, frame_size);
+            pcm, st->layout.nb_channels, left, frame_size, user_data);
          (*copy_channel_in)(buf+1, 2,
-            pcm, st->layout.nb_channels, right, frame_size);
+            pcm, st->layout.nb_channels, right, frame_size, user_data);
          ptr += align(coupled_size);
          if (st->mapping_type == MAPPING_TYPE_SURROUND)
          {
@@ -1024,7 +1016,7 @@
          int i;
          int chan = get_mono_channel(&st->layout, s, -1);
          (*copy_channel_in)(buf, 1,
-            pcm, st->layout.nb_channels, chan, frame_size);
+            pcm, st->layout.nb_channels, chan, frame_size, user_data);
          ptr += align(mono_size);
          if (st->mapping_type == MAPPING_TYPE_SURROUND)
          {
@@ -1083,11 +1075,13 @@
   const void *src,
   int src_stride,
   int src_channel,
-  int frame_size
+  int frame_size,
+  void *user_data
 )
 {
    const float *float_src;
    opus_int32 i;
+   (void)user_data;
    float_src = (const float *)src;
    for (i=0;i<frame_size;i++)
 #if defined(FIXED_POINT)
@@ -1104,11 +1098,13 @@
   const void *src,
   int src_stride,
   int src_channel,
-  int frame_size
+  int frame_size,
+  void *user_data
 )
 {
    const opus_int16 *short_src;
    opus_int32 i;
+   (void)user_data;
    short_src = (const opus_int16 *)src;
    for (i=0;i<frame_size;i++)
 #if defined(FIXED_POINT)
@@ -1129,7 +1125,7 @@
 )
 {
    return opus_multistream_encode_native(st, opus_copy_channel_in_short,
-      pcm, frame_size, data, max_data_bytes, 16, downmix_int, 0);
+      pcm, frame_size, data, max_data_bytes, 16, downmix_int, 0, NULL);
 }
 
 #ifndef DISABLE_FLOAT_API
@@ -1142,7 +1138,7 @@
 )
 {
    return opus_multistream_encode_native(st, opus_copy_channel_in_float,
-      pcm, frame_size, data, max_data_bytes, 16, downmix_float, 1);
+      pcm, frame_size, data, max_data_bytes, 16, downmix_float, 1, NULL);
 }
 #endif
 
@@ -1158,7 +1154,7 @@
 )
 {
    return opus_multistream_encode_native(st, opus_copy_channel_in_float,
-      pcm, frame_size, data, max_data_bytes, 24, downmix_float, 1);
+      pcm, frame_size, data, max_data_bytes, 24, downmix_float, 1, NULL);
 }
 
 int opus_multistream_encode(
@@ -1170,7 +1166,7 @@
 )
 {
    return opus_multistream_encode_native(st, opus_copy_channel_in_short,
-      pcm, frame_size, data, max_data_bytes, 16, downmix_int, 0);
+      pcm, frame_size, data, max_data_bytes, 16, downmix_int, 0, NULL);
 }
 #endif
 
diff --git a/src/opus_private.h b/src/opus_private.h
index acbb0ae..193ff93 100644
--- a/src/opus_private.h
+++ b/src/opus_private.h
@@ -88,7 +88,25 @@
 int get_right_channel(const ChannelLayout *layout, int stream_id, int prev);
 int get_mono_channel(const ChannelLayout *layout, int stream_id, int prev);
 
+typedef void (*opus_copy_channel_in_func)(
+  opus_val16 *dst,
+  int dst_stride,
+  const void *src,
+  int src_stride,
+  int src_channel,
+  int frame_size,
+  void *user_data
+);
 
+typedef void (*opus_copy_channel_out_func)(
+  void *dst,
+  int dst_stride,
+  int dst_channel,
+  const opus_val16 *src,
+  int src_stride,
+  int frame_size,
+  void *user_data
+);
 
 #define MODE_SILK_ONLY          1000
 #define MODE_HYBRID             1001
@@ -156,4 +174,30 @@
 
 int pad_frame(unsigned char *data, opus_int32 len, opus_int32 new_len);
 
+int opus_multistream_encode_native
+(
+  struct OpusMSEncoder *st,
+  opus_copy_channel_in_func copy_channel_in,
+  const void *pcm,
+  int analysis_frame_size,
+  unsigned char *data,
+  opus_int32 max_data_bytes,
+  int lsb_depth,
+  downmix_func downmix,
+  int float_api,
+  void *user_data
+);
+
+int opus_multistream_decode_native(
+  struct OpusMSDecoder *st,
+  const unsigned char *data,
+  opus_int32 len,
+  void *pcm,
+  opus_copy_channel_out_func copy_channel_out,
+  int frame_size,
+  int decode_fec,
+  int soft_clip,
+  void *user_data
+);
+
 #endif /* OPUS_PRIVATE_H */
diff --git a/src/opus_projection_decoder.c b/src/opus_projection_decoder.c
index 0d1ee41..6cdadbe 100644
--- a/src/opus_projection_decoder.c
+++ b/src/opus_projection_decoder.c
@@ -42,18 +42,63 @@
 
 struct OpusProjectionDecoder
 {
-  int demixing_matrix_size_in_bytes;
-  /* Encoder states go here */
+  opus_int32 demixing_matrix_size_in_bytes;
+  /* Demixing matrix and multistream decoder states go here */
 };
 
+#if !defined(DISABLE_FLOAT_API)
+static void opus_projection_copy_channel_out_float(
+  void *dst,
+  int dst_stride,
+  int dst_channel,
+  const opus_val16 *src,
+  int src_stride,
+  int frame_size,
+  void *user_data)
+{
+  float *float_dst;
+  const MappingMatrix *matrix;
+  float_dst = (float *)dst;
+  matrix = (const MappingMatrix *)user_data;
+
+  if (dst_channel == 0)
+    OPUS_CLEAR(float_dst, frame_size * dst_stride);
+
+  if (src != NULL)
+    mapping_matrix_multiply_channel_out_float(matrix, src, dst_channel,
+      src_stride, float_dst, dst_stride, frame_size);
+}
+#endif
+
+static void opus_projection_copy_channel_out_short(
+  void *dst,
+  int dst_stride,
+  int dst_channel,
+  const opus_val16 *src,
+  int src_stride,
+  int frame_size,
+  void *user_data)
+{
+  opus_int16 *short_dst;
+  const MappingMatrix *matrix;
+  short_dst = (opus_int16 *)dst;
+  matrix = (const MappingMatrix *)user_data;
+  if (dst_channel == 0)
+    OPUS_CLEAR(short_dst, frame_size * dst_stride);
+
+  if (src != NULL)
+    mapping_matrix_multiply_channel_out_short(matrix, src, dst_channel,
+      src_stride, short_dst, dst_stride, frame_size);
+}
+
 static MappingMatrix *get_demixing_matrix(OpusProjectionDecoder *st)
 {
-  return (MappingMatrix *)((char*)st + align(sizeof(OpusProjectionDecoder)));
+  return (MappingMatrix*)((char*)st + align(sizeof(OpusProjectionDecoder)));
 }
 
 static OpusMSDecoder *get_multistream_decoder(OpusProjectionDecoder *st)
 {
-  return (OpusMSDecoder *)((char*)st + align(sizeof(OpusProjectionDecoder) +
+  return (OpusMSDecoder*)((char*)st + align(sizeof(OpusProjectionDecoder) +
     st->demixing_matrix_size_in_bytes));
 }
 
@@ -69,7 +114,7 @@
   if (!decoder_size)
     return 0;
 
-  return align(sizeof(OpusProjectionDecoder) + matrix_size + decoder_size);
+  return align(sizeof(OpusProjectionDecoder)) + matrix_size + decoder_size;
 }
 
 int opus_projection_decoder_init(OpusProjectionDecoder *st, opus_int32 Fs,
@@ -86,14 +131,14 @@
   /* Verify supplied matrix size. */
   nb_input_streams = streams + coupled_streams;
   expected_matrix_size = nb_input_streams * channels * sizeof(opus_int16);
-  if (expected_matrix_size != demixing_matrix_size)
+  if (demixing_matrix_size < expected_matrix_size || align(expected_matrix_size) != align(demixing_matrix_size))
   {
     RESTORE_STACK;
     return OPUS_BAD_ARG;
   }
 
   /* Convert demixing matrix input into internal format. */
-  ALLOC(buf, demixing_matrix_size, opus_int16);
+  ALLOC(buf, nb_input_streams * channels, opus_int16);
   for (i = 0; i < nb_input_streams * channels; i++)
   {
     int s = demixing_matrix[2*i + 1] << 8 | demixing_matrix[2*i];
@@ -102,15 +147,13 @@
   }
 
   /* Assign demixing matrix. */
-  st->demixing_matrix_size_in_bytes = expected_matrix_size;
+  st->demixing_matrix_size_in_bytes = mapping_matrix_get_size(channels, nb_input_streams);
   mapping_matrix_init(get_demixing_matrix(st), channels, nb_input_streams, 0,
     buf, demixing_matrix_size);
 
   /* Set trivial mapping so each input channel pairs with a matrix column. */
   for (i = 0; i < channels; i++)
-  {
     mapping[i] = i;
-  }
 
   ret = opus_multistream_decoder_init(
     get_multistream_decoder(st), Fs, channels, streams, coupled_streams, mapping);
@@ -154,63 +197,33 @@
   return st;
 }
 
+#ifdef FIXED_POINT
 int opus_projection_decode(OpusProjectionDecoder *st, const unsigned char *data,
                            opus_int32 len, opus_int16 *pcm, int frame_size,
                            int decode_fec)
 {
-#ifdef NONTHREADSAFE_PSEUDOSTACK
-  celt_fatal("Unable to use opus_projection_decode() when NONTHREADSAFE_PSEUDOSTACK is defined.");
-#endif
-  MappingMatrix *matrix;
-  OpusMSDecoder *ms_decoder;
-  int ret;
-  VARDECL(opus_int16, buf);
-  ALLOC_STACK;
-
-  ms_decoder = get_multistream_decoder(st);
-  ALLOC(buf, (ms_decoder->layout.nb_streams + ms_decoder->layout.nb_coupled_streams) *
-    frame_size, opus_int16);
-  ret = opus_multistream_decode(ms_decoder, data, len, buf, frame_size,
-                                        decode_fec);
-  if (ret <= 0)
-    return ret;
-  frame_size = ret;
-  matrix = get_demixing_matrix(st);
-  mapping_matrix_multiply_short(matrix, buf,
-    ms_decoder->layout.nb_streams + ms_decoder->layout.nb_coupled_streams,
-    pcm, ms_decoder->layout.nb_channels, frame_size);
-  RESTORE_STACK;
-  return frame_size;
+  return opus_multistream_decode_native(get_multistream_decoder(st), data, len,
+    pcm, opus_projection_copy_channel_out_short, frame_size, decode_fec, 0,
+    get_demixing_matrix(st));
 }
+#else
+int opus_projection_decode(OpusProjectionDecoder *st, const unsigned char *data,
+                           opus_int32 len, opus_int16 *pcm, int frame_size,
+                           int decode_fec)
+{
+  return opus_multistream_decode_native(get_multistream_decoder(st), data, len,
+    pcm, opus_projection_copy_channel_out_short, frame_size, decode_fec, 1,
+    get_demixing_matrix(st));
+}
+#endif
 
 #ifndef DISABLE_FLOAT_API
 int opus_projection_decode_float(OpusProjectionDecoder *st, const unsigned char *data,
-                                 opus_int32 len, float *pcm,
-                                 int frame_size, int decode_fec)
+                                 opus_int32 len, float *pcm, int frame_size, int decode_fec)
 {
-#ifdef NONTHREADSAFE_PSEUDOSTACK
-  celt_fatal("Unable to use opus_projection_decode_float() when NONTHREADSAFE_PSEUDOSTACK is defined.");
-#endif
-  MappingMatrix *matrix;
-  OpusMSDecoder *ms_decoder;
-  int ret;
-  VARDECL(float, buf);
-  ALLOC_STACK;
-
-  ms_decoder = get_multistream_decoder(st);
-  ALLOC(buf, (ms_decoder->layout.nb_streams + ms_decoder->layout.nb_coupled_streams) *
-    frame_size, float);
-  ret = opus_multistream_decode_float(ms_decoder, data, len, buf,
-                                      frame_size, decode_fec);
-  if (ret <= 0)
-    return ret;
-  frame_size = ret;
-  matrix = get_demixing_matrix(st);
-  mapping_matrix_multiply_float(matrix, buf,
-    ms_decoder->layout.nb_streams + ms_decoder->layout.nb_coupled_streams,
-    pcm, ms_decoder->layout.nb_channels, frame_size);
-  RESTORE_STACK;
-  return frame_size;
+  return opus_multistream_decode_native(get_multistream_decoder(st), data, len,
+    pcm, opus_projection_copy_channel_out_float, frame_size, decode_fec, 0,
+    get_demixing_matrix(st));
 }
 #endif
 
diff --git a/src/opus_projection_encoder.c b/src/opus_projection_encoder.c
index a2c2f35..3cf516a 100644
--- a/src/opus_projection_encoder.c
+++ b/src/opus_projection_encoder.c
@@ -42,11 +42,41 @@
 
 struct OpusProjectionEncoder
 {
-  int mixing_matrix_size_in_bytes;
-  int demixing_matrix_size_in_bytes;
+  opus_int32 mixing_matrix_size_in_bytes;
+  opus_int32 demixing_matrix_size_in_bytes;
   /* Encoder states go here */
 };
 
+#if !defined(DISABLE_FLOAT_API)
+static void opus_projection_copy_channel_in_float(
+  opus_val16 *dst,
+  int dst_stride,
+  const void *src,
+  int src_stride,
+  int src_channel,
+  int frame_size,
+  void *user_data
+)
+{
+  mapping_matrix_multiply_channel_in_float((const MappingMatrix*)user_data,
+    (const float*)src, src_stride, dst, src_channel, dst_stride, frame_size);
+}
+#endif
+
+static void opus_projection_copy_channel_in_short(
+  opus_val16 *dst,
+  int dst_stride,
+  const void *src,
+  int src_stride,
+  int src_channel,
+  int frame_size,
+  void *user_data
+)
+{
+  mapping_matrix_multiply_channel_in_short((const MappingMatrix*)user_data,
+    (const opus_int16*)src, src_stride, dst, src_channel, dst_stride, frame_size);
+}
+
 static int get_order_plus_one_from_channels(int channels, int *order_plus_one)
 {
   int order_plus_one_;
@@ -115,10 +145,8 @@
 
   ret = get_streams_from_channels(channels, mapping_family, &nb_streams,
                                   &nb_coupled_streams, &order_plus_one);
-  if (ret != OPUS_OK)
-  {
+  if (ret != OPUS_OK || order_plus_one < 2 || order_plus_one > 4)
     return 0;
-  }
 
   matrix_rows = order_plus_one * order_plus_one + 2;
   matrix_size = mapping_matrix_get_size(matrix_rows, matrix_rows);
@@ -126,7 +154,7 @@
       opus_multistream_encoder_get_size(nb_streams, nb_coupled_streams);
   if (!encoder_size)
     return 0;
-  return align(sizeof(OpusProjectionEncoder) + matrix_size + matrix_size + encoder_size);
+  return align(sizeof(OpusProjectionEncoder)) + matrix_size + matrix_size + encoder_size;
 }
 
 int opus_projection_ambisonics_encoder_init(OpusProjectionEncoder *st, opus_int32 Fs,
@@ -218,9 +246,7 @@
 
   /* Set trivial mapping so each input channel pairs with a matrix column. */
   for (i = 0; i < channels; i++)
-  {
     mapping[i] = i;
-  }
 
   /* Initialize multistream encoder with provided settings. */
   ms_encoder = get_multistream_encoder(st);
@@ -269,54 +295,31 @@
                            int frame_size, unsigned char *data,
                            opus_int32 max_data_bytes)
 {
-#ifdef NONTHREADSAFE_PSEUDOSTACK
-  celt_fatal("Unable to use opus_projection_encode() when NONTHREADSAFE_PSEUDOSTACK is defined.");
-#endif
-  MappingMatrix *matrix;
-  OpusMSEncoder *ms_encoder;
-  int ret;
-  VARDECL(opus_int16, buf);
-  ALLOC_STACK;
-
-  matrix = get_mixing_matrix(st);
-  ms_encoder = get_multistream_encoder(st);
-  ALLOC(buf, (ms_encoder->layout.nb_streams + ms_encoder->layout.nb_coupled_streams) *
-    frame_size, opus_int16);
-  mapping_matrix_multiply_short(matrix, pcm,
-    ms_encoder->layout.nb_channels, buf,
-    ms_encoder->layout.nb_streams + ms_encoder->layout.nb_coupled_streams,
-    frame_size);
-  ret = opus_multistream_encode(ms_encoder, buf, frame_size, data, max_data_bytes);
-  RESTORE_STACK;
-  return ret;
+  return opus_multistream_encode_native(get_multistream_encoder(st),
+    opus_projection_copy_channel_in_short, pcm, frame_size, data,
+    max_data_bytes, 16, downmix_int, 0, get_mixing_matrix(st));
 }
 
 #ifndef DISABLE_FLOAT_API
+#ifdef FIXED_POINT
 int opus_projection_encode_float(OpusProjectionEncoder *st, const float *pcm,
                                  int frame_size, unsigned char *data,
                                  opus_int32 max_data_bytes)
 {
-#ifdef NONTHREADSAFE_PSEUDOSTACK
-  celt_fatal("Unable to use opus_projection_encode_float() when NONTHREADSAFE_PSEUDOSTACK is defined.");
-#endif
-  MappingMatrix *matrix;
-  OpusMSEncoder *ms_encoder;
-  int ret;
-  VARDECL(float, buf);
-  ALLOC_STACK;
-
-  matrix = get_mixing_matrix(st);
-  ms_encoder = get_multistream_encoder(st);
-  ALLOC(buf, (ms_encoder->layout.nb_streams + ms_encoder->layout.nb_coupled_streams) *
-    frame_size, float);
-  mapping_matrix_multiply_float(matrix, pcm,
-    ms_encoder->layout.nb_channels, buf,
-    ms_encoder->layout.nb_streams + ms_encoder->layout.nb_coupled_streams,
-    frame_size);
-  ret = opus_multistream_encode_float(ms_encoder, buf, frame_size, data, max_data_bytes);
-  RESTORE_STACK;
-  return ret;
+  return opus_multistream_encode_native(get_multistream_encoder(st),
+    opus_projection_copy_channel_in_float, pcm, frame_size, data,
+    max_data_bytes, 16, downmix_float, 1, get_mixing_matrix(st));
 }
+#else
+int opus_projection_encode_float(OpusProjectionEncoder *st, const float *pcm,
+                                 int frame_size, unsigned char *data,
+                                 opus_int32 max_data_bytes)
+{
+  return opus_multistream_encode_native(get_multistream_encoder(st),
+    opus_projection_copy_channel_in_float, pcm, frame_size, data,
+    max_data_bytes, 24, downmix_float, 1, get_mixing_matrix(st));
+}
+#endif
 #endif
 
 void opus_projection_encoder_destroy(OpusProjectionEncoder *st)
diff --git a/tests/test_opus_projection.c b/tests/test_opus_projection.c
index 4b11c12..01dc42d 100644
--- a/tests/test_opus_projection.c
+++ b/tests/test_opus_projection.c
@@ -47,170 +47,141 @@
 #define BUFFER_SIZE 960
 #define MAX_DATA_BYTES 32768
 #define MAX_FRAME_SAMPLES 5760
+#define ERROR_TOLERANCE 1
 
-#define INT16_TO_FLOAT(x) ((1/32768.f)*(float)x)
+#define SIMPLE_MATRIX_SIZE 12
+#define SIMPLE_MATRIX_FRAME_SIZE 10
+#define SIMPLE_MATRIX_INPUT_SIZE 30
+#define SIMPLE_MATRIX_OUTPUT_SIZE 40
 
-void print_matrix_short(const opus_int16 *data, int rows, int cols)
+int assert_is_equal(
+  const opus_val16 *a, const opus_int16 *b, int size, opus_int16 tolerance)
 {
-  int i, j;
-  for (i = 0; i < rows; i++)
+  int i;
+  for (i = 0; i < size; i++)
   {
-    for (j = 0; j < cols; j++)
-    {
-      fprintf(stderr, "%8.5f  ", (float)INT16_TO_FLOAT(data[j * rows + i]));
-    }
-    fprintf(stderr, "\n");
+#ifdef FIXED_POINT
+    opus_int16 val = a[i];
+#else
+    opus_int16 val = FLOAT2INT16(a[i]);
+#endif
+    if (abs(val - b[i]) > tolerance)
+      return 1;
   }
-  fprintf(stderr, "\n");
+  return 0;
 }
 
-void print_matrix_float(const float *data, int rows, int cols)
-{
-  int i, j;
-  for (i = 0; i < rows; i++)
-  {
-    for (j = 0; j < cols; j++)
-    {
-      fprintf(stderr, "%8.5f ", data[j * rows + i]);
-    }
-    fprintf(stderr, "\n");
-  }
-  fprintf(stderr, "\n");
-}
-
-void print_matrix(MappingMatrix *matrix)
-{
-  opus_int16 *data;
-
-  fprintf(stderr, "%d x %d, gain: %d\n", matrix->rows, matrix->cols,
-    matrix->gain);
-
-  data = mapping_matrix_get_data(matrix);
-  print_matrix_short(data, matrix->rows, matrix->cols);
-}
-
-int assert_transform_short(
+int assert_is_equal_short(
   const opus_int16 *a, const opus_int16 *b, int size, opus_int16 tolerance)
 {
   int i;
   for (i = 0; i < size; i++)
-  {
     if (abs(a[i] - b[i]) > tolerance)
-    {
-      return 0;
-    }
-  }
-  return 1;
+      return 1;
+  return 0;
 }
 
-int assert_transform_float(
-  const float *a, const float *b, int size, float tolerance)
+void test_simple_matrix(void)
 {
-  int i;
-  for (i = 0; i < size; i++)
-  {
-    if (fabsf(a[i] - b[i]) > tolerance)
-    {
-      return 0;
-    }
-  }
-  return 1;
-}
-
-void test_matrix_transform(void)
-{
-  /* Create testing mixing matrix (4 x 3), gain 0dB:
-  *   [ 0 1 0 ]
-  *   [ 1 0 0 ]
-  *   [ 0 0 0 ]
-  *   [ 0 0 1 ]
-  */
-  opus_int32 matrix_size;
-  MappingMatrix *testing_matrix;
-  const opus_int16 testing_matrix_data[12] = {
-    0, 32767, 0, 0, 32767, 0, 0, 0, 0, 0, 0, 32767 };
-
-  const int frame_size = 10;
-  const opus_int16 input[30] = {
+  const MappingMatrix simple_matrix_params = {4, 3, 0};
+  const opus_int16 simple_matrix_data[SIMPLE_MATRIX_SIZE] = {0, 32767, 0, 0, 32767, 0, 0, 0, 0, 0, 0, 32767};
+  const opus_int16 input_int16[SIMPLE_MATRIX_INPUT_SIZE] = {
     32767, 0, -32768, 29491, -3277, -29491, 26214, -6554, -26214, 22938, -9830,
     -22938, 19661, -13107, -19661, 16384, -16384, -16384, 13107, -19661, -13107,
     9830, -22938, -9830, 6554, -26214, -6554, 3277, -29491, -3277};
-  const opus_int16 expected_output[40] = {
+  const opus_int16 expected_output_int16[SIMPLE_MATRIX_OUTPUT_SIZE] = {
     0, 32767, 0, -32768, -3277, 29491, 0, -29491, -6554, 26214, 0, -26214,
     -9830, 22938, 0, -22938, -13107, 19661, 0, -19661, -16384, 16384, 0, -16384,
     -19661, 13107, 0, -13107, -22938, 9830, 0, -9830, -26214, 6554, 0, -6554,
     -29491, 3277, 0, -3277};
-  opus_int16 output[40] = {0};
 
-#ifndef DISABLE_FLOAT_API
-  int i;
-  /* Sample-accurate to -93.9794 dB */
-  float flt_tolerance = 2e-5f;
-  float input32[30] = {0};
-  float output32[40] = {0};
-  float expected_output32[40] = {0};
+  int i, ret;
+  opus_val16 *input_val16;
+  opus_val16 *output_val16;
+  opus_int16 *output_int16;
+  MappingMatrix *simple_matrix;
 
-  /* Convert short to float representations. */
-  for (i = 0; i < 30; i++)
+  /* Allocate input/output buffers. */
+  input_val16 = (opus_val16 *)opus_alloc(align(sizeof(opus_val16) * SIMPLE_MATRIX_INPUT_SIZE));
+  output_int16 = (opus_int16 *)opus_alloc(align(sizeof(opus_int16) * SIMPLE_MATRIX_OUTPUT_SIZE));
+  output_val16 = (opus_val16 *)opus_alloc(align(sizeof(opus_val16) * SIMPLE_MATRIX_OUTPUT_SIZE));
+
+  /* Initialize matrix */
+  simple_matrix = (MappingMatrix *)opus_alloc(
+    mapping_matrix_get_size(simple_matrix_params.rows,
+                            simple_matrix_params.cols));
+  mapping_matrix_init(simple_matrix, simple_matrix_params.rows,
+    simple_matrix_params.cols, simple_matrix_params.gain, simple_matrix_data,
+    sizeof(simple_matrix_data));
+
+  /* Convert the int16 inputs to opus_val16 (scaled by 1/32768 in float builds). */
+  for (i = 0; i < SIMPLE_MATRIX_INPUT_SIZE; i++)
   {
-    input32[i] = INT16_TO_FLOAT(input[i]);
-  }
-  for (i = 0; i < 40; i++)
-  {
-    expected_output32[i] = INT16_TO_FLOAT(expected_output[i]);
-  }
-#endif /* DISABLE_FLOAT_API */
-
-  /* Create the matrix. */
-  matrix_size = mapping_matrix_get_size(4, 3);
-  testing_matrix = (MappingMatrix *)opus_alloc(matrix_size);
-  mapping_matrix_init(testing_matrix, 4, 3, 0, testing_matrix_data,
-    12 * sizeof(opus_int16));
-
-  mapping_matrix_multiply_short(testing_matrix, input, testing_matrix->cols,
-    output, testing_matrix->rows, frame_size);
-  if (!assert_transform_short(output, expected_output, 40, 1))
-  {
-    fprintf(stderr, "Matrix:\n");
-    print_matrix(testing_matrix);
-
-    fprintf(stderr, "Input (short):\n");
-    print_matrix_short(input, testing_matrix->cols, frame_size);
-
-    fprintf(stderr, "Expected Output (short):\n");
-    print_matrix_short(expected_output, testing_matrix->rows, frame_size);
-
-    fprintf(stderr, "Output (short):\n");
-    print_matrix_short(output, testing_matrix->rows, frame_size);
-
-    goto bad_cleanup;
-  }
-
-#ifndef DISABLE_FLOAT_API
-  mapping_matrix_multiply_float(testing_matrix, input32, testing_matrix->cols,
-    output32, testing_matrix->rows, frame_size);
-  if (!assert_transform_float(output32, expected_output32, 40, flt_tolerance))
-  {
-    fprintf(stderr, "Matrix:\n");
-    print_matrix(testing_matrix);
-
-    fprintf(stderr, "Input (float):\n");
-    print_matrix_float(input32, testing_matrix->cols, frame_size);
-
-    fprintf(stderr, "Expected Output (float):\n");
-    print_matrix_float(expected_output32, testing_matrix->rows, frame_size);
-
-    fprintf(stderr, "Output (float):\n");
-    print_matrix_float(output32, testing_matrix->rows, frame_size);
-
-    goto bad_cleanup;
-  }
+#ifdef FIXED_POINT
+    input_val16[i] = input_int16[i];
+#else
+    input_val16[i] = (1/32768.f)*input_int16[i];
 #endif
-  opus_free(testing_matrix);
-  return;
-bad_cleanup:
-  opus_free(testing_matrix);
-  test_failed();
+  }
+
+  /* Test mapping_matrix_multiply_channel_in_short(), called once per matrix row. */
+  for (i = 0; i < SIMPLE_MATRIX_OUTPUT_SIZE; i++)
+    output_val16[i] = 0;
+  for (i = 0; i < simple_matrix->rows; i++)
+  {
+    mapping_matrix_multiply_channel_in_short(simple_matrix,
+      input_int16, simple_matrix->cols, &output_val16[i], i,
+      simple_matrix->rows, SIMPLE_MATRIX_FRAME_SIZE);
+  }
+  ret = assert_is_equal(output_val16, expected_output_int16, SIMPLE_MATRIX_OUTPUT_SIZE, ERROR_TOLERANCE);
+  if (ret)
+    test_failed();
+
+  /* Test mapping_matrix_multiply_channel_out_short(), called once per matrix column. */
+  for (i = 0; i < SIMPLE_MATRIX_OUTPUT_SIZE; i++)
+    output_int16[i] = 0;
+  for (i = 0; i < simple_matrix->cols; i++)
+  {
+    mapping_matrix_multiply_channel_out_short(simple_matrix,
+      &input_val16[i], i, simple_matrix->cols, output_int16,
+      simple_matrix->rows, SIMPLE_MATRIX_FRAME_SIZE);
+  }
+  ret = assert_is_equal_short(output_int16, expected_output_int16, SIMPLE_MATRIX_OUTPUT_SIZE, ERROR_TOLERANCE);
+  if (ret)
+    test_failed();
+
+#if !defined(DISABLE_FLOAT_API) && !defined(FIXED_POINT)
+  /* Test mapping_matrix_multiply_channel_in_float(), called once per matrix row. */
+  for (i = 0; i < SIMPLE_MATRIX_OUTPUT_SIZE; i++)
+    output_val16[i] = 0;
+  for (i = 0; i < simple_matrix->rows; i++)
+  {
+    mapping_matrix_multiply_channel_in_float(simple_matrix,
+      input_val16, simple_matrix->cols, &output_val16[i], i,
+      simple_matrix->rows, SIMPLE_MATRIX_FRAME_SIZE);
+  }
+  ret = assert_is_equal(output_val16, expected_output_int16, SIMPLE_MATRIX_OUTPUT_SIZE, ERROR_TOLERANCE);
+  if (ret)
+    test_failed();
+
+  /* Test mapping_matrix_multiply_channel_out_float(), called once per matrix column. */
+  for (i = 0; i < SIMPLE_MATRIX_OUTPUT_SIZE; i++)
+    output_val16[i] = 0;
+  for (i = 0; i < simple_matrix->cols; i++)
+  {
+    mapping_matrix_multiply_channel_out_float(simple_matrix,
+      &input_val16[i], i, simple_matrix->cols, output_val16,
+      simple_matrix->rows, SIMPLE_MATRIX_FRAME_SIZE);
+  }
+  ret = assert_is_equal(output_val16, expected_output_int16, SIMPLE_MATRIX_OUTPUT_SIZE, ERROR_TOLERANCE);
+  if (ret)
+    test_failed();
+#endif
+
+  opus_free(input_val16);
+  opus_free(output_int16);
+  opus_free(output_val16);
+  opus_free(simple_matrix);
 }
 
 void test_creation_arguments(const int channels, const int mapping_family)
@@ -403,15 +374,15 @@
   (void)_argc;
   (void)_argv;
 
-  /* Test matrix creation/multiplication. */
-  test_matrix_transform();
+  /* Test simple matrix multiplication routines. */
+  test_simple_matrix();
 
   /* Test full range of channels in creation arguments. */
   for (i = 0; i < 255; i++)
     test_creation_arguments(i, 253);
 
   /* Test encode/decode pipeline. */
-  test_encode_decode(64 * 16, 16, 253);
+  test_encode_decode(64 * 18, 18, 253);
 
   fprintf(stderr, "All projection tests passed.\n");
   return 0;