Adds packet padding that works for all packet codes (not only code 0) and fixes 40/60 ms CBR.

Padding is now handled by the repacketizer; the new opus_packet_pad() replaces the encoder's pad_frame().
diff --git a/include/opus.h b/include/opus.h
index 38817b4..b9e8996 100644
--- a/include/opus.h
+++ b/include/opus.h
@@ -911,6 +911,8 @@
   */
 OPUS_EXPORT OPUS_WARN_UNUSED_RESULT opus_int32 opus_repacketizer_out(OpusRepacketizer *rp, unsigned char *data, opus_int32 maxlen) OPUS_ARG_NONNULL(1);
 
+OPUS_EXPORT OPUS_WARN_UNUSED_RESULT int opus_packet_pad(unsigned char *data, opus_int32 len, opus_int32 new_len);
+
 /**@}*/
 
 #ifdef __cplusplus
diff --git a/src/opus_encoder.c b/src/opus_encoder.c
index abac145..4304db4 100644
--- a/src/opus_encoder.c
+++ b/src/opus_encoder.c
@@ -244,44 +244,6 @@
     return OPUS_OK;
 }
 
-int pad_frame(unsigned char *data, opus_int32 len, opus_int32 new_len)
-{
-   if (len == new_len)
-      return 0;
-   if (len > new_len)
-      return 1;
-
-   if ((data[0]&0x3)==0)
-   {
-      int i;
-      int padding, nb_255s;
-
-      padding = new_len - len;
-      if (padding >= 2)
-      {
-         nb_255s = (padding-2)/255;
-
-         for (i=len-1;i>=1;i--)
-            data[i+nb_255s+2] = data[i];
-         data[0] |= 0x3;
-         data[1] = 0x41;
-         for (i=0;i<nb_255s;i++)
-            data[i+2] = 255;
-         data[nb_255s+2] = padding-255*nb_255s-2;
-         for (i=len+3+nb_255s;i<new_len;i++)
-            data[i] = 0;
-      } else {
-         for (i=len-1;i>=1;i--)
-            data[i+1] = data[i];
-         data[0] |= 0x3;
-         data[1] = 1;
-      }
-      return 0;
-   } else {
-      return 1;
-   }
-}
-
 static unsigned char gen_toc(int mode, int framerate, int bandwidth, int channels)
 {
    int period;
@@ -1375,6 +1337,7 @@
        int bak_mode, bak_bandwidth, bak_channels, bak_to_mono;
        VARDECL(OpusRepacketizer, rp);
        opus_int32 bytes_per_frame;
+       opus_int32 repacketize_len;
 
 #ifndef DISABLE_FLOAT_API
        if (analysis_read_pos_bak!= -1)
@@ -1427,7 +1390,11 @@
              return OPUS_INTERNAL_ERROR;
           }
        }
-       ret = opus_repacketizer_out(rp, data, out_data_bytes);
+       if (st->use_vbr)
+          repacketize_len = out_data_bytes;
+       else
+          repacketize_len = IMIN(3*st->bitrate_bps/(3*8*50/nb_frames), out_data_bytes);
+       ret = opus_repacketizer_out_range_impl(rp, 0, nb_frames, data, repacketize_len, 0, !st->use_vbr);
        if (ret<0)
        {
           RESTORE_STACK;
@@ -1942,7 +1909,8 @@
     ret += 1+redundancy_bytes;
     if (!st->use_vbr && ret >= 3)
     {
-       if (pad_frame(data, ret, max_data_bytes))
+       if (opus_packet_pad(data, ret, max_data_bytes) != OPUS_OK)
+
        {
           RESTORE_STACK;
           return OPUS_INTERNAL_ERROR;
diff --git a/src/opus_multistream_encoder.c b/src/opus_multistream_encoder.c
index 4430e69..7cb1af7 100644
--- a/src/opus_multistream_encoder.c
+++ b/src/opus_multistream_encoder.c
@@ -757,7 +757,7 @@
    surround_rate_allocation(st, bitrates, frame_size);
 
    if (!vbr)
-      max_data_bytes = IMIN(max_data_bytes, st->bitrate_bps/(8*Fs/frame_size));
+      max_data_bytes = IMIN(max_data_bytes, 3*st->bitrate_bps/(3*8*Fs/frame_size));
 
    ptr = (char*)st + align(sizeof(OpusMSEncoder));
    for (s=0;s<st->layout.nb_streams;s++)
@@ -859,13 +859,8 @@
          while taking into account the fact that the encoder can now return
          more than one frame at a time (e.g. 60 ms CELT-only) */
       opus_repacketizer_cat(&rp, tmp_data, len);
-      len = opus_repacketizer_out_range_impl(&rp, 0, opus_repacketizer_get_nb_frames(&rp), data, max_data_bytes-tot_size, s != st->layout.nb_streams-1);
-      if (!vbr && s == st->layout.nb_streams-1 && curr_max > len)
-      {
-         /* Can pad_frame() still fail here? */
-         if (!pad_frame(data, len, curr_max))
-            len = curr_max;
-      }
+      len = opus_repacketizer_out_range_impl(&rp, 0, opus_repacketizer_get_nb_frames(&rp),
+            data, max_data_bytes-tot_size, s != st->layout.nb_streams-1, !vbr && s == st->layout.nb_streams-1);
       data += len;
       tot_size += len;
    }
diff --git a/src/opus_private.h b/src/opus_private.h
index 76564fc..83225f2 100644
--- a/src/opus_private.h
+++ b/src/opus_private.h
@@ -121,7 +121,8 @@
       const unsigned char *frames[48], opus_int16 size[48],
       int *payload_offset, opus_int32 *packet_offset);
 
-opus_int32 opus_repacketizer_out_range_impl(OpusRepacketizer *rp, int begin, int end, unsigned char *data, opus_int32 maxlen, int self_delimited);
+opus_int32 opus_repacketizer_out_range_impl(OpusRepacketizer *rp, int begin, int end,
+      unsigned char *data, opus_int32 maxlen, int self_delimited, int pad);
 
 int pad_frame(unsigned char *data, opus_int32 len, opus_int32 new_len);
 
diff --git a/src/repacketizer.c b/src/repacketizer.c
index 0c5d840..45bb384 100644
--- a/src/repacketizer.c
+++ b/src/repacketizer.c
@@ -94,12 +94,14 @@
    return rp->nb_frames;
 }
 
-opus_int32 opus_repacketizer_out_range_impl(OpusRepacketizer *rp, int begin, int end, unsigned char *data, opus_int32 maxlen, int self_delimited)
+opus_int32 opus_repacketizer_out_range_impl(OpusRepacketizer *rp, int begin, int end,
+      unsigned char *data, opus_int32 maxlen, int self_delimited, int pad)
 {
    int i, count;
    opus_int32 tot_size;
    opus_int16 *len;
    const unsigned char **frames;
+   unsigned char * ptr;
 
    if (begin<0 || begin>=end || end>rp->nb_frames)
    {
@@ -115,18 +117,15 @@
    else
       tot_size = 0;
 
-   switch (count)
-   {
-   case 1:
+   ptr = data;
+   if (count==1)
    {
       /* Code 0 */
       tot_size += len[0]+1;
       if (tot_size > maxlen)
          return OPUS_BUFFER_TOO_SMALL;
-      *data++ = rp->toc&0xFC;
-   }
-   break;
-   case 2:
+      *ptr++ = rp->toc&0xFC;
+   } else if (count==2)
    {
       if (len[1] == len[0])
       {
@@ -134,22 +133,28 @@
          tot_size += 2*len[0]+1;
          if (tot_size > maxlen)
             return OPUS_BUFFER_TOO_SMALL;
-         *data++ = (rp->toc&0xFC) | 0x1;
+         *ptr++ = (rp->toc&0xFC) | 0x1;
       } else {
          /* Code 2 */
          tot_size += len[0]+len[1]+2+(len[0]>=252);
          if (tot_size > maxlen)
             return OPUS_BUFFER_TOO_SMALL;
-         *data++ = (rp->toc&0xFC) | 0x2;
-         data += encode_size(len[0], data);
+         *ptr++ = (rp->toc&0xFC) | 0x2;
+         ptr += encode_size(len[0], ptr);
       }
    }
-   break;
-   default:
+   if (count > 2 || (pad && tot_size < maxlen))
    {
       /* Code 3 */
       int vbr;
+      int pad_amount=0;
 
+      /* Restart the process for the padding case */
+      ptr = data;
+      if (self_delimited)
+         tot_size = 1 + (len[count-1]>=252);
+      else
+         tot_size = 0;
       vbr = 0;
       for (i=1;i<count;i++)
       {
@@ -168,41 +173,77 @@
 
          if (tot_size > maxlen)
             return OPUS_BUFFER_TOO_SMALL;
-         *data++ = (rp->toc&0xFC) | 0x3;
-         *data++ = count | 0x80;
-         for (i=0;i<count-1;i++)
-            data += encode_size(len[i], data);
+         *ptr++ = (rp->toc&0xFC) | 0x3;
+         *ptr++ = count | 0x80;
       } else {
          tot_size += count*len[0]+2;
          if (tot_size > maxlen)
             return OPUS_BUFFER_TOO_SMALL;
-         *data++ = (rp->toc&0xFC) | 0x3;
-         *data++ = count;
+         *ptr++ = (rp->toc&0xFC) | 0x3;
+         *ptr++ = count;
+      }
+      pad_amount = pad ? (maxlen-tot_size) : 0;
+      if (pad_amount != 0)
+      {
+         int nb_255s;
+         data[1] |= 0x40;
+         nb_255s = (pad_amount-1)/255;
+         for (i=0;i<nb_255s;i++)
+            *ptr++ = 255;
+         *ptr++ = pad_amount-255*nb_255s-1;
+         tot_size += pad_amount;
+      }
+      if (vbr)
+      {
+         for (i=0;i<count-1;i++)
+            ptr += encode_size(len[i], ptr);
       }
    }
-   break;
-   }
    if (self_delimited) {
-      int sdlen = encode_size(len[count-1], data);
-      data += sdlen;
+      int sdlen = encode_size(len[count-1], ptr);
+      ptr += sdlen;
    }
    /* Copy the actual data */
    for (i=0;i<count;i++)
    {
-      OPUS_COPY(data, frames[i], len[i]);
-      data += len[i];
+      /* Using OPUS_MOVE() instead of OPUS_COPY() in case we're doing in-place
+         padding from opus_packet_pad */
+      OPUS_MOVE(ptr, frames[i], len[i]);
+      ptr += len[i];
+   }
+   if (pad)
+   {
+      for (i=ptr-data;i<maxlen;i++)
+         data[i] = 0;
    }
    return tot_size;
 }
 
 opus_int32 opus_repacketizer_out_range(OpusRepacketizer *rp, int begin, int end, unsigned char *data, opus_int32 maxlen)
 {
-   return opus_repacketizer_out_range_impl(rp, begin, end, data, maxlen, 0);
+   return opus_repacketizer_out_range_impl(rp, begin, end, data, maxlen, 0, 0);
 }
 
 opus_int32 opus_repacketizer_out(OpusRepacketizer *rp, unsigned char *data, opus_int32 maxlen)
 {
-   return opus_repacketizer_out_range_impl(rp, 0, rp->nb_frames, data, maxlen, 0);
+   return opus_repacketizer_out_range_impl(rp, 0, rp->nb_frames, data, maxlen, 0, 0);
 }
 
-
+int opus_packet_pad(unsigned char *data, opus_int32 len, opus_int32 new_len)
+{
+   OpusRepacketizer rp;
+   opus_int32 ret;
+   if (len==new_len)
+      return OPUS_OK;
+   else if (len > new_len)
+      return OPUS_BAD_ARG;
+   opus_repacketizer_init(&rp);
+   /* Moving payload to the end of the packet so we can do in-place padding */
+   OPUS_MOVE(data+new_len-len, data, len);
+   opus_repacketizer_cat(&rp, data+new_len-len, len);
+   ret = opus_repacketizer_out_range_impl(&rp, 0, rp.nb_frames, data, new_len, 0, 1);
+   if (ret > 0)
+      return OPUS_OK;
+   else
+      return ret;
+}