NetEq: Change NetEq's ramp-up behavior after expansions

NetEq tapers down the audio produced through loss concealment when the
expansion has been going on for some time. When audio packets start
coming in again, the signal is ramped back up. Before this change, this
ramp-up could extend over more than one 10 ms block, which made it
necessary to keep track of the scaling factor across blocks. With this
change, the ramp-up is made quicker in the rare cases where it used to
last more than 10 ms, so that the signal always reaches 100% within one
block. This lets us remove the mute_factor_array.
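
For illustration, a minimal sketch of the new increment computation
(the helper name and parameters are made up for this example; the real
logic lives in Merge::Process in the diff below):

  #include <algorithm>

  // Q14 value representing unity gain, as used by NetEq's mute factors.
  constexpr int kUnityGainQ14 = 16384;

  // Returns the per-sample ramp increment in Q20. The baseline slope is
  // 4194 / fs_mult (about 0.004 for narrowband), raised if necessary so
  // that the gain gets back to roughly unity within frame_length samples.
  int RampIncrementQ20(int mute_factor_q14, int frame_length, int fs_mult) {
    // The remaining gain gap is in Q14; shifting left by 6 converts it
    // to Q20 before dividing by the number of samples in the frame.
    const int back_to_fullscale_inc =
        ((kUnityGainQ14 - mute_factor_q14) << 6) / frame_length;
    return std::max(4194 / fs_mult, back_to_fullscale_inc);
  }

  // Example: 16 kHz (fs_mult = 2), 160-sample frame, gain currently at 50%.
  // (16384 - 8192) << 6 = 524288; 524288 / 160 = 3276, which exceeds the
  // baseline 4194 / 2 = 2097, so the faster slope is used.
  const int increment = RampIncrementQ20(8192, 160, 2);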

This change breaks bit-exactness, but careful listening revealed no
audible difference.

This change is part of a larger refactoring of NetEq's PLC code.

Bug: webrtc:9180
Change-Id: I4c513ce3ed8d66f9beec2abfb1f0c7ffaac7a21e
Reviewed-on: https://webrtc-review.googlesource.com/77180
Commit-Queue: Henrik Lundin <henrik.lundin@webrtc.org>
Reviewed-by: Minyue Li <minyue@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#23342}
diff --git a/modules/audio_coding/neteq/merge.cc b/modules/audio_coding/neteq/merge.cc
index b568ff0..fb0bb0d 100644
--- a/modules/audio_coding/neteq/merge.cc
+++ b/modules/audio_coding/neteq/merge.cc
@@ -44,7 +44,6 @@
 Merge::~Merge() = default;
 
 size_t Merge::Process(int16_t* input, size_t input_length,
-                      int16_t* external_mute_factor_array,
                       AudioMultiVector* output) {
   // TODO(hlundin): Change to an enumerator and skip assert.
   assert(fs_hz_ == 8000 || fs_hz_ == 16000 || fs_hz_ ==  32000 ||
@@ -73,20 +72,9 @@
         input_length_per_channel, 0, input_channel.get());
     expanded_[channel].CopyTo(expanded_length, 0, expanded_channel.get());
 
-    int16_t new_mute_factor = SignalScaling(
-        input_channel.get(), input_length_per_channel, expanded_channel.get());
-
-    // Adjust muting factor (product of "main" muting factor and expand muting
-    // factor).
-    int16_t* external_mute_factor = &external_mute_factor_array[channel];
-    *external_mute_factor =
-        (*external_mute_factor * expand_->MuteFactor(channel)) >> 14;
-
-    // Update |external_mute_factor| if it is lower than |new_mute_factor|.
-    if (new_mute_factor > *external_mute_factor) {
-      *external_mute_factor = std::min(new_mute_factor,
-                                       static_cast<int16_t>(16384));
-    }
+    const int16_t new_mute_factor = std::min<int16_t>(
+        16384, SignalScaling(input_channel.get(), input_length_per_channel,
+                             expanded_channel.get()));
 
     if (channel == 0) {
       // Downsample, correlate, and find strongest correlation period for the
@@ -110,18 +98,24 @@
         expanded_length - best_correlation_index);
     interpolation_length = std::min(interpolation_length,
                                     input_length_per_channel);
-    if (*external_mute_factor < 16384) {
+
+    RTC_DCHECK_LE(new_mute_factor, 16384);
+    int16_t mute_factor =
+        std::max(expand_->MuteFactor(channel), new_mute_factor);
+    RTC_DCHECK_GE(mute_factor, 0);
+
+    if (mute_factor < 16384) {
       // Set a suitable muting slope (Q20). 0.004 for NB, 0.002 for WB,
-      // and so on.
-      int increment = 4194 / fs_mult_;
-      *external_mute_factor =
-          static_cast<int16_t>(DspHelper::RampSignal(input_channel.get(),
-                                                     interpolation_length,
-                                                     *external_mute_factor,
-                                                     increment));
+      // and so on, or as fast as it takes to come back to full gain within the
+      // frame length.
+      const int back_to_fullscale_inc = static_cast<int>(
+          ((16384 - mute_factor) << 6) / input_length_per_channel);
+      const int increment = std::max(4194 / fs_mult_, back_to_fullscale_inc);
+      mute_factor = static_cast<int16_t>(DspHelper::RampSignal(
+          input_channel.get(), interpolation_length, mute_factor, increment));
       DspHelper::UnmuteSignal(&input_channel[interpolation_length],
                               input_length_per_channel - interpolation_length,
-                              external_mute_factor, increment,
+                              &mute_factor, increment,
                               &decoded_output[interpolation_length]);
     } else {
       // No muting needed.
@@ -134,12 +128,12 @@
     // Do overlap and mix linearly.
     int16_t increment =
         static_cast<int16_t>(16384 / (interpolation_length + 1));  // In Q14.
-    int16_t mute_factor = 16384 - increment;
+    int16_t local_mute_factor = 16384 - increment;
     memmove(temp_data_.data(), expanded_channel.get(),
             sizeof(int16_t) * best_correlation_index);
     DspHelper::CrossFade(&expanded_channel[best_correlation_index],
                          input_channel.get(), interpolation_length,
-                         &mute_factor, increment, decoded_output);
+                         &local_mute_factor, increment, decoded_output);
 
     output_length = best_correlation_index + input_length_per_channel;
     if (channel == 0) {