API to control target delay in NetEq jitter buffer. NetEq maintains the given delay unless channel conditions require a higher delay.

TEST=unit-test, manual, trybots.
R=henrik.lundin@webrtc.org, henrika@webrtc.org, mflodman@webrtc.org, mikhal@webrtc.org, stefan@webrtc.org, tina.legrand@webrtc.org

Review URL: https://webrtc-codereview.appspot.com/1384005

git-svn-id: http://webrtc.googlecode.com/svn/trunk@4087 4adac7df-926f-26a2-2b94-8c16560cd09d
diff --git a/webrtc/modules/audio_coding/main/interface/audio_coding_module.h b/webrtc/modules/audio_coding/main/interface/audio_coding_module.h
index a2e7efe..c3bbc9b 100644
--- a/webrtc/modules/audio_coding/main/interface/audio_coding_module.h
+++ b/webrtc/modules/audio_coding/main/interface/audio_coding_module.h
@@ -639,8 +639,9 @@
                                         const uint32_t timestamp = 0) = 0;
 
   ///////////////////////////////////////////////////////////////////////////
-  // int32_t SetMinimumPlayoutDelay()
-  // Set Minimum playout delay, used for lip-sync.
+  // int SetMinimumPlayoutDelay()
+  // Set a minimum for the playout delay, used for lip-sync. NetEq maintains
+  // such a delay unless channel conditions require a higher delay.
   //
   // Input:
   //   -time_ms            : minimum delay in milliseconds.
@@ -649,7 +650,15 @@
   //   -1 if failed to set the delay,
   //    0 if the minimum delay is set.
   //
-  virtual int32_t SetMinimumPlayoutDelay(const int32_t time_ms) = 0;
+  virtual int SetMinimumPlayoutDelay(int time_ms) = 0;
+
+  //
+  // The shortest latency, in milliseconds, required by jitter buffer. This
+  // is computed based on inter-arrival times and playout mode of NetEq. The
+  // actual delay is the maximum of least-required-delay and the minimum-delay
+  // specified by SetMinimumPlayoutDelay() API.
+  //
+  virtual int LeastRequiredDelayMs() const = 0;
 
   ///////////////////////////////////////////////////////////////////////////
   // int32_t RegisterIncomingMessagesCallback()
@@ -945,8 +954,9 @@
   // Set an initial delay for playout.
   // An initial delay yields ACM playout silence until equivalent of |delay_ms|
   // audio payload is accumulated in NetEq jitter. Thereafter, ACM pulls audio
-  // from NetEq in its regular fashion, and the given delay is maintained as
-  // "minimum playout delay."
+  // from NetEq in its regular fashion, and the given delay is maintained
+  // throughout the call, unless channel conditions require a higher jitter
+  // buffer delay.
   //
   // Input:
   //   -delay_ms           : delay in milliseconds.
diff --git a/webrtc/modules/audio_coding/main/source/acm_neteq.cc b/webrtc/modules/audio_coding/main/source/acm_neteq.cc
index f6b64d7..f2eafd7 100644
--- a/webrtc/modules/audio_coding/main/source/acm_neteq.cc
+++ b/webrtc/modules/audio_coding/main/source/acm_neteq.cc
@@ -44,12 +44,12 @@
       received_stereo_(false),
       master_slave_info_(NULL),
       previous_audio_activity_(AudioFrame::kVadUnknown),
-      extra_delay_(0),
       callback_crit_sect_(CriticalSectionWrapper::CreateCriticalSection()),
       min_of_max_num_packets_(0),
       min_of_buffer_size_bytes_(0),
       per_packet_overhead_bytes_(0),
-      av_sync_(false) {
+      av_sync_(false),
+      minimum_delay_ms_(0) {
   for (int n = 0; n < MAX_NUM_SLAVE_NETEQ + 1; n++) {
     is_initialized_[n] = false;
     ptr_vadinst_[n] = NULL;
@@ -270,24 +270,6 @@
   return 0;
 }
 
-int32_t ACMNetEQ::SetExtraDelay(const int32_t delay_in_ms) {
-  CriticalSectionScoped lock(neteq_crit_sect_);
-
-  for (int16_t idx = 0; idx < num_slaves_ + 1; idx++) {
-    if (!is_initialized_[idx]) {
-      WEBRTC_TRACE(webrtc::kTraceError, webrtc::kTraceAudioCoding, id_,
-                   "SetExtraDelay: NetEq is not initialized.");
-      return -1;
-    }
-    if (WebRtcNetEQ_SetExtraDelay(inst_[idx], delay_in_ms) < 0) {
-      LogError("SetExtraDelay", idx);
-      return -1;
-    }
-  }
-  extra_delay_ = delay_in_ms;
-  return 0;
-}
-
 int32_t ACMNetEQ::SetAVTPlayout(const bool enable) {
   CriticalSectionScoped lock(neteq_crit_sect_);
   if (avt_playout_ != enable) {
@@ -1037,14 +1019,6 @@
     num_slaves_ = 1;
     is_initialized_[slave_idx] = true;
 
-    // Set Slave delay as all other instances.
-    if (WebRtcNetEQ_SetExtraDelay(inst_[slave_idx], extra_delay_) < 0) {
-      LogError("SetExtraDelay", slave_idx);
-      WEBRTC_TRACE(webrtc::kTraceError, webrtc::kTraceAudioCoding, id_,
-                   "AddSlave: AddSlave Failed, Could not set delay");
-      return -1;
-    }
-
     // Set AVT
     if (WebRtcNetEQ_SetAVTPlayout(inst_[slave_idx],
                                   (avt_playout_) ? 1 : 0) < 0) {
@@ -1093,8 +1067,13 @@
                    "AddSlave: AddSlave Failed, Could not Set Playout Mode.");
       return -1;
     }
+
     // Set AV-sync for the slave.
     WebRtcNetEQ_EnableAVSync(inst_[slave_idx], av_sync_ ? 1 : 0);
+
+    // Set minimum delay.
+    if (minimum_delay_ms_ > 0)
+      WebRtcNetEQ_SetMinimumDelay(inst_[slave_idx], minimum_delay_ms_);
   }
 
   return 0;
@@ -1119,4 +1098,23 @@
   }
 }
 
+int ACMNetEQ::SetMinimumDelay(int minimum_delay_ms) {
+  CriticalSectionScoped lock(neteq_crit_sect_);
+  for (int i = 0; i < num_slaves_ + 1; ++i) {
+    assert(is_initialized_[i]);
+    if (WebRtcNetEQ_SetMinimumDelay(inst_[i], minimum_delay_ms) < 0)
+      return -1;
+  }
+  minimum_delay_ms_ = minimum_delay_ms;
+  return 0;
+}
+
+int ACMNetEQ::LeastRequiredDelayMs() const {
+  CriticalSectionScoped lock(neteq_crit_sect_);
+  assert(is_initialized_[0]);
+
+  // Sufficient to query the master.
+  return WebRtcNetEQ_GetRequiredDelayMs(inst_[0]);
+}
+
 }  // namespace webrtc
diff --git a/webrtc/modules/audio_coding/main/source/acm_neteq.h b/webrtc/modules/audio_coding/main/source/acm_neteq.h
index ed81544..e70ac24 100644
--- a/webrtc/modules/audio_coding/main/source/acm_neteq.h
+++ b/webrtc/modules/audio_coding/main/source/acm_neteq.h
@@ -130,18 +130,6 @@
                                int16_t num_codecs);
 
   //
-  // SetExtraDelay()
-  // Sets a |delay_in_ms| milliseconds extra delay in NetEQ.
-  //
-  // Input:
-  //   - delay_in_ms          : Extra delay in milliseconds.
-  //
-  // Return value             : 0 if ok.
-  //                           <0 if NetEQ returned an error.
-  //
-  int32_t SetExtraDelay(const int32_t delay_in_ms);
-
-  //
   // SetAVTPlayout()
   // Enable/disable playout of AVT payloads.
   //
@@ -301,6 +289,20 @@
   //
   void EnableAVSync(bool enable);
 
+  //
+  // Set a minimum delay in NetEq. Unless channel condition dictates a longer
+  // delay, the given delay is maintained by NetEq.
+  //
+  int SetMinimumDelay(int minimum_delay_ms);
+
+  //
+  // The shortest latency, in milliseconds, required by jitter buffer. This
+  // is computed based on inter-arrival times and playout mode of NetEq. The
+  // actual delay is the maximum of least-required-delay and the minimum-delay
+  // specified by SetMinimumPlayoutDelay() API.
+  //
+  int LeastRequiredDelayMs() const;
+
  private:
   //
   // RTPPack()
@@ -365,7 +367,6 @@
   bool received_stereo_;
   void* master_slave_info_;
   AudioFrame::VADActivity previous_audio_activity_;
-  int32_t extra_delay_;
 
   CriticalSectionWrapper* callback_crit_sect_;
   // Minimum of "max number of packets," among all NetEq instances.
@@ -376,6 +377,8 @@
 
   // Keep track of AV-sync. Just used to set the slave when a slave is added.
   bool av_sync_;
+
+  int minimum_delay_ms_;
 };
 
 }  // namespace webrtc
diff --git a/webrtc/modules/audio_coding/main/source/audio_coding_module.gypi b/webrtc/modules/audio_coding/main/source/audio_coding_module.gypi
index 753291b..e6ba500 100644
--- a/webrtc/modules/audio_coding/main/source/audio_coding_module.gypi
+++ b/webrtc/modules/audio_coding/main/source/audio_coding_module.gypi
@@ -137,14 +137,15 @@
              '../test/RTPFile.cc',
              '../test/SpatialAudio.cc',
              '../test/TestAllCodecs.cc',
+             '../test/target_delay_unittest.cc',
              '../test/Tester.cc',
              '../test/TestFEC.cc',
              '../test/TestStereo.cc',
              '../test/TestVADDTX.cc',
              '../test/TimedTrace.cc',
              '../test/TwoWayCommunication.cc',
-             '../test/utility.cc',
              '../test/initial_delay_unittest.cc',
+             '../test/utility.cc',
           ],
         },
         {
diff --git a/webrtc/modules/audio_coding/main/source/audio_coding_module_impl.cc b/webrtc/modules/audio_coding/main/source/audio_coding_module_impl.cc
index be9befc..5eb631a 100644
--- a/webrtc/modules/audio_coding/main/source/audio_coding_module_impl.cc
+++ b/webrtc/modules/audio_coding/main/source/audio_coding_module_impl.cc
@@ -2116,8 +2116,11 @@
 
     if (av_sync_ || track_neteq_buffer_) {
       last_incoming_send_timestamp_ = rtp_info.header.timestamp;
-      first_payload_received_ = true;
     }
+
+    // Set the following regardless of tracking NetEq buffer or being in
+    // AV-sync mode.
+    first_payload_received_ = true;
   }
   return 0;
 }
@@ -2192,8 +2195,7 @@
 }
 
 // Minimum playout delay (Used for lip-sync).
-int32_t AudioCodingModuleImpl::SetMinimumPlayoutDelay(
-    const int32_t time_ms) {
+int AudioCodingModuleImpl::SetMinimumPlayoutDelay(int time_ms) {
   if ((time_ms < 0) || (time_ms > 10000)) {
     WEBRTC_TRACE(webrtc::kTraceError, webrtc::kTraceAudioCoding, id_,
                  "Delay must be in the range of 0-10000 milliseconds.");
@@ -2205,7 +2207,7 @@
     if (track_neteq_buffer_ && first_payload_received_)
       return 0;
   }
-  return neteq_.SetExtraDelay(time_ms);
+  return neteq_.SetMinimumDelay(time_ms);
 }
 
 // Get Dtmf playout status.
@@ -2937,7 +2939,7 @@
   }
   av_sync_ = true;
   neteq_.EnableAVSync(av_sync_);
-  return neteq_.SetExtraDelay(delay_ms);
+  return neteq_.SetMinimumDelay(delay_ms);
 }
 
 bool AudioCodingModuleImpl::GetSilence(int desired_sample_rate_hz,
@@ -3041,4 +3043,8 @@
           initial_delay_ms_ * in_sample_rate_khz));
 }
 
+int AudioCodingModuleImpl::LeastRequiredDelayMs() const {
+  return std::max(neteq_.LeastRequiredDelayMs(), initial_delay_ms_);
+}
+
 }  // namespace webrtc
diff --git a/webrtc/modules/audio_coding/main/source/audio_coding_module_impl.h b/webrtc/modules/audio_coding/main/source/audio_coding_module_impl.h
index fe1564d..a0ae014 100644
--- a/webrtc/modules/audio_coding/main/source/audio_coding_module_impl.h
+++ b/webrtc/modules/audio_coding/main/source/audio_coding_module_impl.h
@@ -167,8 +167,17 @@
                           const uint8_t payload_type,
                           const uint32_t timestamp = 0);
 
-  // Minimum playout delay (used for lip-sync).
-  int32_t SetMinimumPlayoutDelay(const int32_t time_ms);
+  // NetEq minimum playout delay (used for lip-sync). The actual target delay
+  // is the max of |time_ms| and the required delay dictated by the channel.
+  int SetMinimumPlayoutDelay(int time_ms);
+
+  //
+  // The shortest latency, in milliseconds, required by jitter buffer. This
+  // is computed based on inter-arrival times and playout mode of NetEq. The
+  // actual delay is the maximum of least-required-delay and the minimum-delay
+  // specified by SetMinimumPlayoutDelay() API.
+  //
+  int LeastRequiredDelayMs() const;
 
   // Configure Dtmf playout status i.e on/off playout the incoming outband Dtmf
   // tone.
diff --git a/webrtc/modules/audio_coding/main/test/target_delay_unittest.cc b/webrtc/modules/audio_coding/main/test/target_delay_unittest.cc
new file mode 100644
index 0000000..0ae2529
--- /dev/null
+++ b/webrtc/modules/audio_coding/main/test/target_delay_unittest.cc
@@ -0,0 +1,172 @@
+/*
+ *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "gtest/gtest.h"
+#include "testsupport/fileutils.h"
+#include "webrtc/common_types.h"
+#include "webrtc/modules/audio_coding/main/interface/audio_coding_module.h"
+#include "webrtc/modules/interface/module_common_types.h"
+#include "webrtc/system_wrappers/interface/sleep.h"
+
+namespace webrtc {
+class TargetDelayTest : public ::testing::Test {
+ protected:
+  static const int kSampleRateHz = 16000;
+  static const int kNum10msPerFrame = 2;
+  static const int kFrameSizeSamples = 320;  // 20 ms @ 16 kHz.
+  // payload-len = frame-samples * 2 bytes/sample.
+  static const int kPayloadLenBytes = 320 * 2;
+  // Inter-arrival time in number of packets in a jittery channel. One is no
+  // jitter.
+  static const int kInterarrivalJitterPacket = 2;
+
+  TargetDelayTest()
+      : acm_(AudioCodingModule::Create(0)) {}
+
+  ~TargetDelayTest() {
+    AudioCodingModule::Destroy(acm_);
+  }
+
+  void SetUp() {
+    EXPECT_TRUE(acm_ != NULL);
+
+    CodecInst codec;
+    ASSERT_EQ(0, AudioCodingModule::Codec("L16", &codec, kSampleRateHz, 1));
+    ASSERT_EQ(0, acm_->InitializeReceiver());
+    ASSERT_EQ(0, acm_->RegisterReceiveCodec(codec));
+
+    rtp_info_.header.payloadType = codec.pltype;
+    rtp_info_.header.timestamp = 0;
+    rtp_info_.header.ssrc = 0x12345678;
+    rtp_info_.header.markerBit = false;
+    rtp_info_.header.sequenceNumber = 0;
+    rtp_info_.type.Audio.channel = 1;
+    rtp_info_.type.Audio.isCNG = false;
+    rtp_info_.frameType = kAudioFrameSpeech;
+  }
+
+  void Push() {
+    rtp_info_.header.timestamp += kFrameSizeSamples;
+    rtp_info_.header.sequenceNumber++;
+    uint8_t payload[kPayloadLenBytes];  // Doesn't need to be initialized.
+    ASSERT_EQ(0, acm_->IncomingPacket(payload, kFrameSizeSamples * 2,
+                                      rtp_info_));
+  }
+
+  // Pull audio equivalent to the amount of audio in one RTP packet.
+  void Pull() {
+    AudioFrame frame;
+    for (int k = 0; k < kNum10msPerFrame; ++k) {  // Pull one frame.
+      ASSERT_EQ(0, acm_->PlayoutData10Ms(-1, &frame));
+      // Had to use ASSERT_TRUE, ASSERT_EQ generated error.
+      ASSERT_TRUE(kSampleRateHz == frame.sample_rate_hz_);
+      ASSERT_EQ(1, frame.num_channels_);
+      ASSERT_TRUE(kSampleRateHz / 100 == frame.samples_per_channel_);
+    }
+  }
+
+  void Run(bool clean) {
+    for (int n = 0; n < 10; ++n) {
+      for (int m = 0; m < 5; ++m) {
+        Push();
+        Pull();
+      }
+
+      if (!clean) {
+        for (int m = 0; m < 10; ++m) {  // Long enough to trigger delay change.
+          Push();
+          for (int n = 0; n < kInterarrivalJitterPacket; ++n)
+            Pull();
+        }
+      }
+    }
+  }
+
+  int SetMinimumDelay(int delay_ms) {
+    return acm_->SetMinimumPlayoutDelay(delay_ms);
+  }
+
+  int GetCurrentOptimalDelayMs() {
+    ACMNetworkStatistics stats;
+    acm_->NetworkStatistics(&stats);
+    return stats.preferredBufferSize;
+  }
+
+  int RequiredDelay() {
+    return acm_->LeastRequiredDelayMs();
+  }
+
+  AudioCodingModule* acm_;
+  WebRtcRTPHeader rtp_info_;
+};
+
+TEST_F(TargetDelayTest, OutOfRangeInput) {
+  EXPECT_EQ(-1, SetMinimumDelay(-1));
+  EXPECT_EQ(-1, SetMinimumDelay(10001));
+}
+
+TEST_F(TargetDelayTest, NoTargetDelayBufferSizeChanges) {
+  for (int n = 0; n < 30; ++n)  // Run enough iterations.
+    Run(true);
+  int clean_optimal_delay = GetCurrentOptimalDelayMs();
+  Run(false);  // Run with jitter.
+  int jittery_optimal_delay = GetCurrentOptimalDelayMs();
+  EXPECT_GT(jittery_optimal_delay, clean_optimal_delay);
+  int required_delay = RequiredDelay();
+  EXPECT_GT(required_delay, 0);
+  EXPECT_NEAR(required_delay, jittery_optimal_delay, 1);
+}
+
+TEST_F(TargetDelayTest, WithTargetDelayBufferNotChanging) {
+  // A target delay that is one packet larger than jitter.
+  const int kTargetDelayMs = (kInterarrivalJitterPacket + 1) *
+      kNum10msPerFrame * 10;
+  ASSERT_EQ(0, SetMinimumDelay(kTargetDelayMs));
+  for (int n = 0; n < 30; ++n)  // Run enough iterations to fill up the buffer.
+    Run(true);
+  int clean_optimal_delay = GetCurrentOptimalDelayMs();
+  EXPECT_EQ(kTargetDelayMs, clean_optimal_delay);
+  Run(false);  // Run with jitter.
+  int jittery_optimal_delay = GetCurrentOptimalDelayMs();
+  EXPECT_EQ(jittery_optimal_delay, clean_optimal_delay);
+}
+
+TEST_F(TargetDelayTest, RequiredDelayAtCorrectRange) {
+  for (int n = 0; n < 30; ++n)  // Run clean and store delay.
+    Run(true);
+  int clean_optimal_delay = GetCurrentOptimalDelayMs();
+
+  // A relatively large delay.
+  const int kTargetDelayMs = (kInterarrivalJitterPacket + 10) *
+      kNum10msPerFrame * 10;
+  ASSERT_EQ(0, SetMinimumDelay(kTargetDelayMs));
+  for (int n = 0; n < 300; ++n)  // Run enough iterations to fill up the buffer.
+    Run(true);
+  Run(false);  // Run with jitter.
+
+  int jittery_optimal_delay = GetCurrentOptimalDelayMs();
+  EXPECT_EQ(kTargetDelayMs, jittery_optimal_delay);
+
+  int required_delay = RequiredDelay();
+
+  // Checking |required_delay| is in correct range.
+  EXPECT_GT(required_delay, 0);
+  EXPECT_GT(jittery_optimal_delay, required_delay);
+  EXPECT_GT(required_delay, clean_optimal_delay);
+
+  // A tighter check for the value of |required_delay|.
+  // The jitter forces a delay of
+  // |kInterarrivalJitterPacket * kNum10msPerFrame * 10| milliseconds. So we
+  // expect |required_delay| be close to that.
+  EXPECT_NEAR(kInterarrivalJitterPacket * kNum10msPerFrame * 10,
+              required_delay, 1);
+}
+
+}  // namespace webrtc
diff --git a/webrtc/modules/audio_coding/neteq/automode.c b/webrtc/modules/audio_coding/neteq/automode.c
index edee98e..ea6fa8d 100644
--- a/webrtc/modules/audio_coding/neteq/automode.c
+++ b/webrtc/modules/audio_coding/neteq/automode.c
@@ -216,6 +216,14 @@
             streamingMode);
         if (tempvar > 0)
         {
+            int high_lim_delay;
+            /* Convert the minimum delay from milliseconds to packets in Q8.
+             * |fsHz| is sampling rate in Hertz, and |inst->packetSpeechLenSamp|
+             * is the number of samples per packet (according to the last
+             * decoding).
+             */
+            int32_t minimum_delay_q8 = ((inst->minimum_delay_ms *
+                (fsHz / 1000)) << 8) / inst->packetSpeechLenSamp;
             inst->optBufLevel = tempvar;
 
             if (streamingMode != 0)
@@ -224,6 +232,13 @@
                     inst->maxCSumIatQ8);
             }
 
+            /* The required delay. */
+            inst->required_delay_q8 = inst->optBufLevel;
+
+            // Maintain the target delay.
+            inst->optBufLevel = WEBRTC_SPL_MAX(inst->optBufLevel,
+                                               minimum_delay_q8);
+
             /*********/
             /* Limit */
             /*********/
@@ -238,8 +253,12 @@
             maxBufLen = WEBRTC_SPL_LSHIFT_W32(maxBufLen, 8); /* shift to Q8 */
 
             /* Enforce upper limit; 75% of maxBufLen */
-            inst->optBufLevel = WEBRTC_SPL_MIN( inst->optBufLevel,
-                (maxBufLen >> 1) + (maxBufLen >> 2) ); /* 1/2 + 1/4 = 75% */
+            /* 1/2 + 1/4 = 75% */
+            high_lim_delay = (maxBufLen >> 1) + (maxBufLen >> 2);
+            inst->optBufLevel = WEBRTC_SPL_MIN(inst->optBufLevel,
+                                               high_lim_delay);
+            inst->required_delay_q8 = WEBRTC_SPL_MIN(inst->required_delay_q8,
+                                                     high_lim_delay);
         }
         else
         {
@@ -700,6 +719,7 @@
      */
     inst->optBufLevel = WEBRTC_SPL_MIN(4,
         (maxBufLenPackets >> 1) + (maxBufLenPackets >> 1)); /* 75% of maxBufLenPackets */
+    inst->required_delay_q8 = inst->optBufLevel;
     inst->levelFiltFact = 253;
 
     /*
diff --git a/webrtc/modules/audio_coding/neteq/automode.h b/webrtc/modules/audio_coding/neteq/automode.h
index 5996a51..49878c0 100644
--- a/webrtc/modules/audio_coding/neteq/automode.h
+++ b/webrtc/modules/audio_coding/neteq/automode.h
@@ -89,6 +89,12 @@
      reached 0 */
     int16_t extraDelayMs; /* extra delay for sync with video */
 
+    int minimum_delay_ms; /* Desired delay, NetEq maintains this amount of
+     delay unless jitter statistics suggests a higher value. */
+    int required_delay_q8; /* Smallest delay required. This is computed
+     according to inter-arrival time and playout mode. It has the same unit
+     as |optBufLevel|. */
+
     /* Peak-detection */
     /* vector with the latest peak periods (peak spacing in samples) */
     uint32_t peakPeriodSamp[NUM_PEAKS];
diff --git a/webrtc/modules/audio_coding/neteq/interface/webrtc_neteq_internal.h b/webrtc/modules/audio_coding/neteq/interface/webrtc_neteq_internal.h
index 4eefce0..021704c 100644
--- a/webrtc/modules/audio_coding/neteq/interface/webrtc_neteq_internal.h
+++ b/webrtc/modules/audio_coding/neteq/interface/webrtc_neteq_internal.h
@@ -309,6 +309,19 @@
                              WebRtcNetEQ_RTPInfo* rtp_info,
                              uint32_t receive_timestamp);
 
+/*
+ * Set a minimum latency for the jitter buffer. The overall delay is the max of
+ * |minimum_delay_ms| and the latency that is internally computed based on the
+ * inter-arrival times.
+ */
+int WebRtcNetEQ_SetMinimumDelay(void *inst, int minimum_delay_ms);
+
+/*
+ * Get the least required delay in milliseconds given inter-arrival times
+ * and playout mode.
+ */
+int WebRtcNetEQ_GetRequiredDelayMs(const void* inst);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/webrtc/modules/audio_coding/neteq/mcu_reset.c b/webrtc/modules/audio_coding/neteq/mcu_reset.c
index 3aae4ce..c8a4cd7 100644
--- a/webrtc/modules/audio_coding/neteq/mcu_reset.c
+++ b/webrtc/modules/audio_coding/neteq/mcu_reset.c
@@ -32,7 +32,9 @@
     inst->main_inst = NULL;
     inst->one_desc = 0;
     inst->BufferStat_inst.Automode_inst.extraDelayMs = 0;
+    inst->BufferStat_inst.Automode_inst.minimum_delay_ms = 0;
     inst->NetEqPlayoutMode = kPlayoutOn;
+    inst->av_sync = 0;
 
     WebRtcNetEQ_DbReset(&inst->codec_DB_inst);
     memset(&inst->PayloadSplit_inst, 0, sizeof(SplitInfo_t));
diff --git a/webrtc/modules/audio_coding/neteq/webrtc_neteq.c b/webrtc/modules/audio_coding/neteq/webrtc_neteq.c
index 31940c8..8347925 100644
--- a/webrtc/modules/audio_coding/neteq/webrtc_neteq.c
+++ b/webrtc/modules/audio_coding/neteq/webrtc_neteq.c
@@ -437,6 +437,7 @@
     NetEqMainInst->MCUinst.first_packet = 1;
     NetEqMainInst->MCUinst.one_desc = 0;
     NetEqMainInst->MCUinst.BufferStat_inst.Automode_inst.extraDelayMs = 0;
+    NetEqMainInst->MCUinst.BufferStat_inst.Automode_inst.minimum_delay_ms = 0;
     NetEqMainInst->MCUinst.NoOfExpandCalls = 0;
     NetEqMainInst->MCUinst.fs = fs;
 
@@ -529,6 +530,19 @@
     return (0);
 }
 
+int WebRtcNetEQ_SetMinimumDelay(void *inst, int minimum_delay_ms) {
+  MainInst_t *NetEqMainInst = (MainInst_t*) inst;
+  if (NetEqMainInst == NULL)
+    return -1;
+  if (minimum_delay_ms < 0 || minimum_delay_ms > 10000) {
+      NetEqMainInst->ErrorCode = -FAULTY_DELAYVALUE;
+      return -1;
+  }
+  NetEqMainInst->MCUinst.BufferStat_inst.Automode_inst.minimum_delay_ms =
+      minimum_delay_ms;
+  return 0;
+}
+
 int WebRtcNetEQ_SetPlayoutMode(void *inst, enum WebRtcNetEQPlayoutMode playoutMode)
 {
     MainInst_t *NetEqMainInst = (MainInst_t*) inst;
@@ -1213,7 +1227,7 @@
     /* Get optimal buffer size */
     /***************************/
 
-    if (NetEqMainInst->MCUinst.fs != 0 && NetEqMainInst->MCUinst.fs <= WEBRTC_SPL_WORD16_MAX)
+    if (NetEqMainInst->MCUinst.fs != 0)
     {
         /* preferredBufferSize = Bopt * packSizeSamples / (fs/1000) */
         stats->preferredBufferSize
@@ -1693,3 +1707,25 @@
   }
   return SYNC_PAYLOAD_LEN_BYTES;
 }
+
+int WebRtcNetEQ_GetRequiredDelayMs(const void* inst) {
+  const MainInst_t* NetEqMainInst = (MainInst_t*)inst;
+  const AutomodeInst_t* auto_mode = (NetEqMainInst == NULL) ? NULL :
+      &NetEqMainInst->MCUinst.BufferStat_inst.Automode_inst;
+
+  /* Instance sanity */
+  if (NetEqMainInst == NULL || auto_mode == NULL)
+    return 0;
+
+  if (NetEqMainInst->MCUinst.fs == 0)
+    return 0;  // Sampling rate not initialized.
+
+  /* |required_delay_q8| has the unit of packets in Q8 domain, therefore,
+   * the corresponding delay is
+   * required_delay_ms = (1000 * required_delay_q8 * samples_per_packet /
+   *     sample_rate_hz) / 256;
+   */
+  return (auto_mode->required_delay_q8 *
+      ((auto_mode->packetSpeechLenSamp * 1000) / NetEqMainInst->MCUinst.fs) +
+      128) >> 8;
+}
diff --git a/webrtc/video_engine/stream_synchronization.cc b/webrtc/video_engine/stream_synchronization.cc
index 6ad579c..9490d10 100644
--- a/webrtc/video_engine/stream_synchronization.cc
+++ b/webrtc/video_engine/stream_synchronization.cc
@@ -29,12 +29,14 @@
     extra_video_delay_ms = 0;
     last_video_delay_ms = 0;
     extra_audio_delay_ms = 0;
+    last_audio_delay_ms = 0;
     network_delay = 120;
   }
 
   int extra_video_delay_ms;
   int last_video_delay_ms;
   int extra_audio_delay_ms;
+  int last_audio_delay_ms;
   int network_delay;
 };
 
@@ -87,9 +89,9 @@
 
 bool StreamSynchronization::ComputeDelays(int relative_delay_ms,
                                           int current_audio_delay_ms,
-                                          int* extra_audio_delay_ms,
+                                          int* total_audio_delay_target_ms,
                                           int* total_video_delay_target_ms) {
-  assert(extra_audio_delay_ms && total_video_delay_target_ms);
+  assert(total_audio_delay_target_ms && total_video_delay_target_ms);
 
   int current_video_delay_ms = *total_video_delay_target_ms;
   WEBRTC_TRACE(webrtc::kTraceInfo, webrtc::kTraceVideo, video_channel_id_,
@@ -173,17 +175,26 @@
   new_video_delay_ms =
       std::min(new_video_delay_ms, base_target_delay_ms_ + kMaxDeltaDelayMs);
 
-  // Make sure that audio is never below our target.
-  channel_delay_->extra_audio_delay_ms =
-      std::max(base_target_delay_ms_, channel_delay_->extra_audio_delay_ms);
+  int new_audio_delay_ms;
+  if (channel_delay_->extra_audio_delay_ms > base_target_delay_ms_) {
+    new_audio_delay_ms = channel_delay_->extra_audio_delay_ms;
+  } else {
+    // No change to the audio delay. We are changing video and we only
+    // allow to change one at the time.
+    new_audio_delay_ms = channel_delay_->last_audio_delay_ms;
+  }
+
+  // Make sure that we don't go below the extra audio delay.
+  new_audio_delay_ms = std::max(
+      new_audio_delay_ms, channel_delay_->extra_audio_delay_ms);
 
   // Verify we don't go above the maximum allowed audio delay.
-  channel_delay_->extra_audio_delay_ms = std::min(
-      channel_delay_->extra_audio_delay_ms,
-      base_target_delay_ms_ + kMaxDeltaDelayMs);
+  new_audio_delay_ms =
+      std::min(new_audio_delay_ms, base_target_delay_ms_ + kMaxDeltaDelayMs);
 
-  // Remember our last video delay.
+  // Remember our last audio and video delays.
   channel_delay_->last_video_delay_ms = new_video_delay_ms;
+  channel_delay_->last_audio_delay_ms = new_audio_delay_ms;
 
   WEBRTC_TRACE(webrtc::kTraceInfo, webrtc::kTraceVideo, video_channel_id_,
       "Sync video delay %d ms for video channel and audio delay %d for audio "
@@ -192,8 +203,8 @@
       audio_channel_id_);
 
   // Return values.
-  *extra_audio_delay_ms = channel_delay_->extra_audio_delay_ms;
   *total_video_delay_target_ms = new_video_delay_ms;
+  *total_audio_delay_target_ms = new_audio_delay_ms;
   return true;
 }
 
@@ -201,6 +212,8 @@
   // Initial extra delay for audio (accounting for existing extra delay).
   channel_delay_->extra_audio_delay_ms +=
       target_delay_ms - base_target_delay_ms_;
+  channel_delay_->last_audio_delay_ms +=
+      target_delay_ms - base_target_delay_ms_;
 
   // The video delay is compared to the last value (and how much we can update
   // is limited by that as well).
diff --git a/webrtc/video_engine/vie_sync_module.cc b/webrtc/video_engine/vie_sync_module.cc
index d0617d6..06d4196 100644
--- a/webrtc/video_engine/vie_sync_module.cc
+++ b/webrtc/video_engine/vie_sync_module.cc
@@ -153,21 +153,24 @@
   TRACE_COUNTER1("webrtc", "SyncCurrentAudioDelay",
                  audio_jitter_buffer_delay_ms);
   TRACE_COUNTER1("webrtc", "SyncRelativeDelay", relative_delay_ms);
-  int extra_audio_delay_ms = 0;
+  int total_audio_delay_target_ms = 0;
   // Calculate the necessary extra audio delay and desired total video
   // delay to get the streams in sync.
+  int current_audio_delay = audio_jitter_buffer_delay_ms +
+      playout_buffer_delay_ms;
   if (!sync_->ComputeDelays(relative_delay_ms,
-                            audio_jitter_buffer_delay_ms,
-                            &extra_audio_delay_ms,
+                            current_audio_delay,
+                            &total_audio_delay_target_ms,
                             &total_video_delay_target_ms)) {
     return 0;
   }
 
-  TRACE_COUNTER1("webrtc", "SyncExtraAudioDelayTarget", extra_audio_delay_ms);
+  TRACE_COUNTER1("webrtc", "SyncTotalAudioDelayTarget",
+                 total_audio_delay_target_ms);
   TRACE_COUNTER1("webrtc", "SyncTotalVideoDelayTarget",
                  total_video_delay_target_ms);
   if (voe_sync_interface_->SetMinimumPlayoutDelay(
-      voe_channel_id_, extra_audio_delay_ms) == -1) {
+      voe_channel_id_, total_audio_delay_target_ms) == -1) {
     WEBRTC_TRACE(webrtc::kTraceDebug, webrtc::kTraceVideo, vie_channel_->Id(),
                  "Error setting voice delay");
   }
diff --git a/webrtc/voice_engine/channel.cc b/webrtc/voice_engine/channel.cc
index 0728990..936ddd1 100644
--- a/webrtc/voice_engine/channel.cc
+++ b/webrtc/voice_engine/channel.cc
@@ -950,6 +950,7 @@
     _countDeadDetections(0),
     _outputSpeechType(AudioFrame::kNormalSpeech),
     _average_jitter_buffer_delay_us(0),
+    least_required_delay_ms_(0),
     _previousTimestamp(0),
     _recPacketDelayMs(20),
     _RxVadDetection(false),
@@ -5092,6 +5093,9 @@
     return;
   }
 
+  // Update the least required delay.
+  least_required_delay_ms_ = _audioCodingModule.LeastRequiredDelayMs();
+
   if (STR_CASE_CMP("G722", current_receive_codec.plname) == 0) {
     // Even though the actual sampling rate for G.722 audio is
     // 16,000 Hz, the RTP clock rate for the G722 payload format is
diff --git a/webrtc/voice_engine/channel.h b/webrtc/voice_engine/channel.h
index 3914156..1bf5e51 100644
--- a/webrtc/voice_engine/channel.h
+++ b/webrtc/voice_engine/channel.h
@@ -205,6 +205,7 @@
     // VoEVideoSync
     bool GetDelayEstimate(int* jitter_buffer_delay_ms,
                           int* playout_buffer_delay_ms) const;
+    int least_required_delay_ms() const { return least_required_delay_ms_; }
     int SetInitialPlayoutDelay(int delay_ms);
     int SetMinimumPlayoutDelay(int delayMs);
     int GetPlayoutTimestamp(unsigned int& timestamp);
@@ -536,6 +537,7 @@
     AudioFrame::SpeechType _outputSpeechType;
     // VoEVideoSync
     uint32_t _average_jitter_buffer_delay_us;
+    int least_required_delay_ms_;
     uint32_t _previousTimestamp;
     uint16_t _recPacketDelayMs;
     // VoEAudioProcessing
diff --git a/webrtc/voice_engine/include/voe_video_sync.h b/webrtc/voice_engine/include/voe_video_sync.h
index 857422e..a3770ea 100644
--- a/webrtc/voice_engine/include/voe_video_sync.h
+++ b/webrtc/voice_engine/include/voe_video_sync.h
@@ -57,11 +57,18 @@
     // Gets the current sound card buffer size (playout delay).
     virtual int GetPlayoutBufferSize(int& buffer_ms) = 0;
 
-    // Sets an additional delay for the playout jitter buffer.
+    // Sets a minimum target delay for the jitter buffer. This delay is
+    // maintained by the jitter buffer, unless channel condition (jitter in
+    // inter-arrival times) dictates a higher required delay. The overall
+    // jitter buffer delay is max of |delay_ms| and the latency that NetEq
+    // computes based on inter-arrival times and its playout mode.
     virtual int SetMinimumPlayoutDelay(int channel, int delay_ms) = 0;
 
     // Sets an initial delay for the playout jitter buffer. The playout of the
-    // audio is delayed by |delay_ms| in millisecond.
+    // audio is delayed by |delay_ms| in milliseconds. Thereafter, the delay is
+    // maintained, unless NetEq's internal mechanism requires a higher latency.
+    // Such a latency is computed based on inter-arrival times and NetEq's
+    // playout mode.
     virtual int SetInitialPlayoutDelay(int channel, int delay_ms) = 0;
 
     // Gets the |jitter_buffer_delay_ms| (including the algorithmic delay), and
@@ -70,6 +77,12 @@
                                  int* jitter_buffer_delay_ms,
                                  int* playout_buffer_delay_ms) = 0;
 
+    // Returns the least required jitter buffer delay. This is computed by
+    // the jitter buffer based on the inter-arrival time of RTP packets and
+    // playout mode. NetEq maintains this latency unless a higher value is
+    // requested by calling SetMinimumPlayoutDelay().
+    virtual int GetLeastRequiredDelayMs(int channel) const = 0;
+
     // Manual initialization of the RTP timestamp.
     virtual int SetInitTimestamp(int channel, unsigned int timestamp) = 0;
 
diff --git a/webrtc/voice_engine/voe_video_sync_impl.cc b/webrtc/voice_engine/voe_video_sync_impl.cc
index 8db2e68..91c0750 100644
--- a/webrtc/voice_engine/voe_video_sync_impl.cc
+++ b/webrtc/voice_engine/voe_video_sync_impl.cc
@@ -237,6 +237,24 @@
     return channelPtr->GetRtpRtcp(rtpRtcpModule);
 }
 
+int VoEVideoSyncImpl::GetLeastRequiredDelayMs(int channel) const {
+  WEBRTC_TRACE(kTraceApiCall, kTraceVoice, VoEId(_shared->instance_id(), -1),
+               "GetLeastRequiredDelayMS(channel=%d)", channel);
+  IPHONE_NOT_SUPPORTED(_shared->statistics());
+
+  if (!_shared->statistics().Initialized()) {
+    _shared->SetLastError(VE_NOT_INITED, kTraceError);
+    return -1;
+  }
+  voe::ScopedChannel sc(_shared->channel_manager(), channel);
+  voe::Channel* channel_ptr = sc.ChannelPtr();
+  if (channel_ptr == NULL) {
+    _shared->SetLastError(VE_CHANNEL_NOT_VALID, kTraceError,
+                          "GetLeastRequiredDelayMs() failed to locate channel");
+    return -1;
+  }
+  return channel_ptr->least_required_delay_ms();
+}
 
 #endif  // #ifdef WEBRTC_VOICE_ENGINE_VIDEO_SYNC_API
 
diff --git a/webrtc/voice_engine/voe_video_sync_impl.h b/webrtc/voice_engine/voe_video_sync_impl.h
index fafefd1..932c8cd 100644
--- a/webrtc/voice_engine/voe_video_sync_impl.h
+++ b/webrtc/voice_engine/voe_video_sync_impl.h
@@ -30,6 +30,8 @@
                                  int* jitter_buffer_delay_ms,
                                  int* playout_buffer_delay_ms);
 
+    virtual int GetLeastRequiredDelayMs(int channel) const;
+
     virtual int SetInitTimestamp(int channel, unsigned int timestamp);
 
     virtual int SetInitSequenceNumber(int channel, short sequenceNumber);