Change audio/video sync to be based on mapping RTP timestamps to NTP.

Video Engine:
- Compensate for video capture delay by modifying RTP timestamps instead of
  NTP timestamps.
- Calculate the relative offset between audio and video by converting
  RTP timestamps to NTP time and comparing receive times; see the sketch
  below.
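
  As a sketch (illustrative names only, not the exact code):

    // Positive means video is behind audio; all values in ms.
    relative_delay_ms =
        (video_receive_time_ms - audio_receive_time_ms) -
        (video_capture_ntp_ms - audio_capture_ntp_ms);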

RTP/RTCP module:
- Remove the awkward modification of video NTP timestamps that compensated
  for video capture delay.
- Adjust RTCP RTP timestamp generation in rtcp_sender so it has the same
  offset as packets sent from rtp_sender; see the sketch below.
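
  Roughly, assuming a capture-time/RTP-timestamp pair is tracked per stream
  (a sketch with illustrative names, not the rtcp_sender code itself):

    // RTP timestamp for an RTCP SR, aligned with the report's NTP time.
    uint32_t sr_rtp_timestamp = last_rtp_timestamp +
        (ntp_now_ms - last_capture_ntp_ms) * rtp_clock_khz;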

BUG=
TEST=trybots,stream_synchronization_unittest

Review URL: https://webrtc-codereview.appspot.com/669010

git-svn-id: http://webrtc.googlecode.com/svn/trunk@2733 4adac7df-926f-26a2-2b94-8c16560cd09d
diff --git a/src/video_engine/stream_synchronization.cc b/src/video_engine/stream_synchronization.cc
index 1ba1f09..fedea4a 100644
--- a/src/video_engine/stream_synchronization.cc
+++ b/src/video_engine/stream_synchronization.cc
@@ -9,15 +9,126 @@
  */
 
 #include "video_engine/stream_synchronization.h"
+
+#include <assert.h>
+#include <algorithm>
+#include <cmath>
+
 #include "system_wrappers/interface/trace.h"
 
 namespace webrtc {
 
-enum { kMaxVideoDiffMs = 80 };
-enum { kMaxAudioDiffMs = 80 };
-enum { kMaxDelay = 1500 };
+const int kMaxVideoDiffMs = 80;
+const int kMaxAudioDiffMs = 80;
+const int kMaxDelay = 1500;
 
-const float FracMS = 4.294967296E6f;
+const double kNtpFracPerMs = 4.294967296E6;
+
+namespace synchronization {
+
+RtcpMeasurement::RtcpMeasurement()
+    : ntp_secs(0), ntp_frac(0), rtp_timestamp(0) {}
+
+RtcpMeasurement::RtcpMeasurement(uint32_t ntp_secs, uint32_t ntp_frac,
+                                 uint32_t timestamp)
+    : ntp_secs(ntp_secs), ntp_frac(ntp_frac), rtp_timestamp(timestamp) {}
+
+// Calculates the RTP timestamp frequency from two pairs of NTP and RTP
+// timestamps.
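+// For example, a 90 kHz video RTP clock advances 90 ticks per ms, so two SRs
+// spaced 1000 ms apart differ by about 90000 RTP ticks: |frequency_khz| ~= 90.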
+bool CalculateFrequency(
+    int64_t rtcp_ntp_ms1,
+    uint32_t rtp_timestamp1,
+    int64_t rtcp_ntp_ms2,
+    uint32_t rtp_timestamp2,
+    double* frequency_khz) {
+  if (rtcp_ntp_ms1 == rtcp_ntp_ms2) {
+    return false;
+  }
+  assert(rtcp_ntp_ms1 > rtcp_ntp_ms2);
+  *frequency_khz = static_cast<double>(rtp_timestamp1 - rtp_timestamp2) /
+      static_cast<double>(rtcp_ntp_ms1 - rtcp_ntp_ms2);
+  return true;
+}
+
+// Detects if there has been a wraparound between |old_timestamp| and
+// |new_timestamp|, and compensates by adding 2^32 if that is the case.
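+// E.g. |old_timestamp| = 0xFFFFFFF0 and |new_timestamp| = 0x10 is one forward
+// wrap around, so the compensated timestamp is 0x10 + 2^32 = 0x100000010.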
+bool CompensateForWrapAround(uint32_t new_timestamp,
+                             uint32_t old_timestamp,
+                             int64_t* compensated_timestamp) {
+  assert(compensated_timestamp);
+  int64_t wraps = synchronization::CheckForWrapArounds(new_timestamp,
+                                                       old_timestamp);
+  if (wraps < 0) {
+    // Reordering, don't use this packet.
+    return false;
+  }
+  *compensated_timestamp = new_timestamp + (wraps << 32);
+  return true;
+}
+
+// Converts an NTP timestamp to a millisecond timestamp.
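+// The NTP fractional part has 2^32 units per second, hence
+// 2^32 / 1000 = 4.294967296E6 units per millisecond (kNtpFracPerMs above).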
+int64_t NtpToMs(uint32_t ntp_secs, uint32_t ntp_frac) {
+  const double ntp_frac_ms = static_cast<double>(ntp_frac) / kNtpFracPerMs;
+  return static_cast<int64_t>(ntp_secs) * 1000 + ntp_frac_ms + 0.5;
+}
+
+// Converts |rtp_timestamp| to the NTP time base using the NTP and RTP timestamp
+// pairs in |rtcp|. The converted timestamp is returned in
+// |rtp_timestamp_in_ms|. This function compensates for wrap arounds in RTP
+// timestamps and returns false if it can't do the conversion due to reordering.
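+// In effect this fits the line rtp = offset + frequency_khz * ntp_ms through
+// the two SR points and solves it for ntp_ms at the given |rtp_timestamp|.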
+bool RtpToNtpMs(int64_t rtp_timestamp,
+                const synchronization::RtcpList& rtcp,
+                int64_t* rtp_timestamp_in_ms) {
+  assert(rtcp.size() == 2);
+  int64_t rtcp_ntp_ms_new = synchronization::NtpToMs(rtcp.front().ntp_secs,
+                                                     rtcp.front().ntp_frac);
+  int64_t rtcp_ntp_ms_old = synchronization::NtpToMs(rtcp.back().ntp_secs,
+                                                     rtcp.back().ntp_frac);
+  int64_t rtcp_timestamp_new = rtcp.front().rtp_timestamp;
+  int64_t rtcp_timestamp_old = rtcp.back().rtp_timestamp;
+  if (!CompensateForWrapAround(rtcp_timestamp_new,
+                               rtcp_timestamp_old,
+                               &rtcp_timestamp_new)) {
+    return false;
+  }
+  double freq_khz;
+  if (!CalculateFrequency(rtcp_ntp_ms_new,
+                          rtcp_timestamp_new,
+                          rtcp_ntp_ms_old,
+                          rtcp_timestamp_old,
+                          &freq_khz)) {
+    return false;
+  }
+  double offset = rtcp_timestamp_new - freq_khz * rtcp_ntp_ms_new;
+  int64_t rtp_timestamp_unwrapped;
+  if (!CompensateForWrapAround(rtp_timestamp, rtcp_timestamp_old,
+                               &rtp_timestamp_unwrapped)) {
+    return false;
+  }
+  double rtp_timestamp_ntp_ms = (static_cast<double>(rtp_timestamp_unwrapped) -
+      offset) / freq_khz + 0.5;
+  assert(rtp_timestamp_ntp_ms >= 0);
+  *rtp_timestamp_in_ms = rtp_timestamp_ntp_ms;
+  return true;
+}
+
+int CheckForWrapArounds(uint32_t new_timestamp, uint32_t old_timestamp) {
+  if (new_timestamp < old_timestamp) {
+    // This difference should be less than -2^31 if we have had a wrap around
+    // (e.g. |new_timestamp| = 1, |old_timestamp| = 2^32 - 1). Since it is
+    // cast to an int32_t, it should be positive.
+    if (static_cast<int32_t>(new_timestamp - old_timestamp) > 0) {
+      // Forward wrap around.
+      return 1;
+    }
+  } else if (static_cast<int32_t>(old_timestamp - new_timestamp) > 0) {
+    // This difference should be less than -2^31 if we have had a backward wrap
+    // around. Since it is cast to an int32_t, it should be positive.
+    return -1;
+  }
+  return 0;
+}
+}  // namespace synchronization
 
 struct ViESyncDelay {
   ViESyncDelay() {
@@ -45,41 +156,45 @@
   delete channel_delay_;
 }
 
-int StreamSynchronization::ComputeDelays(const Measurements& audio,
-                                         int current_audio_delay_ms,
-                                         int* extra_audio_delay_ms,
-                                         const Measurements& video,
-                                         int* total_video_delay_target_ms) {
-  // ReceivedNTPxxx is NTP at sender side when sent.
-  // RTCPArrivalTimexxx is NTP at receiver side when received.
-  // can't use ConvertNTPTimeToMS since calculation can be
-  //  negative
-  int NTPdiff = (audio.received_ntp_secs - video.received_ntp_secs)
-                * 1000;  // ms
-  float ntp_diff_frac = audio.received_ntp_frac / FracMS -
-        video.received_ntp_frac / FracMS;
-  if (ntp_diff_frac > 0.0f)
-    NTPdiff += static_cast<int>(ntp_diff_frac + 0.5f);
-  else
-    NTPdiff += static_cast<int>(ntp_diff_frac - 0.5f);
-
-  int RTCPdiff = (audio.rtcp_arrivaltime_secs - video.rtcp_arrivaltime_secs)
-                 * 1000;  // ms
-  float rtcp_diff_frac = audio.rtcp_arrivaltime_frac / FracMS -
-        video.rtcp_arrivaltime_frac / FracMS;
-  if (rtcp_diff_frac > 0.0f)
-    RTCPdiff += static_cast<int>(rtcp_diff_frac + 0.5f);
-  else
-    RTCPdiff += static_cast<int>(rtcp_diff_frac - 0.5f);
-
-  int diff = NTPdiff - RTCPdiff;
-  // if diff is + video is behind
-  if (diff < -1000 || diff > 1000) {
-    // unresonable ignore value.
-    return -1;
+bool StreamSynchronization::ComputeRelativeDelay(
+    const Measurements& audio_measurement,
+    const Measurements& video_measurement,
+    int* relative_delay_ms) {
+  assert(relative_delay_ms);
+  if (audio_measurement.rtcp.size() < 2 || video_measurement.rtcp.size() < 2) {
+    // We need two RTCP SR reports per stream to do synchronization.
+    return false;
   }
-  channel_delay_->network_delay = diff;
+  int64_t audio_last_capture_time_ms;
+  if (!synchronization::RtpToNtpMs(audio_measurement.latest_timestamp,
+                                   audio_measurement.rtcp,
+                                   &audio_last_capture_time_ms)) {
+    return false;
+  }
+  int64_t video_last_capture_time_ms;
+  if (!synchronization::RtpToNtpMs(video_measurement.latest_timestamp,
+                                   video_measurement.rtcp,
+                                   &video_last_capture_time_ms)) {
+    return false;
+  }
+  if (video_last_capture_time_ms < 0) {
+    return false;
+  }
+  // Positive diff means that video_measurement is behind audio_measurement.
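+  // E.g. if video was received 130 ms after audio but captured 100 ms later,
+  // the relative delay is +30 ms, i.e. video is 30 ms behind audio.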
+  *relative_delay_ms = video_measurement.latest_receive_time_ms -
+      audio_measurement.latest_receive_time_ms -
+      (video_last_capture_time_ms - audio_last_capture_time_ms);
+  if (*relative_delay_ms > 1000 || *relative_delay_ms < -1000) {
+    return false;
+  }
+  return true;
+}
 
+bool StreamSynchronization::ComputeDelays(int relative_delay_ms,
+                                          int current_audio_delay_ms,
+                                          int* extra_audio_delay_ms,
+                                          int* total_video_delay_target_ms) {
+  assert(extra_audio_delay_ms && total_video_delay_target_ms);
   WEBRTC_TRACE(webrtc::kTraceInfo, webrtc::kTraceVideo, video_channel_id_,
                "Audio delay is: %d for voice channel: %d",
                current_audio_delay_ms, audio_channel_id_);
@@ -88,11 +203,12 @@
                channel_delay_->network_delay, audio_channel_id_);
   // Calculate the difference between the lowest possible video delay and
   // the current audio delay.
-  int current_diff_ms = *total_video_delay_target_ms - current_audio_delay_ms +
-      channel_delay_->network_delay;
   WEBRTC_TRACE(webrtc::kTraceInfo, webrtc::kTraceVideo, video_channel_id_,
                "Current diff is: %d for audio channel: %d",
-               current_diff_ms, audio_channel_id_);
+               relative_delay_ms, audio_channel_id_);
+
+  int current_diff_ms = *total_video_delay_target_ms - current_audio_delay_ms +
+      relative_delay_ms;
 
   int video_delay_ms = 0;
   if (current_diff_ms > 0) {
@@ -235,6 +351,6 @@
   *total_video_delay_target_ms =
       (*total_video_delay_target_ms  >  video_delay_ms) ?
       *total_video_delay_target_ms : video_delay_ms;
-  return 0;
+  return true;
 }
 }  // namespace webrtc