Hook up audio/video sync to Call.

Adds an end-to-end audio/video sync test.

BUG=2530, 2608
TEST=trybots
R=henrika@webrtc.org, mflodman@webrtc.org, pbos@webrtc.org

Review URL: https://webrtc-codereview.appspot.com/3699004

git-svn-id: http://webrtc.googlecode.com/svn/trunk/webrtc@5128 4adac7df-926f-26a2-2b94-8c16560cd09d
diff --git a/call.cc b/call.cc
index ec42e3e..dff913b 100644
--- a/call.cc
+++ b/call.cc
@@ -255,8 +255,8 @@
 
 VideoReceiveStream* Call::CreateReceiveStream(
     const VideoReceiveStream::Config& config) {
-  VideoReceiveStream* receive_stream =
-      new VideoReceiveStream(video_engine_, config, config_.send_transport);
+  VideoReceiveStream* receive_stream = new VideoReceiveStream(
+      video_engine_, config, config_.send_transport, config_.voice_engine);
 
   WriteLockScoped write_lock(*receive_lock_);
   assert(receive_ssrcs_.find(config.rtp.ssrc) == receive_ssrcs_.end());
diff --git a/call_tests.cc b/call_tests.cc
index afd9ce0..3c8d78e 100644
--- a/call_tests.cc
+++ b/call_tests.cc
@@ -9,23 +9,36 @@
  */
 #include <assert.h>
 
+#include <algorithm>
 #include <map>
+#include <sstream>
+#include <string>
 
 #include "testing/gtest/include/gtest/gtest.h"
 
 #include "webrtc/call.h"
 #include "webrtc/common_video/test/frame_generator.h"
+#include "webrtc/modules/remote_bitrate_estimator/include/rtp_to_ntp.h"
 #include "webrtc/modules/rtp_rtcp/interface/rtp_header_parser.h"
 #include "webrtc/modules/rtp_rtcp/source/rtcp_utility.h"
 #include "webrtc/system_wrappers/interface/critical_section_wrapper.h"
 #include "webrtc/system_wrappers/interface/event_wrapper.h"
 #include "webrtc/system_wrappers/interface/scoped_ptr.h"
+#include "webrtc/video/transport_adapter.h"
+#include "webrtc/voice_engine/include/voe_base.h"
+#include "webrtc/voice_engine/include/voe_codec.h"
+#include "webrtc/voice_engine/include/voe_network.h"
+#include "webrtc/voice_engine/include/voe_rtp_rtcp.h"
+#include "webrtc/voice_engine/include/voe_video_sync.h"
+#include "webrtc/voice_engine/test/auto_test/resource_manager.h"
 #include "webrtc/test/direct_transport.h"
+#include "webrtc/test/fake_audio_device.h"
 #include "webrtc/test/fake_decoder.h"
 #include "webrtc/test/fake_encoder.h"
 #include "webrtc/test/frame_generator_capturer.h"
 #include "webrtc/test/generate_ssrcs.h"
 #include "webrtc/test/rtp_rtcp_observer.h"
+#include "webrtc/test/testsupport/perf_test.h"
 
 namespace webrtc {
 
@@ -115,6 +128,7 @@
 
   void ReceivesPliAndRecovers(int rtp_history_ms);
   void RespectsRtcpMode(newapi::RtcpMode rtcp_mode);
+  void PlaysOutAudioAndVideoInSync();
 
   scoped_ptr<Call> sender_call_;
   scoped_ptr<Call> receiver_call_;
@@ -803,4 +817,226 @@
   sender_transport.StopSending();
   receiver_transport.StopSending();
 }
+
+// Records the NTP/RTP pairs of sent RTCP SRs to map RTP timestamps to NTP.
+class SyncRtcpObserver : public test::RtpRtcpObserver {
+ public:
+  explicit SyncRtcpObserver(int delay_ms)
+      : test::RtpRtcpObserver(kLongTimeoutMs, delay_ms),
+        critical_section_(CriticalSectionWrapper::CreateCriticalSection()) {}
+
+  // Stores the NTP/RTP pair of each sender report; always forwards the packet.
+  virtual Action OnSendRtcp(const uint8_t* packet, size_t length) OVERRIDE {
+    RTCPUtility::RTCPParserV2 parser(packet, length, true);
+    EXPECT_TRUE(parser.IsValid());
+
+    for (RTCPUtility::RTCPPacketTypes packet_type = parser.Begin();
+         packet_type != RTCPUtility::kRtcpNotValidCode;
+         packet_type = parser.Iterate()) {
+      if (packet_type != RTCPUtility::kRtcpSrCode)
+        continue;
+      const RTCPUtility::RTCPPacket& sr_packet = parser.Packet();
+      StoreNtpRtpPair(synchronization::RtcpMeasurement(
+          sr_packet.SR.NTPMostSignificant, sr_packet.SR.NTPLeastSignificant,
+          sr_packet.SR.RTPTimestamp));
+    }
+    return SEND_PACKET;
+  }
+
+  // Maps |timestamp| through synchronization::RtpToNtpMs() using the stored
+  // sender reports. Returns -1 until two reports have been stored.
+  int64_t RtpTimestampToNtp(uint32_t timestamp) const {
+    CriticalSectionScoped cs(critical_section_.get());
+    int64_t timestamp_in_ms = -1;
+    if (ntp_rtp_pairs_.size() == 2) {
+      // TODO(stefan): We can't EXPECT_TRUE on this call due to a bug in the
+      // RTCP sender where it sends RTCP SR before any RTP packets, which leads
+      // to a bogus NTP/RTP mapping.
+      synchronization::RtpToNtpMs(timestamp, ntp_rtp_pairs_, &timestamp_in_ms);
+    }
+    return timestamp_in_ms;
+  }
+
+ private:
+  // Stores |ntp_rtp_pair| as newest unless its NTP time is already listed.
+  void StoreNtpRtpPair(synchronization::RtcpMeasurement ntp_rtp_pair) {
+    CriticalSectionScoped cs(critical_section_.get());
+    for (synchronization::RtcpList::iterator it = ntp_rtp_pairs_.begin();
+         it != ntp_rtp_pairs_.end(); ++it) {
+      if (ntp_rtp_pair.ntp_secs == it->ntp_secs &&
+          ntp_rtp_pair.ntp_frac == it->ntp_frac) {
+        // This RTCP has already been added to the list.
+        return;
+      }
+    }
+    // We need two RTCP SR reports to map between RTP and NTP. More than two
+    // will not improve the mapping.
+    if (ntp_rtp_pairs_.size() == 2)
+      ntp_rtp_pairs_.pop_back();
+    ntp_rtp_pairs_.push_front(ntp_rtp_pair);
+  }
+
+  scoped_ptr<CriticalSectionWrapper> critical_section_;
+  synchronization::RtcpList ntp_rtp_pairs_;
+};
+
+// Renderer that compares the NTP time of each rendered video frame with that
+// of the audio being played out and signals completion once they are in sync.
+class VideoRtcpAndSyncObserver : public SyncRtcpObserver, public VideoRenderer {
+  static const int kInSyncThresholdMs = 50;
+  static const int kStartupTimeMs = 2000;
+  static const int kMinRunTimeMs = 30000;
+
+ public:
+  VideoRtcpAndSyncObserver(Clock* clock, int voe_channel,
+                           VoEVideoSync* voe_sync,
+                           SyncRtcpObserver* audio_observer)
+      : SyncRtcpObserver(0),
+        clock_(clock),
+        voe_channel_(voe_channel),
+        voe_sync_(voe_sync),
+        audio_observer_(audio_observer),
+        creation_time_ms_(clock_->TimeInMilliseconds()),
+        first_time_in_sync_(-1) {}
+
+  // Prints the A/V offset; sets the completion event once in sync long enough.
+  virtual void RenderFrame(const I420VideoFrame& video_frame,
+                           int time_to_render_ms) OVERRIDE {
+    int64_t now_ms = clock_->TimeInMilliseconds();
+    uint32_t playout_timestamp = 0;
+    if (voe_sync_->GetPlayoutTimestamp(voe_channel_, playout_timestamp) != 0)
+      return;
+    int64_t latest_audio_ntp =
+        audio_observer_->RtpTimestampToNtp(playout_timestamp);
+    int64_t latest_video_ntp = RtpTimestampToNtp(video_frame.timestamp());
+    if (latest_audio_ntp < 0 || latest_video_ntp < 0)
+      return;
+    latest_video_ntp +=
+        std::max(0, static_cast<int>(video_frame.render_time_ms() - now_ms));
+    int64_t stream_offset = latest_audio_ntp - latest_video_ntp;
+    std::stringstream ss;
+    ss << stream_offset;
+    webrtc::test::PrintResult(
+        "stream_offset", "", "synchronization", ss.str(), "ms", false);
+    int64_t time_since_creation = now_ms - creation_time_ms_;
+    // During the first couple of seconds audio and video can falsely be
+    // estimated as being synchronized. We don't want to trigger on those.
+    if (time_since_creation < kStartupTimeMs)
+      return;
+    // Avoid abs(): it may pick the int overload and truncate the 64-bit offset.
+    if (stream_offset > -kInSyncThresholdMs &&
+        stream_offset < kInSyncThresholdMs) {
+      if (first_time_in_sync_ == -1) {
+        first_time_in_sync_ = now_ms;
+        webrtc::test::PrintResult(
+            "sync_convergence_time", "", "synchronization",
+            time_since_creation, "ms", false);
+      }
+      if (time_since_creation > kMinRunTimeMs)
+        observation_complete_->Set();
+    }
+  }
+
+ private:
+  Clock* clock_;
+  int voe_channel_;
+  VoEVideoSync* voe_sync_;
+  SyncRtcpObserver* audio_observer_;
+  int64_t creation_time_ms_;
+  int64_t first_time_in_sync_;
+};
+
+// End-to-end test: sends looped file audio through VoiceEngine and generated
+// video through Call, and waits until the renderer reports them in sync.
+TEST_F(CallTest, PlaysOutAudioAndVideoInSync) {
+  VoiceEngine* voice_engine = VoiceEngine::Create();
+  VoEBase* voe_base = VoEBase::GetInterface(voice_engine);
+  VoECodec* voe_codec = VoECodec::GetInterface(voice_engine);
+  VoENetwork* voe_network = VoENetwork::GetInterface(voice_engine);
+  VoEVideoSync* voe_sync = VoEVideoSync::GetInterface(voice_engine);
+  ResourceManager resource_manager;
+  const std::string audio_filename = resource_manager.long_audio_file_path();
+  ASSERT_STRNE("", audio_filename.c_str());
+  test::FakeAudioDevice fake_audio_device(Clock::GetRealTimeClock(),
+                                          audio_filename);
+  EXPECT_EQ(0, voe_base->Init(&fake_audio_device, NULL));
+  int channel = voe_base->CreateChannel();
+  // kVoiceDelayMs is handed to the audio observer; the video observer uses 0.
+  const int kVoiceDelayMs = 500;
+  SyncRtcpObserver audio_observer(kVoiceDelayMs);
+  VideoRtcpAndSyncObserver observer(
+      Clock::GetRealTimeClock(), channel, voe_sync, &audio_observer);
+
+  Call::Config receiver_config(observer.ReceiveTransport());
+  receiver_config.voice_engine = voice_engine;
+  CreateCalls(Call::Config(observer.SendTransport()), receiver_config);
+  CodecInst isac = {103, "ISAC", 16000, 480, 1, 32000};
+  EXPECT_EQ(0, voe_codec->SetSendCodec(channel, isac));
+  // Delivers incoming packets to the voice channel as RTCP or RTP.
+  class VoicePacketReceiver : public PacketReceiver {
+   public:
+    VoicePacketReceiver(int channel, VoENetwork* voe_network)
+        : channel_(channel),
+          voe_network_(voe_network),
+          parser_(RtpHeaderParser::Create()) {}
+    virtual bool DeliverPacket(const uint8_t* packet, size_t length) {
+      int ret;
+      if (parser_->IsRtcp(packet, static_cast<int>(length))) {
+        ret = voe_network_->ReceivedRTCPPacket(
+            channel_, packet, static_cast<unsigned int>(length));
+      } else {
+        ret = voe_network_->ReceivedRTPPacket(
+            channel_, packet, static_cast<unsigned int>(length));
+      }
+      return ret == 0;
+    }
+
+   private:
+    int channel_;
+    VoENetwork* voe_network_;
+    scoped_ptr<RtpHeaderParser> parser_;
+  } voe_packet_receiver(channel, voe_network);
+  audio_observer.SetReceivers(&voe_packet_receiver, &voe_packet_receiver);
+
+  internal::TransportAdapter transport_adapter(audio_observer.SendTransport());
+  EXPECT_EQ(0,
+            voe_network->RegisterExternalTransport(channel, transport_adapter));
+
+  observer.SetReceivers(receiver_call_->Receiver(), sender_call_->Receiver());
+  CreateTestConfigs();
+  send_config_.rtp.nack.rtp_history_ms = 1000;
+  receive_config_.rtp.nack.rtp_history_ms = 1000;
+  receive_config_.renderer = &observer;
+  receive_config_.audio_channel_id = channel;
+
+  CreateStreams();
+  CreateFrameGenerator();
+  StartSending();
+
+  fake_audio_device.Start();
+  EXPECT_EQ(0, voe_base->StartPlayout(channel));
+  EXPECT_EQ(0, voe_base->StartReceive(channel));
+  EXPECT_EQ(0, voe_base->StartSend(channel));
+
+  EXPECT_EQ(kEventSignaled, observer.Wait())
+      << "Timed out while waiting for audio and video to be synchronized.";
+  // Stop the audio path in the reverse order of the start-up above.
+  EXPECT_EQ(0, voe_base->StopSend(channel));
+  EXPECT_EQ(0, voe_base->StopReceive(channel));
+  EXPECT_EQ(0, voe_base->StopPlayout(channel));
+  fake_audio_device.Stop();
+
+  StopSending();
+  observer.StopSending();
+  audio_observer.StopSending();
+
+  voe_base->DeleteChannel(channel);
+  voe_base->Release();
+  voe_codec->Release();
+  voe_network->Release();
+  voe_sync->Release();
+  DestroyStreams();
+  VoiceEngine::Delete(voice_engine);
+}
+
 }  // namespace webrtc
diff --git a/modules/audio_device/include/fake_audio_device.h b/modules/audio_device/include/fake_audio_device.h
index 7966716..0248317 100644
--- a/modules/audio_device/include/fake_audio_device.h
+++ b/modules/audio_device/include/fake_audio_device.h
@@ -15,7 +15,7 @@
 class FakeAudioDeviceModule : public AudioDeviceModule {
  public:
   FakeAudioDeviceModule() {}
-  ~FakeAudioDeviceModule() {}
+  virtual ~FakeAudioDeviceModule() {}
   virtual int32_t AddRef() { return 0; }
   virtual int32_t Release() { return 0; }
   virtual int32_t RegisterEventObserver(AudioDeviceObserver* eventCallback) {
@@ -48,283 +48,112 @@
   virtual int32_t Process() { return 0; }
   virtual int32_t Terminate() { return 0; }
 
-  virtual int32_t ActiveAudioLayer(AudioLayer* audioLayer) const {
-    assert(false);
-    return 0;
-  }
-  virtual ErrorCode LastError() const {
-    assert(false);
-    return  kAdmErrNone;
-  }
-  virtual bool Initialized() const {
-    assert(false);
-    return true;
-  }
-  virtual int16_t PlayoutDevices() {
-    assert(false);
-    return 0;
-  }
-  virtual int16_t RecordingDevices() {
-    assert(false);
-    return 0;
-  }
+  virtual int32_t ActiveAudioLayer(AudioLayer* audioLayer) const { return 0; }
+  virtual ErrorCode LastError() const { return kAdmErrNone; }
+  virtual bool Initialized() const { return true; }
+  virtual int16_t PlayoutDevices() { return 0; }
+  virtual int16_t RecordingDevices() { return 0; }
   virtual int32_t PlayoutDeviceName(uint16_t index,
                             char name[kAdmMaxDeviceNameSize],
                             char guid[kAdmMaxGuidSize]) {
-    assert(false);
     return 0;
   }
   virtual int32_t RecordingDeviceName(uint16_t index,
                               char name[kAdmMaxDeviceNameSize],
                               char guid[kAdmMaxGuidSize]) {
-    assert(false);
     return 0;
   }
-  virtual int32_t PlayoutIsAvailable(bool* available) {
-    assert(false);
-    return 0;
-  }
-  virtual int32_t InitPlayout() {
-    assert(false);
-    return 0;
-  }
-  virtual bool PlayoutIsInitialized() const {
-    assert(false);
-    return true;
-  }
-  virtual int32_t RecordingIsAvailable(bool* available) {
-    assert(false);
-    return 0;
-  }
-  virtual int32_t InitRecording() {
-    assert(false);
-    return 0;
-  }
-  virtual bool RecordingIsInitialized() const {
-    assert(false);
-    return true;
-  }
-  virtual int32_t StartPlayout() {
-    assert(false);
-    return 0;
-  }
-  virtual bool Playing() const {
-    assert(false);
-    return false;
-  }
-  virtual int32_t StartRecording() {
-    assert(false);
-    return 0;
-  }
-  virtual bool Recording() const {
-    assert(false);
-    return false;
-  }
-  virtual bool AGC() const {
-    assert(false);
-    return true;
-  }
+  virtual int32_t PlayoutIsAvailable(bool* available) { return 0; }
+  virtual int32_t InitPlayout() { return 0; }
+  virtual bool PlayoutIsInitialized() const { return true; }
+  virtual int32_t RecordingIsAvailable(bool* available) { return 0; }
+  virtual int32_t InitRecording() { return 0; }
+  virtual bool RecordingIsInitialized() const { return true; }
+  virtual int32_t StartPlayout() { return 0; }
+  virtual bool Playing() const { return false; }
+  virtual int32_t StartRecording() { return 0; }
+  virtual bool Recording() const { return false; }
+  virtual bool AGC() const { return true; }
   virtual int32_t SetWaveOutVolume(uint16_t volumeLeft,
                            uint16_t volumeRight) {
-    assert(false);
     return 0;
   }
   virtual int32_t WaveOutVolume(uint16_t* volumeLeft,
                         uint16_t* volumeRight) const {
-    assert(false);
     return 0;
   }
-  virtual bool SpeakerIsInitialized() const {
-    assert(false);
-    return true;
-  }
-  virtual bool MicrophoneIsInitialized() const {
-    assert(false);
-    return true;
-  }
-  virtual int32_t SpeakerVolumeIsAvailable(bool* available) {
-    assert(false);
-    return 0;
-  }
-  virtual int32_t SetSpeakerVolume(uint32_t volume) {
-    assert(false);
-    return 0;
-  }
-  virtual int32_t SpeakerVolume(uint32_t* volume) const {
-    assert(false);
-    return 0;
-  }
-  virtual int32_t MaxSpeakerVolume(uint32_t* maxVolume) const {
-    assert(false);
-    return 0;
-  }
-  virtual int32_t MinSpeakerVolume(uint32_t* minVolume) const {
-    assert(false);
-    return 0;
-  }
-  virtual int32_t SpeakerVolumeStepSize(uint16_t* stepSize) const {
-    assert(false);
-    return 0;
-  }
-  virtual int32_t MicrophoneVolumeIsAvailable(bool* available) {
-    assert(false);
-    return 0;
-  }
-  virtual int32_t SetMicrophoneVolume(uint32_t volume) {
-    assert(false);
-    return 0;
-  }
-  virtual int32_t MicrophoneVolume(uint32_t* volume) const {
-    assert(false);
-    return 0;
-  }
-  virtual int32_t MaxMicrophoneVolume(uint32_t* maxVolume) const {
-    assert(false);
-    return 0;
-  }
-  virtual int32_t MinMicrophoneVolume(uint32_t* minVolume) const {
-    assert(false);
-    return 0;
-  }
+  virtual bool SpeakerIsInitialized() const { return true; }
+  virtual bool MicrophoneIsInitialized() const { return true; }
+  virtual int32_t SpeakerVolumeIsAvailable(bool* available) { return 0; }
+  virtual int32_t SetSpeakerVolume(uint32_t volume) { return 0; }
+  virtual int32_t SpeakerVolume(uint32_t* volume) const { return 0; }
+  virtual int32_t MaxSpeakerVolume(uint32_t* maxVolume) const { return 0; }
+  virtual int32_t MinSpeakerVolume(uint32_t* minVolume) const { return 0; }
+  virtual int32_t SpeakerVolumeStepSize(uint16_t* stepSize) const { return 0; }
+  virtual int32_t MicrophoneVolumeIsAvailable(bool* available) { return 0; }
+  virtual int32_t SetMicrophoneVolume(uint32_t volume) { return 0; }
+  virtual int32_t MicrophoneVolume(uint32_t* volume) const { return 0; }
+  virtual int32_t MaxMicrophoneVolume(uint32_t* maxVolume) const { return 0; }
+  virtual int32_t MinMicrophoneVolume(uint32_t* minVolume) const { return 0; }
   virtual int32_t MicrophoneVolumeStepSize(uint16_t* stepSize) const {
-    assert(false);
     return 0;
   }
-  virtual int32_t SpeakerMuteIsAvailable(bool* available) {
-    assert(false);
-    return 0;
-  }
-  virtual int32_t SetSpeakerMute(bool enable) {
-    assert(false);
-    return 0;
-  }
-  virtual int32_t SpeakerMute(bool* enabled) const {
-    assert(false);
-    return 0;
-  }
-  virtual int32_t MicrophoneMuteIsAvailable(bool* available) {
-    assert(false);
-    return 0;
-  }
-  virtual int32_t SetMicrophoneMute(bool enable) {
-    assert(false);
-    return 0;
-  }
-  virtual int32_t MicrophoneMute(bool* enabled) const {
-    assert(false);
-    return 0;
-  }
-  virtual int32_t MicrophoneBoostIsAvailable(bool* available) {
-    assert(false);
-    return 0;
-  }
-  virtual int32_t SetMicrophoneBoost(bool enable) {
-    assert(false);
-    return 0;
-  }
-  virtual int32_t MicrophoneBoost(bool* enabled) const {
-    assert(false);
-    return 0;
-  }
+  virtual int32_t SpeakerMuteIsAvailable(bool* available) { return 0; }
+  virtual int32_t SetSpeakerMute(bool enable) { return 0; }
+  virtual int32_t SpeakerMute(bool* enabled) const { return 0; }
+  virtual int32_t MicrophoneMuteIsAvailable(bool* available) { return 0; }
+  virtual int32_t SetMicrophoneMute(bool enable) { return 0; }
+  virtual int32_t MicrophoneMute(bool* enabled) const { return 0; }
+  virtual int32_t MicrophoneBoostIsAvailable(bool* available) { return 0; }
+  virtual int32_t SetMicrophoneBoost(bool enable) { return 0; }
+  virtual int32_t MicrophoneBoost(bool* enabled) const { return 0; }
   virtual int32_t StereoPlayoutIsAvailable(bool* available) const {
     *available = false;
     return 0;
   }
-  virtual int32_t StereoPlayout(bool* enabled) const {
-    assert(false);
-    return 0;
-  }
+  virtual int32_t StereoPlayout(bool* enabled) const { return 0; }
   virtual int32_t StereoRecordingIsAvailable(bool* available) const {
     *available = false;
     return 0;
   }
-  virtual int32_t StereoRecording(bool* enabled) const {
-    assert(false);
-    return 0;
-  }
-  virtual int32_t SetRecordingChannel(const ChannelType channel) {
-    assert(false);
-    return 0;
-  }
-  virtual int32_t RecordingChannel(ChannelType* channel) const {
-    assert(false);
-    return 0;
-  }
+  virtual int32_t StereoRecording(bool* enabled) const { return 0; }
+  virtual int32_t SetRecordingChannel(const ChannelType channel) { return 0; }
+  virtual int32_t RecordingChannel(ChannelType* channel) const { return 0; }
   virtual int32_t SetPlayoutBuffer(const BufferType type,
                            uint16_t sizeMS = 0) {
-    assert(false);
     return 0;
   }
   virtual int32_t PlayoutBuffer(BufferType* type, uint16_t* sizeMS) const {
-    assert(false);
     return 0;
   }
-  virtual int32_t PlayoutDelay(uint16_t* delayMS) const {
-    assert(false);
-    return 0;
-  }
-  virtual int32_t RecordingDelay(uint16_t* delayMS) const {
-    assert(false);
-    return 0;
-  }
-  virtual int32_t CPULoad(uint16_t* load) const {
-    assert(false);
-    return 0;
-  }
+  virtual int32_t PlayoutDelay(uint16_t* delayMS) const { return 0; }
+  virtual int32_t RecordingDelay(uint16_t* delayMS) const { return 0; }
+  virtual int32_t CPULoad(uint16_t* load) const { return 0; }
   virtual int32_t StartRawOutputFileRecording(
       const char pcmFileNameUTF8[kAdmMaxFileNameSize]) {
-    assert(false);
     return 0;
   }
-  virtual int32_t StopRawOutputFileRecording() {
-    assert(false);
-    return 0;
-  }
+  virtual int32_t StopRawOutputFileRecording() { return 0; }
   virtual int32_t StartRawInputFileRecording(
       const char pcmFileNameUTF8[kAdmMaxFileNameSize]) {
-    assert(false);
     return 0;
   }
-  virtual int32_t StopRawInputFileRecording() {
-    assert(false);
-    return 0;
-  }
+  virtual int32_t StopRawInputFileRecording() { return 0; }
   virtual int32_t SetRecordingSampleRate(const uint32_t samplesPerSec) {
-    assert(false);
     return 0;
   }
   virtual int32_t RecordingSampleRate(uint32_t* samplesPerSec) const {
-    assert(false);
     return 0;
   }
   virtual int32_t SetPlayoutSampleRate(const uint32_t samplesPerSec) {
-    assert(false);
     return 0;
   }
-  virtual int32_t PlayoutSampleRate(uint32_t* samplesPerSec) const {
-    assert(false);
-    return 0;
-  }
-  virtual int32_t ResetAudioDevice() {
-    assert(false);
-    return 0;
-  }
-  virtual int32_t SetLoudspeakerStatus(bool enable) {
-    assert(false);
-    return 0;
-  }
-  virtual int32_t GetLoudspeakerStatus(bool* enabled) const {
-    assert(false);
-    return 0;
-  }
-  virtual int32_t EnableBuiltInAEC(bool enable) {
-    assert(false);
-    return -1;
-  }
-  virtual bool BuiltInAECIsEnabled() const {
-    assert(false);
-    return false;
-  }
+  virtual int32_t PlayoutSampleRate(uint32_t* samplesPerSec) const { return 0; }
+  virtual int32_t ResetAudioDevice() { return 0; }
+  virtual int32_t SetLoudspeakerStatus(bool enable) { return 0; }
+  virtual int32_t GetLoudspeakerStatus(bool* enabled) const { return 0; }
+  virtual int32_t EnableBuiltInAEC(bool enable) { return -1; }
+  virtual bool BuiltInAECIsEnabled() const { return false; }
 };
 
 }  // namespace webrtc
diff --git a/modules/media_file/source/media_file.gypi b/modules/media_file/source/media_file.gypi
index 0d1b15f..3add36c 100644
--- a/modules/media_file/source/media_file.gypi
+++ b/modules/media_file/source/media_file.gypi
@@ -26,6 +26,9 @@
           '../interface',
           '../../interface',
         ],
+        'defines': [
+          'WEBRTC_MODULE_UTILITY_VIDEO',
+        ],
       },
       'sources': [
         '../interface/media_file.h',
diff --git a/modules/media_file/source/media_file_utility.cc b/modules/media_file/source/media_file_utility.cc
index 04022ad..85df0b3 100644
--- a/modules/media_file/source/media_file_utility.cc
+++ b/modules/media_file/source/media_file_utility.cc
@@ -8,6 +8,8 @@
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
+#include "webrtc/modules/media_file/source/media_file_utility.h"
+
 #include <assert.h>
 #include <sys/stat.h>
 #include <sys/types.h>
@@ -15,7 +17,6 @@
 #include "webrtc/common_types.h"
 #include "webrtc/engine_configurations.h"
 #include "webrtc/modules/interface/module_common_types.h"
-#include "webrtc/modules/media_file/source/media_file_utility.h"
 #include "webrtc/system_wrappers/interface/file_wrapper.h"
 #include "webrtc/system_wrappers/interface/trace.h"
 
diff --git a/test/direct_transport.cc b/test/direct_transport.cc
index af8ebcd..aed7002 100644
--- a/test/direct_transport.cc
+++ b/test/direct_transport.cc
@@ -12,6 +12,7 @@
 #include "testing/gtest/include/gtest/gtest.h"
 
 #include "webrtc/call.h"
+#include "webrtc/system_wrappers/interface/clock.h"
 
 namespace webrtc {
 namespace test {
@@ -20,8 +21,22 @@
     : lock_(CriticalSectionWrapper::CreateCriticalSection()),
       packet_event_(EventWrapper::Create()),
       thread_(ThreadWrapper::CreateThread(NetworkProcess, this)),
+      clock_(Clock::GetRealTimeClock()),
       shutting_down_(false),
-      receiver_(NULL) {
+      receiver_(NULL),
+      delay_ms_(0) {
+  unsigned int thread_id;
+  EXPECT_TRUE(thread_->Start(thread_id));
+}
+
+DirectTransport::DirectTransport(int delay_ms)
+    : lock_(CriticalSectionWrapper::CreateCriticalSection()),
+      packet_event_(EventWrapper::Create()),
+      thread_(ThreadWrapper::CreateThread(NetworkProcess, this)),
+      clock_(Clock::GetRealTimeClock()),
+      shutting_down_(false),
+      receiver_(NULL),
+      delay_ms_(delay_ms) {
   unsigned int thread_id;
   EXPECT_TRUE(thread_->Start(thread_id));
 }
@@ -43,28 +58,32 @@
 }
 
 bool DirectTransport::SendRTP(const uint8_t* data, size_t length) {
-  QueuePacket(data, length);
+  QueuePacket(data, length, clock_->TimeInMilliseconds() + delay_ms_);
   return true;
 }
 
 bool DirectTransport::SendRTCP(const uint8_t* data, size_t length) {
-  QueuePacket(data, length);
+  QueuePacket(data, length, clock_->TimeInMilliseconds() + delay_ms_);
   return true;
 }
 
-DirectTransport::Packet::Packet() : length(0) {}
+DirectTransport::Packet::Packet() : length(0), delivery_time_ms(0) {}
 
-DirectTransport::Packet::Packet(const uint8_t* data, size_t length)
-    : length(length) {
+DirectTransport::Packet::Packet(const uint8_t* data,
+                                size_t length,
+                                int64_t delivery_time_ms)
+    : length(length), delivery_time_ms(delivery_time_ms) {
   EXPECT_LE(length, sizeof(this->data));
   memcpy(this->data, data, length);
 }
 
-void DirectTransport::QueuePacket(const uint8_t* data, size_t length) {
+void DirectTransport::QueuePacket(const uint8_t* data,
+                                  size_t length,
+                                  int64_t delivery_time_ms) {
   CriticalSectionScoped crit(lock_.get());
   if (receiver_ == NULL)
     return;
-  packet_queue_.push_back(Packet(data, length));
+  packet_queue_.push_back(Packet(data, length, delivery_time_ms));
   packet_event_->Set();
 }
 
@@ -80,12 +99,27 @@
       if (packet_queue_.empty())
         break;
       p = packet_queue_.front();
+      if (p.delivery_time_ms > clock_->TimeInMilliseconds())
+        break;
       packet_queue_.pop_front();
     }
     receiver_->DeliverPacket(p.data, p.length);
   }
+  uint32_t time_until_next_delivery = WEBRTC_EVENT_INFINITE;
+  {
+    CriticalSectionScoped crit(lock_.get());
+    if (!packet_queue_.empty()) {
+      int64_t now_ms = clock_->TimeInMilliseconds();
+      const int64_t delivery_time_ms = packet_queue_.front().delivery_time_ms;
+      if (delivery_time_ms > now_ms) {
+        time_until_next_delivery = delivery_time_ms - now_ms;
+      } else {
+        time_until_next_delivery = 0;
+      }
+    }
+  }
 
-  switch (packet_event_->Wait(WEBRTC_EVENT_INFINITE)) {
+  switch (packet_event_->Wait(time_until_next_delivery)) {
     case kEventSignaled:
       packet_event_->Reset();
       break;
diff --git a/test/direct_transport.h b/test/direct_transport.h
index d4cb45a..30412e0 100644
--- a/test/direct_transport.h
+++ b/test/direct_transport.h
@@ -22,6 +22,7 @@
 
 namespace webrtc {
 
+class Clock;
 class PacketReceiver;
 
 namespace test {
@@ -29,6 +30,7 @@
 class DirectTransport : public newapi::Transport {
  public:
   DirectTransport();
+  explicit DirectTransport(int delay_ms);
   ~DirectTransport();
 
   virtual void StopSending();
@@ -40,13 +42,16 @@
  private:
   struct Packet {
     Packet();
-    Packet(const uint8_t* data, size_t length);
+    Packet(const uint8_t* data, size_t length, int64_t delivery_time_ms);
 
     uint8_t data[1500];
     size_t length;
+    int64_t delivery_time_ms;
   };
 
-  void QueuePacket(const uint8_t* data, size_t length);
+  void QueuePacket(const uint8_t* data,
+                   size_t length,
+                   int64_t delivery_time_ms);
 
   static bool NetworkProcess(void* transport);
   bool SendPackets();
@@ -54,11 +59,14 @@
   scoped_ptr<CriticalSectionWrapper> lock_;
   scoped_ptr<EventWrapper> packet_event_;
   scoped_ptr<ThreadWrapper> thread_;
+  Clock* clock_;
 
   bool shutting_down_;
 
   std::deque<Packet> packet_queue_;
   PacketReceiver* receiver_;
+  // TODO(stefan): Replace this with FakeNetworkPipe.
+  const int delay_ms_;
 };
 }  // namespace test
 }  // namespace webrtc
diff --git a/test/fake_audio_device.cc b/test/fake_audio_device.cc
new file mode 100644
index 0000000..a6fe165
--- /dev/null
+++ b/test/fake_audio_device.cc
@@ -0,0 +1,146 @@
+/*
+ *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/test/fake_audio_device.h"
+
+#include <algorithm>
+
+#include "testing/gtest/include/gtest/gtest.h"
+#include "webrtc/modules/media_file/source/media_file_utility.h"
+#include "webrtc/system_wrappers/interface/clock.h"
+#include "webrtc/system_wrappers/interface/critical_section_wrapper.h"
+#include "webrtc/system_wrappers/interface/event_wrapper.h"
+#include "webrtc/system_wrappers/interface/file_wrapper.h"
+#include "webrtc/system_wrappers/interface/thread_wrapper.h"
+
+namespace webrtc {
+namespace test {
+
+FakeAudioDevice::FakeAudioDevice(Clock* clock, const std::string& filename)
+    : audio_callback_(NULL),  // Set later via RegisterAudioCallback().
+      capturing_(false),      // Idle until Start() is called.
+      captured_audio_(),
+      playout_buffer_(),
+      last_playout_ms_(-1),
+      clock_(clock),
+      tick_(EventWrapper::Create()),
+      lock_(CriticalSectionWrapper::CreateCriticalSection()),
+      file_utility_(new ModuleFileUtility(0)),
+      input_stream_(FileWrapper::Create()) {
+  memset(captured_audio_, 0, sizeof(captured_audio_));
+  memset(playout_buffer_, 0, sizeof(playout_buffer_));
+  // Open audio input file as read-only and looping.
+  EXPECT_EQ(0, input_stream_->OpenFile(filename.c_str(), true, true))
+      << filename;  // Include the path in the failure message.
+}
+
+FakeAudioDevice::~FakeAudioDevice() {
+  Stop();  // Stop capturing before stopping the audio thread.
+  ThreadWrapper* audio_thread = thread_.get();
+  if (audio_thread != NULL)
+    audio_thread->Stop();
+}
+
+// Prepares PCM reading from the input file, starts the tick timer and
+// launches the audio thread. Returns 0 on success and -1 on any failure.
+int32_t FakeAudioDevice::Init() {
+  CriticalSectionScoped cs(lock_.get());
+  const bool reader_ready =
+      file_utility_->InitPCMReading(*input_stream_.get()) == 0;
+  if (!reader_ready || !tick_->StartTimer(true, 10))
+    return -1;
+
+  thread_.reset(ThreadWrapper::CreateThread(
+      FakeAudioDevice::Run, this, webrtc::kHighPriority, "FakeAudioDevice"));
+  unsigned int thread_id;
+  if (thread_.get() != NULL && thread_->Start(thread_id))
+    return 0;
+  // Creation or start failed: make sure no thread object is left behind.
+  thread_.reset();
+  return -1;
+}
+
+int32_t FakeAudioDevice::RegisterAudioCallback(AudioTransport* callback) {
+  CriticalSectionScoped cs(lock_.get());  // CaptureAudio() reads it locked.
+  audio_callback_ = callback;
+  return 0;  // Always reports success.
+}
+
+bool FakeAudioDevice::Playing() const {
+  CriticalSectionScoped cs(lock_.get());
+  return capturing_;  // CaptureAudio() requests playout data while capturing.
+}
+
+int32_t FakeAudioDevice::PlayoutDelay(uint16_t* delay_ms) const {
+  *delay_ms = 0;  // The fake device always reports a playout delay of zero.
+  return 0;
+}
+
+bool FakeAudioDevice::Recording() const {
+  CriticalSectionScoped cs(lock_.get());
+  return capturing_;  // True between Start() and Stop().
+}
+
+bool FakeAudioDevice::Run(void* obj) {  // Thread entry point; |obj| is |this|.
+  static_cast<FakeAudioDevice*>(obj)->CaptureAudio();
+  return true;  // Presumably tells ThreadWrapper to call Run() again; confirm.
+}
+
+// Runs one audio-thread iteration: while capturing, passes the next chunk of
+// file audio to the callback and requests playout data, then waits on |tick_|.
+void FakeAudioDevice::CaptureAudio() {
+  {
+    CriticalSectionScoped cs(lock_.get());
+    // A failed or empty read skips the callbacks but still falls through to
+    // the tick wait below; returning early would skip the wait that paces
+    // this function.
+    int bytes_read = 0;
+    if (capturing_) {
+      bytes_read = file_utility_->ReadPCMData(
+          *input_stream_.get(), captured_audio_, kBufferSizeBytes);
+    }
+    if (bytes_read > 0) {
+      int num_samples = bytes_read / 2;  // 2 bytes per sample.
+      uint32_t new_mic_level;
+      EXPECT_EQ(0,
+                audio_callback_->RecordedDataIsAvailable(
+                    captured_audio_, num_samples, 2, 1, kFrequencyHz, 0, 0, 0,
+                    false, new_mic_level));
+
+      // Request kFrequencyHz / 100 samples, i.e. 10 ms of playout data.
+      uint32_t samples_needed = kFrequencyHz / 100;
+      int64_t now_ms = clock_->TimeInMilliseconds();
+      uint32_t time_since_last_playout_ms = now_ms - last_playout_ms_;
+      // NOTE(review): |last_playout_ms_| is never assigned after construction
+      // in this file, so this branch does not run; confirm the intent.
+      if (last_playout_ms_ > 0 && time_since_last_playout_ms > 0)
+        samples_needed = std::min(kFrequencyHz / time_since_last_playout_ms,
+                                  kBufferSizeBytes / 2);
+      uint32_t samples_out = 0;
+      EXPECT_EQ(0,
+                audio_callback_->NeedMorePlayData(
+                    samples_needed, 2, 1, kFrequencyHz, playout_buffer_,
+                    samples_out));
+    }
+  }
+  tick_->Wait(WEBRTC_EVENT_INFINITE);
+}
+
+void FakeAudioDevice::Start() {
+  CriticalSectionScoped cs(lock_.get());
+  capturing_ = true;  // Checked by CaptureAudio() each time it runs.
+}
+
+void FakeAudioDevice::Stop() {
+  CriticalSectionScoped cs(lock_.get());
+  capturing_ = false;  // CaptureAudio() then only waits for the next tick.
+}
+}  // namespace test
+}  // namespace webrtc
diff --git a/test/fake_audio_device.h b/test/fake_audio_device.h
new file mode 100644
index 0000000..40a7547
--- /dev/null
+++ b/test/fake_audio_device.h
@@ -0,0 +1,69 @@
+/*
+ *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+#ifndef WEBRTC_TEST_FAKE_AUDIO_DEVICE_H_
+#define WEBRTC_TEST_FAKE_AUDIO_DEVICE_H_
+
+#include <string>
+
+#include "webrtc/modules/audio_device/include/fake_audio_device.h"
+#include "webrtc/system_wrappers/interface/scoped_ptr.h"
+#include "webrtc/typedefs.h"
+
+namespace webrtc {
+
+class Clock;
+class CriticalSectionWrapper;
+class EventWrapper;
+class FileWrapper;
+class ModuleFileUtility;
+class ThreadWrapper;
+
+namespace test {
+// Fake audio device for tests; drives audio_callback_ from CaptureAudio().
+class FakeAudioDevice : public FakeAudioDeviceModule {
+ public:
+  FakeAudioDevice(Clock* clock, const std::string& filename);
+
+  virtual ~FakeAudioDevice();
+  // FakeAudioDeviceModule overrides.
+  virtual int32_t Init() OVERRIDE;
+  virtual int32_t RegisterAudioCallback(AudioTransport* callback) OVERRIDE;
+
+  virtual bool Playing() const OVERRIDE;
+  virtual int32_t PlayoutDelay(uint16_t* delay_ms) const OVERRIDE;
+  virtual bool Recording() const OVERRIDE;
+  // Set capturing_ to true / false while holding lock_.
+  void Start();
+  void Stop();
+
+ private:
+  static bool Run(void* obj);  // NOTE(review): presumably run by thread_.
+  void CaptureAudio();
+  // kBufferSizeBytes holds kFrequencyHz samples at 2 bytes per sample.
+  static const uint32_t kFrequencyHz = 16000;
+  static const uint32_t kBufferSizeBytes = 2 * kFrequencyHz;
+
+  AudioTransport* audio_callback_;  // Called from CaptureAudio().
+  bool capturing_;  // Written by Start()/Stop() under lock_.
+  int8_t captured_audio_[kBufferSizeBytes];  // Sent as recorded data.
+  int8_t playout_buffer_[kBufferSizeBytes];  // Passed to NeedMorePlayData().
+  int64_t last_playout_ms_;  // Compared with clock_ time in CaptureAudio().
+
+  Clock* clock_;
+  scoped_ptr<EventWrapper> tick_;  // Waited on at the end of CaptureAudio().
+  scoped_ptr<CriticalSectionWrapper> lock_;  // Held by Start() and Stop().
+  scoped_ptr<ThreadWrapper> thread_;
+  scoped_ptr<ModuleFileUtility> file_utility_;
+  scoped_ptr<FileWrapper> input_stream_;  // Presumably opened on |filename|.
+};
+}  // namespace test
+}  // namespace webrtc
+
+#endif  // WEBRTC_TEST_FAKE_AUDIO_DEVICE_H_
diff --git a/test/rtp_rtcp_observer.h b/test/rtp_rtcp_observer.h
index 922981c..39b43c0 100644
--- a/test/rtp_rtcp_observer.h
+++ b/test/rtp_rtcp_observer.h
@@ -46,18 +46,40 @@
   }
 
  protected:
-  RtpRtcpObserver(unsigned int event_timeout_ms)
+  // Builds an observer whose send and receive transports are both constructed
+  // with |delay_ms|; |event_timeout_ms| is stored in timeout_ms_.
+  RtpRtcpObserver(unsigned int event_timeout_ms, int delay_ms)
       : lock_(CriticalSectionWrapper::CreateCriticalSection()),
         observation_complete_(EventWrapper::Create()),
         parser_(RtpHeaderParser::Create()),
         send_transport_(lock_.get(),
                         this,
                         &RtpRtcpObserver::OnSendRtp,
-                        &RtpRtcpObserver::OnSendRtcp),
+                        &RtpRtcpObserver::OnSendRtcp,
+                        delay_ms),
         receive_transport_(lock_.get(),
                            this,
                            &RtpRtcpObserver::OnReceiveRtp,
-                           &RtpRtcpObserver::OnReceiveRtcp),
+                           &RtpRtcpObserver::OnReceiveRtcp,
+                           delay_ms),
+        timeout_ms_(event_timeout_ms) {}
+
+  // Overload that passes a delay of 0 to both transports. parser_ is created
+  // here as well so that both overloads initialize the same set of members.
+  explicit RtpRtcpObserver(unsigned int event_timeout_ms)
+      : lock_(CriticalSectionWrapper::CreateCriticalSection()),
+        observation_complete_(EventWrapper::Create()),
+        parser_(RtpHeaderParser::Create()),
+        send_transport_(lock_.get(),
+                        this,
+                        &RtpRtcpObserver::OnSendRtp,
+                        &RtpRtcpObserver::OnSendRtcp,
+                        0),
+        receive_transport_(lock_.get(),
+                           this,
+                           &RtpRtcpObserver::OnReceiveRtp,
+                           &RtpRtcpObserver::OnReceiveRtcp,
+                           0),
         timeout_ms_(event_timeout_ms) {}
 
   enum Action {
@@ -87,11 +109,14 @@
    public:
     typedef Action (RtpRtcpObserver::*PacketTransportAction)(const uint8_t*,
                                                              size_t);
+
     PacketTransport(CriticalSectionWrapper* lock,
                     RtpRtcpObserver* observer,
                     PacketTransportAction on_rtp,
-                    PacketTransportAction on_rtcp)
-        : lock_(lock),
+                    PacketTransportAction on_rtcp,
+                    int delay_ms)  // Forwarded to test::DirectTransport.
+        : test::DirectTransport(delay_ms),
+          lock_(lock),
           observer_(observer),
           on_rtp_(on_rtp),
           on_rtcp_(on_rtcp) {}
diff --git a/test/webrtc_test_common.gyp b/test/webrtc_test_common.gyp
index 1a7e579..5b546c7 100644
--- a/test/webrtc_test_common.gyp
+++ b/test/webrtc_test_common.gyp
@@ -16,6 +16,8 @@
       'sources': [
         'direct_transport.cc',
         'direct_transport.h',
+        'fake_audio_device.cc',
+        'fake_audio_device.h',
         'fake_decoder.cc',
         'fake_decoder.h',
         'fake_encoder.cc',
@@ -115,6 +117,7 @@
         '<(DEPTH)/testing/gtest.gyp:gtest',
         '<(DEPTH)/third_party/gflags/gflags.gyp:gflags',
         '<(webrtc_root)/modules/modules.gyp:video_capture_module',
+        '<(webrtc_root)/modules/modules.gyp:media_file',
         '<(webrtc_root)/test/test.gyp:test_support',
         '<(webrtc_root)/common_video/common_video.gyp:frame_generator',
       ],
diff --git a/video/video_receive_stream.cc b/video/video_receive_stream.cc
index a84c6d2..243c410 100644
--- a/video/video_receive_stream.cc
+++ b/video/video_receive_stream.cc
@@ -30,7 +30,8 @@
 
 VideoReceiveStream::VideoReceiveStream(webrtc::VideoEngine* video_engine,
                                        const VideoReceiveStream::Config& config,
-                                       newapi::Transport* transport)
+                                       newapi::Transport* transport,
+                                       webrtc::VoiceEngine* voice_engine)
     : transport_adapter_(transport), config_(config), channel_(-1) {
   video_engine_base_ = ViEBase::GetInterface(video_engine);
   // TODO(mflodman): Use the other CreateChannel method.
@@ -89,6 +90,11 @@
 
   render_->AddRenderer(channel_, kVideoI420, this);
 
+  if (voice_engine) {
+    video_engine_base_->SetVoiceEngine(voice_engine);
+    video_engine_base_->ConnectAudioChannel(channel_, config_.audio_channel_id);
+  }
+
   image_process_ = ViEImageProcess::GetInterface(video_engine);
   image_process_->RegisterPreRenderCallback(channel_,
                                             config_.pre_render_callback);
@@ -108,6 +114,7 @@
 
   network_->DeregisterSendTransport(channel_);
 
+  video_engine_base_->SetVoiceEngine(NULL);
   image_process_->Release();
   video_engine_base_->Release();
   external_codec_->Release();
diff --git a/video/video_receive_stream.h b/video/video_receive_stream.h
index c2352f4..e04b334 100644
--- a/video/video_receive_stream.h
+++ b/video/video_receive_stream.h
@@ -29,6 +29,7 @@
 class ViENetwork;
 class ViERender;
 class ViERTP_RTCP;
+class VoiceEngine;
 
 namespace internal {
 
@@ -37,7 +38,8 @@
  public:
   VideoReceiveStream(webrtc::VideoEngine* video_engine,
                      const VideoReceiveStream::Config& config,
-                     newapi::Transport* transport);
+                     newapi::Transport* transport,
+                     webrtc::VoiceEngine* voice_engine);
   virtual ~VideoReceiveStream();
 
   virtual void StartReceive() OVERRIDE;
diff --git a/video_engine_tests.isolate b/video_engine_tests.isolate
index e3d2381..af98afd 100644
--- a/video_engine_tests.isolate
+++ b/video_engine_tests.isolate
@@ -26,6 +26,7 @@
         ],
         'isolate_dependency_tracked': [
           '../DEPS',
+          '../data/voice_engine/audio_long16.pcm',
           '../resources/foreman_cif.yuv',
           '../resources/paris_qcif.yuv',
           '../testing/test_env.py',
diff --git a/webrtc_tests.gypi b/webrtc_tests.gypi
index bcf3b13..6ea74f4 100644
--- a/webrtc_tests.gypi
+++ b/webrtc_tests.gypi
@@ -36,6 +36,8 @@
         'video/full_stack.cc',
         'video/rampup_tests.cc',
         'video/video_send_stream_tests.cc',
+        'voice_engine/test/auto_test/resource_manager.cc',
+        'voice_engine/test/auto_test/resource_manager.h',
         'test/test_main.cc',
       ],
       'dependencies': [