Add stereo codec header and pass it through RTP

- Defines CodecSpecificInfoStereo that carries stereo-specific header info from
encoded image.
- Defines RTPVideoHeaderStereo that carries the above info to packetizer,
see module_common_types.h.
- Adds an RTPPacketizer and an RTPDepacketizer that support passing the specific
stereo header.
- Uses new data containers in StereoAdapter classes.

This CL is the step 3 for adding alpha channel support over the wire in webrtc.
See https://webrtc-review.googlesource.com/c/src/+/7800 for the experimental
CL that gives an idea about how it will come together.
Design Doc: https://goo.gl/sFeSUT

Bug: webrtc:7671
Change-Id: Ia932568fdd7065ba104afd2bc0ecf25a765748ab
Reviewed-on: https://webrtc-review.googlesource.com/22900
Reviewed-by: Emircan Uysaler <emircan@webrtc.org>
Reviewed-by: Erik Språng <sprang@webrtc.org>
Reviewed-by: Danil Chapovalov <danilchap@webrtc.org>
Reviewed-by: Niklas Enbom <niklas.enbom@webrtc.org>
Commit-Queue: Emircan Uysaler <emircan@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#20920}
diff --git a/modules/include/module_common_types.h b/modules/include/module_common_types.h
index 0db5a5e..1d89c7d 100644
--- a/modules/include/module_common_types.h
+++ b/modules/include/module_common_types.h
@@ -22,6 +22,7 @@
 #include "common_types.h"  // NOLINT(build/include)
 #include "modules/include/module_common_types_public.h"
 #include "modules/video_coding/codecs/h264/include/h264_globals.h"
+#include "modules/video_coding/codecs/stereo/include/stereo_globals.h"
 #include "modules/video_coding/codecs/vp8/include/vp8_globals.h"
 #include "modules/video_coding/codecs/vp9/include/vp9_globals.h"
 #include "rtc_base/constructormagic.h"
@@ -39,19 +40,27 @@
   size_t channel;                     // number of channels 2 = stereo
 };
 
+enum RtpVideoCodecTypes {
+  kRtpVideoNone = 0,
+  kRtpVideoGeneric = 1,
+  kRtpVideoVp8 = 2,
+  kRtpVideoVp9 = 3,
+  kRtpVideoH264 = 4,
+  kRtpVideoStereo = 5
+};
+
+struct RTPVideoHeaderStereo {
+  RtpVideoCodecTypes associated_codec_type;
+  StereoIndices indices;
+};
+
 union RTPVideoTypeHeader {
   RTPVideoHeaderVP8 VP8;
   RTPVideoHeaderVP9 VP9;
   RTPVideoHeaderH264 H264;
+  RTPVideoHeaderStereo stereo;
 };
 
-enum RtpVideoCodecTypes {
-  kRtpVideoNone,
-  kRtpVideoGeneric,
-  kRtpVideoVp8,
-  kRtpVideoVp9,
-  kRtpVideoH264
-};
 // Since RTPVideoHeader is used as a member of a union, it can't have a
 // non-trivial default constructor.
 struct RTPVideoHeader {
diff --git a/modules/rtp_rtcp/BUILD.gn b/modules/rtp_rtcp/BUILD.gn
index 0174394..71e174e 100644
--- a/modules/rtp_rtcp/BUILD.gn
+++ b/modules/rtp_rtcp/BUILD.gn
@@ -134,6 +134,8 @@
     "source/rtp_format_h264.h",
     "source/rtp_format_video_generic.cc",
     "source/rtp_format_video_generic.h",
+    "source/rtp_format_video_stereo.cc",
+    "source/rtp_format_video_stereo.h",
     "source/rtp_format_vp8.cc",
     "source/rtp_format_vp8.h",
     "source/rtp_format_vp9.cc",
@@ -355,6 +357,7 @@
       "source/rtp_fec_unittest.cc",
       "source/rtp_format_h264_unittest.cc",
       "source/rtp_format_video_generic_unittest.cc",
+      "source/rtp_format_video_stereo_unittest.cc",
       "source/rtp_format_vp8_test_helper.cc",
       "source/rtp_format_vp8_test_helper.h",
       "source/rtp_format_vp8_unittest.cc",
diff --git a/modules/rtp_rtcp/source/rtp_format.cc b/modules/rtp_rtcp/source/rtp_format.cc
index 05dc900..c5f5750 100644
--- a/modules/rtp_rtcp/source/rtp_format.cc
+++ b/modules/rtp_rtcp/source/rtp_format.cc
@@ -14,6 +14,7 @@
 
 #include "modules/rtp_rtcp/source/rtp_format_h264.h"
 #include "modules/rtp_rtcp/source/rtp_format_video_generic.h"
+#include "modules/rtp_rtcp/source/rtp_format_video_stereo.h"
 #include "modules/rtp_rtcp/source/rtp_format_vp8.h"
 #include "modules/rtp_rtcp/source/rtp_format_vp9.h"
 
@@ -36,6 +37,10 @@
       RTC_CHECK(rtp_type_header);
       return new RtpPacketizerVp9(rtp_type_header->VP9, max_payload_len,
                                   last_packet_reduction_len);
+    case kRtpVideoStereo:
+      return new RtpPacketizerStereo(rtp_type_header->stereo, frame_type,
+                                     max_payload_len,
+                                     last_packet_reduction_len);
     case kRtpVideoGeneric:
       return new RtpPacketizerGeneric(frame_type, max_payload_len,
                                       last_packet_reduction_len);
@@ -53,6 +58,8 @@
       return new RtpDepacketizerVp8();
     case kRtpVideoVp9:
       return new RtpDepacketizerVp9();
+    case kRtpVideoStereo:
+      return new RtpDepacketizerStereo();
     case kRtpVideoGeneric:
       return new RtpDepacketizerGeneric();
     case kRtpVideoNone:
diff --git a/modules/rtp_rtcp/source/rtp_format_video_generic_unittest.cc b/modules/rtp_rtcp/source/rtp_format_video_generic_unittest.cc
index 85d6689..983bd8f 100644
--- a/modules/rtp_rtcp/source/rtp_format_video_generic_unittest.cc
+++ b/modules/rtp_rtcp/source/rtp_format_video_generic_unittest.cc
@@ -59,7 +59,7 @@
 
 }  // namespace
 
-TEST(RtpPacketizerVideoGeneric, AllPacketsMayBeEqual_RespectsMaxPayloadSize) {
+TEST(RtpPacketizerVideoGeneric, AllPacketsMayBeEqualAndRespectMaxPayloadSize) {
   const size_t kMaxPayloadLen = 6;
   const size_t kLastPacketReductionLen = 2;
   const size_t kPayloadSize = 13;
diff --git a/modules/rtp_rtcp/source/rtp_format_video_stereo.cc b/modules/rtp_rtcp/source/rtp_format_video_stereo.cc
new file mode 100644
index 0000000..7c3a933
--- /dev/null
+++ b/modules/rtp_rtcp/source/rtp_format_video_stereo.cc
@@ -0,0 +1,164 @@
+/*
+ *  Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <memory>
+#include <string>
+
+#include "modules/include/module_common_types.h"
+#include "modules/rtp_rtcp/source/byte_io.h"
+#include "modules/rtp_rtcp/source/rtp_format_video_stereo.h"
+#include "modules/rtp_rtcp/source/rtp_packet_to_send.h"
+#include "rtc_base/logging.h"
+
+namespace webrtc {
+
+namespace {
+// Write the Stereo header descriptor.
+//       0 1 2 3 4 5 6 7 8
+//      +-+-+-+-+-+-+-+-+-+
+//      | VideoCodecType  | (optional)
+//      +-+-+-+-+-+-+-+-+-+
+//      |   frame_index   | (optional)
+//      +-+-+-+-+-+-+-+-+-+
+//      |   frame_count   | (optional)
+//      +-+-+-+-+-+-+-+-+-+
+//      |  picture_index  | (optional)
+//      |    (16 bits)    |
+//      +-+-+-+-+-+-+-+-+-+
+//      |  HeaderMarker   | (mandatory)
+//      +-+-+-+-+-+-+-+-+-+
+constexpr size_t kStereoHeaderLength =
+    sizeof(uint8_t) + sizeof(uint8_t) + sizeof(uint8_t) + sizeof(uint16_t);
+
+constexpr size_t kHeaderMarkerLength = 1;
+constexpr uint8_t kHeaderMarkerBit = 0x02;
+}  // namespace
+
+RtpPacketizerStereo::RtpPacketizerStereo(const RTPVideoHeaderStereo& header,
+                                         FrameType frame_type,
+                                         size_t max_payload_len,
+                                         size_t last_packet_reduction_len)
+    : header_(header),
+      max_payload_len_(max_payload_len - kHeaderMarkerLength),
+      last_packet_reduction_len_(last_packet_reduction_len +
+                                 kStereoHeaderLength),
+      packetizer_(frame_type, max_payload_len_, last_packet_reduction_len_) {}
+
+RtpPacketizerStereo::~RtpPacketizerStereo() {}
+
+size_t RtpPacketizerStereo::SetPayloadData(
+    const uint8_t* payload_data,
+    size_t payload_size,
+    const RTPFragmentationHeader* fragmentation) {
+  num_packets_remaining_ =
+      packetizer_.SetPayloadData(payload_data, payload_size, fragmentation);
+  return num_packets_remaining_;
+}
+
+bool RtpPacketizerStereo::NextPacket(RtpPacketToSend* packet) {
+  if (max_payload_len_ <= last_packet_reduction_len_) {
+    RTC_LOG(LS_ERROR) << "Payload length not large enough.";
+    return false;
+  }
+
+  RTC_DCHECK(packet);
+  if (!packetizer_.NextPacket(packet))
+    return false;
+
+  RTC_DCHECK_GT(num_packets_remaining_, 0);
+  const bool last_packet = --num_packets_remaining_ == 0;
+  const size_t header_length = last_packet
+                                   ? kHeaderMarkerLength + kStereoHeaderLength
+                                   : kHeaderMarkerLength;
+
+  const uint8_t* payload_ptr = packet->payload().data();
+  const size_t payload_size = packet->payload_size();
+  uint8_t* padded_payload_ptr =
+      packet->SetPayloadSize(header_length + packet->payload_size());
+  RTC_DCHECK(padded_payload_ptr);
+
+  padded_payload_ptr += payload_size;
+  if (last_packet) {
+    ByteWriter<uint8_t>::WriteBigEndian(
+        padded_payload_ptr,
+        static_cast<uint8_t>(header_.associated_codec_type));
+    padded_payload_ptr += sizeof(uint8_t);
+    ByteWriter<uint8_t>::WriteBigEndian(padded_payload_ptr,
+                                        header_.indices.frame_index);
+    padded_payload_ptr += sizeof(uint8_t);
+    ByteWriter<uint8_t>::WriteBigEndian(padded_payload_ptr,
+                                        header_.indices.frame_count);
+    padded_payload_ptr += sizeof(uint8_t);
+    ByteWriter<uint16_t>::WriteBigEndian(padded_payload_ptr,
+                                         header_.indices.picture_index);
+    padded_payload_ptr += sizeof(uint16_t);
+    RTC_DCHECK_EQ(payload_size + kStereoHeaderLength,
+                  padded_payload_ptr - payload_ptr);
+  }
+  padded_payload_ptr[0] = last_packet ? kHeaderMarkerBit : 0;
+  return true;
+}
+
+std::string RtpPacketizerStereo::ToString() {
+  return "RtpPacketizerStereo";
+}
+
+RtpDepacketizerStereo::~RtpDepacketizerStereo() {}
+
+bool RtpDepacketizerStereo::Parse(ParsedPayload* parsed_payload,
+                                  const uint8_t* payload_data,
+                                  size_t payload_data_length) {
+  RTC_DCHECK(parsed_payload);
+  if (payload_data_length == 0) {
+    RTC_LOG(LS_ERROR) << "Empty payload.";
+    return false;
+  }
+
+  uint8_t header_marker = payload_data[payload_data_length - 1];
+  --payload_data_length;
+  const bool last_packet = (header_marker & kHeaderMarkerBit) != 0;
+
+  if (last_packet) {
+    if (payload_data_length <= kStereoHeaderLength) {
+      RTC_LOG(LS_WARNING) << "Payload not large enough.";
+      return false;
+    }
+    size_t offset = payload_data_length - kStereoHeaderLength;
+    uint8_t associated_codec_type =
+        ByteReader<uint8_t>::ReadBigEndian(&payload_data[offset]);
+    switch (associated_codec_type) {
+      case kRtpVideoVp8:
+      case kRtpVideoVp9:
+      case kRtpVideoH264:
+        break;
+      default:
+        RTC_LOG(LS_WARNING) << "Unexpected codec type.";
+        return false;
+    }
+    parsed_payload->type.Video.codecHeader.stereo.associated_codec_type =
+        static_cast<RtpVideoCodecTypes>(associated_codec_type);
+    offset += sizeof(uint8_t);
+    parsed_payload->type.Video.codecHeader.stereo.indices.frame_index =
+        ByteReader<uint8_t>::ReadBigEndian(&payload_data[offset]);
+    offset += sizeof(uint8_t);
+    parsed_payload->type.Video.codecHeader.stereo.indices.frame_count =
+        ByteReader<uint8_t>::ReadBigEndian(&payload_data[offset]);
+    offset += sizeof(uint8_t);
+    parsed_payload->type.Video.codecHeader.stereo.indices.picture_index =
+        ByteReader<uint16_t>::ReadBigEndian(&payload_data[offset]);
+    RTC_DCHECK_EQ(payload_data_length, offset + sizeof(uint16_t));
+    payload_data_length -= kStereoHeaderLength;
+  }
+  if (!depacketizer_.Parse(parsed_payload, payload_data, payload_data_length))
+    return false;
+  parsed_payload->type.Video.codec = kRtpVideoStereo;
+  return true;
+}
+}  // namespace webrtc
diff --git a/modules/rtp_rtcp/source/rtp_format_video_stereo.h b/modules/rtp_rtcp/source/rtp_format_video_stereo.h
new file mode 100644
index 0000000..f054498
--- /dev/null
+++ b/modules/rtp_rtcp/source/rtp_format_video_stereo.h
@@ -0,0 +1,65 @@
+/*
+ *  Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+#ifndef MODULES_RTP_RTCP_SOURCE_RTP_FORMAT_VIDEO_STEREO_H_
+#define MODULES_RTP_RTCP_SOURCE_RTP_FORMAT_VIDEO_STEREO_H_
+
+#include <string>
+
+#include "modules/rtp_rtcp/source/rtp_format.h"
+#include "modules/rtp_rtcp/source/rtp_format_video_generic.h"
+#include "rtc_base/constructormagic.h"
+
+namespace webrtc {
+
+class RtpPacketizerStereo : public RtpPacketizer {
+ public:
+  RtpPacketizerStereo(const RTPVideoHeaderStereo& header,
+                      FrameType frame_type,
+                      size_t max_payload_len,
+                      size_t last_packet_reduction_len);
+
+  ~RtpPacketizerStereo() override;
+
+  size_t SetPayloadData(const uint8_t* payload_data,
+                        size_t payload_size,
+                        const RTPFragmentationHeader* fragmentation) override;
+
+  // Get the next payload with generic payload header.
+  // Write payload and set marker bit of the |packet|.
+  // Returns true on success, false otherwise.
+  bool NextPacket(RtpPacketToSend* packet) override;
+
+  std::string ToString() override;
+
+ private:
+  const RTPVideoHeaderStereo header_;
+  const size_t max_payload_len_;
+  const size_t last_packet_reduction_len_;
+  size_t num_packets_remaining_ = 0;
+  // TODO(emircan): Use codec specific packetizers. If not possible, refactor
+  // this class to have similar logic to generic packetizer.
+  RtpPacketizerGeneric packetizer_;
+
+  RTC_DISALLOW_COPY_AND_ASSIGN(RtpPacketizerStereo);
+};
+
+class RtpDepacketizerStereo : public RtpDepacketizer {
+ public:
+  ~RtpDepacketizerStereo() override;
+
+  bool Parse(ParsedPayload* parsed_payload,
+             const uint8_t* payload_data,
+             size_t payload_data_length) override;
+
+ private:
+  RtpDepacketizerGeneric depacketizer_;
+};
+}  // namespace webrtc
+#endif  // MODULES_RTP_RTCP_SOURCE_RTP_FORMAT_VIDEO_STEREO_H_
diff --git a/modules/rtp_rtcp/source/rtp_format_video_stereo_unittest.cc b/modules/rtp_rtcp/source/rtp_format_video_stereo_unittest.cc
new file mode 100644
index 0000000..f75dbce
--- /dev/null
+++ b/modules/rtp_rtcp/source/rtp_format_video_stereo_unittest.cc
@@ -0,0 +1,118 @@
+/*
+ *  Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <vector>
+
+#include "modules/rtp_rtcp/source/rtp_format_video_stereo.h"
+#include "modules/rtp_rtcp/source/rtp_packet_to_send.h"
+#include "test/gmock.h"
+#include "test/gtest.h"
+
+namespace webrtc {
+
+namespace {
+
+using ::testing::Each;
+using ::testing::ElementsAreArray;
+using ::testing::Le;
+using ::testing::SizeIs;
+
+constexpr RtpVideoCodecTypes kTestAssociatedCodecType = kRtpVideoVp9;
+constexpr uint8_t kTestFrameIndex = 23;
+constexpr uint8_t kTestFrameCount = 34;
+constexpr uint16_t kTestPictureIndex = 123;
+
+RTPVideoHeaderStereo GenerateTestStereoHeader() {
+  RTPVideoHeaderStereo header;
+  header.associated_codec_type = kTestAssociatedCodecType;
+  header.indices.frame_index = kTestFrameIndex;
+  header.indices.frame_count = kTestFrameCount;
+  header.indices.picture_index = kTestPictureIndex;
+  return header;
+}
+
+std::vector<uint8_t> GenerateTestPayload() {
+  const size_t kPayloadSize = 68;
+  return std::vector<uint8_t>(kPayloadSize, 0);
+}
+
+std::vector<size_t> NextPacketFillPayloadSizes(
+    RtpPacketizerStereo* packetizer) {
+  RtpPacketToSend packet(nullptr);
+  std::vector<size_t> result;
+  while (packetizer->NextPacket(&packet))
+    result.push_back(packet.payload_size());
+  return result;
+}
+
+}  // namespace
+
+TEST(RtpPacketizerVideoStereo, SmallMaxPayloadSizeThrowsErrors) {
+  const size_t kMaxPayloadLen = 7;
+  const size_t kLastPacketReductionLen = 2;
+  RtpPacketizerStereo packetizer(GenerateTestStereoHeader(), kVideoFrameKey,
+                                 kMaxPayloadLen, kLastPacketReductionLen);
+  const std::vector<uint8_t>& test_payload = GenerateTestPayload();
+  packetizer.SetPayloadData(test_payload.data(), test_payload.size(), nullptr);
+  RtpPacketToSend packet(nullptr);
+  EXPECT_FALSE(packetizer.NextPacket(&packet));
+}
+
+TEST(RtpPacketizerVideoStereo, AllPacketsRespectMaxPayloadSize) {
+  const size_t kMaxPayloadLen = 34;
+  const size_t kLastPacketReductionLen = 2;
+  RtpPacketizerStereo packetizer(GenerateTestStereoHeader(), kVideoFrameKey,
+                                 kMaxPayloadLen, kLastPacketReductionLen);
+  const std::vector<uint8_t>& test_payload = GenerateTestPayload();
+  size_t num_packets = packetizer.SetPayloadData(test_payload.data(),
+                                                 test_payload.size(), nullptr);
+  std::vector<size_t> payload_sizes = NextPacketFillPayloadSizes(&packetizer);
+  EXPECT_THAT(payload_sizes, SizeIs(num_packets));
+  EXPECT_THAT(payload_sizes, Each(Le(kMaxPayloadLen)));
+}
+
+TEST(RtpPacketizerVideoStereo, PreservesTypeAndHeader) {
+  const size_t kMaxPayloadLen = 34;
+  const size_t kLastPacketReductionLen = 2;
+  const auto kFrameType = kVideoFrameKey;
+  RtpPacketizerStereo packetizer(GenerateTestStereoHeader(), kFrameType,
+                                 kMaxPayloadLen, kLastPacketReductionLen);
+  const std::vector<uint8_t>& test_payload = GenerateTestPayload();
+  packetizer.SetPayloadData(test_payload.data(), test_payload.size(), nullptr);
+  RtpPacketToSend packet(nullptr);
+  std::vector<RtpPacketToSend> result;
+  while (packetizer.NextPacket(&packet)) {
+    result.push_back(packet);
+    packet = RtpPacketToSend(nullptr);
+  }
+
+  RtpDepacketizerStereo depacketizer;
+  const auto& first_payload = result.front().payload();
+  RtpDepacketizer::ParsedPayload parsed_payload;
+  ASSERT_TRUE(depacketizer.Parse(&parsed_payload, first_payload.data(),
+                                 first_payload.size()));
+  EXPECT_TRUE(parsed_payload.type.Video.is_first_packet_in_frame);
+
+  const auto& last_payload = result.back().payload();
+  ASSERT_TRUE(depacketizer.Parse(&parsed_payload, last_payload.data(),
+                                 last_payload.size()));
+  EXPECT_EQ(kFrameType, parsed_payload.frame_type);
+  EXPECT_EQ(kRtpVideoStereo, parsed_payload.type.Video.codec);
+  EXPECT_EQ(kTestAssociatedCodecType,
+            parsed_payload.type.Video.codecHeader.stereo.associated_codec_type);
+  EXPECT_EQ(kTestFrameIndex,
+            parsed_payload.type.Video.codecHeader.stereo.indices.frame_index);
+  EXPECT_EQ(kTestFrameCount,
+            parsed_payload.type.Video.codecHeader.stereo.indices.frame_count);
+  EXPECT_EQ(kTestPictureIndex,
+            parsed_payload.type.Video.codecHeader.stereo.indices.picture_index);
+}
+
+}  // namespace webrtc
diff --git a/modules/rtp_rtcp/source/rtp_payload_registry.cc b/modules/rtp_rtcp/source/rtp_payload_registry.cc
index 93e8a34..60c8c75 100644
--- a/modules/rtp_rtcp/source/rtp_payload_registry.cc
+++ b/modules/rtp_rtcp/source/rtp_payload_registry.cc
@@ -58,6 +58,8 @@
     case kVideoCodecRED:
     case kVideoCodecULPFEC:
       return kRtpVideoNone;
+    case kVideoCodecStereo:
+      return kRtpVideoStereo;
     default:
       return kRtpVideoGeneric;
   }
diff --git a/modules/rtp_rtcp/source/rtp_sender_video.cc b/modules/rtp_rtcp/source/rtp_sender_video.cc
index cf23bd3..ad618a5 100644
--- a/modules/rtp_rtcp/source/rtp_sender_video.cc
+++ b/modules/rtp_rtcp/source/rtp_sender_video.cc
@@ -89,6 +89,8 @@
     video_type = kRtpVideoH264;
   } else if (RtpUtility::StringCompare(payload_name, "I420", 4)) {
     video_type = kRtpVideoGeneric;
+  } else if (RtpUtility::StringCompare(payload_name, "stereo", 6)) {
+    video_type = kRtpVideoStereo;
   } else {
     video_type = kRtpVideoGeneric;
   }
diff --git a/modules/video_coding/BUILD.gn b/modules/video_coding/BUILD.gn
index 9d22006..997ad08 100644
--- a/modules/video_coding/BUILD.gn
+++ b/modules/video_coding/BUILD.gn
@@ -132,6 +132,7 @@
   sources = [
     "codecs/h264/include/h264_globals.h",
     "codecs/interface/common_constants.h",
+    "codecs/stereo/include/stereo_globals.h",
     "codecs/vp8/include/vp8_globals.h",
     "codecs/vp9/include/vp9_globals.h",
   ]
diff --git a/modules/video_coding/codec_database.cc b/modules/video_coding/codec_database.cc
index 2bc3077..e300ad3 100644
--- a/modules/video_coding/codec_database.cc
+++ b/modules/video_coding/codec_database.cc
@@ -254,6 +254,7 @@
     case kVideoCodecRED:
     case kVideoCodecULPFEC:
     case kVideoCodecFlexfec:
+    case kVideoCodecStereo:
       break;
     // Unknown codec type, reset just to be sure.
     case kVideoCodecUnknown:
diff --git a/modules/video_coding/codecs/stereo/include/stereo_encoder_adapter.h b/modules/video_coding/codecs/stereo/include/stereo_encoder_adapter.h
index ef1e9e1..74ed1a2 100644
--- a/modules/video_coding/codecs/stereo/include/stereo_encoder_adapter.h
+++ b/modules/video_coding/codecs/stereo/include/stereo_encoder_adapter.h
@@ -11,6 +11,7 @@
 #ifndef MODULES_VIDEO_CODING_CODECS_STEREO_INCLUDE_STEREO_ENCODER_ADAPTER_H_
 #define MODULES_VIDEO_CODING_CODECS_STEREO_INCLUDE_STEREO_ENCODER_ADAPTER_H_
 
+#include <map>
 #include <memory>
 #include <vector>
 
@@ -56,15 +57,16 @@
   // Wrapper class that redirects OnEncodedImage() calls.
   class AdapterEncodedImageCallback;
 
-  // Holds the encoded image output of a frame.
-  struct EncodedImageData;
-
   VideoEncoderFactory* const factory_;
   std::vector<std::unique_ptr<VideoEncoder>> encoders_;
   std::vector<std::unique_ptr<AdapterEncodedImageCallback>> adapter_callbacks_;
   EncodedImageCallback* encoded_complete_callback_;
 
-  uint64_t picture_index_ = 0;
+  // Holds the encoded image info.
+  struct ImageStereoInfo;
+  std::map<uint32_t /* timestamp */, ImageStereoInfo> image_stereo_info_;
+
+  uint16_t picture_index_ = 0;
   std::vector<uint8_t> stereo_dummy_planes_;
 };
 
diff --git a/modules/video_coding/codecs/stereo/include/stereo_globals.h b/modules/video_coding/codecs/stereo/include/stereo_globals.h
new file mode 100644
index 0000000..9f9ad0e
--- /dev/null
+++ b/modules/video_coding/codecs/stereo/include/stereo_globals.h
@@ -0,0 +1,24 @@
+/*
+ *  Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_VIDEO_CODING_CODECS_STEREO_INCLUDE_STEREO_GLOBALS_H_
+#define MODULES_VIDEO_CODING_CODECS_STEREO_INCLUDE_STEREO_GLOBALS_H_
+
+namespace webrtc {
+
+struct StereoIndices {
+  uint8_t frame_index;
+  uint8_t frame_count;
+  uint16_t picture_index;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_VIDEO_CODING_CODECS_STEREO_INCLUDE_STEREO_GLOBALS_H_
diff --git a/modules/video_coding/codecs/stereo/stereo_decoder_adapter.cc b/modules/video_coding/codecs/stereo/stereo_decoder_adapter.cc
index 82e87d4..caf3299 100644
--- a/modules/video_coding/codecs/stereo/stereo_decoder_adapter.cc
+++ b/modules/video_coding/codecs/stereo/stereo_decoder_adapter.cc
@@ -33,18 +33,18 @@
                               AlphaCodecStream stream_idx)
       : adapter_(adapter), stream_idx_(stream_idx) {}
 
-  void Decoded(VideoFrame& decodedImage,
+  void Decoded(VideoFrame& decoded_image,
                rtc::Optional<int32_t> decode_time_ms,
                rtc::Optional<uint8_t> qp) override {
     if (!adapter_)
       return;
-    adapter_->Decoded(stream_idx_, &decodedImage, decode_time_ms, qp);
+    adapter_->Decoded(stream_idx_, &decoded_image, decode_time_ms, qp);
   }
-  int32_t Decoded(VideoFrame& decodedImage) override {
+  int32_t Decoded(VideoFrame& decoded_image) override {
     RTC_NOTREACHED();
     return WEBRTC_VIDEO_CODEC_OK;
   }
-  int32_t Decoded(VideoFrame& decodedImage, int64_t decode_time_ms) override {
+  int32_t Decoded(VideoFrame& decoded_image, int64_t decode_time_ms) override {
     RTC_NOTREACHED();
     return WEBRTC_VIDEO_CODEC_OK;
   }
@@ -57,22 +57,22 @@
 struct StereoDecoderAdapter::DecodedImageData {
   explicit DecodedImageData(AlphaCodecStream stream_idx)
       : stream_idx_(stream_idx),
-        decodedImage_(I420Buffer::Create(1 /* width */, 1 /* height */),
-                      0,
-                      0,
-                      kVideoRotation_0) {
+        decoded_image_(I420Buffer::Create(1 /* width */, 1 /* height */),
+                       0,
+                       0,
+                       kVideoRotation_0) {
     RTC_DCHECK_EQ(kAXXStream, stream_idx);
   }
   DecodedImageData(AlphaCodecStream stream_idx,
-                   const VideoFrame& decodedImage,
+                   const VideoFrame& decoded_image,
                    const rtc::Optional<int32_t>& decode_time_ms,
                    const rtc::Optional<uint8_t>& qp)
       : stream_idx_(stream_idx),
-        decodedImage_(decodedImage),
+        decoded_image_(decoded_image),
         decode_time_ms_(decode_time_ms),
         qp_(qp) {}
   const AlphaCodecStream stream_idx_;
-  VideoFrame decodedImage_;
+  VideoFrame decoded_image_;
   const rtc::Optional<int32_t> decode_time_ms_;
   const rtc::Optional<uint8_t> qp_;
 
@@ -113,14 +113,21 @@
     const RTPFragmentationHeader* /*fragmentation*/,
     const CodecSpecificInfo* codec_specific_info,
     int64_t render_time_ms) {
-  // TODO(emircan): Read |codec_specific_info->stereoInfo| to split frames.
-  int32_t rv =
-      decoders_[kYUVStream]->Decode(input_image, missing_frames, nullptr,
-                                    codec_specific_info, render_time_ms);
-  if (rv)
-    return rv;
-  rv = decoders_[kAXXStream]->Decode(input_image, missing_frames, nullptr,
-                                     codec_specific_info, render_time_ms);
+  const CodecSpecificInfoStereo& stereo_info =
+      codec_specific_info->codecSpecific.stereo;
+  RTC_DCHECK_LT(static_cast<size_t>(stereo_info.indices.frame_index),
+                decoders_.size());
+  if (stereo_info.indices.frame_count == 1) {
+    RTC_DCHECK_EQ(static_cast<int>(stereo_info.indices.frame_index), 0);
+    RTC_DCHECK(decoded_data_.find(input_image._timeStamp) ==
+               decoded_data_.end());
+    decoded_data_.emplace(std::piecewise_construct,
+                          std::forward_as_tuple(input_image._timeStamp),
+                          std::forward_as_tuple(kAXXStream));
+  }
+
+  int32_t rv = decoders_[stereo_info.indices.frame_index]->Decode(
+      input_image, missing_frames, nullptr, nullptr, render_time_ms);
   return rv;
 }
 
@@ -152,12 +159,12 @@
     if (stream_idx == kYUVStream) {
       RTC_DCHECK_EQ(kAXXStream, other_image_data.stream_idx_);
       MergeAlphaImages(decoded_image, decode_time_ms, qp,
-                       &other_image_data.decodedImage_,
+                       &other_image_data.decoded_image_,
                        other_image_data.decode_time_ms_, other_image_data.qp_);
     } else {
       RTC_DCHECK_EQ(kYUVStream, other_image_data.stream_idx_);
       RTC_DCHECK_EQ(kAXXStream, stream_idx);
-      MergeAlphaImages(&other_image_data.decodedImage_,
+      MergeAlphaImages(&other_image_data.decoded_image_,
                        other_image_data.decode_time_ms_, other_image_data.qp_,
                        decoded_image, decode_time_ms, qp);
     }
@@ -166,6 +173,8 @@
   }
   RTC_DCHECK(decoded_data_.find(decoded_image->timestamp()) ==
              decoded_data_.end());
+  // decoded_data_[decoded_image->timestamp()] =
+  //     DecodedImageData(stream_idx, *decoded_image, decode_time_ms, qp);
   decoded_data_.emplace(
       std::piecewise_construct,
       std::forward_as_tuple(decoded_image->timestamp()),
@@ -173,16 +182,21 @@
 }
 
 void StereoDecoderAdapter::MergeAlphaImages(
-    VideoFrame* decodedImage,
+    VideoFrame* decoded_image,
     const rtc::Optional<int32_t>& decode_time_ms,
     const rtc::Optional<uint8_t>& qp,
-    VideoFrame* alpha_decodedImage,
+    VideoFrame* alpha_decoded_image,
     const rtc::Optional<int32_t>& alpha_decode_time_ms,
     const rtc::Optional<uint8_t>& alpha_qp) {
+  if (!alpha_decoded_image->timestamp()) {
+    decoded_complete_callback_->Decoded(*decoded_image, decode_time_ms, qp);
+    return;
+  }
+
   rtc::scoped_refptr<webrtc::I420BufferInterface> yuv_buffer =
-      decodedImage->video_frame_buffer()->ToI420();
+      decoded_image->video_frame_buffer()->ToI420();
   rtc::scoped_refptr<webrtc::I420BufferInterface> alpha_buffer =
-      alpha_decodedImage->video_frame_buffer()->ToI420();
+      alpha_decoded_image->video_frame_buffer()->ToI420();
   RTC_DCHECK_EQ(yuv_buffer->width(), alpha_buffer->width());
   RTC_DCHECK_EQ(yuv_buffer->height(), alpha_buffer->height());
   rtc::scoped_refptr<I420ABufferInterface> merged_buffer = WrapI420ABuffer(
@@ -192,8 +206,8 @@
       alpha_buffer->StrideY(),
       rtc::Bind(&KeepBufferRefs, yuv_buffer, alpha_buffer));
 
-  VideoFrame merged_image(merged_buffer, decodedImage->timestamp(),
-                          0 /* render_time_ms */, decodedImage->rotation());
+  VideoFrame merged_image(merged_buffer, decoded_image->timestamp(),
+                          0 /* render_time_ms */, decoded_image->rotation());
   decoded_complete_callback_->Decoded(merged_image, decode_time_ms, qp);
 }
 
diff --git a/modules/video_coding/codecs/stereo/stereo_encoder_adapter.cc b/modules/video_coding/codecs/stereo/stereo_encoder_adapter.cc
index b449c68..ed7a486 100644
--- a/modules/video_coding/codecs/stereo/stereo_encoder_adapter.cc
+++ b/modules/video_coding/codecs/stereo/stereo_encoder_adapter.cc
@@ -44,6 +44,20 @@
   const AlphaCodecStream stream_idx_;
 };
 
+// Holds the encoded image info.
+struct StereoEncoderAdapter::ImageStereoInfo {
+  ImageStereoInfo(uint16_t picture_index, uint8_t frame_count)
+      : picture_index(picture_index),
+        frame_count(frame_count),
+        encoded_count(0) {}
+  uint16_t picture_index;
+  uint8_t frame_count;
+  uint8_t encoded_count;
+
+ private:
+  RTC_DISALLOW_IMPLICIT_CONSTRUCTORS(ImageStereoInfo);
+};
+
 StereoEncoderAdapter::StereoEncoderAdapter(VideoEncoderFactory* factory)
     : factory_(factory), encoded_complete_callback_(nullptr) {}
 
@@ -83,15 +97,21 @@
   if (!encoded_complete_callback_) {
     return WEBRTC_VIDEO_CODEC_UNINITIALIZED;
   }
-  // Encode YUV
-  int rv = encoders_[kYUVStream]->Encode(input_image, codec_specific_info,
-                                         frame_types);
-  if (rv)
-    return rv;
 
   const bool has_alpha = input_image.video_frame_buffer()->type() ==
                          VideoFrameBuffer::Type::kI420A;
-  if (!has_alpha)
+  image_stereo_info_.emplace(
+      std::piecewise_construct, std::forward_as_tuple(input_image.timestamp()),
+      std::forward_as_tuple(picture_index_++,
+                            has_alpha ? kAlphaCodecStreams : 1));
+
+  // Encode YUV
+  int rv = encoders_[kYUVStream]->Encode(input_image, codec_specific_info,
+                                         frame_types);
+  // If we do not receive an alpha frame, we send a single frame for this
+  // |picture_index_|. The receiver will receive |frame_count| as 1 which
+  // specifies this case.
+  if (rv || !has_alpha)
     return rv;
 
   // Encode AXX
@@ -129,7 +149,7 @@
 int StereoEncoderAdapter::SetRateAllocation(const BitrateAllocation& bitrate,
                                             uint32_t framerate) {
   for (auto& encoder : encoders_) {
-    // TODO(emircan): |new_framerate| is used to calculate duration for encoder
+    // TODO(emircan): |framerate| is used to calculate duration in encoder
     // instances. We report the total frame rate to keep real time for now.
     // Remove this after refactoring duration logic.
     const int rv = encoder->SetRateAllocation(
@@ -160,11 +180,25 @@
     const EncodedImage& encodedImage,
     const CodecSpecificInfo* codecSpecificInfo,
     const RTPFragmentationHeader* fragmentation) {
-  if (stream_idx == kAXXStream)
-    return EncodedImageCallback::Result(EncodedImageCallback::Result::OK);
+  const VideoCodecType associated_coded_type = codecSpecificInfo->codecType;
+  const auto& image_stereo_info_itr =
+      image_stereo_info_.find(encodedImage._timeStamp);
+  RTC_DCHECK(image_stereo_info_itr != image_stereo_info_.end());
+  ImageStereoInfo& image_stereo_info = image_stereo_info_itr->second;
+  const uint8_t frame_count = image_stereo_info.frame_count;
+  const uint16_t picture_index = image_stereo_info.picture_index;
+  if (++image_stereo_info.encoded_count == frame_count)
+    image_stereo_info_.erase(image_stereo_info_itr);
 
-  // TODO(emircan): Fill |codec_specific_info| with stereo parameters.
-  encoded_complete_callback_->OnEncodedImage(encodedImage, codecSpecificInfo,
+  CodecSpecificInfo codec_info = *codecSpecificInfo;
+  codec_info.codecType = kVideoCodecStereo;
+  codec_info.codec_name = "stereo";
+  codec_info.codecSpecific.stereo.associated_codec_type = associated_coded_type;
+  codec_info.codecSpecific.stereo.indices.frame_index = stream_idx;
+  codec_info.codecSpecific.stereo.indices.frame_count = frame_count;
+  codec_info.codecSpecific.stereo.indices.picture_index = picture_index;
+
+  encoded_complete_callback_->OnEncodedImage(encodedImage, &codec_info,
                                              fragmentation);
   return EncodedImageCallback::Result(EncodedImageCallback::Result::OK);
 }
diff --git a/modules/video_coding/codecs/stereo/test/stereo_adapter_unittest.cc b/modules/video_coding/codecs/stereo/test/stereo_adapter_unittest.cc
index caef8b1..34723c5 100644
--- a/modules/video_coding/codecs/stereo/test/stereo_adapter_unittest.cc
+++ b/modules/video_coding/codecs/stereo/test/stereo_adapter_unittest.cc
@@ -101,8 +101,18 @@
   EncodedImage encoded_frame;
   CodecSpecificInfo codec_specific_info;
   ASSERT_TRUE(WaitForEncodedFrame(&encoded_frame, &codec_specific_info));
-  EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
-            decoder_->Decode(encoded_frame, false, nullptr));
+
+  EXPECT_EQ(kVideoCodecStereo, codec_specific_info.codecType);
+  EXPECT_EQ(kVideoCodecVP9,
+            codec_specific_info.codecSpecific.stereo.associated_codec_type);
+  EXPECT_EQ(0, codec_specific_info.codecSpecific.stereo.indices.frame_index);
+  EXPECT_EQ(1, codec_specific_info.codecSpecific.stereo.indices.frame_count);
+  EXPECT_EQ(0ull,
+            codec_specific_info.codecSpecific.stereo.indices.picture_index);
+
+  EXPECT_EQ(
+      WEBRTC_VIDEO_CODEC_OK,
+      decoder_->Decode(encoded_frame, false, nullptr, &codec_specific_info));
   std::unique_ptr<VideoFrame> decoded_frame;
   rtc::Optional<uint8_t> decoded_qp;
   ASSERT_TRUE(WaitForDecodedFrame(&decoded_frame, &decoded_qp));
@@ -112,13 +122,38 @@
 
 TEST_F(TestStereoAdapter, EncodeDecodeI420AFrame) {
   std::unique_ptr<VideoFrame> yuva_frame = CreateI420AInputFrame();
+  const size_t expected_num_encoded_frames = 2;
+  SetWaitForEncodedFramesThreshold(expected_num_encoded_frames);
   EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
             encoder_->Encode(*yuva_frame, nullptr, nullptr));
-  EncodedImage encoded_frame;
-  CodecSpecificInfo codec_specific_info;
-  ASSERT_TRUE(WaitForEncodedFrame(&encoded_frame, &codec_specific_info));
-  EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
-            decoder_->Decode(encoded_frame, false, nullptr));
+  std::vector<EncodedImage> encoded_frames;
+  std::vector<CodecSpecificInfo> codec_specific_infos;
+  ASSERT_TRUE(WaitForEncodedFrames(&encoded_frames, &codec_specific_infos));
+  EXPECT_EQ(expected_num_encoded_frames, encoded_frames.size());
+  EXPECT_EQ(expected_num_encoded_frames, codec_specific_infos.size());
+
+  const CodecSpecificInfo& yuv_info = codec_specific_infos[kYUVStream];
+  EXPECT_EQ(kVideoCodecStereo, yuv_info.codecType);
+  EXPECT_EQ(kVideoCodecVP9,
+            yuv_info.codecSpecific.stereo.associated_codec_type);
+  EXPECT_EQ(kYUVStream, yuv_info.codecSpecific.stereo.indices.frame_index);
+  EXPECT_EQ(kAlphaCodecStreams,
+            yuv_info.codecSpecific.stereo.indices.frame_count);
+  EXPECT_EQ(0ull, yuv_info.codecSpecific.stereo.indices.picture_index);
+
+  const CodecSpecificInfo& axx_info = codec_specific_infos[kAXXStream];
+  EXPECT_EQ(kVideoCodecStereo, axx_info.codecType);
+  EXPECT_EQ(kVideoCodecVP9,
+            axx_info.codecSpecific.stereo.associated_codec_type);
+  EXPECT_EQ(kAXXStream, axx_info.codecSpecific.stereo.indices.frame_index);
+  EXPECT_EQ(kAlphaCodecStreams,
+            axx_info.codecSpecific.stereo.indices.frame_count);
+  EXPECT_EQ(0ull, axx_info.codecSpecific.stereo.indices.picture_index);
+
+  EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, decoder_->Decode(encoded_frames[kYUVStream],
+                                                    false, nullptr, &yuv_info));
+  EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, decoder_->Decode(encoded_frames[kAXXStream],
+                                                    false, nullptr, &axx_info));
   std::unique_ptr<VideoFrame> decoded_frame;
   rtc::Optional<uint8_t> decoded_qp;
   ASSERT_TRUE(WaitForDecodedFrame(&decoded_frame, &decoded_qp));
diff --git a/modules/video_coding/codecs/test/video_codec_test.cc b/modules/video_coding/codecs/test/video_codec_test.cc
index b6faae8..88e9b39 100644
--- a/modules/video_coding/codecs/test/video_codec_test.cc
+++ b/modules/video_coding/codecs/test/video_codec_test.cc
@@ -33,13 +33,19 @@
     const CodecSpecificInfo* codec_specific_info,
     const RTPFragmentationHeader* fragmentation) {
   rtc::CritScope lock(&test_->encoded_frame_section_);
-  test_->encoded_frame_.emplace(frame);
+  test_->encoded_frames_.push_back(frame);
   RTC_DCHECK(codec_specific_info);
-  test_->codec_specific_info_.codecType = codec_specific_info->codecType;
-  // Skip |codec_name|, to avoid allocating.
-  test_->codec_specific_info_.codecSpecific =
-      codec_specific_info->codecSpecific;
-  test_->encoded_frame_event_.Set();
+  test_->codec_specific_infos_.push_back(*codec_specific_info);
+  if (!test_->wait_for_encoded_frames_threshold_) {
+    test_->encoded_frame_event_.Set();
+    return Result(Result::OK);
+  }
+
+  if (test_->encoded_frames_.size() ==
+      test_->wait_for_encoded_frames_threshold_) {
+    test_->wait_for_encoded_frames_threshold_ = 1;
+    test_->encoded_frame_event_.Set();
+  }
   return Result(Result::OK);
 }
 
@@ -74,17 +80,38 @@
 bool VideoCodecTest::WaitForEncodedFrame(
     EncodedImage* frame,
     CodecSpecificInfo* codec_specific_info) {
-  bool ret = encoded_frame_event_.Wait(kEncodeTimeoutMs);
-  EXPECT_TRUE(ret) << "Timed out while waiting for an encoded frame.";
+  std::vector<EncodedImage> frames;
+  std::vector<CodecSpecificInfo> codec_specific_infos;
+  if (!WaitForEncodedFrames(&frames, &codec_specific_infos))
+    return false;
+  EXPECT_EQ(frames.size(), static_cast<size_t>(1));
+  EXPECT_EQ(frames.size(), codec_specific_infos.size());
+  *frame = frames[0];
+  *codec_specific_info = codec_specific_infos[0];
+  return true;
+}
+
+void VideoCodecTest::SetWaitForEncodedFramesThreshold(size_t num_frames) {
+  rtc::CritScope lock(&encoded_frame_section_);
+  wait_for_encoded_frames_threshold_ = num_frames;
+}
+
+bool VideoCodecTest::WaitForEncodedFrames(
+    std::vector<EncodedImage>* frames,
+    std::vector<CodecSpecificInfo>* codec_specific_info) {
+  EXPECT_TRUE(encoded_frame_event_.Wait(kEncodeTimeoutMs))
+      << "Timed out while waiting for encoded frame.";
   // This becomes unsafe if there are multiple threads waiting for frames.
   rtc::CritScope lock(&encoded_frame_section_);
-  EXPECT_TRUE(encoded_frame_);
-  if (encoded_frame_) {
-    *frame = std::move(*encoded_frame_);
-    encoded_frame_.reset();
-    RTC_DCHECK(codec_specific_info);
-    codec_specific_info->codecType = codec_specific_info_.codecType;
-    codec_specific_info->codecSpecific = codec_specific_info_.codecSpecific;
+  EXPECT_FALSE(encoded_frames_.empty());
+  EXPECT_FALSE(codec_specific_infos_.empty());
+  EXPECT_EQ(encoded_frames_.size(), codec_specific_infos_.size());
+  if (!encoded_frames_.empty()) {
+    *frames = encoded_frames_;
+    encoded_frames_.clear();
+    RTC_DCHECK(!codec_specific_infos_.empty());
+    *codec_specific_info = codec_specific_infos_;
+    codec_specific_infos_.clear();
     return true;
   } else {
     return false;
diff --git a/modules/video_coding/codecs/test/video_codec_test.h b/modules/video_coding/codecs/test/video_codec_test.h
index 76f5234..6c67a26 100644
--- a/modules/video_coding/codecs/test/video_codec_test.h
+++ b/modules/video_coding/codecs/test/video_codec_test.h
@@ -12,6 +12,7 @@
 #define MODULES_VIDEO_CODING_CODECS_TEST_VIDEO_CODEC_TEST_H_
 
 #include <memory>
+#include <vector>
 
 #include "api/video_codecs/video_decoder.h"
 #include "api/video_codecs/video_encoder.h"
@@ -32,6 +33,7 @@
         decode_complete_callback_(this),
         encoded_frame_event_(false /* manual reset */,
                              false /* initially signaled */),
+        wait_for_encoded_frames_threshold_(1),
         decoded_frame_event_(false /* manual reset */,
                              false /* initially signaled */) {}
 
@@ -74,8 +76,19 @@
 
   void SetUp() override;
 
+  // Helper method for waiting for a single encoded frame.
   bool WaitForEncodedFrame(EncodedImage* frame,
                            CodecSpecificInfo* codec_specific_info);
+
+  // Helper methods for waiting for multiple encoded frames. Caller must
+  // define how many frames are to be waited for via |num_frames| before calling
+  // Encode(). Then, they can expect to retrieve them via
+  // WaitForEncodedFrames().
+  void SetWaitForEncodedFramesThreshold(size_t num_frames);
+  bool WaitForEncodedFrames(
+      std::vector<EncodedImage>* frames,
+      std::vector<CodecSpecificInfo>* codec_specific_info);
+
+  // Helper method for waiting for a single decoded frame.
   bool WaitForDecodedFrame(std::unique_ptr<VideoFrame>* frame,
                            rtc::Optional<uint8_t>* qp);
 
@@ -95,9 +108,11 @@
 
   rtc::Event encoded_frame_event_;
   rtc::CriticalSection encoded_frame_section_;
-  rtc::Optional<EncodedImage> encoded_frame_
+  size_t wait_for_encoded_frames_threshold_;
+  std::vector<EncodedImage> encoded_frames_
       RTC_GUARDED_BY(encoded_frame_section_);
-  CodecSpecificInfo codec_specific_info_ RTC_GUARDED_BY(encoded_frame_section_);
+  std::vector<CodecSpecificInfo> codec_specific_infos_
+      RTC_GUARDED_BY(encoded_frame_section_);
 
   rtc::Event decoded_frame_event_;
   rtc::CriticalSection decoded_frame_section_;
diff --git a/modules/video_coding/encoded_frame.cc b/modules/video_coding/encoded_frame.cc
index 8abea7e..2d8716c 100644
--- a/modules/video_coding/encoded_frame.cc
+++ b/modules/video_coding/encoded_frame.cc
@@ -193,6 +193,28 @@
         _codecSpecificInfo.codecType = kVideoCodecH264;
         break;
       }
+      case kRtpVideoStereo: {
+        _codecSpecificInfo.codecType = kVideoCodecStereo;
+        VideoCodecType associated_codec_type = kVideoCodecUnknown;
+        switch (header->codecHeader.stereo.associated_codec_type) {
+          case kRtpVideoVp8:
+            associated_codec_type = kVideoCodecVP8;
+            break;
+          case kRtpVideoVp9:
+            associated_codec_type = kVideoCodecVP9;
+            break;
+          case kRtpVideoH264:
+            associated_codec_type = kVideoCodecH264;
+            break;
+          default:
+            RTC_NOTREACHED();
+        }
+        _codecSpecificInfo.codecSpecific.stereo.associated_codec_type =
+            associated_codec_type;
+        _codecSpecificInfo.codecSpecific.stereo.indices =
+            header->codecHeader.stereo.indices;
+        break;
+      }
       default: {
         _codecSpecificInfo.codecType = kVideoCodecUnknown;
         break;
diff --git a/modules/video_coding/frame_object.cc b/modules/video_coding/frame_object.cc
index 6a31cfd..6eb28de 100644
--- a/modules/video_coding/frame_object.cc
+++ b/modules/video_coding/frame_object.cc
@@ -43,9 +43,14 @@
   frame_type_ = first_packet->frameType;
   codec_type_ = first_packet->codec;
 
+  // The stereo codec carries the codec-specific header in the last packet
+  // (instead of the first) to avoid copying it into every packet.
+  VCMPacket* packet_with_codec_specific =
+      codec_type_ == kVideoCodecStereo ? packet_buffer_->GetPacket(last_seq_num)
+                                       : first_packet;
+
   // TODO(philipel): Remove when encoded image is replaced by FrameObject.
   // VCMEncodedFrame members
-  CopyCodecSpecific(&first_packet->video_header);
+  CopyCodecSpecific(&packet_with_codec_specific->video_header);
   _completeFrame = true;
   _payloadType = first_packet->payloadType;
   _timeStamp = first_packet->timestamp;
diff --git a/modules/video_coding/include/video_codec_interface.h b/modules/video_coding/include/video_codec_interface.h
index 6616053..ef52d8bf 100644
--- a/modules/video_coding/include/video_codec_interface.h
+++ b/modules/video_coding/include/video_codec_interface.h
@@ -73,11 +73,17 @@
   H264PacketizationMode packetization_mode;
 };
 
+struct CodecSpecificInfoStereo {
+  VideoCodecType associated_codec_type;
+  StereoIndices indices;
+};
+
 union CodecSpecificInfoUnion {
   CodecSpecificInfoGeneric generic;
   CodecSpecificInfoVP8 VP8;
   CodecSpecificInfoVP9 VP9;
   CodecSpecificInfoH264 H264;
+  CodecSpecificInfoStereo stereo;
 };
 
 // Note: if any pointers are added to this struct or its sub-structs, it
diff --git a/modules/video_coding/packet.cc b/modules/video_coding/packet.cc
index f176194..9ae5ba0 100644
--- a/modules/video_coding/packet.cc
+++ b/modules/video_coding/packet.cc
@@ -133,6 +133,9 @@
       }
       codec = kVideoCodecH264;
       return;
+    case kRtpVideoStereo:
+      codec = kVideoCodecStereo;
+      return;
     case kRtpVideoGeneric:
       codec = kVideoCodecGeneric;
       return;
diff --git a/modules/video_coding/rtp_frame_reference_finder.cc b/modules/video_coding/rtp_frame_reference_finder.cc
index a2f32c2..1f8519e 100644
--- a/modules/video_coding/rtp_frame_reference_finder.cc
+++ b/modules/video_coding/rtp_frame_reference_finder.cc
@@ -98,6 +98,7 @@
     case kVideoCodecUnknown:
     case kVideoCodecH264:
     case kVideoCodecI420:
+    case kVideoCodecStereo:
     case kVideoCodecGeneric:
       return ManageFrameGeneric(frame, kNoPictureId);
   }