Pass picture_id from generic packetizer through codec-specific field

To free up RtpVideoHeader::generic field for codec agnostic details
from an rtp header extension.

Bug: webrtc:10342
Change-Id: I7b9d869b2ecfedb96dfd860be47ed8dffa058749
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/166175
Commit-Queue: Danil Chapovalov <danilchap@webrtc.org>
Reviewed-by: Niels Moller <nisse@webrtc.org>
Reviewed-by: Philip Eliasson <philipel@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#30396}
diff --git a/call/rtp_payload_params.cc b/call/rtp_payload_params.cc
index c71af6b..70b156a 100644
--- a/call/rtp_payload_params.cc
+++ b/call/rtp_payload_params.cc
@@ -240,15 +240,10 @@
       rtp_video_header->frame_marking.tl0_pic_idx = state_.tl0_pic_idx;
     }
   }
-  // There are currently two generic descriptors in WebRTC. The old descriptor
-  // can not share a picture id space between simulcast streams, so we use the
-  // |picture_id| in this case. We let the |picture_id| tag along in |frame_id|
-  // until the old generic format can be removed.
-  // TODO(philipel): Remove this when the new generic format has been fully
-  //                 implemented.
   if (generic_picture_id_experiment_ &&
       rtp_video_header->codec == kVideoCodecGeneric) {
-    rtp_video_header->generic.emplace().frame_id = state_.picture_id;
+    rtp_video_header->video_type_header.emplace<RTPVideoHeaderLegacyGeneric>()
+        .picture_id = state_.picture_id;
   }
 }
 
diff --git a/call/rtp_payload_params_unittest.cc b/call/rtp_payload_params_unittest.cc
index 90b08a2..ad5d8e1 100644
--- a/call/rtp_payload_params_unittest.cc
+++ b/call/rtp_payload_params_unittest.cc
@@ -333,12 +333,16 @@
       params.GetRtpVideoHeader(encoded_image, &codec_info, kDontCare);
 
   EXPECT_EQ(kVideoCodecGeneric, header.codec);
-  ASSERT_TRUE(header.generic);
-  EXPECT_EQ(0, header.generic->frame_id);
+  const auto* generic =
+      absl::get_if<RTPVideoHeaderLegacyGeneric>(&header.video_type_header);
+  ASSERT_TRUE(generic);
+  EXPECT_EQ(0, generic->picture_id);
 
   header = params.GetRtpVideoHeader(encoded_image, &codec_info, kDontCare);
-  ASSERT_TRUE(header.generic);
-  EXPECT_EQ(1, header.generic->frame_id);
+  generic =
+      absl::get_if<RTPVideoHeaderLegacyGeneric>(&header.video_type_header);
+  ASSERT_TRUE(generic);
+  EXPECT_EQ(1, generic->picture_id);
 }
 
 TEST(RtpPayloadParamsTest, GenericDescriptorForGenericCodec) {
diff --git a/modules/rtp_rtcp/source/rtp_format_video_generic.cc b/modules/rtp_rtcp/source/rtp_format_video_generic.cc
index cf2bf19..35d0f3d 100644
--- a/modules/rtp_rtcp/source/rtp_format_video_generic.cc
+++ b/modules/rtp_rtcp/source/rtp_format_video_generic.cc
@@ -87,10 +87,11 @@
   if (rtp_video_header.frame_type == VideoFrameType::kVideoFrameKey) {
     header_[0] |= RtpFormatVideoGeneric::kKeyFrameBit;
   }
-  if (rtp_video_header.generic.has_value()) {
+  if (const auto* generic_header = absl::get_if<RTPVideoHeaderLegacyGeneric>(
+          &rtp_video_header.video_type_header)) {
     // Store bottom 15 bits of the picture id. Only 15 bits are used for
     // compatibility with other packetizer implemenetations.
-    uint16_t picture_id = rtp_video_header.generic->frame_id & 0x7FFF;
+    uint16_t picture_id = generic_header->picture_id;
     header_[0] |= RtpFormatVideoGeneric::kExtendedHeaderBit;
     header_[1] = (picture_id >> 8) & 0x7F;
     header_[2] = picture_id & 0xFF;
diff --git a/modules/rtp_rtcp/source/rtp_format_video_generic_unittest.cc b/modules/rtp_rtcp/source/rtp_format_video_generic_unittest.cc
index a13b531..35e7fe7 100644
--- a/modules/rtp_rtcp/source/rtp_format_video_generic_unittest.cc
+++ b/modules/rtp_rtcp/source/rtp_format_video_generic_unittest.cc
@@ -75,7 +75,8 @@
   const uint8_t kPayload[kPayloadSize] = {};
 
   RTPVideoHeader rtp_video_header;
-  rtp_video_header.generic.emplace().frame_id = 37;
+  rtp_video_header.video_type_header.emplace<RTPVideoHeaderLegacyGeneric>()
+      .picture_id = 37;
   rtp_video_header.frame_type = VideoFrameType::kVideoFrameKey;
   RtpPacketizerGeneric packetizer(kPayload, kNoSizeLimits, rtp_video_header);
 
@@ -97,7 +98,8 @@
   RtpPacketizer::PayloadSizeLimits limits;
   limits.max_payload_len = 6;
   RTPVideoHeader rtp_video_header;
-  rtp_video_header.generic.emplace().frame_id = 37;
+  rtp_video_header.video_type_header.emplace<RTPVideoHeaderLegacyGeneric>()
+      .picture_id = 37;
   RtpPacketizerGeneric packetizer(kPayload, limits, rtp_video_header);
 
   std::vector<int> payload_sizes = NextPacketFillPayloadSizes(&packetizer);
@@ -112,7 +114,8 @@
   RtpPacketizer::PayloadSizeLimits limits;
   limits.max_payload_len = 6;
   RTPVideoHeader rtp_video_header;
-  rtp_video_header.generic.emplace().frame_id = 37;
+  rtp_video_header.video_type_header.emplace<RTPVideoHeaderLegacyGeneric>()
+      .picture_id = 37;
   RtpPacketizerGeneric packetizer(kPayload, limits, rtp_video_header);
   std::vector<int> payload_sizes = NextPacketFillPayloadSizes(&packetizer);
 
@@ -126,7 +129,8 @@
   const uint8_t kPayload[kPayloadSize] = {};
 
   RTPVideoHeader rtp_video_header;
-  rtp_video_header.generic.emplace().frame_id = 0x8137;
+  rtp_video_header.video_type_header.emplace<RTPVideoHeaderLegacyGeneric>()
+      .picture_id = 0x8137;
   rtp_video_header.frame_type = VideoFrameType::kVideoFrameKey;
   RtpPacketizerGeneric packetizer(kPayload, kNoSizeLimits, rtp_video_header);
 
diff --git a/modules/rtp_rtcp/source/rtp_sender_video.cc b/modules/rtp_rtcp/source/rtp_sender_video.cc
index 7b7e018..9779df1 100644
--- a/modules/rtp_rtcp/source/rtp_sender_video.cc
+++ b/modules/rtp_rtcp/source/rtp_sender_video.cc
@@ -806,6 +806,9 @@
       return vp9.temporal_idx;
     }
     uint8_t operator()(const RTPVideoHeaderH264&) { return kNoTemporalIdx; }
+    uint8_t operator()(const RTPVideoHeaderLegacyGeneric&) {
+      return kNoTemporalIdx;
+    }
     uint8_t operator()(const absl::monostate&) { return kNoTemporalIdx; }
   };
   switch (header.codec) {
diff --git a/modules/rtp_rtcp/source/rtp_video_header.h b/modules/rtp_rtcp/source/rtp_video_header.h
index 9af2c09..b66cba8 100644
--- a/modules/rtp_rtcp/source/rtp_video_header.h
+++ b/modules/rtp_rtcp/source/rtp_video_header.h
@@ -28,10 +28,18 @@
 #include "modules/video_coding/codecs/vp9/include/vp9_globals.h"
 
 namespace webrtc {
+// Details passed in the rtp payload for legacy generic rtp packetizer.
+// TODO(bugs.webrtc.org/9772): Deprecate in favor of passing generic video
+// details in an rtp header extension.
+struct RTPVideoHeaderLegacyGeneric {
+  uint16_t picture_id;
+};
+
 using RTPVideoTypeHeader = absl::variant<absl::monostate,
                                          RTPVideoHeaderVP8,
                                          RTPVideoHeaderVP9,
-                                         RTPVideoHeaderH264>;
+                                         RTPVideoHeaderH264,
+                                         RTPVideoHeaderLegacyGeneric>;
 
 struct RTPVideoHeader {
   struct GenericDescriptorInfo {
diff --git a/modules/rtp_rtcp/source/video_rtp_depacketizer_generic.cc b/modules/rtp_rtcp/source/video_rtp_depacketizer_generic.cc
index e601eae..6010771 100644
--- a/modules/rtp_rtcp/source/video_rtp_depacketizer_generic.cc
+++ b/modules/rtp_rtcp/source/video_rtp_depacketizer_generic.cc
@@ -59,9 +59,9 @@
       RTC_LOG(LS_WARNING) << "Too short payload for generic header.";
       return absl::nullopt;
     }
-    parsed->video_header.generic.emplace();
-    parsed->video_header.generic->frame_id =
-        ((payload_data[1] & 0x7F) << 8) | payload_data[2];
+    parsed->video_header.video_type_header
+        .emplace<RTPVideoHeaderLegacyGeneric>()
+        .picture_id = ((payload_data[1] & 0x7F) << 8) | payload_data[2];
     offset += kExtendedHeaderLength;
   }
 
diff --git a/modules/rtp_rtcp/source/video_rtp_depacketizer_generic_unittest.cc b/modules/rtp_rtcp/source/video_rtp_depacketizer_generic_unittest.cc
index 524fc3f..860ddab 100644
--- a/modules/rtp_rtcp/source/video_rtp_depacketizer_generic_unittest.cc
+++ b/modules/rtp_rtcp/source/video_rtp_depacketizer_generic_unittest.cc
@@ -46,8 +46,10 @@
       depacketizer.Parse(rtp_payload);
 
   ASSERT_TRUE(parsed);
-  ASSERT_TRUE(parsed->video_header.generic);
-  EXPECT_EQ(parsed->video_header.generic->frame_id, 0x1337);
+  const auto* generic_header = absl::get_if<RTPVideoHeaderLegacyGeneric>(
+      &parsed->video_header.video_type_header);
+  ASSERT_TRUE(generic_header);
+  EXPECT_EQ(generic_header->picture_id, 0x1337);
   EXPECT_THAT(parsed->video_payload, SizeIs(kRtpPayloadSize - 3));
 }
 
diff --git a/modules/video_coding/rtp_frame_reference_finder.cc b/modules/video_coding/rtp_frame_reference_finder.cc
index 873e71a..5122821 100644
--- a/modules/video_coding/rtp_frame_reference_finder.cc
+++ b/modules/video_coding/rtp_frame_reference_finder.cc
@@ -111,15 +111,14 @@
       return ManageFrameVp9(frame);
     case kVideoCodecH264:
       return ManageFrameH264(frame);
-    default: {
-      // Use 15 first bits of frame ID as picture ID if available.
-      const RTPVideoHeader& video_header = frame->GetRtpVideoHeader();
-      int picture_id = kNoPictureId;
-      if (video_header.generic)
-        picture_id = video_header.generic->frame_id & 0x7fff;
-
-      return ManageFramePidOrSeqNum(frame, picture_id);
-    }
+    case kVideoCodecGeneric:
+      if (auto* generic_header = absl::get_if<RTPVideoHeaderLegacyGeneric>(
+              &frame->GetRtpVideoHeader().video_type_header)) {
+        return ManageFramePidOrSeqNum(frame, generic_header->picture_id);
+      }
+      ABSL_FALLTHROUGH_INTENDED;
+    default:
+      return ManageFramePidOrSeqNum(frame, kNoPictureId);
   }
 }