Remove RTPVideoHeader::vp9() accessors.

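Callers now use the video_type_header variant directly: emplace
RTPVideoHeaderVP9 when populating the header and read it back with
absl::get / absl::get_if. A minimal migration sketch (the variable name
rtp_video_header is a placeholder for illustration, not code from this CL):

  // Before: lazy accessor that emplaced the VP9 alternative on demand.
  rtp_video_header.vp9().picture_id = picture_id;

  // After: emplace the alternative once when writing...
  auto& vp9_header =
      rtp_video_header.video_type_header.emplace<RTPVideoHeaderVP9>();
  vp9_header.picture_id = picture_id;

  // ...and use absl::get (or absl::get_if when the codec may differ)
  // when reading.
  const auto& vp9 =
      absl::get<RTPVideoHeaderVP9>(rtp_video_header.video_type_header);
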
TBR=stefan@webrtc.org

Bug: none
Change-Id: Ia2f728ea3377754a16a0b081e25c4479fe211b3e
Reviewed-on: https://webrtc-review.googlesource.com/93024
Commit-Queue: Philip Eliasson <philipel@webrtc.org>
Reviewed-by: Danil Chapovalov <danilchap@webrtc.org>
Reviewed-by: Erik Språng <sprang@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#24243}
diff --git a/call/rtp_payload_params.cc b/call/rtp_payload_params.cc
index d0915cc..d61d40f 100644
--- a/call/rtp_payload_params.cc
+++ b/call/rtp_payload_params.cc
@@ -33,39 +33,40 @@
       return;
     }
     case kVideoCodecVP9: {
-      rtp->vp9().InitRTPVideoHeaderVP9();
-      rtp->vp9().inter_pic_predicted =
+      auto& vp9_header = rtp->video_type_header.emplace<RTPVideoHeaderVP9>();
+      vp9_header.InitRTPVideoHeaderVP9();
+      vp9_header.inter_pic_predicted =
           info.codecSpecific.VP9.inter_pic_predicted;
-      rtp->vp9().flexible_mode = info.codecSpecific.VP9.flexible_mode;
-      rtp->vp9().ss_data_available = info.codecSpecific.VP9.ss_data_available;
-      rtp->vp9().non_ref_for_inter_layer_pred =
+      vp9_header.flexible_mode = info.codecSpecific.VP9.flexible_mode;
+      vp9_header.ss_data_available = info.codecSpecific.VP9.ss_data_available;
+      vp9_header.non_ref_for_inter_layer_pred =
           info.codecSpecific.VP9.non_ref_for_inter_layer_pred;
-      rtp->vp9().temporal_idx = info.codecSpecific.VP9.temporal_idx;
-      rtp->vp9().spatial_idx = info.codecSpecific.VP9.spatial_idx;
-      rtp->vp9().temporal_up_switch = info.codecSpecific.VP9.temporal_up_switch;
-      rtp->vp9().inter_layer_predicted =
+      vp9_header.temporal_idx = info.codecSpecific.VP9.temporal_idx;
+      vp9_header.spatial_idx = info.codecSpecific.VP9.spatial_idx;
+      vp9_header.temporal_up_switch = info.codecSpecific.VP9.temporal_up_switch;
+      vp9_header.inter_layer_predicted =
           info.codecSpecific.VP9.inter_layer_predicted;
-      rtp->vp9().gof_idx = info.codecSpecific.VP9.gof_idx;
-      rtp->vp9().num_spatial_layers = info.codecSpecific.VP9.num_spatial_layers;
+      vp9_header.gof_idx = info.codecSpecific.VP9.gof_idx;
+      vp9_header.num_spatial_layers = info.codecSpecific.VP9.num_spatial_layers;
 
       if (info.codecSpecific.VP9.ss_data_available) {
-        rtp->vp9().spatial_layer_resolution_present =
+        vp9_header.spatial_layer_resolution_present =
             info.codecSpecific.VP9.spatial_layer_resolution_present;
         if (info.codecSpecific.VP9.spatial_layer_resolution_present) {
           for (size_t i = 0; i < info.codecSpecific.VP9.num_spatial_layers;
                ++i) {
-            rtp->vp9().width[i] = info.codecSpecific.VP9.width[i];
-            rtp->vp9().height[i] = info.codecSpecific.VP9.height[i];
+            vp9_header.width[i] = info.codecSpecific.VP9.width[i];
+            vp9_header.height[i] = info.codecSpecific.VP9.height[i];
           }
         }
-        rtp->vp9().gof.CopyGofInfoVP9(info.codecSpecific.VP9.gof);
+        vp9_header.gof.CopyGofInfoVP9(info.codecSpecific.VP9.gof);
       }
 
-      rtp->vp9().num_ref_pics = info.codecSpecific.VP9.num_ref_pics;
+      vp9_header.num_ref_pics = info.codecSpecific.VP9.num_ref_pics;
       for (int i = 0; i < info.codecSpecific.VP9.num_ref_pics; ++i) {
-        rtp->vp9().pid_diff[i] = info.codecSpecific.VP9.p_diff[i];
+        vp9_header.pid_diff[i] = info.codecSpecific.VP9.p_diff[i];
       }
-      rtp->vp9().end_of_picture = info.codecSpecific.VP9.end_of_picture;
+      vp9_header.end_of_picture = info.codecSpecific.VP9.end_of_picture;
       return;
     }
     case kVideoCodecH264: {
@@ -161,19 +162,21 @@
     }
   }
   if (rtp_video_header->codec == kVideoCodecVP9) {
-    rtp_video_header->vp9().picture_id = state_.picture_id;
+    auto& vp9_header =
+        absl::get<RTPVideoHeaderVP9>(rtp_video_header->video_type_header);
+    vp9_header.picture_id = state_.picture_id;
 
     // Note that in the case that we have no temporal layers but we do have
     // spatial layers, packets will carry layering info with a temporal_idx of
     // zero, and we then have to set and increment tl0_pic_idx.
-    if (rtp_video_header->vp9().temporal_idx != kNoTemporalIdx ||
-        rtp_video_header->vp9().spatial_idx != kNoSpatialIdx) {
+    if (vp9_header.temporal_idx != kNoTemporalIdx ||
+        vp9_header.spatial_idx != kNoSpatialIdx) {
       if (first_frame_in_picture &&
-          (rtp_video_header->vp9().temporal_idx == 0 ||
-           rtp_video_header->vp9().temporal_idx == kNoTemporalIdx)) {
+          (vp9_header.temporal_idx == 0 ||
+           vp9_header.temporal_idx == kNoTemporalIdx)) {
         ++state_.tl0_pic_idx;
       }
-      rtp_video_header->vp9().tl0_pic_idx = state_.tl0_pic_idx;
+      vp9_header.tl0_pic_idx = state_.tl0_pic_idx;
     }
   }
 }
diff --git a/call/rtp_payload_params_unittest.cc b/call/rtp_payload_params_unittest.cc
index 96aa591..b2339cd 100644
--- a/call/rtp_payload_params_unittest.cc
+++ b/call/rtp_payload_params_unittest.cc
@@ -83,14 +83,15 @@
   EXPECT_EQ(kVideoRotation_90, header.rotation);
   EXPECT_EQ(VideoContentType::SCREENSHARE, header.content_type);
   EXPECT_EQ(kVideoCodecVP9, header.codec);
-  EXPECT_EQ(kPictureId + 1, header.vp9().picture_id);
-  EXPECT_EQ(kTl0PicIdx, header.vp9().tl0_pic_idx);
-  EXPECT_EQ(header.vp9().temporal_idx,
-            codec_info.codecSpecific.VP9.temporal_idx);
-  EXPECT_EQ(header.vp9().spatial_idx, codec_info.codecSpecific.VP9.spatial_idx);
-  EXPECT_EQ(header.vp9().num_spatial_layers,
+  const auto& vp9_header =
+      absl::get<RTPVideoHeaderVP9>(header.video_type_header);
+  EXPECT_EQ(kPictureId + 1, vp9_header.picture_id);
+  EXPECT_EQ(kTl0PicIdx, vp9_header.tl0_pic_idx);
+  EXPECT_EQ(vp9_header.temporal_idx, codec_info.codecSpecific.VP9.temporal_idx);
+  EXPECT_EQ(vp9_header.spatial_idx, codec_info.codecSpecific.VP9.spatial_idx);
+  EXPECT_EQ(vp9_header.num_spatial_layers,
             codec_info.codecSpecific.VP9.num_spatial_layers);
-  EXPECT_EQ(header.vp9().end_of_picture,
+  EXPECT_EQ(vp9_header.end_of_picture,
             codec_info.codecSpecific.VP9.end_of_picture);
 
   // Next spatial layer.
@@ -103,14 +104,13 @@
   EXPECT_EQ(kVideoRotation_90, header.rotation);
   EXPECT_EQ(VideoContentType::SCREENSHARE, header.content_type);
   EXPECT_EQ(kVideoCodecVP9, header.codec);
-  EXPECT_EQ(kPictureId + 1, header.vp9().picture_id);
-  EXPECT_EQ(kTl0PicIdx, header.vp9().tl0_pic_idx);
-  EXPECT_EQ(header.vp9().temporal_idx,
-            codec_info.codecSpecific.VP9.temporal_idx);
-  EXPECT_EQ(header.vp9().spatial_idx, codec_info.codecSpecific.VP9.spatial_idx);
-  EXPECT_EQ(header.vp9().num_spatial_layers,
+  EXPECT_EQ(kPictureId + 1, vp9_header.picture_id);
+  EXPECT_EQ(kTl0PicIdx, vp9_header.tl0_pic_idx);
+  EXPECT_EQ(vp9_header.temporal_idx, codec_info.codecSpecific.VP9.temporal_idx);
+  EXPECT_EQ(vp9_header.spatial_idx, codec_info.codecSpecific.VP9.spatial_idx);
+  EXPECT_EQ(vp9_header.num_spatial_layers,
             codec_info.codecSpecific.VP9.num_spatial_layers);
-  EXPECT_EQ(header.vp9().end_of_picture,
+  EXPECT_EQ(vp9_header.end_of_picture,
             codec_info.codecSpecific.VP9.end_of_picture);
 }
 
@@ -226,8 +226,10 @@
   RTPVideoHeader header = params.GetRtpVideoHeader(encoded_image, &codec_info);
 
   EXPECT_EQ(kVideoCodecVP9, header.codec);
-  EXPECT_EQ(kInitialPictureId1 + 1, header.vp9().picture_id);
-  EXPECT_EQ(kInitialTl0PicIdx1, header.vp9().tl0_pic_idx);
+  const auto& vp9_header =
+      absl::get<RTPVideoHeaderVP9>(header.video_type_header);
+  EXPECT_EQ(kInitialPictureId1 + 1, vp9_header.picture_id);
+  EXPECT_EQ(kInitialTl0PicIdx1, vp9_header.tl0_pic_idx);
 
   // OnEncodedImage, temporalIdx: 0.
   codec_info.codecSpecific.VP9.temporal_idx = 0;
@@ -235,8 +237,8 @@
   header = params.GetRtpVideoHeader(encoded_image, &codec_info);
 
   EXPECT_EQ(kVideoCodecVP9, header.codec);
-  EXPECT_EQ(kInitialPictureId1 + 2, header.vp9().picture_id);
-  EXPECT_EQ(kInitialTl0PicIdx1 + 1, header.vp9().tl0_pic_idx);
+  EXPECT_EQ(kInitialPictureId1 + 2, vp9_header.picture_id);
+  EXPECT_EQ(kInitialTl0PicIdx1 + 1, vp9_header.tl0_pic_idx);
 
   // OnEncodedImage, first_frame_in_picture = false
   codec_info.codecSpecific.VP9.first_frame_in_picture = false;
@@ -244,8 +246,8 @@
   header = params.GetRtpVideoHeader(encoded_image, &codec_info);
 
   EXPECT_EQ(kVideoCodecVP9, header.codec);
-  EXPECT_EQ(kInitialPictureId1 + 2, header.vp9().picture_id);
-  EXPECT_EQ(kInitialTl0PicIdx1 + 1, header.vp9().tl0_pic_idx);
+  EXPECT_EQ(kInitialPictureId1 + 2, vp9_header.picture_id);
+  EXPECT_EQ(kInitialTl0PicIdx1 + 1, vp9_header.tl0_pic_idx);
 
   // State should hold latest used picture id and tl0_pic_idx.
   EXPECT_EQ(kInitialPictureId1 + 2, params.state().picture_id);
diff --git a/modules/rtp_rtcp/source/rtp_format.cc b/modules/rtp_rtcp/source/rtp_format.cc
index 3f2038e..f13128f 100644
--- a/modules/rtp_rtcp/source/rtp_format.cc
+++ b/modules/rtp_rtcp/source/rtp_format.cc
@@ -34,9 +34,12 @@
     case kVideoCodecVP8:
       return new RtpPacketizerVp8(rtp_video_header->vp8(), max_payload_len,
                                   last_packet_reduction_len);
-    case kVideoCodecVP9:
-      return new RtpPacketizerVp9(rtp_video_header->vp9(), max_payload_len,
+    case kVideoCodecVP9: {
+      const auto& vp9 =
+          absl::get<RTPVideoHeaderVP9>(rtp_video_header->video_type_header);
+      return new RtpPacketizerVp9(vp9, max_payload_len,
                                   last_packet_reduction_len);
+    }
     case kVideoCodecGeneric:
       return new RtpPacketizerGeneric(frame_type, max_payload_len,
                                       last_packet_reduction_len);
diff --git a/modules/rtp_rtcp/source/rtp_format_vp9.cc b/modules/rtp_rtcp/source/rtp_format_vp9.cc
index 8fd1be8..974df8f 100644
--- a/modules/rtp_rtcp/source/rtp_format_vp9.cc
+++ b/modules/rtp_rtcp/source/rtp_format_vp9.cc
@@ -719,41 +719,42 @@
 
   parsed_payload->frame_type = p_bit ? kVideoFrameDelta : kVideoFrameKey;
 
-  RTPVideoHeaderVP9* vp9 = &parsed_payload->video_header().vp9();
-  vp9->InitRTPVideoHeaderVP9();
-  vp9->inter_pic_predicted = p_bit ? true : false;
-  vp9->flexible_mode = f_bit ? true : false;
-  vp9->beginning_of_frame = b_bit ? true : false;
-  vp9->end_of_frame = e_bit ? true : false;
-  vp9->ss_data_available = v_bit ? true : false;
-  vp9->non_ref_for_inter_layer_pred = z_bit ? true : false;
+  auto& vp9_header = parsed_payload->video_header()
+                         .video_type_header.emplace<RTPVideoHeaderVP9>();
+  vp9_header.InitRTPVideoHeaderVP9();
+  vp9_header.inter_pic_predicted = p_bit ? true : false;
+  vp9_header.flexible_mode = f_bit ? true : false;
+  vp9_header.beginning_of_frame = b_bit ? true : false;
+  vp9_header.end_of_frame = e_bit ? true : false;
+  vp9_header.ss_data_available = v_bit ? true : false;
+  vp9_header.non_ref_for_inter_layer_pred = z_bit ? true : false;
 
   // Parse fields that are present.
-  if (i_bit && !ParsePictureId(&parser, vp9)) {
+  if (i_bit && !ParsePictureId(&parser, &vp9_header)) {
     RTC_LOG(LS_ERROR) << "Failed parsing VP9 picture id.";
     return false;
   }
-  if (l_bit && !ParseLayerInfo(&parser, vp9)) {
+  if (l_bit && !ParseLayerInfo(&parser, &vp9_header)) {
     RTC_LOG(LS_ERROR) << "Failed parsing VP9 layer info.";
     return false;
   }
-  if (p_bit && f_bit && !ParseRefIndices(&parser, vp9)) {
+  if (p_bit && f_bit && !ParseRefIndices(&parser, &vp9_header)) {
     RTC_LOG(LS_ERROR) << "Failed parsing VP9 ref indices.";
     return false;
   }
   if (v_bit) {
-    if (!ParseSsData(&parser, vp9)) {
+    if (!ParseSsData(&parser, &vp9_header)) {
       RTC_LOG(LS_ERROR) << "Failed parsing VP9 SS data.";
       return false;
     }
-    if (vp9->spatial_layer_resolution_present) {
+    if (vp9_header.spatial_layer_resolution_present) {
       // TODO(asapersson): Add support for spatial layers.
-      parsed_payload->video_header().width = vp9->width[0];
-      parsed_payload->video_header().height = vp9->height[0];
+      parsed_payload->video_header().width = vp9_header.width[0];
+      parsed_payload->video_header().height = vp9_header.height[0];
     }
   }
   parsed_payload->video_header().is_first_packet_in_frame =
-      b_bit && (!l_bit || !vp9->inter_layer_predicted);
+      b_bit && (!l_bit || !vp9_header.inter_layer_predicted);
 
   uint64_t rem_bits = parser.RemainingBitCount();
   assert(rem_bits % 8 == 0);
diff --git a/modules/rtp_rtcp/source/rtp_format_vp9_unittest.cc b/modules/rtp_rtcp/source/rtp_format_vp9_unittest.cc
index ada56d0..66c6091 100644
--- a/modules/rtp_rtcp/source/rtp_format_vp9_unittest.cc
+++ b/modules/rtp_rtcp/source/rtp_format_vp9_unittest.cc
@@ -82,7 +82,9 @@
   RtpDepacketizer::ParsedPayload parsed;
   ASSERT_TRUE(depacketizer->Parse(&parsed, packet, expected_length));
   EXPECT_EQ(kVideoCodecVP9, parsed.video_header().codec);
-  VerifyHeader(expected, parsed.video_header().vp9());
+  auto& vp9_header =
+      absl::get<RTPVideoHeaderVP9>(parsed.video_header().video_type_header);
+  VerifyHeader(expected, vp9_header);
   const size_t kExpectedPayloadLength = expected_length - expected_hdr_length;
   VerifyPayload(parsed, packet + expected_hdr_length, kExpectedPayloadLength);
 }
diff --git a/modules/rtp_rtcp/source/rtp_sender_unittest.cc b/modules/rtp_rtcp/source/rtp_sender_unittest.cc
index 0631a29..9622680 100644
--- a/modules/rtp_rtcp/source/rtp_sender_unittest.cc
+++ b/modules/rtp_rtcp/source/rtp_sender_unittest.cc
@@ -1891,8 +1891,9 @@
   RTPVideoHeader header;
   header.codec = kVideoCodecVP9;
 
+  auto& vp9_header = header.video_type_header.emplace<RTPVideoHeaderVP9>();
   for (int tid = 1; tid <= kMaxTemporalStreams; ++tid) {
-    header.vp9().temporal_idx = tid;
+    vp9_header.temporal_idx = tid;
 
     EXPECT_EQ(kDontRetransmit, rtp_sender_video_->GetStorageType(
                                    header, kRetransmitOff,
diff --git a/modules/rtp_rtcp/source/rtp_sender_video.cc b/modules/rtp_rtcp/source/rtp_sender_video.cc
index a7f3df0..43450dc 100644
--- a/modules/rtp_rtcp/source/rtp_sender_video.cc
+++ b/modules/rtp_rtcp/source/rtp_sender_video.cc
@@ -471,7 +471,8 @@
     case kVideoCodecVP8:
       return header.vp8().temporalIdx;
     case kVideoCodecVP9:
-      return header.vp9().temporal_idx;
+      return absl::get<RTPVideoHeaderVP9>(header.video_type_header)
+          .temporal_idx;
     default:
       return kNoTemporalIdx;
   }
diff --git a/modules/rtp_rtcp/source/rtp_video_header.h b/modules/rtp_rtcp/source/rtp_video_header.h
index cb4dd9a..ec3dbd2 100644
--- a/modules/rtp_rtcp/source/rtp_video_header.h
+++ b/modules/rtp_rtcp/source/rtp_video_header.h
@@ -44,20 +44,6 @@
 
     return absl::get<RTPVideoHeaderVP8>(video_type_header);
   }
-  // TODO(philipel): Remove when downstream projects have been updated.
-  RTPVideoHeaderVP9& vp9() {
-    if (!absl::holds_alternative<RTPVideoHeaderVP9>(video_type_header))
-      video_type_header.emplace<RTPVideoHeaderVP9>();
-
-    return absl::get<RTPVideoHeaderVP9>(video_type_header);
-  }
-  // TODO(philipel): Remove when downstream projects have been updated.
-  const RTPVideoHeaderVP9& vp9() const {
-    if (!absl::holds_alternative<RTPVideoHeaderVP9>(video_type_header))
-      video_type_header.emplace<RTPVideoHeaderVP9>();
-
-    return absl::get<RTPVideoHeaderVP9>(video_type_header);
-  }
 
   // Information for generic codec descriptor.
   int64_t frame_id = 0;
diff --git a/modules/video_coding/decoding_state_unittest.cc b/modules/video_coding/decoding_state_unittest.cc
index 740c384..0049ca7 100644
--- a/modules/video_coding/decoding_state_unittest.cc
+++ b/modules/video_coding/decoding_state_unittest.cc
@@ -458,7 +458,8 @@
   packet.dataPtr = data;
   packet.video_header.codec = kVideoCodecVP9;
 
-  RTPVideoHeaderVP9& vp9_hdr = packet.video_header.vp9();
+  auto& vp9_hdr =
+      packet.video_header.video_type_header.emplace<RTPVideoHeaderVP9>();
   vp9_hdr.picture_id = 10;
   vp9_hdr.flexible_mode = true;
 
@@ -501,7 +502,8 @@
   packet.dataPtr = data;
   packet.video_header.codec = kVideoCodecVP9;
 
-  RTPVideoHeaderVP9& vp9_hdr = packet.video_header.vp9();
+  auto& vp9_hdr =
+      packet.video_header.video_type_header.emplace<RTPVideoHeaderVP9>();
   vp9_hdr.picture_id = 10;
   vp9_hdr.flexible_mode = true;
 
@@ -556,7 +558,8 @@
   packet.dataPtr = data;
   packet.video_header.codec = kVideoCodecVP9;
 
-  RTPVideoHeaderVP9& vp9_hdr = packet.video_header.vp9();
+  auto& vp9_hdr =
+      packet.video_header.video_type_header.emplace<RTPVideoHeaderVP9>();
   vp9_hdr.picture_id = 10;
   vp9_hdr.flexible_mode = true;
 
diff --git a/modules/video_coding/encoded_frame.cc b/modules/video_coding/encoded_frame.cc
index ebaa83b..2cb7936 100644
--- a/modules/video_coding/encoded_frame.cc
+++ b/modules/video_coding/encoded_frame.cc
@@ -77,6 +77,8 @@
         break;
       }
       case kVideoCodecVP9: {
+        const auto& vp9_header =
+            absl::get<RTPVideoHeaderVP9>(header->video_type_header);
         if (_codecSpecificInfo.codecType != kVideoCodecVP9) {
           // This is the first packet for this frame.
           _codecSpecificInfo.codecSpecific.VP9.temporal_idx = 0;
@@ -86,48 +88,48 @@
           _codecSpecificInfo.codecType = kVideoCodecVP9;
         }
         _codecSpecificInfo.codecSpecific.VP9.inter_pic_predicted =
-            header->vp9().inter_pic_predicted;
+            vp9_header.inter_pic_predicted;
         _codecSpecificInfo.codecSpecific.VP9.flexible_mode =
-            header->vp9().flexible_mode;
+            vp9_header.flexible_mode;
         _codecSpecificInfo.codecSpecific.VP9.num_ref_pics =
-            header->vp9().num_ref_pics;
-        for (uint8_t r = 0; r < header->vp9().num_ref_pics; ++r) {
+            vp9_header.num_ref_pics;
+        for (uint8_t r = 0; r < vp9_header.num_ref_pics; ++r) {
           _codecSpecificInfo.codecSpecific.VP9.p_diff[r] =
-              header->vp9().pid_diff[r];
+              vp9_header.pid_diff[r];
         }
         _codecSpecificInfo.codecSpecific.VP9.ss_data_available =
-            header->vp9().ss_data_available;
-        if (header->vp9().temporal_idx != kNoTemporalIdx) {
+            vp9_header.ss_data_available;
+        if (vp9_header.temporal_idx != kNoTemporalIdx) {
           _codecSpecificInfo.codecSpecific.VP9.temporal_idx =
-              header->vp9().temporal_idx;
+              vp9_header.temporal_idx;
           _codecSpecificInfo.codecSpecific.VP9.temporal_up_switch =
-              header->vp9().temporal_up_switch;
+              vp9_header.temporal_up_switch;
         }
-        if (header->vp9().spatial_idx != kNoSpatialIdx) {
+        if (vp9_header.spatial_idx != kNoSpatialIdx) {
           _codecSpecificInfo.codecSpecific.VP9.spatial_idx =
-              header->vp9().spatial_idx;
+              vp9_header.spatial_idx;
           _codecSpecificInfo.codecSpecific.VP9.inter_layer_predicted =
-              header->vp9().inter_layer_predicted;
+              vp9_header.inter_layer_predicted;
         }
-        if (header->vp9().gof_idx != kNoGofIdx) {
-          _codecSpecificInfo.codecSpecific.VP9.gof_idx = header->vp9().gof_idx;
+        if (vp9_header.gof_idx != kNoGofIdx) {
+          _codecSpecificInfo.codecSpecific.VP9.gof_idx = vp9_header.gof_idx;
         }
-        if (header->vp9().ss_data_available) {
+        if (vp9_header.ss_data_available) {
           _codecSpecificInfo.codecSpecific.VP9.num_spatial_layers =
-              header->vp9().num_spatial_layers;
+              vp9_header.num_spatial_layers;
           _codecSpecificInfo.codecSpecific.VP9
               .spatial_layer_resolution_present =
-              header->vp9().spatial_layer_resolution_present;
-          if (header->vp9().spatial_layer_resolution_present) {
-            for (size_t i = 0; i < header->vp9().num_spatial_layers; ++i) {
+              vp9_header.spatial_layer_resolution_present;
+          if (vp9_header.spatial_layer_resolution_present) {
+            for (size_t i = 0; i < vp9_header.num_spatial_layers; ++i) {
               _codecSpecificInfo.codecSpecific.VP9.width[i] =
-                  header->vp9().width[i];
+                  vp9_header.width[i];
               _codecSpecificInfo.codecSpecific.VP9.height[i] =
-                  header->vp9().height[i];
+                  vp9_header.height[i];
             }
           }
           _codecSpecificInfo.codecSpecific.VP9.gof.CopyGofInfoVP9(
-              header->vp9().gof);
+              vp9_header.gof);
         }
         break;
       }
diff --git a/modules/video_coding/jitter_buffer.cc b/modules/video_coding/jitter_buffer.cc
index 83f90e3..03a9845 100644
--- a/modules/video_coding/jitter_buffer.cc
+++ b/modules/video_coding/jitter_buffer.cc
@@ -127,10 +127,12 @@
 Vp9SsMap::~Vp9SsMap() {}
 
 bool Vp9SsMap::Insert(const VCMPacket& packet) {
-  if (!packet.video_header.vp9().ss_data_available)
+  const auto& vp9_header =
+      absl::get<RTPVideoHeaderVP9>(packet.video_header.video_type_header);
+  if (!vp9_header.ss_data_available)
     return false;
 
-  ss_map_[packet.timestamp] = packet.video_header.vp9().gof;
+  ss_map_[packet.timestamp] = vp9_header.gof;
   return true;
 }
 
@@ -178,7 +180,9 @@
 
 // TODO(asapersson): Update according to updates in RTP payload profile.
 bool Vp9SsMap::UpdatePacket(VCMPacket* packet) {
-  uint8_t gof_idx = packet->video_header.vp9().gof_idx;
+  auto& vp9_header =
+      absl::get<RTPVideoHeaderVP9>(packet->video_header.video_type_header);
+  uint8_t gof_idx = vp9_header.gof_idx;
   if (gof_idx == kNoGofIdx)
     return false;  // No update needed.
 
@@ -189,14 +193,13 @@
   if (gof_idx >= it->second.num_frames_in_gof)
     return false;  // Assume corresponding SS not yet received.
 
-  RTPVideoHeaderVP9* vp9 = &packet->video_header.vp9();
-  vp9->temporal_idx = it->second.temporal_idx[gof_idx];
-  vp9->temporal_up_switch = it->second.temporal_up_switch[gof_idx];
+  vp9_header.temporal_idx = it->second.temporal_idx[gof_idx];
+  vp9_header.temporal_up_switch = it->second.temporal_up_switch[gof_idx];
 
   // TODO(asapersson): Set vp9.ref_picture_id[i] and add usage.
-  vp9->num_ref_pics = it->second.num_ref_pics[gof_idx];
+  vp9_header.num_ref_pics = it->second.num_ref_pics[gof_idx];
   for (uint8_t i = 0; i < it->second.num_ref_pics[gof_idx]; ++i) {
-    vp9->pid_diff[i] = it->second.pid_diff[gof_idx][i];
+    vp9_header.pid_diff[i] = it->second.pid_diff[gof_idx][i];
   }
   return true;
 }
diff --git a/modules/video_coding/jitter_buffer_unittest.cc b/modules/video_coding/jitter_buffer_unittest.cc
index 756dc52..d1744d5 100644
--- a/modules/video_coding/jitter_buffer_unittest.cc
+++ b/modules/video_coding/jitter_buffer_unittest.cc
@@ -41,6 +41,8 @@
   Vp9SsMapTest() : packet_() {}
 
   virtual void SetUp() {
+    auto& vp9_header =
+        packet_.video_header.video_type_header.emplace<RTPVideoHeaderVP9>();
     packet_.is_first_packet_in_frame = true;
     packet_.dataPtr = data_;
     packet_.sizeBytes = 1400;
@@ -50,12 +52,12 @@
     packet_.frameType = kVideoFrameKey;
     packet_.codec = kVideoCodecVP9;
     packet_.video_header.codec = kVideoCodecVP9;
-    packet_.video_header.vp9().flexible_mode = false;
-    packet_.video_header.vp9().gof_idx = 0;
-    packet_.video_header.vp9().temporal_idx = kNoTemporalIdx;
-    packet_.video_header.vp9().temporal_up_switch = false;
-    packet_.video_header.vp9().ss_data_available = true;
-    packet_.video_header.vp9().gof.SetGofInfoVP9(
+    vp9_header.flexible_mode = false;
+    vp9_header.gof_idx = 0;
+    vp9_header.temporal_idx = kNoTemporalIdx;
+    vp9_header.temporal_up_switch = false;
+    vp9_header.ss_data_available = true;
+    vp9_header.gof.SetGofInfoVP9(
         kTemporalStructureMode3);  // kTemporalStructureMode3: 0-2-1-2..
   }
 
@@ -69,7 +71,8 @@
 }
 
 TEST_F(Vp9SsMapTest, Insert_NoSsData) {
-  packet_.video_header.vp9().ss_data_available = false;
+  absl::get<RTPVideoHeaderVP9>(packet_.video_header.video_type_header)
+      .ss_data_available = false;
   EXPECT_FALSE(map_.Insert(packet_));
 }
 
@@ -146,52 +149,57 @@
 }
 
 TEST_F(Vp9SsMapTest, UpdatePacket_NoSsData) {
-  packet_.video_header.vp9().gof_idx = 0;
+  absl::get<RTPVideoHeaderVP9>(packet_.video_header.video_type_header).gof_idx =
+      0;
   EXPECT_FALSE(map_.UpdatePacket(&packet_));
 }
 
 TEST_F(Vp9SsMapTest, UpdatePacket_NoGofIdx) {
   EXPECT_TRUE(map_.Insert(packet_));
-  packet_.video_header.vp9().gof_idx = kNoGofIdx;
+  absl::get<RTPVideoHeaderVP9>(packet_.video_header.video_type_header).gof_idx =
+      kNoGofIdx;
   EXPECT_FALSE(map_.UpdatePacket(&packet_));
 }
 
 TEST_F(Vp9SsMapTest, UpdatePacket_InvalidGofIdx) {
   EXPECT_TRUE(map_.Insert(packet_));
-  packet_.video_header.vp9().gof_idx = 4;
+  absl::get<RTPVideoHeaderVP9>(packet_.video_header.video_type_header).gof_idx =
+      4;
   EXPECT_FALSE(map_.UpdatePacket(&packet_));
 }
 
 TEST_F(Vp9SsMapTest, UpdatePacket) {
+  auto& vp9_header =
+      absl::get<RTPVideoHeaderVP9>(packet_.video_header.video_type_header);
   EXPECT_TRUE(map_.Insert(packet_));  // kTemporalStructureMode3: 0-2-1-2..
 
-  packet_.video_header.vp9().gof_idx = 0;
+  vp9_header.gof_idx = 0;
   EXPECT_TRUE(map_.UpdatePacket(&packet_));
-  EXPECT_EQ(0, packet_.video_header.vp9().temporal_idx);
-  EXPECT_FALSE(packet_.video_header.vp9().temporal_up_switch);
-  EXPECT_EQ(1U, packet_.video_header.vp9().num_ref_pics);
-  EXPECT_EQ(4, packet_.video_header.vp9().pid_diff[0]);
+  EXPECT_EQ(0, vp9_header.temporal_idx);
+  EXPECT_FALSE(vp9_header.temporal_up_switch);
+  EXPECT_EQ(1U, vp9_header.num_ref_pics);
+  EXPECT_EQ(4, vp9_header.pid_diff[0]);
 
-  packet_.video_header.vp9().gof_idx = 1;
+  vp9_header.gof_idx = 1;
   EXPECT_TRUE(map_.UpdatePacket(&packet_));
-  EXPECT_EQ(2, packet_.video_header.vp9().temporal_idx);
-  EXPECT_TRUE(packet_.video_header.vp9().temporal_up_switch);
-  EXPECT_EQ(1U, packet_.video_header.vp9().num_ref_pics);
-  EXPECT_EQ(1, packet_.video_header.vp9().pid_diff[0]);
+  EXPECT_EQ(2, vp9_header.temporal_idx);
+  EXPECT_TRUE(vp9_header.temporal_up_switch);
+  EXPECT_EQ(1U, vp9_header.num_ref_pics);
+  EXPECT_EQ(1, vp9_header.pid_diff[0]);
 
-  packet_.video_header.vp9().gof_idx = 2;
+  vp9_header.gof_idx = 2;
   EXPECT_TRUE(map_.UpdatePacket(&packet_));
-  EXPECT_EQ(1, packet_.video_header.vp9().temporal_idx);
-  EXPECT_TRUE(packet_.video_header.vp9().temporal_up_switch);
-  EXPECT_EQ(1U, packet_.video_header.vp9().num_ref_pics);
-  EXPECT_EQ(2, packet_.video_header.vp9().pid_diff[0]);
+  EXPECT_EQ(1, vp9_header.temporal_idx);
+  EXPECT_TRUE(vp9_header.temporal_up_switch);
+  EXPECT_EQ(1U, vp9_header.num_ref_pics);
+  EXPECT_EQ(2, vp9_header.pid_diff[0]);
 
-  packet_.video_header.vp9().gof_idx = 3;
+  vp9_header.gof_idx = 3;
   EXPECT_TRUE(map_.UpdatePacket(&packet_));
-  EXPECT_EQ(2, packet_.video_header.vp9().temporal_idx);
-  EXPECT_TRUE(packet_.video_header.vp9().temporal_up_switch);
-  EXPECT_EQ(1U, packet_.video_header.vp9().num_ref_pics);
-  EXPECT_EQ(1, packet_.video_header.vp9().pid_diff[0]);
+  EXPECT_EQ(2, vp9_header.temporal_idx);
+  EXPECT_TRUE(vp9_header.temporal_up_switch);
+  EXPECT_EQ(1U, vp9_header.num_ref_pics);
+  EXPECT_EQ(1, vp9_header.pid_diff[0]);
 }
 
 class TestBasicJitterBuffer : public ::testing::TestWithParam<std::string>,
@@ -920,25 +928,28 @@
   //  -------------------------------------------------
   // |<----------tl0idx:200--------->|<---tl0idx:201---
 
+  auto& vp9_header =
+      packet_->video_header.video_type_header.emplace<RTPVideoHeaderVP9>();
+
   bool re = false;
   packet_->codec = kVideoCodecVP9;
   packet_->video_header.codec = kVideoCodecVP9;
   packet_->is_first_packet_in_frame = true;
   packet_->markerBit = true;
-  packet_->video_header.vp9().flexible_mode = false;
-  packet_->video_header.vp9().spatial_idx = 0;
-  packet_->video_header.vp9().beginning_of_frame = true;
-  packet_->video_header.vp9().end_of_frame = true;
-  packet_->video_header.vp9().temporal_up_switch = false;
+  vp9_header.flexible_mode = false;
+  vp9_header.spatial_idx = 0;
+  vp9_header.beginning_of_frame = true;
+  vp9_header.end_of_frame = true;
+  vp9_header.temporal_up_switch = false;
 
   packet_->seqNum = 65485;
   packet_->timestamp = 1000;
   packet_->frameType = kVideoFrameKey;
-  packet_->video_header.vp9().picture_id = 5;
-  packet_->video_header.vp9().tl0_pic_idx = 200;
-  packet_->video_header.vp9().temporal_idx = 0;
-  packet_->video_header.vp9().ss_data_available = true;
-  packet_->video_header.vp9().gof.SetGofInfoVP9(
+  vp9_header.picture_id = 5;
+  vp9_header.tl0_pic_idx = 200;
+  vp9_header.temporal_idx = 0;
+  vp9_header.ss_data_available = true;
+  vp9_header.gof.SetGofInfoVP9(
       kTemporalStructureMode3);  // kTemporalStructureMode3: 0-2-1-2..
   EXPECT_EQ(kCompleteSession, jitter_buffer_->InsertPacket(*packet_, &re));
 
@@ -946,10 +957,10 @@
   packet_->seqNum = 65489;
   packet_->timestamp = 13000;
   packet_->frameType = kVideoFrameDelta;
-  packet_->video_header.vp9().picture_id = 9;
-  packet_->video_header.vp9().tl0_pic_idx = 201;
-  packet_->video_header.vp9().temporal_idx = 0;
-  packet_->video_header.vp9().ss_data_available = false;
+  vp9_header.picture_id = 9;
+  vp9_header.tl0_pic_idx = 201;
+  vp9_header.temporal_idx = 0;
+  vp9_header.ss_data_available = false;
   EXPECT_EQ(kCompleteSession, jitter_buffer_->InsertPacket(*packet_, &re));
 
   VCMEncodedFrame* frame_out = DecodeCompleteFrame();
@@ -973,31 +984,34 @@
   //  --------------------------------
   // |<--------tl0idx:200--------->|
 
+  auto& vp9_header =
+      packet_->video_header.video_type_header.emplace<RTPVideoHeaderVP9>();
+
   bool re = false;
   packet_->codec = kVideoCodecVP9;
   packet_->video_header.codec = kVideoCodecVP9;
   packet_->is_first_packet_in_frame = true;
   packet_->markerBit = true;
-  packet_->video_header.vp9().flexible_mode = false;
-  packet_->video_header.vp9().spatial_idx = 0;
-  packet_->video_header.vp9().beginning_of_frame = true;
-  packet_->video_header.vp9().end_of_frame = true;
-  packet_->video_header.vp9().tl0_pic_idx = 200;
+  vp9_header.flexible_mode = false;
+  vp9_header.spatial_idx = 0;
+  vp9_header.beginning_of_frame = true;
+  vp9_header.end_of_frame = true;
+  vp9_header.tl0_pic_idx = 200;
 
   packet_->seqNum = 65486;
   packet_->timestamp = 6000;
   packet_->frameType = kVideoFrameDelta;
-  packet_->video_header.vp9().picture_id = 6;
-  packet_->video_header.vp9().temporal_idx = 2;
-  packet_->video_header.vp9().temporal_up_switch = true;
+  vp9_header.picture_id = 6;
+  vp9_header.temporal_idx = 2;
+  vp9_header.temporal_up_switch = true;
   EXPECT_EQ(kCompleteSession, jitter_buffer_->InsertPacket(*packet_, &re));
 
   packet_->seqNum = 65487;
   packet_->timestamp = 9000;
   packet_->frameType = kVideoFrameDelta;
-  packet_->video_header.vp9().picture_id = 7;
-  packet_->video_header.vp9().temporal_idx = 1;
-  packet_->video_header.vp9().temporal_up_switch = true;
+  vp9_header.picture_id = 7;
+  vp9_header.temporal_idx = 1;
+  vp9_header.temporal_up_switch = true;
   EXPECT_EQ(kCompleteSession, jitter_buffer_->InsertPacket(*packet_, &re));
 
   // Insert first frame with SS data.
@@ -1006,11 +1020,11 @@
   packet_->frameType = kVideoFrameKey;
   packet_->width = 352;
   packet_->height = 288;
-  packet_->video_header.vp9().picture_id = 5;
-  packet_->video_header.vp9().temporal_idx = 0;
-  packet_->video_header.vp9().temporal_up_switch = false;
-  packet_->video_header.vp9().ss_data_available = true;
-  packet_->video_header.vp9().gof.SetGofInfoVP9(
+  vp9_header.picture_id = 5;
+  vp9_header.temporal_idx = 0;
+  vp9_header.temporal_up_switch = false;
+  vp9_header.ss_data_available = true;
+  vp9_header.gof.SetGofInfoVP9(
       kTemporalStructureMode3);  // kTemporalStructureMode3: 0-2-1-2..
   EXPECT_EQ(kCompleteSession, jitter_buffer_->InsertPacket(*packet_, &re));
 
@@ -1049,33 +1063,36 @@
   //  -----------------------------------------
   // |<-----------tl0idx:200------------>|
 
+  auto& vp9_header =
+      packet_->video_header.video_type_header.emplace<RTPVideoHeaderVP9>();
+
   bool re = false;
   packet_->codec = kVideoCodecVP9;
   packet_->video_header.codec = kVideoCodecVP9;
-  packet_->video_header.vp9().flexible_mode = false;
-  packet_->video_header.vp9().beginning_of_frame = true;
-  packet_->video_header.vp9().end_of_frame = true;
-  packet_->video_header.vp9().tl0_pic_idx = 200;
+  vp9_header.flexible_mode = false;
+  vp9_header.beginning_of_frame = true;
+  vp9_header.end_of_frame = true;
+  vp9_header.tl0_pic_idx = 200;
 
   packet_->is_first_packet_in_frame = true;
   packet_->markerBit = false;
   packet_->seqNum = 65486;
   packet_->timestamp = 6000;
   packet_->frameType = kVideoFrameDelta;
-  packet_->video_header.vp9().spatial_idx = 0;
-  packet_->video_header.vp9().picture_id = 6;
-  packet_->video_header.vp9().temporal_idx = 1;
-  packet_->video_header.vp9().temporal_up_switch = true;
+  vp9_header.spatial_idx = 0;
+  vp9_header.picture_id = 6;
+  vp9_header.temporal_idx = 1;
+  vp9_header.temporal_up_switch = true;
   EXPECT_EQ(kIncomplete, jitter_buffer_->InsertPacket(*packet_, &re));
 
   packet_->is_first_packet_in_frame = false;
   packet_->markerBit = true;
   packet_->seqNum = 65487;
   packet_->frameType = kVideoFrameDelta;
-  packet_->video_header.vp9().spatial_idx = 1;
-  packet_->video_header.vp9().picture_id = 6;
-  packet_->video_header.vp9().temporal_idx = 1;
-  packet_->video_header.vp9().temporal_up_switch = true;
+  vp9_header.spatial_idx = 1;
+  vp9_header.picture_id = 6;
+  vp9_header.temporal_idx = 1;
+  vp9_header.temporal_up_switch = true;
   EXPECT_EQ(kCompleteSession, jitter_buffer_->InsertPacket(*packet_, &re));
 
   packet_->is_first_packet_in_frame = false;
@@ -1083,10 +1100,10 @@
   packet_->seqNum = 65485;
   packet_->timestamp = 3000;
   packet_->frameType = kVideoFrameKey;
-  packet_->video_header.vp9().spatial_idx = 1;
-  packet_->video_header.vp9().picture_id = 5;
-  packet_->video_header.vp9().temporal_idx = 0;
-  packet_->video_header.vp9().temporal_up_switch = false;
+  vp9_header.spatial_idx = 1;
+  vp9_header.picture_id = 5;
+  vp9_header.temporal_idx = 0;
+  vp9_header.temporal_up_switch = false;
   EXPECT_EQ(kIncomplete, jitter_buffer_->InsertPacket(*packet_, &re));
 
   // Insert first frame with SS data.
@@ -1096,12 +1113,12 @@
   packet_->frameType = kVideoFrameKey;
   packet_->width = 352;
   packet_->height = 288;
-  packet_->video_header.vp9().spatial_idx = 0;
-  packet_->video_header.vp9().picture_id = 5;
-  packet_->video_header.vp9().temporal_idx = 0;
-  packet_->video_header.vp9().temporal_up_switch = false;
-  packet_->video_header.vp9().ss_data_available = true;
-  packet_->video_header.vp9().gof.SetGofInfoVP9(
+  vp9_header.spatial_idx = 0;
+  vp9_header.picture_id = 5;
+  vp9_header.temporal_idx = 0;
+  vp9_header.temporal_up_switch = false;
+  vp9_header.ss_data_available = true;
+  vp9_header.gof.SetGofInfoVP9(
      kTemporalStructureMode2);  // kTemporalStructureMode2: 0-1-0-1..
   EXPECT_EQ(kCompleteSession, jitter_buffer_->InsertPacket(*packet_, &re));
 
diff --git a/modules/video_coding/rtp_frame_reference_finder_unittest.cc b/modules/video_coding/rtp_frame_reference_finder_unittest.cc
index 83454eb..9d26a2c 100644
--- a/modules/video_coding/rtp_frame_reference_finder_unittest.cc
+++ b/modules/video_coding/rtp_frame_reference_finder_unittest.cc
@@ -135,26 +135,28 @@
                     bool up_switch = false,
                     GofInfoVP9* ss = nullptr) {
     VCMPacket packet;
+    auto& vp9_header =
+        packet.video_header.video_type_header.emplace<RTPVideoHeaderVP9>();
     packet.timestamp = pid;
     packet.codec = kVideoCodecVP9;
     packet.seqNum = seq_num_start;
     packet.markerBit = (seq_num_start == seq_num_end);
     packet.frameType = keyframe ? kVideoFrameKey : kVideoFrameDelta;
-    packet.video_header.vp9().flexible_mode = false;
-    packet.video_header.vp9().picture_id = pid % (1 << 15);
-    packet.video_header.vp9().temporal_idx = tid;
-    packet.video_header.vp9().spatial_idx = sid;
-    packet.video_header.vp9().tl0_pic_idx = tl0;
-    packet.video_header.vp9().temporal_up_switch = up_switch;
+    vp9_header.flexible_mode = false;
+    vp9_header.picture_id = pid % (1 << 15);
+    vp9_header.temporal_idx = tid;
+    vp9_header.spatial_idx = sid;
+    vp9_header.tl0_pic_idx = tl0;
+    vp9_header.temporal_up_switch = up_switch;
     if (ss != nullptr) {
-      packet.video_header.vp9().ss_data_available = true;
-      packet.video_header.vp9().gof = *ss;
+      vp9_header.ss_data_available = true;
+      vp9_header.gof = *ss;
     }
     ref_packet_buffer_->InsertPacket(&packet);
 
     if (seq_num_start != seq_num_end) {
       packet.markerBit = true;
-      packet.video_header.vp9().ss_data_available = false;
+      vp9_header.ss_data_available = false;
       packet.seqNum = seq_num_end;
       ref_packet_buffer_->InsertPacket(&packet);
     }
@@ -174,20 +176,22 @@
                      bool inter = false,
                      std::vector<uint8_t> refs = std::vector<uint8_t>()) {
     VCMPacket packet;
+    auto& vp9_header =
+        packet.video_header.video_type_header.emplace<RTPVideoHeaderVP9>();
     packet.timestamp = pid;
     packet.codec = kVideoCodecVP9;
     packet.seqNum = seq_num_start;
     packet.markerBit = (seq_num_start == seq_num_end);
     packet.frameType = keyframe ? kVideoFrameKey : kVideoFrameDelta;
-    packet.video_header.vp9().inter_layer_predicted = inter;
-    packet.video_header.vp9().flexible_mode = true;
-    packet.video_header.vp9().picture_id = pid % (1 << 15);
-    packet.video_header.vp9().temporal_idx = tid;
-    packet.video_header.vp9().spatial_idx = sid;
-    packet.video_header.vp9().tl0_pic_idx = tl0;
-    packet.video_header.vp9().num_ref_pics = refs.size();
+    vp9_header.inter_layer_predicted = inter;
+    vp9_header.flexible_mode = true;
+    vp9_header.picture_id = pid % (1 << 15);
+    vp9_header.temporal_idx = tid;
+    vp9_header.spatial_idx = sid;
+    vp9_header.tl0_pic_idx = tl0;
+    vp9_header.num_ref_pics = refs.size();
     for (size_t i = 0; i < refs.size(); ++i)
-      packet.video_header.vp9().pid_diff[i] = refs[i];
+      vp9_header.pid_diff[i] = refs[i];
     ref_packet_buffer_->InsertPacket(&packet);
 
     if (seq_num_start != seq_num_end) {
diff --git a/modules/video_coding/session_info.cc b/modules/video_coding/session_info.cc
index b0066ab..a80c7b2 100644
--- a/modules/video_coding/session_info.cc
+++ b/modules/video_coding/session_info.cc
@@ -64,7 +64,9 @@
   if (packets_.front().video_header.codec == kVideoCodecVP8) {
     return packets_.front().video_header.vp8().pictureId;
   } else if (packets_.front().video_header.codec == kVideoCodecVP9) {
-    return packets_.front().video_header.vp9().picture_id;
+    return absl::get<RTPVideoHeaderVP9>(
+               packets_.front().video_header.video_type_header)
+        .picture_id;
   } else {
     return kNoPictureId;
   }
@@ -76,7 +78,9 @@
   if (packets_.front().video_header.codec == kVideoCodecVP8) {
     return packets_.front().video_header.vp8().temporalIdx;
   } else if (packets_.front().video_header.codec == kVideoCodecVP9) {
-    return packets_.front().video_header.vp9().temporal_idx;
+    return absl::get<RTPVideoHeaderVP9>(
+               packets_.front().video_header.video_type_header)
+        .temporal_idx;
   } else {
     return kNoTemporalIdx;
   }
@@ -88,7 +92,9 @@
   if (packets_.front().video_header.codec == kVideoCodecVP8) {
     return packets_.front().video_header.vp8().layerSync;
   } else if (packets_.front().video_header.codec == kVideoCodecVP9) {
-    return packets_.front().video_header.vp9().temporal_up_switch;
+    return absl::get<RTPVideoHeaderVP9>(
+               packets_.front().video_header.video_type_header)
+        .temporal_up_switch;
   } else {
     return false;
   }
@@ -100,7 +106,9 @@
   if (packets_.front().video_header.codec == kVideoCodecVP8) {
     return packets_.front().video_header.vp8().tl0PicIdx;
   } else if (packets_.front().video_header.codec == kVideoCodecVP9) {
-    return packets_.front().video_header.vp9().tl0_pic_idx;
+    return absl::get<RTPVideoHeaderVP9>(
+               packets_.front().video_header.video_type_header)
+        .tl0_pic_idx;
   } else {
     return kNoTl0PicIdx;
   }
@@ -122,17 +130,19 @@
 }
 
 void VCMSessionInfo::SetGofInfo(const GofInfoVP9& gof_info, size_t idx) {
-  if (packets_.empty() ||
-      packets_.front().video_header.codec != kVideoCodecVP9 ||
-      packets_.front().video_header.vp9().flexible_mode) {
+  if (packets_.empty())
     return;
-  }
-  packets_.front().video_header.vp9().temporal_idx = gof_info.temporal_idx[idx];
-  packets_.front().video_header.vp9().temporal_up_switch =
-      gof_info.temporal_up_switch[idx];
-  packets_.front().video_header.vp9().num_ref_pics = gof_info.num_ref_pics[idx];
+
+  auto* vp9_header = absl::get_if<RTPVideoHeaderVP9>(
+      &packets_.front().video_header.video_type_header);
+  if (!vp9_header || vp9_header->flexible_mode)
+    return;
+
+  vp9_header->temporal_idx = gof_info.temporal_idx[idx];
+  vp9_header->temporal_up_switch = gof_info.temporal_up_switch[idx];
+  vp9_header->num_ref_pics = gof_info.num_ref_pics[idx];
   for (uint8_t i = 0; i < gof_info.num_ref_pics[idx]; ++i) {
-    packets_.front().video_header.vp9().pid_diff[i] = gof_info.pid_diff[idx][i];
+    vp9_header->pid_diff[i] = gof_info.pid_diff[idx][i];
   }
 }
 
diff --git a/test/layer_filtering_transport.cc b/test/layer_filtering_transport.cc
index 0e8aa48..1f36619 100644
--- a/test/layer_filtering_transport.cc
+++ b/test/layer_filtering_transport.cc
@@ -135,32 +135,42 @@
         RtpDepacketizer::Create(is_vp8 ? kVideoCodecVP8 : kVideoCodecVP9));
     RtpDepacketizer::ParsedPayload parsed_payload;
     if (depacketizer->Parse(&parsed_payload, payload, payload_data_length)) {
-      const int temporal_idx = static_cast<int>(
-          is_vp8 ? parsed_payload.video_header().vp8().temporalIdx
-                 : parsed_payload.video_header().vp9().temporal_idx);
-      const int spatial_idx = static_cast<int>(
-          is_vp8 ? kNoSpatialIdx
-                 : parsed_payload.video_header().vp9().spatial_idx);
-      const bool non_ref_for_inter_layer_pred =
-          is_vp8 ? false
-                 : parsed_payload.video_header()
-                       .vp9()
-                       .non_ref_for_inter_layer_pred;
-      // The number of spatial layers is sent in ssData, which is included only
-      // in the first packet of the first spatial layer of a key frame.
-      if (!parsed_payload.video_header().vp9().inter_pic_predicted &&
-          parsed_payload.video_header().vp9().beginning_of_frame == 1 &&
-          spatial_idx == 0) {
-        num_active_spatial_layers_ =
-            parsed_payload.video_header().vp9().num_spatial_layers;
-      } else if (spatial_idx == kNoSpatialIdx)
+      int temporal_idx;
+      int spatial_idx;
+      bool non_ref_for_inter_layer_pred;
+      bool end_of_frame;
+
+      if (is_vp8) {
+        temporal_idx = parsed_payload.video_header().vp8().temporalIdx;
+        spatial_idx = kNoSpatialIdx;
         num_active_spatial_layers_ = 1;
+        non_ref_for_inter_layer_pred = false;
+        end_of_frame = true;
+      } else {
+        const auto& vp9_header = absl::get<RTPVideoHeaderVP9>(
+            parsed_payload.video_header().video_type_header);
+        temporal_idx = vp9_header.temporal_idx;
+        spatial_idx = vp9_header.spatial_idx;
+        non_ref_for_inter_layer_pred = vp9_header.non_ref_for_inter_layer_pred;
+        end_of_frame = vp9_header.end_of_frame;
+
+        // The number of spatial layers is sent in ssData, which is included
+        // only in the first packet of the first spatial layer of a key frame.
+        if (!vp9_header.inter_pic_predicted &&
+            vp9_header.beginning_of_frame == 1 && spatial_idx == 0) {
+          num_active_spatial_layers_ = vp9_header.num_spatial_layers;
+        }
+      }
+
+      if (spatial_idx == kNoSpatialIdx)
+        num_active_spatial_layers_ = 1;
+
       RTC_CHECK_GT(num_active_spatial_layers_, 0);
 
       if (selected_sl_ >= 0 &&
           spatial_idx ==
               std::min(num_active_spatial_layers_ - 1, selected_sl_) &&
-          parsed_payload.video_header().vp9().end_of_frame) {
+          end_of_frame) {
         // This layer is now the last in the superframe.
         set_marker_bit = true;
       } else {
diff --git a/video/picture_id_tests.cc b/video/picture_id_tests.cc
index 0da5d0a..3c43d7d 100644
--- a/video/picture_id_tests.cc
+++ b/video/picture_id_tests.cc
@@ -101,11 +101,14 @@
         parsed->tl0_pic_idx = parsed_payload.video_header().vp8().tl0PicIdx;
         parsed->temporal_idx = parsed_payload.video_header().vp8().temporalIdx;
         break;
-      case kVideoCodecVP9:
-        parsed->picture_id = parsed_payload.video_header().vp9().picture_id;
-        parsed->tl0_pic_idx = parsed_payload.video_header().vp9().tl0_pic_idx;
-        parsed->temporal_idx = parsed_payload.video_header().vp9().temporal_idx;
+      case kVideoCodecVP9: {
+        const auto& vp9_header = absl::get<RTPVideoHeaderVP9>(
+            parsed_payload.video_header().video_type_header);
+        parsed->picture_id = vp9_header.picture_id;
+        parsed->tl0_pic_idx = vp9_header.tl0_pic_idx;
+        parsed->temporal_idx = vp9_header.temporal_idx;
         break;
+      }
       default:
         RTC_NOTREACHED();
         break;
diff --git a/video/video_analyzer.cc b/video/video_analyzer.cc
index 66c4db6..04b5c76 100644
--- a/video/video_analyzer.cc
+++ b/video/video_analyzer.cc
@@ -416,12 +416,19 @@
     bool result =
         depacketizer->Parse(&parsed_payload, payload, payload_data_length);
     RTC_DCHECK(result);
-    const int temporal_idx = static_cast<int>(
-        is_vp8 ? parsed_payload.video_header().vp8().temporalIdx
-               : parsed_payload.video_header().vp9().temporal_idx);
-    const int spatial_idx = static_cast<int>(
-        is_vp8 ? kNoSpatialIdx
-               : parsed_payload.video_header().vp9().spatial_idx);
+
+    int temporal_idx;
+    int spatial_idx;
+    if (is_vp8) {
+      temporal_idx = parsed_payload.video_header().vp8().temporalIdx;
+      spatial_idx = kNoSpatialIdx;
+    } else {
+      const auto& vp9_header = absl::get<RTPVideoHeaderVP9>(
+          parsed_payload.video_header().video_type_header);
+      temporal_idx = vp9_header.temporal_idx;
+      spatial_idx = vp9_header.spatial_idx;
+    }
+
     return (selected_tl_ < 0 || temporal_idx == kNoTemporalIdx ||
             temporal_idx <= selected_tl_) &&
            (selected_sl_ < 0 || spatial_idx == kNoSpatialIdx ||
diff --git a/video/video_send_stream_tests.cc b/video/video_send_stream_tests.cc
index f5cecce..4bf5b99 100644
--- a/video/video_send_stream_tests.cc
+++ b/video/video_send_stream_tests.cc
@@ -3176,17 +3176,19 @@
       EXPECT_TRUE(depacketizer.Parse(&parsed, payload, payload_length));
       EXPECT_EQ(VideoCodecType::kVideoCodecVP9, parsed.video_header().codec);
       // Verify common fields for all configurations.
-      VerifyCommonHeader(parsed.video_header().vp9());
+      const auto& vp9_header =
+          absl::get<RTPVideoHeaderVP9>(parsed.video_header().video_type_header);
+      VerifyCommonHeader(vp9_header);
       CompareConsecutiveFrames(header, parsed.video_header());
       // Verify configuration specific settings.
-      InspectHeader(parsed.video_header().vp9());
+      InspectHeader(vp9_header);
 
       ++packets_sent_;
       if (header.markerBit) {
         ++frames_sent_;
       }
       last_header_ = header;
-      last_vp9_ = parsed.video_header().vp9();
+      last_vp9_ = vp9_header;
     }
     return SEND_PACKET;
   }
@@ -3371,7 +3373,8 @@
 
   void CompareConsecutiveFrames(const RTPHeader& header,
                                 const RTPVideoHeader& video) const {
-    const RTPVideoHeaderVP9& vp9 = video.vp9();
+    const auto& vp9_header =
+        absl::get<RTPVideoHeaderVP9>(video.video_type_header);
 
     bool new_frame = packets_sent_ == 0 ||
                      IsNewerTimestamp(header.timestamp, last_header_.timestamp);
@@ -3379,22 +3382,22 @@
     if (!new_frame) {
       EXPECT_FALSE(last_header_.markerBit);
       EXPECT_EQ(last_header_.timestamp, header.timestamp);
-      EXPECT_EQ(last_vp9_.picture_id, vp9.picture_id);
-      EXPECT_EQ(last_vp9_.temporal_idx, vp9.temporal_idx);
-      EXPECT_EQ(last_vp9_.tl0_pic_idx, vp9.tl0_pic_idx);
-      VerifySpatialIdxWithinFrame(vp9);
+      EXPECT_EQ(last_vp9_.picture_id, vp9_header.picture_id);
+      EXPECT_EQ(last_vp9_.temporal_idx, vp9_header.temporal_idx);
+      EXPECT_EQ(last_vp9_.tl0_pic_idx, vp9_header.tl0_pic_idx);
+      VerifySpatialIdxWithinFrame(vp9_header);
       return;
     }
     // New frame.
-    EXPECT_TRUE(vp9.beginning_of_frame);
+    EXPECT_TRUE(vp9_header.beginning_of_frame);
 
     // Compare with last packet in previous frame.
     if (frames_sent_ == 0)
       return;
     EXPECT_TRUE(last_vp9_.end_of_frame);
     EXPECT_TRUE(last_header_.markerBit);
-    EXPECT_TRUE(ContinuousPictureId(vp9));
-    VerifyTl0Idx(vp9);
+    EXPECT_TRUE(ContinuousPictureId(vp9_header));
+    VerifyTl0Idx(vp9_header);
   }
 
   test::FunctionVideoEncoderFactory encoder_factory_;