In RtpSenderVideo, add support for writing the DependencyDescriptor header extension
Bug: webrtc:10342
Change-Id: I12cca9c5e1606338bb914e58e13d268bbc6961f9
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/166532
Commit-Queue: Danil Chapovalov <danilchap@webrtc.org>
Reviewed-by: Philip Eliasson <philipel@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#30427}
diff --git a/modules/rtp_rtcp/BUILD.gn b/modules/rtp_rtcp/BUILD.gn
index daaac94..099c066 100644
--- a/modules/rtp_rtcp/BUILD.gn
+++ b/modules/rtp_rtcp/BUILD.gn
@@ -251,6 +251,7 @@
"../../api/rtc_event_log",
"../../api/transport:field_trial_based_config",
"../../api/transport:webrtc_key_value_config",
+ "../../api/transport/rtp:dependency_descriptor",
"../../api/transport/rtp:rtp_source",
"../../api/units:data_rate",
"../../api/units:time_delta",
@@ -332,6 +333,7 @@
]
deps = [
"../../:webrtc_common",
+ "../../api/transport/rtp:dependency_descriptor",
"../../api/video:video_frame",
"../../api/video:video_frame_type",
"../../api/video:video_rtp_headers",
@@ -508,6 +510,7 @@
"../../api:transport_api",
"../../api/rtc_event_log",
"../../api/transport:field_trial_based_config",
+ "../../api/transport/rtp:dependency_descriptor",
"../../api/units:timestamp",
"../../api/video:encoded_image",
"../../api/video:video_bitrate_allocation",
@@ -518,6 +521,7 @@
"../../api/video_codecs:video_codecs_api",
"../../call:rtp_receiver",
"../../common_video",
+ "../../common_video/generic_frame_descriptor",
"../../common_video/test:utilities",
"../../logging:mocks",
"../../rtc_base:checks",
diff --git a/modules/rtp_rtcp/source/rtp_sender_video.cc b/modules/rtp_rtcp/source/rtp_sender_video.cc
index 9779df1..fc176c9 100644
--- a/modules/rtp_rtcp/source/rtp_sender_video.cc
+++ b/modules/rtp_rtcp/source/rtp_sender_video.cc
@@ -18,12 +18,15 @@
#include <string>
#include <utility>
+#include "absl/memory/memory.h"
#include "absl/strings/match.h"
#include "api/crypto/frame_encryptor_interface.h"
+#include "api/transport/rtp/dependency_descriptor.h"
#include "modules/remote_bitrate_estimator/test/bwe_test_logging.h"
#include "modules/rtp_rtcp/include/rtp_rtcp_defines.h"
#include "modules/rtp_rtcp/source/absolute_capture_time_sender.h"
#include "modules/rtp_rtcp/source/byte_io.h"
+#include "modules/rtp_rtcp/source/rtp_dependency_descriptor_extension.h"
#include "modules/rtp_rtcp/source/rtp_format.h"
#include "modules/rtp_rtcp/source/rtp_generic_frame_descriptor_extension.h"
#include "modules/rtp_rtcp/source/rtp_header_extensions.h"
@@ -72,6 +75,7 @@
const RTPVideoHeader& video_header,
const absl::optional<PlayoutDelay>& playout_delay,
const absl::optional<AbsoluteCaptureTime>& absolute_capture_time,
+ FrameDependencyStructure* video_structure,
bool set_video_rotation,
bool set_color_space,
bool set_frame_marking,
@@ -115,34 +119,71 @@
}
if (video_header.generic) {
- RtpGenericFrameDescriptor generic_descriptor;
- generic_descriptor.SetFirstPacketInSubFrame(first_packet);
- generic_descriptor.SetLastPacketInSubFrame(last_packet);
- generic_descriptor.SetDiscardable(video_header.generic->discardable);
-
- if (first_packet) {
- generic_descriptor.SetFrameId(
- static_cast<uint16_t>(video_header.generic->frame_id));
+ bool extension_is_set = false;
+ if (video_structure != nullptr) {
+ DependencyDescriptor descriptor;
+ descriptor.first_packet_in_frame = first_packet;
+ descriptor.last_packet_in_frame = last_packet;
+ descriptor.frame_number = video_header.generic->frame_id & 0xFFFF;
+ descriptor.frame_dependencies.spatial_id =
+ video_header.generic->spatial_index;
+ descriptor.frame_dependencies.temporal_id =
+ video_header.generic->temporal_index;
for (int64_t dep : video_header.generic->dependencies) {
- generic_descriptor.AddFrameDependencyDiff(
+ descriptor.frame_dependencies.frame_diffs.push_back(
video_header.generic->frame_id - dep);
}
+ descriptor.frame_dependencies.decode_target_indications =
+ video_header.generic->decode_target_indications;
+ RTC_DCHECK_EQ(
+ descriptor.frame_dependencies.decode_target_indications.size(),
+ video_structure->num_decode_targets);
- uint8_t spatial_bimask = 1 << video_header.generic->spatial_index;
- generic_descriptor.SetSpatialLayersBitmask(spatial_bimask);
-
- generic_descriptor.SetTemporalLayer(video_header.generic->temporal_index);
-
- if (video_header.frame_type == VideoFrameType::kVideoFrameKey) {
- generic_descriptor.SetResolution(video_header.width,
- video_header.height);
+ // To avoid an extra structure copy, temporarily share ownership of the
+ // video_structure with the dependency descriptor.
+ if (video_header.frame_type == VideoFrameType::kVideoFrameKey &&
+ first_packet) {
+ descriptor.attached_structure = absl::WrapUnique(video_structure);
}
+ extension_is_set = packet->SetExtension<RtpDependencyDescriptorExtension>(
+ *video_structure, descriptor);
+
+ // Remove the temporary shared ownership.
+ descriptor.attached_structure.release();
}
- if (!packet->SetExtension<RtpGenericFrameDescriptorExtension01>(
- generic_descriptor)) {
- packet->SetExtension<RtpGenericFrameDescriptorExtension00>(
- generic_descriptor);
+ // Do not use v0/v1 generic frame descriptor when v2 is stored.
+ if (!extension_is_set) {
+ RtpGenericFrameDescriptor generic_descriptor;
+ generic_descriptor.SetFirstPacketInSubFrame(first_packet);
+ generic_descriptor.SetLastPacketInSubFrame(last_packet);
+ generic_descriptor.SetDiscardable(video_header.generic->discardable);
+
+ if (first_packet) {
+ generic_descriptor.SetFrameId(
+ static_cast<uint16_t>(video_header.generic->frame_id));
+ for (int64_t dep : video_header.generic->dependencies) {
+ generic_descriptor.AddFrameDependencyDiff(
+ video_header.generic->frame_id - dep);
+ }
+
+ uint8_t spatial_bimask = 1 << video_header.generic->spatial_index;
+ generic_descriptor.SetSpatialLayersBitmask(spatial_bimask);
+
+ generic_descriptor.SetTemporalLayer(
+ video_header.generic->temporal_index);
+
+ if (video_header.frame_type == VideoFrameType::kVideoFrameKey) {
+ generic_descriptor.SetResolution(video_header.width,
+ video_header.height);
+ }
+ }
+
+ if (!packet->SetExtension<RtpGenericFrameDescriptorExtension01>(
+ generic_descriptor)) {
+ packet->SetExtension<RtpGenericFrameDescriptorExtension00>(
+ generic_descriptor);
+ }
}
}
}
@@ -417,6 +458,38 @@
return absl::nullopt;
}
+void RTPSenderVideo::SetVideoStructure(
+ const FrameDependencyStructure* video_structure) {
+ RTC_DCHECK_RUNS_SERIALIZED(&send_checker_);
+ if (video_structure == nullptr) {
+ video_structure_ = nullptr;  // Unset: no structure is handed to AddRtpHeaderExtensions.
+ return;
+ }
+ // Simple sanity checks that the video structure is set up.
+ RTC_DCHECK_GT(video_structure->num_decode_targets, 0);
+ RTC_DCHECK_GT(video_structure->templates.size(), 0);
+
+ int structure_id = 0;  // Used when there is no previous structure.
+ if (video_structure_) {
+ if (*video_structure_ == *video_structure) {
+ // Same structure (just a new key frame), no update required.
+ return;
+ }
+ // A different structure: move structure_id past the previous structure's
+ // templates (mod kMaxTemplates) so templates of the two do not collide.
+ static constexpr int kMaxTemplates = 64;
+ structure_id =
+ (video_structure_->structure_id + video_structure_->templates.size()) %
+ kMaxTemplates;
+ }
+
+ video_structure_ =
+ std::make_unique<FrameDependencyStructure>(*video_structure);  // Own a copy.
+ video_structure_->structure_id = structure_id;
+ // TODO(bugs.webrtc.org/10342): Support chains.
+ video_structure_->num_chains = 0;
+}
+
bool RTPSenderVideo::SendVideo(
int payload_type,
absl::optional<VideoCodecType> codec_type,
@@ -523,16 +596,20 @@
auto last_packet = std::make_unique<RtpPacketToSend>(*single_packet);
// Simplest way to estimate how much extensions would occupy is to set them.
AddRtpHeaderExtensions(video_header, playout_delay, absolute_capture_time,
- set_video_rotation, set_color_space, set_frame_marking,
+ video_structure_.get(), set_video_rotation,
+ set_color_space, set_frame_marking,
/*first=*/true, /*last=*/true, single_packet.get());
AddRtpHeaderExtensions(video_header, playout_delay, absolute_capture_time,
- set_video_rotation, set_color_space, set_frame_marking,
+ video_structure_.get(), set_video_rotation,
+ set_color_space, set_frame_marking,
/*first=*/true, /*last=*/false, first_packet.get());
AddRtpHeaderExtensions(video_header, playout_delay, absolute_capture_time,
- set_video_rotation, set_color_space, set_frame_marking,
+ video_structure_.get(), set_video_rotation,
+ set_color_space, set_frame_marking,
/*first=*/false, /*last=*/false, middle_packet.get());
AddRtpHeaderExtensions(video_header, playout_delay, absolute_capture_time,
- set_video_rotation, set_color_space, set_frame_marking,
+ video_structure_.get(), set_video_rotation,
+ set_color_space, set_frame_marking,
/*first=*/false, /*last=*/true, last_packet.get());
RTC_DCHECK_GT(packet_capacity, single_packet->headers_size());
diff --git a/modules/rtp_rtcp/source/rtp_sender_video.h b/modules/rtp_rtcp/source/rtp_sender_video.h
index 3f4c676..053877e 100644
--- a/modules/rtp_rtcp/source/rtp_sender_video.h
+++ b/modules/rtp_rtcp/source/rtp_sender_video.h
@@ -18,6 +18,7 @@
#include "absl/strings/string_view.h"
#include "absl/types/optional.h"
#include "api/array_view.h"
+#include "api/transport/rtp/dependency_descriptor.h"
#include "api/video/video_codec_type.h"
#include "api/video/video_frame_type.h"
#include "modules/include/module_common_types.h"
@@ -103,6 +104,13 @@
const RTPFragmentationHeader* fragmentation,
RTPVideoHeader video_header,
absl::optional<int64_t> expected_retransmission_time_ms);
+ // Configures the video structure produced by the encoder to send using the
+ // dependency descriptor RTP header extension. The structure is copied;
+ // nullptr clears it. The next call to SendVideo should have
+ // video_header.frame_type == kVideoFrameKey. All calls to SendVideo after
+ // this call must use a video_header compatible with the video_structure.
+ void SetVideoStructure(const FrameDependencyStructure* video_structure);
+
// FlexFEC/ULPFEC.
// Set FEC rates, max frames before FEC is sent, and type of FEC masks.
// Returns false on failure.
@@ -184,6 +192,8 @@
VideoRotation last_rotation_ RTC_GUARDED_BY(send_checker_);
absl::optional<ColorSpace> last_color_space_ RTC_GUARDED_BY(send_checker_);
bool transmit_color_space_next_frame_ RTC_GUARDED_BY(send_checker_);
+ std::unique_ptr<FrameDependencyStructure> video_structure_
+ RTC_GUARDED_BY(send_checker_);
// Tracks the current request for playout delay limits from application
// and decides whether the current RTP frame should include the playout
diff --git a/modules/rtp_rtcp/source/rtp_sender_video_unittest.cc b/modules/rtp_rtcp/source/rtp_sender_video_unittest.cc
index 7ccd0ac..867e05b 100644
--- a/modules/rtp_rtcp/source/rtp_sender_video_unittest.cc
+++ b/modules/rtp_rtcp/source/rtp_sender_video_unittest.cc
@@ -10,15 +10,20 @@
#include "modules/rtp_rtcp/source/rtp_sender_video.h"
+#include <memory>
#include <string>
+#include <utility>
#include <vector>
+#include "api/transport/rtp/dependency_descriptor.h"
#include "api/video/video_codec_constants.h"
#include "api/video/video_timing.h"
+#include "common_video/generic_frame_descriptor/generic_frame_info.h"
#include "modules/rtp_rtcp/include/rtp_cvo.h"
#include "modules/rtp_rtcp/include/rtp_header_extension_map.h"
#include "modules/rtp_rtcp/include/rtp_rtcp.h"
#include "modules/rtp_rtcp/include/rtp_rtcp_defines.h"
+#include "modules/rtp_rtcp/source/rtp_dependency_descriptor_extension.h"
#include "modules/rtp_rtcp/source/rtp_format_video_generic.h"
#include "modules/rtp_rtcp/source/rtp_generic_frame_descriptor.h"
#include "modules/rtp_rtcp/source/rtp_generic_frame_descriptor_extension.h"
@@ -35,12 +40,15 @@
namespace {
using ::testing::ElementsAre;
+using ::testing::IsEmpty;
+using ::testing::SizeIs;
enum : int { // The first valid value is 1.
kAbsoluteSendTimeExtensionId = 1,
kFrameMarkingExtensionId,
kGenericDescriptorId00,
kGenericDescriptorId01,
+ kGenericDescriptorId02,
kTransmissionTimeOffsetExtensionId,
kTransportSequenceNumberExtensionId,
kVideoRotationExtensionId,
@@ -73,6 +81,8 @@
kGenericDescriptorId00);
receivers_extensions_.Register<RtpGenericFrameDescriptorExtension01>(
kGenericDescriptorId01);
+ receivers_extensions_.Register<RtpDependencyDescriptorExtension>(
+ kGenericDescriptorId02);
receivers_extensions_.Register<FrameMarkingExtension>(
kFrameMarkingExtensionId);
receivers_extensions_.Register<AbsoluteCaptureTimeExtension>(
@@ -522,6 +532,148 @@
EXPECT_TRUE(rtp_sender_video_.AllowRetransmission(header, kSettings, kRttMs));
}
+TEST_P(RtpSenderVideoTest, SendsDependencyDescriptorWhenVideoStructureIsSet) {
+ const int64_t kFrameId = 100000;
+ uint8_t kFrame[100];  // NOTE(review): contents are left uninitialized.
+ rtp_module_->RegisterRtpHeaderExtension(
+ RtpDependencyDescriptorExtension::kUri, kGenericDescriptorId02);
+ FrameDependencyStructure video_structure;
+ video_structure.num_decode_targets = 2;
+ video_structure.templates = {
+ GenericFrameInfo::Builder().S(0).T(0).Dtis("SS").Build(),
+ GenericFrameInfo::Builder().S(1).T(0).Dtis("-S").Build(),
+ GenericFrameInfo::Builder().S(1).T(1).Dtis("-D").Build(),
+ };
+ rtp_sender_video_.SetVideoStructure(&video_structure);
+
+ // Send a key frame; its descriptor is expected to carry the structure.
+ RTPVideoHeader hdr;
+ RTPVideoHeader::GenericDescriptorInfo& generic = hdr.generic.emplace();
+ generic.frame_id = kFrameId;
+ generic.temporal_index = 0;
+ generic.spatial_index = 0;
+ generic.decode_target_indications = {DecodeTargetIndication::kSwitch,
+ DecodeTargetIndication::kSwitch};
+ hdr.frame_type = VideoFrameType::kVideoFrameKey;
+ rtp_sender_video_.SendVideo(kPayload, kType, kTimestamp, 0, kFrame, nullptr,
+ hdr, kDefaultExpectedRetransmissionTimeMs);
+
+ ASSERT_EQ(transport_.packets_sent(), 1);
+ DependencyDescriptor descriptor_key;
+ ASSERT_TRUE(transport_.last_sent_packet()
+ .GetExtension<RtpDependencyDescriptorExtension>(
+ nullptr, &descriptor_key));  // No prior structure is supplied.
+ ASSERT_TRUE(descriptor_key.attached_structure);
+ EXPECT_EQ(descriptor_key.attached_structure->num_decode_targets, 2);
+ EXPECT_THAT(descriptor_key.attached_structure->templates, SizeIs(3));
+ EXPECT_EQ(descriptor_key.frame_number, kFrameId & 0xFFFF);
+ EXPECT_EQ(descriptor_key.frame_dependencies.spatial_id, 0);
+ EXPECT_EQ(descriptor_key.frame_dependencies.temporal_id, 0);
+ EXPECT_EQ(descriptor_key.frame_dependencies.decode_target_indications,
+ generic.decode_target_indications);
+ EXPECT_THAT(descriptor_key.frame_dependencies.frame_diffs, IsEmpty());
+
+ // Send a delta frame that depends on frames kFrameId and kFrameId - 500.
+ generic.frame_id = kFrameId + 1;
+ generic.temporal_index = 1;
+ generic.spatial_index = 1;
+ generic.dependencies = {kFrameId, kFrameId - 500};
+ generic.decode_target_indications = {DecodeTargetIndication::kNotPresent,
+ DecodeTargetIndication::kRequired};
+ hdr.frame_type = VideoFrameType::kVideoFrameDelta;
+ rtp_sender_video_.SendVideo(kPayload, kType, kTimestamp, 0, kFrame, nullptr,
+ hdr, kDefaultExpectedRetransmissionTimeMs);
+
+ EXPECT_EQ(transport_.packets_sent(), 2);
+ DependencyDescriptor descriptor_delta;
+ ASSERT_TRUE(
+ transport_.last_sent_packet()
+ .GetExtension<RtpDependencyDescriptorExtension>(
+ descriptor_key.attached_structure.get(), &descriptor_delta));
+ EXPECT_EQ(descriptor_delta.attached_structure, nullptr);
+ EXPECT_EQ(descriptor_delta.frame_number, (kFrameId + 1) & 0xFFFF);
+ EXPECT_EQ(descriptor_delta.frame_dependencies.spatial_id, 1);
+ EXPECT_EQ(descriptor_delta.frame_dependencies.temporal_id, 1);
+ EXPECT_EQ(descriptor_delta.frame_dependencies.decode_target_indications,
+ generic.decode_target_indications);
+ EXPECT_THAT(descriptor_delta.frame_dependencies.frame_diffs,
+ ElementsAre(1, 501));
+}
+
+TEST_P(RtpSenderVideoTest,
+ SetDiffentVideoStructureAvoidsCollisionWithThePreviousStructure) {  // NOTE(review): typo, "Diffent".
+ const int64_t kFrameId = 100000;
+ uint8_t kFrame[100];  // NOTE(review): contents are left uninitialized.
+ rtp_module_->RegisterRtpHeaderExtension(
+ RtpDependencyDescriptorExtension::kUri, kGenericDescriptorId02);
+ FrameDependencyStructure video_structure1;
+ video_structure1.num_decode_targets = 2;
+ video_structure1.templates = {
+ GenericFrameInfo::Builder().S(0).T(0).Dtis("SS").Build(),
+ GenericFrameInfo::Builder().S(0).T(1).Dtis("D-").Build(),
+ };
+ FrameDependencyStructure video_structure2;
+ video_structure2.num_decode_targets = 2;
+ video_structure2.templates = {
+ GenericFrameInfo::Builder().S(0).T(0).Dtis("SS").Build(),
+ GenericFrameInfo::Builder().S(0).T(1).Dtis("R-").Build(),
+ };
+
+ // Send the 1st key frame using video_structure1.
+ RTPVideoHeader hdr;
+ RTPVideoHeader::GenericDescriptorInfo& generic = hdr.generic.emplace();
+ generic.frame_id = kFrameId;
+ generic.decode_target_indications = {DecodeTargetIndication::kSwitch,
+ DecodeTargetIndication::kSwitch};
+ hdr.frame_type = VideoFrameType::kVideoFrameKey;
+ rtp_sender_video_.SetVideoStructure(&video_structure1);
+ rtp_sender_video_.SendVideo(kPayload, kType, kTimestamp, 0, kFrame, nullptr,
+ hdr, kDefaultExpectedRetransmissionTimeMs);
+ // Parse the extension of the 1st key frame.
+ ASSERT_EQ(transport_.packets_sent(), 1);
+ DependencyDescriptor descriptor_key1;
+ ASSERT_TRUE(transport_.last_sent_packet()
+ .GetExtension<RtpDependencyDescriptorExtension>(
+ nullptr, &descriptor_key1));
+ ASSERT_TRUE(descriptor_key1.attached_structure);
+
+ // Send the delta frame while video_structure1 is still the one set.
+ generic.frame_id = kFrameId + 1;
+ generic.temporal_index = 1;
+ generic.decode_target_indications = {DecodeTargetIndication::kDiscardable,
+ DecodeTargetIndication::kNotPresent};
+ hdr.frame_type = VideoFrameType::kVideoFrameDelta;
+ rtp_sender_video_.SendVideo(kPayload, kType, kTimestamp, 0, kFrame, nullptr,
+ hdr, kDefaultExpectedRetransmissionTimeMs);
+
+ ASSERT_EQ(transport_.packets_sent(), 2);
+ RtpPacket delta_packet = transport_.last_sent_packet();
+
+ // Send the 2nd key frame after switching to video_structure2.
+ generic.frame_id = kFrameId + 2;
+ generic.decode_target_indications = {DecodeTargetIndication::kSwitch,
+ DecodeTargetIndication::kSwitch};
+ hdr.frame_type = VideoFrameType::kVideoFrameKey;
+ rtp_sender_video_.SetVideoStructure(&video_structure2);
+ rtp_sender_video_.SendVideo(kPayload, kType, kTimestamp, 0, kFrame, nullptr,
+ hdr, kDefaultExpectedRetransmissionTimeMs);
+ // Parse the 2nd key frame.
+ ASSERT_EQ(transport_.packets_sent(), 3);
+ DependencyDescriptor descriptor_key2;
+ ASSERT_TRUE(transport_.last_sent_packet()
+ .GetExtension<RtpDependencyDescriptorExtension>(
+ nullptr, &descriptor_key2));
+ ASSERT_TRUE(descriptor_key2.attached_structure);
+
+ // Try to parse the 1st delta frame. It should be parseable using the
+ // structure from the 1st key frame, but not the one from the 2nd key frame.
+ DependencyDescriptor descriptor_delta;
+ EXPECT_TRUE(delta_packet.GetExtension<RtpDependencyDescriptorExtension>(
+ descriptor_key1.attached_structure.get(), &descriptor_delta));
+ EXPECT_FALSE(delta_packet.GetExtension<RtpDependencyDescriptorExtension>(
+ descriptor_key2.attached_structure.get(), &descriptor_delta));
+}
+
void RtpSenderVideoTest::PopulateGenericFrameDescriptor(int version) {
const absl::string_view ext_uri =
(version == 0) ? RtpGenericFrameDescriptorExtension00::kUri
diff --git a/modules/rtp_rtcp/source/rtp_video_header.h b/modules/rtp_rtcp/source/rtp_video_header.h
index b66cba8..714d1eb 100644
--- a/modules/rtp_rtcp/source/rtp_video_header.h
+++ b/modules/rtp_rtcp/source/rtp_video_header.h
@@ -15,6 +15,7 @@
#include "absl/container/inlined_vector.h"
#include "absl/types/optional.h"
#include "absl/types/variant.h"
+#include "api/transport/rtp/dependency_descriptor.h"
#include "api/video/color_space.h"
#include "api/video/video_codec_type.h"
#include "api/video/video_content_type.h"
@@ -50,6 +51,7 @@
int64_t frame_id = 0;
int spatial_index = 0;
int temporal_index = 0;
+ absl::InlinedVector<DecodeTargetIndication, 10> decode_target_indications;
absl::InlinedVector<int64_t, 5> dependencies;
bool discardable = false;
};