Enable End-to-End Encrypted Audio Payloads.

This change integrates the FrameDecryptorInterface and the FrameEncryptorInterface into
the audio media path. If a FrameEncryptorInterface is set on an outgoing audio RTPSender
then each outgoing audio payload will first pass through the provided FrameEncryptor which
will have a chance to modify the payload contents for the purposes of encryption.

If a FrameDecryptorInterface is set on an incoming audio RtpReceiver then each incoming
audio payload will first pass through the provided FrameDecryptor which will have a
chance to modify the payload contents for the purpose of decryption.

While AEAD is supported by the FrameDecryptor/FrameEncryptor interfaces, this CL does
not use it, so the additional_data argument is passed as null.

Bug: webrtc:9681
Change-Id: Ic383a9dce280528739f9d271357c2220e0a0dccf
Reviewed-on: https://webrtc-review.googlesource.com/c/101702
Commit-Queue: Benjamin Wright <benwright@webrtc.org>
Reviewed-by: Fredrik Solenberg <solenberg@webrtc.org>
Reviewed-by: Steve Anton <steveanton@webrtc.org>
Reviewed-by: Emad Omara <emadomara@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#25001}
diff --git a/audio/audio_receive_stream.cc b/audio/audio_receive_stream.cc
index b9faea5..7e473ed 100644
--- a/audio/audio_receive_stream.cc
+++ b/audio/audio_receive_stream.cc
@@ -76,7 +76,7 @@
           nullptr /* RtcpRttStats */, event_log, config.rtp.remote_ssrc,
           config.jitter_buffer_max_packets,
           config.jitter_buffer_fast_accelerate, config.decoder_factory,
-          config.codec_pair_id));
+          config.codec_pair_id, config.frame_decryptor));
 }
 }  // namespace
 
diff --git a/audio/audio_send_stream.cc b/audio/audio_send_stream.cc
index 06be261..e82264a 100644
--- a/audio/audio_send_stream.cc
+++ b/audio/audio_send_stream.cc
@@ -50,10 +50,12 @@
     rtc::TaskQueue* worker_queue,
     ProcessThread* module_process_thread,
     RtcpRttStats* rtcp_rtt_stats,
-    RtcEventLog* event_log) {
+    RtcEventLog* event_log,
+    FrameEncryptorInterface* frame_encryptor) {
   return absl::make_unique<voe::ChannelSendProxy>(
       absl::make_unique<voe::ChannelSend>(worker_queue, module_process_thread,
-                                          rtcp_rtt_stats, event_log));
+                                          rtcp_rtt_stats, event_log,
+                                          frame_encryptor));
 }
 }  // namespace
 
@@ -103,7 +105,8 @@
                       CreateChannelAndProxy(worker_queue,
                                             module_process_thread,
                                             rtcp_rtt_stats,
-                                            event_log)) {}
+                                            event_log,
+                                            config.frame_encryptor)) {}
 
 AudioSendStream::AudioSendStream(
     const webrtc::AudioSendStream::Config& config,
@@ -227,6 +230,11 @@
         stream->timed_send_transport_adapter_.get());
   }
 
+  // Enable the frame encryptor if a new frame encryptor has been provided.
+  if (first_time || new_config.frame_encryptor != old_config.frame_encryptor) {
+    channel_proxy->SetFrameEncryptor(new_config.frame_encryptor);
+  }
+
   const ExtensionIds old_ids = FindExtensionIds(old_config.rtp.extensions);
   const ExtensionIds new_ids = FindExtensionIds(new_config.rtp.extensions);
   // Audio level indication
diff --git a/audio/audio_send_stream_unittest.cc b/audio/audio_send_stream_unittest.cc
index 0ed1ac1..0a954f8 100644
--- a/audio/audio_send_stream_unittest.cc
+++ b/audio/audio_send_stream_unittest.cc
@@ -196,6 +196,7 @@
     EXPECT_CALL(*channel_proxy_, SetLocalSSRC(kSsrc)).Times(1);
     EXPECT_CALL(*channel_proxy_, SetRTCP_CNAME(StrEq(kCName))).Times(1);
     EXPECT_CALL(*channel_proxy_, SetNACKStatus(true, 10)).Times(1);
+    EXPECT_CALL(*channel_proxy_, SetFrameEncryptor(nullptr)).Times(1);
     EXPECT_CALL(*channel_proxy_,
                 SetSendAudioLevelIndicationStatus(true, kAudioLevelId))
         .Times(1);
diff --git a/audio/channel_receive.cc b/audio/channel_receive.cc
index 784e4a7..26213cc 100644
--- a/audio/channel_receive.cc
+++ b/audio/channel_receive.cc
@@ -229,7 +229,8 @@
     size_t jitter_buffer_max_packets,
     bool jitter_buffer_fast_playout,
     rtc::scoped_refptr<AudioDecoderFactory> decoder_factory,
-    absl::optional<AudioCodecPairId> codec_pair_id)
+    absl::optional<AudioCodecPairId> codec_pair_id,
+    FrameDecryptorInterface* frame_decryptor)
     : event_log_(rtc_event_log),
       rtp_receive_statistics_(
           ReceiveStatistics::Create(Clock::GetRealTimeClock())),
@@ -245,7 +246,8 @@
       _audioDeviceModulePtr(audio_device_module),
       _transportPtr(NULL),
       _outputGain(1.0f),
-      associated_send_channel_(nullptr) {
+      associated_send_channel_(nullptr),
+      frame_decryptor_(frame_decryptor) {
   RTC_DCHECK(module_process_thread);
   RTC_DCHECK(audio_device_module);
   AudioCodingModule::Config acm_config;
@@ -425,7 +427,38 @@
   WebRtcRTPHeader webrtc_rtp_header = {};
   webrtc_rtp_header.header = header;
 
-  const size_t payload_data_length = payload_length - header.paddingLength;
+  size_t payload_data_length = payload_length - header.paddingLength;
+
+  // E2EE Custom Audio Frame Decryption (This is optional).
+  // Keep this buffer around for the lifetime of the OnReceivedPayloadData call.
+  rtc::Buffer decrypted_audio_payload;
+  if (frame_decryptor_ != nullptr) {
+    size_t max_plaintext_size = frame_decryptor_->GetMaxPlaintextByteSize(
+        cricket::MEDIA_TYPE_AUDIO, payload_length);
+    decrypted_audio_payload.SetSize(max_plaintext_size);
+
+    size_t bytes_written = 0;
+    std::vector<uint32_t> csrcs(header.arrOfCSRCs,
+                                header.arrOfCSRCs + header.numCSRCs);
+    int decrypt_status = frame_decryptor_->Decrypt(
+        cricket::MEDIA_TYPE_AUDIO, csrcs,
+        /*additional_data=*/nullptr,
+        rtc::ArrayView<const uint8_t>(payload, payload_data_length),
+        decrypted_audio_payload, &bytes_written);
+
+    // In this case just interpret the failure as a silent frame.
+    if (decrypt_status != 0) {
+      bytes_written = 0;
+    }
+
+    // Resize the decrypted audio payload to the number of bytes actually
+    // written.
+    decrypted_audio_payload.SetSize(bytes_written);
+    // Update the final payload.
+    payload = decrypted_audio_payload.data();
+    payload_data_length = decrypted_audio_payload.size();
+  }
+
   if (payload_data_length == 0) {
     webrtc_rtp_header.frameType = kEmptyFrame;
     return OnReceivedPayloadData(nullptr, 0, &webrtc_rtp_header);
diff --git a/audio/channel_receive.h b/audio/channel_receive.h
index 20198c4..2e089b7 100644
--- a/audio/channel_receive.h
+++ b/audio/channel_receive.h
@@ -42,6 +42,7 @@
 namespace webrtc {
 
 class AudioDeviceModule;
+class FrameDecryptorInterface;
 class PacketRouter;
 class ProcessThread;
 class RateLimiter;
@@ -112,7 +113,8 @@
                  size_t jitter_buffer_max_packets,
                  bool jitter_buffer_fast_playout,
                  rtc::scoped_refptr<AudioDecoderFactory> decoder_factory,
-                 absl::optional<AudioCodecPairId> codec_pair_id);
+                 absl::optional<AudioCodecPairId> codec_pair_id,
+                 FrameDecryptorInterface* frame_decryptor);
   virtual ~ChannelReceive();
 
   void SetSink(AudioSinkInterface* sink);
@@ -263,6 +265,9 @@
   PacketRouter* packet_router_ = nullptr;
 
   rtc::ThreadChecker construction_thread_;
+
+  // E2EE Audio Frame Decryption
+  FrameDecryptorInterface* frame_decryptor_ = nullptr;
 };
 
 }  // namespace voe
diff --git a/audio/channel_send.cc b/audio/channel_send.cc
index 0c9328f..d3748b3 100644
--- a/audio/channel_send.cc
+++ b/audio/channel_send.cc
@@ -19,6 +19,7 @@
 
 #include "absl/memory/memory.h"
 #include "api/array_view.h"
+#include "api/crypto/frameencryptorinterface.h"
 #include "audio/utility/audio_frame_operations.h"
 #include "call/rtp_transport_controller_send_interface.h"
 #include "logging/rtc_event_log/events/rtc_event_audio_playout.h"
@@ -260,6 +261,35 @@
     _rtpRtcpModule->SetAudioLevel(rms_level_.Average());
   }
 
+  // E2EE Custom Audio Frame Encryption (This is optional).
+  // Keep this buffer around for the lifetime of the send call.
+  rtc::Buffer encrypted_audio_payload;
+  if (frame_encryptor_ != nullptr) {
+    // TODO(benwright@webrtc.org) - Allocate enough to always encrypt inline.
+    // Allocate a buffer to hold the maximum possible encrypted payload.
+    size_t max_ciphertext_size = frame_encryptor_->GetMaxCiphertextByteSize(
+        cricket::MEDIA_TYPE_AUDIO, payloadSize);
+    encrypted_audio_payload.SetSize(max_ciphertext_size);
+
+    // Encrypt the audio payload into the buffer.
+    size_t bytes_written = 0;
+    int encrypt_status = frame_encryptor_->Encrypt(
+        cricket::MEDIA_TYPE_AUDIO, _rtpRtcpModule->SSRC(),
+        /*additional_data=*/nullptr,
+        rtc::ArrayView<const uint8_t>(payloadData, payloadSize),
+        encrypted_audio_payload, &bytes_written);
+    if (encrypt_status != 0) {
+      RTC_DLOG(LS_ERROR) << "Channel::SendData() failed encrypt audio payload: "
+                         << encrypt_status;
+      return -1;
+    }
+    // Resize the buffer to the exact number of bytes actually used.
+    encrypted_audio_payload.SetSize(bytes_written);
+    // Rewrite the payloadData and size to the new encrypted payload.
+    payloadData = encrypted_audio_payload.data();
+    payloadSize = encrypted_audio_payload.size();
+  }
+
   // Push data from ACM to RTP/RTCP-module to deliver audio frame for
   // packetization.
   // This call will trigger Transport::SendPacket() from the RTP/RTCP module.
@@ -322,7 +352,8 @@
 ChannelSend::ChannelSend(rtc::TaskQueue* encoder_queue,
                          ProcessThread* module_process_thread,
                          RtcpRttStats* rtcp_rtt_stats,
-                         RtcEventLog* rtc_event_log)
+                         RtcEventLog* rtc_event_log,
+                         FrameEncryptorInterface* frame_encryptor)
     : event_log_(rtc_event_log),
       _timeStamp(0),  // This is just an offset, RTP module will add it's own
                       // random offset
@@ -342,7 +373,8 @@
                                                    kMaxRetransmissionWindowMs)),
       use_twcc_plr_for_ana_(
           webrtc::field_trial::FindFullName("UseTwccPlrForAna") == "Enabled"),
-      encoder_queue_(encoder_queue) {
+      encoder_queue_(encoder_queue),
+      frame_encryptor_(frame_encryptor) {
   RTC_DCHECK(module_process_thread);
   RTC_DCHECK(encoder_queue);
   audio_coding_.reset(AudioCodingModule::Create(AudioCodingModule::Config()));
@@ -949,5 +981,16 @@
   return rtt;
 }
 
+void ChannelSend::SetFrameEncryptor(FrameEncryptorInterface* frame_encryptor) {
+  rtc::CritScope cs(&encoder_queue_lock_);
+  if (encoder_queue_is_active_) {
+    encoder_queue_->PostTask([this, frame_encryptor]() {
+      this->frame_encryptor_ = frame_encryptor;
+    });
+  } else {
+    frame_encryptor_ = frame_encryptor;
+  }
+}
+
 }  // namespace voe
 }  // namespace webrtc
diff --git a/audio/channel_send.h b/audio/channel_send.h
index 4569201..ef92f8e 100644
--- a/audio/channel_send.h
+++ b/audio/channel_send.h
@@ -37,6 +37,7 @@
 
 namespace webrtc {
 
+class FrameEncryptorInterface;
 class PacketRouter;
 class ProcessThread;
 class RateLimiter;
@@ -118,7 +119,8 @@
   ChannelSend(rtc::TaskQueue* encoder_queue,
               ProcessThread* module_process_thread,
               RtcpRttStats* rtcp_rtt_stats,
-              RtcEventLog* rtc_event_log);
+              RtcEventLog* rtc_event_log,
+              FrameEncryptorInterface* frame_encryptor);
 
   virtual ~ChannelSend();
 
@@ -222,6 +224,9 @@
 
   int64_t GetRTT() const;
 
+  // E2EE Custom Audio Frame Encryption
+  void SetFrameEncryptor(FrameEncryptorInterface* frame_encryptor);
+
  private:
   class ProcessAndEncodeAudioTask;
 
@@ -290,6 +295,9 @@
   rtc::CriticalSection encoder_queue_lock_;
   bool encoder_queue_is_active_ RTC_GUARDED_BY(encoder_queue_lock_) = false;
   rtc::TaskQueue* encoder_queue_ = nullptr;
+
+  // E2EE Audio Frame Encryption
+  FrameEncryptorInterface* frame_encryptor_ = nullptr;
 };
 
 }  // namespace voe
diff --git a/audio/channel_send_proxy.cc b/audio/channel_send_proxy.cc
index a4d8b69..8091bdc 100644
--- a/audio/channel_send_proxy.cc
+++ b/audio/channel_send_proxy.cc
@@ -197,5 +197,11 @@
   return channel_.get();
 }
 
+void ChannelSendProxy::SetFrameEncryptor(
+    FrameEncryptorInterface* frame_encryptor) {
+  RTC_DCHECK(worker_thread_checker_.CalledOnValidThread());
+  channel_->SetFrameEncryptor(frame_encryptor);
+}
+
 }  // namespace voe
 }  // namespace webrtc
diff --git a/audio/channel_send_proxy.h b/audio/channel_send_proxy.h
index 754f9f6..1b8b4a0 100644
--- a/audio/channel_send_proxy.h
+++ b/audio/channel_send_proxy.h
@@ -23,6 +23,7 @@
 
 namespace webrtc {
 
+class FrameEncryptorInterface;
 class RtcpBandwidthObserver;
 class RtpRtcp;
 class RtpTransportControllerSendInterface;
@@ -84,6 +85,9 @@
   // Needed by ChannelReceiveProxy::AssociateSendChannel.
   virtual ChannelSend* GetChannel() const;
 
+  // E2EE Custom Audio Frame Encryption (Optional)
+  virtual void SetFrameEncryptor(FrameEncryptorInterface* frame_encryptor);
+
  private:
   // Thread checkers document and lock usage of some methods on voe::Channel to
   // specific threads we know about. The goal is to eventually split up
diff --git a/audio/mock_voe_channel_proxy.h b/audio/mock_voe_channel_proxy.h
index 0ae3cdc..88a50ea 100644
--- a/audio/mock_voe_channel_proxy.h
+++ b/audio/mock_voe_channel_proxy.h
@@ -105,6 +105,8 @@
                void(float recoverable_packet_loss_rate));
   MOCK_METHOD0(StartSend, void());
   MOCK_METHOD0(StopSend, void());
+  MOCK_METHOD1(SetFrameEncryptor,
+               void(FrameEncryptorInterface* frame_encryptor));
 };
 }  // namespace test
 }  // namespace webrtc