Add a mute state field to AudioFrame and switch some callers to use it. Also make AudioFrame::data_ private and provide the following accessors instead:

const int16_t* data() const;
int16_t* mutable_data();

- data() returns a zeroed static buffer for muted frames (to avoid unnecessarily zeroing the member buffer) and returns AudioFrame::data_ directly for unmuted frames.
- mutable_data() lazily zeroes AudioFrame::data_ if the frame is currently muted, sets muted to false, and returns AudioFrame::data_.

These accessors serve to "force" callers to be aware of the mute state field; i.e., lazy zeroing is not the primary motivation.
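
A minimal sketch of the intended accessor semantics (for illustration only; it assumes a muted_ flag on AudioFrame and a hypothetical empty_data() helper returning a shared all-zero buffer):

const int16_t* AudioFrame::data() const {
  // Muted frames read from a shared, always-zero buffer; data_ stays untouched.
  return muted_ ? empty_data() : data_;
}

int16_t* AudioFrame::mutable_data() {
  if (muted_) {
    // Lazily clear the member buffer the first time a writer asks for it.
    memset(data_, 0, kMaxDataSizeBytes);
    muted_ = false;
  }
  return data_;
}

Callers that write samples go through mutable_data(), and read-only callers that want to skip work on silence can check muted() first, as the operations below now do.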

This change optimizes the handling of muted frames only where it is fairly trivial to do so. Other improvements that require more significant structural changes will come later.

BUG=webrtc:7343
TBR=henrika

Review-Url: https://codereview.webrtc.org/2750783004
Cr-Commit-Position: refs/heads/master@{#18543}
diff --git a/webrtc/audio/audio_transport_proxy.cc b/webrtc/audio/audio_transport_proxy.cc
index 4d2f9e3..d6ce939 100644
--- a/webrtc/audio/audio_transport_proxy.cc
+++ b/webrtc/audio/audio_transport_proxy.cc
@@ -25,9 +25,11 @@
   resampler->InitializeIfNeeded(frame.sample_rate_hz_, destination_sample_rate,
                                 number_of_channels);
 
+  // TODO(yujo): make resampler take an AudioFrame, and add special case
+  // handling of muted frames.
   return resampler->Resample(
-      frame.data_, frame.samples_per_channel_ * number_of_channels, destination,
-      number_of_channels * target_number_of_samples_per_channel);
+      frame.data(), frame.samples_per_channel_ * number_of_channels,
+      destination, number_of_channels * target_number_of_samples_per_channel);
 }
 }  // namespace
 
@@ -77,7 +79,7 @@
   // 100 = 1 second / data duration (10 ms).
   RTC_DCHECK_EQ(nSamples * 100, samplesPerSec);
   RTC_DCHECK_LE(nBytesPerSample * nSamples * nChannels,
-                sizeof(AudioFrame::data_));
+                AudioFrame::kMaxDataSizeBytes);
 
   mixer_->Mix(nChannels, &mixed_frame_);
   *elapsed_time_ms = mixed_frame_.elapsed_time_ms_;
@@ -120,7 +122,7 @@
 
   // 8 = bits per byte.
   RTC_DCHECK_LE(bits_per_sample / 8 * number_of_frames * number_of_channels,
-                sizeof(AudioFrame::data_));
+                AudioFrame::kMaxDataSizeBytes);
   mixer_->Mix(number_of_channels, &mixed_frame_);
   *elapsed_time_ms = mixed_frame_.elapsed_time_ms_;
   *ntp_time_ms = mixed_frame_.ntp_time_ms_;
diff --git a/webrtc/audio/utility/audio_frame_operations.cc b/webrtc/audio/utility/audio_frame_operations.cc
index 0338b46..beb3e4c 100644
--- a/webrtc/audio/utility/audio_frame_operations.cc
+++ b/webrtc/audio/utility/audio_frame_operations.cc
@@ -32,7 +32,7 @@
   RTC_DCHECK_GT(result_frame->num_channels_, 0);
   RTC_DCHECK_EQ(result_frame->num_channels_, frame_to_add.num_channels_);
 
-  bool no_previous_data = false;
+  bool no_previous_data = result_frame->muted();
   if (result_frame->samples_per_channel_ != frame_to_add.samples_per_channel_) {
     // Special case we have no data to start with.
     RTC_DCHECK_EQ(result_frame->samples_per_channel_, 0);
@@ -51,21 +51,21 @@
   if (result_frame->speech_type_ != frame_to_add.speech_type_)
     result_frame->speech_type_ = AudioFrame::kUndefined;
 
-  if (no_previous_data) {
-    std::copy(frame_to_add.data_, frame_to_add.data_ +
-                                      frame_to_add.samples_per_channel_ *
-                                          result_frame->num_channels_,
-              result_frame->data_);
-  } else {
-    for (size_t i = 0;
-         i < result_frame->samples_per_channel_ * result_frame->num_channels_;
-         i++) {
-      const int32_t wrap_guard = static_cast<int32_t>(result_frame->data_[i]) +
-                           static_cast<int32_t>(frame_to_add.data_[i]);
-      result_frame->data_[i] = rtc::saturated_cast<int16_t>(wrap_guard);
+  if (!frame_to_add.muted()) {
+    const int16_t* in_data = frame_to_add.data();
+    int16_t* out_data = result_frame->mutable_data();
+    size_t length =
+        frame_to_add.samples_per_channel_ * frame_to_add.num_channels_;
+    if (no_previous_data) {
+      std::copy(in_data, in_data + length, out_data);
+    } else {
+      for (size_t i = 0; i < length; i++) {
+        const int32_t wrap_guard = static_cast<int32_t>(out_data[i]) +
+                                   static_cast<int32_t>(in_data[i]);
+        out_data[i] = rtc::saturated_cast<int16_t>(wrap_guard);
+      }
     }
   }
-  return;
 }
 
 void AudioFrameOperations::MonoToStereo(const int16_t* src_audio,
@@ -86,10 +86,13 @@
     return -1;
   }
 
-  int16_t data_copy[AudioFrame::kMaxDataSizeSamples];
-  memcpy(data_copy, frame->data_,
-         sizeof(int16_t) * frame->samples_per_channel_);
-  MonoToStereo(data_copy, frame->samples_per_channel_, frame->data_);
+  if (!frame->muted()) {
+    // TODO(yujo): this operation can be done in place.
+    int16_t data_copy[AudioFrame::kMaxDataSizeSamples];
+    memcpy(data_copy, frame->data(),
+           sizeof(int16_t) * frame->samples_per_channel_);
+    MonoToStereo(data_copy, frame->samples_per_channel_, frame->mutable_data());
+  }
   frame->num_channels_ = 2;
 
   return 0;
@@ -112,7 +115,10 @@
   RTC_DCHECK_LE(frame->samples_per_channel_ * 2,
                 AudioFrame::kMaxDataSizeSamples);
 
-  StereoToMono(frame->data_, frame->samples_per_channel_, frame->data_);
+  if (!frame->muted()) {
+    StereoToMono(frame->data(), frame->samples_per_channel_,
+                 frame->mutable_data());
+  }
   frame->num_channels_ = 1;
 
   return 0;
@@ -138,7 +144,10 @@
   RTC_DCHECK_LE(frame->samples_per_channel_ * 4,
                 AudioFrame::kMaxDataSizeSamples);
 
-  QuadToStereo(frame->data_, frame->samples_per_channel_, frame->data_);
+  if (!frame->muted()) {
+    QuadToStereo(frame->data(), frame->samples_per_channel_,
+                 frame->mutable_data());
+  }
   frame->num_channels_ = 2;
 
   return 0;
@@ -162,7 +171,10 @@
   RTC_DCHECK_LE(frame->samples_per_channel_ * 4,
                 AudioFrame::kMaxDataSizeSamples);
 
-  QuadToMono(frame->data_, frame->samples_per_channel_, frame->data_);
+  if (!frame->muted()) {
+    QuadToMono(frame->data(), frame->samples_per_channel_,
+               frame->mutable_data());
+  }
   frame->num_channels_ = 1;
 
   return 0;
@@ -203,14 +215,15 @@
 
 void AudioFrameOperations::SwapStereoChannels(AudioFrame* frame) {
   RTC_DCHECK(frame);
-  if (frame->num_channels_ != 2) {
+  if (frame->num_channels_ != 2 || frame->muted()) {
     return;
   }
 
+  int16_t* frame_data = frame->mutable_data();
   for (size_t i = 0; i < frame->samples_per_channel_ * 2; i += 2) {
-    int16_t temp_data = frame->data_[i];
-    frame->data_[i] = frame->data_[i + 1];
-    frame->data_[i + 1] = temp_data;
+    int16_t temp_data = frame_data[i];
+    frame_data[i] = frame_data[i + 1];
+    frame_data[i + 1] = temp_data;
   }
 }
 
@@ -224,8 +237,13 @@
     // Frame fully muted.
     size_t total_samples = frame->samples_per_channel_ * frame->num_channels_;
     RTC_DCHECK_GE(AudioFrame::kMaxDataSizeSamples, total_samples);
-    memset(frame->data_, 0, sizeof(frame->data_[0]) * total_samples);
+    frame->Mute();
   } else {
+    // Fade is a no-op on a muted frame.
+    if (frame->muted()) {
+      return;
+    }
+
     // Limit number of samples to fade, if frame isn't long enough.
     size_t count = kMuteFadeFrames;
     float inc = kMuteFadeInc;
@@ -252,12 +270,13 @@
     }
 
     // Perform fade.
+    int16_t* frame_data = frame->mutable_data();
     size_t channels = frame->num_channels_;
     for (size_t j = 0; j < channels; ++j) {
       float g = start_g;
       for (size_t i = start * channels; i < end * channels; i += channels) {
         g += inc;
-        frame->data_[i + j] *= g;
+        frame_data[i + j] *= g;
       }
     }
   }
@@ -270,43 +289,41 @@
 void AudioFrameOperations::ApplyHalfGain(AudioFrame* frame) {
   RTC_DCHECK(frame);
   RTC_DCHECK_GT(frame->num_channels_, 0);
-  if (frame->num_channels_ < 1) {
+  if (frame->num_channels_ < 1 || frame->muted()) {
     return;
   }
 
+  int16_t* frame_data = frame->mutable_data();
   for (size_t i = 0; i < frame->samples_per_channel_ * frame->num_channels_;
        i++) {
-    frame->data_[i] = frame->data_[i] >> 1;
+    frame_data[i] = frame_data[i] >> 1;
   }
 }
 
 int AudioFrameOperations::Scale(float left, float right, AudioFrame* frame) {
   if (frame->num_channels_ != 2) {
     return -1;
+  } else if (frame->muted()) {
+    return 0;
   }
 
+  int16_t* frame_data = frame->mutable_data();
   for (size_t i = 0; i < frame->samples_per_channel_; i++) {
-    frame->data_[2 * i] = static_cast<int16_t>(left * frame->data_[2 * i]);
-    frame->data_[2 * i + 1] =
-        static_cast<int16_t>(right * frame->data_[2 * i + 1]);
+    frame_data[2 * i] = static_cast<int16_t>(left * frame_data[2 * i]);
+    frame_data[2 * i + 1] = static_cast<int16_t>(right * frame_data[2 * i + 1]);
   }
   return 0;
 }
 
 int AudioFrameOperations::ScaleWithSat(float scale, AudioFrame* frame) {
-  int32_t temp_data = 0;
+  if (frame->muted()) {
+    return 0;
+  }
 
-  // Ensure that the output result is saturated [-32768, +32767].
+  int16_t* frame_data = frame->mutable_data();
   for (size_t i = 0; i < frame->samples_per_channel_ * frame->num_channels_;
        i++) {
-    temp_data = static_cast<int32_t>(scale * frame->data_[i]);
-    if (temp_data < -32768) {
-      frame->data_[i] = -32768;
-    } else if (temp_data > 32767) {
-      frame->data_[i] = 32767;
-    } else {
-      frame->data_[i] = static_cast<int16_t>(temp_data);
-    }
+    frame_data[i] = rtc::saturated_cast<int16_t>(scale * frame_data[i]);
   }
   return 0;
 }
diff --git a/webrtc/audio/utility/audio_frame_operations_unittest.cc b/webrtc/audio/utility/audio_frame_operations_unittest.cc
index 096ea38..e3e9804 100644
--- a/webrtc/audio/utility/audio_frame_operations_unittest.cc
+++ b/webrtc/audio/utility/audio_frame_operations_unittest.cc
@@ -32,24 +32,28 @@
                   int16_t ch3,
                   int16_t ch4,
                   AudioFrame* frame) {
+  int16_t* frame_data = frame->mutable_data();
   for (size_t i = 0; i < frame->samples_per_channel_ * 4; i += 4) {
-    frame->data_[i] = ch1;
-    frame->data_[i + 1] = ch2;
-    frame->data_[i + 2] = ch3;
-    frame->data_[i + 3] = ch4;
+    frame_data[i] = ch1;
+    frame_data[i + 1] = ch2;
+    frame_data[i + 2] = ch3;
+    frame_data[i + 3] = ch4;
   }
 }
 
 void SetFrameData(int16_t left, int16_t right, AudioFrame* frame) {
+  int16_t* frame_data = frame->mutable_data();
   for (size_t i = 0; i < frame->samples_per_channel_ * 2; i += 2) {
-    frame->data_[i] = left;
-    frame->data_[i + 1] = right;
+    frame_data[i] = left;
+    frame_data[i + 1] = right;
   }
 }
 
 void SetFrameData(int16_t data, AudioFrame* frame) {
-  for (size_t i = 0; i < frame->samples_per_channel_; i++) {
-    frame->data_[i] = data;
+  int16_t* frame_data = frame->mutable_data();
+  for (size_t i = 0;
+       i < frame->samples_per_channel_ * frame->num_channels_; i++) {
+    frame_data[i] = data;
   }
 }
 
@@ -57,10 +61,13 @@
   EXPECT_EQ(frame1.num_channels_, frame2.num_channels_);
   EXPECT_EQ(frame1.samples_per_channel_,
             frame2.samples_per_channel_);
+  const int16_t* frame1_data = frame1.data();
+  const int16_t* frame2_data = frame2.data();
   for (size_t i = 0; i < frame1.samples_per_channel_ * frame1.num_channels_;
       i++) {
-    EXPECT_EQ(frame1.data_[i], frame2.data_[i]);
+    EXPECT_EQ(frame1_data[i], frame2_data[i]);
   }
+  EXPECT_EQ(frame1.muted(), frame2.muted());
 }
 
 void InitFrame(AudioFrame* frame, size_t channels, size_t samples_per_channel,
@@ -81,7 +88,7 @@
 int16_t GetChannelData(const AudioFrame& frame, size_t channel, size_t index) {
   RTC_DCHECK_LT(channel, frame.num_channels_);
   RTC_DCHECK_LT(index, frame.samples_per_channel_);
-  return frame.data_[index * frame.num_channels_ + channel];
+  return frame.data()[index * frame.num_channels_ + channel];
 }
 
 void VerifyFrameDataBounds(const AudioFrame& frame, size_t channel, int16_t max,
@@ -114,6 +121,13 @@
   VerifyFramesAreEqual(stereo_frame, frame_);
 }
 
+TEST_F(AudioFrameOperationsTest, MonoToStereoMuted) {
+  frame_.num_channels_ = 1;
+  ASSERT_TRUE(frame_.muted());
+  EXPECT_EQ(0, AudioFrameOperations::MonoToStereo(&frame_));
+  EXPECT_TRUE(frame_.muted());
+}
+
 TEST_F(AudioFrameOperationsTest, MonoToStereoBufferSucceeds) {
   AudioFrame target_frame;
   frame_.num_channels_ = 1;
@@ -122,8 +136,8 @@
   target_frame.num_channels_ = 2;
   target_frame.samples_per_channel_ = frame_.samples_per_channel_;
 
-  AudioFrameOperations::MonoToStereo(frame_.data_, frame_.samples_per_channel_,
-                                     target_frame.data_);
+  AudioFrameOperations::MonoToStereo(frame_.data(), frame_.samples_per_channel_,
+                                     target_frame.mutable_data());
 
   AudioFrame stereo_frame;
   stereo_frame.samples_per_channel_ = 320;
@@ -148,6 +162,12 @@
   VerifyFramesAreEqual(mono_frame, frame_);
 }
 
+TEST_F(AudioFrameOperationsTest, StereoToMonoMuted) {
+  ASSERT_TRUE(frame_.muted());
+  EXPECT_EQ(0, AudioFrameOperations::StereoToMono(&frame_));
+  EXPECT_TRUE(frame_.muted());
+}
+
 TEST_F(AudioFrameOperationsTest, StereoToMonoBufferSucceeds) {
   AudioFrame target_frame;
   SetFrameData(4, 2, &frame_);
@@ -155,8 +175,8 @@
   target_frame.num_channels_ = 1;
   target_frame.samples_per_channel_ = frame_.samples_per_channel_;
 
-  AudioFrameOperations::StereoToMono(frame_.data_, frame_.samples_per_channel_,
-                                     target_frame.data_);
+  AudioFrameOperations::StereoToMono(frame_.data(), frame_.samples_per_channel_,
+                                     target_frame.mutable_data());
 
   AudioFrame mono_frame;
   mono_frame.samples_per_channel_ = 320;
@@ -196,6 +216,13 @@
   VerifyFramesAreEqual(mono_frame, frame_);
 }
 
+TEST_F(AudioFrameOperationsTest, QuadToMonoMuted) {
+  frame_.num_channels_ = 4;
+  ASSERT_TRUE(frame_.muted());
+  EXPECT_EQ(0, AudioFrameOperations::QuadToMono(&frame_));
+  EXPECT_TRUE(frame_.muted());
+}
+
 TEST_F(AudioFrameOperationsTest, QuadToMonoBufferSucceeds) {
   AudioFrame target_frame;
   frame_.num_channels_ = 4;
@@ -204,8 +231,8 @@
   target_frame.num_channels_ = 1;
   target_frame.samples_per_channel_ = frame_.samples_per_channel_;
 
-  AudioFrameOperations::QuadToMono(frame_.data_, frame_.samples_per_channel_,
-                                   target_frame.data_);
+  AudioFrameOperations::QuadToMono(frame_.data(), frame_.samples_per_channel_,
+                                   target_frame.mutable_data());
   AudioFrame mono_frame;
   mono_frame.samples_per_channel_ = 320;
   mono_frame.num_channels_ = 1;
@@ -244,6 +271,13 @@
   VerifyFramesAreEqual(stereo_frame, frame_);
 }
 
+TEST_F(AudioFrameOperationsTest, QuadToStereoMuted) {
+  frame_.num_channels_ = 4;
+  ASSERT_TRUE(frame_.muted());
+  EXPECT_EQ(0, AudioFrameOperations::QuadToStereo(&frame_));
+  EXPECT_TRUE(frame_.muted());
+}
+
 TEST_F(AudioFrameOperationsTest, QuadToStereoBufferSucceeds) {
   AudioFrame target_frame;
   frame_.num_channels_ = 4;
@@ -252,8 +286,8 @@
   target_frame.num_channels_ = 2;
   target_frame.samples_per_channel_ = frame_.samples_per_channel_;
 
-  AudioFrameOperations::QuadToStereo(frame_.data_, frame_.samples_per_channel_,
-                                     target_frame.data_);
+  AudioFrameOperations::QuadToStereo(frame_.data(), frame_.samples_per_channel_,
+                                     target_frame.mutable_data());
   AudioFrame stereo_frame;
   stereo_frame.samples_per_channel_ = 320;
   stereo_frame.num_channels_ = 2;
@@ -285,6 +319,12 @@
   VerifyFramesAreEqual(swapped_frame, frame_);
 }
 
+TEST_F(AudioFrameOperationsTest, SwapStereoChannelsMuted) {
+  ASSERT_TRUE(frame_.muted());
+  AudioFrameOperations::SwapStereoChannels(&frame_);
+  EXPECT_TRUE(frame_.muted());
+}
+
 TEST_F(AudioFrameOperationsTest, SwapStereoChannelsFailsOnMono) {
   frame_.num_channels_ = 1;
   // Set data to "stereo", despite it being a mono frame.
@@ -313,9 +353,9 @@
   AudioFrameOperations::Mute(&frame_, true, true);
 
   AudioFrame muted_frame;
-  muted_frame.samples_per_channel_ = 320;
-  muted_frame.num_channels_ = 2;
-  SetFrameData(0, 0, &muted_frame);
+  muted_frame.samples_per_channel_ = frame_.samples_per_channel_;
+  muted_frame.num_channels_ = frame_.num_channels_;
+  ASSERT_TRUE(muted_frame.muted());
   VerifyFramesAreEqual(muted_frame, frame_);
 }
 
@@ -423,6 +463,36 @@
   EXPECT_EQ(-999, GetChannelData(frame_, 1, 92));
 }
 
+TEST_F(AudioFrameOperationsTest, MuteBeginAlreadyMuted) {
+  ASSERT_TRUE(frame_.muted());
+  AudioFrameOperations::Mute(&frame_, false, true);
+  EXPECT_TRUE(frame_.muted());
+}
+
+TEST_F(AudioFrameOperationsTest, MuteEndAlreadyMuted) {
+  ASSERT_TRUE(frame_.muted());
+  AudioFrameOperations::Mute(&frame_, true, false);
+  EXPECT_TRUE(frame_.muted());
+}
+
+TEST_F(AudioFrameOperationsTest, ApplyHalfGainSucceeds) {
+  SetFrameData(2, &frame_);
+
+  AudioFrame half_gain_frame;
+  half_gain_frame.num_channels_ = frame_.num_channels_;
+  half_gain_frame.samples_per_channel_ = frame_.samples_per_channel_;
+  SetFrameData(1, &half_gain_frame);
+
+  AudioFrameOperations::ApplyHalfGain(&frame_);
+  VerifyFramesAreEqual(half_gain_frame, frame_);
+}
+
+TEST_F(AudioFrameOperationsTest, ApplyHalfGainMuted) {
+  ASSERT_TRUE(frame_.muted());
+  AudioFrameOperations::ApplyHalfGain(&frame_);
+  EXPECT_TRUE(frame_.muted());
+}
+
 // TODO(andrew): should not allow negative scales.
 TEST_F(AudioFrameOperationsTest, DISABLED_ScaleFailsWithBadParameters) {
   frame_.num_channels_ = 1;
@@ -459,6 +529,12 @@
   VerifyFramesAreEqual(scaled_frame, frame_);
 }
 
+TEST_F(AudioFrameOperationsTest, ScaleMuted) {
+  ASSERT_TRUE(frame_.muted());
+  EXPECT_EQ(0, AudioFrameOperations::Scale(2.0, 3.0, &frame_));
+  EXPECT_TRUE(frame_.muted());
+}
+
 // TODO(andrew): should fail with a negative scale.
 TEST_F(AudioFrameOperationsTest, DISABLED_ScaleWithSatFailsWithBadParameters) {
   EXPECT_EQ(-1, AudioFrameOperations::ScaleWithSat(-1.0, &frame_));
@@ -493,25 +569,61 @@
   VerifyFramesAreEqual(scaled_frame, frame_);
 }
 
+TEST_F(AudioFrameOperationsTest, ScaleWithSatMuted) {
+  ASSERT_TRUE(frame_.muted());
+  EXPECT_EQ(0, AudioFrameOperations::ScaleWithSat(2.0, &frame_));
+  EXPECT_TRUE(frame_.muted());
+}
+
 TEST_F(AudioFrameOperationsTest, AddingXToEmptyGivesX) {
   // When samples_per_channel_ is 0, the frame counts as empty and zero.
   AudioFrame frame_to_add_to;
+  frame_to_add_to.mutable_data();  // Unmute the frame.
+  ASSERT_FALSE(frame_to_add_to.muted());
   frame_to_add_to.samples_per_channel_ = 0;
   frame_to_add_to.num_channels_ = frame_.num_channels_;
 
+  SetFrameData(1000, &frame_);
   AudioFrameOperations::Add(frame_, &frame_to_add_to);
   VerifyFramesAreEqual(frame_, frame_to_add_to);
 }
 
+TEST_F(AudioFrameOperationsTest, AddingXToMutedGivesX) {
+  AudioFrame frame_to_add_to;
+  ASSERT_TRUE(frame_to_add_to.muted());
+  frame_to_add_to.samples_per_channel_ = frame_.samples_per_channel_;
+  frame_to_add_to.num_channels_ = frame_.num_channels_;
+
+  SetFrameData(1000, &frame_);
+  AudioFrameOperations::Add(frame_, &frame_to_add_to);
+  VerifyFramesAreEqual(frame_, frame_to_add_to);
+}
+
+TEST_F(AudioFrameOperationsTest, AddingMutedToXGivesX) {
+  AudioFrame frame_to_add_to;
+  frame_to_add_to.samples_per_channel_ = frame_.samples_per_channel_;
+  frame_to_add_to.num_channels_ = frame_.num_channels_;
+  SetFrameData(1000, &frame_to_add_to);
+
+  AudioFrame frame_copy;
+  frame_copy.CopyFrom(frame_to_add_to);
+
+  ASSERT_TRUE(frame_.muted());
+  AudioFrameOperations::Add(frame_, &frame_to_add_to);
+  VerifyFramesAreEqual(frame_copy, frame_to_add_to);
+}
+
 TEST_F(AudioFrameOperationsTest, AddingTwoFramesProducesTheirSum) {
   AudioFrame frame_to_add_to;
   frame_to_add_to.samples_per_channel_ = frame_.samples_per_channel_;
   frame_to_add_to.num_channels_ = frame_.num_channels_;
   SetFrameData(1000, &frame_to_add_to);
+  SetFrameData(2000, &frame_);
 
   AudioFrameOperations::Add(frame_, &frame_to_add_to);
-  SetFrameData(frame_.data_[0] + 1000, &frame_);
+  SetFrameData(frame_.data()[0] + 1000, &frame_);
   VerifyFramesAreEqual(frame_, frame_to_add_to);
 }
+
 }  // namespace
 }  // namespace webrtc
diff --git a/webrtc/modules/audio_coding/acm2/acm_receiver.cc b/webrtc/modules/audio_coding/acm2/acm_receiver.cc
index 553265e..a2a5eb7 100644
--- a/webrtc/modules/audio_coding/acm2/acm_receiver.cc
+++ b/webrtc/modules/audio_coding/acm2/acm_receiver.cc
@@ -154,10 +154,11 @@
   // TODO(henrik.lundin) Glitches in the output may appear if the output rate
   // from NetEq changes. See WebRTC issue 3923.
   if (need_resampling) {
+    // TODO(yujo): handle this more efficiently for muted frames.
     int samples_per_channel_int = resampler_.Resample10Msec(
-        audio_frame->data_, current_sample_rate_hz, desired_freq_hz,
+        audio_frame->data(), current_sample_rate_hz, desired_freq_hz,
         audio_frame->num_channels_, AudioFrame::kMaxDataSizeSamples,
-        audio_frame->data_);
+        audio_frame->mutable_data());
     if (samples_per_channel_int < 0) {
       LOG(LERROR) << "AcmReceiver::GetAudio - Resampling audio_buffer_ failed.";
       return -1;
@@ -175,7 +176,7 @@
   }
 
   // Store current audio in |last_audio_buffer_| for next time.
-  memcpy(last_audio_buffer_.get(), audio_frame->data_,
+  memcpy(last_audio_buffer_.get(), audio_frame->data(),
          sizeof(int16_t) * audio_frame->samples_per_channel_ *
              audio_frame->num_channels_);
 
diff --git a/webrtc/modules/audio_coding/acm2/acm_receiver_unittest.cc b/webrtc/modules/audio_coding/acm2/acm_receiver_unittest.cc
index 05f4e11..af23e17 100644
--- a/webrtc/modules/audio_coding/acm2/acm_receiver_unittest.cc
+++ b/webrtc/modules/audio_coding/acm2/acm_receiver_unittest.cc
@@ -103,8 +103,7 @@
     frame.sample_rate_hz_ = codec.plfreq;
     frame.samples_per_channel_ = codec.plfreq / 100;  // 10 ms.
     frame.num_channels_ = codec.channels;
-    memset(frame.data_, 0, frame.samples_per_channel_ * frame.num_channels_ *
-           sizeof(int16_t));
+    frame.Mute();
     packet_sent_ = false;
     last_packet_send_timestamp_ = timestamp_;
     while (!packet_sent_) {
diff --git a/webrtc/modules/audio_coding/acm2/acm_send_test.cc b/webrtc/modules/audio_coding/acm2/acm_send_test.cc
index d5388f8..787bea8 100644
--- a/webrtc/modules/audio_coding/acm2/acm_send_test.cc
+++ b/webrtc/modules/audio_coding/acm2/acm_send_test.cc
@@ -86,13 +86,13 @@
   // Insert audio and process until one packet is produced.
   while (clock_.TimeInMilliseconds() < test_duration_ms_) {
     clock_.AdvanceTimeMilliseconds(kBlockSizeMs);
-    RTC_CHECK(
-        audio_source_->Read(input_block_size_samples_, input_frame_.data_));
+    RTC_CHECK(audio_source_->Read(input_block_size_samples_,
+                                  input_frame_.mutable_data()));
     if (input_frame_.num_channels_ > 1) {
-      InputAudioFile::DuplicateInterleaved(input_frame_.data_,
+      InputAudioFile::DuplicateInterleaved(input_frame_.data(),
                                            input_block_size_samples_,
                                            input_frame_.num_channels_,
-                                           input_frame_.data_);
+                                           input_frame_.mutable_data());
     }
     data_to_send_ = false;
     RTC_CHECK_GE(acm_->Add10MsData(input_frame_), 0);
diff --git a/webrtc/modules/audio_coding/acm2/audio_coding_module.cc b/webrtc/modules/audio_coding/acm2/audio_coding_module.cc
index 551ae05..2fcbecf 100644
--- a/webrtc/modules/audio_coding/acm2/audio_coding_module.cc
+++ b/webrtc/modules/audio_coding/acm2/audio_coding_module.cc
@@ -325,24 +325,37 @@
 int DownMix(const AudioFrame& frame,
             size_t length_out_buff,
             int16_t* out_buff) {
-  if (length_out_buff < frame.samples_per_channel_) {
-    return -1;
+  RTC_DCHECK_EQ(frame.num_channels_, 2);
+  RTC_DCHECK_GE(length_out_buff, frame.samples_per_channel_);
+
+  if (!frame.muted()) {
+    const int16_t* frame_data = frame.data();
+    for (size_t n = 0; n < frame.samples_per_channel_; ++n) {
+      out_buff[n] = static_cast<int16_t>(
+          (static_cast<int32_t>(frame_data[2 * n]) +
+           static_cast<int32_t>(frame_data[2 * n + 1])) >> 1);
+    }
+  } else {
+    memset(out_buff, 0, frame.samples_per_channel_ * sizeof(int16_t));
   }
-  for (size_t n = 0; n < frame.samples_per_channel_; ++n)
-    out_buff[n] = (frame.data_[2 * n] + frame.data_[2 * n + 1]) >> 1;
   return 0;
 }
 
 // Mono-to-stereo can be used as in-place.
 int UpMix(const AudioFrame& frame, size_t length_out_buff, int16_t* out_buff) {
-  if (length_out_buff < frame.samples_per_channel_) {
-    return -1;
-  }
-  for (size_t n = frame.samples_per_channel_; n != 0; --n) {
-    size_t i = n - 1;
-    int16_t sample = frame.data_[i];
-    out_buff[2 * i + 1] = sample;
-    out_buff[2 * i] = sample;
+  RTC_DCHECK_EQ(frame.num_channels_, 1);
+  RTC_DCHECK_GE(length_out_buff, 2 * frame.samples_per_channel_);
+
+  if (!frame.muted()) {
+    const int16_t* frame_data = frame.data();
+    for (size_t n = frame.samples_per_channel_; n != 0; --n) {
+      size_t i = n - 1;
+      int16_t sample = frame_data[i];
+      out_buff[2 * i + 1] = sample;
+      out_buff[2 * i] = sample;
+    }
+  } else {
+    memset(out_buff, 0, 2 * frame.samples_per_channel_ * sizeof(int16_t));
   }
   return 0;
 }
@@ -725,12 +738,13 @@
 
   // When adding data to encoders this pointer is pointing to an audio buffer
   // with correct number of channels.
-  const int16_t* ptr_audio = ptr_frame->data_;
+  const int16_t* ptr_audio = ptr_frame->data();
 
   // For pushing data to primary, point the |ptr_audio| to correct buffer.
   if (!same_num_channels)
     ptr_audio = input_data->buffer;
 
+  // TODO(yujo): Skip encode of muted frames.
   input_data->input_timestamp = ptr_frame->timestamp_;
   input_data->audio = ptr_audio;
   input_data->length_per_channel = ptr_frame->samples_per_channel_;
@@ -744,6 +758,7 @@
 // encoders has to be mono for down-mix to take place.
 // |*ptr_out| will point to the pre-processed audio-frame. If no pre-processing
 // is required, |*ptr_out| points to |in_frame|.
+// TODO(yujo): Make this more efficient for muted frames.
 int AudioCodingModuleImpl::PreprocessToAddData(const AudioFrame& in_frame,
                                                const AudioFrame** ptr_out) {
   const bool resample =
@@ -793,13 +808,12 @@
   *ptr_out = &preprocess_frame_;
   preprocess_frame_.num_channels_ = in_frame.num_channels_;
   int16_t audio[WEBRTC_10MS_PCM_AUDIO];
-  const int16_t* src_ptr_audio = in_frame.data_;
-  int16_t* dest_ptr_audio = preprocess_frame_.data_;
+  const int16_t* src_ptr_audio = in_frame.data();
   if (down_mix) {
     // If a resampling is required the output of a down-mix is written into a
     // local buffer, otherwise, it will be written to the output frame.
-    if (resample)
-      dest_ptr_audio = audio;
+    int16_t* dest_ptr_audio = resample ?
+        audio : preprocess_frame_.mutable_data();
     if (DownMix(in_frame, WEBRTC_10MS_PCM_AUDIO, dest_ptr_audio) < 0)
       return -1;
     preprocess_frame_.num_channels_ = 1;
@@ -813,7 +827,7 @@
   // If it is required, we have to do a resampling.
   if (resample) {
     // The result of the resampler is written to output frame.
-    dest_ptr_audio = preprocess_frame_.data_;
+    int16_t* dest_ptr_audio = preprocess_frame_.mutable_data();
 
     int samples_per_channel = resampler_.Resample10Msec(
         src_ptr_audio, in_frame.sample_rate_hz_, encoder_stack_->SampleRateHz(),
diff --git a/webrtc/modules/audio_coding/acm2/audio_coding_module_unittest.cc b/webrtc/modules/audio_coding/acm2/audio_coding_module_unittest.cc
index 99fef79..236501a 100644
--- a/webrtc/modules/audio_coding/acm2/audio_coding_module_unittest.cc
+++ b/webrtc/modules/audio_coding/acm2/audio_coding_module_unittest.cc
@@ -175,9 +175,7 @@
     input_frame_.samples_per_channel_ = kSampleRateHz * 10 / 1000;  // 10 ms.
     static_assert(kSampleRateHz * 10 / 1000 <= AudioFrame::kMaxDataSizeSamples,
                   "audio frame too small");
-    memset(input_frame_.data_,
-           0,
-           input_frame_.samples_per_channel_ * sizeof(input_frame_.data_[0]));
+    input_frame_.Mute();
 
     ASSERT_EQ(0, acm_->RegisterTransportCallback(&packet_cb_));
 
@@ -698,7 +696,7 @@
     // TODO(kwiberg): Use std::copy here. Might be complications because AFAICS
     // this call confuses the number of samples with the number of bytes, and
     // ends up copying only half of what it should.
-    memcpy(input_frame_.data_, audio_loop_.GetNextBlock().data(),
+    memcpy(input_frame_.mutable_data(), audio_loop_.GetNextBlock().data(),
            kNumSamples10ms);
     AudioCodingModuleTestOldApi::InsertAudio();
   }
diff --git a/webrtc/modules/audio_coding/neteq/neteq_external_decoder_unittest.cc b/webrtc/modules/audio_coding/neteq/neteq_external_decoder_unittest.cc
index cc1374f..ecdcafa 100644
--- a/webrtc/modules/audio_coding/neteq/neteq_external_decoder_unittest.cc
+++ b/webrtc/modules/audio_coding/neteq/neteq_external_decoder_unittest.cc
@@ -200,8 +200,10 @@
     // Get audio from external decoder instance.
     GetOutputAudio(&output_);
 
+    const int16_t* output_data = output_.data();
+    const int16_t* output_internal_data = output_internal_.data();
     for (size_t i = 0; i < output_.samples_per_channel_; ++i) {
-      ASSERT_EQ(output_.data_[i], output_internal_.data_[i])
+      ASSERT_EQ(output_data[i], output_internal_data[i])
           << "Diff in sample " << i << ".";
     }
   }
@@ -298,8 +300,9 @@
     }
 
     ASSERT_EQ(1u, output.num_channels_);
+    const int16_t* output_data = output.data();
     for (size_t i = 0; i < output.samples_per_channel_; ++i) {
-      if (output.data_[i] != 0)
+      if (output_data[i] != 0)
         return;
     }
     EXPECT_TRUE(false)
diff --git a/webrtc/modules/audio_coding/neteq/neteq_impl.cc b/webrtc/modules/audio_coding/neteq/neteq_impl.cc
index f512d75..f9ec3bb 100644
--- a/webrtc/modules/audio_coding/neteq/neteq_impl.cc
+++ b/webrtc/modules/audio_coding/neteq/neteq_impl.cc
@@ -11,7 +11,6 @@
 #include "webrtc/modules/audio_coding/neteq/neteq_impl.h"
 
 #include <assert.h>
-#include <memory.h>  // memset
 
 #include <algorithm>
 #include <utility>
@@ -1063,16 +1062,17 @@
                   << ") != output_size_samples_ (" << output_size_samples_
                   << ")";
     // TODO(minyue): treatment of under-run, filling zeros
-    memset(audio_frame->data_, 0, num_output_samples * sizeof(int16_t));
+    audio_frame->Mute();
     return kSampleUnderrun;
   }
 
   // Should always have overlap samples left in the |sync_buffer_|.
   RTC_DCHECK_GE(sync_buffer_->FutureLength(), expand_->overlap_length());
 
+  // TODO(yujo): For muted frames, this can be a copy rather than an addition.
   if (play_dtmf) {
-    return_value =
-        DtmfOverdub(dtmf_event, sync_buffer_->Channels(), audio_frame->data_);
+    return_value = DtmfOverdub(dtmf_event, sync_buffer_->Channels(),
+                               audio_frame->mutable_data());
   }
 
   // Update the background noise parameters if last operation wrote data
diff --git a/webrtc/modules/audio_coding/neteq/neteq_impl_unittest.cc b/webrtc/modules/audio_coding/neteq/neteq_impl_unittest.cc
index d1703e9..b6c4a77 100644
--- a/webrtc/modules/audio_coding/neteq/neteq_impl_unittest.cc
+++ b/webrtc/modules/audio_coding/neteq/neteq_impl_unittest.cc
@@ -216,7 +216,7 @@
         1512, 2378, 2828, 2674, 1877, 568, -986, -2446, -3482, -3864, -3516,
         -2534, -1163 });
     ASSERT_GE(kMaxOutputSize, kOutput.size());
-    EXPECT_TRUE(std::equal(kOutput.begin(), kOutput.end(), output.data_));
+    EXPECT_TRUE(std::equal(kOutput.begin(), kOutput.end(), output.data()));
   }
 
   std::unique_ptr<NetEqImpl> neteq_;
@@ -525,7 +525,7 @@
   // Wrap the expected value in an rtc::Optional to compare them as such.
   EXPECT_EQ(
       rtc::Optional<uint32_t>(rtp_header.timestamp +
-                              output.data_[output.samples_per_channel_ - 1]),
+                              output.data()[output.samples_per_channel_ - 1]),
       neteq_->GetPlayoutTimestamp());
 
   // Check the timestamp for the last value in the sync buffer. This should
@@ -538,7 +538,7 @@
   // Check that the number of samples still to play from the sync buffer add
   // up with what was already played out.
   EXPECT_EQ(
-      kPayloadLengthSamples - output.data_[output.samples_per_channel_ - 1],
+      kPayloadLengthSamples - output.data()[output.samples_per_channel_ - 1],
       sync_buffer->FutureLength());
 }
 
diff --git a/webrtc/modules/audio_coding/neteq/neteq_stereo_unittest.cc b/webrtc/modules/audio_coding/neteq/neteq_stereo_unittest.cc
index 37a0783..7c25dd4 100644
--- a/webrtc/modules/audio_coding/neteq/neteq_stereo_unittest.cc
+++ b/webrtc/modules/audio_coding/neteq/neteq_stereo_unittest.cc
@@ -165,10 +165,12 @@
   }
 
   virtual void VerifyOutput(size_t num_samples) {
+    const int16_t* output_data = output_.data();
+    const int16_t* output_multi_channel_data = output_multi_channel_.data();
     for (size_t i = 0; i < num_samples; ++i) {
       for (size_t j = 0; j < num_channels_; ++j) {
-        ASSERT_EQ(output_.data_[i],
-                  output_multi_channel_.data_[i * num_channels_ + j])
+        ASSERT_EQ(output_data[i],
+                  output_multi_channel_data[i * num_channels_ + j])
             << "Diff in sample " << i << ", channel " << j << ".";
       }
     }
@@ -359,16 +361,18 @@
   // TODO(hlundin): NetEq is not giving bitexact results for these cases.
   virtual void VerifyOutput(size_t num_samples) {
     for (size_t i = 0; i < num_samples; ++i) {
+      const int16_t* output_data = output_.data();
+      const int16_t* output_multi_channel_data = output_multi_channel_.data();
       auto first_channel_sample =
-          output_multi_channel_.data_[i * num_channels_];
+          output_multi_channel_data[i * num_channels_];
       for (size_t j = 0; j < num_channels_; ++j) {
         const int kErrorMargin = 200;
-        EXPECT_NEAR(output_.data_[i],
-                    output_multi_channel_.data_[i * num_channels_ + j],
+        EXPECT_NEAR(output_data[i],
+                    output_multi_channel_data[i * num_channels_ + j],
                     kErrorMargin)
             << "Diff in sample " << i << ", channel " << j << ".";
         EXPECT_EQ(first_channel_sample,
-                  output_multi_channel_.data_[i * num_channels_ + j]);
+                  output_multi_channel_data[i * num_channels_ + j]);
       }
     }
   }
diff --git a/webrtc/modules/audio_coding/neteq/neteq_unittest.cc b/webrtc/modules/audio_coding/neteq/neteq_unittest.cc
index 5399f2a..fae1e23 100644
--- a/webrtc/modules/audio_coding/neteq/neteq_unittest.cc
+++ b/webrtc/modules/audio_coding/neteq/neteq_unittest.cc
@@ -155,9 +155,7 @@
   explicit ResultSink(const std::string& output_file);
   ~ResultSink();
 
-  template<typename T, size_t n> void AddResult(
-      const T (&test_results)[n],
-      size_t length);
+  template<typename T> void AddResult(const T* test_results, size_t length);
 
   void AddResult(const NetEqNetworkStatistics& stats);
   void AddResult(const RtcpStatistics& stats);
@@ -183,12 +181,12 @@
     fclose(output_fp_);
 }
 
-template<typename T, size_t n>
-void ResultSink::AddResult(const T (&test_results)[n], size_t length) {
+template<typename T>
+void ResultSink::AddResult(const T* test_results, size_t length) {
   if (output_fp_) {
-    ASSERT_EQ(length, fwrite(&test_results, sizeof(T), length, output_fp_));
+    ASSERT_EQ(length, fwrite(test_results, sizeof(T), length, output_fp_));
   }
-  digest_->Update(&test_results, sizeof(T) * length);
+  digest_->Update(test_results, sizeof(T) * length);
 }
 
 void ResultSink::AddResult(const NetEqNetworkStatistics& stats_raw) {
@@ -376,7 +374,7 @@
     SCOPED_TRACE(ss.str());  // Print out the parameter values on failure.
     ASSERT_NO_FATAL_FAILURE(Process());
     ASSERT_NO_FATAL_FAILURE(output.AddResult(
-        out_frame_.data_, out_frame_.samples_per_channel_));
+        out_frame_.data(), out_frame_.samples_per_channel_));
 
     // Query the network statistics API once per second
     if (sim_clock_ % 1000 == 0) {
@@ -850,8 +848,9 @@
   EXPECT_EQ(0, neteq_->InsertPacket(rtp_info, payload, 0));
   // Set all of |out_data_| to 1, and verify that it was set to 0 by the call
   // to GetAudio.
+  int16_t* out_frame_data = out_frame_.mutable_data();
   for (size_t i = 0; i < AudioFrame::kMaxDataSizeSamples; ++i) {
-    out_frame_.data_[i] = 1;
+    out_frame_data[i] = 1;
   }
   bool muted;
   EXPECT_EQ(NetEq::kFail, neteq_->GetAudio(&out_frame_, &muted));
@@ -868,29 +867,23 @@
 #elif defined(WEBRTC_CODEC_ISACFX)
   EXPECT_EQ(ISAC_RANGE_ERROR_DECODE_FRAME_LENGTH, neteq_->LastDecoderError());
 #endif
-  // Verify that the first 160 samples are set to 0, and that the remaining
-  // samples are left unmodified.
+  // Verify that the first 160 samples are set to 0.
   static const int kExpectedOutputLength = 160;  // 10 ms at 16 kHz sample rate.
+  const int16_t* const_out_frame_data = out_frame_.data();
   for (int i = 0; i < kExpectedOutputLength; ++i) {
     std::ostringstream ss;
     ss << "i = " << i;
     SCOPED_TRACE(ss.str());  // Print out the parameter values on failure.
-    EXPECT_EQ(0, out_frame_.data_[i]);
-  }
-  for (size_t i = kExpectedOutputLength; i < AudioFrame::kMaxDataSizeSamples;
-       ++i) {
-    std::ostringstream ss;
-    ss << "i = " << i;
-    SCOPED_TRACE(ss.str());  // Print out the parameter values on failure.
-    EXPECT_EQ(1, out_frame_.data_[i]);
+    EXPECT_EQ(0, const_out_frame_data[i]);
   }
 }
 
 TEST_F(NetEqDecodingTest, GetAudioBeforeInsertPacket) {
   // Set all of |out_data_| to 1, and verify that it was set to 0 by the call
   // to GetAudio.
+  int16_t* out_frame_data = out_frame_.mutable_data();
   for (size_t i = 0; i < AudioFrame::kMaxDataSizeSamples; ++i) {
-    out_frame_.data_[i] = 1;
+    out_frame_data[i] = 1;
   }
   bool muted;
   EXPECT_EQ(0, neteq_->GetAudio(&out_frame_, &muted));
@@ -898,11 +891,12 @@
   // Verify that the first block of samples is set to 0.
   static const int kExpectedOutputLength =
       kInitSampleRateHz / 100;  // 10 ms at initial sample rate.
+  const int16_t* const_out_frame_data = out_frame_.data();
   for (int i = 0; i < kExpectedOutputLength; ++i) {
     std::ostringstream ss;
     ss << "i = " << i;
     SCOPED_TRACE(ss.str());  // Print out the parameter values on failure.
-    EXPECT_EQ(0, out_frame_.data_[i]);
+    EXPECT_EQ(0, const_out_frame_data[i]);
   }
   // Verify that the sample rate did not change from the initial configuration.
   EXPECT_EQ(config_.sample_rate_hz, neteq_->last_output_sample_rate_hz());
@@ -989,7 +983,8 @@
     bool plc_to_cng = false;
     for (int n = 0; n < kFadingThreshold + kNumPlcToCngTestFrames; ++n) {
       output.Reset();
-      memset(output.data_, 1, sizeof(output.data_));  // Set to non-zero.
+      // Set to non-zero.
+      memset(output.mutable_data(), 1, AudioFrame::kMaxDataSizeBytes);
       ASSERT_EQ(0, neteq_->GetAudio(&output, &muted));
       ASSERT_FALSE(muted);
       ASSERT_EQ(1u, output.num_channels_);
@@ -997,9 +992,10 @@
       if (output.speech_type_ == AudioFrame::kPLCCNG) {
         plc_to_cng = true;
         double sum_squared = 0;
+        const int16_t* output_data = output.data();
         for (size_t k = 0;
              k < output.num_channels_ * output.samples_per_channel_; ++k)
-          sum_squared += output.data_[k] * output.data_[k];
+          sum_squared += output_data[k] * output_data[k];
         TestCondition(sum_squared, n > kFadingThreshold);
       } else {
         EXPECT_EQ(AudioFrame::kPLC, output.speech_type_);
@@ -1356,14 +1352,15 @@
   // Verify that output audio is not written during muted mode. Other parameters
   // should be correct, though.
   AudioFrame new_frame;
-  for (auto& d : new_frame.data_) {
-    d = 17;
+  int16_t* frame_data = new_frame.mutable_data();
+  for (size_t i = 0; i < AudioFrame::kMaxDataSizeSamples; i++) {
+    frame_data[i] = 17;
   }
   bool muted;
   EXPECT_EQ(0, neteq_->GetAudio(&new_frame, &muted));
   EXPECT_TRUE(muted);
-  for (auto d : new_frame.data_) {
-    EXPECT_EQ(17, d);
+  for (size_t i = 0; i < AudioFrame::kMaxDataSizeSamples; i++) {
+    EXPECT_EQ(17, frame_data[i]);
   }
   EXPECT_EQ(out_frame_.timestamp_ + out_frame_.samples_per_channel_,
             new_frame.timestamp_);
@@ -1522,8 +1519,8 @@
   if (!res)
     return res;
   if (memcmp(
-      a.data_, b.data_,
-      a.samples_per_channel_ * a.num_channels_ * sizeof(a.data_[0])) != 0) {
+      a.data(), b.data(),
+      a.samples_per_channel_ * a.num_channels_ * sizeof(*a.data())) != 0) {
     return ::testing::AssertionFailure() << "data_ diff";
   }
   return ::testing::AssertionSuccess();
diff --git a/webrtc/modules/audio_coding/neteq/sync_buffer.cc b/webrtc/modules/audio_coding/neteq/sync_buffer.cc
index f841f75..9285bbc 100644
--- a/webrtc/modules/audio_coding/neteq/sync_buffer.cc
+++ b/webrtc/modules/audio_coding/neteq/sync_buffer.cc
@@ -76,7 +76,8 @@
   const size_t samples_to_read = std::min(FutureLength(), requested_len);
   output->Reset();
   const size_t tot_samples_read =
-      ReadInterleavedFromIndex(next_index_, samples_to_read, output->data_);
+      ReadInterleavedFromIndex(next_index_, samples_to_read,
+                               output->mutable_data());
   const size_t samples_read_per_channel = tot_samples_read / Channels();
   next_index_ += samples_read_per_channel;
   output->num_channels_ = Channels();
diff --git a/webrtc/modules/audio_coding/neteq/sync_buffer_unittest.cc b/webrtc/modules/audio_coding/neteq/sync_buffer_unittest.cc
index 5a6260c..cbf26e0 100644
--- a/webrtc/modules/audio_coding/neteq/sync_buffer_unittest.cc
+++ b/webrtc/modules/audio_coding/neteq/sync_buffer_unittest.cc
@@ -154,14 +154,14 @@
   EXPECT_EQ(kNewLen / 2, output2.samples_per_channel_);
 
   // Verify the data.
-  int16_t* output_ptr = output1.data_;
+  const int16_t* output_ptr = output1.data();
   for (size_t i = 0; i < kNewLen / 2; ++i) {
     for (size_t channel = 0; channel < kChannels; ++channel) {
       EXPECT_EQ(new_data[channel][i], *output_ptr);
       ++output_ptr;
     }
   }
-  output_ptr = output2.data_;
+  output_ptr = output2.data();
   for (size_t i = kNewLen / 2; i < kNewLen; ++i) {
     for (size_t channel = 0; channel < kChannels; ++channel) {
       EXPECT_EQ(new_data[channel][i], *output_ptr);
diff --git a/webrtc/modules/audio_coding/neteq/tools/audio_sink.h b/webrtc/modules/audio_coding/neteq/tools/audio_sink.h
index 71b387a..5927b02 100644
--- a/webrtc/modules/audio_coding/neteq/tools/audio_sink.h
+++ b/webrtc/modules/audio_coding/neteq/tools/audio_sink.h
@@ -33,7 +33,7 @@
   // otherwise false.
   bool WriteAudioFrame(const AudioFrame& audio_frame) {
     return WriteArray(
-        audio_frame.data_,
+        audio_frame.data(),
         audio_frame.samples_per_channel_ * audio_frame.num_channels_);
   }
 
diff --git a/webrtc/modules/audio_coding/neteq/tools/neteq_quality_test.cc b/webrtc/modules/audio_coding/neteq/tools/neteq_quality_test.cc
index 7b3a35b..eb026fe 100644
--- a/webrtc/modules/audio_coding/neteq/tools/neteq_quality_test.cc
+++ b/webrtc/modules/audio_coding/neteq/tools/neteq_quality_test.cc
@@ -406,7 +406,7 @@
     RTC_DCHECK_EQ(out_frame_.samples_per_channel_,
                   static_cast<size_t>(kOutputSizeMs * out_sampling_khz_));
     RTC_CHECK(output_->WriteArray(
-        out_frame_.data_,
+        out_frame_.data(),
         out_frame_.samples_per_channel_ * out_frame_.num_channels_));
     return static_cast<int>(out_frame_.samples_per_channel_);
   }
diff --git a/webrtc/modules/audio_coding/neteq/tools/neteq_test.cc b/webrtc/modules/audio_coding/neteq/tools/neteq_test.cc
index 6ff46bc..34a1d50 100644
--- a/webrtc/modules/audio_coding/neteq/tools/neteq_test.cc
+++ b/webrtc/modules/audio_coding/neteq/tools/neteq_test.cc
@@ -103,7 +103,7 @@
 
       if (output_) {
         RTC_CHECK(output_->WriteArray(
-            out_frame.data_,
+            out_frame.data(),
             out_frame.samples_per_channel_ * out_frame.num_channels_));
       }
 
diff --git a/webrtc/modules/audio_coding/test/EncodeDecodeTest.cc b/webrtc/modules/audio_coding/test/EncodeDecodeTest.cc
index 87cd61c..24d0719 100644
--- a/webrtc/modules/audio_coding/test/EncodeDecodeTest.cc
+++ b/webrtc/modules/audio_coding/test/EncodeDecodeTest.cc
@@ -223,7 +223,7 @@
   if (_playoutLengthSmpls == 0) {
     return false;
   }
-  _pcmFile.Write10MsData(audioFrame.data_,
+  _pcmFile.Write10MsData(audioFrame.data(),
       audioFrame.samples_per_channel_ * audioFrame.num_channels_);
   return true;
 }
diff --git a/webrtc/modules/audio_coding/test/PCMFile.cc b/webrtc/modules/audio_coding/test/PCMFile.cc
index 5d2d818..03d4fa7 100644
--- a/webrtc/modules/audio_coding/test/PCMFile.cc
+++ b/webrtc/modules/audio_coding/test/PCMFile.cc
@@ -125,11 +125,13 @@
     channels = 2;
   }
 
-  int32_t payload_size = (int32_t) fread(audio_frame.data_, sizeof(uint16_t),
+  int32_t payload_size = (int32_t) fread(audio_frame.mutable_data(),
+                                         sizeof(uint16_t),
                                          samples_10ms_ * channels, pcm_file_);
   if (payload_size < samples_10ms_ * channels) {
+    int16_t* frame_data = audio_frame.mutable_data();
     for (int k = payload_size; k < samples_10ms_ * channels; k++) {
-      audio_frame.data_[k] = 0;
+      frame_data[k] = 0;
     }
     if (auto_rewind_) {
       rewind(pcm_file_);
@@ -149,19 +151,20 @@
   return samples_10ms_;
 }
 
-void PCMFile::Write10MsData(AudioFrame& audio_frame) {
+void PCMFile::Write10MsData(const AudioFrame& audio_frame) {
   if (audio_frame.num_channels_ == 1) {
     if (!save_stereo_) {
-      if (fwrite(audio_frame.data_, sizeof(uint16_t),
+      if (fwrite(audio_frame.data(), sizeof(uint16_t),
                  audio_frame.samples_per_channel_, pcm_file_) !=
           static_cast<size_t>(audio_frame.samples_per_channel_)) {
         return;
       }
     } else {
+      const int16_t* frame_data = audio_frame.data();
       int16_t* stereo_audio = new int16_t[2 * audio_frame.samples_per_channel_];
       for (size_t k = 0; k < audio_frame.samples_per_channel_; k++) {
-        stereo_audio[k << 1] = audio_frame.data_[k];
-        stereo_audio[(k << 1) + 1] = audio_frame.data_[k];
+        stereo_audio[k << 1] = frame_data[k];
+        stereo_audio[(k << 1) + 1] = frame_data[k];
       }
       if (fwrite(stereo_audio, sizeof(int16_t),
                  2 * audio_frame.samples_per_channel_, pcm_file_) !=
@@ -171,7 +174,7 @@
       delete[] stereo_audio;
     }
   } else {
-    if (fwrite(audio_frame.data_, sizeof(int16_t),
+    if (fwrite(audio_frame.data(), sizeof(int16_t),
                audio_frame.num_channels_ * audio_frame.samples_per_channel_,
                pcm_file_) !=
         static_cast<size_t>(audio_frame.num_channels_ *
@@ -181,7 +184,8 @@
   }
 }
 
-void PCMFile::Write10MsData(int16_t* playout_buffer, size_t length_smpls) {
+void PCMFile::Write10MsData(const int16_t* playout_buffer,
+                            size_t length_smpls) {
   if (fwrite(playout_buffer, sizeof(uint16_t), length_smpls, pcm_file_) !=
       length_smpls) {
     return;
diff --git a/webrtc/modules/audio_coding/test/PCMFile.h b/webrtc/modules/audio_coding/test/PCMFile.h
index b5ced0b..63ab960 100644
--- a/webrtc/modules/audio_coding/test/PCMFile.h
+++ b/webrtc/modules/audio_coding/test/PCMFile.h
@@ -33,8 +33,8 @@
 
   int32_t Read10MsData(AudioFrame& audio_frame);
 
-  void Write10MsData(int16_t *playout_buffer, size_t length_smpls);
-  void Write10MsData(AudioFrame& audio_frame);
+  void Write10MsData(const int16_t *playout_buffer, size_t length_smpls);
+  void Write10MsData(const AudioFrame& audio_frame);
 
   uint16_t PayloadLength10Ms() const;
   int32_t SamplingFrequency() const;
diff --git a/webrtc/modules/audio_coding/test/TestAllCodecs.cc b/webrtc/modules/audio_coding/test/TestAllCodecs.cc
index 30f0226..12fe455 100644
--- a/webrtc/modules/audio_coding/test/TestAllCodecs.cc
+++ b/webrtc/modules/audio_coding/test/TestAllCodecs.cc
@@ -457,7 +457,7 @@
     ASSERT_FALSE(muted);
 
     // Write output speech to file.
-    outfile_b_.Write10MsData(audio_frame.data_,
+    outfile_b_.Write10MsData(audio_frame.data(),
                              audio_frame.samples_per_channel_);
 
     // Update loop counter
diff --git a/webrtc/modules/audio_coding/test/TestRedFec.cc b/webrtc/modules/audio_coding/test/TestRedFec.cc
index 091cc84..4ec3ed1 100644
--- a/webrtc/modules/audio_coding/test/TestRedFec.cc
+++ b/webrtc/modules/audio_coding/test/TestRedFec.cc
@@ -464,7 +464,7 @@
     bool muted;
     EXPECT_EQ(0, _acmB->PlayoutData10Ms(outFreqHzB, &audioFrame, &muted));
     ASSERT_FALSE(muted);
-    _outFileB.Write10MsData(audioFrame.data_, audioFrame.samples_per_channel_);
+    _outFileB.Write10MsData(audioFrame.data(), audioFrame.samples_per_channel_);
   }
   _inFileA.Rewind();
 }
diff --git a/webrtc/modules/audio_coding/test/TestStereo.cc b/webrtc/modules/audio_coding/test/TestStereo.cc
index 3d8efe0..02bc141 100644
--- a/webrtc/modules/audio_coding/test/TestStereo.cc
+++ b/webrtc/modules/audio_coding/test/TestStereo.cc
@@ -806,7 +806,7 @@
 
     // Write output speech to file
     out_file_.Write10MsData(
-        audio_frame.data_,
+        audio_frame.data(),
         audio_frame.samples_per_channel_ * audio_frame.num_channels_);
   }
 
diff --git a/webrtc/modules/audio_coding/test/delay_test.cc b/webrtc/modules/audio_coding/test/delay_test.cc
index 846ac29..ce24493 100644
--- a/webrtc/modules/audio_coding/test/delay_test.cc
+++ b/webrtc/modules/audio_coding/test/delay_test.cc
@@ -209,7 +209,7 @@
                 acm_b_->PlayoutData10Ms(out_freq_hz_b, &audio_frame, &muted));
       RTC_DCHECK(!muted);
       out_file_b_.Write10MsData(
-          audio_frame.data_,
+          audio_frame.data(),
           audio_frame.samples_per_channel_ * audio_frame.num_channels_);
       received_ts = channel_a2b_->LastInTimestamp();
       rtc::Optional<uint32_t> playout_timestamp = acm_b_->PlayoutTimestamp();
diff --git a/webrtc/modules/audio_coding/test/insert_packet_with_timing.cc b/webrtc/modules/audio_coding/test/insert_packet_with_timing.cc
index 44ef9df..4fa4e52 100644
--- a/webrtc/modules/audio_coding/test/insert_packet_with_timing.cc
+++ b/webrtc/modules/audio_coding/test/insert_packet_with_timing.cc
@@ -147,7 +147,7 @@
       receive_acm_->PlayoutData10Ms(static_cast<int>(FLAGS_output_fs_hz),
                                     &frame_, &muted);
       ASSERT_FALSE(muted);
-      fwrite(frame_.data_, sizeof(frame_.data_[0]),
+      fwrite(frame_.data(), sizeof(*frame_.data()),
              frame_.samples_per_channel_ * frame_.num_channels_, pcm_out_fid_);
       *action |= kAudioPlayedOut;
     }
diff --git a/webrtc/modules/audio_coding/test/opus_test.cc b/webrtc/modules/audio_coding/test/opus_test.cc
index a558f1c..9f5720b 100644
--- a/webrtc/modules/audio_coding/test/opus_test.cc
+++ b/webrtc/modules/audio_coding/test/opus_test.cc
@@ -262,7 +262,7 @@
 
     // If input audio is sampled at 32 kHz, resampling to 48 kHz is required.
     EXPECT_EQ(480,
-              resampler_.Resample10Msec(audio_frame.data_,
+              resampler_.Resample10Msec(audio_frame.data(),
                                         audio_frame.sample_rate_hz_,
                                         48000,
                                         channels,
@@ -347,7 +347,7 @@
 
     // Write output speech to file.
     out_file_.Write10MsData(
-        audio_frame.data_,
+        audio_frame.data(),
         audio_frame.samples_per_channel_ * audio_frame.num_channels_);
 
     // Write stand-alone speech to file.
diff --git a/webrtc/modules/audio_conference_mixer/source/audio_frame_manipulator.cc b/webrtc/modules/audio_conference_mixer/source/audio_frame_manipulator.cc
index 1e679af..8e7351d 100644
--- a/webrtc/modules/audio_conference_mixer/source/audio_frame_manipulator.cc
+++ b/webrtc/modules/audio_conference_mixer/source/audio_frame_manipulator.cc
@@ -41,12 +41,15 @@
 namespace webrtc {
 uint32_t CalculateEnergy(const AudioFrame& audioFrame)
 {
+    if (audioFrame.muted()) return 0;
+
     uint32_t energy = 0;
+    const int16_t* frame_data = audioFrame.data();
     for(size_t position = 0; position < audioFrame.samples_per_channel_;
         position++)
     {
         // TODO(andrew): this can easily overflow.
-        energy += audioFrame.data_[position] * audioFrame.data_[position];
+        energy += frame_data[position] * frame_data[position];
     }
     return energy;
 }
@@ -54,24 +57,29 @@
 void RampIn(AudioFrame& audioFrame)
 {
     assert(rampSize <= audioFrame.samples_per_channel_);
+    if (audioFrame.muted()) return;
+
+    int16_t* frame_data = audioFrame.mutable_data();
     for(size_t i = 0; i < rampSize; i++)
     {
-        audioFrame.data_[i] = static_cast<int16_t>(rampArray[i] *
-                                                   audioFrame.data_[i]);
+        frame_data[i] = static_cast<int16_t>(rampArray[i] * frame_data[i]);
     }
 }
 
 void RampOut(AudioFrame& audioFrame)
 {
     assert(rampSize <= audioFrame.samples_per_channel_);
+    if (audioFrame.muted()) return;
+
+    int16_t* frame_data = audioFrame.mutable_data();
     for(size_t i = 0; i < rampSize; i++)
     {
         const size_t rampPos = rampSize - 1 - i;
-        audioFrame.data_[i] = static_cast<int16_t>(rampArray[rampPos] *
-                                                   audioFrame.data_[i]);
+        frame_data[i] = static_cast<int16_t>(rampArray[rampPos] *
+                                             frame_data[i]);
     }
-    memset(&audioFrame.data_[rampSize], 0,
+    memset(&frame_data[rampSize], 0,
            (audioFrame.samples_per_channel_ - rampSize) *
-           sizeof(audioFrame.data_[0]));
+           sizeof(frame_data[0]));
 }
 }  // namespace webrtc
diff --git a/webrtc/modules/audio_conference_mixer/test/audio_conference_mixer_unittest.cc b/webrtc/modules/audio_conference_mixer/test/audio_conference_mixer_unittest.cc
index 218b8be..ce18d91 100644
--- a/webrtc/modules/audio_conference_mixer/test/audio_conference_mixer_unittest.cc
+++ b/webrtc/modules/audio_conference_mixer/test/audio_conference_mixer_unittest.cc
@@ -129,7 +129,7 @@
 
     // We set the 80-th sample value since the first 80 samples may be
     // modified by a ramped-in window.
-    participants[i].fake_frame()->data_[80] = i;
+    participants[i].fake_frame()->mutable_data()[80] = i;
 
     EXPECT_EQ(0, mixer->SetMixabilityStatus(&participants[i], true));
     EXPECT_CALL(participants[i], GetAudioFrame(_, _))
diff --git a/webrtc/modules/audio_mixer/audio_frame_manipulator.cc b/webrtc/modules/audio_mixer/audio_frame_manipulator.cc
index 8aa0b5c..fff79a3 100644
--- a/webrtc/modules/audio_mixer/audio_frame_manipulator.cc
+++ b/webrtc/modules/audio_mixer/audio_frame_manipulator.cc
@@ -16,11 +16,16 @@
 namespace webrtc {
 
 uint32_t AudioMixerCalculateEnergy(const AudioFrame& audio_frame) {
+  if (audio_frame.muted()) {
+    return 0;
+  }
+
   uint32_t energy = 0;
+  const int16_t* frame_data = audio_frame.data();
   for (size_t position = 0; position < audio_frame.samples_per_channel_;
        position++) {
     // TODO(aleloi): This can overflow. Convert to floats.
-    energy += audio_frame.data_[position] * audio_frame.data_[position];
+    energy += frame_data[position] * frame_data[position];
   }
   return energy;
 }
@@ -29,7 +34,7 @@
   RTC_DCHECK(audio_frame);
   RTC_DCHECK_GE(start_gain, 0.0f);
   RTC_DCHECK_GE(target_gain, 0.0f);
-  if (start_gain == target_gain) {
+  if (start_gain == target_gain || audio_frame->muted()) {
     return;
   }
 
@@ -37,11 +42,12 @@
   RTC_DCHECK_LT(0, samples);
   float increment = (target_gain - start_gain) / samples;
   float gain = start_gain;
+  int16_t* frame_data = audio_frame->mutable_data();
   for (size_t i = 0; i < samples; ++i) {
     // If the audio is interleaved of several channels, we want to
     // apply the same gain change to the ith sample of every channel.
     for (size_t ch = 0; ch < audio_frame->num_channels_; ++ch) {
-      audio_frame->data_[audio_frame->num_channels_ * i + ch] *= gain;
+      frame_data[audio_frame->num_channels_ * i + ch] *= gain;
     }
     gain += increment;
   }
diff --git a/webrtc/modules/audio_mixer/audio_frame_manipulator_unittest.cc b/webrtc/modules/audio_mixer/audio_frame_manipulator_unittest.cc
index 26258a2..e163d0f 100644
--- a/webrtc/modules/audio_mixer/audio_frame_manipulator_unittest.cc
+++ b/webrtc/modules/audio_mixer/audio_frame_manipulator_unittest.cc
@@ -23,8 +23,9 @@
                             AudioFrame* frame) {
   frame->num_channels_ = number_of_channels;
   frame->samples_per_channel_ = samples_per_channel;
-  std::fill(frame->data_,
-            frame->data_ + samples_per_channel * number_of_channels, value);
+  int16_t* frame_data = frame->mutable_data();
+  std::fill(frame_data,
+            frame_data + samples_per_channel * number_of_channels, value);
 }
 }  // namespace
 
@@ -40,8 +41,9 @@
 
   const int total_samples = kSamplesPerChannel * kNumberOfChannels;
   const int16_t expected_result[total_samples] = {0, 0, 1, 1, 2, 2, 3, 3, 4, 4};
+  const int16_t* frame_data = frame.data();
   EXPECT_TRUE(
-      std::equal(frame.data_, frame.data_ + total_samples, expected_result));
+      std::equal(frame_data, frame_data + total_samples, expected_result));
 }
 
 TEST(AudioFrameManipulator, CompareBackwardRampWithExpectedResultMono) {
@@ -56,8 +58,9 @@
 
   const int total_samples = kSamplesPerChannel * kNumberOfChannels;
   const int16_t expected_result[total_samples] = {5, 4, 3, 2, 1};
+  const int16_t* frame_data = frame.data();
   EXPECT_TRUE(
-      std::equal(frame.data_, frame.data_ + total_samples, expected_result));
+      std::equal(frame_data, frame_data + total_samples, expected_result));
 }
 
 }  // namespace webrtc
diff --git a/webrtc/modules/audio_mixer/audio_mixer_impl_unittest.cc b/webrtc/modules/audio_mixer/audio_mixer_impl_unittest.cc
index 3ec0ab6..70f06d0 100644
--- a/webrtc/modules/audio_mixer/audio_mixer_impl_unittest.cc
+++ b/webrtc/modules/audio_mixer/audio_mixer_impl_unittest.cc
@@ -169,7 +169,7 @@
 
     // We set the 80-th sample value since the first 80 samples may be
     // modified by a ramped-in window.
-    participants[i].fake_frame()->data_[80] = i;
+    participants[i].fake_frame()->mutable_data()[80] = i;
 
     EXPECT_TRUE(mixer->AddSource(&participants[i]));
     EXPECT_CALL(participants[i], GetAudioFrameWithInfo(_, _)).Times(Exactly(1));
@@ -208,8 +208,9 @@
   const size_t n_samples = participant.fake_frame()->samples_per_channel_;
 
   // Modify the frame so that it's not zero.
+  int16_t* fake_frame_data = participant.fake_frame()->mutable_data();
   for (size_t j = 0; j < n_samples; ++j) {
-    participant.fake_frame()->data_[j] = static_cast<int16_t>(j);
+    fake_frame_data[j] = static_cast<int16_t>(j);
   }
 
   EXPECT_TRUE(mixer->AddSource(&participant));
@@ -223,7 +224,8 @@
   }
 
   EXPECT_EQ(
-      0, memcmp(participant.fake_frame()->data_, audio_frame.data_, n_samples));
+      0,
+      memcmp(participant.fake_frame()->data(), audio_frame.data(), n_samples));
 }
 
 TEST(AudioMixer, SourceAtNativeRateShouldNeverResample) {
@@ -328,7 +330,7 @@
     ResetFrame(participants[i].fake_frame());
     // Set the participant audio energy to increase with the index
     // |i|.
-    participants[i].fake_frame()->data_[0] = 100 * i;
+    participants[i].fake_frame()->mutable_data()[0] = 100 * i;
   }
 
   // Add all participants but the loudest for mixing.
@@ -444,7 +446,8 @@
   std::vector<AudioMixer::Source::AudioFrameInfo> frame_info(
       kAudioSources, AudioMixer::Source::AudioFrameInfo::kNormal);
   frames[0].vad_activity_ = AudioFrame::kVadPassive;
-  std::fill(frames[0].data_, frames[0].data_ + kDefaultSampleRateHz / 100,
+  int16_t* frame_data = frames[0].mutable_data();
+  std::fill(frame_data, frame_data + kDefaultSampleRateHz / 100,
             std::numeric_limits<int16_t>::max());
   std::vector<bool> expected_status(kAudioSources, true);
   expected_status[0] = false;
@@ -464,7 +467,8 @@
   std::vector<AudioMixer::Source::AudioFrameInfo> frame_info(
       kAudioSources, AudioMixer::Source::AudioFrameInfo::kNormal);
   frame_info[0] = AudioMixer::Source::AudioFrameInfo::kMuted;
-  std::fill(frames[0].data_, frames[0].data_ + kDefaultSampleRateHz / 100,
+  int16_t* frame_data = frames[0].mutable_data();
+  std::fill(frame_data, frame_data + kDefaultSampleRateHz / 100,
             std::numeric_limits<int16_t>::max());
   std::vector<bool> expected_status(kAudioSources, true);
   expected_status[0] = false;
diff --git a/webrtc/modules/audio_mixer/frame_combiner.cc b/webrtc/modules/audio_mixer/frame_combiner.cc
index 3a7f618..7732c7c 100644
--- a/webrtc/modules/audio_mixer/frame_combiner.cc
+++ b/webrtc/modules/audio_mixer/frame_combiner.cc
@@ -50,10 +50,11 @@
                      AudioFrame* audio_frame_for_mixing) {
   audio_frame_for_mixing->timestamp_ = input_frame->timestamp_;
   audio_frame_for_mixing->elapsed_time_ms_ = input_frame->elapsed_time_ms_;
-  std::copy(input_frame->data_,
-            input_frame->data_ +
+  // TODO(yujo): can we optimize muted frames?
+  std::copy(input_frame->data(),
+            input_frame->data() +
                 input_frame->num_channels_ * input_frame->samples_per_channel_,
-            audio_frame_for_mixing->data_);
+            audio_frame_for_mixing->mutable_data());
   if (use_limiter) {
     AudioFrameOperations::ApplyHalfGain(audio_frame_for_mixing);
     RTC_DCHECK(limiter);
@@ -95,6 +96,7 @@
   add_buffer.fill(0);
 
   for (const auto& frame : input_frames) {
+    // TODO(yujo): skip this for muted frames.
     std::transform(frame.begin(), frame.end(), add_buffer.begin(),
                    add_buffer.begin(), std::plus<int32_t>());
   }
@@ -102,7 +104,7 @@
   if (use_limiter) {
     // Halve all samples to avoid saturation before limiting.
     std::transform(add_buffer.begin(), add_buffer.begin() + frame_length,
-                   audio_frame_for_mixing->data_, [](int32_t a) {
+                   audio_frame_for_mixing->mutable_data(), [](int32_t a) {
                      return rtc::saturated_cast<int16_t>(a / 2);
                    });
 
@@ -127,7 +129,7 @@
     AudioFrameOperations::Add(*audio_frame_for_mixing, audio_frame_for_mixing);
   } else {
     std::transform(add_buffer.begin(), add_buffer.begin() + frame_length,
-                   audio_frame_for_mixing->data_,
+                   audio_frame_for_mixing->mutable_data(),
                    [](int32_t a) { return rtc::saturated_cast<int16_t>(a); });
   }
 }
@@ -206,10 +208,11 @@
     std::vector<rtc::ArrayView<const int16_t>> input_frames;
     for (size_t i = 0; i < mix_list.size(); ++i) {
       input_frames.push_back(rtc::ArrayView<const int16_t>(
-          mix_list[i]->data_, samples_per_channel * number_of_channels));
+          mix_list[i]->data(), samples_per_channel * number_of_channels));
     }
     CombineMultipleFrames(input_frames, use_limiter_this_round, limiter_.get(),
                           audio_frame_for_mixing);
   }
 }
+
 }  // namespace webrtc
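Note on the TODO above about skipping muted frames in the combine loop: since data() on a muted frame aliases a shared zero buffer, adding it into add_buffer is a no-op. A rough follow-up sketch (hypothetical, not part of this change), assuming the loop is handed the AudioFrame pointers from mix_list rather than pre-built ArrayViews:

  for (const AudioFrame* frame : mix_list) {
    if (frame->muted()) {
      continue;  // data() is all zeros; the per-sample addition is a no-op.
    }
    rtc::ArrayView<const int16_t> samples(
        frame->data(), samples_per_channel * number_of_channels);
    std::transform(samples.begin(), samples.end(), add_buffer.begin(),
                   add_buffer.begin(), std::plus<int32_t>());
  }

Skipping the std::transform is purely an optimization; the mixed result is unchanged.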
diff --git a/webrtc/modules/audio_mixer/frame_combiner_unittest.cc b/webrtc/modules/audio_mixer/frame_combiner_unittest.cc
index 8d745f6..250c6e1 100644
--- a/webrtc/modules/audio_mixer/frame_combiner_unittest.cc
+++ b/webrtc/modules/audio_mixer/frame_combiner_unittest.cc
@@ -112,9 +112,11 @@
       combiner.Combine(frames_to_combine, number_of_channels, rate,
                        frames_to_combine.size(), &audio_frame_for_mixing);
 
+      const int16_t* audio_frame_for_mixing_data =
+          audio_frame_for_mixing.data();
       const std::vector<int16_t> mixed_data(
-          audio_frame_for_mixing.data_,
-          audio_frame_for_mixing.data_ + number_of_channels * rate / 100);
+          audio_frame_for_mixing_data,
+          audio_frame_for_mixing_data + number_of_channels * rate / 100);
 
       const std::vector<int16_t> expected(number_of_channels * rate / 100, 0);
       EXPECT_EQ(mixed_data, expected);
@@ -129,15 +131,17 @@
       SCOPED_TRACE(ProduceDebugText(rate, number_of_channels, 1));
 
       SetUpFrames(rate, number_of_channels);
-      std::iota(frame1.data_, frame1.data_ + number_of_channels * rate / 100,
-                0);
+      int16_t* frame1_data = frame1.mutable_data();
+      std::iota(frame1_data, frame1_data + number_of_channels * rate / 100, 0);
       const std::vector<AudioFrame*> frames_to_combine = {&frame1};
       combiner.Combine(frames_to_combine, number_of_channels, rate,
                        frames_to_combine.size(), &audio_frame_for_mixing);
 
+      const int16_t* audio_frame_for_mixing_data =
+          audio_frame_for_mixing.data();
       const std::vector<int16_t> mixed_data(
-          audio_frame_for_mixing.data_,
-          audio_frame_for_mixing.data_ + number_of_channels * rate / 100);
+          audio_frame_for_mixing_data,
+          audio_frame_for_mixing_data + number_of_channels * rate / 100);
 
       std::vector<int16_t> expected(number_of_channels * rate / 100);
       std::iota(expected.begin(), expected.end(), 0);
@@ -190,8 +194,8 @@
           combiner.Combine(frames_to_combine, number_of_channels, rate,
                            number_of_streams, &audio_frame_for_mixing);
           cumulative_change += change_calculator.CalculateGainChange(
-              rtc::ArrayView<const int16_t>(frame1.data_, number_of_samples),
-              rtc::ArrayView<const int16_t>(audio_frame_for_mixing.data_,
+              rtc::ArrayView<const int16_t>(frame1.data(), number_of_samples),
+              rtc::ArrayView<const int16_t>(audio_frame_for_mixing.data(),
                                             number_of_samples));
         }
         RTC_DCHECK_LT(cumulative_change, 10);
diff --git a/webrtc/modules/audio_mixer/sine_wave_generator.cc b/webrtc/modules/audio_mixer/sine_wave_generator.cc
index f16312f..f295045 100644
--- a/webrtc/modules/audio_mixer/sine_wave_generator.cc
+++ b/webrtc/modules/audio_mixer/sine_wave_generator.cc
@@ -22,9 +22,10 @@
 
 void SineWaveGenerator::GenerateNextFrame(AudioFrame* frame) {
   RTC_DCHECK(frame);
+  int16_t* frame_data = frame->mutable_data();
   for (size_t i = 0; i < frame->samples_per_channel_; ++i) {
     for (size_t ch = 0; ch < frame->num_channels_; ++ch) {
-      frame->data_[frame->num_channels_ * i + ch] =
+      frame_data[frame->num_channels_ * i + ch] =
           rtc::saturated_cast<int16_t>(amplitude_ * sinf(phase_));
     }
     phase_ += wave_frequency_hz_ * 2 * kPi / frame->sample_rate_hz_;
diff --git a/webrtc/modules/audio_processing/aec_dump/aec_dump_impl.cc b/webrtc/modules/audio_processing/aec_dump/aec_dump_impl.cc
index 256c191..9f68b54 100644
--- a/webrtc/modules/audio_processing/aec_dump/aec_dump_impl.cc
+++ b/webrtc/modules/audio_processing/aec_dump/aec_dump_impl.cc
@@ -130,7 +130,7 @@
   audioproc::ReverseStream* msg = event->mutable_reverse_stream();
   const size_t data_size =
       sizeof(int16_t) * frame.samples_per_channel_ * frame.num_channels_;
-  msg->set_data(frame.data_, data_size);
+  msg->set_data(frame.data(), data_size);
 
   worker_queue_->PostTask(std::unique_ptr<rtc::QueuedTask>(std::move(task)));
 }
diff --git a/webrtc/modules/audio_processing/aec_dump/capture_stream_info.cc b/webrtc/modules/audio_processing/aec_dump/capture_stream_info.cc
index 2d7affc..5a49685 100644
--- a/webrtc/modules/audio_processing/aec_dump/capture_stream_info.cc
+++ b/webrtc/modules/audio_processing/aec_dump/capture_stream_info.cc
@@ -46,7 +46,7 @@
   auto* stream = task_->GetEvent()->mutable_stream();
   const size_t data_size =
       sizeof(int16_t) * frame.samples_per_channel_ * frame.num_channels_;
-  stream->set_input_data(frame.data_, data_size);
+  stream->set_input_data(frame.data(), data_size);
 }
 
 void CaptureStreamInfo::AddOutput(const AudioFrame& frame) {
@@ -54,7 +54,7 @@
   auto* stream = task_->GetEvent()->mutable_stream();
   const size_t data_size =
       sizeof(int16_t) * frame.samples_per_channel_ * frame.num_channels_;
-  stream->set_output_data(frame.data_, data_size);
+  stream->set_output_data(frame.data(), data_size);
 }
 
 void CaptureStreamInfo::AddAudioProcessingState(
diff --git a/webrtc/modules/audio_processing/audio_buffer.cc b/webrtc/modules/audio_processing/audio_buffer.cc
index 579a5c2..5f90e0f 100644
--- a/webrtc/modules/audio_processing/audio_buffer.cc
+++ b/webrtc/modules/audio_processing/audio_buffer.cc
@@ -394,13 +394,14 @@
   } else {
     deinterleaved = input_buffer_->ibuf()->channels();
   }
+  // TODO(yujo): handle muted frames more efficiently.
   if (num_proc_channels_ == 1) {
     // Downmix and deinterleave simultaneously.
-    DownmixInterleavedToMono(frame->data_, input_num_frames_,
+    DownmixInterleavedToMono(frame->data(), input_num_frames_,
                              num_input_channels_, deinterleaved[0]);
   } else {
     RTC_DCHECK_EQ(num_proc_channels_, num_input_channels_);
-    Deinterleave(frame->data_,
+    Deinterleave(frame->data(),
                  input_num_frames_,
                  num_proc_channels_,
                  deinterleaved);
@@ -437,12 +438,13 @@
     data_ptr = output_buffer_.get();
   }
 
+  // TODO(yujo): handle muted frames more efficiently.
   if (frame->num_channels_ == num_channels_) {
     Interleave(data_ptr->ibuf()->channels(), output_num_frames_, num_channels_,
-               frame->data_);
+               frame->mutable_data());
   } else {
     UpmixMonoToInterleaved(data_ptr->ibuf()->channels()[0], output_num_frames_,
-                           frame->num_channels_, frame->data_);
+                           frame->num_channels_, frame->mutable_data());
   }
 }
 
diff --git a/webrtc/modules/audio_processing/audio_processing_impl.cc b/webrtc/modules/audio_processing/audio_processing_impl.cc
index 922997e..9ece91f 100644
--- a/webrtc/modules/audio_processing/audio_processing_impl.cc
+++ b/webrtc/modules/audio_processing/audio_processing_impl.cc
@@ -1160,7 +1160,7 @@
     audioproc::Stream* msg = debug_dump_.capture.event_msg->mutable_stream();
     const size_t data_size =
         sizeof(int16_t) * frame->samples_per_channel_ * frame->num_channels_;
-    msg->set_input_data(frame->data_, data_size);
+    msg->set_input_data(frame->data(), data_size);
   }
 #endif
 
@@ -1178,7 +1178,7 @@
     audioproc::Stream* msg = debug_dump_.capture.event_msg->mutable_stream();
     const size_t data_size =
         sizeof(int16_t) * frame->samples_per_channel_ * frame->num_channels_;
-    msg->set_output_data(frame->data_, data_size);
+    msg->set_output_data(frame->data(), data_size);
     RETURN_ON_ERR(WriteMessageToDebugFile(debug_dump_.debug_file.get(),
                                           &debug_dump_.num_bytes_left_for_log_,
                                           &crit_debug_, &debug_dump_.capture));
@@ -1514,7 +1514,7 @@
         debug_dump_.render.event_msg->mutable_reverse_stream();
     const size_t data_size =
         sizeof(int16_t) * frame->samples_per_channel_ * frame->num_channels_;
-    msg->set_data(frame->data_, data_size);
+    msg->set_data(frame->data(), data_size);
     RETURN_ON_ERR(WriteMessageToDebugFile(debug_dump_.debug_file.get(),
                                           &debug_dump_.num_bytes_left_for_log_,
                                           &crit_debug_, &debug_dump_.render));
diff --git a/webrtc/modules/audio_processing/audio_processing_impl_locking_unittest.cc b/webrtc/modules/audio_processing/audio_processing_impl_locking_unittest.cc
index 2807355..19d90fa 100644
--- a/webrtc/modules/audio_processing/audio_processing_impl_locking_unittest.cc
+++ b/webrtc/modules/audio_processing/audio_processing_impl_locking_unittest.cc
@@ -479,11 +479,12 @@
                         RandomGenerator* rand_gen) {
   ASSERT_GT(amplitude, 0);
   ASSERT_LE(amplitude, 32767);
+  int16_t* frame_data = frame->mutable_data();
   for (size_t ch = 0; ch < frame->num_channels_; ch++) {
     for (size_t k = 0; k < frame->samples_per_channel_; k++) {
       // Store random 16 bit number between -(amplitude+1) and
       // amplitude.
-      frame->data_[k * ch] =
+      frame_data[k * ch] =
           rand_gen->RandInt(2 * amplitude + 1) - amplitude - 1;
     }
   }
diff --git a/webrtc/modules/audio_processing/audio_processing_unittest.cc b/webrtc/modules/audio_processing/audio_processing_unittest.cc
index 42cf418..799063d 100644
--- a/webrtc/modules/audio_processing/audio_processing_unittest.cc
+++ b/webrtc/modules/audio_processing/audio_processing_unittest.cc
@@ -87,7 +87,7 @@
 }
 
 void ConvertToFloat(const AudioFrame& frame, ChannelBuffer<float>* cb) {
-  ConvertToFloat(frame.data_, cb);
+  ConvertToFloat(frame.data(), cb);
 }
 
 // Number of channels including the keyboard channel.
@@ -127,31 +127,34 @@
   }
 }
 
-void VerifyChannelsAreEqual(int16_t* stereo, size_t samples_per_channel) {
+void VerifyChannelsAreEqual(const int16_t* stereo, size_t samples_per_channel) {
   for (size_t i = 0; i < samples_per_channel; i++) {
     EXPECT_EQ(stereo[i * 2 + 1], stereo[i * 2]);
   }
 }
 
 void SetFrameTo(AudioFrame* frame, int16_t value) {
+  int16_t* frame_data = frame->mutable_data();
   for (size_t i = 0; i < frame->samples_per_channel_ * frame->num_channels_;
        ++i) {
-    frame->data_[i] = value;
+    frame_data[i] = value;
   }
 }
 
 void SetFrameTo(AudioFrame* frame, int16_t left, int16_t right) {
   ASSERT_EQ(2u, frame->num_channels_);
+  int16_t* frame_data = frame->mutable_data();
   for (size_t i = 0; i < frame->samples_per_channel_ * 2; i += 2) {
-    frame->data_[i] = left;
-    frame->data_[i + 1] = right;
+    frame_data[i] = left;
+    frame_data[i + 1] = right;
   }
 }
 
 void ScaleFrame(AudioFrame* frame, float scale) {
+  int16_t* frame_data = frame->mutable_data();
   for (size_t i = 0; i < frame->samples_per_channel_ * frame->num_channels_;
        ++i) {
-    frame->data_[i] = FloatS16ToS16(frame->data_[i] * scale);
+    frame_data[i] = FloatS16ToS16(frame_data[i] * scale);
   }
 }
 
@@ -162,7 +165,7 @@
   if (frame1.num_channels_ != frame2.num_channels_) {
     return false;
   }
-  if (memcmp(frame1.data_, frame2.data_,
+  if (memcmp(frame1.data(), frame2.data(),
              frame1.samples_per_channel_ * frame1.num_channels_ *
                  sizeof(int16_t))) {
     return false;
@@ -205,9 +208,10 @@
 
 int16_t MaxAudioFrame(const AudioFrame& frame) {
   const size_t length = frame.samples_per_channel_ * frame.num_channels_;
-  int16_t max_data = AbsValue(frame.data_[0]);
+  const int16_t* frame_data = frame.data();
+  int16_t max_data = AbsValue(frame_data[0]);
   for (size_t i = 1; i < length; i++) {
-    max_data = std::max(max_data, AbsValue(frame.data_[i]));
+    max_data = std::max(max_data, AbsValue(frame_data[i]));
   }
 
   return max_data;
@@ -534,7 +538,7 @@
                         ChannelBuffer<float>* cb) {
   // The files always contain stereo audio.
   size_t frame_size = frame->samples_per_channel_ * 2;
-  size_t read_count = fread(frame->data_,
+  size_t read_count = fread(frame->mutable_data(),
                             sizeof(int16_t),
                             frame_size,
                             file);
@@ -545,7 +549,7 @@
   }
 
   if (frame->num_channels_ == 1) {
-    MixStereoToMono(frame->data_, frame->data_,
+    MixStereoToMono(frame->data(), frame->mutable_data(),
                     frame->samples_per_channel_);
   }
 
@@ -1601,11 +1605,13 @@
     ASSERT_EQ(0, feof(far_file_));
     ASSERT_EQ(0, feof(near_file_));
     while (ReadFrame(far_file_, revframe_) && ReadFrame(near_file_, frame_)) {
-      CopyLeftToRightChannel(revframe_->data_, revframe_->samples_per_channel_);
+      CopyLeftToRightChannel(revframe_->mutable_data(),
+                             revframe_->samples_per_channel_);
 
       ASSERT_EQ(kNoErr, apm_->ProcessReverseStream(revframe_));
 
-      CopyLeftToRightChannel(frame_->data_, frame_->samples_per_channel_);
+      CopyLeftToRightChannel(frame_->mutable_data(),
+                             frame_->samples_per_channel_);
       frame_->vad_activity_ = AudioFrame::kVadUnknown;
 
       ASSERT_EQ(kNoErr, apm_->set_stream_delay_ms(0));
@@ -1615,7 +1621,7 @@
       ASSERT_EQ(kNoErr, apm_->ProcessStream(frame_));
       analog_level = apm_->gain_control()->stream_analog_level();
 
-      VerifyChannelsAreEqual(frame_->data_, frame_->samples_per_channel_);
+      VerifyChannelsAreEqual(frame_->data(), frame_->samples_per_channel_);
     }
     rewind(far_file_);
     rewind(near_file_);
@@ -1747,7 +1753,7 @@
                   msg.channel(i).size());
         }
       } else {
-        memcpy(revframe_->data_, msg.data().data(), msg.data().size());
+        memcpy(revframe_->mutable_data(), msg.data().data(), msg.data().size());
         if (format == kFloatFormat) {
           // We're using an int16 input file; convert to float.
           ConvertToFloat(*revframe_, revfloat_cb_.get());
@@ -1778,7 +1784,8 @@
                   msg.input_channel(i).size());
         }
       } else {
-        memcpy(frame_->data_, msg.input_data().data(), msg.input_data().size());
+        memcpy(frame_->mutable_data(), msg.input_data().data(),
+               msg.input_data().size());
         if (format == kFloatFormat) {
           // We're using an int16 input file; convert to float.
           ConvertToFloat(*frame_, float_cb_.get());
@@ -1987,7 +1994,7 @@
       EXPECT_NOERR(fapm->gain_control()->set_stream_analog_level(analog_level));
 
       EXPECT_NOERR(apm_->ProcessStream(frame_));
-      Deinterleave(frame_->data_, samples_per_channel, num_output_channels,
+      Deinterleave(frame_->data(), samples_per_channel, num_output_channels,
                    output_int16.channels());
 
       EXPECT_NOERR(fapm->ProcessStream(
@@ -2151,7 +2158,7 @@
       ns_speech_prob_average += apm_->noise_suppression()->speech_probability();
 
       size_t frame_size = frame_->samples_per_channel_ * frame_->num_channels_;
-      size_t write_count = fwrite(frame_->data_,
+      size_t write_count = fwrite(frame_->data(),
                                   sizeof(int16_t),
                                   frame_size,
                                   out_file_);
diff --git a/webrtc/modules/audio_processing/test/aec_dump_based_simulator.cc b/webrtc/modules/audio_processing/test/aec_dump_based_simulator.cc
index d1cd484..4a5ada5 100644
--- a/webrtc/modules/audio_processing/test/aec_dump_based_simulator.cc
+++ b/webrtc/modules/audio_processing/test/aec_dump_based_simulator.cc
@@ -29,9 +29,10 @@
       msg.output_data().size()) {
     return false;
   } else {
+    const int16_t* frame_data = frame.data();
     for (size_t k = 0; k < frame.num_channels_ * frame.samples_per_channel_;
          ++k) {
-      if (msg.output_data().data()[k] != frame.data_[k]) {
+      if (msg.output_data().data()[k] != frame_data[k]) {
         return false;
       }
     }
@@ -78,10 +79,11 @@
     interface_used_ = InterfaceType::kFixedInterface;
 
     // Populate input buffer.
-    RTC_CHECK_EQ(sizeof(fwd_frame_.data_[0]) * fwd_frame_.samples_per_channel_ *
+    RTC_CHECK_EQ(sizeof(*fwd_frame_.data()) * fwd_frame_.samples_per_channel_ *
                      fwd_frame_.num_channels_,
                  msg.input_data().size());
-    memcpy(fwd_frame_.data_, msg.input_data().data(), msg.input_data().size());
+    memcpy(fwd_frame_.mutable_data(), msg.input_data().data(),
+           msg.input_data().size());
   } else {
     // Float interface processing.
     // Verify interface invariance.
@@ -105,9 +107,10 @@
     if (artificial_nearend_buffer_reader_->Read(
             artificial_nearend_buf_.get())) {
       if (msg.has_input_data()) {
+        int16_t* fwd_frame_data = fwd_frame_.mutable_data();
         for (size_t k = 0; k < in_buf_->num_frames(); ++k) {
-          fwd_frame_.data_[k] = rtc::saturated_cast<int16_t>(
-              fwd_frame_.data_[k] +
+          fwd_frame_data[k] = rtc::saturated_cast<int16_t>(
+              fwd_frame_data[k] +
               static_cast<int16_t>(32767 *
                                    artificial_nearend_buf_->channels()[0][k]));
         }
@@ -191,7 +194,7 @@
     RTC_CHECK_EQ(sizeof(int16_t) * rev_frame_.samples_per_channel_ *
                      rev_frame_.num_channels_,
                  msg.data().size());
-    memcpy(rev_frame_.data_, msg.data().data(), msg.data().size());
+    memcpy(rev_frame_.mutable_data(), msg.data().data(), msg.data().size());
   } else {
     // Float interface processing.
     // Verify interface invariance.
diff --git a/webrtc/modules/audio_processing/test/audio_processing_simulator.cc b/webrtc/modules/audio_processing/test/audio_processing_simulator.cc
index 2173534..58b47e2 100644
--- a/webrtc/modules/audio_processing/test/audio_processing_simulator.cc
+++ b/webrtc/modules/audio_processing/test/audio_processing_simulator.cc
@@ -30,7 +30,7 @@
   RTC_CHECK_EQ(src.samples_per_channel_, dest->num_frames());
   // Copy the data from the input buffer.
   std::vector<float> tmp(src.samples_per_channel_ * src.num_channels_);
-  S16ToFloat(src.data_, tmp.size(), tmp.data());
+  S16ToFloat(src.data(), tmp.size(), tmp.data());
   Deinterleave(tmp.data(), src.samples_per_channel_, src.num_channels_,
                dest->channels());
 }
@@ -68,9 +68,10 @@
 void CopyToAudioFrame(const ChannelBuffer<float>& src, AudioFrame* dest) {
   RTC_CHECK_EQ(src.num_channels(), dest->num_channels_);
   RTC_CHECK_EQ(src.num_frames(), dest->samples_per_channel_);
+  int16_t* dest_data = dest->mutable_data();
   for (size_t ch = 0; ch < dest->num_channels_; ++ch) {
     for (size_t sample = 0; sample < dest->samples_per_channel_; ++sample) {
-      dest->data_[sample * dest->num_channels_ + ch] =
+      dest_data[sample * dest->num_channels_ + ch] =
           src.channels()[ch][sample] * 32767;
     }
   }
diff --git a/webrtc/modules/include/module_common_types.h b/webrtc/modules/include/module_common_types.h
index 4d38c67..f4d42a8 100644
--- a/webrtc/modules/include/module_common_types.h
+++ b/webrtc/modules/include/module_common_types.h
@@ -271,11 +271,8 @@
  * states.
  *
  * Notes
- * - The total number of samples in |data_| is
- *   samples_per_channel_ * num_channels_
- *
+ * - The total number of samples is samples_per_channel_ * num_channels_
  * - Stereo data is interleaved starting with the left channel.
- *
  */
 class AudioFrame {
  public:
@@ -306,8 +303,7 @@
 
   AudioFrame();
 
-  // Resets all members to their default state (except does not modify the
-  // contents of |data_|).
+  // Resets all members to their default state.
   void Reset();
 
   void UpdateFrame(int id, uint32_t timestamp, const int16_t* data,
@@ -317,16 +313,21 @@
 
   void CopyFrom(const AudioFrame& src);
 
-  // TODO(yujo): upcoming API update. Currently, both of these just return
-  // data_.
+  // data() returns a zeroed static buffer if the frame is muted.
+  // mutable_data() always returns the non-static buffer; the first call to
+  // mutable_data() on a muted frame zeros that buffer and marks it unmuted.
   const int16_t* data() const;
   int16_t* mutable_data();
 
+  // Prefer to mute frames using AudioFrameOperations::Mute.
+  void Mute();
+  // Frame is muted by default.
+  bool muted() const;
+
   // These methods are deprecated. Use the functions in
   // webrtc/audio/utility instead. These methods will exists for a
   // short period of time until webrtc clients have updated. See
   // webrtc:6548 for details.
-  RTC_DEPRECATED void Mute();
   RTC_DEPRECATED AudioFrame& operator>>=(const int rhs);
   RTC_DEPRECATED AudioFrame& operator+=(const AudioFrame& rhs);
 
@@ -339,7 +340,6 @@
   // NTP time of the estimated capture time in local timebase in milliseconds.
   // -1 represents an uninitialized value.
   int64_t ntp_time_ms_ = -1;
-  int16_t data_[kMaxDataSizeSamples];
   size_t samples_per_channel_ = 0;
   int sample_rate_hz_ = 0;
   size_t num_channels_ = 0;
@@ -347,13 +347,24 @@
   VADActivity vad_activity_ = kVadUnknown;
 
  private:
+  // A permanently zeroed-out buffer to represent muted frames. This is a
+  // header-only class, so the only way to avoid creating a separate empty
+  // buffer per translation unit is to wrap a static in an inline function.
+  static const int16_t* empty_data() {
+    static const int16_t kEmptyData[kMaxDataSizeSamples] = {0};
+    static_assert(sizeof(kEmptyData) == kMaxDataSizeBytes, "kMaxDataSizeBytes");
+    return kEmptyData;
+  }
+
+  int16_t data_[kMaxDataSizeSamples];
+  bool muted_ = true;
+
   RTC_DISALLOW_COPY_AND_ASSIGN(AudioFrame);
 };
 
-// TODO(henrik.lundin) Can we remove the call to data_()?
-// See https://bugs.chromium.org/p/webrtc/issues/detail?id=5647.
-inline AudioFrame::AudioFrame()
-    : data_() {
+inline AudioFrame::AudioFrame() {
+  // Visual Studio doesn't like this in the class definition.
+  static_assert(sizeof(data_) == kMaxDataSizeBytes, "kMaxDataSizeBytes");
 }
 
 inline void AudioFrame::Reset() {
@@ -363,6 +374,7 @@
   timestamp_ = 0;
   elapsed_time_ms_ = -1;
   ntp_time_ms_ = -1;
+  muted_ = true;
   samples_per_channel_ = 0;
   sample_rate_hz_ = 0;
   num_channels_ = 0;
@@ -388,10 +400,11 @@
 
   const size_t length = samples_per_channel * num_channels;
   assert(length <= kMaxDataSizeSamples);
-  if (data != NULL) {
+  if (data != nullptr) {
     memcpy(data_, data, sizeof(int16_t) * length);
+    muted_ = false;
   } else {
-    memset(data_, 0, sizeof(int16_t) * length);
+    muted_ = true;
   }
 }
 
@@ -402,6 +415,7 @@
   timestamp_ = src.timestamp_;
   elapsed_time_ms_ = src.elapsed_time_ms_;
   ntp_time_ms_ = src.ntp_time_ms_;
+  muted_ = src.muted();
   samples_per_channel_ = src.samples_per_channel_;
   sample_rate_hz_ = src.sample_rate_hz_;
   speech_type_ = src.speech_type_;
@@ -410,24 +424,36 @@
 
   const size_t length = samples_per_channel_ * num_channels_;
   assert(length <= kMaxDataSizeSamples);
-  memcpy(data_, src.data_, sizeof(int16_t) * length);
+  if (!src.muted()) {
+    memcpy(data_, src.data(), sizeof(int16_t) * length);
+    muted_ = false;
+  }
 }
 
 inline const int16_t* AudioFrame::data() const {
-  return data_;
+  return muted_ ? empty_data() : data_;
 }
 
+// TODO(henrik.lundin) Can we skip zeroing the buffer?
+// See https://bugs.chromium.org/p/webrtc/issues/detail?id=5647.
 inline int16_t* AudioFrame::mutable_data() {
+  if (muted_) {
+    memset(data_, 0, kMaxDataSizeBytes);
+    muted_ = false;
+  }
   return data_;
 }
 
 inline void AudioFrame::Mute() {
-  memset(data_, 0, samples_per_channel_ * num_channels_ * sizeof(int16_t));
+  muted_ = true;
 }
 
+inline bool AudioFrame::muted() const { return muted_; }
+
 inline AudioFrame& AudioFrame::operator>>=(const int rhs) {
   assert((num_channels_ > 0) && (num_channels_ < 3));
   if ((num_channels_ > 2) || (num_channels_ < 1)) return *this;
+  if (muted_) return *this;
 
   for (size_t i = 0; i < samples_per_channel_ * num_channels_; i++) {
     data_[i] = static_cast<int16_t>(data_[i] >> rhs);
@@ -441,7 +467,7 @@
   if ((num_channels_ > 2) || (num_channels_ < 1)) return *this;
   if (num_channels_ != rhs.num_channels_) return *this;
 
-  bool noPrevData = false;
+  bool noPrevData = muted_;
   if (samples_per_channel_ != rhs.samples_per_channel_) {
     if (samples_per_channel_ == 0) {
       // special case we have no data to start with
@@ -460,17 +486,21 @@
 
   if (speech_type_ != rhs.speech_type_) speech_type_ = kUndefined;
 
-  if (noPrevData) {
-    memcpy(data_, rhs.data_,
-           sizeof(int16_t) * rhs.samples_per_channel_ * num_channels_);
-  } else {
-    // IMPROVEMENT this can be done very fast in assembly
-    for (size_t i = 0; i < samples_per_channel_ * num_channels_; i++) {
-      int32_t wrap_guard =
-          static_cast<int32_t>(data_[i]) + static_cast<int32_t>(rhs.data_[i]);
-      data_[i] = rtc::saturated_cast<int16_t>(wrap_guard);
+  if (!rhs.muted()) {
+    muted_ = false;
+    if (noPrevData) {
+      memcpy(data_, rhs.data(),
+             sizeof(int16_t) * rhs.samples_per_channel_ * num_channels_);
+    } else {
+      // IMPROVEMENT this can be done very fast in assembly
+      for (size_t i = 0; i < samples_per_channel_ * num_channels_; i++) {
+        int32_t wrap_guard =
+            static_cast<int32_t>(data_[i]) + static_cast<int32_t>(rhs.data_[i]);
+        data_[i] = rtc::saturated_cast<int16_t>(wrap_guard);
+      }
     }
   }
+
   return *this;
 }
 
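For reference, the contract implied by the inline definitions above, as a small hypothetical usage sketch (the RTC_DCHECKs are illustrative only):

  AudioFrame frame;                    // Frames start out muted.
  RTC_DCHECK(frame.muted());
  const int16_t* ro = frame.data();    // Shared zeroed buffer; data_ untouched.
  int16_t* rw = frame.mutable_data();  // Lazily zeroes data_, clears muted_.
  RTC_DCHECK(!frame.muted());
  frame.Mute();                        // data() reads as zeros again.

Callers that only read should prefer data(); calling mutable_data() on a muted frame pays for a memset of the full kMaxDataSizeBytes buffer.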
diff --git a/webrtc/modules/module_common_types_unittest.cc b/webrtc/modules/module_common_types_unittest.cc
index e4d5033..f91668b 100644
--- a/webrtc/modules/module_common_types_unittest.cc
+++ b/webrtc/modules/module_common_types_unittest.cc
@@ -10,10 +10,111 @@
 
 #include "webrtc/modules/include/module_common_types.h"
 
+#include <string.h>  // memcmp
+
 #include "webrtc/test/gtest.h"
 
 namespace webrtc {
 
+namespace {
+
+bool AllSamplesAre(int16_t sample, const AudioFrame& frame) {
+  const int16_t* frame_data = frame.data();
+  for (size_t i = 0; i < AudioFrame::kMaxDataSizeSamples; i++) {
+    if (frame_data[i] != sample) {
+      return false;
+    }
+  }
+  return true;
+}
+
+constexpr int kId = 16;
+constexpr uint32_t kTimestamp = 27;
+constexpr int kSampleRateHz = 16000;
+constexpr size_t kNumChannels = 1;
+constexpr size_t kSamplesPerChannel = kSampleRateHz / 100;
+
+}  // namespace
+
+TEST(AudioFrameTest, FrameStartsMuted) {
+  AudioFrame frame;
+  EXPECT_TRUE(frame.muted());
+  EXPECT_TRUE(AllSamplesAre(0, frame));
+}
+
+TEST(AudioFrameTest, UnmutedFrameIsInitiallyZeroed) {
+  AudioFrame frame;
+  frame.mutable_data();
+  EXPECT_FALSE(frame.muted());
+  EXPECT_TRUE(AllSamplesAre(0, frame));
+}
+
+TEST(AudioFrameTest, MutedFrameBufferIsZeroed) {
+  AudioFrame frame;
+  int16_t* frame_data = frame.mutable_data();
+  for (size_t i = 0; i < AudioFrame::kMaxDataSizeSamples; i++) {
+    frame_data[i] = 17;
+  }
+  ASSERT_TRUE(AllSamplesAre(17, frame));
+  frame.Mute();
+  EXPECT_TRUE(frame.muted());
+  EXPECT_TRUE(AllSamplesAre(0, frame));
+}
+
+TEST(AudioFrameTest, UpdateFrame) {
+  AudioFrame frame;
+  int16_t samples[kNumChannels * kSamplesPerChannel] = {17};
+  frame.UpdateFrame(kId, kTimestamp, samples, kSamplesPerChannel, kSampleRateHz,
+                    AudioFrame::kPLC, AudioFrame::kVadActive, kNumChannels);
+
+  EXPECT_EQ(kId, frame.id_);
+  EXPECT_EQ(kTimestamp, frame.timestamp_);
+  EXPECT_EQ(kSamplesPerChannel, frame.samples_per_channel_);
+  EXPECT_EQ(kSampleRateHz, frame.sample_rate_hz_);
+  EXPECT_EQ(AudioFrame::kPLC, frame.speech_type_);
+  EXPECT_EQ(AudioFrame::kVadActive, frame.vad_activity_);
+  EXPECT_EQ(kNumChannels, frame.num_channels_);
+
+  EXPECT_FALSE(frame.muted());
+  EXPECT_EQ(0, memcmp(samples, frame.data(), sizeof(samples)));
+
+  frame.UpdateFrame(kId, kTimestamp, nullptr /* data */, kSamplesPerChannel,
+                    kSampleRateHz, AudioFrame::kPLC, AudioFrame::kVadActive,
+                    kNumChannels);
+  EXPECT_TRUE(frame.muted());
+  EXPECT_TRUE(AllSamplesAre(0, frame));
+}
+
+TEST(AudioFrameTest, CopyFrom) {
+  AudioFrame frame1;
+  AudioFrame frame2;
+
+  int16_t samples[kNumChannels * kSamplesPerChannel] = {17};
+  frame2.UpdateFrame(kId, kTimestamp, samples, kSamplesPerChannel,
+                     kSampleRateHz, AudioFrame::kPLC, AudioFrame::kVadActive,
+                     kNumChannels);
+  frame1.CopyFrom(frame2);
+
+  EXPECT_EQ(frame2.id_, frame1.id_);
+  EXPECT_EQ(frame2.timestamp_, frame1.timestamp_);
+  EXPECT_EQ(frame2.samples_per_channel_, frame1.samples_per_channel_);
+  EXPECT_EQ(frame2.sample_rate_hz_, frame1.sample_rate_hz_);
+  EXPECT_EQ(frame2.speech_type_, frame1.speech_type_);
+  EXPECT_EQ(frame2.vad_activity_, frame1.vad_activity_);
+  EXPECT_EQ(frame2.num_channels_, frame1.num_channels_);
+
+  EXPECT_EQ(frame2.muted(), frame1.muted());
+  EXPECT_EQ(0, memcmp(frame2.data(), frame1.data(), sizeof(samples)));
+
+  frame2.UpdateFrame(kId, kTimestamp, nullptr /* data */, kSamplesPerChannel,
+                     kSampleRateHz, AudioFrame::kPLC, AudioFrame::kVadActive,
+                     kNumChannels);
+  frame1.CopyFrom(frame2);
+
+  EXPECT_EQ(frame2.muted(), frame1.muted());
+  EXPECT_EQ(0, memcmp(frame2.data(), frame1.data(), sizeof(samples)));
+}
+
 TEST(IsNewerSequenceNumber, Equal) {
   EXPECT_FALSE(IsNewerSequenceNumber(0x0001, 0x0001));
 }
diff --git a/webrtc/tools/agc/activity_metric.cc b/webrtc/tools/agc/activity_metric.cc
index 9715d62..8ea1939 100644
--- a/webrtc/tools/agc/activity_metric.cc
+++ b/webrtc/tools/agc/activity_metric.cc
@@ -64,11 +64,12 @@
   const double sum_squared_silence = kRmsSilence * kRmsSilence *
       frame->samples_per_channel_;
   double sum_squared = 0;
+  int16_t* frame_data = frame->mutable_data();
   for (size_t n = 0; n < frame->samples_per_channel_; n++)
-    sum_squared += frame->data_[n] * frame->data_[n];
+    sum_squared += frame_data[n] * frame_data[n];
   if (sum_squared <= sum_squared_silence) {
     for (size_t n = 0; n < frame->samples_per_channel_; n++)
-      frame->data_[n] = (rand() & 0xF) - 8;  // NOLINT: ignore non-threadsafe.
+      frame_data[n] = (rand() & 0xF) - 8;  // NOLINT: ignore non-threadsafe.
   }
 }
 
@@ -105,10 +106,11 @@
       return -1;
     video_vad_[video_index_++] = p_video;
     AudioFeatures features;
+    const int16_t* frame_data = frame.data();
     audio_processing_->ExtractFeatures(
-        frame.data_, frame.samples_per_channel_, &features);
+        frame_data, frame.samples_per_channel_, &features);
     if (FLAG_standalone_vad) {
-      standalone_vad_->AddAudio(frame.data_,
+      standalone_vad_->AddAudio(frame_data,
                                 frame.samples_per_channel_);
     }
     if (features.num_frames > 0) {
@@ -251,7 +253,7 @@
   bool in_false_positive_region = false;
   int total_false_positive_duration = 0;
   bool video_adapted = false;
-  while (kSamplesToRead == fread(frame.data_, sizeof(int16_t),
+  while (kSamplesToRead == fread(frame.mutable_data(), sizeof(int16_t),
                                  kSamplesToRead, pcm_fid)) {
     assert(true_vad_index < kMaxNumFrames);
     ASSERT_EQ(1u, fread(&true_vad[true_vad_index], sizeof(*true_vad), 1,
diff --git a/webrtc/voice_engine/BUILD.gn b/webrtc/voice_engine/BUILD.gn
index 253eacb..e16b176 100644
--- a/webrtc/voice_engine/BUILD.gn
+++ b/webrtc/voice_engine/BUILD.gn
@@ -57,6 +57,7 @@
   deps = [
     ":audio_coder",
     "..:webrtc_common",
+    "../audio/utility:audio_frame_operations",
     "../base:rtc_base_approved",
     "../common_audio",
     "../modules:module_api",
diff --git a/webrtc/voice_engine/audio_level.cc b/webrtc/voice_engine/audio_level.cc
index b2f4df4..27a7dde 100644
--- a/webrtc/voice_engine/audio_level.cc
+++ b/webrtc/voice_engine/audio_level.cc
@@ -50,9 +50,10 @@
 
 void AudioLevel::ComputeLevel(const AudioFrame& audioFrame) {
   // Check speech level (works for 2 channels as well)
-  int16_t abs_value = WebRtcSpl_MaxAbsValueW16(
-      audioFrame.data_,
-      audioFrame.samples_per_channel_ * audioFrame.num_channels_);
+  int16_t abs_value = audioFrame.muted() ? 0 :
+      WebRtcSpl_MaxAbsValueW16(
+          audioFrame.data(),
+          audioFrame.samples_per_channel_ * audioFrame.num_channels_);
 
   // Protect member access using a lock since this method is called on a
   // dedicated audio thread in the RecordedDataIsAvailable() callback.
diff --git a/webrtc/voice_engine/channel.cc b/webrtc/voice_engine/channel.cc
index 0a9e9fc..1612270 100644
--- a/webrtc/voice_engine/channel.cc
+++ b/webrtc/voice_engine/channel.cc
@@ -660,7 +660,7 @@
     rtc::CritScope cs(&_callbackCritSect);
     if (audio_sink_) {
       AudioSinkInterface::Data data(
-          &audioFrame->data_[0], audioFrame->samples_per_channel_,
+          audioFrame->data(), audioFrame->samples_per_channel_,
           audioFrame->sample_rate_hz_, audioFrame->num_channels_,
           audioFrame->timestamp_);
       audio_sink_->OnData(data);
@@ -2786,12 +2786,12 @@
   if (_includeAudioLevelIndication) {
     size_t length =
         audio_input->samples_per_channel_ * audio_input->num_channels_;
-    RTC_CHECK_LE(length, sizeof(audio_input->data_));
+    RTC_CHECK_LE(length, AudioFrame::kMaxDataSizeBytes);
     if (is_muted && previous_frame_muted_) {
       rms_level_.AnalyzeMuted(length);
     } else {
       rms_level_.Analyze(
-          rtc::ArrayView<const int16_t>(audio_input->data_, length));
+          rtc::ArrayView<const int16_t>(audio_input->data(), length));
     }
   }
   previous_frame_muted_ = is_muted;
@@ -2951,8 +2951,8 @@
   if (_mixFileWithMicrophone) {
     // Currently file stream is always mono.
     // TODO(xians): Change the code when FilePlayer supports real stereo.
-    MixWithSat(audio_input->data_, audio_input->num_channels_, fileBuffer.get(),
-               1, fileSamples);
+    MixWithSat(audio_input->mutable_data(), audio_input->num_channels_,
+               fileBuffer.get(), 1, fileSamples);
   } else {
     // Replace ACM audio with file.
     // Currently file stream is always mono.
@@ -2991,8 +2991,8 @@
   if (audioFrame.samples_per_channel_ == fileSamples) {
     // Currently file stream is always mono.
     // TODO(xians): Change the code when FilePlayer supports real stereo.
-    MixWithSat(audioFrame.data_, audioFrame.num_channels_, fileBuffer.get(), 1,
-               fileSamples);
+    MixWithSat(audioFrame.mutable_data(), audioFrame.num_channels_,
+               fileBuffer.get(), 1, fileSamples);
   } else {
     WEBRTC_TRACE(kTraceWarning, kTraceVoice, VoEId(_instanceId, _channelId),
                  "Channel::MixAudioWithFile() samples_per_channel_(%" PRIuS
diff --git a/webrtc/voice_engine/file_player.cc b/webrtc/voice_engine/file_player.cc
index a7db182..d2eadae 100644
--- a/webrtc/voice_engine/file_player.cc
+++ b/webrtc/voice_engine/file_player.cc
@@ -126,9 +126,9 @@
     unresampledAudioFrame.sample_rate_hz_ = _codec.plfreq;
 
     // L16 is un-encoded data. Just pull 10 ms.
-    size_t lengthInBytes = sizeof(unresampledAudioFrame.data_);
+    size_t lengthInBytes = AudioFrame::kMaxDataSizeBytes;
     if (_fileModule.PlayoutAudioData(
-            reinterpret_cast<int8_t*>(unresampledAudioFrame.data_),
+            reinterpret_cast<int8_t*>(unresampledAudioFrame.mutable_data()),
             lengthInBytes) == -1) {
       // End of file reached.
       return -1;
@@ -173,7 +173,7 @@
     memset(outBuffer, 0, outLen * sizeof(int16_t));
     return 0;
   }
-  _resampler.Push(unresampledAudioFrame.data_,
+  _resampler.Push(unresampledAudioFrame.data(),
                   unresampledAudioFrame.samples_per_channel_, outBuffer,
                   MAX_AUDIO_BUFFER_IN_SAMPLES, outLen);
 
diff --git a/webrtc/voice_engine/file_recorder.cc b/webrtc/voice_engine/file_recorder.cc
index 5448451..eed3c07 100644
--- a/webrtc/voice_engine/file_recorder.cc
+++ b/webrtc/voice_engine/file_recorder.cc
@@ -12,6 +12,7 @@
 
 #include <list>
 
+#include "webrtc/audio/utility/audio_frame_operations.h"
 #include "webrtc/base/logging.h"
 #include "webrtc/base/platform_thread.h"
 #include "webrtc/common_audio/resampler/include/resampler.h"
@@ -159,12 +160,10 @@
     tempAudioFrame.sample_rate_hz_ = incomingAudioFrame.sample_rate_hz_;
     tempAudioFrame.samples_per_channel_ =
         incomingAudioFrame.samples_per_channel_;
-    for (size_t i = 0; i < (incomingAudioFrame.samples_per_channel_); i++) {
-      // Sample value is the average of left and right buffer rounded to
-      // closest integer value. Note samples can be either 1 or 2 byte.
-      tempAudioFrame.data_[i] = ((incomingAudioFrame.data_[2 * i] +
-                                  incomingAudioFrame.data_[(2 * i) + 1] + 1) >>
-                                 1);
+    if (!incomingAudioFrame.muted()) {
+      AudioFrameOperations::StereoToMono(
+          incomingAudioFrame.data(), incomingAudioFrame.samples_per_channel_,
+          tempAudioFrame.mutable_data());
     }
   } else if (incomingAudioFrame.num_channels_ == 1 && _moduleFile->IsStereo()) {
     // Recording stereo but incoming audio is mono.
@@ -172,10 +171,10 @@
     tempAudioFrame.sample_rate_hz_ = incomingAudioFrame.sample_rate_hz_;
     tempAudioFrame.samples_per_channel_ =
         incomingAudioFrame.samples_per_channel_;
-    for (size_t i = 0; i < (incomingAudioFrame.samples_per_channel_); i++) {
-      // Duplicate sample to both channels
-      tempAudioFrame.data_[2 * i] = incomingAudioFrame.data_[i];
-      tempAudioFrame.data_[2 * i + 1] = incomingAudioFrame.data_[i];
+    if (!incomingAudioFrame.muted()) {
+      AudioFrameOperations::MonoToStereo(
+          incomingAudioFrame.data(), incomingAudioFrame.samples_per_channel_,
+          tempAudioFrame.mutable_data());
     }
   }
 
@@ -204,8 +203,9 @@
     _audioResampler.ResetIfNeeded(ptrAudioFrame->sample_rate_hz_,
                                   codec_info_.plfreq,
                                   ptrAudioFrame->num_channels_);
+    // TODO(yujo): skip resample if frame is muted.
     _audioResampler.Push(
-        ptrAudioFrame->data_,
+        ptrAudioFrame->data(),
         ptrAudioFrame->samples_per_channel_ * ptrAudioFrame->num_channels_,
         reinterpret_cast<int16_t*>(_audioBuffer), MAX_AUDIO_BUFFER_IN_BYTES,
         outLen);
diff --git a/webrtc/voice_engine/transmit_mixer.cc b/webrtc/voice_engine/transmit_mixer.cc
index e14b03f..6796f84 100644
--- a/webrtc/voice_engine/transmit_mixer.cc
+++ b/webrtc/voice_engine/transmit_mixer.cc
@@ -936,7 +936,7 @@
     {
         // Currently file stream is always mono.
         // TODO(xians): Change the code when FilePlayer supports real stereo.
-        MixWithSat(_audioFrame.data_,
+        MixWithSat(_audioFrame.mutable_data(),
                    _audioFrame.num_channels_,
                    fileBuffer.get(),
                    1,
diff --git a/webrtc/voice_engine/utility.cc b/webrtc/voice_engine/utility.cc
index f394762..f877c43 100644
--- a/webrtc/voice_engine/utility.cc
+++ b/webrtc/voice_engine/utility.cc
@@ -25,7 +25,7 @@
 void RemixAndResample(const AudioFrame& src_frame,
                       PushResampler<int16_t>* resampler,
                       AudioFrame* dst_frame) {
-  RemixAndResample(src_frame.data_, src_frame.samples_per_channel_,
+  RemixAndResample(src_frame.data(), src_frame.samples_per_channel_,
                    src_frame.num_channels_, src_frame.sample_rate_hz_,
                    resampler, dst_frame);
   dst_frame->timestamp_ = src_frame.timestamp_;
@@ -64,13 +64,18 @@
             << ", audio_ptr_num_channels = " << audio_ptr_num_channels;
   }
 
+  // TODO(yujo): for muted input frames, don't resample. Either 1) allow
+  // resampler to return output length without doing the resample, so we know
+  // how much to zero here; or 2) make resampler accept a hint that the input is
+  // zeroed.
   const size_t src_length = samples_per_channel * audio_ptr_num_channels;
-  int out_length = resampler->Resample(audio_ptr, src_length, dst_frame->data_,
+  int out_length = resampler->Resample(audio_ptr, src_length,
+                                       dst_frame->mutable_data(),
                                        AudioFrame::kMaxDataSizeSamples);
   if (out_length == -1) {
     FATAL() << "Resample failed: audio_ptr = " << audio_ptr
             << ", src_length = " << src_length
-            << ", dst_frame->data_ = " << dst_frame->data_;
+            << ", dst_frame->mutable_data() = " << dst_frame->mutable_data();
   }
   dst_frame->samples_per_channel_ = out_length / audio_ptr_num_channels;
 
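The TODO above lists two ways to avoid resampling silence. A rough sketch of the first option (hypothetical, not part of this change), assuming the AudioFrame overload checks src_frame.muted() and derives the 10 ms output length from dst_frame->sample_rate_hz_ instead of running the resampler:

  if (src_frame.muted()) {
    // Resampled silence is still silence; only the output length matters.
    dst_frame->samples_per_channel_ =
        rtc::CheckedDivExact(dst_frame->sample_rate_hz_, 100);
    dst_frame->Mute();
    return;
  }

The second option would instead pass a hint down to the resampler so it can skip the filtering work while still reporting the output length.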
diff --git a/webrtc/voice_engine/utility_unittest.cc b/webrtc/voice_engine/utility_unittest.cc
index 94abc0f..992cb71 100644
--- a/webrtc/voice_engine/utility_unittest.cc
+++ b/webrtc/voice_engine/utility_unittest.cc
@@ -47,12 +47,13 @@
 // used so non-integer values result in rounding error, but not an accumulating
 // error.
 void SetMonoFrame(float data, int sample_rate_hz, AudioFrame* frame) {
-  memset(frame->data_, 0, sizeof(frame->data_));
+  frame->Mute();
   frame->num_channels_ = 1;
   frame->sample_rate_hz_ = sample_rate_hz;
   frame->samples_per_channel_ = rtc::CheckedDivExact(sample_rate_hz, 100);
+  int16_t* frame_data = frame->mutable_data();
   for (size_t i = 0; i < frame->samples_per_channel_; i++) {
-    frame->data_[i] = static_cast<int16_t>(data * i);
+    frame_data[i] = static_cast<int16_t>(data * i);
   }
 }
 
@@ -67,13 +68,14 @@
                     float right,
                     int sample_rate_hz,
                     AudioFrame* frame) {
-  memset(frame->data_, 0, sizeof(frame->data_));
+  frame->Mute();
   frame->num_channels_ = 2;
   frame->sample_rate_hz_ = sample_rate_hz;
   frame->samples_per_channel_ = rtc::CheckedDivExact(sample_rate_hz, 100);
+  int16_t* frame_data = frame->mutable_data();
   for (size_t i = 0; i < frame->samples_per_channel_; i++) {
-    frame->data_[i * 2] = static_cast<int16_t>(left * i);
-    frame->data_[i * 2 + 1] = static_cast<int16_t>(right * i);
+    frame_data[i * 2] = static_cast<int16_t>(left * i);
+    frame_data[i * 2 + 1] = static_cast<int16_t>(right * i);
   }
 }
 
@@ -90,15 +92,16 @@
                   float ch4,
                   int sample_rate_hz,
                   AudioFrame* frame) {
-  memset(frame->data_, 0, sizeof(frame->data_));
+  frame->Mute();
   frame->num_channels_ = 4;
   frame->sample_rate_hz_ = sample_rate_hz;
   frame->samples_per_channel_ = rtc::CheckedDivExact(sample_rate_hz, 100);
+  int16_t* frame_data = frame->mutable_data();
   for (size_t i = 0; i < frame->samples_per_channel_; i++) {
-    frame->data_[i * 4] = static_cast<int16_t>(ch1 * i);
-    frame->data_[i * 4 + 1] = static_cast<int16_t>(ch2 * i);
-    frame->data_[i * 4 + 2] = static_cast<int16_t>(ch3 * i);
-    frame->data_[i * 4 + 3] = static_cast<int16_t>(ch4 * i);
+    frame_data[i * 4] = static_cast<int16_t>(ch1 * i);
+    frame_data[i * 4 + 1] = static_cast<int16_t>(ch2 * i);
+    frame_data[i * 4 + 2] = static_cast<int16_t>(ch3 * i);
+    frame_data[i * 4 + 3] = static_cast<int16_t>(ch4 * i);
   }
 }
 
@@ -119,11 +122,13 @@
   for (size_t delay = 0; delay <= max_delay; delay++) {
     float mse = 0;
     float variance = 0;
+    const int16_t* ref_frame_data = ref_frame.data();
+    const int16_t* test_frame_data = test_frame.data();
     for (size_t i = 0; i < ref_frame.samples_per_channel_ *
         ref_frame.num_channels_ - delay; i++) {
-      int error = ref_frame.data_[i] - test_frame.data_[i + delay];
+      int error = ref_frame_data[i] - test_frame_data[i + delay];
       mse += error * error;
-      variance += ref_frame.data_[i] * ref_frame.data_[i];
+      variance += ref_frame_data[i] * ref_frame_data[i];
     }
     float snr = 100;  // We assign 100 dB to the zero-error case.
     if (mse > 0)
@@ -140,9 +145,11 @@
 void VerifyFramesAreEqual(const AudioFrame& ref_frame,
                           const AudioFrame& test_frame) {
   VerifyParams(ref_frame, test_frame);
+  const int16_t* ref_frame_data = ref_frame.data();
+  const int16_t* test_frame_data = test_frame.data();
   for (size_t i = 0;
        i < ref_frame.samples_per_channel_ * ref_frame.num_channels_; i++) {
-    EXPECT_EQ(ref_frame.data_[i], test_frame.data_[i]);
+    EXPECT_EQ(ref_frame_data[i], test_frame_data[i]);
   }
 }
 
diff --git a/webrtc/voice_engine/voe_base_impl.cc b/webrtc/voice_engine/voe_base_impl.cc
index 8072cc8..1ddf53c 100644
--- a/webrtc/voice_engine/voe_base_impl.cc
+++ b/webrtc/voice_engine/voe_base_impl.cc
@@ -716,7 +716,7 @@
   assert(sample_rate == audioFrame_.sample_rate_hz_);
 
   // Deliver audio (PCM) samples to the ADM
-  memcpy(audio_data, audioFrame_.data_,
+  memcpy(audio_data, audioFrame_.data(),
          sizeof(int16_t) * number_of_frames * number_of_channels);
 
   *elapsed_time_ms = audioFrame_.elapsed_time_ms_;