A/V synchronization at the beginning of a recording session

o do not use edts/elst boxes since these optional boxes are ignored
o manipulate the first video/audio frame duration to make sure that the rest
  of the audio/video is in sync (ideally, we should only manipulate
  the vidoe frame duration, not the audio)
o reduce the initial audio mute/suppression period, which is used to
  eliminate the "recording" sound.

bug - 3405882 and 3362703

Change-Id: Ib0acfb4f3843b365157288951dc122b006299c18
diff --git a/include/media/stagefright/AudioSource.h b/include/media/stagefright/AudioSource.h
index b35a6e6..9e6f0e2 100644
--- a/include/media/stagefright/AudioSource.h
+++ b/include/media/stagefright/AudioSource.h
@@ -59,12 +59,12 @@
 
         // After the initial mute, we raise the volume linearly
         // over kAutoRampDurationUs.
-        kAutoRampDurationUs = 700000,
+        kAutoRampDurationUs = 300000,
 
         // This is the initial mute duration to suppress
         // the video recording signal tone
-        kAutoRampStartUs = 1000000,
-      };
+        kAutoRampStartUs = 0,
+    };
 
     Mutex mLock;
     Condition mFrameAvailableCondition;
diff --git a/media/libstagefright/AudioSource.cpp b/media/libstagefright/AudioSource.cpp
index cd0e021..bbdec02 100644
--- a/media/libstagefright/AudioSource.cpp
+++ b/media/libstagefright/AudioSource.cpp
@@ -263,6 +263,13 @@
         return OK;
     }
 
+    // Drop retrieved and previously lost audio data.
+    if (mNumFramesReceived == 0 && timeUs < mStartTimeUs) {
+        mRecord->getInputFramesLost();
+        LOGV("Drop audio data at %lld/%lld us", timeUs, mStartTimeUs);
+        return OK;
+    }
+
     if (mNumFramesReceived == 0 && mPrevSampleTimeUs == 0) {
         mInitialReadTimeUs = timeUs;
         // Initial delay
@@ -277,7 +284,13 @@
 
     int64_t timestampUs = mPrevSampleTimeUs;
 
-    size_t numLostBytes = mRecord->getInputFramesLost();
+    size_t numLostBytes = 0;
+    if (mNumFramesReceived > 0) {  // Ignore earlier frame lost
+        // getInputFramesLost() returns the number of lost frames.
+        // Convert number of frames lost to number of bytes lost.
+        numLostBytes = mRecord->getInputFramesLost() * mRecord->frameSize();
+    }
+
     CHECK_EQ(numLostBytes & 1, 0u);
     CHECK_EQ(audioBuffer.size & 1, 0u);
     size_t bufferSize = numLostBytes + audioBuffer.size;
diff --git a/media/libstagefright/CameraSource.cpp b/media/libstagefright/CameraSource.cpp
index 66e0657..8a24bc4 100644
--- a/media/libstagefright/CameraSource.cpp
+++ b/media/libstagefright/CameraSource.cpp
@@ -598,8 +598,7 @@
     }
 
     if (mNumGlitches > 0) {
-        LOGW("%d long delays between neighboring video frames during",
-                mNumGlitches);
+        LOGW("%d long delays between neighboring video frames", mNumGlitches);
     }
 
     CHECK_EQ(mNumFramesReceived, mNumFramesEncoded + mNumFramesDropped);
@@ -696,10 +695,9 @@
         int32_t msgType, const sp<IMemory> &data) {
     LOGV("dataCallbackTimestamp: timestamp %lld us", timestampUs);
     Mutex::Autolock autoLock(mLock);
-    if (!mStarted) {
+    if (!mStarted || (mNumFramesReceived == 0 && timestampUs < mStartTimeUs)) {
+        LOGV("Drop frame at %lld/%lld us", timestampUs, mStartTimeUs);
         releaseOneRecordingFrame(data);
-        ++mNumFramesReceived;
-        ++mNumFramesDropped;
         return;
     }
 
diff --git a/media/libstagefright/MPEG4Writer.cpp b/media/libstagefright/MPEG4Writer.cpp
index b11789d..5d6ea7c 100644
--- a/media/libstagefright/MPEG4Writer.cpp
+++ b/media/libstagefright/MPEG4Writer.cpp
@@ -1281,7 +1281,21 @@
     initTrackingProgressStatus(params);
 
     sp<MetaData> meta = new MetaData;
+    if (mIsRealTimeRecording && mOwner->numTracks() > 1) {
+        /*
+         * This extra delay of accepting incoming audio/video signals
+         * helps to align a/v start time at the beginning of a recording
+         * session, and it also helps eliminate the "recording" sound for
+         * camcorder applications.
+         *
+         * Ideally, this platform-specific value should be defined
+         * in media_profiles.xml file
+         */
+        startTimeUs += 700000;
+    }
+
     meta->setInt64(kKeyTime, startTimeUs);
+
     status_t err = mSource->start(meta.get());
     if (err != OK) {
         mDone = mReachedEOS = true;
@@ -1944,7 +1958,11 @@
                      ((timestampUs * mTimeScale + 500000LL) / 1000000LL -
                      (lastTimestampUs * mTimeScale + 500000LL) / 1000000LL);
 
-            if (currDurationTicks != lastDurationTicks) {
+            // Force the first sample to have its own stts entry so that
+            // we can adjust its value later to maintain the A/V sync.
+            if (mNumSamples == 3 || currDurationTicks != lastDurationTicks) {
+                LOGV("%s lastDurationUs: %lld us, currDurationTicks: %lld us",
+                        mIsAudio? "Audio": "Video", lastDurationUs, currDurationTicks);
                 addOneSttsTableEntry(sampleCount, lastDurationUs);
                 sampleCount = 1;
             } else {
@@ -1957,6 +1975,8 @@
             }
             previousSampleSize = sampleSize;
         }
+        LOGV("%s timestampUs/lastTimestampUs: %lld/%lld",
+                mIsAudio? "Audio": "Video", timestampUs, lastTimestampUs);
         lastDurationUs = timestampUs - lastTimestampUs;
         lastDurationTicks = currDurationTicks;
         lastTimestampUs = timestampUs;
@@ -2028,7 +2048,16 @@
     } else {
         ++sampleCount;  // Count for the last sample
     }
-    addOneSttsTableEntry(sampleCount, lastDurationUs);
+
+    if (mNumSamples <= 2) {
+        addOneSttsTableEntry(1, lastDurationUs);
+        if (sampleCount - 1 > 0) {
+            addOneSttsTableEntry(sampleCount - 1, lastDurationUs);
+        }
+    } else {
+        addOneSttsTableEntry(sampleCount, lastDurationUs);
+    }
+
     mTrackDurationUs += lastDurationUs;
     mReachedEOS = true;
     LOGI("Received total/0-length (%d/%d) buffers and encoded %d frames. - %s",
@@ -2153,6 +2182,9 @@
     int32_t mvhdTimeScale = mOwner->getTimeScale();
     int64_t trakDurationUs = getDurationUs();
 
+    // Compensate for small start time difference from different media tracks
+    int64_t trackStartTimeOffsetUs = 0;
+
     mOwner->beginBox("trak");
 
       mOwner->beginBox("tkhd");
@@ -2191,26 +2223,8 @@
 
       int64_t moovStartTimeUs = mOwner->getStartTimestampUs();
       if (mStartTimestampUs != moovStartTimeUs) {
-        mOwner->beginBox("edts");
-          mOwner->beginBox("elst");
-            mOwner->writeInt32(0);           // version=0, flags=0: 32-bit time
-            mOwner->writeInt32(2);           // never ends with an empty list
-
-            // First elst entry: specify the starting time offset
-            int64_t offsetUs = mStartTimestampUs - moovStartTimeUs;
-            LOGV("OffsetUs: %lld", offsetUs);
-            int32_t seg = (offsetUs * mvhdTimeScale + 5E5) / 1E6;
-            mOwner->writeInt32(seg);         // in mvhd timecale
-            mOwner->writeInt32(-1);          // starting time offset
-            mOwner->writeInt32(1 << 16);     // rate = 1.0
-
-            // Second elst entry: specify the track duration
-            seg = (trakDurationUs * mvhdTimeScale + 5E5) / 1E6;
-            mOwner->writeInt32(seg);         // in mvhd timescale
-            mOwner->writeInt32(0);
-            mOwner->writeInt32(1 << 16);
-          mOwner->endBox();
-        mOwner->endBox();
+          CHECK(mStartTimestampUs > moovStartTimeUs);
+          trackStartTimeOffsetUs = mStartTimestampUs - moovStartTimeUs;
       }
 
       mOwner->beginBox("mdia");
@@ -2466,7 +2480,7 @@
           mOwner->beginBox("stts");
             mOwner->writeInt32(0);  // version=0, flags=0
             mOwner->writeInt32(mNumSttsTableEntries);
-            int64_t prevTimestampUs = 0;
+            int64_t prevTimestampUs = trackStartTimeOffsetUs;
             for (List<SttsTableEntry>::iterator it = mSttsTableEntries.begin();
                  it != mSttsTableEntries.end(); ++it) {
                 mOwner->writeInt32(it->sampleCount);