Fixed the software AAC encoder

Make sure that each input frame contains at least 1024 samples, as
required by the AAC encoder, and fix the incorrect timestamp.

Change-Id: I344cafe8c89be51d6e64552fab70539990ff6049
diff --git a/media/libstagefright/MPEG4Writer.cpp b/media/libstagefright/MPEG4Writer.cpp
index 4242d36..f16b225 100644
--- a/media/libstagefright/MPEG4Writer.cpp
+++ b/media/libstagefright/MPEG4Writer.cpp
@@ -185,7 +185,7 @@
         return UNKNOWN_ERROR;
     }
 
-    mStartTimestampUs = 0;
+    mStartTimestampUs = -1;
     if (mStarted) {
         if (mPaused) {
             mPaused = false;
@@ -561,8 +561,7 @@
     LOGI("setStartTimestampUs: %lld", timeUs);
     CHECK(timeUs >= 0);
     Mutex::Autolock autoLock(mLock);
-    if (mStartTimestampUs == 0 ||
-        (mStartTimestampUs > 0 && mStartTimestampUs > timeUs)) {
+    if (mStartTimestampUs < 0 || mStartTimestampUs > timeUs) {
         mStartTimestampUs = timeUs;
         LOGI("Earliest track starting time: %lld", mStartTimestampUs);
     }
diff --git a/media/libstagefright/codecs/aacenc/AACEncoder.cpp b/media/libstagefright/codecs/aacenc/AACEncoder.cpp
index 52204fa..b914023 100644
--- a/media/libstagefright/codecs/aacenc/AACEncoder.cpp
+++ b/media/libstagefright/codecs/aacenc/AACEncoder.cpp
@@ -139,6 +139,8 @@
 
     CHECK_EQ(OK, initCheck());
 
+    mNumInputSamples = 0;
+    mAnchorTimeUs = 0;
     mFrameCount = 0;
     mSource->start(params);
 
@@ -205,33 +207,65 @@
         buffer->set_range(0, 2);
         buffer->meta_data()->setInt32(kKeyIsCodecConfig, true);
         *out = buffer;
-        mInputBuffer = NULL;
         ++mFrameCount;
         return OK;
     } else if (mFrameCount == 1) {
         buffer->meta_data()->setInt32(kKeyIsCodecConfig, false);
     }
 
-    // XXX: We assume that the input buffer contains at least
-    // (actually, exactly) 1024 PCM samples. This needs to be fixed.
-    if (mInputBuffer == NULL) {
-        if (mSource->read(&mInputBuffer, options) != OK) {
-            LOGE("failed to read from input audio source");
-            return UNKNOWN_ERROR;
+    while (mNumInputSamples < kNumSamplesPerFrame) {
+        if (mInputBuffer == NULL) {
+            if (mSource->read(&mInputBuffer, options) != OK) {
+                if (mNumInputSamples == 0) {
+                    return ERROR_END_OF_STREAM;
+                }
+                memset(&mInputFrame[mNumInputSamples],
+                       0,
+                       sizeof(int16_t) * (kNumSamplesPerFrame - mNumInputSamples));
+                mNumInputSamples = 0;
+                break;
+            }
+
+            size_t align = mInputBuffer->range_length() % sizeof(int16_t);
+            CHECK_EQ(align, 0);
+
+            int64_t timeUs;
+            if (mInputBuffer->meta_data()->findInt64(kKeyTime, &timeUs)) {
+                mAnchorTimeUs = timeUs;
+            }
         }
+        size_t copy =
+            (kNumSamplesPerFrame - mNumInputSamples) * sizeof(int16_t);
+
+        if (copy > mInputBuffer->range_length()) {
+            copy = mInputBuffer->range_length();
+        }
+
+        memcpy(&mInputFrame[mNumInputSamples],
+               (const uint8_t *) mInputBuffer->data()
+                    + mInputBuffer->range_offset(),
+               copy);
+
+        mInputBuffer->set_range(
+               mInputBuffer->range_offset() + copy,
+               mInputBuffer->range_length() - copy);
+
         if (mInputBuffer->range_length() == 0) {
             mInputBuffer->release();
             mInputBuffer = NULL;
-            return ERROR_END_OF_STREAM;
         }
-        VO_CODECBUFFER inputData;
-        memset(&inputData, 0, sizeof(inputData));
-        inputData.Buffer = (unsigned char*) mInputBuffer->data();
-        inputData.Length = mInputBuffer->range_length();
-        CHECK(VO_ERR_NONE == mApiHandle->SetInputData(mEncoderHandle,&inputData));
+        mNumInputSamples += copy / sizeof(int16_t);
+        if (mNumInputSamples >= kNumSamplesPerFrame) {
+            mNumInputSamples %= kNumSamplesPerFrame;
+            break;
+        }
     }
 
-    CHECK(mInputBuffer != NULL);
+    VO_CODECBUFFER inputData;
+    memset(&inputData, 0, sizeof(inputData));
+    inputData.Buffer = (unsigned char*) mInputFrame;
+    inputData.Length = kNumSamplesPerFrame * sizeof(int16_t);
+    CHECK(VO_ERR_NONE == mApiHandle->SetInputData(mEncoderHandle,&inputData));
 
     VO_CODECBUFFER outputData;
     memset(&outputData, 0, sizeof(outputData));
@@ -239,24 +273,14 @@
     memset(&outputInfo, 0, sizeof(outputInfo));
 
     VO_U32 ret = VO_ERR_NONE;
-    int32_t outputLength = 0;
     outputData.Buffer = outPtr;
     outputData.Length = buffer->size();
     ret = mApiHandle->GetOutputData(mEncoderHandle, &outputData, &outputInfo);
-    if (ret == VO_ERR_NONE || ret == VO_ERR_INPUT_BUFFER_SMALL) {
-        outputLength += outputData.Length;
-        if (ret == VO_ERR_INPUT_BUFFER_SMALL) {  // All done
-            mInputBuffer->release();
-            mInputBuffer = NULL;
-        }
-    } else {
-        LOGE("failed to encode the input data 0x%lx", ret);
-    }
+    CHECK(ret == VO_ERR_NONE || ret == VO_ERR_INPUT_BUFFER_SMALL);
+    CHECK(outputData.Length != 0);
+    buffer->set_range(0, outputData.Length);
 
-    buffer->set_range(0, outputLength);
-
-    // Each output frame compresses 1024 input PCM samples.
-    int64_t timestampUs = ((mFrameCount - 1) * 1000000LL * 1024) / mSampleRate;
+    int64_t timestampUs = ((mFrameCount - 1) * 1000000LL * kNumSamplesPerFrame) / mSampleRate;
     ++mFrameCount;
     buffer->meta_data()->setInt64(kKeyTime, timestampUs);
 
diff --git a/media/libstagefright/include/AACEncoder.h b/media/libstagefright/include/AACEncoder.h
index 211a332..ecc533f 100644
--- a/media/libstagefright/include/AACEncoder.h
+++ b/media/libstagefright/include/AACEncoder.h
@@ -52,6 +52,16 @@
         int32_t           mChannels;
         int32_t           mBitRate;
         int32_t           mFrameCount;
+
+        int64_t           mAnchorTimeUs;
+        int64_t           mNumInputSamples;
+
+        enum {
+            kNumSamplesPerFrame = 1024,
+        };
+
+        int16_t           mInputFrame[kNumSamplesPerFrame];
+
         uint8_t           mAudioSpecificConfigData[2]; // auido specific data
         void             *mEncoderHandle;
         VO_AUDIO_CODECAPI *mApiHandle;