codec2: component H264 decoder implementation

Bug: 63828240
Test: run codec2 binary

Change-Id: Ia5f6797b817eb01bcab69354baea31e7daf01a0f
diff --git a/Android.mk b/Android.mk
index 1a036c3..32b3d74 100644
--- a/Android.mk
+++ b/Android.mk
@@ -10,6 +10,7 @@
         $(TOP)/external/gtest/include \
         $(TOP)/external/v4l2_codec2 \
         $(TOP)/external/v4l2_codec2/vda \
+        $(TOP)/external/v4l2_codec2/vndk/include \
         $(TOP)/frameworks/av/media/libstagefright/codec2/include \
         $(TOP)/frameworks/av/media/libstagefright/codec2/vndk/include \
         $(TOP)/frameworks/av/media/libstagefright/include \
@@ -17,7 +18,8 @@
 LOCAL_MODULE:= libv4l2_codec2
 LOCAL_MODULE_TAGS := optional
 
-LOCAL_SHARED_LIBRARIES := libchrome \
+LOCAL_SHARED_LIBRARIES := libbinder \
+                          libchrome \
                           liblog \
                           libmedia \
                           libstagefright \
@@ -25,9 +27,10 @@
                           libstagefright_foundation \
                           libutils \
                           libv4l2_codec2_vda \
+                          libv4l2_codec2_vndk \
 
 # -Wno-unused-parameter is needed for libchrome/base codes
-LOCAL_CFLAGS += -Werror -Wall -Wno-unused-parameter
+LOCAL_CFLAGS += -Werror -Wall -Wno-unused-parameter -std=c++14
 LOCAL_CLANG := true
 LOCAL_SANITIZE := unsigned-integer-overflow signed-integer-overflow
 
@@ -35,4 +38,5 @@
 
 include $(BUILD_SHARED_LIBRARY)
 
-include $(call all-makefiles-under,$(LOCAL_PATH))
+include $(TOP)/external/v4l2_codec2/vda/Android.mk
+include $(TOP)/external/v4l2_codec2/vndk/Android.mk
diff --git a/C2VDAComponent.cpp b/C2VDAComponent.cpp
index ca8e98a..127350c 100644
--- a/C2VDAComponent.cpp
+++ b/C2VDAComponent.cpp
@@ -6,10 +6,12 @@
 #define LOG_TAG "C2VDAComponent"
 
 #include <algorithm>
+#include <inttypes.h>
 
 #include "C2VDAAdaptor.h"
 #define __C2_GENERATE_GLOBAL_VARS__
 #include "C2VDAComponent.h"
+#include "C2VDASupport.h"
 #include "base/bind.h"
 #include "base/bind_helpers.h"
 #include "videodev2.h"
@@ -31,10 +33,9 @@
             | param->type();
 }
 
-
 // Helper function to allocate string type parameters.
 template <class T>
-std::unique_ptr<T> alloc_unique_cstr(const char* cstr) {
+std::unique_ptr<T> allocUniqueCstr(const char* cstr) {
     size_t len = strlen(cstr);
     std::unique_ptr<T> ptr = T::alloc_unique(len);
     memcpy(ptr->m.mValue, cstr, len);
@@ -61,6 +62,9 @@
 
 // Helper function to find int32_t value from C2Value::Primitive vector.
 bool findInt32FromPrimitiveValues(const int32_t& v, const C2FieldSupportedValues& values) {
+    if (values.type == C2FieldSupportedValues::EMPTY) {
+        return false;
+    }
     if (values.type == C2FieldSupportedValues::FLAGS) {
         ALOGE("Type of field supported values should not be FLAGS.");
         return false;
@@ -71,16 +75,15 @@
                ((v - values.range.min.i32) % values.range.step.i32 == 0);
     }
     // if values.type == C2FieldSupportedValues::VALUES
-    for (const auto& value : values.values) {
-        if (value.i32 == v) {
-            return true;
-        }
-    }
-    return false;
+    return std::any_of(values.values.begin(), values.values.end(),
+                       [v=v](const auto& value) { return value.i32 == v; });
 }
 
 // Helper function to find uint32_t value from C2Value::Primitive vector.
 bool findUint32FromPrimitiveValues(const uint32_t& v, const C2FieldSupportedValues& values) {
+    if (values.type == C2FieldSupportedValues::EMPTY) {
+        return false;
+    }
     if (values.type == C2FieldSupportedValues::FLAGS) {
         ALOGE("Type of field supported values should not be FLAGS.");
         return false;
@@ -91,39 +94,17 @@
                ((v - values.range.min.u32) % values.range.step.u32 == 0);
     }
     // if values.type == C2FieldSupportedValues::VALUES
-    for (const auto& value : values.values) {
-        if (value.u32 == v) {
-            return true;
-        }
-    }
-    return false;
+    return std::any_of(values.values.begin(), values.values.end(),
+                       [v=v](const auto& value) { return value.u32 == v; });
+}
+
+// Mask against 30 bits to avoid (undefined) wraparound on signed integer.
+int32_t frameIndexToBitstreamId(uint64_t frameIndex) {
+    return static_cast<int32_t>(frameIndex & 0x3FFFFFFF);
 }
 
 }  // namespace
 
-
-// Neglect flexible flag while matching parameter indices.
-#define CASE(paramType) \
-    case paramType::coreIndex: \
-        return std::unique_ptr<C2StructDescriptor>(new C2StructDescriptor{ \
-                paramType::coreIndex, paramType::fieldList, })
-
-class C2VDAComponentStore::ParamReflector : public C2ParamReflector {
-public:
-    virtual std::unique_ptr<C2StructDescriptor> describe(C2Param::BaseIndex coreIndex) override {
-        switch (coreIndex.coreIndex()) {
-            //CASE(C2ComponentDomainInfo);  //TODO: known codec2 framework bug
-            CASE(C2StreamFormatConfig);
-            CASE(C2VideoSizeStreamInfo);
-            CASE(C2PortMimeConfig);
-            CASE(C2MaxVideoSizeHintPortSetting);
-        }
-        return nullptr;
-    }
-};
-
-#undef CASE
-
 // static
 const uint32_t C2VDAComponentIntf::kInputFormatFourcc = V4L2_PIX_FMT_H264_SLICE;
 
@@ -133,8 +114,9 @@
       mDomainInfo(C2DomainVideo),
       mOutputColorFormat(0u, kColorFormatYUV420Flexible),
       // Support H264 only for now.
-      mInputPortMime(alloc_unique_cstr<C2PortMimeConfig::input>(MEDIA_MIMETYPE_VIDEO_AVC)),
-      mOutputPortMime(alloc_unique_cstr<C2PortMimeConfig::output>(MEDIA_MIMETYPE_VIDEO_RAW)),
+      mInputPortMime(allocUniqueCstr<C2PortMimeConfig::input>(MEDIA_MIMETYPE_VIDEO_AVC)),
+      mOutputPortMime(allocUniqueCstr<C2PortMimeConfig::output>(MEDIA_MIMETYPE_VIDEO_RAW)),
+      mOutputBlockPools(C2PortBlockPoolsTuning::output::alloc_unique({})),
       mSupportedProfiles(C2VDAAdaptor::GetSupportedProfiles(kInputFormatFourcc)) {
     // Get supported profiles from VDA.
     CHECK_GT(mSupportedProfiles.size(), 0u);
@@ -142,14 +124,14 @@
     // Set default codec profile.
     mInputCodecProfile.mValue = mSupportedProfiles[0].profile;
 
-    mMinVideoSize = mSupportedProfiles[0].min_resolution;
-    mMaxVideoSize = mSupportedProfiles[0].max_resolution;
+    auto minVideoSize = mSupportedProfiles[0].min_resolution;
+    auto maxVideoSize = mSupportedProfiles[0].max_resolution;
     // Set default output video size.
-    mVideoSize.mWidth = mMinVideoSize.width();
-    mVideoSize.mHeight = mMinVideoSize.height();
+    mVideoSize.mWidth = minVideoSize.width();
+    mVideoSize.mHeight = minVideoSize.height();
     // Set default max video size.
-    mMaxVideoSizeHint.mWidth = mMaxVideoSize.width();
-    mMaxVideoSizeHint.mHeight = mMaxVideoSize.height();
+    mMaxVideoSizeHint.mWidth = maxVideoSize.width();
+    mMaxVideoSizeHint.mHeight = maxVideoSize.height();
 
     for (const auto& supportedProfile : mSupportedProfiles) {
         mSupportedCodecProfiles.push_back(supportedProfile.profile);
@@ -168,30 +150,29 @@
     insertParam(mOutputPortMime.get());
 
     insertParam(&mInputCodecProfile);
-    // TODO(johnylin): C2FieldSupportedValues for mInputCodecProfile should be VALUES type. But
-    // right now VALUES type only accepts initializer_list argument, we cannot pass a vector.
     mSupportedValues.emplace(
             C2ParamField(&mInputCodecProfile, &C2StreamFormatConfig::mValue),
-            C2FieldSupportedValues(
-                    mSupportedCodecProfiles.front(), mSupportedCodecProfiles.back(), 1u));
+            C2FieldSupportedValues(false, mSupportedCodecProfiles));
 
     // TODO(johnylin): min/max resolution may change by chosen profile, we should dynamically change
     // the supported values in the future.
     insertParam(&mVideoSize);
     mSupportedValues.emplace(
             C2ParamField(&mVideoSize, &C2VideoSizeStreamInfo::mWidth),
-            C2FieldSupportedValues(mMinVideoSize.width(), mMaxVideoSize.width(), 16));
+            C2FieldSupportedValues(minVideoSize.width(), maxVideoSize.width(), 16));
     mSupportedValues.emplace(
             C2ParamField(&mVideoSize, &C2VideoSizeStreamInfo::mHeight),
-            C2FieldSupportedValues(mMinVideoSize.height(), mMaxVideoSize.height(), 16));
+            C2FieldSupportedValues(minVideoSize.height(), maxVideoSize.height(), 16));
 
     insertParam(&mMaxVideoSizeHint);
     mSupportedValues.emplace(
             C2ParamField(&mMaxVideoSizeHint, &C2MaxVideoSizeHintPortSetting::mWidth),
-            C2FieldSupportedValues(mMinVideoSize.width(), mMaxVideoSize.width(), 16));
+            C2FieldSupportedValues(minVideoSize.width(), maxVideoSize.width(), 16));
     mSupportedValues.emplace(
             C2ParamField(&mMaxVideoSizeHint, &C2MaxVideoSizeHintPortSetting::mHeight),
-            C2FieldSupportedValues(mMinVideoSize.height(), mMaxVideoSize.height(), 16));
+            C2FieldSupportedValues(minVideoSize.height(), maxVideoSize.height(), 16));
+
+    insertParam(mOutputBlockPools.get());
 
     mParamDescs.push_back(std::make_shared<C2ParamDescriptor>(
             true, "_domain", &mDomainInfo));
@@ -207,6 +188,8 @@
             false, "_video_size", &mVideoSize));
     mParamDescs.push_back(std::make_shared<C2ParamDescriptor>(
             false, "_max_video_size_hint", &mMaxVideoSizeHint));
+    mParamDescs.push_back(std::make_shared<C2ParamDescriptor>(
+            false, "_output_block_pools", mOutputBlockPools.get()));
 }
 
 C2String C2VDAComponentIntf::getName() const {
@@ -304,6 +287,13 @@
                 err = C2_BAD_VALUE;
                 continue;
             }
+        } else if (index == restoreIndex(mOutputBlockPools.get())) {
+            // setting output block pools
+            // TODO: add support for output-block-pools (this will be done when we move all
+            // config to shared ptr)
+            mOutputBlockPools.reset(
+                    static_cast<C2PortBlockPoolsTuning::output *>(C2Param::Copy(*param).release()));
+            continue;
         }
         myParam->updateFrom(*param);
     }
@@ -330,6 +320,7 @@
 
 c2_status_t C2VDAComponentIntf::querySupportedParams_nb(
         std::vector<std::shared_ptr<C2ParamDescriptor>>* const params) const {
+    params->clear();
     params->insert(params->begin(), mParamDescs.begin(), mParamDescs.end());
     return C2_OK;
 }
@@ -365,12 +356,12 @@
     if (!findInt32FromPrimitiveValues(videoSize->mWidth, widths)) {
         std::unique_ptr<C2SettingResult> result(
                 new C2SettingResult { C2SettingResult::BAD_VALUE,
-                                      { fieldWidth, nullptr /* supportedValues */ },
-                                      {} /* conflictinfFields */ });
-        result->field.values.reset(
-                new C2FieldSupportedValues(widths.range.min,
-                                           widths.range.max,
-                                           widths.range.step));
+                                      { fieldWidth,
+                                        std::make_unique<C2FieldSupportedValues>(
+                                                widths.range.min,
+                                                widths.range.max,
+                                                widths.range.step) },
+                                      {} /* conflicts */ });
         return result;
     }
 
@@ -380,12 +371,12 @@
     if (!findInt32FromPrimitiveValues(videoSize->mHeight, heights)) {
         std::unique_ptr<C2SettingResult> result(
                 new C2SettingResult { C2SettingResult::BAD_VALUE,
-                                      { fieldHeight, nullptr /* supportedValues */ },
-                                      {} /* conflictinfFields */ });
-        result->field.values.reset(
-                new C2FieldSupportedValues(heights.range.min,
-                                           heights.range.max,
-                                           heights.range.step));
+                                      { fieldHeight,
+                                        std::make_unique<C2FieldSupportedValues>(
+                                                heights.range.min,
+                                                heights.range.max,
+                                                heights.range.step) },
+                                      {} /* conflicts */ });
         return result;
     }
 
@@ -399,16 +390,20 @@
 
     C2ParamField field(config, &T::mValue);
     const C2FieldSupportedValues &configs = mSupportedValues.at(field);
-    CHECK_EQ(configs.type, C2FieldSupportedValues::RANGE);
     if (!findUint32FromPrimitiveValues(config->mValue, configs)) {
         std::unique_ptr<C2SettingResult> result(
                 new C2SettingResult { C2SettingResult::BAD_VALUE,
-                                      { field, nullptr /* supportedValues */ },
-                                      {} /* conflictinfFields */ });
-        result->field.values.reset(
-                new C2FieldSupportedValues(configs.range.min,
-                                           configs.range.max,
-                                           configs.range.step));
+                                      { field, nullptr },
+                                      {} /* conflicts */ });
+        if (configs.type == C2FieldSupportedValues::RANGE) {
+            result->field.values.reset(new C2FieldSupportedValues(configs.range.min,
+                                                                  configs.range.max,
+                                                                  configs.range.step));
+        } else if (configs.type == C2FieldSupportedValues::VALUES) {
+            result->field.values.reset(new C2FieldSupportedValues(false, configs.values));
+        } else {
+            return nullptr;
+        }
         return result;
     }
 
@@ -416,6 +411,42 @@
 }
 
 ////////////////////////////////////////////////////////////////////////////////
+#define EXPECT_STATE_OR_RETURN_ON_ERROR(x) \
+    do { \
+        if (mComponentState == ComponentState::ERROR) return; \
+        CHECK_EQ(mComponentState, ComponentState::x); \
+    } while (0)
+
+#define EXPECT_RUNNING_OR_RETURN_ON_ERROR() \
+    do { \
+        if (mComponentState == ComponentState::ERROR) return; \
+        CHECK_NE(mComponentState, ComponentState::UNINITIALIZED); \
+    } while (0)
+
+class C2VDAGraphicBuffer : public C2Buffer {
+public:
+    C2VDAGraphicBuffer(const std::shared_ptr<C2GraphicBlock>& block,
+                       const base::Closure& releaseCB);
+    ~C2VDAGraphicBuffer() override;
+private:
+    base::Closure mReleaseCB;
+};
+
+C2VDAGraphicBuffer::C2VDAGraphicBuffer(const std::shared_ptr<C2GraphicBlock>& block,
+                                       const base::Closure& releaseCB)
+    : C2Buffer({ block->share(C2Rect(block->width(), block->height()), C2Fence()) }),
+      mReleaseCB(releaseCB) {}
+
+C2VDAGraphicBuffer::~C2VDAGraphicBuffer() {
+    if (!mReleaseCB.is_null()) {
+      mReleaseCB.Run();
+    }
+}
+
+C2VDAComponent::VideoFormat::VideoFormat(uint32_t pixelFormat, uint32_t minNumBuffers,
+                                         media::Size codedSize, media::Rect visibleRect)
+    : mPixelFormat(pixelFormat), mMinNumBuffers(minNumBuffers), mCodedSize(codedSize),
+      mVisibleRect(visibleRect) {}
 
 C2VDAComponent::C2VDAComponent(C2String name,
                                c2_node_id_t id)
@@ -423,8 +454,11 @@
       mThread("C2VDAComponentThread"),
       mVDAInitResult(VideoDecodeAcceleratorAdaptor::Result::ILLEGAL_STATE),
       mComponentState(ComponentState::UNINITIALIZED),
+      mColorFormat(0u),
+      mLastOutputTimestamp(-1),
       mCodecProfile(media::VIDEO_CODEC_PROFILE_UNKNOWN),
-      mState(State::UNLOADED) {
+      mState(State::UNLOADED),
+      mWeakThisFactory(this) {
     if (!mThread.Start()) {
         ALOGE("Component thread failed to start");
         return;
@@ -442,10 +476,9 @@
                 FROM_HERE, base::Bind(&C2VDAComponent::onDestroy, base::Unretained(this)));
         mThread.Stop();
     }
-    CHECK(!mThread.IsRunning());
 }
 
-void C2VDAComponent::getParameters() {
+void C2VDAComponent::fetchParametersFromIntf() {
     C2StreamFormatConfig::input codecProfile;
     std::vector<C2Param* const> stackParams{ &codecProfile };
     CHECK_EQ(mIntf->query_nb(stackParams, {}, nullptr), C2_OK);
@@ -464,7 +497,10 @@
 void C2VDAComponent::onDestroy() {
     DCHECK(mTaskRunner->BelongsToCurrentThread());
     ALOGV("onDestroy");
-    mVDAAdaptor.reset(nullptr);
+    if (mVDAAdaptor.get()) {
+        mVDAAdaptor->destroy();
+        mVDAAdaptor.reset(nullptr);
+    }
 }
 
 void C2VDAComponent::onStart(media::VideoCodecProfile profile, base::WaitableEvent* done) {
@@ -477,37 +513,527 @@
     if (mVDAInitResult == VideoDecodeAcceleratorAdaptor::Result::SUCCESS) {
         mComponentState = ComponentState::STARTED;
     }
+
     done->Signal();
 }
 
+void C2VDAComponent::onQueueWork(std::unique_ptr<C2Work> work) {
+    DCHECK(mTaskRunner->BelongsToCurrentThread());
+    ALOGV("onQueueWork");
+    EXPECT_RUNNING_OR_RETURN_ON_ERROR();
+    // It is illegal for client to put new works while component is still flushing.
+    CHECK_NE(mComponentState, ComponentState::FLUSHING);
+    mQueue.emplace(std::move(work));
+    // TODO(johnylin): set a maximum size of mQueue and check if mQueue is already full.
+
+    mTaskRunner->PostTask(FROM_HERE,
+                          base::Bind(&C2VDAComponent::onDequeueWork, base::Unretained(this)));
+}
+
+void C2VDAComponent::onDequeueWork() {
+    DCHECK(mTaskRunner->BelongsToCurrentThread());
+    ALOGV("onDequeueWork");
+    EXPECT_RUNNING_OR_RETURN_ON_ERROR();
+    if (mQueue.empty()) {
+        return;
+    }
+    if (mComponentState == ComponentState::DRAINING) {
+        ALOGV("Temporarily stop dequeueing works since component is draining.");
+        return;
+    }
+    if (mComponentState != ComponentState::STARTED) {
+        ALOGE("Work queue should be empty if the component is not in STARTED state.");
+        return;
+    }
+
+    // Dequeue a work from mQueue.
+    std::unique_ptr<C2Work> work(std::move(mQueue.front()));
+    mQueue.pop();
+
+    // Send input buffer to VDA for decode.
+    // Use frame_index as bitstreamId.
+    CHECK_EQ(work->input.buffers.size(), 1u);
+    C2ConstLinearBlock linearBlock = work->input.buffers.front()->data().linearBlocks().front();
+    if (linearBlock.size() > 0) {
+        int32_t bitstreamId = frameIndexToBitstreamId(work->input.ordinal.frame_index);
+        sendInputBufferToAccelerator(linearBlock, bitstreamId);
+    }
+
+    if (work->input.flags & C2BufferPack::FLAG_END_OF_STREAM) {
+        mVDAAdaptor->flush();
+        mComponentState = ComponentState::DRAINING;
+    }
+
+    // Put work to mPendingWork.
+    mPendingWorks.emplace_back(std::move(work));
+
+    if (!mQueue.empty()) {
+        mTaskRunner->PostTask(FROM_HERE,
+                              base::Bind(&C2VDAComponent::onDequeueWork, base::Unretained(this)));
+    }
+}
+
+void C2VDAComponent::onInputBufferDone(int32_t bitstreamId) {
+    DCHECK(mTaskRunner->BelongsToCurrentThread());
+    ALOGV("onInputBufferDone: bitstream id=%d", bitstreamId);
+    EXPECT_RUNNING_OR_RETURN_ON_ERROR();
+
+    C2Work* work = getPendingWorkByBitstreamId(bitstreamId);
+    if (!work) {
+        reportError(C2_CORRUPTED);
+        return;
+    }
+
+    // When the work is done, the input buffers vector shall be cleared by component.
+    work->input.buffers.clear();
+
+    reportFinishedWorkIfAny();
+}
+
+// This is used as callback while output buffer is released by client.
+// TODO(johnylin): consider to use C2Buffer::registerOnDestroyNotify instead
+void C2VDAComponent::returnOutputBuffer(int32_t pictureBufferId) {
+    mTaskRunner->PostTask(FROM_HERE, base::Bind(&C2VDAComponent::onOutputBufferReturned,
+                                                base::Unretained(this), pictureBufferId));
+}
+
+void C2VDAComponent::onOutputBufferReturned(int32_t pictureBufferId) {
+    DCHECK(mTaskRunner->BelongsToCurrentThread());
+    ALOGV("onOutputBufferReturned: picture id=%d", pictureBufferId);
+    EXPECT_RUNNING_OR_RETURN_ON_ERROR();
+
+    GraphicBlockInfo* info = getGraphicBlockById(pictureBufferId);
+    if (!info) {
+        reportError(C2_CORRUPTED);
+        return;
+    }
+    CHECK_EQ(info->mState, GraphicBlockInfo::State::OWNED_BY_CLIENT);
+    info->mState = GraphicBlockInfo::State::OWNED_BY_COMPONENT;
+
+    if (mPendingOutputFormat) {
+        tryChangeOutputFormat();
+    } else {
+        sendOutputBufferToAccelerator(info);
+    }
+}
+
+void C2VDAComponent::onOutputBufferDone(int32_t pictureBufferId, int32_t bitstreamId) {
+    DCHECK(mTaskRunner->BelongsToCurrentThread());
+    ALOGV("onOutputBufferDone: picture id=%d, bitstream id=%d", pictureBufferId, bitstreamId);
+    EXPECT_RUNNING_OR_RETURN_ON_ERROR();
+
+    C2Work* work = getPendingWorkByBitstreamId(bitstreamId);
+    if (!work) {
+        reportError(C2_CORRUPTED);
+        return;
+    }
+    GraphicBlockInfo* info = getGraphicBlockById(pictureBufferId);
+    if (!info) {
+        reportError(C2_CORRUPTED);
+        return;
+    }
+    CHECK_EQ(info->mState, GraphicBlockInfo::State::OWNED_BY_ACCELERATOR);
+    // Output buffer will be passed to client soon along with mListener->onWorkDone_nb().
+    info->mState = GraphicBlockInfo::State::OWNED_BY_CLIENT;
+
+    // Attach output buffer to the work corresponded to bitstreamId.
+    CHECK_EQ(work->worklets.size(), 1u);
+    work->worklets.front()->output.buffers.clear();
+    work->worklets.front()->output.buffers.emplace_back(
+            std::make_shared<C2VDAGraphicBuffer>(info->mGraphicBlock,
+                                                 base::Bind(&C2VDAComponent::returnOutputBuffer,
+                                                            mWeakThisFactory.GetWeakPtr(),
+                                                            pictureBufferId)));
+    work->worklets.front()->output.ordinal = work->input.ordinal;
+    work->worklets_processed = 1u;
+
+    int64_t currentTimestamp = base::checked_cast<int64_t>(work->input.ordinal.timestamp);
+    CHECK_GE(currentTimestamp, mLastOutputTimestamp);
+    mLastOutputTimestamp = currentTimestamp;
+
+    reportFinishedWorkIfAny();
+}
+
+void C2VDAComponent::onDrain() {
+    DCHECK(mTaskRunner->BelongsToCurrentThread());
+    ALOGV("onDrain");
+    EXPECT_STATE_OR_RETURN_ON_ERROR(STARTED);
+
+    // Set input flag as C2BufferPack::FLAG_END_OF_STREAM to the last queued work. If mQueue is
+    // empty, set to the last work in mPendingWorks and then signal flush immediately.
+    if (!mQueue.empty()) {
+        mQueue.back()->input.flags = static_cast<C2BufferPack::flags_t>(
+                mQueue.back()->input.flags | C2BufferPack::FLAG_END_OF_STREAM);
+    } else if (!mPendingWorks.empty()) {
+        C2Work* work = getPendingWorkLastToFinish();
+        if (!work) {
+            reportError(C2_CORRUPTED);
+            return;
+        }
+        mPendingWorks.back()->input.flags = static_cast<C2BufferPack::flags_t>(
+                mPendingWorks.back()->input.flags | C2BufferPack::FLAG_END_OF_STREAM);
+        mVDAAdaptor->flush();
+        mComponentState = ComponentState::DRAINING;
+    } else {
+        // Do nothing.
+        ALOGV("No buffers in VDA, drain takes no effect.");
+    }
+}
+
+void C2VDAComponent::onDrainDone() {
+    DCHECK(mTaskRunner->BelongsToCurrentThread());
+    ALOGV("onDrainDone");
+    if (mComponentState == ComponentState::DRAINING) {
+        mComponentState = ComponentState::STARTED;
+    } else if (mComponentState == ComponentState::STOPPING) {
+        // The client signals stop right before VDA notifies drain done. Let stop process goes.
+        return;
+    } else {
+        ALOGE("Unexpected state while onDrainDone(). State=%d", mComponentState);
+        reportError(C2_BAD_STATE);
+        return;
+    }
+
+    // Last stream is finished. Reset the timestamp record.
+    mLastOutputTimestamp = -1;
+
+    // Work dequeueing was stopped while component draining. Restart it.
+    mTaskRunner->PostTask(FROM_HERE,
+                          base::Bind(&C2VDAComponent::onDequeueWork, base::Unretained(this)));
+}
+
+void C2VDAComponent::onFlush() {
+    DCHECK(mTaskRunner->BelongsToCurrentThread());
+    ALOGV("onFlush");
+    EXPECT_STATE_OR_RETURN_ON_ERROR(STARTED);
+
+    mVDAAdaptor->reset();
+    // Pop all works in mQueue and put into mPendingWorks.
+    while (!mQueue.empty()) {
+        mPendingWorks.emplace_back(std::move(mQueue.front()));
+        mQueue.pop();
+    }
+    mComponentState = ComponentState::FLUSHING;
+}
+
 void C2VDAComponent::onStop(base::WaitableEvent* done) {
     DCHECK(mTaskRunner->BelongsToCurrentThread());
     ALOGV("onStop");
-    CHECK(mComponentState != ComponentState::UNINITIALIZED);
+    EXPECT_RUNNING_OR_RETURN_ON_ERROR();
+
     mVDAAdaptor->reset();
+    // Pop all works in mQueue and put into mPendingWorks.
+    while (!mQueue.empty()) {
+        mPendingWorks.emplace_back(std::move(mQueue.front()));
+        mQueue.pop();
+    }
+
     mStopDoneEvent = done;  // restore done event which shoud be signaled in onStopDone().
     mComponentState = ComponentState::STOPPING;
 }
 
-void C2VDAComponent::onStopDone() {
+void C2VDAComponent::onResetDone() {
     DCHECK(mTaskRunner->BelongsToCurrentThread());
+    if (mComponentState == ComponentState::ERROR) {
+        return;
+    }
+    if (mComponentState == ComponentState::FLUSHING) {
+        onFlushDone();
+    } else if (mComponentState == ComponentState::STOPPING) {
+        onStopDone();
+    } else {
+        reportError(C2_CORRUPTED);
+    }
+}
+
+void C2VDAComponent::onFlushDone() {
+    ALOGV("onFlushDone");
+    reportAbandonedWorks();
+    // Reset the timestamp record.
+    mLastOutputTimestamp = -1;
+    mComponentState = ComponentState::STARTED;
+}
+
+void C2VDAComponent::onStopDone() {
     ALOGV("onStopDone");
-    CHECK(mComponentState == ComponentState::STOPPING);
     CHECK(mStopDoneEvent);
-    mVDAAdaptor->destroy();
+
+    // Release the graphic block allocator object.
+    mOutputBlockPool.reset();
+
+    // TODO(johnylin): At this moment, there may be C2Buffer still owned by client, do we need to
+    // do something for them?
+    reportAbandonedWorks();
+    mPendingOutputFormat.reset();
+    mColorFormat = 0u;
+    mLastOutputTimestamp = -1;
+    if (mVDAAdaptor.get()) {
+        mVDAAdaptor->destroy();
+        mVDAAdaptor.reset(nullptr);
+    }
+
+    mGraphicBlocks.clear();
+
     mStopDoneEvent->Signal();
     mStopDoneEvent = nullptr;
     mComponentState = ComponentState::UNINITIALIZED;
 }
 
 c2_status_t C2VDAComponent::setListener_sm(const std::shared_ptr<C2Component::Listener> &listener) {
+    // TODO(johnylin): API says this method must be supported in all states, however I'm quite not
+    //                 sure what is the use case.
+    if (mState != State::LOADED) {
+        return C2_BAD_STATE;
+    }
     mListener = listener;
-    return C2_OMITTED;
+    return C2_OK;
+}
+
+void C2VDAComponent::sendInputBufferToAccelerator(const C2ConstLinearBlock& input,
+                                                  int32_t bitstreamId) {
+    ALOGV("sendInputBufferToAccelerator");
+    int dupFd = dup(input.handle()->data[0]);
+    if (dupFd < 0) {
+        ALOGE("Failed to dup(%d) input buffer (bitstreamId=%d), errno=%d",
+              input.handle()->data[0], bitstreamId, errno);
+        reportError(C2_CORRUPTED);
+        return;
+    }
+    ALOGV("Decode bitstream ID: %d, offset: %u size: %u",
+          bitstreamId, input.offset(), input.size());
+    mVDAAdaptor->decode(bitstreamId, dupFd, input.offset(), input.size());
+}
+
+C2Work* C2VDAComponent::getPendingWorkByBitstreamId(int32_t bitstreamId) {
+    auto workIter = std::find_if(
+            mPendingWorks.begin(), mPendingWorks.end(),
+            [bitstreamId](const std::unique_ptr<C2Work>& w) {
+                return frameIndexToBitstreamId(w->input.ordinal.frame_index) == bitstreamId; });
+
+    if (workIter == mPendingWorks.end()) {
+        ALOGE("Can't find pending work by bitstream ID: %d", bitstreamId);
+        return nullptr;
+    }
+    return workIter->get();
+}
+
+C2Work* C2VDAComponent::getPendingWorkLastToFinish() {
+    // Get the work with largest timestamp.
+    auto workIter = std::max_element(
+            mPendingWorks.begin(),
+            mPendingWorks.end(),
+            [](const auto& w1, const auto& w2) {
+                    return w1->input.ordinal.timestamp < w2->input.ordinal.timestamp; });
+
+    if (workIter == mPendingWorks.end()) {
+        ALOGE("Can't get last finished work from mPendingWork");
+        return nullptr;
+    }
+    return workIter->get();
+}
+
+C2VDAComponent::GraphicBlockInfo* C2VDAComponent::getGraphicBlockById(int32_t blockId) {
+    if (blockId < 0 || blockId >= static_cast<int32_t>(mGraphicBlocks.size())){
+        ALOGE("getGraphicBlockById failed: id=%d", blockId);
+        return nullptr;
+    }
+    return &mGraphicBlocks[blockId];
+}
+
+void C2VDAComponent::onOutputFormatChanged(std::unique_ptr<VideoFormat> format) {
+    DCHECK(mTaskRunner->BelongsToCurrentThread());
+    ALOGV("onOutputFormatChanged");
+    EXPECT_RUNNING_OR_RETURN_ON_ERROR();
+
+    ALOGV("New output format(pixel_format=0x%x, min_num_buffers=%u, coded_size=%s, crop_rect=%s)",
+          format->mPixelFormat, format->mMinNumBuffers, format->mCodedSize.ToString().c_str(),
+          format->mVisibleRect.ToString().c_str());
+
+    for (auto& info : mGraphicBlocks) {
+        if (info.mState == GraphicBlockInfo::State::OWNED_BY_ACCELERATOR)
+            info.mState = GraphicBlockInfo::State::OWNED_BY_COMPONENT;
+    }
+
+    CHECK(!mPendingOutputFormat);
+    mPendingOutputFormat = std::move(format);
+    tryChangeOutputFormat();
+}
+
+void C2VDAComponent::tryChangeOutputFormat() {
+    DCHECK(mTaskRunner->BelongsToCurrentThread());
+    ALOGV("tryChangeOutputFormat");
+    CHECK(mPendingOutputFormat);
+
+    // Change the output format only after all output buffers are returned
+    // from clients.
+    for (const auto& info : mGraphicBlocks) {
+        if (info.mState == GraphicBlockInfo::State::OWNED_BY_CLIENT) {
+            ALOGV("wait buffer: %d for output format change", info.mBlockId);
+            return;
+        }
+    }
+
+    uint32_t colorFormat;
+    int bufferFormat;
+    switch (mPendingOutputFormat->mPixelFormat) {
+    case HAL_PIXEL_FORMAT_YCbCr_420_888:
+        colorFormat = kColorFormatYUV420Flexible;
+        bufferFormat = HAL_PIXEL_FORMAT_YCbCr_420_888;
+        break;
+    default:
+        ALOGE("pixel format: 0x%x is not supported", mPendingOutputFormat->mPixelFormat);
+        reportError(C2_OMITTED);
+        return;
+    }
+
+    mOutputFormat.mPixelFormat = mPendingOutputFormat->mPixelFormat;
+    mOutputFormat.mMinNumBuffers = mPendingOutputFormat->mMinNumBuffers;
+    mOutputFormat.mCodedSize = mPendingOutputFormat->mCodedSize;
+
+    setOutputFormatCrop(mPendingOutputFormat->mVisibleRect);
+    mColorFormat = colorFormat;
+
+    c2_status_t err = allocateBuffersFromBlockAllocator(mPendingOutputFormat->mCodedSize,
+                                                        bufferFormat);
+    if (err != C2_OK) {
+        reportError(err);
+        return;
+    }
+
+    for (auto& info : mGraphicBlocks) {
+        sendOutputBufferToAccelerator(&info);
+    }
+    mPendingOutputFormat.reset();
+}
+
+c2_status_t C2VDAComponent::allocateBuffersFromBlockAllocator(const media::Size& size,
+                                                              int pixelFormat) {
+    ALOGV("allocateBuffersFromBlockAllocator(%s, 0x%x)", size.ToString().c_str(), pixelFormat);
+
+    size_t bufferCount = mOutputFormat.mMinNumBuffers + kDpbOutputBufferExtraCount;
+
+    // Allocate the output buffers.
+    mVDAAdaptor->assignPictureBuffers(bufferCount);
+
+    // TODO: lock access to interface
+    C2BlockPool::local_id_t poolId = mIntf->mOutputBlockPools->flexCount() ?
+            mIntf->mOutputBlockPools->m.mValues[0] : C2BlockPool::BASIC_GRAPHIC;
+    ALOGI("Using C2BlockPool ID = %" PRIu64 " for allocating output buffers", poolId);
+    c2_status_t err;
+    if (!mOutputBlockPool || mOutputBlockPool->getLocalId() != poolId) {
+        err = getCodec2BlockPool(poolId, shared_from_this(), &mOutputBlockPool);
+        if (err != C2_OK) {
+            ALOGE("Graphic block allocator is invalid");
+            reportError(err);
+            return err;
+        }
+    }
+
+    mGraphicBlocks.clear();
+    for (size_t i = 0; i < bufferCount; ++i) {
+        std::shared_ptr<C2GraphicBlock> block;
+        C2MemoryUsage usage = { C2MemoryUsage::kSoftwareRead, 0 };
+        err = mOutputBlockPool->fetchGraphicBlock(size.width(), size.height(), pixelFormat, usage,
+                                                  &block);
+        if (err != C2_OK) {
+            mGraphicBlocks.clear();
+            ALOGE("failed to allocate buffer: %d", err);
+            reportError(err);
+            return err;
+        }
+        appendOutputBuffer(std::move(block));
+    }
+    mOutputFormat.mMinNumBuffers = bufferCount;
+    return C2_OK;
+}
+
+void C2VDAComponent::appendOutputBuffer(std::shared_ptr<C2GraphicBlock> block) {
+    GraphicBlockInfo info;
+    info.mBlockId = mGraphicBlocks.size();
+    info.mGraphicBlock = std::move(block);
+
+    const C2GraphicView& view = info.mGraphicBlock->map().get();
+    const uint8_t* const* data = view.data();
+    CHECK_NE(data, nullptr);
+    const C2PlaneLayout& layout = view.layout();
+
+    ALOGV("allocate graphic buffer: %p, id: %u, size: %dx%d",
+           info.mGraphicBlock->handle(), info.mBlockId,
+           info.mGraphicBlock->width(), info.mGraphicBlock->height());
+
+    // get offset from data pointers
+    uint32_t offsets[C2PlaneLayout::MAX_NUM_PLANES];
+    auto baseAddress = reinterpret_cast<intptr_t>(data[0]);
+    for (uint32_t i = 0; i < layout.mNumPlanes; ++i) {
+        auto planeAddress = reinterpret_cast<intptr_t>(data[i]);
+        offsets[i] = static_cast<uint32_t>(planeAddress - baseAddress);
+    }
+
+    for (uint32_t i = 0; i < layout.mNumPlanes; ++i) {
+        ALOGV("plane %u: stride: %d, offset: %u", i, layout.mPlanes[i].mRowInc, offsets[i]);
+    }
+
+    base::ScopedFD passedHandle(dup(info.mGraphicBlock->handle()->data[0]));
+    if (!passedHandle.is_valid()) {
+        ALOGE("Failed to dup(%d), errno=%d", info.mGraphicBlock->handle()->data[0], errno);
+        reportError(C2_CORRUPTED);
+        return;
+    }
+    std::vector<VideoFramePlane> passedPlanes;
+    for (uint32_t i = 0; i < layout.mNumPlanes; ++i) {
+        CHECK_GT(layout.mPlanes[i].mRowInc, 0);
+        passedPlanes.push_back({ offsets[i], static_cast<uint32_t>(layout.mPlanes[i].mRowInc) });
+    }
+    info.mHandle = std::move(passedHandle);
+    info.mPlanes = std::move(passedPlanes);
+
+    mGraphicBlocks.push_back(std::move(info));
+}
+
+void C2VDAComponent::sendOutputBufferToAccelerator(GraphicBlockInfo* info) {
+    ALOGV("sendOutputBufferToAccelerator index=%d", info->mBlockId);
+    CHECK_EQ(info->mState, GraphicBlockInfo::State::OWNED_BY_COMPONENT);
+    info->mState = GraphicBlockInfo::State::OWNED_BY_ACCELERATOR;
+
+    // is_valid() is true for the first time the buffer is passed to VDA. In that case, VDA needs to
+    // import the buffer first.
+    if (info->mHandle.is_valid()) {
+        mVDAAdaptor->importBufferForPicture(
+                info->mBlockId, info->mHandle.release(), info->mPlanes);
+    } else {
+        mVDAAdaptor->reusePictureBuffer(info->mBlockId);
+    }
+}
+
+void C2VDAComponent::onVisibleRectChanged(const media::Rect& cropRect) {
+    DCHECK(mTaskRunner->BelongsToCurrentThread());
+    ALOGV("onVisibleRectChanged");
+    EXPECT_RUNNING_OR_RETURN_ON_ERROR();
+
+    // We should make sure there is no pending output format change. That is, the input cropRect is
+    // corresponding to current output format.
+    CHECK(mPendingOutputFormat == nullptr);
+    setOutputFormatCrop(cropRect);
+}
+
+void C2VDAComponent::setOutputFormatCrop(const media::Rect& cropRect) {
+    ALOGV("setOutputFormatCrop(%dx%d)", cropRect.width(), cropRect.height());
+    mOutputFormat.mVisibleRect = cropRect;
+    // TODO(johnylin): what else do we need to do? crop rect could be an info requested from
+    // framework by requestedInfos in worklets.
 }
 
 c2_status_t C2VDAComponent::queue_nb(std::list<std::unique_ptr<C2Work>>* const items) {
-    UNUSED(items);
-    return C2_OMITTED;
+    if (mState != State::RUNNING) {
+        return C2_BAD_STATE;
+    }
+    while (!items->empty()) {
+        mTaskRunner->PostTask(FROM_HERE, base::Bind(&C2VDAComponent::onQueueWork,
+                                                    base::Unretained(this),
+                                                    base::Passed(&items->front())));
+        items->pop_front();
+    }
+    return C2_OK;
 }
 
 c2_status_t C2VDAComponent::announce_nb(const std::vector<C2WorkOutline>& items) {
@@ -517,16 +1043,26 @@
 
 c2_status_t C2VDAComponent::flush_sm(
         flush_mode_t mode, std::list<std::unique_ptr<C2Work>>* const flushedWork) {
-    if (mode != FLUSH_COMPONENT)
+    if (mode != FLUSH_COMPONENT) {
         return C2_OMITTED;  // Tunneling is not supported by now
-    UNUSED(flushedWork);
-    return C2_OMITTED;
+    }
+    if (mState != State::RUNNING) {
+        return C2_BAD_STATE;
+    }
+    mTaskRunner->PostTask(FROM_HERE, base::Bind(&C2VDAComponent::onFlush, base::Unretained(this)));
+    // Instead of |flushedWork|, abandoned works will be returned via onWorkDone_nb() callback.
+    return C2_OK;
 }
 
 c2_status_t C2VDAComponent::drain_nb(drain_mode_t mode) {
-    if (mode != DRAIN_COMPONENT)
+    if (mode != DRAIN_COMPONENT) {
         return C2_OMITTED;  // Tunneling is not supported by now
-    return C2_OMITTED;
+    }
+    if (mState != State::RUNNING) {
+        return C2_BAD_STATE;
+    }
+    mTaskRunner->PostTask(FROM_HERE, base::Bind(&C2VDAComponent::onDrain, base::Unretained(this)));
+    return C2_OK;
 }
 
 c2_status_t C2VDAComponent::start() {
@@ -534,7 +1070,7 @@
         return C2_BAD_STATE;  // start() is only supported when component is in LOADED state.
     }
 
-    getParameters();
+    fetchParametersFromIntf();
     base::WaitableEvent done(base::WaitableEvent::ResetPolicy::AUTOMATIC,
                              base::WaitableEvent::InitialState::NOT_SIGNALED);
     mTaskRunner->PostTask(
@@ -565,7 +1101,8 @@
 
 void C2VDAComponent::reset() {
     stop();
-    // TODO(johnylin): what is the use case for calling reset() instead of stop()?
+    // TODO(johnylin): reset is different than stop that it could be called in any state.
+    // TODO(johnylin): when reset is called, set ComponentInterface to default values.
 }
 
 void C2VDAComponent::release() {
@@ -578,39 +1115,165 @@
 
 void C2VDAComponent::providePictureBuffers(
         uint32_t pixelFormat, uint32_t minNumBuffers, const media::Size& codedSize) {
-    UNUSED(pixelFormat);
-    UNUSED(minNumBuffers);
-    UNUSED(codedSize);
+    // Uses coded size for crop rect while it is not available.
+    auto format = std::make_unique<VideoFormat>(
+            pixelFormat, minNumBuffers, codedSize, media::Rect(codedSize));
+
+    // Set mRequestedVisibleRect to default.
+    mRequestedVisibleRect = media::Rect();
+
+    mTaskRunner->PostTask(FROM_HERE, base::Bind(&C2VDAComponent::onOutputFormatChanged,
+                                                base::Unretained(this), base::Passed(&format)));
 }
 
 void C2VDAComponent::dismissPictureBuffer(int32_t pictureBufferId) {
     UNUSED(pictureBufferId);
+    // no ops
 }
 
 void C2VDAComponent::pictureReady(
         int32_t pictureBufferId, int32_t bitstreamId, const media::Rect& cropRect) {
     UNUSED(pictureBufferId);
     UNUSED(bitstreamId);
-    UNUSED(cropRect);
+
+    if (mRequestedVisibleRect != cropRect) {
+        mRequestedVisibleRect = cropRect;
+        mTaskRunner->PostTask(FROM_HERE, base::Bind(&C2VDAComponent::onVisibleRectChanged,
+                                                    base::Unretained(this), cropRect));
+    }
+
+    mTaskRunner->PostTask(FROM_HERE, base::Bind(&C2VDAComponent::onOutputBufferDone,
+                                                base::Unretained(this),
+                                                pictureBufferId, bitstreamId));
 }
 
 void C2VDAComponent::notifyEndOfBitstreamBuffer(int32_t bitstreamId) {
-    UNUSED(bitstreamId);
+    mTaskRunner->PostTask(FROM_HERE, base::Bind(&C2VDAComponent::onInputBufferDone,
+                                                base::Unretained(this), bitstreamId));
 }
 
 void C2VDAComponent::notifyFlushDone() {
+    mTaskRunner->PostTask(FROM_HERE,
+                          base::Bind(&C2VDAComponent::onDrainDone, base::Unretained(this)));
 }
 
 void C2VDAComponent::notifyResetDone() {
     mTaskRunner->PostTask(FROM_HERE,
-                          base::Bind(&C2VDAComponent::onStopDone, base::Unretained(this)));
+                          base::Bind(&C2VDAComponent::onResetDone, base::Unretained(this)));
 }
 
 void C2VDAComponent::notifyError(VideoDecodeAcceleratorAdaptor::Result error) {
-    UNUSED(error);
+    ALOGE("Got notifyError from VDA error=%d", error);
+    c2_status_t err;
+    switch(error) {
+    case VideoDecodeAcceleratorAdaptor::Result::ILLEGAL_STATE:
+        err = C2_BAD_STATE;
+        break;
+    case VideoDecodeAcceleratorAdaptor::Result::INVALID_ARGUMENT:
+    case VideoDecodeAcceleratorAdaptor::Result::UNREADABLE_INPUT:
+        err = C2_BAD_VALUE;
+        break;
+    case VideoDecodeAcceleratorAdaptor::Result::PLATFORM_FAILURE:
+        err = C2_CORRUPTED;
+        break;
+    case VideoDecodeAcceleratorAdaptor::Result::INSUFFICIENT_RESOURCES:
+        err = C2_NO_MEMORY;
+        break;
+    case VideoDecodeAcceleratorAdaptor::Result::SUCCESS:
+        ALOGE("Shouldn't get SUCCESS err code in NotifyError(). Skip it...");
+        return;
+    }
+    reportError(err);
+}
+
+void C2VDAComponent::reportFinishedWorkIfAny() {
+    DCHECK(mTaskRunner->BelongsToCurrentThread());
+    std::vector<std::unique_ptr<C2Work>> finishedWorks;
+
+    // Work should be reported as done if both input and output buffer are returned by VDA.
+
+    // Note that not every input buffer has matched output (ex. CSD header for H.264).
+    // However, the timestamp is guaranteed to be monotonic increasing for buffers in display order.
+    // That is, since VDA output is in display order, if we get a returned output with timestamp T,
+    // it implies all works with timestamp <= T are done.
+    auto iter = mPendingWorks.begin();
+    while (iter != mPendingWorks.end()) {
+        if (isWorkDone(iter->get())) {
+            iter->get()->result = C2_OK;
+            finishedWorks.emplace_back(std::move(*iter));
+            iter = mPendingWorks.erase(iter);
+        } else {
+            ++iter;
+        }
+    }
+
+    if (!finishedWorks.empty()) {
+        mListener->onWorkDone_nb(shared_from_this(), std::move(finishedWorks));
+    }
+}
+
+bool C2VDAComponent::isWorkDone(const C2Work* work) const {
+    if (!work->input.buffers.empty()) {
+        return false;  // Input buffer is still owned by VDA.
+    }
+    if (mLastOutputTimestamp < 0) {
+        return false;  // No output buffer is returned yet.
+    }
+    if (work->input.ordinal.timestamp > static_cast<uint64_t>(mLastOutputTimestamp)) {
+        return false;  // Output buffer is not returned by VDA yet.
+    }
+    return true;  // Output buffer is returned, or it has no related output buffer.
+}
+
+void C2VDAComponent::reportAbandonedWorks() {
+    DCHECK(mTaskRunner->BelongsToCurrentThread());
+    std::vector<std::unique_ptr<C2Work>> abandonedWorks;
+
+    while (!mPendingWorks.empty()) {
+        std::unique_ptr<C2Work> work(std::move(mPendingWorks.front()));
+        mPendingWorks.pop_front();
+
+        work->result = static_cast<c2_status_t>(-1);  // What should this value be?
+        // When the work is abandoned, the input buffers vector shall be cleared by component.
+        work->input.buffers.clear();
+        abandonedWorks.emplace_back(std::move(work));
+    }
+
+    if (!abandonedWorks.empty()) {
+        mListener->onWorkDone_nb(shared_from_this(), std::move(abandonedWorks));
+    }
+}
+
+void C2VDAComponent::reportError(c2_status_t error) {
+    uint32_t reported_error = -error;
+    // Why onError_nb takes uint32_t while c2_status_t is mostly negative numbers?
+    mListener->onError_nb(shared_from_this(), reported_error);
 }
 
 ////////////////////////////////////////////////////////////////////////////////
+// Neglect flexible flag while matching parameter indices.
+#define CASE(paramType) \
+    case paramType::coreIndex: \
+        return std::unique_ptr<C2StructDescriptor>(new C2StructDescriptor{ \
+                paramType::coreIndex, paramType::fieldList, })
+
+class C2VDAComponentStore::ParamReflector : public C2ParamReflector {
+public:
+    virtual std::unique_ptr<C2StructDescriptor> describe(C2Param::BaseIndex coreIndex) override {
+        switch (coreIndex.coreIndex()) {
+            //CASE(C2ComponentDomainInfo);  //TODO: known codec2 framework bug
+            CASE(C2StreamFormatConfig);
+            CASE(C2VideoSizeStreamInfo);
+            CASE(C2PortMimeConfig);
+            CASE(C2MaxVideoSizeHintPortSetting);
+        }
+        return nullptr;
+    }
+};
+
+#undef CASE
+
+// TODO(johnylin): implement C2VDAComponentStore
 C2VDAComponentStore::C2VDAComponentStore()
     : mParamReflector(std::make_shared<ParamReflector>()) {
 }
diff --git a/C2VDAComponent.h b/C2VDAComponent.h
index ecc4cd9..716fc53 100644
--- a/C2VDAComponent.h
+++ b/C2VDAComponent.h
@@ -5,7 +5,9 @@
 #ifndef C2_VDA_COMPONENT_H_
 #define C2_VDA_COMPONENT_H_
 
+#include <deque>
 #include <map>
+#include <queue>
 #include <unordered_map>
 
 #include "VideoDecodeAcceleratorAdaptor.h"
@@ -55,6 +57,8 @@
             std::vector<C2FieldSupportedValuesQuery>& fields) const override;
 
 private:
+    friend class C2VDAComponent;
+
     const C2String kName;
     const c2_node_id_t kId;
     //TODO: in the future different codec (h264/vp8/vp9) would be different class inherited from a
@@ -86,6 +90,8 @@
     C2VideoSizeStreamInfo::output mVideoSize;
     // Max video size for video decoder.
     C2MaxVideoSizeHintPortSetting::input mMaxVideoSizeHint;
+    // The directive of output block pool usage
+    std::unique_ptr<C2PortBlockPoolsTuning::output> mOutputBlockPools;
 
     std::unordered_map<uint32_t, C2Param*> mParams;
     // C2ParamField is LessThanComparable
@@ -94,9 +100,6 @@
 
     media::VideoDecodeAccelerator::SupportedProfiles mSupportedProfiles;
     std::vector<uint32_t> mSupportedCodecProfiles;
-    media::Size mMaxVideoSize;
-    media::Size mMinVideoSize;
-
 };
 
 class C2VDAComponent
@@ -153,6 +156,9 @@
         UNINITIALIZED,
         // VDA initialization returns successfully. VDA is ready to make progress.
         STARTED,
+        // onDrain() is called. VDA is draining. Component will hold on queueing works until
+        // onDrainDone().
+        DRAINING,
         // onFlush() is called. VDA is flushing. State will change to STARTED after onFlushDone().
         FLUSHING,
         // onStop() is called. VDA is shutting down. State will change to UNINITIALIZED after
@@ -162,16 +168,91 @@
         ERROR,
     };
 
+    enum {
+        kDpbOutputBufferExtraCount = 3,  // Use the same number as ACodec.
+    };
+
+    // Internal struct to keep the information of a specific graphic block.
+    struct GraphicBlockInfo {
+        enum class State {
+            OWNED_BY_COMPONENT,         // Owned by this component.
+            OWNED_BY_ACCELERATOR,       // Owned by video decode accelerator.
+            OWNED_BY_CLIENT,            // Owned by client.
+        };
+
+        int32_t mBlockId = -1;
+        State mState = State::OWNED_BY_COMPONENT;
+        // Graphic block buffer allocated from allocator. This should be reused.
+        std::shared_ptr<C2GraphicBlock> mGraphicBlock;
+        // The handle dupped from graphic block for importing to VDA.
+        base::ScopedFD mHandle;
+        // VideoFramePlane information for importing to VDA.
+        std::vector<VideoFramePlane> mPlanes;
+    };
+
+    struct VideoFormat {
+        uint32_t mPixelFormat = 0;
+        uint32_t mMinNumBuffers = 0;
+        media::Size mCodedSize;
+        media::Rect mVisibleRect;
+
+        VideoFormat() {}
+        VideoFormat(uint32_t pixelFormat, uint32_t minNumBuffers, media::Size codedSize,
+                    media::Rect visibleRect);
+    };
+
     // Get configured parameters from component interface. This should be called once framework
     // wants to start the component.
-    void getParameters();
+    void fetchParametersFromIntf();
+    // Used as the release callback for C2VDAGraphicBuffer to get back the output buffer.
+    void returnOutputBuffer(int32_t pictureBufferId);
 
     // These tasks should be run on the component thread |mThread|.
     void onCreate();
     void onDestroy();
     void onStart(media::VideoCodecProfile profile, base::WaitableEvent* done);
+    void onQueueWork(std::unique_ptr<C2Work> work);
+    void onDequeueWork();
+    void onInputBufferDone(int32_t bitstreamId);
+    void onOutputBufferDone(int32_t pictureBufferId, int32_t bitstreamId);
+    void onDrain();
+    void onDrainDone();
+    void onFlush();
     void onStop(base::WaitableEvent* done);
+    void onResetDone();
+    void onFlushDone();
     void onStopDone();
+    void onOutputFormatChanged(std::unique_ptr<VideoFormat> format);
+    void onVisibleRectChanged(const media::Rect& cropRect);
+    void onOutputBufferReturned(int32_t pictureBufferId);
+
+    // Send input buffer to accelerator with specified bitstream id.
+    void sendInputBufferToAccelerator(const C2ConstLinearBlock& input, int32_t bitstreamId);
+    // Send output buffer to accelerator.
+    void sendOutputBufferToAccelerator(GraphicBlockInfo* info);
+    // Set crop rectangle infomation to output format.
+    void setOutputFormatCrop(const media::Rect& cropRect);
+    // Helper function to get the specified GraphicBlockInfo object by its id.
+    GraphicBlockInfo* getGraphicBlockById(int32_t blockId);
+    // Helper function to get the specified work in mPendingWorks by bitstream id.
+    C2Work* getPendingWorkByBitstreamId(int32_t bitstreamId);
+    // Helper function to get the work which is last to finish in mPendingWorks.
+    C2Work* getPendingWorkLastToFinish();
+    // Try to apply the output format change.
+    void tryChangeOutputFormat();
+    // Allocate output buffers (graphic blocks) from block allocator.
+    c2_status_t allocateBuffersFromBlockAllocator(const media::Size& size, int pixelFormat);
+    // Append allocated buffer (graphic block) to mGraphicBlocks.
+    void appendOutputBuffer(std::shared_ptr<C2GraphicBlock> block);
+
+    // Check for finished works in mPendingWorks. If any, make onWorkDone call to listener.
+    void reportFinishedWorkIfAny();
+    // Abandon all works in mPendingWorks.
+    void reportAbandonedWorks();
+    // Make onError call to listener for reporting errors.
+    void reportError(c2_status_t error);
+    // Helper function to determine if the work is finished.
+    bool isWorkDone(const C2Work* work) const;
 
     // The pointer of component interface.
     const std::shared_ptr<C2VDAComponentIntf> mIntf;
@@ -194,6 +275,29 @@
     base::WaitableEvent* mStopDoneEvent;
     // The state machine on component thread.
     ComponentState mComponentState;
+    // The vector of storing allocated output graphic block information.
+    std::vector<GraphicBlockInfo> mGraphicBlocks;
+    // The work queue. Works are queued from component API queue_nb and dequeued by the decode
+    // process of component.
+    std::queue<std::unique_ptr<C2Work>> mQueue;
+    // Store all pending works. The dequeued works are placed here until they are finished and then
+    // sent out by onWorkDone call to listener.
+    std::deque<std::unique_ptr<C2Work>> mPendingWorks;
+    // Store the visible rect provided from VDA. If this is changed, component should issue a
+    // visible size change event.
+    media::Rect mRequestedVisibleRect;
+    // The current output format.
+    VideoFormat mOutputFormat;
+    // The pending output format. We need to wait until all buffers are returned back to apply the
+    // format change.
+    std::unique_ptr<VideoFormat> mPendingOutputFormat;
+    // The current color format.
+    uint32_t mColorFormat;
+    // Record the timestamp of the last output buffer. This is used to determine if the work is
+    // finished.
+    int64_t mLastOutputTimestamp;
+    // The pointer of output block pool.
+    std::shared_ptr<C2BlockPool> mOutputBlockPool;
 
     // The following members should be utilized on parent thread.
 
@@ -202,6 +306,9 @@
     // The state machine on parent thread.
     State mState;
 
+    // The WeakPtrFactory for getting weak pointer of this.
+    base::WeakPtrFactory<C2VDAComponent> mWeakThisFactory;
+
     DISALLOW_COPY_AND_ASSIGN(C2VDAComponent);
 };
 
@@ -213,15 +320,15 @@
     C2String getName() const override;
 
     c2_status_t createComponent(C2String name,
-                             std::shared_ptr<C2Component>* const component) override;
+                                std::shared_ptr<C2Component>* const component) override;
 
     c2_status_t createInterface(C2String name,
-                             std::shared_ptr<C2ComponentInterface>* const interface) override;
+                                std::shared_ptr<C2ComponentInterface>* const interface) override;
 
     std::vector<std::shared_ptr<const C2Component::Traits>> listComponents() override;
 
     c2_status_t copyBuffer(std::shared_ptr<C2GraphicBuffer> src,
-                        std::shared_ptr<C2GraphicBuffer> dst) override;
+                           std::shared_ptr<C2GraphicBuffer> dst) override;
 
     std::shared_ptr<C2ParamReflector> getParamReflector() const override;
 
@@ -232,14 +339,14 @@
             std::vector<C2FieldSupportedValuesQuery>& fields) const override;
 
     c2_status_t query_sm(const std::vector<C2Param* const>& stackParams,
-                      const std::vector<C2Param::Index>& heapParamIndices,
-                      std::vector<std::unique_ptr<C2Param>>* const heapParams) const override;
+                         const std::vector<C2Param::Index>& heapParamIndices,
+                         std::vector<std::unique_ptr<C2Param>>* const heapParams) const override;
 
     c2_status_t config_sm(const std::vector<C2Param* const>& params,
-                       std::vector<std::unique_ptr<C2SettingResult>>* const failures) override;
+                          std::vector<std::unique_ptr<C2SettingResult>>* const failures) override;
 
     c2_status_t commit_sm(const std::vector<C2Param* const>& params,
-                       std::vector<std::unique_ptr<C2SettingResult>>* const failures) override;
+                          std::vector<std::unique_ptr<C2SettingResult>>* const failures) override;
 
 
 private:
diff --git a/cmds/Android.mk b/cmds/Android.mk
new file mode 100644
index 0000000..c5ae806
--- /dev/null
+++ b/cmds/Android.mk
@@ -0,0 +1,42 @@
+LOCAL_PATH := $(call my-dir)
+include $(CLEAR_VARS)
+
+LOCAL_SRC_FILES:= \
+        codec2.cpp \
+
+LOCAL_C_INCLUDES += \
+        $(TOP)/external/libchrome \
+        $(TOP)/external/gtest/include \
+        $(TOP)/external/v4l2_codec2 \
+        $(TOP)/external/v4l2_codec2/vda \
+        $(TOP)/external/v4l2_codec2/vndk/include \
+        $(TOP)/frameworks/av/media/libstagefright/codec2/include \
+        $(TOP)/frameworks/av/media/libstagefright/codec2/vndk/include \
+        $(TOP)/frameworks/av/media/libstagefright/include \
+        $(TOP)/frameworks/native/include \
+
+LOCAL_MODULE := v4l2_codec2_testapp
+LOCAL_MODULE_TAGS := optional
+
+LOCAL_SHARED_LIBRARIES := libbinder \
+                          libchrome \
+                          libcutils \
+                          libgui \
+                          liblog \
+                          libmedia \
+                          libstagefright \
+                          libstagefright_codec2 \
+                          libstagefright_foundation \
+                          libui \
+                          libutils \
+                          libv4l2_codec2 \
+                          libv4l2_codec2_vda \
+                          libv4l2_codec2_vndk \
+
+LOCAL_STATIC_LIBRARIES := libstagefright_codec2_vndk \
+
+# -Wno-unused-parameter is needed for libchrome/base codes
+LOCAL_CFLAGS += -Werror -Wall -Wno-unused-parameter
+LOCAL_CLANG := true
+
+include $(BUILD_EXECUTABLE)
diff --git a/cmds/codec2.cpp b/cmds/codec2.cpp
new file mode 100644
index 0000000..c5fe44e
--- /dev/null
+++ b/cmds/codec2.cpp
@@ -0,0 +1,484 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+//#define LOG_NDEBUG 0
+#define LOG_TAG "codec2"
+
+#include <inttypes.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include <thread>
+
+#include "C2VDAComponent.h"
+
+#include <C2AllocatorCrosGralloc.h>
+#include <C2AllocatorMemDealer.h>
+#include <C2Buffer.h>
+#include <C2BufferPriv.h>
+#include <C2Component.h>
+#include <C2VDASupport.h>
+#include <C2Work.h>
+
+#include <binder/IServiceManager.h>
+#include <binder/ProcessState.h>
+#include <media/ICrypto.h>
+#include <media/IMediaHTTPService.h>
+#include <media/stagefright/foundation/ABuffer.h>
+#include <media/stagefright/foundation/ALooper.h>
+#include <media/stagefright/foundation/AMessage.h>
+#include <media/stagefright/foundation/AUtils.h>
+#include <media/stagefright/DataSource.h>
+#include <media/stagefright/MediaDefs.h>
+#include <media/stagefright/MediaErrors.h>
+#include <media/stagefright/MediaExtractor.h>
+#include <media/stagefright/MediaSource.h>
+#include <media/stagefright/MetaData.h>
+#include <media/stagefright/Utils.h>
+
+#include <gui/GLConsumer.h>
+#include <gui/IProducerListener.h>
+#include <gui/Surface.h>
+#include <gui/SurfaceComposerClient.h>
+
+using namespace android;
+using namespace std::chrono_literals;
+
+namespace {
+
+const int kWidth = 416;
+const int kHeight = 240;  // BigBuckBunny.mp4
+//const int kWidth = 560;
+//const int kHeight = 320;  // small.mp4
+
+class C2VDALinearBuffer : public C2Buffer {
+public:
+    explicit C2VDALinearBuffer(const std::shared_ptr<C2LinearBlock>& block)
+        : C2Buffer({ block->share(block->offset(), block->size(), ::android::C2Fence()) }) {}
+};
+
+class Listener;
+
+class SimplePlayer {
+public:
+    SimplePlayer();
+    ~SimplePlayer();
+
+    void onWorkDone(std::weak_ptr<C2Component> component,
+                    std::vector<std::unique_ptr<C2Work>> workItems);
+    void onTripped(std::weak_ptr<C2Component> component,
+                   std::vector<std::shared_ptr<C2SettingResult>> settingResult);
+    void onError(std::weak_ptr<C2Component> component, uint32_t errorCode);
+
+    status_t play(const sp<IMediaSource> &source);
+
+private:
+    typedef std::unique_lock<std::mutex> ULock;
+
+    enum {
+        kInputBufferCount = 8,
+        kDefaultInputBufferSize = 1024 * 1024,
+    };
+
+    std::shared_ptr<Listener> mListener;
+
+    sp<IProducerListener> mProducerListener;
+
+    // Allocators
+    std::shared_ptr<C2Allocator> mLinearAlloc;
+    std::shared_ptr<C2BlockPool> mLinearBlockPool;
+
+    std::mutex mQueueLock;
+    std::condition_variable mQueueCondition;
+    std::list<std::unique_ptr<C2Work>> mWorkQueue;
+
+    std::mutex mProcessedLock;
+    std::condition_variable mProcessedCondition;
+    std::list<std::unique_ptr<C2Work>> mProcessedWork;
+
+    sp<Surface> mSurface;
+    sp<SurfaceComposerClient> mComposerClient;
+    sp<SurfaceControl> mControl;
+};
+
+class Listener : public C2Component::Listener {
+public:
+    explicit Listener(SimplePlayer *thiz) : mThis(thiz) {}
+    virtual ~Listener() = default;
+
+    virtual void onWorkDone_nb(std::weak_ptr<C2Component> component,
+                               std::vector<std::unique_ptr<C2Work>> workItems) override {
+        mThis->onWorkDone(component, std::move(workItems));
+    }
+
+    virtual void onTripped_nb(
+            std::weak_ptr<C2Component> component,
+            std::vector<std::shared_ptr<C2SettingResult>> settingResult) override {
+        mThis->onTripped(component, settingResult);
+    }
+
+    virtual void onError_nb(std::weak_ptr<C2Component> component,
+                            uint32_t errorCode) override {
+        mThis->onError(component, errorCode);
+    }
+
+private:
+    SimplePlayer * const mThis;
+};
+
+SimplePlayer::SimplePlayer()
+    : mListener(new Listener(this)),
+      mProducerListener(new DummyProducerListener),
+      mComposerClient(new SurfaceComposerClient) {
+    CHECK_EQ(mComposerClient->initCheck(), OK);
+
+    std::shared_ptr<C2AllocatorStore> store = getCodec2VDAAllocatorStore();
+    CHECK_EQ(store->fetchAllocator(C2AllocatorStore::DEFAULT_LINEAR, &mLinearAlloc), C2_OK);
+
+    mLinearBlockPool = std::make_shared<C2BasicLinearBlockPool>(mLinearAlloc);
+
+    mControl = mComposerClient->createSurface(String8("A Surface"),
+                                              kWidth, kHeight, HAL_PIXEL_FORMAT_YV12);
+
+    CHECK(mControl != nullptr);
+    CHECK(mControl->isValid());
+
+    SurfaceComposerClient::openGlobalTransaction();
+    CHECK_EQ(mControl->setLayer(INT_MAX), OK);
+    CHECK_EQ(mControl->show(), OK);
+    SurfaceComposerClient::closeGlobalTransaction();
+
+    mSurface = mControl->getSurface();
+    CHECK(mSurface != nullptr);
+    mSurface->connect(NATIVE_WINDOW_API_CPU, mProducerListener);
+}
+
+SimplePlayer::~SimplePlayer() {
+    mComposerClient->dispose();
+}
+
+void SimplePlayer::onWorkDone(
+        std::weak_ptr<C2Component> component, std::vector<std::unique_ptr<C2Work>> workItems) {
+    (void) component;
+    ULock l(mProcessedLock);
+    for (auto & item : workItems) {
+        mProcessedWork.emplace_back(std::move(item));
+    }
+    mProcessedCondition.notify_all();
+}
+
+void SimplePlayer::onTripped(
+        std::weak_ptr<C2Component> component,
+        std::vector<std::shared_ptr<C2SettingResult>> settingResult) {
+    (void) component;
+    (void) settingResult;
+    // TODO
+}
+
+void SimplePlayer::onError(std::weak_ptr<C2Component> component, uint32_t errorCode) {
+    (void) component;
+    (void) errorCode;
+    // TODO
+}
+
+// TODO(johnylin): remove this when we move the development env to P
+// from master: system/core/libcutils/native_handle.c
+native_handle_t* native_handle_clone(const native_handle_t* handle) {
+    native_handle_t* clone = native_handle_create(handle->numFds, handle->numInts);
+    if (clone == nullptr) return nullptr;
+    for (int i = 0; i < handle->numFds; i++) {
+        clone->data[i] = dup(handle->data[i]);
+        ALOGV("clone handle fd: %d", clone->data[i]);
+        if (clone->data[i] == -1) {
+            clone->numFds = i;
+            native_handle_close(clone);
+            native_handle_delete(clone);
+            return nullptr;
+        }
+    }
+    memcpy(&clone->data[handle->numFds], &handle->data[handle->numFds],
+           sizeof(int) * handle->numInts);
+    return clone;
+}
+
+status_t SimplePlayer::play(const sp<IMediaSource> &source) {
+    sp<AMessage> format;
+    (void) convertMetaDataToMessage(source->getFormat(), &format);
+
+    std::deque<sp<ABuffer>> csds(2);
+    format->findBuffer("csd-0", &csds[0]);
+    format->findBuffer("csd-1", &csds[1]);
+
+    status_t err = source->start();
+
+    if (err != OK) {
+        ALOGE("source returned error %d (0x%08x)", err, err);
+        fprintf(stderr, "source returned error %d (0x%08x)\n", err, err);
+        return err;
+    }
+
+    std::shared_ptr<C2Component> component(std::make_shared<C2VDAComponent>("avc", 0));
+    component->setListener_sm(mListener);
+    std::unique_ptr<C2PortBlockPoolsTuning::output> pools =
+        C2PortBlockPoolsTuning::output::alloc_unique(
+                { static_cast<uint64_t>(C2BlockPool::BASIC_GRAPHIC) });
+    std::vector<std::unique_ptr<C2SettingResult>> result;
+    (void)component->intf()->config_nb({pools.get()}, &result);
+    component->start();
+
+    mProcessedWork.clear();
+    for (int i = 0; i < kInputBufferCount; ++i) {
+        mWorkQueue.emplace_back(new C2Work);
+    }
+
+    std::atomic_bool running(true);
+    std::thread surfaceThread([this, &running]() {
+        const sp<IGraphicBufferProducer> &igbp = mSurface->getIGraphicBufferProducer();
+        std::vector<std::shared_ptr<C2Buffer>> pendingDisplayBuffers;
+        pendingDisplayBuffers.resize(BufferQueue::NUM_BUFFER_SLOTS);
+        while (running) {
+            std::unique_ptr<C2Work> work;
+            {
+                ULock l(mProcessedLock);
+                if (mProcessedWork.empty()) {
+                    mProcessedCondition.wait_for(l, 100ms);
+                    if (mProcessedWork.empty()) {
+                        continue;
+                    }
+                }
+                work = std::move(mProcessedWork.front());
+                mProcessedWork.pop_front();
+            }
+
+            if (work->worklets_processed > 0) {
+                int slot;
+                sp<Fence> fence;
+                std::shared_ptr<C2Buffer> output = work->worklets.front()->output.buffers[0];
+                C2ConstGraphicBlock graphic_block = output->data().graphicBlocks().front();
+
+                // Create GraphicBuffer from cloning native_handle
+                native_handle_t* cloneHandle = native_handle_clone(graphic_block.handle());
+                sp<GraphicBuffer> buffer = new GraphicBuffer(
+                        graphic_block.width(),
+                        graphic_block.height(),
+                        HAL_PIXEL_FORMAT_YCbCr_420_888,
+                        GRALLOC_USAGE_SW_READ_OFTEN,
+                        graphic_block.width(),
+                        cloneHandle,
+                        false);
+
+                CHECK_EQ(igbp->attachBuffer(&slot, buffer), OK);
+                ALOGV("attachBuffer slot=%d ts=%lld", slot,
+                      work->worklets.front()->output.ordinal.timestamp * 1000ll);
+
+                IGraphicBufferProducer::QueueBufferInput qbi(
+                        work->worklets.front()->output.ordinal.timestamp * 1000ll,
+                        false,
+                        HAL_DATASPACE_UNKNOWN,
+                        Rect(graphic_block.width(), graphic_block.height()),
+                        NATIVE_WINDOW_SCALING_MODE_SCALE_TO_WINDOW,
+                        0,
+                        Fence::NO_FENCE,
+                        0);
+                IGraphicBufferProducer::QueueBufferOutput qbo;
+                CHECK_EQ(igbp->queueBuffer(slot, qbi, &qbo), OK);
+
+                // If the slot is reused then we can make sure the previous graphic buffer is
+                // displayed (consumed), so we could returned the graphic buffer.
+                pendingDisplayBuffers[slot].swap(output);
+
+                // Remember to close the cloned handle.
+                native_handle_close(cloneHandle);
+                native_handle_delete(cloneHandle);
+            }
+
+            // input buffers should be cleared in component side.
+            CHECK(work->input.buffers.empty());
+            work->worklets.clear();
+            work->worklets_processed = 0;
+
+            if (work->input.flags & C2BufferPack::FLAG_END_OF_STREAM) {
+                running.store(false);  // stop the thread
+            }
+
+            ULock l(mQueueLock);
+            mWorkQueue.emplace_back(std::move(work));
+            mQueueCondition.notify_all();
+        }
+    });
+
+    long numFrames = 0;
+
+    for (;;) {
+        size_t size = 0u;
+        void *data = nullptr;
+        int64_t timestamp = 0u;
+        MediaBuffer* buffer = nullptr;
+        sp<ABuffer> csd;
+        if (!csds.empty()) {
+            csd = std::move(csds.front());
+            csds.pop_front();
+            size = csd->size();
+            data = csd->data();
+        } else {
+            status_t err = source->read(&buffer);
+            if (err != OK) {
+                CHECK(buffer == nullptr);
+
+                if (err == INFO_FORMAT_CHANGED) {
+                    continue;
+                }
+
+                break;
+            }
+            sp<MetaData> meta = buffer->meta_data();
+            CHECK(meta->findInt64(kKeyTime, &timestamp));
+
+            size = buffer->size();
+            data = buffer->data();
+        }
+
+        // Prepare C2Work
+
+        std::unique_ptr<C2Work> work;
+        while (!work) {
+            ULock l(mQueueLock);
+            if (!mWorkQueue.empty()) {
+                work = std::move(mWorkQueue.front());
+                mWorkQueue.pop_front();
+            } else {
+                mQueueCondition.wait_for(l, 100ms);
+            }
+        }
+        work->input.flags = static_cast<C2BufferPack::flags_t>(0);
+        work->input.ordinal.timestamp = timestamp;
+        work->input.ordinal.frame_index = numFrames;
+
+        // Allocate input buffer.
+        std::shared_ptr<C2LinearBlock> block;
+        mLinearBlockPool->fetchLinearBlock(
+                size, { C2MemoryUsage::kSoftwareRead, C2MemoryUsage::kSoftwareWrite }, &block);
+        C2WriteView view = block->map().get();
+        if (view.error() != C2_OK) {
+            fprintf(stderr, "C2LinearBlock::map() failed : %d\n", view.error());
+            break;
+        }
+        memcpy(view.base(), data, size);
+
+        work->input.buffers.clear();
+        work->input.buffers.emplace_back(new C2VDALinearBuffer(std::move(block)));
+        work->worklets.clear();
+        work->worklets.emplace_back(new C2Worklet);
+
+        std::list<std::unique_ptr<C2Work>> items;
+        items.push_back(std::move(work));
+
+        // DO THE DECODING
+        component->queue_nb(&items);
+
+        if (buffer) {
+            buffer->release();
+        }
+        ++numFrames;
+    }
+    component->drain_nb(C2Component::DRAIN_COMPONENT);
+
+    surfaceThread.join();
+
+    source->stop();
+    component->stop();
+    printf("finished...\n");
+    return OK;
+}
+
+}  // namespace
+
+static bool getMediaSourceFromFile(const char* filename, sp<IMediaSource>* source) {
+    source->clear();
+
+    sp<DataSource> dataSource =
+            DataSource::CreateFromURI(nullptr /* httpService */, filename);
+
+    if (dataSource == nullptr) {
+        fprintf(stderr, "Unable to create data source.\n");
+        return false;
+    }
+
+    sp<IMediaExtractor> extractor = MediaExtractor::Create(dataSource);
+    if (extractor == nullptr) {
+        fprintf(stderr, "could not create extractor.\n");
+        return false;
+    }
+
+    for (size_t i = 0, numTracks = extractor->countTracks(); i < numTracks; ++i) {
+        sp<MetaData> meta =
+                extractor->getTrackMetaData(i, MediaExtractor::kIncludeExtensiveMetaData);
+        if (meta == nullptr) {
+            continue;
+        }
+        const char *mime;
+        meta->findCString(kKeyMIMEType, &mime);
+        // TODO: only h264 is working now, adapt other codec (vp8/9) stream in the future.
+        if (!strncasecmp(mime, "video/avc", 9)) {
+            *source = extractor->getTrack(i);
+            if (*source == nullptr) {
+                fprintf(stderr, "It's nullptr track for track %zu.\n", i);
+                return false;
+            }
+            return true;
+        }
+    }
+    fprintf(stderr, "No AVC track found.\n");
+    return false;
+}
+
+static void usage(const char *me) {
+    fprintf(stderr, "usage: %s [options] [input_filename]...\n", me);
+    fprintf(stderr, "       -h(elp)\n");
+}
+
+int main(int argc, char **argv) {
+    android::ProcessState::self()->startThreadPool();
+
+    int res;
+    while ((res = getopt(argc, argv, "h")) >= 0) {
+        switch (res) {
+            case 'h':
+            default:
+            {
+                usage(argv[0]);
+                exit(1);
+                break;
+            }
+        }
+    }
+
+    argc -= optind;
+    argv += optind;
+
+    if (argc < 1) {
+        fprintf(stderr, "No input file specified\n");
+        return 1;
+    }
+
+    SimplePlayer player;
+
+    for (int k = 0; k < argc; ++k) {
+        sp<IMediaSource> mediaSource;
+        if (!getMediaSourceFromFile(argv[k], &mediaSource)) {
+            fprintf(stderr, "Unable to get media source from file: %s\n", argv[k]);
+            return -1;
+        }
+        if (player.play(mediaSource) != OK) {
+            fprintf(stderr, "Player failed to play media source: %s\n", argv[k]);
+            return -1;
+        }
+    }
+
+    return 0;
+}