Snap for 6824415 from e61a51544487d2e04624c695c26c9714605e94cd to rvc-d2-release

Change-Id: I8b87525808f4597091cf5ed460701e8c2e0601db
diff --git a/accel/Android.bp b/accel/Android.bp
index 214376a..1bf4805 100644
--- a/accel/Android.bp
+++ b/accel/Android.bp
@@ -38,7 +38,6 @@
         "vp9_picture.cc",
         "vp9_raw_bits_reader.cc",
         "vp9_uncompressed_header_parser.cc",
-        "unaligned_shared_memory.cc",
     ],
 
     shared_libs: ["libchrome"],
diff --git a/accel/unaligned_shared_memory.cc b/accel/unaligned_shared_memory.cc
deleted file mode 100644
index 77d90c9..0000000
--- a/accel/unaligned_shared_memory.cc
+++ /dev/null
@@ -1,86 +0,0 @@
-// Copyright 2018 The Chromium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-// Note: ported from Chromium commit head: 690c8b268457
-// Note: only necessary functions are ported.
-
-#include "unaligned_shared_memory.h"
-
-#include <limits>
-
-#include "base/logging.h"
-#include "base/sys_info.h"
-
-namespace media {
-
-namespace {
-
-bool CalculateMisalignmentAndOffset(size_t size,
-                                    off_t offset,
-                                    size_t* misalignment,
-                                    off_t* adjusted_offset) {
-  /* |   |   |   |   |   |  shm pages
-   *       |                offset (may exceed max size_t)
-   *       |-----------|    size
-   *     |-|                misalignment
-   *     |                  adjusted offset
-   *     |-------------|    requested mapping
-   */
-
-  // Note: result of % computation may be off_t or size_t, depending on the
-  // relative ranks of those types. In any case we assume that
-  // VMAllocationGranularity() fits in both types, so the final result does too.
-  DCHECK_GE(offset, 0);
-  *misalignment = offset % base::SysInfo::VMAllocationGranularity();
-
-  // Above this |max_size|, |size| + |*misalignment| overflows.
-  size_t max_size = std::numeric_limits<size_t>::max() - *misalignment;
-  if (size > max_size) {
-    DLOG(ERROR) << "Invalid size";
-    return false;
-  }
-
-  *adjusted_offset = offset - static_cast<off_t>(*misalignment);
-
-  return true;
-}
-
-}  // namespace
-UnalignedSharedMemory::UnalignedSharedMemory(
-    const base::SharedMemoryHandle& handle,
-    size_t size,
-    bool read_only)
-    : shm_(handle, read_only), size_(size) {}
-
-UnalignedSharedMemory::~UnalignedSharedMemory() = default;
-
-bool UnalignedSharedMemory::MapAt(off_t offset, size_t size) {
-  if (offset < 0) {
-    DLOG(ERROR) << "Invalid offset";
-    return false;
-  }
-
-  size_t misalignment;
-  off_t adjusted_offset;
-
-  if (!CalculateMisalignmentAndOffset(size, offset, &misalignment,
-                                      &adjusted_offset)) {
-    return false;
-  }
-
-  if (!shm_.MapAt(adjusted_offset, size + misalignment)) {
-    DLOG(ERROR) << "Failed to map shared memory";
-    return false;
-  }
-  mapping_ptr_ = static_cast<uint8_t*>(shm_.memory());
-
-  DCHECK(mapping_ptr_);
-  // There should be no way for the IsValid() checks above to succeed and yet
-  // |mapping_ptr_| remain null. However, since an invalid but non-null pointer
-  // could be disastrous an extra-careful check is done.
-  if (mapping_ptr_)
-    mapping_ptr_ += misalignment;
-  return true;
-}
-
-}  // namespace media
diff --git a/accel/unaligned_shared_memory.h b/accel/unaligned_shared_memory.h
deleted file mode 100644
index 88b594c..0000000
--- a/accel/unaligned_shared_memory.h
+++ /dev/null
@@ -1,55 +0,0 @@
-// Copyright 2018 The Chromium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-// Note: ported from Chromium commit head: 690c8b268457
-// Note: only necessary functions are ported.
-// Note: The version in Chromium has moved away from using base::SharedMemory.
-
-#ifndef UNALIGNED_SHARED_MEMORY_H_
-#define UNALIGNED_SHARED_MEMORY_H_
-
-#include <stdint.h>
-
-#include "base/macros.h"
-#include "base/memory/shared_memory.h"
-
-namespace media {
-
-// Wrapper over base::SharedMemory that can be mapped at unaligned offsets.
-// DEPRECATED! See https://crbug.com/795291.
-class UnalignedSharedMemory {
- public:
-  // Creates an |UnalignedSharedMemory| instance from a
-  // |SharedMemoryHandle|. |size| sets the maximum size that may be mapped. This
-  // instance will own the handle.
-  UnalignedSharedMemory(const base::SharedMemoryHandle& handle,
-                        size_t size,
-                        bool read_only);
-
-  ~UnalignedSharedMemory();
-
-  // Map the shared memory region. Note that the passed |size| parameter should
-  // be less than or equal to |size()|.
-  bool MapAt(off_t offset, size_t size);
-  size_t size() const { return size_; }
-  void* memory() const { return mapping_ptr_; }
-
- private:
-  // Either |shm_| or the set |region_| and one of the mappings are active,
-  // depending on which constructor was used and the value of read_only_. These
-  // variables are held to keep the shared memory mapping valid for the lifetime
-  // of this instance.
-  base::SharedMemory shm_;
-
-  // The size of the region associated with |shm_|.
-  size_t size_;
-
-  // Pointer to the unaligned data in the shared memory mapping.
-  uint8_t* mapping_ptr_ = nullptr;
-
-  DISALLOW_COPY_AND_ASSIGN(UnalignedSharedMemory);
-};
-
-}  // namespace media
-
-#endif  // UNALIGNED_SHARED_MEMORY_H_
diff --git a/accel/v4l2_device.cc b/accel/v4l2_device.cc
index 8ab9898..5c258ab 100644
--- a/accel/v4l2_device.cc
+++ b/accel/v4l2_device.cc
@@ -468,7 +468,19 @@
   return std::move(self).DoQueue(request_ref);
 }
 
-bool V4L2WritableBufferRef::QueueDMABuf(const std::vector<base::ScopedFD>& fds,
+bool V4L2WritableBufferRef::QueueDMABuf(const std::vector<base::ScopedFD>& scoped_fds,
+                                        V4L2RequestRef* request_ref) && {
+  DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
+
+  std::vector<int> fds;
+  fds.reserve(scoped_fds.size());
+  for (const base::ScopedFD& scoped_fd : scoped_fds)
+    fds.push_back(scoped_fd.get());
+
+  return std::move(*this).QueueDMABuf(fds, request_ref);
+}
+
+bool V4L2WritableBufferRef::QueueDMABuf(const std::vector<int>& fds,
                                         V4L2RequestRef* request_ref) && {
   DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
   DCHECK(buffer_data_);
@@ -486,7 +498,7 @@
 
   size_t num_planes = self.PlanesCount();
   for (size_t i = 0; i < num_planes; i++)
-    self.buffer_data_->v4l2_buffer_.m.planes[i].m.fd = fds[i].get();
+    self.buffer_data_->v4l2_buffer_.m.planes[i].m.fd = fds[i];
 
   return std::move(self).DoQueue(request_ref);
 }
diff --git a/accel/v4l2_device.h b/accel/v4l2_device.h
index 09b98f0..f00a604 100644
--- a/accel/v4l2_device.h
+++ b/accel/v4l2_device.h
@@ -122,7 +122,19 @@
   // so this reference becomes invalid.
   // In case of error, false is returned and the buffer is returned to the free
   // list.
-  bool QueueDMABuf(const std::vector<base::ScopedFD>& fds,
+  bool QueueDMABuf(const std::vector<base::ScopedFD>& scoped_fds,
+                   V4L2RequestRef* request_ref = nullptr) &&;
+  // Queue a DMABUF buffer, assigning |fds| as file descriptors for each plane.
+  // The number of |fds| is allowed to be greater than the number of planes of
+  // this buffer. This happens when the v4l2 pixel format is single planar, in
+  // which case only the fd of the first plane is used.
+  // When requests are supported, a |request_ref| can be passed along with
+  // the buffer to be submitted.
+  // If successful, true is returned and the reference to the buffer is dropped
+  // so this reference becomes invalid.
+  // In case of error, false is returned and the buffer is returned to the free
+  // list.
+  bool QueueDMABuf(const std::vector<int>& fds,
                    V4L2RequestRef* request_ref = nullptr) &&;
 
   // Returns the number of planes in this buffer.
diff --git a/common/Android.bp b/common/Android.bp
index e9d407b..45334d0 100644
--- a/common/Android.bp
+++ b/common/Android.bp
@@ -10,6 +10,7 @@
         "EncodeHelpers.cpp",
         "FormatConverter.cpp",
         "V4L2ComponentCommon.cpp",
+        "VideoTypes.cpp",
     ],
 
     export_include_dirs: [
diff --git a/common/FormatConverter.cpp b/common/FormatConverter.cpp
index 657676e..9ab9161 100644
--- a/common/FormatConverter.cpp
+++ b/common/FormatConverter.cpp
@@ -20,7 +20,7 @@
 #include <ui/GraphicBuffer.h>
 #include <utils/Log.h>
 
-#include <v4l2_codec2/common/Common.h>  // for HalPixelFormat
+#include <v4l2_codec2/common/VideoTypes.h>  // for HalPixelFormat
 
 using android::hardware::graphics::common::V1_0::BufferUsage;
 
@@ -131,7 +131,7 @@
         // conversion to perform I420.
         halFormat = HalPixelFormat::YV12;
     } else {
-        halFormat = HalPixelFormat::YCbCr_420_888;  // will allocate NV12 by minigbm.
+        halFormat = HalPixelFormat::YCBCR_420_888;  // will allocate NV12 by minigbm.
     }
 
     uint32_t bufferCount = std::max(inputCount, kMinInputBufferCount);
diff --git a/components/VideoTypes.cpp b/common/VideoTypes.cpp
similarity index 88%
rename from components/VideoTypes.cpp
rename to common/VideoTypes.cpp
index 9b71db8..1ecceca 100644
--- a/components/VideoTypes.cpp
+++ b/common/VideoTypes.cpp
@@ -5,7 +5,7 @@
 //#define LOG_NDEBUG 0
 #define LOG_TAG "VideoTypes"
 
-#include <v4l2_codec2/components/VideoTypes.h>
+#include <v4l2_codec2/common/VideoTypes.h>
 
 #include <log/log.h>
 
@@ -24,6 +24,8 @@
 
 const char* HalPixelFormatToString(HalPixelFormat format) {
     switch (format) {
+    case HalPixelFormat::UNKNOWN:
+        return "Unknown";
     case HalPixelFormat::YCBCR_420_888:
         return "YCBCR_420_888";
     case HalPixelFormat::YV12:
diff --git a/common/include/v4l2_codec2/common/Common.h b/common/include/v4l2_codec2/common/Common.h
index 1b816ef..650b7a7 100644
--- a/common/include/v4l2_codec2/common/Common.h
+++ b/common/include/v4l2_codec2/common/Common.h
@@ -15,20 +15,6 @@
     uint32_t mStride;
 };
 
-enum class HalPixelFormat : uint32_t {
-    UNKNOWN = 0x0,
-    // The pixel formats defined in Android but are used among C2VDAComponent.
-    YCbCr_420_888 = 0x23,
-    YV12 = 0x32315659,
-    NV12 = 0x3231564e,
-};
-
-enum class InputCodec {
-    H264,
-    VP8,
-    VP9,
-};
-
-} // namespace android
+}  // namespace android
 
 #endif  // ANDROID_V4L2_CODEC2_COMMON_COMMON_H
diff --git a/components/include/v4l2_codec2/components/VideoTypes.h b/common/include/v4l2_codec2/common/VideoTypes.h
similarity index 80%
rename from components/include/v4l2_codec2/components/VideoTypes.h
rename to common/include/v4l2_codec2/common/VideoTypes.h
index bcc9bc0..a5130d2 100644
--- a/components/include/v4l2_codec2/components/VideoTypes.h
+++ b/common/include/v4l2_codec2/common/VideoTypes.h
@@ -22,10 +22,11 @@
 
 // Enumeration of supported pixel format. The value should be the same as
 // ::android::hardware::graphics::common::V1_0::PixelFormat.
-using ::android::hardware::graphics::common::V1_0::PixelFormat;
+using HPixelFormat = ::android::hardware::graphics::common::V1_0::PixelFormat;
 enum class HalPixelFormat : int32_t {
-    YCBCR_420_888 = static_cast<int32_t>(PixelFormat::YCBCR_420_888),
-    YV12 = static_cast<int32_t>(PixelFormat::YV12),
+    UNKNOWN = 0x0,
+    YCBCR_420_888 = static_cast<int32_t>(HPixelFormat::YCBCR_420_888),
+    YV12 = static_cast<int32_t>(HPixelFormat::YV12),
     // NV12 is not defined at PixelFormat, follow the convention to use fourcc value.
     NV12 = 0x3231564e,
 };
diff --git a/components/Android.bp b/components/Android.bp
index 7f2b9dc..8273412 100644
--- a/components/Android.bp
+++ b/components/Android.bp
@@ -16,7 +16,6 @@
         "V4L2EncodeComponent.cpp",
         "V4L2EncodeInterface.cpp",
         "VideoDecoder.cpp",
-        "VideoTypes.cpp",
     ],
     export_include_dirs: [
         "include",
diff --git a/components/V4L2DecodeComponent.cpp b/components/V4L2DecodeComponent.cpp
index 0548267..1ea9a7b 100644
--- a/components/V4L2DecodeComponent.cpp
+++ b/components/V4L2DecodeComponent.cpp
@@ -24,10 +24,10 @@
 #include <media/stagefright/foundation/ColorUtils.h>
 
 #include <h264_parser.h>
+#include <v4l2_codec2/common/VideoTypes.h>
 #include <v4l2_codec2/components/BitstreamBuffer.h>
 #include <v4l2_codec2/components/V4L2Decoder.h>
 #include <v4l2_codec2/components/VideoFramePool.h>
-#include <v4l2_codec2/components/VideoTypes.h>
 #include <v4l2_codec2/plugin_store/C2VdaBqBlockPool.h>
 
 namespace android {
@@ -40,19 +40,6 @@
     return static_cast<int32_t>(frameIndex.peeku() & 0x3FFFFFFF);
 }
 
-std::unique_ptr<BitstreamBuffer> C2BlockToBitstreamBuffer(const C2ConstLinearBlock& block,
-                                                          const int32_t bitstreamId) {
-    const int fd = block.handle()->data[0];
-    auto dupFd = ::base::ScopedFD(dup(fd));
-    if (!dupFd.is_valid()) {
-        ALOGE("Failed to dup(%d) input buffer (bitstreamId=%d), errno=%d", fd, bitstreamId, errno);
-        return nullptr;
-    }
-
-    return std::make_unique<BitstreamBuffer>(bitstreamId, std::move(dupFd), block.offset(),
-                                             block.size());
-}
-
 bool parseCodedColorAspects(const C2ConstLinearBlock& input,
                             C2StreamColorAspectsInfo::input* codedAspects) {
     C2ReadView view = input.map().get();
@@ -466,7 +453,9 @@
                 }
             }
 
-            auto buffer = C2BlockToBitstreamBuffer(linearBlock, bitstreamId);
+            std::unique_ptr<BitstreamBuffer> buffer =
+                    std::make_unique<BitstreamBuffer>(bitstreamId, linearBlock.handle()->data[0],
+                                                      linearBlock.offset(), linearBlock.size());
             if (!buffer) {
                 reportError(C2_CORRUPTED);
                 return;
diff --git a/components/V4L2DecodeInterface.cpp b/components/V4L2DecodeInterface.cpp
index b14fbfc..a09fcc4 100644
--- a/components/V4L2DecodeInterface.cpp
+++ b/components/V4L2DecodeInterface.cpp
@@ -223,6 +223,9 @@
                                  MEDIA_MIMETYPE_VIDEO_RAW))
                          .build());
 
+    // Note(b/165826281): The check is not used by the Android framework currently.
+    // In order to speed up the bootup time, we use the maximum supported size instead of querying
+    // the capability from the V4L2 device.
     addParameter(DefineParam(mSize, C2_PARAMKEY_PICTURE_SIZE)
                          .withDefault(new C2StreamPictureSizeInfo::output(0u, 320, 240))
                          .withFields({
diff --git a/components/V4L2Decoder.cpp b/components/V4L2Decoder.cpp
index a5ef3fb..26f1365 100644
--- a/components/V4L2Decoder.cpp
+++ b/components/V4L2Decoder.cpp
@@ -284,7 +284,7 @@
               request.buffer->offset);
         inputBuffer->SetPlaneDataOffset(0, request.buffer->offset);
         inputBuffer->SetPlaneBytesUsed(0, request.buffer->offset + request.buffer->size);
-        std::vector<::base::ScopedFD> fds;
+        std::vector<int> fds;
         fds.push_back(std::move(request.buffer->dmabuf_fd));
         std::move(*inputBuffer).QueueDMABuf(fds);
 
@@ -502,17 +502,15 @@
 
     if (mState == State::Idle) return;
 
-    if (mVideoFramePool->hasPendingRequests()) {
-        ALOGD("Previous callback is running, ignore.");
-        return;
-    }
-
     if (mOutputQueue->FreeBuffersCount() == 0) {
         ALOGD("No free V4L2 output buffers, ignore.");
         return;
     }
 
-    mVideoFramePool->getVideoFrame(::base::BindOnce(&V4L2Decoder::onVideoFrameReady, mWeakThis));
+    if (!mVideoFramePool->getVideoFrame(
+                ::base::BindOnce(&V4L2Decoder::onVideoFrameReady, mWeakThis))) {
+        ALOGV("%s(): Previous callback is running, ignore.", __func__);
+    }
 }
 
 void V4L2Decoder::onVideoFrameReady(
diff --git a/components/V4L2EncodeComponent.cpp b/components/V4L2EncodeComponent.cpp
index c13537e..ab2230e 100644
--- a/components/V4L2EncodeComponent.cpp
+++ b/components/V4L2EncodeComponent.cpp
@@ -15,6 +15,7 @@
 #include <C2AllocatorGralloc.h>
 #include <C2PlatformSupport.h>
 #include <C2Work.h>
+#include <android/hardware/graphics/common/1.0/types.h>
 #include <base/bind.h>
 #include <base/bind_helpers.h>
 #include <log/log.h>
@@ -24,12 +25,13 @@
 #include <fourcc.h>
 #include <h264_parser.h>
 #include <rect.h>
-#include <unaligned_shared_memory.h>
 #include <v4l2_codec2/common/Common.h>
 #include <v4l2_codec2/common/EncodeHelpers.h>
 #include <v4l2_device.h>
 #include <video_pixel_format.h>
 
+using android::hardware::graphics::common::V1_0::BufferUsage;
+
 namespace android {
 
 namespace {
@@ -40,7 +42,7 @@
 // TODO(dstaessens): Clean up code extracting layout from a C2GraphicBlock.
 std::optional<std::vector<VideoFramePlane>> getVideoFrameLayout(const C2ConstGraphicBlock& block,
                                                                 media::VideoPixelFormat* format) {
-    ALOGD("%s()", __func__);
+    ALOGV("%s()", __func__);
 
     // Get the C2PlanarLayout from the graphics block. The C2GraphicView returned by block.map()
     // needs to be released before calling getGraphicBlockInfo(), or the lockYCbCr() call will block
@@ -126,30 +128,6 @@
     return planes;
 }
 
-const uint8_t kH264StartCode[] = {0, 0, 0, 1};
-const size_t kH264StartCodeSize = sizeof(kH264StartCode);
-
-// Copy a H.264 NALU of size |srcSize| (without start code), located at |src|,into a buffer starting
-// at |dst| of size |dstSize|, prepending it with a H.264 start code (as long as both fit). After
-// copying, update |dst| to point to the address immediately after the copied data, and update
-// |dstSize| to contain remaining destination buffer size.
-void copyNALUPrependingStartCode(const uint8_t* src, size_t srcSize, uint8_t** dst,
-                                 size_t* dstSize) {
-    ALOGD("%s()", __func__);
-
-    size_t sizeToCopy = kH264StartCodeSize + srcSize;
-    if (sizeToCopy > *dstSize) {
-        ALOGE("Could not copy a NALU, not enough space in destination buffer");
-        return;
-    }
-
-    memcpy(*dst, kH264StartCode, kH264StartCodeSize);
-    memcpy(*dst + kH264StartCodeSize, src, srcSize);
-
-    *dst += sizeToCopy;
-    *dstSize -= sizeToCopy;
-}
-
 // The maximum size for output buffer, which is chosen empirically for a 1080p video.
 constexpr size_t kMaxBitstreamBufferSizeInBytes = 2 * 1024 * 1024;  // 2MB
 // The frame size for 1080p (FHD) video in pixels.
@@ -180,15 +158,10 @@
 // static
 std::unique_ptr<V4L2EncodeComponent::InputFrame> V4L2EncodeComponent::InputFrame::Create(
         const C2ConstGraphicBlock& block) {
-    std::vector<::base::ScopedFD> fds;
+    std::vector<int> fds;
     const C2Handle* const handle = block.handle();
     for (int i = 0; i < handle->numFds; i++) {
-        fds.emplace_back(dup(handle->data[i]));
-        if (!fds.back().is_valid()) {
-            ALOGE("Failed to duplicate input graphic block handle %d (errno: %d)", handle->data[i],
-                  errno);
-            return nullptr;
-        }
+        fds.emplace_back(handle->data[i]);
     }
 
     return std::unique_ptr<InputFrame>(new InputFrame(std::move(fds)));
@@ -518,25 +491,28 @@
         return;
     }
 
-    // All work that needs to be drained should be ready now, which means the output work queue
-    // should only contain a single item which is marked as EOS.
-    if (mOutputWorkQueue.size() != 1u ||
-        !(mOutputWorkQueue.front()->input.flags & C2FrameData::FLAG_END_OF_STREAM)) {
-        ALOGE("Output work queue should contain a single EOS item after draining");
+    // The last work item in the output work queue should be an EOS request.
+    if (mOutputWorkQueue.empty() ||
+        !(mOutputWorkQueue.back()->input.flags & C2FrameData::FLAG_END_OF_STREAM)) {
+        ALOGE("The last item in the output work queue should be marked EOS");
         reportError(C2_CORRUPTED);
         return;
     }
 
-    std::unique_ptr<C2Work> eosWork = std::move(mOutputWorkQueue.front());
+    // Mark the last item in the output work queue as EOS done.
+    C2Work* eosWork = mOutputWorkQueue.back().get();
+    eosWork->worklets.back()->output.flags = C2FrameData::FLAG_END_OF_STREAM;
+
+    // Draining is done which means all buffers on the device output queue have been returned, but
+    // not all buffers on the device input queue might have been returned yet.
+    if ((mOutputWorkQueue.size() > 1) || !isWorkDone(*eosWork)) {
+        ALOGV("Draining done, waiting for input buffers to be returned");
+        return;
+    }
+
+    ALOGV("Draining done");
+    reportWork(std::move(mOutputWorkQueue.front()));
     mOutputWorkQueue.pop_front();
-    eosWork->worklets.front()->output.flags = C2FrameData::FLAG_END_OF_STREAM;
-    if (!isWorkDone(*eosWork)) {
-        ALOGE("EOS work item should be done after draining");
-        reportError(C2_CORRUPTED);
-        return;
-    }
-
-    reportWork(std::move(eosWork));
 
     // Draining the encoder is now done, we can start encoding again.
     if (!mInputWorkQueue.empty()) {
@@ -717,9 +693,8 @@
     // buffer always seems to fail unless we copy it into a new a buffer first. As a temporary
     // workaround the line below is commented, but this should be undone once the issue is fixed.
     //if (mInputLayout->format() != inputFormat) {
-    ALOGV("Creating input format convertor (%s -> %s)",
-          media::VideoPixelFormatToString(mInputLayout->format()).c_str(),
-          media::VideoPixelFormatToString(inputFormat).c_str());
+    ALOGV("Creating input format convertor (%s)",
+          media::VideoPixelFormatToString(mInputLayout->format()).c_str());
     mInputFormatConverter =
             FormatConverter::Create(inputFormat, mVisibleSize, kInputBufferCount, mInputCodedSize);
     if (!mInputFormatConverter) {
@@ -824,7 +799,8 @@
 
     // When encoding H.264 we want to prepend SPS and PPS to each IDR for resilience. Some
     // devices support this through the V4L2_CID_MPEG_VIDEO_H264_SPS_PPS_BEFORE_IDR control.
-    // Otherwise we have to cache the latest SPS and PPS and inject these manually.
+    // TODO(b/161495502): V4L2_CID_MPEG_VIDEO_H264_SPS_PPS_BEFORE_IDR is not supported yet; for
+    // now, just log a warning if the device does not expose the control.
     if (mDevice->IsCtrlExposed(V4L2_CID_MPEG_VIDEO_H264_SPS_PPS_BEFORE_IDR)) {
         if (!mDevice->SetExtCtrls(
                     V4L2_CTRL_CLASS_MPEG,
@@ -832,11 +808,9 @@
             ALOGE("Failed to configure device to prepend SPS and PPS to each IDR");
             return false;
         }
-        mInjectParamsBeforeIDR = false;
         ALOGV("Device supports prepending SPS and PPS to each IDR");
     } else {
-        mInjectParamsBeforeIDR = true;
-        ALOGV("Device doesn't support prepending SPS and PPS to IDR, injecting manually.");
+        ALOGW("Device doesn't support prepending SPS and PPS to IDR");
     }
 
     std::vector<media::V4L2ExtCtrl> h264Ctrls;
@@ -1102,8 +1076,8 @@
         startDevicePoll();
     }
 
-    // Allocate and queue all buffers on the output queue. These buffers will be used to store the
-    // encoded bitstreams.
+    // Queue all buffers on the output queue. These buffers will be used to store the encoded
+    // bitstreams.
     while (mOutputQueue->FreeBuffersCount() > 0) {
         if (!enqueueOutputBuffer()) return false;
     }
@@ -1154,7 +1128,8 @@
     }
 
     // Return all buffers to the input format convertor and clear all references to graphic blocks
-    // in the input queue.
+    // in the input queue. We don't need to clear the output map as those buffers will still be
+    // used.
     for (auto& it : mInputBuffersMap) {
         if (mInputFormatConverter && it.second) {
             mInputFormatConverter->returnBlock(it.first);
@@ -1190,7 +1165,10 @@
     ALOGV("Fetching linear block (size: %u)", mOutputBufferSize);
     std::shared_ptr<C2LinearBlock> outputBlock;
     c2_status_t status = mOutputBlockPool->fetchLinearBlock(
-            mOutputBufferSize, {C2MemoryUsage::CPU_READ, C2MemoryUsage::CPU_WRITE}, &outputBlock);
+            mOutputBufferSize,
+            C2MemoryUsage(C2MemoryUsage::CPU_READ |
+                          static_cast<uint64_t>(BufferUsage::VIDEO_ENCODER)),
+            &outputBlock);
     if (status != C2_OK) {
         ALOGE("Failed to fetch linear block (error: %d)", status);
         reportError(status);
@@ -1234,6 +1212,12 @@
         mOutputWorkQueue.pop_front();
     }
 
+    // We might have been waiting for input buffers to be returned after draining finished.
+    if (mEncoderState == EncoderState::DRAINING && mOutputWorkQueue.empty()) {
+        ALOGV("Draining done");
+        mEncoderState = EncoderState::WAITING_FOR_INPUT_BUFFERS;
+    }
+
     // If we previously used up all input queue buffers we can start encoding again now.
     if ((mEncoderState == EncoderState::WAITING_FOR_INPUT_BUFFERS) && !mInputWorkQueue.empty()) {
         setEncoderState(EncoderState::ENCODING);
@@ -1296,9 +1280,9 @@
     }
     work->worklets.front()->output.buffers.emplace_back(buffer);
 
-    // Return all completed work items. The work item might have been waiting for it's input buffer
-    // to be returned, in which case we can report it as completed now. As input buffers are not
-    // necessarily returned in order we might be able to return multiple ready work items now.
+    // We can report the work item as completed if its associated input buffer has also been
+    // released. As output buffers are not necessarily returned in order we might be able to return
+    // multiple ready work items now.
     while (!mOutputWorkQueue.empty() && isWorkDone(*mOutputWorkQueue.front())) {
         reportWork(std::move(mOutputWorkQueue.front()));
         mOutputWorkQueue.pop_front();
@@ -1306,7 +1290,7 @@
 }
 
 C2Work* V4L2EncodeComponent::getWorkByIndex(uint64_t index) {
-    ALOGD("%s(): getting work item (index: %" PRIu64 ")", __func__, index);
+    ALOGV("%s(): getting work item (index: %" PRIu64 ")", __func__, index);
     ALOG_ASSERT(mEncoderTaskRunner->RunsTasksInCurrentSequence());
 
     auto it = std::find_if(mOutputWorkQueue.begin(), mOutputWorkQueue.end(),
@@ -1321,7 +1305,7 @@
 }
 
 C2Work* V4L2EncodeComponent::getWorkByTimestamp(int64_t timestamp) {
-    ALOGD("%s(): getting work item (timestamp: %" PRId64 ")", __func__, timestamp);
+    ALOGV("%s(): getting work item (timestamp: %" PRId64 ")", __func__, timestamp);
     ALOG_ASSERT(mEncoderTaskRunner->RunsTasksInCurrentSequence());
     ALOG_ASSERT(timestamp >= 0);
 
@@ -1342,23 +1326,26 @@
 }
 
 bool V4L2EncodeComponent::isWorkDone(const C2Work& work) const {
-    ALOGD("%s()", __func__);
+    ALOGV("%s()", __func__);
     ALOG_ASSERT(mEncoderTaskRunner->RunsTasksInCurrentSequence());
 
     if ((work.input.flags & C2FrameData::FLAG_END_OF_STREAM) &&
         !(work.worklets.front()->output.flags & C2FrameData::FLAG_END_OF_STREAM)) {
-        ALOGV("Work item is marked as EOS but draining has not finished yet");
+        ALOGV("Work item %" PRIu64 " is marked as EOS but draining has not finished yet",
+              work.input.ordinal.frameIndex.peeku());
         return false;
     }
 
     if (!work.input.buffers.empty() && work.input.buffers.front()) {
-        ALOGV("Input buffer associated with work item not returned yet");
+        ALOGV("Input buffer associated with work item %" PRIu64 " not returned yet",
+              work.input.ordinal.frameIndex.peeku());
         return false;
     }
 
     // If the work item had an input buffer to be encoded, it should have an output buffer set.
     if (!work.input.buffers.empty() && work.worklets.front()->output.buffers.empty()) {
-        ALOGV("Output buffer associated with work item not returned yet");
+        ALOGV("Output buffer associated with work item %" PRIu64 " not returned yet",
+              work.input.ordinal.frameIndex.peeku());
         return false;
     }
 
@@ -1423,20 +1410,16 @@
         return;
     }
 
-    // Dequeue completed output (VIDEO_CAPTURE) buffers, and recycle to the free list. We dequeue
-    // from the output queue first. Otherwise there is a very small change that a drain is completed
-    // between dequeuing from input and output queue, which would cause us to call OnDrainDone()
-    // without all input buffers being returned yet. This is not a strict requirement but makes the
-    // OnDrainDone() function a lot simpler.
-    while (mOutputQueue->QueuedBuffersCount() > 0) {
-        if (!dequeueOutputBuffer()) break;
-    }
-
     // Dequeue completed input (VIDEO_OUTPUT) buffers, and recycle to the free list.
     while (mInputQueue->QueuedBuffersCount() > 0) {
         if (!dequeueInputBuffer()) break;
     }
 
+    // Dequeue completed output (VIDEO_CAPTURE) buffers, and recycle to the free list.
+    while (mOutputQueue->QueuedBuffersCount() > 0) {
+        if (!dequeueOutputBuffer()) break;
+    }
+
     ALOGV("%s() - done", __func__);
 }
 
@@ -1507,13 +1490,26 @@
         return false;
     }
 
-    size_t buffer_id = buffer->BufferId();
-    if (!std::move(*buffer).QueueMMap()) {
-        ALOGE("Failed to queue auto buffer using QueueMMap");
+    std::shared_ptr<C2LinearBlock> outputBlock = fetchOutputBlock();
+    if (!outputBlock) {
+        ALOGE("Failed to fetch output block");
+        reportError(C2_CORRUPTED);
         return false;
     }
 
-    ALOGV("%s(): Queued buffer in output queue (bufferId: %zu)", __func__, buffer_id);
+    size_t bufferId = buffer->BufferId();
+
+    std::vector<int> fds;
+    fds.push_back(outputBlock->handle()->data[0]);
+    if (!std::move(*buffer).QueueDMABuf(fds)) {
+        ALOGE("Failed to queue output buffer using QueueDMABuf");
+        reportError(C2_CORRUPTED);
+        return false;
+    }
+
+    ALOG_ASSERT(!mOutputBuffersMap[bufferId]);
+    mOutputBuffersMap[bufferId] = std::move(outputBlock);
+    ALOGV("%s(): Queued buffer in output queue (bufferId: %zu)", __func__, bufferId);
     return true;
 }
 
@@ -1575,30 +1571,20 @@
           ", bufferId: %zu, data size: %zu, EOS: %d)",
           timestamp.InMicroseconds(), buffer->BufferId(), encodedDataSize, buffer->IsLast());
 
-    // If the output buffer contains encoded data we need to allocate a new output block and copy
-    // the encoded buffer data.
-    // TODO(dstaessens): Avoid always performing copy while outputting buffers.
-    if (encodedDataSize > 0) {
-        std::shared_ptr<C2LinearBlock> outputBlock = fetchOutputBlock();
-        if (!outputBlock) {
-            ALOGE("Failed to create output block");
-            reportError(C2_CORRUPTED);
-            return false;
-        }
-        size_t outputDataSize = copyIntoOutputBuffer(buffer, *outputBlock);
-        if (outputDataSize == 0) {
-            ALOGE("Invalid output buffer size");
-            reportError(C2_CORRUPTED);
-            return false;
-        }
+    if (!mOutputBuffersMap[buffer->BufferId()]) {
+        ALOGE("Failed to find output block associated with output buffer");
+        reportError(C2_CORRUPTED);
+        return false;
+    }
 
-        onOutputBufferDone(outputDataSize, buffer->IsKeyframe(), timestamp.InMicroseconds(),
-                           outputBlock);
+    std::shared_ptr<C2LinearBlock> block = std::move(mOutputBuffersMap[buffer->BufferId()]);
+    if (encodedDataSize > 0) {
+        onOutputBufferDone(encodedDataSize, buffer->IsKeyframe(), timestamp.InMicroseconds(),
+                           std::move(block));
     }
 
     // If the buffer is marked as last and we were flushing the encoder, flushing is now done.
     if ((mEncoderState == EncoderState::DRAINING) && buffer->IsLast()) {
-        ALOG_ASSERT(mInputQueue->QueuedBuffersCount() == 0);
         onDrainDone(true);
 
         // Start the encoder again.
@@ -1612,8 +1598,7 @@
         }
     }
 
-    // We copied the result into an output block, so we can immediately free the output buffer and
-    // enqueue it again so it can be reused.
+    // Queue a new output buffer to replace the one we dequeued.
     buffer = nullptr;
     enqueueOutputBuffer();
 
@@ -1641,6 +1626,7 @@
     ALOGV("%s()", __func__);
     ALOG_ASSERT(mEncoderTaskRunner->RunsTasksInCurrentSequence());
     ALOG_ASSERT(!mOutputQueue->IsStreaming());
+    ALOG_ASSERT(mOutputBuffersMap.empty());
 
     // Fetch the output block pool.
     C2BlockPool::local_id_t poolId = mInterface->getBlockPoolId();
@@ -1650,13 +1636,15 @@
         return false;
     }
 
-    // Memory is allocated here to decode into. The encoded result is copied to a blockpool buffer
-    // upon dequeuing.
-    if (mOutputQueue->AllocateBuffers(kOutputBufferCount, V4L2_MEMORY_MMAP) < kOutputBufferCount) {
-        ALOGE("Failed to allocate V4L2 output buffers.");
+    // No memory is allocated here, we just generate a list of buffers on the output queue, which
+    // will hold memory handles to the real buffers.
+    if (mOutputQueue->AllocateBuffers(kOutputBufferCount, V4L2_MEMORY_DMABUF) <
+        kOutputBufferCount) {
+        ALOGE("Failed to create V4L2 output buffers.");
         return false;
     }
 
+    mOutputBuffersMap.resize(mOutputQueue->AllocatedBuffersCount());
     return true;
 }
 
@@ -1677,103 +1665,10 @@
 
     if (!mOutputQueue || mOutputQueue->AllocatedBuffersCount() == 0) return;
     mOutputQueue->DeallocateBuffers();
+    mOutputBuffersMap.clear();
     mOutputBlockPool.reset();
 }
 
-size_t V4L2EncodeComponent::copyIntoOutputBuffer(
-        scoped_refptr<media::V4L2ReadableBuffer> outputBuffer, const C2LinearBlock& outputBlock) {
-    ALOGV("%s()", __func__);
-    ALOG_ASSERT(mEncoderTaskRunner->RunsTasksInCurrentSequence());
-
-    const uint8_t* src = static_cast<const uint8_t*>(outputBuffer->GetPlaneMapping(0)) +
-                         outputBuffer->GetPlaneDataOffset(0);
-    size_t srcSize = outputBuffer->GetPlaneBytesUsed(0) - outputBuffer->GetPlaneDataOffset(0);
-
-    // TODO(dstaessens): Investigate mapping output block directly.
-    int dupFd = dup(outputBlock.handle()->data[0]);
-    if (dupFd < 0) {
-        ALOGE("Failed to duplicate output buffer handle (errno: %d)", errno);
-        reportError(C2_CORRUPTED);
-        return 0;
-    }
-    ::base::SharedMemoryHandle shmHandle(::base::FileDescriptor(dupFd, true), 0u,
-                                         ::base::UnguessableToken::Create());
-    auto dest =
-            std::make_unique<media::UnalignedSharedMemory>(shmHandle, outputBlock.size(), false);
-    if (!dest->MapAt(outputBlock.offset(), outputBlock.size())) {
-        ALOGE("Failed to map output buffer");
-        return 0;
-    }
-
-    uint8_t* dstPtr = static_cast<uint8_t*>(dest->memory());
-    size_t remainingDstSize = dest->size();
-
-    if (!mInjectParamsBeforeIDR) {
-        if (srcSize <= remainingDstSize) {
-            memcpy(dstPtr, src, srcSize);
-            return srcSize;
-        } else {
-            ALOGE("Output data did not fit in the BitstreamBuffer");
-            return 0;
-        }
-    }
-
-    // Cache the newest SPS and PPS found in the stream, and inject them before each IDR found.
-    media::H264Parser parser;
-    parser.SetStream(src, srcSize);
-    media::H264NALU nalu;
-    // TODO(dstaessens): Split SPS and PPS case?
-    bool streamParamsFound = false;
-
-    while (parser.AdvanceToNextNALU(&nalu) == media::H264Parser::kOk) {
-        // nalu.size is always without the start code, regardless of the NALU type.
-        if (nalu.size + kH264StartCodeSize > remainingDstSize) {
-            ALOGE("Output data did not fit in the BitstreamBuffer");
-            break;
-        }
-
-        switch (nalu.nal_unit_type) {
-        case media::H264NALU::kSPS:
-            mCachedSPS.resize(nalu.size);
-            memcpy(mCachedSPS.data(), nalu.data, nalu.size);
-            streamParamsFound = true;
-            ALOGE("Updated cached SPS");
-            break;
-        case media::H264NALU::kPPS:
-            mCachedPPS.resize(nalu.size);
-            memcpy(mCachedPPS.data(), nalu.data, nalu.size);
-            streamParamsFound = true;
-            ALOGE("Updated cached PPS");
-            break;
-        case media::H264NALU::kIDRSlice:
-            if (streamParamsFound) {
-                ALOGE("Not injecting stream header before IDR, already present");
-                break;
-            }
-            // Only inject if we have both headers cached, and enough space for both the headers
-            // including the H.264 start codes and the NALU itself.
-            size_t h264HeaderSize =
-                    mCachedSPS.size() + mCachedPPS.size() + (2 * kH264StartCodeSize);
-            if (mCachedSPS.empty() || mCachedPPS.empty() ||
-                (h264HeaderSize + nalu.size + kH264StartCodeSize > remainingDstSize)) {
-                ALOGE("Not enough space to inject a stream header before IDR");
-                break;
-            }
-
-            copyNALUPrependingStartCode(mCachedSPS.data(), mCachedSPS.size(), &dstPtr,
-                                        &remainingDstSize);
-            copyNALUPrependingStartCode(mCachedPPS.data(), mCachedPPS.size(), &dstPtr,
-                                        &remainingDstSize);
-            ALOGV("Stream header injected before IDR");
-            break;
-        }
-
-        copyNALUPrependingStartCode(nalu.data, nalu.size, &dstPtr, &remainingDstSize);
-    }
-
-    return dest->size() - remainingDstSize;
-}
-
 void V4L2EncodeComponent::reportError(c2_status_t error) {
     ALOGV("%s()", __func__);
     ALOG_ASSERT(mEncoderTaskRunner->RunsTasksInCurrentSequence());
diff --git a/components/V4L2EncodeInterface.cpp b/components/V4L2EncodeInterface.cpp
index 50bc4aa..9e6b556 100644
--- a/components/V4L2EncodeInterface.cpp
+++ b/components/V4L2EncodeInterface.cpp
@@ -11,6 +11,7 @@
 
 #include <C2PlatformSupport.h>
 #include <SimpleC2Interface.h>
+#include <android/hardware/graphics/common/1.0/types.h>
 #include <media/stagefright/MediaDefs.h>
 #include <utils/Log.h>
 
@@ -18,6 +19,8 @@
 #include <v4l2_codec2/common/V4L2ComponentCommon.h>
 #include <video_codecs.h>
 
+using android::hardware::graphics::common::V1_0::BufferUsage;
+
 namespace android {
 
 namespace {
@@ -333,6 +336,18 @@
                     .withConstValue(new C2StreamBufferTypeSetting::input(0u, C2BufferData::GRAPHIC))
                     .build());
 
+    // TODO(b/167640667) Add VIDEO_ENCODER flag once input convertor is not enabled by default.
+    // When using the format convertor (which is currently always enabled) it's not useful to add
+    // the VIDEO_ENCODER buffer flag for input buffers here. Currently zero-copy is not supported
+    // yet, so when using this flag an additional buffer will be allocated on host side and a copy
+    // will be performed between the guest and host buffer to keep them in sync. This is wasteful as
+    // the buffer is only used on guest side by the format convertor which converts and copies the
+    // buffer into another buffer.
+    //addParameter(DefineParam(mInputMemoryUsage, C2_PARAMKEY_INPUT_STREAM_USAGE)
+    //                     .withConstValue(new C2StreamUsageTuning::input(
+    //                             0u, static_cast<uint64_t>(BufferUsage::VIDEO_ENCODER)))
+    //                     .build());
+
     addParameter(
             DefineParam(mOutputFormat, C2_PARAMKEY_OUTPUT_STREAM_BUFFER_TYPE)
                     .withConstValue(new C2StreamBufferTypeSetting::output(0u, C2BufferData::LINEAR))
diff --git a/components/VideoFrame.cpp b/components/VideoFrame.cpp
index bcdb283..cb5efb7 100644
--- a/components/VideoFrame.cpp
+++ b/components/VideoFrame.cpp
@@ -16,25 +16,21 @@
 std::unique_ptr<VideoFrame> VideoFrame::Create(std::shared_ptr<C2GraphicBlock> block) {
     if (!block) return nullptr;
 
-    std::vector<::base::ScopedFD> fds;
+    std::vector<int> fds;
     const C2Handle* const handle = block->handle();
     for (int i = 0; i < handle->numFds; i++) {
-        fds.emplace_back(dup(handle->data[i]));
-        if (!fds.back().is_valid()) {
-            ALOGE("Failed to dup(%d), errno=%d", handle->data[i], errno);
-            return nullptr;
-        }
+        fds.emplace_back(handle->data[i]);
     }
 
     return std::unique_ptr<VideoFrame>(new VideoFrame(std::move(block), std::move(fds)));
 }
 
-VideoFrame::VideoFrame(std::shared_ptr<C2GraphicBlock> block, std::vector<::base::ScopedFD> fds)
-      : mGraphicBlock(std::move(block)), mFds(std::move(fds)) {}
+VideoFrame::VideoFrame(std::shared_ptr<C2GraphicBlock> block, std::vector<int> fds)
+      : mGraphicBlock(std::move(block)), mFds(fds) {}
 
 VideoFrame::~VideoFrame() = default;
 
-const std::vector<::base::ScopedFD>& VideoFrame::getFDs() const {
+const std::vector<int>& VideoFrame::getFDs() const {
     return mFds;
 }
 
diff --git a/components/VideoFramePool.cpp b/components/VideoFramePool.cpp
index 662ba6e..b6bbfab 100644
--- a/components/VideoFramePool.cpp
+++ b/components/VideoFramePool.cpp
@@ -16,7 +16,7 @@
 #include <base/time/time.h>
 #include <log/log.h>
 
-#include <v4l2_codec2/components/VideoTypes.h>
+#include <v4l2_codec2/common/VideoTypes.h>
 #include <v4l2_codec2/plugin_store/C2VdaBqBlockPool.h>
 #include <v4l2_codec2/plugin_store/C2VdaPooledBlockPool.h>
 #include <v4l2_codec2/plugin_store/V4L2AllocatorId.h>
@@ -57,6 +57,17 @@
 }
 
 // static
+bool VideoFramePool::setNotifyBlockAvailableCb(C2BlockPool& blockPool, ::base::OnceClosure cb) {
+    ALOGV("%s() blockPool.getAllocatorId() = %u", __func__, blockPool.getAllocatorId());
+
+    if (blockPool.getAllocatorId() == C2PlatformAllocatorStore::BUFFERQUEUE) {
+        C2VdaBqBlockPool* bqPool = static_cast<C2VdaBqBlockPool*>(&blockPool);
+        return bqPool->setNotifyBlockAvailableCb(std::move(cb));
+    }
+    return false;
+}
+
+// static
 std::unique_ptr<VideoFramePool> VideoFramePool::Create(
         std::shared_ptr<C2BlockPool> blockPool, const size_t numBuffers, const media::Size& size,
         HalPixelFormat pixelFormat, bool isSecure,
@@ -106,7 +117,6 @@
     ALOG_ASSERT(mClientTaskRunner->RunsTasksInCurrentSequence());
 
     mClientWeakThisFactory.InvalidateWeakPtrs();
-    mCancelGetFrame = true;
 
     if (mFetchThread.IsRunning()) {
         mFetchTaskRunner->PostTask(FROM_HERE,
@@ -122,87 +132,101 @@
     mFetchWeakThisFactory.InvalidateWeakPtrs();
 }
 
-void VideoFramePool::getVideoFrame(GetVideoFrameCB cb) {
+bool VideoFramePool::getVideoFrame(GetVideoFrameCB cb) {
     ALOGV("%s()", __func__);
     ALOG_ASSERT(mClientTaskRunner->RunsTasksInCurrentSequence());
 
-    ++mNumPendingRequests;
-    mFetchTaskRunner->PostTask(FROM_HERE, ::base::BindOnce(&VideoFramePool::getVideoFrameTask,
-                                                           mFetchWeakThis, std::move(cb)));
+    if (mOutputCb) {
+        return false;
+    }
+
+    mOutputCb = std::move(cb);
+    mFetchTaskRunner->PostTask(
+            FROM_HERE, ::base::BindOnce(&VideoFramePool::getVideoFrameTask, mFetchWeakThis));
+    return true;
 }
 
-bool VideoFramePool::hasPendingRequests() const {
+// static
+void VideoFramePool::getVideoFrameTaskThunk(
+        scoped_refptr<::base::SequencedTaskRunner> taskRunner,
+        std::optional<::base::WeakPtr<VideoFramePool>> weakPool) {
     ALOGV("%s()", __func__);
-    ALOG_ASSERT(mClientTaskRunner->RunsTasksInCurrentSequence());
+    ALOG_ASSERT(weakPool);
 
-    return mNumPendingRequests > 0;
+    taskRunner->PostTask(FROM_HERE,
+                         ::base::BindOnce(&VideoFramePool::getVideoFrameTask, *weakPool));
 }
 
-void VideoFramePool::getVideoFrameTask(GetVideoFrameCB cb) {
+void VideoFramePool::getVideoFrameTask() {
     ALOGV("%s()", __func__);
     ALOG_ASSERT(mFetchTaskRunner->RunsTasksInCurrentSequence());
-    // Initial delay: 64us
-    constexpr size_t kFetchRetryDelayInit = 64;
-    // Max delay: 16ms (1 frame at 60fps)
-    constexpr size_t kFetchRetryDelayMax = 16384;
-    std::optional<FrameWithBlockId> frameWithBlockId;
 
-    size_t numRetries = 0;
-    size_t delay = kFetchRetryDelayInit;
-    while (true) {
-        if (mCancelGetFrame) {
-            ALOGW("Request to get frame canceled after %zu retries", numRetries);
-            break;
-        }
+    // Variables used for exponential-backoff retries when buffer fetching times out.
+    constexpr size_t kFetchRetryDelayInit = 64;    // Initial delay: 64us
+    constexpr size_t kFetchRetryDelayMax = 16384;  // Max delay: 16ms (1 frame at 60fps)
+    static size_t sNumRetries = 0;
+    static size_t sDelay = kFetchRetryDelayInit;
 
-        std::shared_ptr<C2GraphicBlock> block;
-        c2_status_t err = mBlockPool->fetchGraphicBlock(mSize.width(), mSize.height(),
-                                                        static_cast<uint32_t>(mPixelFormat),
-                                                        mMemoryUsage, &block);
-
-        if (err == C2_OK) {
-            ALOG_ASSERT(block != nullptr);
-            std::optional<uint32_t> bufferId = getBufferIdFromGraphicBlock(*mBlockPool, *block);
-            std::unique_ptr<VideoFrame> frame = VideoFrame::Create(std::move(block));
-            // Only pass the frame + id pair if both have successfully been obtained.
-            // Otherwise exit the loop so a nullopt is passed to the client.
-            if (bufferId && frame) {
-                frameWithBlockId = std::make_pair(std::move(frame), *bufferId);
-            }
-            break;
-        } else if (err != C2_TIMED_OUT && err != C2_BLOCKING) {
-            ALOGE("Failed to fetch block, err=%d, retry %zu times", err, numRetries);
-            break;
+    std::shared_ptr<C2GraphicBlock> block;
+    c2_status_t err = mBlockPool->fetchGraphicBlock(mSize.width(), mSize.height(),
+                                                    static_cast<uint32_t>(mPixelFormat),
+                                                    mMemoryUsage, &block);
+    if (err == C2_TIMED_OUT || err == C2_BLOCKING) {
+        if (setNotifyBlockAvailableCb(*mBlockPool,
+                                      ::base::BindOnce(&VideoFramePool::getVideoFrameTaskThunk,
+                                                       mFetchTaskRunner, mFetchWeakThis))) {
+            ALOGV("%s(): fetchGraphicBlock() timeout, waiting for block available.", __func__);
         } else {
-            ++numRetries;
-            ALOGD("fetchGraphicBlock() timeout, waiting %zuus (%zu retry)", delay, numRetries);
-            usleep(delay);
-            // Exponential backoff
-            delay = std::min(delay * 2, kFetchRetryDelayMax);
+            ALOGV("%s(): fetchGraphicBlock() timeout, waiting %zuus (%zu retry)", __func__, sDelay,
+                  sNumRetries + 1);
+            mFetchTaskRunner->PostDelayedTask(
+                    FROM_HERE, ::base::BindOnce(&VideoFramePool::getVideoFrameTask, mFetchWeakThis),
+                    ::base::TimeDelta::FromMicroseconds(sDelay));
+
+            sDelay = std::min(sDelay * 2, kFetchRetryDelayMax);  // Exponential backoff
+            sNumRetries++;
         }
+
+        return;
+    }
+
+    // Reset to the default value.
+    sNumRetries = 0;
+    sDelay = kFetchRetryDelayInit;
+
+    std::optional<FrameWithBlockId> frameWithBlockId;
+    if (err == C2_OK) {
+        ALOG_ASSERT(block != nullptr);
+        std::optional<uint32_t> bufferId = getBufferIdFromGraphicBlock(*mBlockPool, *block);
+        std::unique_ptr<VideoFrame> frame = VideoFrame::Create(std::move(block));
+        // Only pass the frame + id pair if both have successfully been obtained.
+        // Otherwise leave |frameWithBlockId| empty so a nullopt is passed to the client.
+        if (bufferId && frame) {
+            frameWithBlockId = std::make_pair(std::move(frame), *bufferId);
+        } else {
+            ALOGE("%s(): Failed to generate VideoFrame or get the buffer id.", __func__);
+        }
+    } else {
+        ALOGE("%s(): Failed to fetch block, err=%d", __func__, err);
     }
 
     mClientTaskRunner->PostTask(
             FROM_HERE, ::base::BindOnce(&VideoFramePool::onVideoFrameReady, mClientWeakThis,
-                                        std::move(cb), std::move(frameWithBlockId)));
+                                        std::move(frameWithBlockId)));
 }
 
-void VideoFramePool::onVideoFrameReady(GetVideoFrameCB cb,
-                                       std::optional<FrameWithBlockId> frameWithBlockId) {
+void VideoFramePool::onVideoFrameReady(std::optional<FrameWithBlockId> frameWithBlockId) {
     ALOGV("%s()", __func__);
     ALOG_ASSERT(mClientTaskRunner->RunsTasksInCurrentSequence());
 
-    --mNumPendingRequests;
-
     if (!frameWithBlockId) {
         ALOGE("Failed to get GraphicBlock, abandoning all pending requests.");
         mClientWeakThisFactory.InvalidateWeakPtrs();
         mClientWeakThis = mClientWeakThisFactory.GetWeakPtr();
-
-        mNumPendingRequests = 0;
     }
 
-    std::move(cb).Run(std::move(frameWithBlockId));
+    ALOG_ASSERT(mOutputCb);
+    std::move(mOutputCb).Run(std::move(frameWithBlockId));
 }
 
 }  // namespace android
diff --git a/components/include/v4l2_codec2/components/BitstreamBuffer.h b/components/include/v4l2_codec2/components/BitstreamBuffer.h
index cc8d3f6..ec8a917 100644
--- a/components/include/v4l2_codec2/components/BitstreamBuffer.h
+++ b/components/include/v4l2_codec2/components/BitstreamBuffer.h
@@ -11,14 +11,16 @@
 
 namespace android {
 
+// The BitstreamBuffer class can be used to store encoded video data.
+// Note: The BitstreamBuffer does not take ownership of the data. The file descriptor is not
+//       duplicated and the caller is responsible for keeping the data alive.
 struct BitstreamBuffer {
-    BitstreamBuffer(const int32_t id, base::ScopedFD dmabuf_fd, const size_t offset,
-                    const size_t size)
-          : id(id), dmabuf_fd(std::move(dmabuf_fd)), offset(offset), size(size) {}
+    BitstreamBuffer(const int32_t id, int dmabuf_fd, const size_t offset, const size_t size)
+          : id(id), dmabuf_fd(dmabuf_fd), offset(offset), size(size) {}
     ~BitstreamBuffer() = default;
 
     const int32_t id;
-    base::ScopedFD dmabuf_fd;
+    int dmabuf_fd;
     const size_t offset;
     const size_t size;
 };
diff --git a/components/include/v4l2_codec2/components/V4L2DecodeInterface.h b/components/include/v4l2_codec2/components/V4L2DecodeInterface.h
index 46c565e..b57f6c1 100644
--- a/components/include/v4l2_codec2/components/V4L2DecodeInterface.h
+++ b/components/include/v4l2_codec2/components/V4L2DecodeInterface.h
@@ -12,7 +12,7 @@
 #include <util/C2InterfaceHelper.h>
 
 #include <size.h>
-#include <v4l2_codec2/components/VideoTypes.h>
+#include <v4l2_codec2/common/VideoTypes.h>
 
 namespace android {
 
diff --git a/components/include/v4l2_codec2/components/V4L2Decoder.h b/components/include/v4l2_codec2/components/V4L2Decoder.h
index eb12950..5539042 100644
--- a/components/include/v4l2_codec2/components/V4L2Decoder.h
+++ b/components/include/v4l2_codec2/components/V4L2Decoder.h
@@ -15,10 +15,10 @@
 
 #include <rect.h>
 #include <size.h>
+#include <v4l2_codec2/common/VideoTypes.h>
 #include <v4l2_codec2/components/VideoDecoder.h>
 #include <v4l2_codec2/components/VideoFrame.h>
 #include <v4l2_codec2/components/VideoFramePool.h>
-#include <v4l2_codec2/components/VideoTypes.h>
 #include <v4l2_device.h>
 
 namespace android {
diff --git a/components/include/v4l2_codec2/components/V4L2EncodeComponent.h b/components/include/v4l2_codec2/components/V4L2EncodeComponent.h
index 8a9459c..4a61e05 100644
--- a/components/include/v4l2_codec2/components/V4L2EncodeComponent.h
+++ b/components/include/v4l2_codec2/components/V4L2EncodeComponent.h
@@ -16,7 +16,6 @@
 #include <C2Param.h>
 #include <C2ParamDef.h>
 #include <SimpleC2Interface.h>
-#include <base/files/scoped_file.h>
 #include <base/memory/scoped_refptr.h>
 #include <base/single_thread_task_runner.h>
 #include <base/synchronization/waitable_event.h>
@@ -69,11 +68,11 @@
         static std::unique_ptr<InputFrame> Create(const C2ConstGraphicBlock& block);
         ~InputFrame() = default;
 
-        const std::vector<::base::ScopedFD>& getFDs() const { return mFds; }
+        const std::vector<int>& getFDs() const { return mFds; }
 
     private:
-        InputFrame(std::vector<::base::ScopedFD> fds) : mFds(std::move(fds)) {}
-        const std::vector<::base::ScopedFD> mFds;
+        InputFrame(std::vector<int> fds) : mFds(std::move(fds)) {}
+        const std::vector<int> mFds;
     };
 
     // Possible component states.
@@ -192,12 +191,6 @@
     // Destroy the output buffers on the V4L2 device output queue.
     void destroyOutputBuffers();
 
-    // Copy the encoded data stream in |outputBuffer| to the specified |outputBlock|. If required
-    // stream headers will be injected into key frames. Returns the size in bytes of the resulting
-    // output block.
-    size_t copyIntoOutputBuffer(scoped_refptr<media::V4L2ReadableBuffer> outputBuffer,
-                                const C2LinearBlock& outputBlock);
-
     // Notify the client an error occurred and switch to the error state.
     void reportError(c2_status_t error);
 
@@ -250,16 +243,6 @@
     // Key frame counter, a key frame will be requested each time it reaches zero.
     uint32_t mKeyFrameCounter = 0;
 
-    // Whether we need to manually cache and prepend the SPS and PPS to each IDR frame. When
-    // encoding H.264 we prepend each IDR with SPS and PPS for resilience. Some devices support this
-    // via the V4L2_CID_MPEG_VIDEO_H264_SPS_PPS_BEFORE_IDR control. For devices without support for
-    // this control we cache the latest SPS and PPS and manually inject them into the stream before
-    // every IDR.
-    bool mInjectParamsBeforeIDR = false;
-    // The latest cached SPS (without H.264 start code).
-    std::vector<uint8_t> mCachedSPS;
-    // The latest cached PPS (without H.264 start code).
-    std::vector<uint8_t> mCachedPPS;
     // Whether we extracted and submitted CSD (codec-specific data, e.g. H.264 SPS) to the framework.
     bool mCSDSubmitted = false;
 
@@ -270,6 +253,10 @@
 
     // List of work item indices and frames associated with each buffer in the device input queue.
     std::vector<std::pair<int64_t, std::unique_ptr<InputFrame>>> mInputBuffersMap;
+
+    // Map of buffer indices and output blocks associated with each buffer in the output queue. This
+    // map keeps the C2LinearBlock buffers alive so we can avoid duplicated fds.
+    std::vector<std::shared_ptr<C2LinearBlock>> mOutputBuffersMap;
     // The output block pool.
     std::shared_ptr<C2BlockPool> mOutputBlockPool;
 
diff --git a/components/include/v4l2_codec2/components/V4L2EncodeInterface.h b/components/include/v4l2_codec2/components/V4L2EncodeInterface.h
index e41ddff..f480d25 100644
--- a/components/include/v4l2_codec2/components/V4L2EncodeInterface.h
+++ b/components/include/v4l2_codec2/components/V4L2EncodeInterface.h
@@ -60,6 +60,8 @@
 
     // The input format kind; should be C2FormatVideo.
     std::shared_ptr<C2StreamBufferTypeSetting::input> mInputFormat;
+    // The memory usage flag of input buffer; should be BufferUsage::VIDEO_ENCODER.
+    std::shared_ptr<C2StreamUsageTuning::input> mInputMemoryUsage;
     // The output format kind; should be C2FormatCompressed.
     std::shared_ptr<C2StreamBufferTypeSetting::output> mOutputFormat;
     // The MIME type of input port; should be MEDIA_MIMETYPE_VIDEO_RAW.
diff --git a/components/include/v4l2_codec2/components/VideoDecoder.h b/components/include/v4l2_codec2/components/VideoDecoder.h
index 99d16b3..c737c65 100644
--- a/components/include/v4l2_codec2/components/VideoDecoder.h
+++ b/components/include/v4l2_codec2/components/VideoDecoder.h
@@ -9,12 +9,11 @@
 #include <memory>
 
 #include <base/callback.h>
-#include <base/files/scoped_file.h>
 
+#include <v4l2_codec2/common/VideoTypes.h>
 #include <v4l2_codec2/components/BitstreamBuffer.h>
 #include <v4l2_codec2/components/VideoFrame.h>
 #include <v4l2_codec2/components/VideoFramePool.h>
-#include <v4l2_codec2/components/VideoTypes.h>
 
 namespace android {
 
diff --git a/components/include/v4l2_codec2/components/VideoFrame.h b/components/include/v4l2_codec2/components/VideoFrame.h
index f666f4d..395a52b 100644
--- a/components/include/v4l2_codec2/components/VideoFrame.h
+++ b/components/include/v4l2_codec2/components/VideoFrame.h
@@ -9,7 +9,6 @@
 #include <vector>
 
 #include <C2Buffer.h>
-#include <base/files/scoped_file.h>
 
 #include <rect.h>
 
@@ -23,7 +22,7 @@
     ~VideoFrame();
 
     // Return the file descriptors of the corresponding buffer.
-    const std::vector<::base::ScopedFD>& getFDs() const;
+    const std::vector<int>& getFDs() const;
 
     // Getter and setter of the visible rectangle.
     void setVisibleRect(const media::Rect& visibleRect);
@@ -37,10 +36,10 @@
     C2ConstGraphicBlock getGraphicBlock();
 
 private:
-    VideoFrame(std::shared_ptr<C2GraphicBlock> block, std::vector<::base::ScopedFD> fds);
+    VideoFrame(std::shared_ptr<C2GraphicBlock> block, std::vector<int> fds);
 
     std::shared_ptr<C2GraphicBlock> mGraphicBlock;
-    std::vector<::base::ScopedFD> mFds;
+    std::vector<int> mFds;
     media::Rect mVisibleRect;
     int32_t mBitstreamId = -1;
 };
diff --git a/components/include/v4l2_codec2/components/VideoFramePool.h b/components/include/v4l2_codec2/components/VideoFramePool.h
index c692cb0..71bfe27 100644
--- a/components/include/v4l2_codec2/components/VideoFramePool.h
+++ b/components/include/v4l2_codec2/components/VideoFramePool.h
@@ -17,8 +17,8 @@
 #include <base/threading/thread.h>
 
 #include <size.h>
+#include <v4l2_codec2/common/VideoTypes.h>
 #include <v4l2_codec2/components/VideoFrame.h>
-#include <v4l2_codec2/components/VideoTypes.h>
 
 namespace android {
 
@@ -28,7 +28,7 @@
 class VideoFramePool {
 public:
     using FrameWithBlockId = std::pair<std::unique_ptr<VideoFrame>, uint32_t>;
-    using GetVideoFrameCB = base::OnceCallback<void(std::optional<FrameWithBlockId>)>;
+    using GetVideoFrameCB = ::base::OnceCallback<void(std::optional<FrameWithBlockId>)>;
 
     static std::unique_ptr<VideoFramePool> Create(
             std::shared_ptr<C2BlockPool> blockPool, const size_t numBuffers,
@@ -37,11 +37,10 @@
     ~VideoFramePool();
 
     // Get a VideoFrame instance, which will be passed via |cb|.
-    // If any error occurs, then pass nullptr.
-    void getVideoFrame(GetVideoFrameCB cb);
-
-    // Return true if any callback of getting VideoFrame instance is pending.
-    bool hasPendingRequests() const;
+    // If any error occurs, then std::nullopt will be passed via |cb|.
+    // Return false if the previous callback has not been called, and |cb| will
+    // be dropped directly.
+    bool getVideoFrame(GetVideoFrameCB cb);
 
 private:
     // |blockPool| is the C2BlockPool that we fetch graphic blocks from.
@@ -55,8 +54,10 @@
     bool initialize();
     void destroyTask();
 
-    void getVideoFrameTask(GetVideoFrameCB cb);
-    void onVideoFrameReady(GetVideoFrameCB cb, std::optional<FrameWithBlockId> frameWithBlockId);
+    static void getVideoFrameTaskThunk(scoped_refptr<::base::SequencedTaskRunner> taskRunner,
+                                       std::optional<::base::WeakPtr<VideoFramePool>> weakPool);
+    void getVideoFrameTask();
+    void onVideoFrameReady(std::optional<FrameWithBlockId> frameWithBlockId);
 
     // Extracts buffer ID from graphic block.
     // |block| is the graphic block allocated by |blockPool|.
@@ -67,20 +68,21 @@
     // |bufferCount| is the number of requested buffers.
     static c2_status_t requestNewBufferSet(C2BlockPool& blockPool, int32_t bufferCount);
 
+    // Ask |blockPool| to notify when a block is available via |cb|.
+    // Return true if |blockPool| supports notifying when a buffer becomes available.
+    static bool setNotifyBlockAvailableCb(C2BlockPool& blockPool, ::base::OnceClosure cb);
+
     std::shared_ptr<C2BlockPool> mBlockPool;
     const media::Size mSize;
     const HalPixelFormat mPixelFormat;
     const C2MemoryUsage mMemoryUsage;
 
-    size_t mNumPendingRequests = 0;
+    GetVideoFrameCB mOutputCb;
 
     scoped_refptr<::base::SequencedTaskRunner> mClientTaskRunner;
     ::base::Thread mFetchThread{"VideoFramePoolFetchThread"};
     scoped_refptr<::base::SequencedTaskRunner> mFetchTaskRunner;
 
-    // Set to true to unconditionally interrupt pending frame requests.
-    std::atomic<bool> mCancelGetFrame = false;
-
     ::base::WeakPtr<VideoFramePool> mClientWeakThis;
     ::base::WeakPtr<VideoFramePool> mFetchWeakThis;
     ::base::WeakPtrFactory<VideoFramePool> mClientWeakThisFactory{this};
diff --git a/plugin_store/Android.bp b/plugin_store/Android.bp
index ed9d784..73dccaf 100644
--- a/plugin_store/Android.bp
+++ b/plugin_store/Android.bp
@@ -21,6 +21,7 @@
     ],
     shared_libs: [
         "android.hardware.graphics.bufferqueue@2.0",
+        "libchrome",
         "libcutils",
         "libhardware",
         "libhidlbase",
@@ -35,6 +36,7 @@
     cflags: [
       "-Werror",
       "-Wall",
+      "-Wno-unused-parameter",  // needed for libchrome/base codes
       "-Wthread-safety",
     ],
 }
diff --git a/plugin_store/C2VdaBqBlockPool.cpp b/plugin_store/C2VdaBqBlockPool.cpp
index f0b95dd..9abc698 100644
--- a/plugin_store/C2VdaBqBlockPool.cpp
+++ b/plugin_store/C2VdaBqBlockPool.cpp
@@ -17,6 +17,7 @@
 #include <C2BlockInternal.h>
 #include <android/hardware/graphics/bufferqueue/2.0/IGraphicBufferProducer.h>
 #include <android/hardware/graphics/bufferqueue/2.0/IProducerListener.h>
+#include <base/callback.h>
 #include <log/log.h>
 #include <system/window.h>
 #include <types.h>
@@ -346,9 +347,6 @@
     explicit EventNotifier(const std::shared_ptr<Listener>& listener) : mListener(listener) {}
     virtual ~EventNotifier() = default;
 
-    // Enable or disable the notifier. The notifier would notify |mListener| when enabled.
-    virtual void enable(bool enabled) = 0;
-
 protected:
     void notify() {
         ALOGV("%s()", __func__);
@@ -367,89 +365,14 @@
     using EventNotifier::EventNotifier;
     ~BufferReleasedNotifier() override = default;
 
-    // EventNotifier implementation
-    void enable(bool enabled) override {
-        ALOGV("%s(%d)", __func__, enabled);
-        mEnabled = enabled;
-    }
-
     // HProducerListener implementation
     Return<void> onBuffersReleased(uint32_t count) override {
         ALOGV("%s(%u)", __func__, count);
-        if (count > 0 && mEnabled.load()) {
+        if (count > 0) {
             notify();
         }
         return {};
     }
-
-private:
-    std::atomic<bool> mEnabled{false};
-};
-
-// Notifies the listener with exponential backoff delay.
-class ExpDelayedNotifier : public EventNotifier {
-public:
-    explicit ExpDelayedNotifier(const std::shared_ptr<Listener>& listener)
-          : EventNotifier(listener) {
-        mRunningThread = std::thread(&ExpDelayedNotifier::run, this);
-    }
-    ~ExpDelayedNotifier() override {
-        ALOGV("%s()", __func__);
-        {
-            std::unique_lock<std::mutex> lock(mMutex);
-            mDestroying = true;
-            mInterruptCv.notify_one();
-        }
-        mRunningThread.join();
-    }
-
-    void enable(bool enabled) override {
-        ALOGV("%s(%d)", __func__, enabled);
-        std::lock_guard<std::mutex> lock(mMutex);
-
-        if (mEnabled == enabled) {
-            ALOGW("%s(): ExpDelayedNotifier already triggered %s.", __func__,
-                  enabled ? "on" : "off");
-            return;
-        }
-        mEnabled = enabled;
-        mInterruptCv.notify_one();
-    }
-
-private:
-    void run() {
-        ALOGV("%s()", __func__);
-        constexpr size_t kFetchRetryDelayInit = 1000;  // Initial delay: 1ms
-        constexpr size_t kFetchRetryDelayMax = 16000;  // Max delay: 16ms (1 frame at 60fps)
-
-        std::unique_lock<std::mutex> lock(mMutex);
-        while (true) {
-            mInterruptCv.wait(lock, [this]() { return mEnabled || mDestroying; });
-            if (mDestroying) return;
-
-            size_t delay = kFetchRetryDelayInit;
-            while (mEnabled && !mDestroying) {
-                mInterruptCv.wait_for(lock, delay * 1us,
-                                      [this]() { return !mEnabled || mDestroying; });
-                if (mDestroying) return;
-                if (!mEnabled) break;
-
-                notify();
-                delay = std::min(delay * 2, kFetchRetryDelayMax);
-            }
-        }
-    }
-
-    // The background thread for exponential backoff delay.
-    std::thread mRunningThread;
-    // The mutex to protect other members and condition variables.
-    std::mutex mMutex;
-    // Used to get interrupt when the notifier is enabled or destroyed.
-    std::condition_variable mInterruptCv;
-    // Set to true when the notifier is enabled.
-    bool mEnabled = false;
-    // Set to true when the notifier is about to be destroyed.
-    bool mDestroying = false;
 };
 
 /**
@@ -533,6 +456,7 @@
     c2_status_t updateGraphicBlock(bool willCancel, uint32_t oldSlot, uint32_t* newSlot,
                                    std::shared_ptr<C2GraphicBlock>* block /* nonnull */);
     c2_status_t getMinBuffersForDisplay(size_t* bufferCount);
+    bool setNotifyBlockAvailableCb(::base::OnceClosure cb);
 
 private:
     friend struct C2VdaBqBlockPoolData;
@@ -550,10 +474,6 @@
         C2AndroidMemoryUsage mUsage = C2MemoryUsage(0);
     };
 
-    c2_status_t fetchGraphicBlockInternalLocked(
-            uint32_t width, uint32_t height, uint32_t format, C2MemoryUsage usage,
-            std::shared_ptr<C2GraphicBlock>* block /* nonnull */);
-
     // For C2VdaBqBlockPoolData to detach corresponding slot buffer from BufferQueue.
     void detachBuffer(uint64_t producerId, int32_t slotId);
 
@@ -605,15 +525,13 @@
 
     // Listener for buffer release events.
     sp<EventNotifier> mFetchBufferNotifier;
+
     std::mutex mBufferReleaseMutex;
-    // Counter for the number of invocations of onProducerBufferReleased
-    uint64_t mBufferReleaseCount{0};
-    // Counter for the number of invocations of configureProducer
-    uint64_t mConfigCount{0};
-    // Cvar which is waited upon after failed attempts to deque buffers from
-    // mProducer. It is signaled when the consumer releases buffers (through
-    // onProducerBufferReleased) or when the producer changes.
-    std::condition_variable mBufferReleaseCv;
+    // Set to true when a buffer release event is triggered after dequeueing a
+    // buffer from the IGBP has timed out.
+    bool mBufferReleasedAfterTimedOut GUARDED_BY(mBufferReleaseMutex) = false;
+    // The callback used to notify the caller that a buffer is available.
+    ::base::OnceClosure mNotifyBlockAvailableCb GUARDED_BY(mBufferReleaseMutex);
 };
 
 C2VdaBqBlockPool::Impl::Impl(const std::shared_ptr<C2Allocator>& allocator)
@@ -627,48 +545,6 @@
     ALOGV("%s()", __func__);
     std::lock_guard<std::mutex> lock(mMutex);
 
-    c2_status_t res = fetchGraphicBlockInternalLocked(width, height, format, usage, block);
-    if (res != C2_TIMED_OUT) {
-        ALOGV("%s() fetchGraphicBlockInternalLocked() return %d", __func__, res);
-        return res;
-    }
-
-    mFetchBufferNotifier->enable(true);
-    while (true) {
-        {
-            std::unique_lock<std::mutex> releaseLock(mBufferReleaseMutex);
-            // Wait for either mBufferReleaseCount or mConfigCount to change.
-            uint64_t curConfigCount = mConfigCount;
-            uint64_t currentBufferReleaseCount = mBufferReleaseCount;
-            bool success = mBufferReleaseCv.wait_for(
-                    releaseLock, 100ms, [currentBufferReleaseCount, curConfigCount, this]() {
-                        return currentBufferReleaseCount != mBufferReleaseCount ||
-                               curConfigCount != mConfigCount;
-                    });
-            if (!success) {
-                res = C2_TIMED_OUT;
-                break;
-            } else if (mConfigCount != curConfigCount) {
-                res = C2_BAD_STATE;
-                break;
-            }
-        }
-
-        res = fetchGraphicBlockInternalLocked(width, height, format, usage, block);
-        if (res != C2_TIMED_OUT) {
-            ALOGV("%s() fetchGraphicBlockInternalLocked() return %d", __func__, res);
-            break;
-        }
-    }
-    mFetchBufferNotifier->enable(false);
-    return res;
-}
-
-c2_status_t C2VdaBqBlockPool::Impl::fetchGraphicBlockInternalLocked(
-        uint32_t width, uint32_t height, uint32_t format, C2MemoryUsage usage,
-        std::shared_ptr<C2GraphicBlock>* block /* nonnull */) {
-    ALOGV("%s()", __func__);
-
     if (!mProducer) {
         // Producer will not be configured in byte-buffer mode. Allocate buffers from allocator
         // directly as a basic graphic block pool.
@@ -704,6 +580,10 @@
     if (status == android::INVALID_OPERATION) {
         status = android::TIMED_OUT;
     }
+    if (status == android::TIMED_OUT) {
+        std::lock_guard<std::mutex> lock(mBufferReleaseMutex);
+        mBufferReleasedAfterTimedOut = false;
+    }
     if (status != android::NO_ERROR && status != BUFFER_NEEDS_REALLOCATION) {
         return asC2Error(status);
     }
@@ -717,7 +597,7 @@
             }
 
             if (fenceStatus == -ETIME) {  // fence wait timed out
-                ALOGV("buffer fence wait timed out, wait for retry...");
+                ALOGV("%s(): buffer (slot=%d) fence wait timed out", __func__, slot);
                 return C2_TIMED_OUT;
             }
             ALOGE("buffer fence wait error: %d", fenceStatus);
@@ -805,9 +685,20 @@
 
 void C2VdaBqBlockPool::Impl::onEventNotified() {
     ALOGV("%s()", __func__);
-    std::lock_guard<std::mutex> lock(mBufferReleaseMutex);
-    mBufferReleaseCount++;
-    mBufferReleaseCv.notify_one();
+    ::base::OnceClosure outputCb;
+    {
+        std::lock_guard<std::mutex> lock(mBufferReleaseMutex);
+
+        mBufferReleasedAfterTimedOut = true;
+        if (mNotifyBlockAvailableCb) {
+            outputCb = std::move(mNotifyBlockAvailableCb);
+        }
+    }
+
+    // Run the callback outside the lock to avoid a deadlock.
+    if (outputCb) {
+        std::move(outputCb).Run();
+    }
 }
 
 c2_status_t C2VdaBqBlockPool::Impl::queryGenerationAndUsage(
@@ -832,7 +723,7 @@
                 return C2_CORRUPTED;
             }
             if (fenceStatus == -ETIME) {  // fence wait timed out
-                ALOGV("buffer fence wait timed out, wait for retry...");
+                ALOGV("%s(): buffer (slot=%d) fence wait timed out", __func__, slot);
                 return C2_TIMED_OUT;
             }
             ALOGE("buffer fence wait error: %d", fenceStatus);
@@ -966,12 +857,6 @@
         mSlotAllocations.clear();
     }
 
-    {
-        std::lock_guard<std::mutex> releaseLock(mBufferReleaseMutex);
-        mConfigCount++;
-        mBufferReleaseCv.notify_one();
-    }
-
     if (newProducer->setDequeueTimeout(0) != android::NO_ERROR) {
         ALOGE("%s(): failed to setDequeueTimeout(0)", __func__);
         return;
@@ -982,9 +867,6 @@
     if (newProducer->connect(listener, 'ARC\0', false) == android::NO_ERROR) {
         ALOGI("connected to ARC-specific IGBP listener.");
         mFetchBufferNotifier = listener;
-    } else {
-        ALOGI("Fallback to exponential backoff polling.");
-        mFetchBufferNotifier = new ExpDelayedNotifier(shared_from_this());
     }
 
     // HGraphicBufferProducer could (and should) be replaced if the client has set a new generation
@@ -1197,6 +1079,32 @@
     }
 }
 
+bool C2VdaBqBlockPool::Impl::setNotifyBlockAvailableCb(::base::OnceClosure cb) {
+    ALOGV("%s()", __func__);
+    if (mFetchBufferNotifier == nullptr) {
+        return false;
+    }
+
+    ::base::OnceClosure outputCb;
+    {
+        std::lock_guard<std::mutex> lock(mBufferReleaseMutex);
+
+        // If a buffer has been released since dequeueBuffer() last timed out, we can notify the
+        // caller directly.
+        if (mBufferReleasedAfterTimedOut) {
+            outputCb = std::move(cb);
+        } else {
+            mNotifyBlockAvailableCb = std::move(cb);
+        }
+    }
+
+    // Run the callback outside the lock to avoid a deadlock.
+    if (outputCb) {
+        std::move(outputCb).Run();
+    }
+    return true;
+}
+
 C2VdaBqBlockPool::C2VdaBqBlockPool(const std::shared_ptr<C2Allocator>& allocator,
                                    const local_id_t localId)
       : C2BufferQueueBlockPool(allocator, localId), mLocalId(localId), mImpl(new Impl(allocator)) {}
@@ -1246,6 +1154,13 @@
     return C2_NO_INIT;
 }
 
+bool C2VdaBqBlockPool::setNotifyBlockAvailableCb(::base::OnceClosure cb) {
+    if (mImpl) {
+        return mImpl->setNotifyBlockAvailableCb(std::move(cb));
+    }
+    return false;
+}
+
 C2VdaBqBlockPoolData::C2VdaBqBlockPoolData(uint64_t producerId, int32_t slotId,
                                            const std::shared_ptr<C2VdaBqBlockPool::Impl>& pool)
       : mProducerId(producerId), mSlotId(slotId), mPool(pool) {}
diff --git a/plugin_store/include/v4l2_codec2/plugin_store/C2VdaBqBlockPool.h b/plugin_store/include/v4l2_codec2/plugin_store/C2VdaBqBlockPool.h
index b600716..fd524d2 100644
--- a/plugin_store/include/v4l2_codec2/plugin_store/C2VdaBqBlockPool.h
+++ b/plugin_store/include/v4l2_codec2/plugin_store/C2VdaBqBlockPool.h
@@ -12,6 +12,7 @@
 #include <C2BqBufferPriv.h>
 #include <C2Buffer.h>
 #include <C2PlatformSupport.h>
+#include <base/callback_forward.h>
 
 namespace android {
 
@@ -131,6 +132,18 @@
      */
     c2_status_t getMinBuffersForDisplay(size_t* bufferCount);
 
+    /**
+     * Set the callback that will be triggered when a block becomes available.
+     *
+     * \note C2VdaBqBlockPool-specific function
+     *
+     * \param cb  the callback function that will be triggered when a block becomes available.
+     *
+     * \return false if notifying the caller when a buffer is available is not supported.
+     *
+     */
+    bool setNotifyBlockAvailableCb(base::OnceClosure cb);
+
 private:
     friend struct C2VdaBqBlockPoolData;
     class Impl;
diff --git a/tests/c2_e2e_test/AndroidManifest.xml b/tests/c2_e2e_test/AndroidManifest.xml
index 3898348..e167cb5 100644
--- a/tests/c2_e2e_test/AndroidManifest.xml
+++ b/tests/c2_e2e_test/AndroidManifest.xml
@@ -19,7 +19,8 @@
         android:allowBackup="false"
         android:label="@string/app_name">
         <activity android:name=".E2eTestActivity"
-                  android:launchMode="singleTop">
+                  android:launchMode="singleTop"
+                  android:theme="@android:style/Theme.NoTitleBar.Fullscreen">
             <intent-filter>
                 <action android:name="android.intent.action.MAIN" />
                 <category android:name="android.intent.category.LAUNCHER" />
diff --git a/tests/c2_e2e_test/jni/mediacodec_decoder.cpp b/tests/c2_e2e_test/jni/mediacodec_decoder.cpp
index c263eec..1b835c0 100644
--- a/tests/c2_e2e_test/jni/mediacodec_decoder.cpp
+++ b/tests/c2_e2e_test/jni/mediacodec_decoder.cpp
@@ -327,7 +327,7 @@
     // output buffers from |codec_|.
     uint64_t timestamp_us = 0;
 
-    ALOGD("queueInputBuffer(index=%zu, offset=0, size=%zu, time=%" PRIu64 ", flags=%u) #%d", index,
+    ALOGV("queueInputBuffer(index=%zu, offset=0, size=%zu, time=%" PRIu64 ", flags=%u) #%d", index,
           fragment->data.size(), timestamp_us, input_flag, input_fragment_index_);
     media_status_t status = AMediaCodec_queueInputBuffer(
             codec_, index, 0 /* offset */, fragment->data.size(), timestamp_us, input_flag);
@@ -344,7 +344,7 @@
     // robustness.
     uint64_t timestamp_us = 0;
 
-    ALOGD("queueInputBuffer(index=%zu) EOS", index);
+    ALOGV("queueInputBuffer(index=%zu) EOS", index);
     media_status_t status =
             AMediaCodec_queueInputBuffer(codec_, index, 0 /* offset */, 0 /* size */, timestamp_us,
                                          AMEDIACODEC_BUFFER_FLAG_END_OF_STREAM);
@@ -368,7 +368,7 @@
     }
 
     received_outputs_++;
-    ALOGD("ReceiveOutputBuffer(index=%zu, size=%d, flags=%u) #%d", index, info.size, info.flags,
+    ALOGV("ReceiveOutputBuffer(index=%zu, size=%d, flags=%u) #%d", index, info.size, info.flags,
           received_outputs_);
 
     // Do not callback for dummy EOS output (info.size == 0)