Reland "codec2: add dequeue thread loop to recycle output buffers"
This reverts commit cb35176c4c402a7d77cd1c472002976be4021cb8.
The dequeue thread is implemented for C2VDAComponent to run a routine that
dequeues buffers from C2VdaBqBlockPool. When a buffer (along with its slot index)
is dequeued and obtained from the block pool, C2VDAComponent uses its map of slot
index to buffer id to find the corresponding buffer, which is then reusable for VDA.
Note: this change will break C2VDAComponent_test, which needs to be fixed.
Note: this only supports output surface mode; we need another solution for
byte-buffer mode.
Bug: 79239042
Test: CtsMediaTestCases android.media.cts.MediaPlayerTest#testLocalVideo_MP4_H264_480x360_1000kbps_25fps_AAC_Stereo_128kbps_44110Hz
Change-Id: Icb42c425f90e0615c63728f75627a7a0cb661df3
diff --git a/Android.mk b/Android.mk
index bff4409..cfb8442 100644
--- a/Android.mk
+++ b/Android.mk
@@ -29,6 +29,7 @@
liblog \
libmedia \
libstagefright \
+ libstagefright_bufferqueue_helper \
libstagefright_codec2 \
libstagefright_codec2_vndk \
libstagefright_simple_c2component \
diff --git a/C2VDAComponent.cpp b/C2VDAComponent.cpp
index 2b3abde..2e8f9d3 100644
--- a/C2VDAComponent.cpp
+++ b/C2VDAComponent.cpp
@@ -12,11 +12,14 @@
#endif
#define __C2_GENERATE_GLOBAL_VARS__
+#include <C2VDAAllocatorStore.h>
+#include <C2VdaBqBlockPool.h>
#include <C2VDAComponent.h>
#include <C2VDASupport.h> // to getParamReflector from vda store
#include <videodev2.h>
+#include <C2AllocatorGralloc.h>
#include <C2ComponentFactory.h>
#include <C2PlatformSupport.h>
@@ -52,6 +55,9 @@
const C2String kVP8DecoderName = "c2.vda.vp8.decoder";
const C2String kVP9DecoderName = "c2.vda.vp9.decoder";
+const uint32_t kDpbOutputBufferExtraCount = 3; // Use the same number as ACodec.
+const int kDequeueRetryDelayUs = 10000; // Wait time of dequeue buffer retry in microseconds.
+
} // namespace
C2VDAComponent::IntfImpl::IntfImpl(C2String name, const std::shared_ptr<C2ReflectorHelper>& helper)
@@ -170,28 +176,6 @@
CHECK_NE(mComponentState, ComponentState::UNINITIALIZED); \
} while (0)
-class C2VDAGraphicBuffer : public C2Buffer {
-public:
- C2VDAGraphicBuffer(const std::shared_ptr<C2GraphicBlock>& block, const media::Rect& visibleRect,
- const base::Closure& releaseCB);
- ~C2VDAGraphicBuffer() override;
-
-private:
- base::Closure mReleaseCB;
-};
-
-C2VDAGraphicBuffer::C2VDAGraphicBuffer(const std::shared_ptr<C2GraphicBlock>& block,
- const media::Rect& visibleRect,
- const base::Closure& releaseCB)
- : C2Buffer({block->share(C2Rect(visibleRect.width(), visibleRect.height()), C2Fence())}),
- mReleaseCB(releaseCB) {}
-
-C2VDAGraphicBuffer::~C2VDAGraphicBuffer() {
- if (!mReleaseCB.is_null()) {
- mReleaseCB.Run();
- }
-}
-
C2VDAComponent::VideoFormat::VideoFormat(HalPixelFormat pixelFormat, uint32_t minNumBuffers,
media::Size codedSize, media::Rect visibleRect)
: mPixelFormat(pixelFormat),
@@ -199,11 +183,22 @@
mCodedSize(codedSize),
mVisibleRect(visibleRect) {}
+static uint32_t getSlotFromGraphicBlockHandle(const C2Handle* const handle) {  // Returns the igbp_slot value unwrapped from |handle|'s gralloc metadata.
+ uint32_t width, height, format, stride, igbp_slot, generation;  // NOTE(review): left uninitialized; assumes the unwrap call below always writes every output - confirm.
+ uint64_t usage, igbp_id;
+ _UnwrapNativeCodec2GrallocMetadata(
+ handle, &width, &height, &format, &usage, &stride, &generation, &igbp_id, &igbp_slot);
+ ALOGV("Unwrap Metadata: igbp[%" PRIu64 ", %u] (%u*%u, fmt %#x, usage %" PRIx64 ", stride %u)",  // Verbose log of the unwrapped fields (|generation| is not logged).
+ igbp_id, igbp_slot, width, height, format, usage, stride);
+ return igbp_slot;  // Only the slot index is handed back; the other fields are used for the log above only.
+}
+
C2VDAComponent::C2VDAComponent(C2String name, c2_node_id_t id,
const std::shared_ptr<C2ReflectorHelper>& helper)
: mIntfImpl(std::make_shared<IntfImpl>(name, helper)),
mIntf(std::make_shared<SimpleInterface<IntfImpl>>(name.c_str(), id, mIntfImpl)),
mThread("C2VDAComponentThread"),
+ mDequeueThread("C2VDAComponentDequeueThread"),
mVDAInitResult(VideoDecodeAcceleratorAdaptor::Result::ILLEGAL_STATE),
mComponentState(ComponentState::UNINITIALIZED),
mDrainWithEOS(false),
@@ -241,6 +236,7 @@
mVDAAdaptor->destroy();
mVDAAdaptor.reset(nullptr);
}
+ stopDequeueThread();
}
void C2VDAComponent::onStart(media::VideoCodecProfile profile, base::WaitableEvent* done) {
@@ -351,16 +347,9 @@
reportFinishedWorkIfAny();
}
-// This is used as callback while output buffer is released by client.
-// TODO(johnylin): consider to use C2Buffer::registerOnDestroyNotify instead
-void C2VDAComponent::returnOutputBuffer(int32_t pictureBufferId) {
- mTaskRunner->PostTask(FROM_HERE, base::Bind(&C2VDAComponent::onOutputBufferReturned,
- base::Unretained(this), pictureBufferId));
-}
-
-void C2VDAComponent::onOutputBufferReturned(int32_t pictureBufferId) {
+void C2VDAComponent::onOutputBufferReturned(uint32_t slotId) {
DCHECK(mTaskRunner->BelongsToCurrentThread());
- ALOGV("onOutputBufferReturned: picture id=%d", pictureBufferId);
+ ALOGV("onOutputBufferReturned: slot id=%u", slotId);
if (mComponentState == ComponentState::UNINITIALIZED) {
// Output buffer is returned from client after component is stopped. Just let the buffer be
// released.
@@ -369,7 +358,7 @@
// TODO(johnylin): when buffer is returned, we should confirm that output format is not changed
// yet. If changed, just let the buffer be released.
- GraphicBlockInfo* info = getGraphicBlockById(pictureBufferId);
+ GraphicBlockInfo* info = getGraphicBlockBySlot(slotId);
if (!info) {
reportError(C2_CORRUPTED);
return;
@@ -402,12 +391,14 @@
CHECK_EQ(info->mState, GraphicBlockInfo::State::OWNED_BY_ACCELERATOR);
// Output buffer will be passed to client soon along with mListener->onWorkDone_nb().
info->mState = GraphicBlockInfo::State::OWNED_BY_CLIENT;
+ mBuffersInClient++;
// Attach output buffer to the work corresponded to bitstreamId.
- work->worklets.front()->output.buffers.emplace_back(std::make_shared<C2VDAGraphicBuffer>(
- info->mGraphicBlock, mOutputFormat.mVisibleRect,
- base::Bind(&C2VDAComponent::returnOutputBuffer, mWeakThisFactory.GetWeakPtr(),
- pictureBufferId)));
+ auto block = info->mGraphicBlock;
+ work->worklets.front()->output.buffers.emplace_back(C2Buffer::CreateGraphicBuffer(
+ block->share(C2Rect(mOutputFormat.mVisibleRect.width(),
+ mOutputFormat.mVisibleRect.height()),
+ C2Fence())));
// TODO: this does not work for timestamps as they can wrap around
int64_t currentTimestamp = base::checked_cast<int64_t>(work->input.ordinal.timestamp.peek());
@@ -552,6 +543,8 @@
mGraphicBlocks.clear();
+ stopDequeueThread();
+
mStopDoneEvent->Signal();
mStopDoneEvent = nullptr;
mComponentState = ComponentState::UNINITIALIZED;
@@ -606,6 +599,19 @@
return &mGraphicBlocks[blockId];
}
+C2VDAComponent::GraphicBlockInfo* C2VDAComponent::getGraphicBlockBySlot(uint32_t slotId) {  // Finds the GraphicBlockInfo whose mSlotId equals |slotId|; returns nullptr if there is none.
+ auto blockIter = std::find_if(mGraphicBlocks.begin(), mGraphicBlocks.end(),  // Linear search; the first entry with a matching slot wins.
+ [slotId](const GraphicBlockInfo& gb) {
+ return gb.mSlotId == slotId;
+ });
+
+ if (blockIter == mGraphicBlocks.end()) {  // No block recorded for this slot: log the miss and let the caller handle nullptr.
+ ALOGE("getGraphicBlockBySlot failed: slot=%u", slotId);
+ return nullptr;
+ }
+ return &(*blockIter);  // Pointer to the element stored inside mGraphicBlocks (not a copy).
+}
+
void C2VDAComponent::onOutputFormatChanged(std::unique_ptr<VideoFormat> format) {
DCHECK(mTaskRunner->BelongsToCurrentThread());
ALOGV("onOutputFormatChanged");
@@ -684,7 +690,25 @@
}
}
+ stopDequeueThread();
mGraphicBlocks.clear();
+
+ // Set requested buffer count to C2VdaBqBlockPool.
+ std::shared_ptr<C2VdaBqBlockPool> bqPool =
+ std::static_pointer_cast<C2VdaBqBlockPool>(mOutputBlockPool);
+ if (bqPool) {
+ err = bqPool->requestNewBufferSet(static_cast<uint32_t>(bufferCount));
+ if (err != C2_OK) {
+ ALOGE("failed to set buffer count magic to block pool: %d", err);
+ reportError(err);
+ return err;
+ }
+ } else {
+ ALOGE("Component only supports C2VdaBqBlockPool");
+ reportError(C2_CORRUPTED);
+ return C2_CORRUPTED;
+ }
+
for (size_t i = 0; i < bufferCount; ++i) {
std::shared_ptr<C2GraphicBlock> block;
C2MemoryUsage usage = {C2MemoryUsage::CPU_READ, 0};
@@ -699,6 +723,11 @@
appendOutputBuffer(std::move(block));
}
mOutputFormat.mMinNumBuffers = bufferCount;
+
+ if (!startDequeueThread(size, pixelFormat)) {
+ reportError(C2_CORRUPTED);
+ return C2_CORRUPTED;
+ }
return C2_OK;
}
@@ -766,6 +795,8 @@
info.mHandle = std::move(passedHandle);
info.mPlanes = std::move(passedPlanes);
+ info.mSlotId = getSlotFromGraphicBlockHandle(info.mGraphicBlock->handle());
+
mGraphicBlocks.push_back(std::move(info));
}
@@ -1070,6 +1101,56 @@
mListener->onError_nb(shared_from_this(), static_cast<uint32_t>(error));
}
+bool C2VDAComponent::startDequeueThread(const media::Size& size, uint32_t pixelFormat) {  // Starts mDequeueThread and posts dequeueThreadLoop(size, pixelFormat) to it; returns false if the thread fails to start.
+ CHECK(!mDequeueThread.IsRunning());  // Callers must have stopped any previous dequeue thread first.
+ if (!mDequeueThread.Start()) {
+ ALOGE("failed to start dequeue thread!!");
+ return false;
+ }
+ mDequeueLoopStop.store(false);  // Clear the stop flag before the loop task is posted.
+ mBuffersInClient.store(0u);  // Restart the client-owned buffer count from zero.
+ mDequeueThread.task_runner()->PostTask(
+ FROM_HERE, base::Bind(&C2VDAComponent::dequeueThreadLoop, base::Unretained(this),  // NOTE(review): Unretained(this) - assumes stopDequeueThread() always runs before this object is destroyed; confirm.
+ size, pixelFormat));
+ return true;
+}
+
+void C2VDAComponent::stopDequeueThread() {  // Signals the dequeue loop to exit and stops mDequeueThread; does nothing if the thread is not running.
+ if (mDequeueThread.IsRunning()) {
+ mDequeueLoopStop.store(true);  // Raise the flag first: dequeueThreadLoop() checks it at the top of every iteration.
+ mDequeueThread.Stop();  // Presumably blocks until the posted loop task has returned (base::Thread semantics) - confirm.
+ }
+}
+
+void C2VDAComponent::dequeueThreadLoop(const media::Size& size, uint32_t pixelFormat) {  // Dequeue-thread routine: fetches blocks from mOutputBlockPool and forwards each block's slot index to onOutputBufferReturned() on mTaskRunner.
+ ALOGV("dequeueThreadLoop starts");
+ DCHECK(mDequeueThread.task_runner()->BelongsToCurrentThread());
+
+ while (!mDequeueLoopStop.load()) {  // Runs until stopDequeueThread() raises the flag or a fetch fails with an unexpected error.
+ if (mBuffersInClient.load() == 0) {  // Counter reads zero: skip the fetch for now.
+ ::usleep(kDequeueRetryDelayUs); // Sleep for the retry delay, then poll the counter again.
+ continue;
+ }
+ std::shared_ptr<C2GraphicBlock> block;
+ C2MemoryUsage usage = {C2MemoryUsage::CPU_READ, 0};
+ auto err = mOutputBlockPool->fetchGraphicBlock(size.width(), size.height(), pixelFormat,
+ usage, &block);
+ if (err == C2_TIMED_OUT) {
+ continue; // Retry right away with no sleep here; presumably fetchGraphicBlock already waited before timing out - confirm.
+ }
+ if (err == C2_OK) {
+ auto slot = getSlotFromGraphicBlockHandle(block->handle());  // Only the slot index is forwarded; the fetched |block| is not kept past this iteration.
+ mTaskRunner->PostTask(FROM_HERE, base::Bind(&C2VDAComponent::onOutputBufferReturned,  // Hand the returned slot over to mTaskRunner.
+ base::Unretained(this), slot));
+ mBuffersInClient--;  // One fewer buffer counted as owned by the client.
+ } else {
+ ALOGE("dequeueThreadLoop got error: %d", err);
+ break;  // NOTE(review): the loop ends here with only a log; nothing else in this function reports the error.
+ }
+ }
+ ALOGV("dequeueThreadLoop terminates");
+}
+
class C2VDAComponentFactory : public C2ComponentFactory {
public:
C2VDAComponentFactory(C2String decoderName)
diff --git a/include/C2VDAComponent.h b/include/C2VDAComponent.h
index 6df3fe7..4eb7453 100644
--- a/include/C2VDAComponent.h
+++ b/include/C2VDAComponent.h
@@ -132,10 +132,6 @@
ERROR,
};
- enum {
- kDpbOutputBufferExtraCount = 3, // Use the same number as ACodec.
- };
-
// This constant is used to tell apart from drain_mode_t enumerations in C2Component.h, which
// means no drain request.
// Note: this value must be different than all enumerations in drain_mode_t.
@@ -156,6 +152,7 @@
};
int32_t mBlockId = -1;
+ uint32_t mSlotId = 0;
State mState = State::OWNED_BY_COMPONENT;
// Graphic block buffer allocated from allocator. This should be reused.
std::shared_ptr<C2GraphicBlock> mGraphicBlock;
@@ -178,9 +175,6 @@
media::Rect visibleRect);
};
- // Used as the release callback for C2VDAGraphicBuffer to get back the output buffer.
- void returnOutputBuffer(int32_t pictureBufferId);
-
// These tasks should be run on the component thread |mThread|.
void onDestroy();
void onStart(media::VideoCodecProfile profile, base::WaitableEvent* done);
@@ -197,7 +191,7 @@
void onStopDone();
void onOutputFormatChanged(std::unique_ptr<VideoFormat> format);
void onVisibleRectChanged(const media::Rect& cropRect);
- void onOutputBufferReturned(int32_t pictureBufferId);
+ void onOutputBufferReturned(uint32_t slotId);
// Send input buffer to accelerator with specified bitstream id.
void sendInputBufferToAccelerator(const C2ConstLinearBlock& input, int32_t bitstreamId);
@@ -207,6 +201,8 @@
void setOutputFormatCrop(const media::Rect& cropRect);
// Helper function to get the specified GraphicBlockInfo object by its id.
GraphicBlockInfo* getGraphicBlockById(int32_t blockId);
+ // Helper function to get the specified GraphicBlockInfo object by its slot index.
+ GraphicBlockInfo* getGraphicBlockBySlot(uint32_t slotId);
// Helper function to get the specified work in mPendingWorks by bitstream id.
C2Work* getPendingWorkByBitstreamId(int32_t bitstreamId);
// Try to apply the output format change.
@@ -227,6 +223,13 @@
// Helper function to determine if the work is finished.
bool isWorkDone(const C2Work* work) const;
+ // Start dequeue thread, return true on success.
+ bool startDequeueThread(const media::Size& size, uint32_t pixelFormat);
+ // Stop dequeue thread.
+ void stopDequeueThread();
+ // The routine task running on dequeue thread.
+ void dequeueThreadLoop(const media::Size& size, uint32_t pixelFormat);
+
// The pointer of component interface implementation.
std::shared_ptr<IntfImpl> mIntfImpl;
// The pointer of component interface.
@@ -239,6 +242,13 @@
// The task runner on component thread.
scoped_refptr<base::SingleThreadTaskRunner> mTaskRunner;
+ // The dequeue buffer loop thread.
+ base::Thread mDequeueThread;
+ // The stop signal for dequeue loop which should be atomic (toggled by main thread).
+ std::atomic<bool> mDequeueLoopStop;
+ // The count of buffers owned by client which should be atomic.
+ std::atomic<uint32_t> mBuffersInClient;
+
// The following members should be utilized on component thread |mThread|.
// The initialization result retrieved from VDA.