Accelerate YUV422 to RGBA conversion

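Use libyuv to speed up YUV422 to RGBA conversion.  The YUV422 to ARGB
result is now written to an intermediate buffer and then converted into
the output buffer, instead of swapping the R and B channels in place.
This also adds a UYVY to RGBA conversion function.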

Bug: 190783702
Test: adb shell evs_app --test --gear reverse
Change-Id: Ibd8ce7fae383bc234e6d2747412b64dc344a40b0
diff --git a/cpp/evs/sampleDriver/EvsV4lCamera.cpp b/cpp/evs/sampleDriver/EvsV4lCamera.cpp
index 47861a7..8fb4882 100644
--- a/cpp/evs/sampleDriver/EvsV4lCamera.cpp
+++ b/cpp/evs/sampleDriver/EvsV4lCamera.cpp
@@ -28,13 +28,10 @@
 #include <ui/GraphicBufferMapper.h>
 #include <utils/SystemClock.h>
 
+namespace {
 
-namespace android {
-namespace hardware {
-namespace automotive {
-namespace evs {
-namespace V1_1 {
-namespace implementation {
+// The size of a pixel of RGBA format data in bytes
+constexpr auto kBytesPerPixelRGBA = 4;
 
 // Default camera output image resolution
 const std::array<int32_t, 2> kDefaultResolution = {640, 480};
@@ -43,6 +40,16 @@
 // Safeguards against unreasonable resource consumption and provides a testable limit
 static const unsigned MAX_BUFFERS_IN_FLIGHT = 100;
 
+}; // anonymous namespace
+
+
+namespace android {
+namespace hardware {
+namespace automotive {
+namespace evs {
+namespace V1_1 {
+namespace implementation {
+
 EvsV4lCamera::EvsV4lCamera(const char *deviceName,
                            unique_ptr<ConfigManager::CameraInfo> &camInfo) :
         mFramesAllowed(0),
@@ -775,6 +782,11 @@
         bufDesc_1_1.timestamp =
             pV4lBuff->timestamp.tv_sec * 1e+6 + pV4lBuff->timestamp.tv_usec;
 
+        const auto sizeInRGBA = pDesc->width * pDesc->height * kBytesPerPixelRGBA;
+        if (mColorSpaceConversionBuffer.size() < sizeInRGBA) {
+            mColorSpaceConversionBuffer.resize(sizeInRGBA);
+        }
+
         // Lock our output buffer for writing
         // TODO(b/145459970): Sometimes, physical camera device maps a buffer
         // into the address that is about to be unmapped by another device; this
@@ -799,7 +811,8 @@
 
         // Transfer the video image into the output buffer, making any needed
         // format conversion along the way
-        mFillBufferFromVideo(bufDesc_1_1, (uint8_t *)targetPixels, pData, mVideo.getStride());
+        mFillBufferFromVideo(bufDesc_1_1, (uint8_t *)targetPixels, pData,
+                             mColorSpaceConversionBuffer.data(), mStride);
 
         // Unlock the output buffer
         mapper.unlock(bufDesc_1_1.buffer.nativeHandle);
diff --git a/cpp/evs/sampleDriver/EvsV4lCamera.h b/cpp/evs/sampleDriver/EvsV4lCamera.h
index 4aa8bc7..28bd7b8 100644
--- a/cpp/evs/sampleDriver/EvsV4lCamera.h
+++ b/cpp/evs/sampleDriver/EvsV4lCamera.h
@@ -143,7 +143,7 @@
 
     // Which format specific function we need to use to move camera imagery into our output buffers
     void(*mFillBufferFromVideo)(const BufferDesc& tgtBuff, uint8_t* tgt,
-                                void* imgData, unsigned imgStride);
+                                void* imgData, void* buf, unsigned imgStride);
 
 
     EvsResult doneWithFrame_impl(const uint32_t id, const buffer_handle_t handle);
@@ -166,6 +166,9 @@
 
     // Frame counter
     uint64_t mFrameCounter = 0;
+
+    // A buffer to hold intermediate color conversion data
+    std::vector<uint8_t> mColorSpaceConversionBuffer;
 };
 
 } // namespace implementation
diff --git a/cpp/evs/sampleDriver/bufferCopy.cpp b/cpp/evs/sampleDriver/bufferCopy.cpp
index 098af61..253e39d 100644
--- a/cpp/evs/sampleDriver/bufferCopy.cpp
+++ b/cpp/evs/sampleDriver/bufferCopy.cpp
@@ -19,6 +19,14 @@
 #include <android-base/logging.h>
 #include <libyuv.h>
 
+namespace {
+
+inline constexpr size_t kYuv422BytesPerPixel = 2;
+inline constexpr size_t kRgbaBytesPerPixel = 4;
+
+}; // anonymous namespace
+
+
 namespace android {
 namespace hardware {
 namespace automotive {
@@ -38,7 +46,8 @@
 }
 
 
-void fillNV21FromNV21(const BufferDesc& tgtBuff, uint8_t* tgt, void* imgData, unsigned) {
+void fillNV21FromNV21(
+        const BufferDesc& tgtBuff, uint8_t* tgt, void* imgData, void*, unsigned) {
     // The NV21 format provides a Y array of 8bit values, followed by a 1/2 x 1/2 interleave U/V array.
     // It assumes an even width and height for the overall image, and a horizontal stride that is
     // an even multiple of 16 bytes for both the Y and UV arrays.
@@ -57,7 +66,8 @@
 }
 
 
-void fillNV21FromYUYV(const BufferDesc& tgtBuff, uint8_t* tgt, void* imgData, unsigned imgStride) {
+void fillNV21FromYUYV(
+        const BufferDesc& tgtBuff, uint8_t* tgt, void* imgData, void*, unsigned imgStride) {
     // The YUYV format provides an interleaved array of pixel values with U and V subsampled in
     // the horizontal direction only.  Also known as interleaved 422 format.  A 4 byte
     // "macro pixel" provides the Y value for two adjacent pixels and the U and V values shared
@@ -120,15 +130,17 @@
 }
 
 
-void fillRGBAFromYUYV(const BufferDesc& tgtBuff, uint8_t* tgt, void* imgData, unsigned imgStride) {
+void fillRGBAFromYUYV(
+        const BufferDesc& tgtBuff, uint8_t* tgt, void* imgData, void* buf, unsigned imgStride) {
     const AHardwareBuffer_Desc* pDesc =
         reinterpret_cast<const AHardwareBuffer_Desc*>(&tgtBuff.buffer.description);
     // Converts YUY2ToARGB (little endian).  Please note that libyuv uses the
     // little endian while we're using the big endian in RGB format names.
-    const auto dstStrideInBytes = pDesc->stride * 4;  // 4-byte per pixel
+    const auto srcStrideInBytes = imgStride * kYuv422BytesPerPixel;
+    const auto dstStrideInBytes = pDesc->stride * kRgbaBytesPerPixel;
     auto result = libyuv::YUY2ToARGB((const uint8_t*)imgData,
-                                     imgStride,             // input stride in bytes
-                                     tgt,
+                                     srcStrideInBytes,      // input stride in bytes
+                                     (uint8_t*)buf,
                                      dstStrideInBytes,      // output stride in bytes
                                      pDesc->width,
                                      pDesc->height);
@@ -137,10 +149,8 @@
         return;
     }
 
-    // Swaps R and B pixels to convert BGRA to RGBA in place.
-    // TODO(b/190783702): Consider allocating an extra space to store ARGB data
-    //                    temporarily if below operation is too slow.
-    result = libyuv::ABGRToARGB(tgt, dstStrideInBytes, tgt, dstStrideInBytes,
+    // Swaps R and B pixels to convert BGRA to RGBA
+    result = libyuv::ABGRToARGB((uint8_t*)buf, dstStrideInBytes, tgt, dstStrideInBytes,
                                 pDesc->width, pDesc->height);
     if (result) {
         LOG(ERROR) << "Failed to convert BGRA to RGBA.";
@@ -148,31 +158,60 @@
 }
 
 
-void fillYUYVFromYUYV(const BufferDesc& tgtBuff, uint8_t* tgt, void* imgData, unsigned imgStride) {
+void fillRGBAFromUYVY(
+        const BufferDesc& tgtBuff, uint8_t* tgt, void* imgData, void* buf, unsigned imgStride) {
     const AHardwareBuffer_Desc* pDesc =
         reinterpret_cast<const AHardwareBuffer_Desc*>(&tgtBuff.buffer.description);
-    unsigned width = pDesc->width;
-    unsigned height = pDesc->height;
-    uint8_t* src = (uint8_t*)imgData;
-    uint8_t* dst = (uint8_t*)tgt;
-    unsigned srcStrideBytes = imgStride;
-    unsigned dstStrideBytes = pDesc->stride * 2;
+    // Converts UYVY to ARGB (little endian).  Please note that libyuv uses
+    // little-endian RGB format names while we use big-endian ones.
+    const auto srcStrideInBytes = imgStride * kYuv422BytesPerPixel;
+    const auto dstStrideInBytes = pDesc->stride * kRgbaBytesPerPixel;
+    auto result = libyuv::UYVYToARGB(static_cast<const uint8_t*>(imgData),
+                                     srcStrideInBytes,      // input stride in bytes
+                                     static_cast<uint8_t*>(buf),
+                                     dstStrideInBytes,      // output stride in bytes
+                                     pDesc->width,
+                                     pDesc->height);
+    if (result) {
+        LOG(ERROR) << "Failed to convert UYVY to BGRA.";
+        return;
+    }
 
-    for (unsigned r=0; r<height; r++) {
-        // Copy a pixel row at a time (2 bytes per pixel, averaged over a YUYV macro pixel)
-        memcpy(dst+r*dstStrideBytes, src+r*srcStrideBytes, width*2);
+    // Swaps R and B pixels to convert BGRA to RGBA
+    result = libyuv::ABGRToARGB(static_cast<uint8_t*>(buf), dstStrideInBytes, tgt,
+                                dstStrideInBytes, pDesc->width, pDesc->height);
+    if (result) {
+        LOG(WARNING) << "Failed to convert BGRA to RGBA.";
     }
 }
 
 
-void fillYUYVFromUYVY(const BufferDesc& tgtBuff, uint8_t* tgt, void* imgData, unsigned imgStride) {
+void fillYUYVFromYUYV(
+        const BufferDesc& tgtBuff, uint8_t* tgt, void* imgData, void *, unsigned imgStride) {
+    const AHardwareBuffer_Desc* pDesc =
+        reinterpret_cast<const AHardwareBuffer_Desc*>(&tgtBuff.buffer.description);
+    const auto height = pDesc->height;
+    uint8_t* src = (uint8_t*)imgData;
+    uint8_t* dst = (uint8_t*)tgt;
+    const auto srcStrideBytes = imgStride * kYuv422BytesPerPixel;
+    const auto dstStrideBytes = pDesc->stride * kYuv422BytesPerPixel;
+
+    for (unsigned r=0; r<height; r++) {
+        // Copy one row at a time; each copy spans the full source stride (2 bytes per pixel)
+        memcpy(dst+r*dstStrideBytes, src+r*srcStrideBytes, srcStrideBytes);
+    }
+}
+
+
+void fillYUYVFromUYVY(
+        const BufferDesc& tgtBuff, uint8_t* tgt, void* imgData, void *, unsigned imgStride) {
     const AHardwareBuffer_Desc* pDesc =
         reinterpret_cast<const AHardwareBuffer_Desc*>(&tgtBuff.buffer.description);
     unsigned width = pDesc->width;
     unsigned height = pDesc->height;
     uint32_t* src = (uint32_t*)imgData;
     uint32_t* dst = (uint32_t*)tgt;
-    unsigned srcStridePixels = imgStride / 2;
+    unsigned srcStridePixels = imgStride;
     unsigned dstStridePixels = pDesc->stride;
 
     const int srcRowPadding32 = srcStridePixels/2 - width/2;  // 2 bytes per pixel, 4 bytes per word
diff --git a/cpp/evs/sampleDriver/bufferCopy.h b/cpp/evs/sampleDriver/bufferCopy.h
index b07a619..a6bb512 100644
--- a/cpp/evs/sampleDriver/bufferCopy.h
+++ b/cpp/evs/sampleDriver/bufferCopy.h
@@ -30,19 +30,22 @@
 
 
 void fillNV21FromNV21(const BufferDesc& tgtBuff, uint8_t* tgt,
-                      void* imgData, unsigned imgStride);
+                      void* imgData, void* buf, unsigned imgStride);
 
 void fillNV21FromYUYV(const BufferDesc& tgtBuff, uint8_t* tgt,
-                      void* imgData, unsigned imgStride);
+                      void* imgData, void* buf, unsigned imgStride);
 
 void fillRGBAFromYUYV(const BufferDesc& tgtBuff, uint8_t* tgt,
-                      void* imgData, unsigned imgStride);
+                      void* imgData, void* buf, unsigned imgStride);
+
+void fillRGBAFromUYVY(const BufferDesc& tgtBuff, uint8_t* tgt,
+                      void* imgData, void* buf, unsigned imgStride);
 
 void fillYUYVFromYUYV(const BufferDesc& tgtBuff, uint8_t* tgt,
-                      void* imgData, unsigned imgStride);
+                      void* imgData, void* buf, unsigned imgStride);
 
 void fillYUYVFromUYVY(const BufferDesc& tgtBuff, uint8_t* tgt,
-                      void* imgData, unsigned imgStride);
+                      void* imgData, void* buf, unsigned imgStride);
 
 } // namespace implementation
 } // namespace V1_1