Add color space xform support to SkJpegCodec (includes F16!)

Also changes SkColorSpaceXform to support:
RGBA->RGBA
RGBA->BGRA

Instead of:
RGBA->SkPMColor

TBR=reed@google.com
BUG=skia:
GOLD_TRYBOT_URL= https://gold.skia.org/search?issue=2174493002
CQ_INCLUDE_TRYBOTS=master.client.skia:Test-Ubuntu-GCC-GCE-CPU-AVX2-x86_64-Release-SKNX_NO_SIMD-Trybot

Committed: https://skia.googlesource.com/skia/+/73d55332e2846dd05e9efdaa2f017bcc3872884b
Review-Url: https://codereview.chromium.org/2174493002
diff --git a/bench/CodecBench.cpp b/bench/CodecBench.cpp
index fc52edf..9bd404d 100644
--- a/bench/CodecBench.cpp
+++ b/bench/CodecBench.cpp
@@ -42,7 +42,9 @@
 void CodecBench::onDelayedSetup() {
     SkAutoTDelete<SkCodec> codec(SkCodec::NewFromData(fData));
 
-    fInfo = codec->getInfo().makeColorType(fColorType).makeAlphaType(fAlphaType);
+    fInfo = codec->getInfo().makeColorType(fColorType)
+                            .makeAlphaType(fAlphaType)
+                            .makeColorSpace(nullptr);
 
     fPixelStorage.reset(fInfo.getSafeSize(fInfo.minRowBytes()));
 }
diff --git a/bench/ColorCodecBench.cpp b/bench/ColorCodecBench.cpp
index e17d597..5079b48 100644
--- a/bench/ColorCodecBench.cpp
+++ b/bench/ColorCodecBench.cpp
@@ -41,32 +41,13 @@
 
 void ColorCodecBench::decodeAndXform() {
     SkAutoTDelete<SkCodec> codec(SkCodec::NewFromData(fEncoded.get()));
+    SkASSERT(codec);
+
 #ifdef SK_DEBUG
-    const SkCodec::Result result =
+    SkCodec::Result result =
 #endif
-    codec->startScanlineDecode(fSrcInfo);
+    codec->getPixels(fDstInfo, fDst.get(), fDstInfo.minRowBytes());
     SkASSERT(SkCodec::kSuccess == result);
-
-    sk_sp<SkColorSpace> srcSpace = sk_ref_sp(codec->getInfo().colorSpace());
-    if (!srcSpace) {
-        srcSpace = SkColorSpace::NewNamed(SkColorSpace::kSRGB_Named);
-    }
-    std::unique_ptr<SkColorSpaceXform> xform = SkColorSpaceXform::New(srcSpace, fDstSpace);
-    SkASSERT(xform);
-
-    void* dst = fDst.get();
-    for (int y = 0; y < fSrcInfo.height(); y++) {
-#ifdef SK_DEBUG
-        const int rows =
-#endif
-        codec->getScanlines(fSrc.get(), 1, 0);
-        SkASSERT(1 == rows);
-
-        FLAGS_half ?
-                xform->applyToF16((uint64_t*) dst, (uint32_t*) fSrc.get(), fSrcInfo.width()) :
-                xform->applyTo8888((SkPMColor*) dst, (uint32_t*) fSrc.get(), fSrcInfo.width());
-        dst = SkTAddOffset<void>(dst, fDstInfo.minRowBytes());
-    }
 }
 
 #if defined(SK_TEST_QCMS)
@@ -121,7 +102,7 @@
         // Transform in place
         FLAGS_half ?
                 xform->applyToF16((uint64_t*) dst, (uint32_t*) src, fSrcInfo.width()) :
-                xform->applyTo8888((SkPMColor*) dst, (uint32_t*) src, fSrcInfo.width());
+                xform->applyToRGBA((SkPMColor*) dst, (uint32_t*) src, fSrcInfo.width());
         dst = SkTAddOffset<void>(dst, fDstInfo.minRowBytes());
         src = SkTAddOffset<void>(src, fSrcInfo.minRowBytes());
     }
@@ -157,28 +138,12 @@
 
 void ColorCodecBench::onDelayedSetup() {
     SkAutoTDelete<SkCodec> codec(SkCodec::NewFromData(fEncoded.get()));
-    fSrcInfo = codec->getInfo().makeColorType(kRGBA_8888_SkColorType);
-
-    fDstInfo = fSrcInfo;
-    if (FLAGS_half) {
-        fDstInfo = fDstInfo.makeColorType(kRGBA_F16_SkColorType);
-    }
-    fDst.reset(fDstInfo.getSafeSize(fDstInfo.minRowBytes()));
-
-    if (FLAGS_xform_only) {
-        fSrc.reset(fSrcInfo.getSafeSize(fSrcInfo.minRowBytes()));
-        codec->getPixels(fSrcInfo, fSrc.get(), fSrcInfo.minRowBytes());
-    } else {
-        // Set-up a row buffer to decode into before transforming to dst.
-        fSrc.reset(fSrcInfo.minRowBytes());
-    }
-
     fSrcData = codec->getICCData();
     sk_sp<SkData> dstData = SkData::MakeFromFileName(
             GetResourcePath("monitor_profiles/HP_ZR30w.icc").c_str());
     SkASSERT(dstData);
 
-
+    fDstSpace = nullptr;
 #if defined(SK_TEST_QCMS)
     if (FLAGS_qcms) {
         fDstSpaceQCMS.reset(FLAGS_srgb ?
@@ -196,6 +161,25 @@
                                  SkColorSpace::NewICC(dstData->data(), dstData->size());
         SkASSERT(fDstSpace);
     }
+
+    fSrcInfo = codec->getInfo().makeColorType(kRGBA_8888_SkColorType);
+
+    fDstInfo = fSrcInfo.makeColorSpace(fDstSpace);
+    if (FLAGS_half) {
+        fDstInfo = fDstInfo.makeColorType(kRGBA_F16_SkColorType);
+    }
+    fDst.reset(fDstInfo.getSafeSize(fDstInfo.minRowBytes()));
+
+    if (FLAGS_xform_only) {
+        fSrc.reset(fSrcInfo.getSafeSize(fSrcInfo.minRowBytes()));
+        codec->getPixels(fSrcInfo, fSrc.get(), fSrcInfo.minRowBytes());
+    }
+#if defined(SK_TEST_QCMS)
+    else if (FLAGS_qcms) {
+        // Set-up a row buffer to decode into before transforming to dst.
+        fSrc.reset(fSrcInfo.minRowBytes());
+    }
+#endif
 }
 
 void ColorCodecBench::onDraw(int n, SkCanvas*) {
diff --git a/dm/DM.cpp b/dm/DM.cpp
index 7a1e595..772cfe0 100644
--- a/dm/DM.cpp
+++ b/dm/DM.cpp
@@ -777,7 +777,8 @@
         push_src("image", "color_codec_sRGB_kF16", src);
 
 #if defined(SK_TEST_QCMS)
-        src = new ColorCodecSrc(colorImage, ColorCodecSrc::kQCMS_HPZR30w_Mode, kN32_SkColorType);
+        src = new ColorCodecSrc(colorImage, ColorCodecSrc::kQCMS_HPZR30w_Mode,
+                                kRGBA_8888_SkColorType);
         push_src("image", "color_codec_QCMS_HPZR30w", src);
 #endif
     }
diff --git a/dm/DMSrcSink.cpp b/dm/DMSrcSink.cpp
index f3f5f78..c03a8c4 100644
--- a/dm/DMSrcSink.cpp
+++ b/dm/DMSrcSink.cpp
@@ -867,28 +867,6 @@
         return SkStringPrintf("Couldn't create codec for %s.", fPath.c_str());
     }
 
-    SkImageInfo info = codec->getInfo().makeColorType(fColorType);
-    SkBitmap bitmap;
-    if (!bitmap.tryAllocPixels(info)) {
-        return SkStringPrintf("Image(%s) is too large (%d x %d)", fPath.c_str(),
-                              info.width(), info.height());
-    }
-
-    SkImageInfo decodeInfo = info;
-    size_t srcRowBytes = sizeof(SkPMColor) * info.width();
-    SkAutoMalloc src(srcRowBytes * info.height());
-    void* srcPixels = src.get();
-    if (kBaseline_Mode == fMode) {
-        srcPixels = bitmap.getPixels();
-    } else {
-        decodeInfo = decodeInfo.makeColorType(kRGBA_8888_SkColorType);
-    }
-
-    SkCodec::Result r = codec->getPixels(decodeInfo, srcPixels, srcRowBytes);
-    if (SkCodec::kSuccess != r) {
-        return SkStringPrintf("Couldn't getPixels %s. Error code %d", fPath.c_str(), r);
-    }
-
     // Load the dst ICC profile.  This particular dst is fairly similar to Adobe RGB.
     sk_sp<SkData> dstData = SkData::MakeFromFileName(
             GetResourcePath("monitor_profiles/HP_ZR30w.icc").c_str());
@@ -896,46 +874,39 @@
         return "Cannot read monitor profile.  Is the resource path set correctly?";
     }
 
+    sk_sp<SkColorSpace> dstSpace = nullptr;
+    if (kDst_sRGB_Mode == fMode) {
+        dstSpace = SkColorSpace::NewNamed(SkColorSpace::kSRGB_Named);
+    } else if (kDst_HPZR30w_Mode == fMode) {
+        dstSpace = SkColorSpace::NewICC(dstData->data(), dstData->size());
+    }
+
+    SkImageInfo decodeInfo = codec->getInfo().makeColorType(fColorType).makeColorSpace(dstSpace);
+    SkImageInfo bitmapInfo = decodeInfo;
+    if (kRGBA_8888_SkColorType == decodeInfo.colorType() ||
+        kBGRA_8888_SkColorType == decodeInfo.colorType())
+    {
+        bitmapInfo = bitmapInfo.makeColorType(kN32_SkColorType);
+    }
+
+    SkBitmap bitmap;
+    if (!bitmap.tryAllocPixels(bitmapInfo)) {
+        return SkStringPrintf("Image(%s) is too large (%d x %d)", fPath.c_str(),
+                              bitmapInfo.width(), bitmapInfo.height());
+    }
+
+    size_t rowBytes = bitmap.rowBytes();
+    SkCodec::Result r = codec->getPixels(decodeInfo, bitmap.getPixels(), rowBytes);
+    if (SkCodec::kSuccess != r) {
+        return SkStringPrintf("Couldn't getPixels %s. Error code %d", fPath.c_str(), r);
+    }
+
     switch (fMode) {
         case kBaseline_Mode:
-            canvas->drawBitmap(bitmap, 0, 0);
-            break;
         case kDst_sRGB_Mode:
-        case kDst_HPZR30w_Mode: {
-            sk_sp<SkColorSpace> srcSpace = sk_ref_sp(codec->getInfo().colorSpace());
-            sk_sp<SkColorSpace> dstSpace = (kDst_sRGB_Mode == fMode) ?
-                    SkColorSpace::NewNamed(SkColorSpace::kSRGB_Named) :
-                    SkColorSpace::NewICC(dstData->data(), dstData->size());
-            SkASSERT(dstSpace);
-
-            std::unique_ptr<SkColorSpaceXform> xform = SkColorSpaceXform::New(srcSpace, dstSpace);
-            if (!xform) {
-                return "Unimplemented color conversion.";
-            }
-
-            if (kN32_SkColorType == fColorType) {
-                uint32_t* srcRow = (uint32_t*) srcPixels;
-                uint32_t* dstRow = (uint32_t*) bitmap.getPixels();
-                for (int y = 0; y < info.height(); y++) {
-                    xform->applyTo8888(dstRow, srcRow, info.width());
-                    srcRow = SkTAddOffset<uint32_t>(srcRow, srcRowBytes);
-                    dstRow = SkTAddOffset<uint32_t>(dstRow, bitmap.rowBytes());
-                }
-            } else {
-                SkASSERT(kRGBA_F16_SkColorType == fColorType);
-
-                uint32_t* srcRow = (uint32_t*) srcPixels;
-                uint64_t* dstRow = (uint64_t*) bitmap.getPixels();
-                for (int y = 0; y < info.height(); y++) {
-                    xform->applyToF16(dstRow, srcRow, info.width());
-                    srcRow = SkTAddOffset<uint32_t>(srcRow, srcRowBytes);
-                    dstRow = SkTAddOffset<uint64_t>(dstRow, bitmap.rowBytes());
-                }
-            }
-
+        case kDst_HPZR30w_Mode:
             canvas->drawBitmap(bitmap, 0, 0);
             break;
-        }
 #if defined(SK_TEST_QCMS)
         case kQCMS_HPZR30w_Mode: {
             sk_sp<SkData> srcData = codec->getICCData();
@@ -967,12 +938,10 @@
 #endif
 
             // Perform color correction.
-            uint32_t* srcRow = (uint32_t*) srcPixels;
-            uint32_t* dstRow = (uint32_t*) bitmap.getPixels();
-            for (int y = 0; y < info.height(); y++) {
-                qcms_transform_data_type(transform, srcRow, dstRow, info.width(), outType);
-                srcRow = SkTAddOffset<uint32_t>(srcRow, srcRowBytes);
-                dstRow = SkTAddOffset<uint32_t>(dstRow, bitmap.rowBytes());
+            uint32_t* row = (uint32_t*) bitmap.getPixels();
+            for (int y = 0; y < decodeInfo.height(); y++) {
+                qcms_transform_data_type(transform, row, row, decodeInfo.width(), outType);
+                row = SkTAddOffset<uint32_t>(row, rowBytes);
             }
 
             canvas->drawBitmap(bitmap, 0, 0);
diff --git a/include/codec/SkCodec.h b/include/codec/SkCodec.h
index 5c84e07..c5dc66a 100644
--- a/include/codec/SkCodec.h
+++ b/include/codec/SkCodec.h
@@ -286,6 +286,12 @@
      *         to scale. If the generator cannot perform this scale,
      *         it will return kInvalidScale.
      *
+     *         If the info contains a non-null SkColorSpace, the codec
+     *         will perform the appropriate color space transformation.
+     *         If the caller passes in the same color space that was
+     *         reported by the codec, the color space transformation is
+     *         a no-op.
+     *
      *  If info is kIndex8_SkColorType, then the caller must provide storage for up to 256
      *  SkPMColor values in ctable. On success the generator must copy N colors into that storage,
      *  (where N is the logical number of table entries) and set ctableCount to N.
diff --git a/src/codec/SkJpegCodec.cpp b/src/codec/SkJpegCodec.cpp
index a81c759..d158b42 100644
--- a/src/codec/SkJpegCodec.cpp
+++ b/src/codec/SkJpegCodec.cpp
@@ -196,7 +196,7 @@
 
     // libjpeg errors will be caught and reported here
     if (setjmp(decoderMgr->getJmpBuf())) {
-        return decoderMgr->returnFalse("setjmp");
+        return decoderMgr->returnFalse("ReadHeader");
     }
 
     // Initialize the decompress info and the source manager
@@ -212,7 +212,7 @@
 
     // Read the jpeg header
     if (JPEG_HEADER_OK != jpeg_read_header(decoderMgr->dinfo(), true)) {
-        return decoderMgr->returnFalse("read_header");
+        return decoderMgr->returnFalse("ReadHeader");
     }
 
     if (codecOut) {
@@ -268,7 +268,8 @@
     : INHERITED(width, height, info, stream, std::move(colorSpace), origin)
     , fDecoderMgr(decoderMgr)
     , fReadyState(decoderMgr->dinfo()->global_state)
-    , fSrcRow(nullptr)
+    , fSwizzleSrcRow(nullptr)
+    , fColorXformSrcRow(nullptr)
     , fSwizzlerSubset(SkIRect::MakeEmpty())
     , fICCData(std::move(iccData))
 {}
@@ -341,14 +342,16 @@
 bool SkJpegCodec::onRewind() {
     JpegDecoderMgr* decoderMgr = nullptr;
     if (!ReadHeader(this->stream(), nullptr, &decoderMgr)) {
-        return fDecoderMgr->returnFalse("could not rewind");
+        return fDecoderMgr->returnFalse("onRewind");
     }
     SkASSERT(nullptr != decoderMgr);
     fDecoderMgr.reset(decoderMgr);
 
     fSwizzler.reset(nullptr);
-    fSrcRow = nullptr;
+    fSwizzleSrcRow = nullptr;
+    fColorXformSrcRow = nullptr;
     fStorage.reset();
+    fColorXform.reset(nullptr);
 
     return true;
 }
@@ -358,22 +361,23 @@
  * image has been implemented
  * Sets the output color space
  */
-bool SkJpegCodec::setOutputColorSpace(const SkImageInfo& dst) {
-    if (kUnknown_SkAlphaType == dst.alphaType()) {
+bool SkJpegCodec::setOutputColorSpace(const SkImageInfo& dstInfo, bool needsColorXform) {
+    if (kUnknown_SkAlphaType == dstInfo.alphaType()) {
         return false;
     }
 
-    if (kOpaque_SkAlphaType != dst.alphaType()) {
+    if (kOpaque_SkAlphaType != dstInfo.alphaType()) {
         SkCodecPrintf("Warning: an opaque image should be decoded as opaque "
                       "- it is being decoded as non-opaque, which will draw slower\n");
     }
 
-    // Check if we will decode to CMYK because a conversion to RGBA is not supported
-    J_COLOR_SPACE colorSpace = fDecoderMgr->dinfo()->jpeg_color_space;
-    bool isCMYK = JCS_CMYK == colorSpace || JCS_YCCK == colorSpace;
+    // Check if we will decode to CMYK.  libjpeg-turbo does not convert CMYK to RGBA, so
+    // we must do it ourselves.
+    J_COLOR_SPACE encodedColorType = fDecoderMgr->dinfo()->jpeg_color_space;
+    bool isCMYK = (JCS_CMYK == encodedColorType || JCS_YCCK == encodedColorType);
 
     // Check for valid color types and set the output color space
-    switch (dst.colorType()) {
+    switch (dstInfo.colorType()) {
         case kRGBA_8888_SkColorType:
             if (isCMYK) {
                 fDecoderMgr->dinfo()->out_color_space = JCS_CMYK;
@@ -384,11 +388,19 @@
         case kBGRA_8888_SkColorType:
             if (isCMYK) {
                 fDecoderMgr->dinfo()->out_color_space = JCS_CMYK;
+            } else if (needsColorXform) {
+                // Our color transformation code requires RGBA order inputs, but it'll swizzle
+                // to BGRA for us.
+                fDecoderMgr->dinfo()->out_color_space = JCS_EXT_RGBA;
             } else {
                 fDecoderMgr->dinfo()->out_color_space = JCS_EXT_BGRA;
             }
             return true;
         case kRGB_565_SkColorType:
+            if (needsColorXform) {
+                return false;
+            }
+
             if (isCMYK) {
                 fDecoderMgr->dinfo()->out_color_space = JCS_CMYK;
             } else {
@@ -401,12 +413,19 @@
             }
             return true;
         case kGray_8_SkColorType:
-            if (isCMYK) {
+            if (needsColorXform || JCS_GRAYSCALE != encodedColorType) {
                 return false;
+            }
+
+            fDecoderMgr->dinfo()->out_color_space = JCS_GRAYSCALE;
+            return true;
+        case kRGBA_F16_SkColorType:
+            SkASSERT(needsColorXform);
+
+            if (isCMYK) {
+                fDecoderMgr->dinfo()->out_color_space = JCS_CMYK;
             } else {
-                // We will enable decodes to gray even if the image is color because this is
-                // much faster than decoding to color and then converting
-                fDecoderMgr->dinfo()->out_color_space = JCS_GRAYSCALE;
+                fDecoderMgr->dinfo()->out_color_space = JCS_EXT_RGBA;
             }
             return true;
         default:
@@ -420,7 +439,7 @@
  */
 bool SkJpegCodec::onDimensionsSupported(const SkISize& size) {
     if (setjmp(fDecoderMgr->getJmpBuf())) {
-        return fDecoderMgr->returnFalse("onDimensionsSupported/setjmp");
+        return fDecoderMgr->returnFalse("onDimensionsSupported");
     }
 
     const unsigned int dstWidth = size.width();
@@ -455,6 +474,84 @@
     return true;
 }
 
+static bool needs_color_xform(const SkImageInfo& dstInfo, const SkImageInfo& srcInfo) {
+    // Needed for every F16 dst, or when dst has a color space that compares != to the src's.
+    // FIXME (msarett): Do a better check for color space equality.
+    return (kRGBA_F16_SkColorType == dstInfo.colorType()) ||
+           (dstInfo.colorSpace() && (dstInfo.colorSpace() != srcInfo.colorSpace()));
+}
+
+int SkJpegCodec::readRows(const SkImageInfo& dstInfo, void* dst, size_t rowBytes, int count) {
+    // Set the jump location for libjpeg-turbo errors
+    if (setjmp(fDecoderMgr->getJmpBuf())) {
+        return 0;  // no rows are reported after a libjpeg-turbo error
+    }
+
+    // Decodes up to |count| rows into dst, through the swizzler and color xform when present.
+    // A non-null fSwizzleSrcRow means we must swizzle: we always decode into fSwizzleSrcRow
+    // and swizzle into the next buffer, never "in place", because the swizzler may perform
+    // sampling and/or subsetting.
+    // A non-null fColorXformSrcRow means we must color xform and cannot do so "in place"
+    // (many times we can, but not when the dst is F16).  In this case, we will color xform
+    // from fColorXformSrcRow into the dst.
+    JSAMPLE* decodeDst = (JSAMPLE*) dst;
+    uint32_t* swizzleDst = (uint32_t*) dst;
+    size_t decodeDstRowBytes = rowBytes;
+    size_t swizzleDstRowBytes = rowBytes;
+    if (fSwizzleSrcRow && fColorXformSrcRow) {
+        decodeDst = (JSAMPLE*) fSwizzleSrcRow;
+        swizzleDst = fColorXformSrcRow;
+        decodeDstRowBytes = 0;  // scratch rows are reused, so neither pointer advances
+        swizzleDstRowBytes = 0;
+    } else if (fColorXformSrcRow) {
+        decodeDst = (JSAMPLE*) fColorXformSrcRow;
+        swizzleDst = fColorXformSrcRow;
+        decodeDstRowBytes = 0;
+        swizzleDstRowBytes = 0;
+    } else if (fSwizzleSrcRow) {
+        decodeDst = (JSAMPLE*) fSwizzleSrcRow;
+        decodeDstRowBytes = 0;
+    }
+
+    for (int y = 0; y < count; y++) {
+        uint32_t lines = jpeg_read_scanlines(fDecoderMgr->dinfo(), &decodeDst, 1);
+        size_t srcRowBytes = get_row_bytes(fDecoderMgr->dinfo());
+        sk_msan_mark_initialized(decodeDst, decodeDst + srcRowBytes, "skbug.com/4550");
+        if (0 == lines) {
+            return y;  // short read: report the rows completed so far
+        }
+
+        if (fSwizzler) {
+            fSwizzler->swizzle(swizzleDst, decodeDst);
+        }
+
+        if (fColorXform) {
+            int width = dstInfo.width();
+            switch (dstInfo.colorType()) {
+                case kRGBA_8888_SkColorType:
+                    fColorXform->applyToRGBA((uint32_t*) dst, swizzleDst, width);
+                    break;
+                case kBGRA_8888_SkColorType:
+                    fColorXform->applyToBGRA((uint32_t*) dst, swizzleDst, width);
+                    break;
+                case kRGBA_F16_SkColorType:
+                    fColorXform->applyToF16((uint64_t*) dst, swizzleDst, width);
+                    break;
+                default:
+                    SkASSERT(false);
+                    break;
+            }
+
+            dst = SkTAddOffset<void>(dst, rowBytes);  // only this path uses dst directly
+        }
+
+        decodeDst = SkTAddOffset<JSAMPLE>(decodeDst, decodeDstRowBytes);
+        swizzleDst = SkTAddOffset<uint32_t>(swizzleDst, swizzleDstRowBytes);
+    }
+
+    return count;  // every requested row was read
+}
+
 /*
  * Performs the jpeg decode
  */
@@ -476,11 +573,15 @@
     }
 
     // Check if we can decode to the requested destination and set the output color space
-    if (!this->setOutputColorSpace(dstInfo)) {
-        return fDecoderMgr->returnFailure("conversion_possible", kInvalidConversion);
+    bool needsColorXform = needs_color_xform(dstInfo, this->getInfo());
+    if (!this->setOutputColorSpace(dstInfo, needsColorXform)) {
+        return fDecoderMgr->returnFailure("setOutputColorSpace", kInvalidConversion);
     }
 
-    // Now, given valid output dimensions, we can start the decompress
+    if (!this->initializeColorXform(dstInfo, needsColorXform)) {
+        return fDecoderMgr->returnFailure("initializeColorXform", kInvalidParameters);
+    }
+
     if (!jpeg_start_decompress(dinfo)) {
         return fDecoderMgr->returnFailure("startDecompress", kInvalidInput);
     }
@@ -494,41 +595,39 @@
         this->initializeSwizzler(dstInfo, options);
     }
 
-    // Perform the decode a single row at a time
-    uint32_t dstHeight = dstInfo.height();
+    this->allocateStorage(dstInfo);
 
-    JSAMPLE* dstRow;
-    if (fSwizzler) {
-        // write data to storage row, then sample using swizzler
-        dstRow = fSrcRow;
-    } else {
-        // write data directly to dst
-        dstRow = (JSAMPLE*) dst;
-    }
-
-    for (uint32_t y = 0; y < dstHeight; y++) {
-        // Read rows of the image
-        uint32_t lines = jpeg_read_scanlines(dinfo, &dstRow, 1);
-        sk_msan_mark_initialized(dstRow, dstRow + dstRowBytes, "skbug.com/4550");
-
-        // If we cannot read enough rows, assume the input is incomplete
-        if (lines != 1) {
-            *rowsDecoded = y;
-            return fDecoderMgr->returnFailure("Incomplete image data", kIncompleteInput);
-        }
-
-        if (fSwizzler) {
-            // use swizzler to sample row
-            fSwizzler->swizzle(dst, dstRow);
-            dst = SkTAddOffset<JSAMPLE>(dst, dstRowBytes);
-        } else {
-            dstRow = SkTAddOffset<JSAMPLE>(dstRow, dstRowBytes);
-        }
+    int rows = this->readRows(dstInfo, dst, dstRowBytes, dstInfo.height());
+    if (rows < dstInfo.height()) {
+        *rowsDecoded = rows;
+        return fDecoderMgr->returnFailure("Incomplete image data", kIncompleteInput);
     }
 
     return kSuccess;
 }
 
+void SkJpegCodec::allocateStorage(const SkImageInfo& dstInfo) {
+    size_t swizzleBytes = 0;  // size of the swizzler's source row, if there is a swizzler
+    if (fSwizzler) {
+        swizzleBytes = get_row_bytes(fDecoderMgr->dinfo());
+        SkASSERT(!fColorXform || SkIsAlign4(swizzleBytes));  // xform row (uint32_t) follows it
+    }
+
+    size_t xformBytes = 0;  // size of the xform's source row; only F16 dsts get one
+    if (kRGBA_F16_SkColorType == dstInfo.colorType()) {
+        SkASSERT(fColorXform);
+        xformBytes = dstInfo.width() * sizeof(SkColorSpaceXform::RGBA32);  // one RGBA32 per pixel
+    }
+
+    size_t totalBytes = swizzleBytes + xformBytes;  // both rows share the one fStorage block
+    if (totalBytes > 0) {  // NOTE(review): row pointers are left untouched when this is zero
+        fStorage.reset(totalBytes);
+        fSwizzleSrcRow = (swizzleBytes > 0) ? fStorage.get() : nullptr;
+        fColorXformSrcRow = (xformBytes > 0) ?
+                SkTAddOffset<uint32_t>(fStorage.get(), swizzleBytes) : nullptr;
+    }
+}
+
 void SkJpegCodec::initializeSwizzler(const SkImageInfo& dstInfo, const Options& options) {
     // libjpeg-turbo may have already performed color conversion.  We must indicate the
     // appropriate format to the swizzler.
@@ -543,9 +642,9 @@
             break;
         case JCS_CMYK:
             preSwizzled = false;
-            swizzlerInfo = SkEncodedInfo::Make(
-                    SkEncodedInfo::kInvertedCMYK_Color, swizzlerInfo.alpha(),
-                    swizzlerInfo.bitsPerComponent());
+            swizzlerInfo = SkEncodedInfo::Make(SkEncodedInfo::kInvertedCMYK_Color,
+                                               swizzlerInfo.alpha(),
+                                               swizzlerInfo.bitsPerComponent());
             break;
         default:
             break;
@@ -563,17 +662,28 @@
     fSwizzler.reset(SkSwizzler::CreateSwizzler(swizzlerInfo, nullptr, dstInfo, swizzlerOptions,
                                                nullptr, preSwizzled));
     SkASSERT(fSwizzler);
-    fStorage.reset(get_row_bytes(fDecoderMgr->dinfo()));
-    fSrcRow = fStorage.get();
+}
+
+bool SkJpegCodec::initializeColorXform(const SkImageInfo& dstInfo, bool needsColorXform) {
+    if (needsColorXform) {  // otherwise fColorXform is left as it was
+        fColorXform = SkColorSpaceXform::New(sk_ref_sp(this->getInfo().colorSpace()),
+                                             sk_ref_sp(dstInfo.colorSpace()));
+        if (!fColorXform && kRGBA_F16_SkColorType == dstInfo.colorType()) {
+            return false;  // F16 pixels are only ever written by the xform
+        }
+    }
+
+    return true;  // NOTE(review): also reached when New() failed for a non-F16 dst; check BGRA
 }
 
 SkSampler* SkJpegCodec::getSampler(bool createIfNecessary) {
     if (!createIfNecessary || fSwizzler) {
-        SkASSERT(!fSwizzler || (fSrcRow && fStorage.get() == fSrcRow));
+        SkASSERT(!fSwizzler || (fSwizzleSrcRow && fStorage.get() == fSwizzleSrcRow));
         return fSwizzler;
     }
 
     this->initializeSwizzler(this->dstInfo(), this->options());
+    this->allocateStorage(this->dstInfo());
     return fSwizzler;
 }
 
@@ -586,11 +696,15 @@
     }
 
     // Check if we can decode to the requested destination and set the output color space
-    if (!this->setOutputColorSpace(dstInfo)) {
+    bool needsColorXform = needs_color_xform(dstInfo, this->getInfo());
+    if (!this->setOutputColorSpace(dstInfo, needsColorXform)) {
         return kInvalidConversion;
     }
 
-    // Now, given valid output dimensions, we can start the decompress
+    if (!this->initializeColorXform(dstInfo, needsColorXform)) {
+        return fDecoderMgr->returnFailure("initializeColorXform", kInvalidParameters);
+    }
+
     if (!jpeg_start_decompress(fDecoderMgr->dinfo())) {
         SkCodecPrintf("start decompress failed\n");
         return kInvalidInput;
@@ -651,62 +765,37 @@
     }
 #endif
 
+    this->allocateStorage(dstInfo);
+
     return kSuccess;
 }
 
 int SkJpegCodec::onGetScanlines(void* dst, int count, size_t dstRowBytes) {
-    // Set the jump location for libjpeg errors
-    if (setjmp(fDecoderMgr->getJmpBuf())) {
-        return fDecoderMgr->returnFailure("setjmp", kInvalidInput);
-    }
-    // Read rows one at a time
-    JSAMPLE* dstRow;
-    size_t srcRowBytes = get_row_bytes(fDecoderMgr->dinfo());
-    if (fSwizzler) {
-        // write data to storage row, then sample using swizzler
-        dstRow = fSrcRow;
-    } else {
-        // write data directly to dst
-        SkASSERT(count == 1 || dstRowBytes >= srcRowBytes);
-        dstRow = (JSAMPLE*) dst;
+    int rows = this->readRows(this->dstInfo(), dst, dstRowBytes, count);
+    if (rows < count) {
+        // This allows us to skip calling jpeg_finish_decompress().
+        fDecoderMgr->dinfo()->output_scanline = this->dstInfo().height();
     }
 
-    for (int y = 0; y < count; y++) {
-        // Read row of the image
-        uint32_t rowsDecoded = jpeg_read_scanlines(fDecoderMgr->dinfo(), &dstRow, 1);
-        sk_msan_mark_initialized(dstRow, dstRow + srcRowBytes, "skbug.com/4550");
-        if (rowsDecoded != 1) {
-            fDecoderMgr->dinfo()->output_scanline = this->dstInfo().height();
-            return y;
-        }
-
-        if (fSwizzler) {
-            // use swizzler to sample row
-            fSwizzler->swizzle(dst, dstRow);
-            dst = SkTAddOffset<JSAMPLE>(dst, dstRowBytes);
-        } else {
-            dstRow = SkTAddOffset<JSAMPLE>(dstRow, dstRowBytes);
-        }
-    }
-    return count;
+    return rows;
 }
 
 bool SkJpegCodec::onSkipScanlines(int count) {
     // Set the jump location for libjpeg errors
     if (setjmp(fDecoderMgr->getJmpBuf())) {
-        return fDecoderMgr->returnFalse("setjmp");
+        return fDecoderMgr->returnFalse("onSkipScanlines");
     }
 
 #ifdef TURBO_HAS_SKIP
     return (uint32_t) count == jpeg_skip_scanlines(fDecoderMgr->dinfo(), count);
 #else
-    if (!fSrcRow) {
+    if (!fSwizzleSrcRow) {
         fStorage.reset(get_row_bytes(fDecoderMgr->dinfo()));
-        fSrcRow = fStorage.get();
+        fSwizzleSrcRow = fStorage.get();
     }
 
     for (int y = 0; y < count; y++) {
-        if (1 != jpeg_read_scanlines(fDecoderMgr->dinfo(), &fSrcRow, 1)) {
+        if (1 != jpeg_read_scanlines(fDecoderMgr->dinfo(), &fSwizzleSrcRow, 1)) {
             return false;
         }
     }
diff --git a/src/codec/SkJpegCodec.h b/src/codec/SkJpegCodec.h
index 7aa275c..7eb5100 100644
--- a/src/codec/SkJpegCodec.h
+++ b/src/codec/SkJpegCodec.h
@@ -10,6 +10,7 @@
 
 #include "SkCodec.h"
 #include "SkColorSpace.h"
+#include "SkColorSpaceXform.h"
 #include "SkImageInfo.h"
 #include "SkSwizzler.h"
 #include "SkStream.h"
@@ -99,34 +100,46 @@
 
     /*
      * Checks if the conversion between the input image and the requested output
-     * image has been implemented
-     * Sets the output color space
+     * image has been implemented.
+     *
+     * Sets the output color space.
      */
-    bool setOutputColorSpace(const SkImageInfo& dst);
+    bool setOutputColorSpace(const SkImageInfo& dst, bool needsColorXform);
 
-    // scanline decoding
     void initializeSwizzler(const SkImageInfo& dstInfo, const Options& options);
+    bool initializeColorXform(const SkImageInfo& dstInfo, bool needsColorXform);
+    void allocateStorage(const SkImageInfo& dstInfo);
+    int readRows(const SkImageInfo& dstInfo, void* dst, size_t rowBytes, int count);
+
+    /*
+     * Scanline decoding.
+     */
     SkSampler* getSampler(bool createIfNecessary) override;
     Result onStartScanlineDecode(const SkImageInfo& dstInfo, const Options& options,
             SkPMColor ctable[], int* ctableCount) override;
     int onGetScanlines(void* dst, int count, size_t rowBytes) override;
     bool onSkipScanlines(int count) override;
 
-    SkAutoTDelete<JpegDecoderMgr> fDecoderMgr;
+    SkAutoTDelete<JpegDecoderMgr>      fDecoderMgr;
+
     // We will save the state of the decompress struct after reading the header.
     // This allows us to safely call onGetScaledDimensions() at any time.
-    const int                     fReadyState;
+    const int                          fReadyState;
 
-    // scanline decoding
-    SkAutoTMalloc<uint8_t>     fStorage;    // Only used if sampling is needed
-    uint8_t*                   fSrcRow;     // Only used if sampling is needed
+
+    SkAutoTMalloc<uint8_t>             fStorage;
+    uint8_t*                           fSwizzleSrcRow;
+    uint32_t*                          fColorXformSrcRow;
+
     // libjpeg-turbo provides some subsetting.  In the case that libjpeg-turbo
     // cannot take the exact the subset that we need, we will use the swizzler
     // to further subset the output from libjpeg-turbo.
-    SkIRect                    fSwizzlerSubset;
-    SkAutoTDelete<SkSwizzler>  fSwizzler;
+    SkIRect                            fSwizzlerSubset;
+
+    SkAutoTDelete<SkSwizzler>          fSwizzler;
+    std::unique_ptr<SkColorSpaceXform> fColorXform;
     
-    sk_sp<SkData>              fICCData;
+    sk_sp<SkData>                      fICCData;
 
     typedef SkCodec INHERITED;
 };
diff --git a/src/core/SkColorSpaceXform.cpp b/src/core/SkColorSpaceXform.cpp
index 924f24f..4fbfa6c 100644
--- a/src/core/SkColorSpaceXform.cpp
+++ b/src/core/SkColorSpaceXform.cpp
@@ -629,7 +629,7 @@
 
 template <>
 void SkColorSpaceXform_Base<SkColorSpace::kSRGB_GammaNamed>
-::applyTo8888(SkPMColor* dst, const RGBA32* src, int len) const
+::applyToRGBA(RGBA32* dst, const RGBA32* src, int len) const
 {
     if (fColorLUT) {
         handle_color_lut(dst, src, len, fColorLUT.get());
@@ -641,7 +641,7 @@
 
 template <>
 void SkColorSpaceXform_Base<SkColorSpace::k2Dot2Curve_GammaNamed>
-::applyTo8888(SkPMColor* dst, const RGBA32* src, int len) const
+::applyToRGBA(RGBA32* dst, const RGBA32* src, int len) const
 {
     if (fColorLUT) {
         handle_color_lut(dst, src, len, fColorLUT.get());
@@ -653,7 +653,7 @@
 
 template <>
 void SkColorSpaceXform_Base<SkColorSpace::kNonStandard_GammaNamed>
-::applyTo8888(SkPMColor* dst, const RGBA32* src, int len) const
+::applyToRGBA(RGBA32* dst, const RGBA32* src, int len) const
 {
     if (fColorLUT) {
         handle_color_lut(dst, src, len, fColorLUT.get());
@@ -663,6 +663,43 @@
     SkOpts::color_xform_RGB1_to_table(dst, src, len, fSrcGammaTables, fSrcToDst, fDstGammaTables);
 }
 
+template <>  // kSRGB_GammaNamed specialization; dst is written through a BGRA32*
+void SkColorSpaceXform_Base<SkColorSpace::kSRGB_GammaNamed>
+::applyToBGRA(BGRA32* dst, const RGBA32* src, int len) const
+{
+    if (fColorLUT) {  // a color LUT is present: run handle_color_lut() first
+        handle_color_lut(dst, src, len, fColorLUT.get());
+        src = dst;  // the swaprb routine below then reads its input from dst
+    }
+
+    SkOpts::color_xform_RGB1_to_srgb_swaprb(dst, src, len, fSrcGammaTables, fSrcToDst);
+}
+
+template <>  // k2Dot2Curve_GammaNamed specialization; dst is written through a BGRA32*
+void SkColorSpaceXform_Base<SkColorSpace::k2Dot2Curve_GammaNamed>
+::applyToBGRA(BGRA32* dst, const RGBA32* src, int len) const
+{
+    if (fColorLUT) {  // a color LUT is present: run handle_color_lut() first
+        handle_color_lut(dst, src, len, fColorLUT.get());
+        src = dst;  // the swaprb routine below then reads its input from dst
+    }
+
+    SkOpts::color_xform_RGB1_to_2dot2_swaprb(dst, src, len, fSrcGammaTables, fSrcToDst);
+}
+
+template <>  // kNonStandard_GammaNamed specialization; dst is written through a BGRA32*
+void SkColorSpaceXform_Base<SkColorSpace::kNonStandard_GammaNamed>
+::applyToBGRA(BGRA32* dst, const RGBA32* src, int len) const
+{
+    if (fColorLUT) {  // a color LUT is present: run handle_color_lut() first
+        handle_color_lut(dst, src, len, fColorLUT.get());
+        src = dst;  // the swaprb routine below then reads its input from dst
+    }
+
+    SkOpts::color_xform_RGB1_to_table_swaprb(dst, src, len, fSrcGammaTables, fSrcToDst,
+                                             fDstGammaTables);  // only variant taking dst tables
+}
+
 template <SkColorSpace::GammaNamed T>
 void SkColorSpaceXform_Base<T>
 ::applyToF16(RGBAF16* dst, const RGBA32* src, int len) const
diff --git a/src/core/SkColorSpaceXform.h b/src/core/SkColorSpaceXform.h
index 0a5b35c..2696b85 100644
--- a/src/core/SkColorSpaceXform.h
+++ b/src/core/SkColorSpaceXform.h
@@ -15,6 +15,7 @@
 public:
 
     typedef uint32_t RGBA32;
+    typedef uint32_t BGRA32;
     typedef uint64_t RGBAF16;
 
     /**
@@ -32,7 +33,8 @@
      *  The src is stored as RGBA (8888) and is treated as opaque.
      *  TODO (msarett): Support non-opaque srcs.
      */
-    virtual void applyTo8888(SkPMColor* dst, const RGBA32* src, int len) const = 0;
+    virtual void applyToRGBA(RGBA32* dst, const RGBA32* src, int len) const = 0;
+    virtual void applyToBGRA(BGRA32* dst, const RGBA32* src, int len) const = 0;
     virtual void applyToF16(RGBAF16* dst, const RGBA32* src, int len) const = 0;
 
     virtual ~SkColorSpaceXform() {}
@@ -42,7 +44,8 @@
 class SkColorSpaceXform_Base : public SkColorSpaceXform {
 public:
 
-    void applyTo8888(SkPMColor* dst, const RGBA32* src, int len) const override;
+    void applyToRGBA(RGBA32* dst, const RGBA32* src, int len) const override;
+    void applyToBGRA(BGRA32* dst, const RGBA32* src, int len) const override;
     void applyToF16(RGBAF16* dst, const RGBA32* src, int len) const override;
 
     static constexpr int      kDstGammaTableSize = 1024;
diff --git a/src/core/SkOpts.cpp b/src/core/SkOpts.cpp
index c577bf9..9ba7bc7 100644
--- a/src/core/SkOpts.cpp
+++ b/src/core/SkOpts.cpp
@@ -76,6 +76,9 @@
     DEFINE_DEFAULT(color_xform_RGB1_to_srgb);
     DEFINE_DEFAULT(color_xform_RGB1_to_table);
     DEFINE_DEFAULT(color_xform_RGB1_to_linear);
+    DEFINE_DEFAULT(color_xform_RGB1_to_2dot2_swaprb);
+    DEFINE_DEFAULT(color_xform_RGB1_to_srgb_swaprb);
+    DEFINE_DEFAULT(color_xform_RGB1_to_table_swaprb);
 #undef DEFINE_DEFAULT
 
     // Each Init_foo() is defined in src/opts/SkOpts_foo.cpp.
diff --git a/src/core/SkOpts.h b/src/core/SkOpts.h
index fc424e1..87489ee 100644
--- a/src/core/SkOpts.h
+++ b/src/core/SkOpts.h
@@ -80,7 +80,16 @@
     extern void (*color_xform_RGB1_to_linear)(uint64_t* dst, const uint32_t* src, int len,
                                               const float* const srcTables[3],
                                               const float srcToDstMatrix[16]);
-
+    extern void (*color_xform_RGB1_to_2dot2_swaprb)(uint32_t* dst, const uint32_t* src, int len,
+                                                    const float* const srcTables[3],
+                                                    const float srcToDstMatrix[16]);  // BGRA dst
+    extern void (*color_xform_RGB1_to_srgb_swaprb)(uint32_t* dst, const uint32_t* src, int len,
+                                                   const float* const srcTables[3],
+                                                   const float srcToDstMatrix[16]);
+    extern void (*color_xform_RGB1_to_table_swaprb)(uint32_t* dst, const uint32_t* src, int len,
+                                                    const float* const srcTables[3],
+                                                    const float srcToDstMatrix[16],
+                                                    const uint8_t* const dstTables[3]);
 }
 
 #endif//SkOpts_DEFINED
diff --git a/src/opts/SkColorXform_opts.h b/src/opts/SkColorXform_opts.h
index b5b7f81..b3da55c 100644
--- a/src/opts/SkColorXform_opts.h
+++ b/src/opts/SkColorXform_opts.h
@@ -41,10 +41,19 @@
     kLinear_DstGamma,
 };
 
-template <DstGamma kDstGamma>
+template <DstGamma kDstGamma, bool kSwapRB>
 static void color_xform_RGB1(void* dst, const uint32_t* src, int len,
                              const float* const srcTables[3], const float matrix[16],
                              const uint8_t* const dstTables[3]) {
+    int kRShift = 0;   // red   is packed at bits  0..7  of each 32-bit dst pixel
+    int kGShift = 8;   // green is packed at bits  8..15
+    int kBShift = 16;  // blue  is packed at bits 16..23
+    int kAShift = 24;  // alpha is packed at bits 24..31
+    if (kSwapRB) {     // swapped output: exchange the red and blue positions
+        kBShift = 0;
+        kRShift = 16;
+    }
+
     Sk4f rXgXbX = Sk4f::Load(matrix +  0),
          rYgYbY = Sk4f::Load(matrix +  4),
          rZgZbZ = Sk4f::Load(matrix +  8),
@@ -77,7 +86,8 @@
             dstBlues  = rXgXbX[2]*reds + rYgYbY[2]*greens + rZgZbZ[2]*blues + rTgTbT[2];
         };
 
-        auto store_4 = [&dstReds, &dstGreens, &dstBlues, &dst, &dstTables] {
+        auto store_4 = [&dstReds, &dstGreens, &dstBlues, &dst, &dstTables, kRShift, kGShift,
+                        kBShift, kAShift] {
             if (kSRGB_DstGamma == kDstGamma || k2Dot2_DstGamma == kDstGamma) {
                 Sk4f (*linear_to_curve)(const Sk4f&) = (kSRGB_DstGamma == kDstGamma) ?
                         sk_linear_to_srgb_needs_trunc : linear_to_2dot2;
@@ -92,10 +102,10 @@
                 dstGreens = sk_clamp_0_255(dstGreens);
                 dstBlues  = sk_clamp_0_255(dstBlues);
 
-                auto rgba = (float_to_int(dstReds)   << SK_R32_SHIFT)
-                          | (float_to_int(dstGreens) << SK_G32_SHIFT)
-                          | (float_to_int(dstBlues)  << SK_B32_SHIFT)
-                          | (Sk4i{0xFF}              << SK_A32_SHIFT);
+                auto rgba = (float_to_int(dstReds)   << kRShift)
+                          | (float_to_int(dstGreens) << kGShift)
+                          | (float_to_int(dstBlues)  << kBShift)
+                          | (Sk4i{0xFF}              << kAShift);
                 rgba.store((uint32_t*) dst);
 
                 dst = SkTAddOffset<void>(dst, 4 * sizeof(uint32_t));
@@ -109,22 +119,22 @@
                 Sk4i indicesBlues  = Sk4f_round(scaledBlues);
 
                 uint32_t* dst32 = (uint32_t*) dst;
-                dst32[0] = dstTables[0][indicesReds  [0]] << SK_R32_SHIFT
-                         | dstTables[1][indicesGreens[0]] << SK_G32_SHIFT
-                         | dstTables[2][indicesBlues [0]] << SK_B32_SHIFT
-                         | 0xFF                           << SK_A32_SHIFT;
-                dst32[1] = dstTables[0][indicesReds  [1]] << SK_R32_SHIFT
-                         | dstTables[1][indicesGreens[1]] << SK_G32_SHIFT
-                         | dstTables[2][indicesBlues [1]] << SK_B32_SHIFT
-                         | 0xFF                           << SK_A32_SHIFT;
-                dst32[2] = dstTables[0][indicesReds  [2]] << SK_R32_SHIFT
-                         | dstTables[1][indicesGreens[2]] << SK_G32_SHIFT
-                         | dstTables[2][indicesBlues [2]] << SK_B32_SHIFT
-                         | 0xFF                           << SK_A32_SHIFT;
-                dst32[3] = dstTables[0][indicesReds  [3]] << SK_R32_SHIFT
-                         | dstTables[1][indicesGreens[3]] << SK_G32_SHIFT
-                         | dstTables[2][indicesBlues [3]] << SK_B32_SHIFT
-                         | 0xFF                           << SK_A32_SHIFT;
+                dst32[0] = dstTables[0][indicesReds  [0]] << kRShift
+                         | dstTables[1][indicesGreens[0]] << kGShift
+                         | dstTables[2][indicesBlues [0]] << kBShift
+                         | 0xFF                           << kAShift;
+                dst32[1] = dstTables[0][indicesReds  [1]] << kRShift
+                         | dstTables[1][indicesGreens[1]] << kGShift
+                         | dstTables[2][indicesBlues [1]] << kBShift
+                         | 0xFF                           << kAShift;
+                dst32[2] = dstTables[0][indicesReds  [2]] << kRShift
+                         | dstTables[1][indicesGreens[2]] << kGShift
+                         | dstTables[2][indicesBlues [2]] << kBShift
+                         | 0xFF                           << kAShift;
+                dst32[3] = dstTables[0][indicesReds  [3]] << kRShift
+                         | dstTables[1][indicesGreens[3]] << kGShift
+                         | dstTables[2][indicesBlues [3]] << kBShift
+                         | 0xFF                           << kAShift;
 
                 dst = SkTAddOffset<void>(dst, 4 * sizeof(uint32_t));
             } else {
@@ -167,17 +177,21 @@
             uint32_t rgba;
             SkNx_cast<uint8_t>(float_to_int(dstPixel)).store(&rgba);
             rgba |= 0xFF000000;
-            *((uint32_t*) dst) = SkSwizzle_RGBA_to_PMColor(rgba);
+            if (kSwapRB) {  // swapped output requested: store SkSwizzle_RB(rgba)
+                *((uint32_t*) dst) = SkSwizzle_RB(rgba);
+            } else {  // otherwise store the packed value unchanged
+                *((uint32_t*) dst) = rgba;
+            }
             dst = SkTAddOffset<void>(dst, sizeof(uint32_t));
         } else if (kTable_DstGamma == kDstGamma) {
             Sk4f scaledPixel = Sk4f::Min(Sk4f::Max(1023.0f * dstPixel, 0.0f), 1023.0f);
 
             Sk4i indices = Sk4f_round(scaledPixel);
 
-            *((uint32_t*) dst) = dstTables[0][indices[0]] << SK_R32_SHIFT
-                               | dstTables[1][indices[1]] << SK_G32_SHIFT
-                               | dstTables[2][indices[2]] << SK_B32_SHIFT
-                               | 0xFF                     << SK_A32_SHIFT;
+            *((uint32_t*) dst) = dstTables[0][indices[0]] << kRShift
+                               | dstTables[1][indices[1]] << kGShift
+                               | dstTables[2][indices[2]] << kBShift
+                               | 0xFF                     << kAShift;
 
             dst = SkTAddOffset<void>(dst, sizeof(uint32_t));
         } else {
@@ -195,23 +209,42 @@
 
 static void color_xform_RGB1_to_2dot2(uint32_t* dst, const uint32_t* src, int len,
                                       const float* const srcTables[3], const float matrix[16]) {
-    color_xform_RGB1<k2Dot2_DstGamma>(dst, src, len, srcTables, matrix, nullptr);
+    color_xform_RGB1<k2Dot2_DstGamma, false>(dst, src, len, srcTables, matrix, nullptr);
 }
 
 static void color_xform_RGB1_to_srgb(uint32_t* dst, const uint32_t* src, int len,
                                      const float* const srcTables[3], const float matrix[16]) {
-    color_xform_RGB1<kSRGB_DstGamma>(dst, src, len, srcTables, matrix, nullptr);
+    color_xform_RGB1<kSRGB_DstGamma, false>(dst, src, len, srcTables, matrix, nullptr);
 }
 
 static void color_xform_RGB1_to_table(uint32_t* dst, const uint32_t* src, int len,
                                       const float* const srcTables[3], const float matrix[16],
                                       const uint8_t* const dstTables[3]) {
-    color_xform_RGB1<kTable_DstGamma>(dst, src, len, srcTables, matrix, dstTables);
+    color_xform_RGB1<kTable_DstGamma, false>(dst, src, len, srcTables, matrix, dstTables);
 }
 
 static void color_xform_RGB1_to_linear(uint64_t* dst, const uint32_t* src, int len,
                                        const float* const srcTables[3], const float matrix[16]) {
-    color_xform_RGB1<kLinear_DstGamma>(dst, src, len, srcTables, matrix, nullptr);
+    color_xform_RGB1<kLinear_DstGamma, false>(dst, src, len, srcTables, matrix, nullptr);
+}
+
+static void color_xform_RGB1_to_2dot2_swaprb(uint32_t* dst, const uint32_t* src, int len,
+                                             const float* const srcTables[3],
+                                             const float matrix[16]) {  // kSwapRB = true variant
+    color_xform_RGB1<k2Dot2_DstGamma, true>(dst, src, len, srcTables, matrix, nullptr);
+}
+
+static void color_xform_RGB1_to_srgb_swaprb(uint32_t* dst, const uint32_t* src, int len,
+                                            const float* const srcTables[3],
+                                            const float matrix[16]) {  // kSwapRB = true variant
+    color_xform_RGB1<kSRGB_DstGamma, true>(dst, src, len, srcTables, matrix, nullptr);
+}
+
+static void color_xform_RGB1_to_table_swaprb(uint32_t* dst, const uint32_t* src, int len,
+                                             const float* const srcTables[3],
+                                             const float matrix[16],
+                                             const uint8_t* const dstTables[3]) {  // kSwapRB=true
+    color_xform_RGB1<kTable_DstGamma, true>(dst, src, len, srcTables, matrix, dstTables);
+}
 
 }  // namespace SK_OPTS_NS
diff --git a/src/opts/SkOpts_sse41.cpp b/src/opts/SkOpts_sse41.cpp
index e1e024d..e70cede 100644
--- a/src/opts/SkOpts_sse41.cpp
+++ b/src/opts/SkOpts_sse41.cpp
@@ -21,9 +21,12 @@
         srcover_srgb_srgb    = sse41::srcover_srgb_srgb;
         blit_row_s32a_opaque = sse41::blit_row_s32a_opaque;
 
-        color_xform_RGB1_to_2dot2  = sse41::color_xform_RGB1_to_2dot2;
-        color_xform_RGB1_to_srgb   = sse41::color_xform_RGB1_to_srgb;
-        color_xform_RGB1_to_table  = sse41::color_xform_RGB1_to_table;
-        color_xform_RGB1_to_linear = sse41::color_xform_RGB1_to_linear;
+        color_xform_RGB1_to_2dot2        = sse41::color_xform_RGB1_to_2dot2;
+        color_xform_RGB1_to_srgb         = sse41::color_xform_RGB1_to_srgb;
+        color_xform_RGB1_to_table        = sse41::color_xform_RGB1_to_table;
+        color_xform_RGB1_to_linear       = sse41::color_xform_RGB1_to_linear;
+        color_xform_RGB1_to_2dot2_swaprb = sse41::color_xform_RGB1_to_2dot2_swaprb;
+        color_xform_RGB1_to_srgb_swaprb  = sse41::color_xform_RGB1_to_srgb_swaprb;
+        color_xform_RGB1_to_table_swaprb = sse41::color_xform_RGB1_to_table_swaprb;
     }
 }
diff --git a/tests/ColorSpaceXformTest.cpp b/tests/ColorSpaceXformTest.cpp
index 23b08ce..1e75ee6 100644
--- a/tests/ColorSpaceXformTest.cpp
+++ b/tests/ColorSpaceXformTest.cpp
@@ -37,19 +37,19 @@
 
     // Create and perform an identity xform.
     std::unique_ptr<SkColorSpaceXform> xform = ColorSpaceXformTest::CreateIdentityXform(gammas);
-    xform->applyTo8888(dstPixels, srcPixels, width);
+    xform->applyToRGBA(dstPixels, srcPixels, width);
 
     // Since the src->dst matrix is the identity, and the gamma curves match,
     // the pixels should be unchanged.
     for (int i = 0; i < width; i++) {
         REPORTER_ASSERT(r, almost_equal(((srcPixels[i] >>  0) & 0xFF),
-                                        SkGetPackedR32(dstPixels[i])));
+                                        ((dstPixels[i] >>  0) & 0xFF)));
         REPORTER_ASSERT(r, almost_equal(((srcPixels[i] >>  8) & 0xFF),
-                                        SkGetPackedG32(dstPixels[i])));
+                                        ((dstPixels[i] >>  8) & 0xFF)));
         REPORTER_ASSERT(r, almost_equal(((srcPixels[i] >> 16) & 0xFF),
-                                        SkGetPackedB32(dstPixels[i])));
+                                        ((dstPixels[i] >> 16) & 0xFF)));
         REPORTER_ASSERT(r, almost_equal(((srcPixels[i] >> 24) & 0xFF),
-                                        SkGetPackedA32(dstPixels[i])));
+                                        ((dstPixels[i] >> 24) & 0xFF)));
     }
 }
 
diff --git a/tools/viewer/ImageSlide.cpp b/tools/viewer/ImageSlide.cpp
index b047154..7dcee5d 100644
--- a/tools/viewer/ImageSlide.cpp
+++ b/tools/viewer/ImageSlide.cpp
@@ -37,20 +37,18 @@
 
 void ImageSlide::load(SkScalar, SkScalar) {
     sk_sp<SkData> encoded = SkData::MakeFromFileName(fPath.c_str());
-    fImage = SkImage::MakeFromEncoded(encoded);
-    fImage->asLegacyBitmap(&fOriginalBitmap, SkImage::kRO_LegacyBitmapMode);
-
     SkAutoTDelete<SkCodec> codec(SkCodec::NewFromData(encoded.get()));
-    sk_sp<SkColorSpace> srcSpace = sk_ref_sp(codec->getInfo().colorSpace());
-    sk_sp<SkColorSpace> dstSpace = SkColorSpace::NewNamed(SkColorSpace::kAdobeRGB_Named);
-    std::unique_ptr<SkColorSpaceXform> xform = SkColorSpaceXform::New(srcSpace, dstSpace);
-    fOriginalBitmap.deepCopyTo(&fXformedBitmap);
-    uint32_t* row = (uint32_t*) fXformedBitmap.getPixels();
-    for (int y = 0; y < fXformedBitmap.height(); y++) {
-        xform->applyTo8888(row, row, fXformedBitmap.width());
-        row = SkTAddOffset<uint32_t>(row, fXformedBitmap.rowBytes());
+    if (!codec) {  // NewFromData() returned null; leave both bitmaps as they are
+        return;
     }
-    fXformedBitmap.notifyPixelsChanged(); // This is needed for the deepCopy
+    // NOTE(review): the return values of both getPixels() calls below are ignored.
+    fOriginalBitmap.allocPixels(codec->getInfo());  // first decode: the codec's own info
+    codec->getPixels(codec->getInfo(), fOriginalBitmap.getPixels(), fOriginalBitmap.rowBytes());
+
+    SkImageInfo xformedInfo = codec->getInfo().makeColorSpace(  // same info, new color space
+            SkColorSpace::NewNamed(SkColorSpace::kAdobeRGB_Named));
+    fXformedBitmap.allocPixels(xformedInfo);  // second decode: requests kAdobeRGB_Named
+    codec->getPixels(xformedInfo, fXformedBitmap.getPixels(), fXformedBitmap.rowBytes());
 }
 
 void ImageSlide::unload() {