Revert "start cleaning up non-skcms SkColorSpaceXforms"

This reverts commit 339133f82c30cd3080672db28e6f72c894cba05a.

Reason for revert: may have broken NinePatchDrawableTest.testGetPadding (unconfirmed). Stranger things have happened.

Original change's description:
> start cleaning up non-skcms SkColorSpaceXforms
> 
> I think this gets rid of
>   - SkColorSpaceXform_Base
>   - SkColorSpaceXform_XYZ
>   - SkColorSpaceXform_A2B
> and lots of support code.  Might be more left to clean up?
> 
> Change-Id: I560d974d1e879dfd6a63ee2244a3dd88bd495c8a
> Reviewed-on: https://skia-review.googlesource.com/129512
> Commit-Queue: Brian Osman <brianosman@google.com>
> Auto-Submit: Mike Klein <mtklein@chromium.org>
> Reviewed-by: Brian Osman <brianosman@google.com>

TBR=mtklein@chromium.org,brianosman@google.com

Change-Id: I9e76195481b8658b34936aeece278d81c286c0fa
No-Presubmit: true
No-Tree-Checks: true
No-Try: true
Reviewed-on: https://skia-review.googlesource.com/129680
Reviewed-by: Mike Klein <mtklein@google.com>
Commit-Queue: Mike Klein <mtklein@google.com>
diff --git a/src/codec/SkCodec.cpp b/src/codec/SkCodec.cpp
index bf12ad2..f67e6c0 100644
--- a/src/codec/SkCodec.cpp
+++ b/src/codec/SkCodec.cpp
@@ -9,7 +9,7 @@
 #include "SkCodec.h"
 #include "SkCodecPriv.h"
 #include "SkColorSpace.h"
-#include "SkColorSpaceXformPriv.h"
+#include "SkColorSpaceXform_Base.h"
 #include "SkData.h"
 #include "SkFrameHolder.h"
 #include "SkGifCodec.h"
@@ -657,9 +657,8 @@
     bool needsColorCorrectPremul = needs_premul(dstInfo.alphaType(), encodedAlpha) &&
                                    SkTransferFunctionBehavior::kRespect == premulBehavior;
     if (needs_color_xform(dstInfo, fSrcInfo.colorSpace(), needsColorCorrectPremul)) {
-        fColorXform = SkMakeColorSpaceXform(fSrcInfo.colorSpace(),
-                                            dstInfo.colorSpace(),
-                                            premulBehavior);
+        fColorXform = SkColorSpaceXform_Base::New(fSrcInfo.colorSpace(), dstInfo.colorSpace(),
+                                                  premulBehavior);
         if (!fColorXform) {
             return false;
         }
diff --git a/src/core/SkColorSpaceXform.cpp b/src/core/SkColorSpaceXform.cpp
index b2228fc..70a5beb 100644
--- a/src/core/SkColorSpaceXform.cpp
+++ b/src/core/SkColorSpaceXform.cpp
@@ -5,23 +5,596 @@
  * found in the LICENSE file.
  */
 
-#include "SkColorSpaceXform.h"
+#include "SkColorData.h"
+#include "SkColorSpacePriv.h"
 #include "SkColorSpaceXformPriv.h"
+#include "SkColorSpaceXform_A2B.h"
+#include "SkColorSpaceXform_Base.h"
+#include "SkColorSpace_A2B.h"
+#include "SkColorSpace_XYZ.h"
+#include "SkHalf.h"
+#include "SkMakeUnique.h"
+#include "SkOpts.h"
+#include "SkPM4fPriv.h"
+#include "SkRasterPipeline.h"
+#include "SkSRGB.h"
+#include "../jumper/SkJumper.h"
 
-std::unique_ptr<SkColorSpaceXform> SkColorSpaceXform::New(SkColorSpace* src, SkColorSpace* dst) {
-    return SkMakeColorSpaceXform(src, dst, SkTransferFunctionBehavior::kRespect);
+static constexpr float sk_linear_from_2dot2[256] = {
+        0.000000000000000000f, 0.000005077051900662f, 0.000023328004666099f, 0.000056921765712193f,
+        0.000107187362341244f, 0.000175123977503027f, 0.000261543754548491f, 0.000367136269815943f,
+        0.000492503787191433f, 0.000638182842167022f, 0.000804658499513058f, 0.000992374304074325f,
+        0.001201739522438400f, 0.001433134589671860f, 0.001686915316789280f, 0.001963416213396470f,
+        0.002262953160706430f, 0.002585825596234170f, 0.002932318323938360f, 0.003302703032003640f,
+        0.003697239578900130f, 0.004116177093282750f, 0.004559754922526020f, 0.005028203456855540f,
+        0.005521744850239660f, 0.006040593654849810f, 0.006584957382581690f, 0.007155037004573030f,
+        0.007751027397660610f, 0.008373117745148580f, 0.009021491898012130f, 0.009696328701658230f,
+        0.010397802292555300f, 0.011126082368383200f, 0.011881334434813700f, 0.012663720031582100f,
+        0.013473396940142600f, 0.014310519374884100f, 0.015175238159625200f, 0.016067700890886900f,
+        0.016988052089250000f, 0.017936433339950200f, 0.018912983423721500f, 0.019917838438785700f,
+        0.020951131914781100f, 0.022012994919336500f, 0.023103556157921400f, 0.024222942067534200f,
+        0.025371276904734600f, 0.026548682828472900f, 0.027755279978126000f, 0.028991186547107800f,
+        0.030256518852388700f, 0.031551391400226400f, 0.032875916948383800f, 0.034230206565082000f,
+        0.035614369684918800f, 0.037028514161960200f, 0.038472746320194600f, 0.039947171001525600f,
+        0.041451891611462500f, 0.042987010162657100f, 0.044552627316421400f, 0.046148842422351000f,
+        0.047775753556170600f, 0.049433457555908000f, 0.051122050056493400f, 0.052841625522879000f,
+        0.054592277281760300f, 0.056374097551979800f, 0.058187177473685400f, 0.060031607136313200f,
+        0.061907475605455800f, 0.063814870948677200f, 0.065753880260330100f, 0.067724589685424300f,
+        0.069727084442598800f, 0.071761448846239100f, 0.073827766327784600f, 0.075926119456264800f,
+        0.078056589958101900f, 0.080219258736215100f, 0.082414205888459200f, 0.084641510725429500f,
+        0.086901251787660300f, 0.089193506862247800f, 0.091518352998919500f, 0.093875866525577800f,
+        0.096266123063339700f, 0.098689197541094500f, 0.101145164209600000f, 0.103634096655137000f,
+        0.106156067812744000f, 0.108711149979039000f, 0.111299414824660000f, 0.113920933406333000f,
+        0.116575776178572000f, 0.119264013005047000f, 0.121985713169619000f, 0.124740945387051000f,
+        0.127529777813422000f, 0.130352278056244000f, 0.133208513184300000f, 0.136098549737202000f,
+        0.139022453734703000f, 0.141980290685736000f, 0.144972125597231000f, 0.147998022982685000f,
+        0.151058046870511000f, 0.154152260812165000f, 0.157280727890073000f, 0.160443510725344000f,
+        0.163640671485290000f, 0.166872271890766000f, 0.170138373223312000f, 0.173439036332135000f,
+        0.176774321640903000f, 0.180144289154390000f, 0.183548998464951000f, 0.186988508758844000f,
+        0.190462878822409000f, 0.193972167048093000f, 0.197516431440340000f, 0.201095729621346000f,
+        0.204710118836677000f, 0.208359655960767000f, 0.212044397502288000f, 0.215764399609395000f,
+        0.219519718074868000f, 0.223310408341127000f, 0.227136525505149000f, 0.230998124323267000f,
+        0.234895259215880000f, 0.238827984272048000f, 0.242796353254002000f, 0.246800419601550000f,
+        0.250840236436400000f, 0.254915856566385000f, 0.259027332489606000f, 0.263174716398492000f,
+        0.267358060183772000f, 0.271577415438375000f, 0.275832833461245000f, 0.280124365261085000f,
+        0.284452061560024000f, 0.288815972797219000f, 0.293216149132375000f, 0.297652640449211000f,
+        0.302125496358853000f, 0.306634766203158000f, 0.311180499057984000f, 0.315762743736397000f,
+        0.320381548791810000f, 0.325036962521076000f, 0.329729032967515000f, 0.334457807923889000f,
+        0.339223334935327000f, 0.344025661302187000f, 0.348864834082879000f, 0.353740900096629000f,
+        0.358653905926199000f, 0.363603897920553000f, 0.368590922197487000f, 0.373615024646202000f,
+        0.378676250929840000f, 0.383774646487975000f, 0.388910256539059000f, 0.394083126082829000f,
+        0.399293299902674000f, 0.404540822567962000f, 0.409825738436323000f, 0.415148091655907000f,
+        0.420507926167587000f, 0.425905285707146000f, 0.431340213807410000f, 0.436812753800359000f,
+        0.442322948819202000f, 0.447870841800410000f, 0.453456475485731000f, 0.459079892424160000f,
+        0.464741134973889000f, 0.470440245304218000f, 0.476177265397440000f, 0.481952237050698000f,
+        0.487765201877811000f, 0.493616201311074000f, 0.499505276603030000f, 0.505432468828216000f,
+        0.511397818884880000f, 0.517401367496673000f, 0.523443155214325000f, 0.529523222417277000f,
+        0.535641609315311000f, 0.541798355950137000f, 0.547993502196972000f, 0.554227087766085000f,
+        0.560499152204328000f, 0.566809734896638000f, 0.573158875067523000f, 0.579546611782525000f,
+        0.585972983949661000f, 0.592438030320847000f, 0.598941789493296000f, 0.605484299910907000f,
+        0.612065599865624000f, 0.618685727498780000f, 0.625344720802427000f, 0.632042617620641000f,
+        0.638779455650817000f, 0.645555272444935000f, 0.652370105410821000f, 0.659223991813387000f,
+        0.666116968775851000f, 0.673049073280942000f, 0.680020342172095000f, 0.687030812154625000f,
+        0.694080519796882000f, 0.701169501531402000f, 0.708297793656032000f, 0.715465432335048000f,
+        0.722672453600255000f, 0.729918893352071000f, 0.737204787360605000f, 0.744530171266715000f,
+        0.751895080583051000f, 0.759299550695091000f, 0.766743616862161000f, 0.774227314218442000f,
+        0.781750677773962000f, 0.789313742415586000f, 0.796916542907978000f, 0.804559113894567000f,
+        0.812241489898490000f, 0.819963705323528000f, 0.827725794455034000f, 0.835527791460841000f,
+        0.843369730392169000f, 0.851251645184515000f, 0.859173569658532000f, 0.867135537520905000f,
+        0.875137582365205000f, 0.883179737672745000f, 0.891262036813419000f, 0.899384513046529000f,
+        0.907547199521614000f, 0.915750129279253000f, 0.923993335251873000f, 0.932276850264543000f,
+        0.940600707035753000f, 0.948964938178195000f, 0.957369576199527000f, 0.965814653503130000f,
+        0.974300202388861000f, 0.982826255053791000f, 0.991392843592940000f, 1.000000000000000000f,
+};
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+
+static void build_table_linear_from_gamma(float* outTable, float exponent) {
+    for (float x = 0.0f; x <= 1.0f; x += (1.0f/255.0f)) {
+        *outTable++ = powf(x, exponent);
+    }
 }
 
-std::unique_ptr<SkColorSpaceXform> SkMakeColorSpaceXform(
+// outTable is always 256 entries, inTable may be larger or smaller.
+static void build_table_linear_from_gamma(float* outTable, const float* inTable,
+                                          int inTableSize) {
+    if (256 == inTableSize) {
+        memcpy(outTable, inTable, sizeof(float) * 256);
+        return;
+    }
+
+    for (float x = 0.0f; x <= 1.0f; x += (1.0f/255.0f)) {
+        *outTable++ = interp_lut(x, inTable, inTableSize);
+    }
+}
+
+
+static void build_table_linear_from_gamma(float* outTable, float g, float a, float b, float c,
+                                          float d, float e, float f) {
+    // Y = (aX + b)^g + e  for X >= d
+    // Y = cX + f          otherwise
+    for (float x = 0.0f; x <= 1.0f; x += (1.0f/255.0f)) {
+        if (x >= d) {
+            *outTable++ = clamp_0_1(powf(a * x + b, g) + e);
+        } else {
+            *outTable++ = clamp_0_1(c * x + f);
+        }
+    }
+}
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+
+static const int kDstGammaTableSize = SkColorSpaceXform_Base::kDstGammaTableSize;
+
+static void build_table_linear_to_gamma(uint8_t* outTable, float exponent) {
+    float toGammaExp = 1.0f / exponent;
+
+    for (int i = 0; i < kDstGammaTableSize; i++) {
+        float x = ((float) i) * (1.0f / ((float) (kDstGammaTableSize - 1)));
+        outTable[i] = clamp_normalized_float_to_byte(powf(x, toGammaExp));
+    }
+}
+
+static void build_table_linear_to_gamma(uint8_t* outTable, const float* inTable,
+                                        int inTableSize) {
+    invert_table_gamma(nullptr, outTable, kDstGammaTableSize, inTable, inTableSize);
+}
+
+static float inverse_parametric(float x, float g, float a, float b, float c, float d, float e,
+                                float f) {
+    // We need to take the inverse of the following piecewise function.
+    // Y = (aX + b)^g + e  for X >= d
+    // Y = cX + f          otherwise
+
+    // Assume that the gamma function is continuous, or this won't make much sense anyway.
+    // Plug in |d| to the second equation to calculate the new piecewise interval.
+    // Then simply use the inverse of the original functions.
+    float interval = c * d + f;
+    if (x < interval) {
+        // X = (Y - F) / C
+        if (0.0f == c) {
+            // The gamma curve for this segment is constant, so the inverse is undefined.
+            // Since this is the lower segment, guess zero.
+            return 0.0f;
+        }
+
+        return (x - f) / c;
+    }
+
+    // X = ((Y - E)^(1 / G) - B) / A
+    if (0.0f == a || 0.0f == g) {
+        // The gamma curve for this segment is constant, so the inverse is undefined.
+        // Since this is the upper segment, guess one.
+        return 1.0f;
+    }
+
+    return (powf(x - e, 1.0f / g) - b) / a;
+}
+
+static void build_table_linear_to_gamma(uint8_t* outTable, float g, float a,
+                                        float b, float c, float d, float e, float f) {
+    for (int i = 0; i < kDstGammaTableSize; i++) {
+        float x = ((float) i) * (1.0f / ((float) (kDstGammaTableSize - 1)));
+        float y = inverse_parametric(x, g, a, b, c, d, e, f);
+        outTable[i] = clamp_normalized_float_to_byte(y);
+    }
+}
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+
+template <typename T>
+struct GammaFns {
+    const T* fSRGBTable;
+    const T* f2Dot2Table;
+    void (*fBuildFromValue)(T*, float);
+    void (*fBuildFromTable)(T*, const float*, int);
+    void (*fBuildFromParam)(T*, float, float, float, float, float, float, float);
+};
+
+static const GammaFns<float> kToLinear {
+    sk_linear_from_srgb,
+    sk_linear_from_2dot2,
+    &build_table_linear_from_gamma,
+    &build_table_linear_from_gamma,
+    &build_table_linear_from_gamma,
+};
+
+static const GammaFns<uint8_t> kFromLinear {
+    nullptr,
+    nullptr,
+    &build_table_linear_to_gamma,
+    &build_table_linear_to_gamma,
+    &build_table_linear_to_gamma,
+};
+
+// Build tables to transform src gamma to linear.
+template <typename T>
+static void build_gamma_tables(const T* outGammaTables[3], T* gammaTableStorage, int gammaTableSize,
+                               const SkColorSpace_XYZ* space, const GammaFns<T>& fns,
+                               bool gammasAreMatching)
+{
+    switch (space->gammaNamed()) {
+        case kSRGB_SkGammaNamed:
+            outGammaTables[0] = outGammaTables[1] = outGammaTables[2] = fns.fSRGBTable;
+            break;
+        case k2Dot2Curve_SkGammaNamed:
+            outGammaTables[0] = outGammaTables[1] = outGammaTables[2] = fns.f2Dot2Table;
+            break;
+        case kLinear_SkGammaNamed:
+            outGammaTables[0] = outGammaTables[1] = outGammaTables[2] = nullptr;
+            break;
+        default: {
+            const SkGammas* gammas = space->gammas();
+            SkASSERT(gammas);
+
+            auto build_table = [=](int i) {
+                if (gammas->isNamed(i)) {
+                    switch (gammas->data(i).fNamed) {
+                        case kSRGB_SkGammaNamed:
+                            (*fns.fBuildFromParam)(&gammaTableStorage[i * gammaTableSize],
+                                                   gSRGB_TransferFn.fG,
+                                                   gSRGB_TransferFn.fA,
+                                                   gSRGB_TransferFn.fB,
+                                                   gSRGB_TransferFn.fC,
+                                                   gSRGB_TransferFn.fD,
+                                                   gSRGB_TransferFn.fE,
+                                                   gSRGB_TransferFn.fF);
+                            outGammaTables[i] = &gammaTableStorage[i * gammaTableSize];
+                            break;
+                        case k2Dot2Curve_SkGammaNamed:
+                            (*fns.fBuildFromValue)(&gammaTableStorage[i * gammaTableSize], 2.2f);
+                            outGammaTables[i] = &gammaTableStorage[i * gammaTableSize];
+                            break;
+                        case kLinear_SkGammaNamed:
+                            (*fns.fBuildFromValue)(&gammaTableStorage[i * gammaTableSize], 1.0f);
+                            outGammaTables[i] = &gammaTableStorage[i * gammaTableSize];
+                            break;
+                        default:
+                            SkASSERT(false);
+                            break;
+                    }
+                } else if (gammas->isValue(i)) {
+                    (*fns.fBuildFromValue)(&gammaTableStorage[i * gammaTableSize],
+                                           gammas->data(i).fValue);
+                    outGammaTables[i] = &gammaTableStorage[i * gammaTableSize];
+                } else if (gammas->isTable(i)) {
+                    (*fns.fBuildFromTable)(&gammaTableStorage[i * gammaTableSize], gammas->table(i),
+                                           gammas->data(i).fTable.fSize);
+                    outGammaTables[i] = &gammaTableStorage[i * gammaTableSize];
+                } else {
+                    SkASSERT(gammas->isParametric(i));
+                    const SkColorSpaceTransferFn& params = gammas->params(i);
+                    (*fns.fBuildFromParam)(&gammaTableStorage[i * gammaTableSize], params.fG,
+                                           params.fA, params.fB, params.fC, params.fD, params.fE,
+                                           params.fF);
+                    outGammaTables[i] = &gammaTableStorage[i * gammaTableSize];
+                }
+            };
+
+            if (gammasAreMatching) {
+                build_table(0);
+                outGammaTables[1] = outGammaTables[0];
+                outGammaTables[2] = outGammaTables[0];
+            } else {
+                build_table(0);
+                build_table(1);
+                build_table(2);
+            }
+
+            break;
+        }
+    }
+}
+
+void SkColorSpaceXform_Base::BuildDstGammaTables(const uint8_t* dstGammaTables[3],
+                                                 uint8_t* dstStorage,
+                                                 const SkColorSpace_XYZ* space,
+                                                 bool gammasAreMatching) {
+    build_gamma_tables(dstGammaTables, dstStorage, kDstGammaTableSize, space, kFromLinear,
+                       gammasAreMatching);
+}
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+
+std::unique_ptr<SkColorSpaceXform> SkColorSpaceXform::New(SkColorSpace* src,
+                                                          SkColorSpace* dst) {
+    return SkColorSpaceXform_Base::New(src, dst, SkTransferFunctionBehavior::kRespect);
+}
+
+std::unique_ptr<SkColorSpaceXform> SkColorSpaceXform_Base::New(
         SkColorSpace* src,
         SkColorSpace* dst,
         SkTransferFunctionBehavior premulBehavior) {
-#if defined(SK_USE_SKCMS)
-    if (src && dst && dst->toXYZD50()) {
-        return SkMakeColorSpaceXform_skcms(src, dst, premulBehavior);
+
+    if (!src || !dst) {
+        // Invalid input
+        return nullptr;
     }
+
+    if (!dst->toXYZD50()) {
+        SkCSXformPrintf("only XYZ destinations supported\n");
+        return nullptr;
+    }
+
+#if defined(SK_USE_SKCMS)
+    return MakeSkcmsXform(src, dst, premulBehavior);
+#else
+    if (src->toXYZD50()) {
+        return skstd::make_unique<SkColorSpaceXform_XYZ>(static_cast<SkColorSpace_XYZ*>(src),
+                                                         static_cast<SkColorSpace_XYZ*>(dst),
+                                                         premulBehavior);
+    }
+    return skstd::make_unique<SkColorSpaceXform_A2B>(static_cast<SkColorSpace_A2B*>(src),
+                                                     static_cast<SkColorSpace_XYZ*>(dst));
 #endif
-    return nullptr;
+}
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+
+static inline int num_tables(SkColorSpace_XYZ* space) {
+    switch (space->gammaNamed()) {
+        case kSRGB_SkGammaNamed:
+        case k2Dot2Curve_SkGammaNamed:
+        case kLinear_SkGammaNamed:
+            return 0;
+        default: {
+            const SkGammas* gammas = space->gammas();
+            SkASSERT(gammas);
+
+            // It's likely that each component will have the same gamma.  In this case,
+            // we only need to build one table.
+            return gammas->allChannelsSame() ? 1 : 3;
+        }
+    }
+}
+
+SkColorSpaceXform_XYZ::SkColorSpaceXform_XYZ(SkColorSpace_XYZ* src,
+                                             SkColorSpace_XYZ* dst,
+                                             SkTransferFunctionBehavior premulBehavior)
+    : fPremulBehavior(premulBehavior)
+{
+    fColorSpacesAreIdentical = SkColorSpace::Equals(src, dst);
+
+    SkMatrix44 srcToDst(SkMatrix44::kIdentity_Constructor);
+    if (!fColorSpacesAreIdentical && *src->toXYZD50() != *dst->toXYZD50()) {
+        srcToDst.setConcat(*dst->fromXYZD50(), *src->toXYZD50());
+    }
+
+    fSrcToDst[ 0] = srcToDst.get(0, 0);
+    fSrcToDst[ 1] = srcToDst.get(1, 0);
+    fSrcToDst[ 2] = srcToDst.get(2, 0);
+    fSrcToDst[ 3] = srcToDst.get(0, 1);
+    fSrcToDst[ 4] = srcToDst.get(1, 1);
+    fSrcToDst[ 5] = srcToDst.get(2, 1);
+    fSrcToDst[ 6] = srcToDst.get(0, 2);
+    fSrcToDst[ 7] = srcToDst.get(1, 2);
+    fSrcToDst[ 8] = srcToDst.get(2, 2);
+    fSrcToDst[ 9] = srcToDst.get(0, 3);
+    fSrcToDst[10] = srcToDst.get(1, 3);
+    fSrcToDst[11] = srcToDst.get(2, 3);
+    fSrcToDstIsIdentity = srcToDst.isIdentity();
+
+    const int numSrcTables = num_tables(src);
+    const size_t srcEntries = numSrcTables * 256;
+    const bool srcGammasAreMatching = (1 >= numSrcTables);
+    fSrcStorage.reset(srcEntries);
+    build_gamma_tables(fSrcGammaTables, fSrcStorage.get(), 256, src, kToLinear,
+                       srcGammasAreMatching);
+
+    const int numDstTables = num_tables(dst);
+    dst->toDstGammaTables(fDstGammaTables, &fDstStorage, numDstTables);
+
+    if (src->gammaIsLinear()) {
+        fSrcGamma = kLinear_SrcGamma;
+    } else if (kSRGB_SkGammaNamed == src->gammaNamed()) {
+        fSrcGamma = kSRGB_SrcGamma;
+    } else {
+        fSrcGamma = kTable_SrcGamma;
+    }
+
+    switch (dst->gammaNamed()) {
+        case kSRGB_SkGammaNamed:
+            fDstGamma = kSRGB_DstGamma;
+            break;
+        case k2Dot2Curve_SkGammaNamed:
+            fDstGamma = k2Dot2_DstGamma;
+            break;
+        case kLinear_SkGammaNamed:
+            fDstGamma = kLinear_DstGamma;
+            break;
+        default:
+            fDstGamma = kTable_DstGamma;
+            break;
+    }
+}
+
+bool SkColorSpaceXform_XYZ::apply(ColorFormat dstColorFormat, void* dst,
+                                  ColorFormat srcColorFormat, const void* src,
+                                  int len, SkAlphaType alphaType) const {
+    if (fColorSpacesAreIdentical && kPremul_SkAlphaType != alphaType) {
+        if ((kRGBA_8888_ColorFormat == dstColorFormat &&
+             kRGBA_8888_ColorFormat == srcColorFormat) ||
+            (kBGRA_8888_ColorFormat == dstColorFormat &&
+             kBGRA_8888_ColorFormat == srcColorFormat))
+        {
+            memcpy(dst, src, len * sizeof(uint32_t));
+            return true;
+        }
+
+        if ((kRGBA_8888_ColorFormat == dstColorFormat &&
+             kBGRA_8888_ColorFormat == srcColorFormat) ||
+            (kBGRA_8888_ColorFormat == dstColorFormat &&
+             kRGBA_8888_ColorFormat == srcColorFormat))
+        {
+            SkOpts::RGBA_to_BGRA((uint32_t*)dst, src, len);
+            return true;
+        }
+    }
+
+    SkRasterPipeline_<256> pipeline;
+
+    SkJumper_MemoryCtx src_ctx = { (void*)src, 0 },
+                       dst_ctx = { (void*)dst, 0 };
+
+    SkJumper_LoadTablesCtx loadTables;
+    switch (srcColorFormat) {
+        case kRGBA_8888_ColorFormat:
+            if (kLinear_SrcGamma == fSrcGamma) {
+                pipeline.append(SkRasterPipeline::load_8888, &src_ctx);
+            } else {
+                loadTables.src = src;
+                loadTables.r = fSrcGammaTables[0];
+                loadTables.g = fSrcGammaTables[1];
+                loadTables.b = fSrcGammaTables[2];
+                pipeline.append(SkRasterPipeline::load_tables, &loadTables);
+            }
+
+            break;
+        case kBGRA_8888_ColorFormat:
+            if (kLinear_SrcGamma == fSrcGamma) {
+                pipeline.append(SkRasterPipeline::load_bgra, &src_ctx);
+            } else {
+                loadTables.src = src;
+                loadTables.r = fSrcGammaTables[2];
+                loadTables.g = fSrcGammaTables[1];
+                loadTables.b = fSrcGammaTables[0];
+                pipeline.append(SkRasterPipeline::load_tables, &loadTables);
+                pipeline.append(SkRasterPipeline::swap_rb);
+            }
+
+            break;
+        case kRGBA_F16_ColorFormat:
+            if (kLinear_SrcGamma != fSrcGamma) {
+                return false;
+            }
+            pipeline.append(SkRasterPipeline::load_f16, &src_ctx);
+            break;
+        case kRGBA_F32_ColorFormat:
+            if (kLinear_SrcGamma != fSrcGamma) {
+                return false;
+            }
+            pipeline.append(SkRasterPipeline::load_f32, &src_ctx);
+            break;
+        case kRGBA_U16_BE_ColorFormat:
+            switch (fSrcGamma) {
+                case kLinear_SrcGamma:
+                    pipeline.append(SkRasterPipeline::load_u16_be, &src_ctx);
+                    break;
+                case kSRGB_SrcGamma:
+                    pipeline.append(SkRasterPipeline::load_u16_be, &src_ctx);
+                    pipeline.append(SkRasterPipeline::from_srgb);
+                    break;
+                case kTable_SrcGamma:
+                    loadTables.src = src;
+                    loadTables.r = fSrcGammaTables[0];
+                    loadTables.g = fSrcGammaTables[1];
+                    loadTables.b = fSrcGammaTables[2];
+                    pipeline.append(SkRasterPipeline::load_tables_u16_be, &loadTables);
+                    break;
+            }
+            break;
+        case kRGB_U16_BE_ColorFormat:
+            switch (fSrcGamma) {
+                case kLinear_SrcGamma:
+                    pipeline.append(SkRasterPipeline::load_rgb_u16_be, &src_ctx);
+                    break;
+                case kSRGB_SrcGamma:
+                    pipeline.append(SkRasterPipeline::load_rgb_u16_be, &src_ctx);
+                    pipeline.append(SkRasterPipeline::from_srgb);
+                    break;
+                case kTable_SrcGamma:
+                    loadTables.src = src;
+                    loadTables.r = fSrcGammaTables[0];
+                    loadTables.g = fSrcGammaTables[1];
+                    loadTables.b = fSrcGammaTables[2];
+                    pipeline.append(SkRasterPipeline::load_tables_rgb_u16_be, &loadTables);
+                    break;
+            }
+            break;
+        default:
+            return false;
+    }
+
+    if (!fSrcToDstIsIdentity) {
+        pipeline.append(SkRasterPipeline::matrix_3x4, fSrcToDst);
+
+        if (kRGBA_F16_ColorFormat != dstColorFormat &&
+            kRGBA_F32_ColorFormat != dstColorFormat)
+        {
+            bool need_clamp_0, need_clamp_1;
+            analyze_3x4_matrix(fSrcToDst, &need_clamp_0, &need_clamp_1);
+
+            if (need_clamp_0) { pipeline.append(SkRasterPipeline::clamp_0); }
+            if (need_clamp_1) { pipeline.append(SkRasterPipeline::clamp_1); }
+        }
+    }
+
+    if (kPremul_SkAlphaType == alphaType && SkTransferFunctionBehavior::kRespect == fPremulBehavior)
+    {
+        pipeline.append(SkRasterPipeline::premul);
+    }
+
+    SkJumper_ByteTablesRGBCtx tables;
+    float to_2dot2 = 1/2.2f;
+    switch (fDstGamma) {
+        case kSRGB_DstGamma:
+            pipeline.append(SkRasterPipeline::to_srgb);
+            break;
+        case k2Dot2_DstGamma:
+            pipeline.append(SkRasterPipeline::gamma, &to_2dot2);
+            break;
+        case kTable_DstGamma:
+            tables.r = fDstGammaTables[0];
+            tables.g = fDstGammaTables[1];
+            tables.b = fDstGammaTables[2];
+            tables.n = SkColorSpaceXform_Base::kDstGammaTableSize;
+            pipeline.append(SkRasterPipeline::byte_tables_rgb, &tables);
+        default:
+            break;
+    }
+
+    if (kPremul_SkAlphaType == alphaType && SkTransferFunctionBehavior::kIgnore == fPremulBehavior)
+    {
+        pipeline.append(SkRasterPipeline::premul);
+    }
+
+    switch (dstColorFormat) {
+        case kRGBA_8888_ColorFormat:
+             pipeline.append(SkRasterPipeline::store_8888, &dst_ctx);
+            break;
+        case kBGRA_8888_ColorFormat:
+            pipeline.append(SkRasterPipeline::store_bgra, &dst_ctx);
+            break;
+        case kRGBA_F16_ColorFormat:
+            if (kLinear_DstGamma != fDstGamma) {
+                return false;
+            }
+            pipeline.append(SkRasterPipeline::store_f16, &dst_ctx);
+            break;
+        case kRGBA_F32_ColorFormat:
+            if (kLinear_DstGamma != fDstGamma) {
+                return false;
+            }
+            pipeline.append(SkRasterPipeline::store_f32, &dst_ctx);
+            break;
+        case kBGR_565_ColorFormat:
+            if (kOpaque_SkAlphaType != alphaType) {
+                return false;
+            }
+            pipeline.append(SkRasterPipeline::store_565, &dst_ctx);
+            break;
+        default:
+            return false;
+    }
+    pipeline.run(0,0, len,1);
+    return true;
+}
+
+std::unique_ptr<SkColorSpaceXform> SlowIdentityXform(SkColorSpace_XYZ* space) {
+    auto xform = skstd::make_unique<SkColorSpaceXform_XYZ>(space, space,
+                                                           SkTransferFunctionBehavior::kRespect);
+    xform->pretendNotToBeIdentityForTesting();
+    return std::move(xform);
 }
 
 bool SkColorSpaceXform::Apply(SkColorSpace* dstCS, ColorFormat dstFormat, void* dst,
@@ -33,5 +606,5 @@
         case kPremul_AlphaOp:      at = kPremul_SkAlphaType;   break;
         case kSrcIsOpaque_AlphaOp: at = kOpaque_SkAlphaType;   break;
     }
-    return SkColorSpaceXform::New(srcCS, dstCS)->apply(dstFormat, dst, srcFormat, src, len, at);
+    return New(srcCS, dstCS)->apply(dstFormat, dst, srcFormat, src, len, at);
 }
diff --git a/src/core/SkColorSpaceXformPriv.h b/src/core/SkColorSpaceXformPriv.h
index 301e741..d1eae73 100644
--- a/src/core/SkColorSpaceXformPriv.h
+++ b/src/core/SkColorSpaceXformPriv.h
@@ -9,14 +9,79 @@
 #define SkColorSpaceXformPriv_DEFINED
 
 #include "SkColorSpaceXform.h"
+#include "SkHalf.h"
+#include "SkSRGB.h"
 
-std::unique_ptr<SkColorSpaceXform> SkMakeColorSpaceXform(SkColorSpace* src,
-                                                         SkColorSpace* dst,
-                                                         SkTransferFunctionBehavior);
+#define SkCSXformPrintfDefined 0
+#define SkCSXformPrintf(...)
 
-std::unique_ptr<SkColorSpaceXform> SkMakeColorSpaceXform_skcms(SkColorSpace* src,
-                                                              SkColorSpace* dst,
-                                                              SkTransferFunctionBehavior);
+// Linearly interpolating lookup of |input| in a table holding |tableSize| entries.
+static inline float interp_lut(float input, const float* table, int tableSize) {
+    float index = input * (tableSize - 1);  // fractional table position; not range checked here
+    float diff = index - sk_float_floor2int(index);  // weight given to the upper neighbor
+    return table[(int) sk_float_floor2int(index)] * (1.0f - diff) +
+           table[(int) sk_float_ceil2int(index)] * diff;
+}
+
+// Expand range from 0-1 to 0-255, then round to a byte, saturating at 0 and 255.
+static inline uint8_t clamp_normalized_float_to_byte(float v) {
+    // The ordering of the logic is a little strange here in order
+    // to make sure we convert NaNs to 0.
+    v = v * 255.0f;
+    if (v >= 254.5f) {  // would round to 255 or above: saturate
+        return 255;
+    } else if (v >= 0.5f) {
+        return (uint8_t) (v + 0.5f);  // round to nearest
+    } else {  // v < 0.5f, or NaN (both >= tests above are false for NaN)
+        return 0;
+    }
+}
+
+static inline float clamp_0_1(float v) {  // Clamps |v| to the range [0, 1].
+    // The ordering of the logic is a little strange here in order
+    // to make sure we convert NaNs to 0.
+    if (v >= 1.0f) {
+        return 1.0f;
+    } else if (v >= 0.0f) {
+        return v;
+    } else {  // negative, or NaN (both >= tests above are false for NaN)
+        return 0.0f;
+    }
+}
+
+/**
+ *  Invert table lookup.  Ex: which indices correspond to the input values?
+ *  This will have strange results when the table is not increasing.
+ *  But any sane gamma function will be increasing.
+ *  @param outTableFloat Destination table for float (0-1) results. Can be nullptr if not wanted.
+ *  @param outTableByte  Destination table for byte (0-255) results. Can be nullptr if not wanted.
+ *  @param outTableSize  Number of elements in |outTableFloat| or |outTableByte|
+ *  @param inTable       The source table to invert
+ *  @param inTableSize   The number of elements in |inTable|
+ */
+static inline void invert_table_gamma(float* outTableFloat, uint8_t* outTableByte,
+                                      int outTableSize, const float* inTable, int inTableSize) {
+    // should never have a gamma table this small anyway, 0/1 are either not allowed
+    // or imply a non-table gamma such as linear/exponential
+    SkASSERT(inTableSize >= 2);
+    int inIndex = 1;  // upper end of the inTable segment [inIndex - 1, inIndex] in use; never reset
+    for (int outIndex = 0; outIndex < outTableSize; ++outIndex) {
+        const float input = outIndex / (outTableSize - 1.0f);  // evenly spaced from 0 to 1
+        while (inIndex < inTableSize - 1 && inTable[inIndex] < input) {  // stop at the last entry
+            ++inIndex;
+        }
+        const float diff            = input - inTable[inIndex - 1];  // offset into the segment
+        const float distance        = inTable[inIndex] - inTable[inIndex - 1];  // segment height
+        const float normalizedIndex = (inIndex - 1) + diff / distance;  // fractional inTable index
+        const float index           = normalizedIndex / (inTableSize - 1);  // rescaled by table size
+        if (outTableByte) {
+            outTableByte[outIndex] = clamp_normalized_float_to_byte(index);
+        }
+        if (outTableFloat) {
+            outTableFloat[outIndex] = clamp_0_1(index);
+        }
+    }
+}
 
 static inline SkColorSpaceXform::ColorFormat select_xform_format(SkColorType colorType) {
     switch (colorType) {
diff --git a/src/core/SkColorSpaceXform_A2B.cpp b/src/core/SkColorSpaceXform_A2B.cpp
new file mode 100644
index 0000000..1e57b5d
--- /dev/null
+++ b/src/core/SkColorSpaceXform_A2B.cpp
@@ -0,0 +1,347 @@
+/*
+ * Copyright 2016 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+#include "SkColorSpaceXform_A2B.h"
+
+#include "SkColorData.h"
+#include "SkColorSpace_A2B.h"
+#include "SkColorSpace_XYZ.h"
+#include "SkColorSpacePriv.h"
+#include "SkColorSpaceXformPriv.h"
+#include "SkMakeUnique.h"
+#include "SkNx.h"
+#include "SkSRGB.h"
+#include "SkTypes.h"
+#include "../jumper/SkJumper.h"
+
+bool SkColorSpaceXform_A2B::apply(ColorFormat dstFormat, void* dst, ColorFormat srcFormat,
+                                  const void* src, int count, SkAlphaType alphaType) const {
+    SkRasterPipeline_<256> pipeline;  // per call: load, fElementsPipeline, optional premul, store
+
+    SkJumper_MemoryCtx src_ctx = { (void*)src, 0 },
+                       dst_ctx = { (void*)dst, 0 };
+
+    switch (srcFormat) {
+        case kBGRA_8888_ColorFormat:
+            pipeline.append(SkRasterPipeline::load_bgra, &src_ctx);
+            break;
+        case kRGBA_8888_ColorFormat:
+            pipeline.append(SkRasterPipeline::load_8888, &src_ctx);
+            break;
+        case kRGBA_U16_BE_ColorFormat:
+            pipeline.append(SkRasterPipeline::load_u16_be, &src_ctx);
+            break;
+        case kRGB_U16_BE_ColorFormat:
+            pipeline.append(SkRasterPipeline::load_rgb_u16_be, &src_ctx);
+            break;
+        default:  // every other source format is rejected
+            SkCSXformPrintf("F16/F32 sources must be linear.\n");
+            return false;
+    }
+
+    pipeline.extend(fElementsPipeline);  // the conversion stages assembled by the constructor
+
+    if (kPremul_SkAlphaType == alphaType) {
+        pipeline.append(SkRasterPipeline::premul);
+    }
+
+    switch (dstFormat) {
+        case kBGRA_8888_ColorFormat:
+            pipeline.append(SkRasterPipeline::store_bgra, &dst_ctx);
+            break;
+        case kRGBA_8888_ColorFormat:
+            pipeline.append(SkRasterPipeline::store_8888, &dst_ctx);
+            break;
+        case kRGBA_F16_ColorFormat:
+            if (!fLinearDstGamma) {  // F16 output is only produced for a linear destination gamma
+                return false;
+            }
+            pipeline.append(SkRasterPipeline::store_f16, &dst_ctx);
+            break;
+        case kRGBA_F32_ColorFormat:
+            if (!fLinearDstGamma) {  // F32 output is only produced for a linear destination gamma
+                return false;
+            }
+            pipeline.append(SkRasterPipeline::store_f32, &dst_ctx);
+            break;
+        case kBGR_565_ColorFormat:
+            if (kOpaque_SkAlphaType != alphaType) {  // 565 output is only produced for opaque alpha
+                return false;
+            }
+            pipeline.append(SkRasterPipeline::store_565, &dst_ctx);
+            break;
+        default:
+            return false;
+    }
+    pipeline.run(0,0, count,1);  // NOTE(review): presumably (x, y, w, h), one row of |count| pixels
+
+    return true;
+}
+
+static inline bool gamma_to_parametric(SkColorSpaceTransferFn* coeffs, const SkGammas& gammas,
+                                       int channel) {  // Fills |coeffs| for one channel of |gammas|.
+    switch (gammas.type(channel)) {
+        case SkGammas::Type::kNamed_Type:
+            return named_to_parametric(coeffs, gammas.data(channel).fNamed);
+        case SkGammas::Type::kValue_Type:
+            value_to_parametric(coeffs, gammas.data(channel).fValue);
+            return true;
+        case SkGammas::Type::kParam_Type:
+            *coeffs = gammas.params(channel);  // already parametric: copy as-is
+            return true;
+        default:  // no conversion for other types; callers in this file handle kTable_Type first
+            return false;
+    }
+}
+
+SkColorSpaceXform_A2B::SkColorSpaceXform_A2B(SkColorSpace_A2B* srcSpace,
+                                             SkColorSpace_XYZ* dstSpace)
+    : fElementsPipeline(&fAlloc)
+    , fLinearDstGamma(kLinear_SkGammaNamed == dstSpace->gammaNamed()) {
+#if (SkCSXformPrintfDefined)
+    static const char* debugGammaNamed[4] = {
+        "Linear", "SRGB", "2.2", "NonStandard"
+    };
+    static const char* debugGammas[5] = {
+        "None", "Named", "Value", "Table", "Param"
+    };
+#endif
+    int currentChannels;  // channel count at the current point in the element chain
+    switch (srcSpace->iccType()) {
+        case SkColorSpace::kRGB_Type:
+            currentChannels = 3;
+            break;
+        case SkColorSpace::kCMYK_Type: {
+            currentChannels = 4;
+            // CMYK images from JPEGs (the only format that supports it) are actually
+            // inverted CMYK, so we need to invert every channel.
+            fElementsPipeline.append(SkRasterPipeline::invert);
+            break;
+        }
+        default:
+            currentChannels = 0;  // unexpected ICC type, flagged by the assert below
+            SkASSERT(false);
+    }
+    // add in all input color space -> PCS xforms
+    for (int i = 0; i < srcSpace->count(); ++i) {
+        const SkColorSpace_A2B::Element& e = srcSpace->element(i);
+        SkASSERT(e.inputChannels() == currentChannels);  // must consume what came before it
+        currentChannels = e.outputChannels();
+        switch (e.type()) {
+            case SkColorSpace_A2B::Element::Type::kGammaNamed: {
+                if (kLinear_SkGammaNamed == e.gammaNamed()) {  // linear gamma needs no stage
+                    break;
+                }
+
+                // Take the fast path for ordinary sRGB.
+                if (3 == currentChannels && kSRGB_SkGammaNamed == e.gammaNamed()) {
+                    SkCSXformPrintf("fast path from sRGB\n");
+                    fElementsPipeline.append(SkRasterPipeline::from_srgb);
+                    break;
+                }
+
+                SkCSXformPrintf("Gamma stage added: %s\n", debugGammaNamed[(int)e.gammaNamed()]);
+                auto fn = fAlloc.make<SkColorSpaceTransferFn>();  // allocated from fAlloc
+                SkAssertResult(named_to_parametric(fn, e.gammaNamed()));
+
+                if (is_just_gamma(*fn)) {
+                    fElementsPipeline.append(SkRasterPipeline::gamma, &fn->fG);
+                } else {  // the same parametric function is used for r, g and b
+                    fElementsPipeline.append(SkRasterPipeline::parametric_r, fn);
+                    fElementsPipeline.append(SkRasterPipeline::parametric_g, fn);
+                    fElementsPipeline.append(SkRasterPipeline::parametric_b, fn);
+                }
+                break;
+            }
+            case SkColorSpace_A2B::Element::Type::kGammas: {
+                const SkGammas& gammas = e.gammas();
+                SkCSXformPrintf("Gamma stage added:");
+                for (int channel = 0; channel < gammas.channels(); ++channel) {
+                    SkCSXformPrintf("  %s", debugGammas[(int)gammas.type(channel)]);
+                }
+                SkCSXformPrintf("\n");
+                bool gammaNeedsRef = false;  // set when a stage uses a table taken from |gammas|
+                for (int channel = 0; channel < gammas.channels(); ++channel) {
+                    if (SkGammas::Type::kTable_Type == gammas.type(channel)) {
+                        SkTableTransferFn table = {
+                                gammas.table(channel),
+                                gammas.data(channel).fTable.fSize,
+                        };
+
+                        gammaNeedsRef |= !this->buildTableFn(&table);  // false: original table kept
+                        this->addTableFn(table, channel);
+                    } else {
+                        SkColorSpaceTransferFn fn;
+                        SkAssertResult(gamma_to_parametric(&fn, gammas, channel));
+                        this->addTransferFn(fn, channel);
+                    }
+                }
+                if (gammaNeedsRef) {
+                    this->copy(sk_ref_sp(&gammas));  // stores a ref to |gammas| in fAlloc
+                }
+                break;
+            }
+            case SkColorSpace_A2B::Element::Type::kCLUT: {
+                SkCSXformPrintf("CLUT (%d -> %d) stage added\n", e.colorLUT().inputChannels(),
+                                                                 e.colorLUT().outputChannels());
+
+                struct Ctx : SkJumper_ColorLookupTableCtx {
+                    sk_sp<const SkColorLookUpTable> clut;  // ref held alongside the stage context
+                };
+                auto ctx = fAlloc.make<Ctx>();
+                ctx->clut  = sk_ref_sp(&e.colorLUT());
+                ctx->table = ctx->clut->table();
+                for (int i = 0; i < ctx->clut->inputChannels(); i++) {
+                    ctx->limits[i] = ctx->clut->gridPoints(i);
+                }
+
+                switch  (e.colorLUT().inputChannels()) {
+                    case 3: fElementsPipeline.append(SkRasterPipeline::clut_3D, ctx); break;
+                    case 4: fElementsPipeline.append(SkRasterPipeline::clut_4D, ctx); break;
+                    default: SkDEBUGFAIL("need to handle 1 or 2 channel color lookup tables.");
+                }
+                fElementsPipeline.append(SkRasterPipeline::clamp_0);
+                fElementsPipeline.append(SkRasterPipeline::clamp_1);
+                break;
+            }
+            case SkColorSpace_A2B::Element::Type::kMatrix:
+                if (!e.matrix().isIdentity()) {  // identity matrices add no stage
+                    SkCSXformPrintf("Matrix stage added\n");
+                    addMatrix(e.matrix());
+                }
+                break;
+        }
+    }
+
+    // Lab PCS -> XYZ PCS
+    if (SkColorSpace_A2B::PCS::kLAB == srcSpace->pcs()) {
+        SkCSXformPrintf("Lab -> XYZ element added\n");
+        fElementsPipeline.append(SkRasterPipeline::lab_to_xyz);
+    }
+
+    // we should now be in XYZ PCS
+    SkASSERT(3 == currentChannels);
+
+    // and XYZ PCS -> output color space xforms
+    if (!dstSpace->fromXYZD50()->isIdentity()) {
+        addMatrix(*dstSpace->fromXYZD50());
+    }
+
+    switch (dstSpace->gammaNamed()) {
+        case kLinear_SkGammaNamed:
+            // do nothing
+            break;
+        case k2Dot2Curve_SkGammaNamed: {
+            fElementsPipeline.append(SkRasterPipeline::gamma, this->copy(1/2.2f));  // inverse of 2.2
+            break;
+        }
+        case kSRGB_SkGammaNamed:
+            fElementsPipeline.append(SkRasterPipeline::to_srgb);
+            break;
+        case kNonStandard_SkGammaNamed: {
+            for (int channel = 0; channel < 3; ++channel) {
+                const SkGammas& gammas = *dstSpace->gammas();
+                if (SkGammas::Type::kTable_Type == gammas.type(channel)) {
+                    static constexpr int kInvTableSize = 256;
+                    auto storage = fAlloc.makeArray<float>(kInvTableSize);
+                    invert_table_gamma(storage, nullptr, kInvTableSize,  // float output only
+                                       gammas.table(channel),
+                                       gammas.data(channel).fTable.fSize);
+                    SkTableTransferFn table = { storage, kInvTableSize };
+                    this->addTableFn(table, channel);
+                } else {
+                    SkColorSpaceTransferFn fn;
+                    SkAssertResult(gamma_to_parametric(&fn, gammas, channel));
+                    this->addTransferFn(fn.invert(), channel);  // destination side uses the inverse
+                }
+            }
+        }
+        break;
+    }
+}
+
+void SkColorSpaceXform_A2B::addTransferFn(const SkColorSpaceTransferFn& fn, int channelIndex) {
+    switch (channelIndex) {  // each case appends one parametric stage with its own copy of |fn|
+        case 0:
+            fElementsPipeline.append(SkRasterPipeline::parametric_r, this->copy(fn));
+            break;
+        case 1:
+            fElementsPipeline.append(SkRasterPipeline::parametric_g, this->copy(fn));
+            break;
+        case 2:
+            fElementsPipeline.append(SkRasterPipeline::parametric_b, this->copy(fn));
+            break;
+        case 3:
+            fElementsPipeline.append(SkRasterPipeline::parametric_a, this->copy(fn));
+            break;
+        default:  // only channel indices 0-3 are expected
+            SkASSERT(false);
+    }
+}
+
+/**
+ *  |fn| is an in-out parameter.  If the table is too small to perform reasonable table-lookups
+ *  without interpolation, we will build a bigger table.
+ *
+ *  This returns false if we use the original table, meaning we do nothing here but need to keep
+ *  a reference to the original table.  This returns true if we build a new table and the original
+ *  table can be discarded.
+ */
+bool SkColorSpaceXform_A2B::buildTableFn(SkTableTransferFn* fn) {
+    // Arbitrary, but seems like a reasonable guess.
+    static constexpr int kMinTableSize = 256;
+
+    if (fn->fSize >= kMinTableSize) {  // big enough already: leave |fn| untouched
+        return false;
+    }
+
+    float* outTable = fAlloc.makeArray<float>(kMinTableSize);  // allocated from fAlloc
+    float step = 1.0f / (kMinTableSize - 1);
+    for (int i = 0; i < kMinTableSize; i++) {
+        outTable[i] = interp_lut(i * step, fn->fData, fn->fSize);  // resample the original table
+    }
+
+    fn->fData = outTable;
+    fn->fSize = kMinTableSize;
+    return true;
+}
+
+void SkColorSpaceXform_A2B::addTableFn(const SkTableTransferFn& fn, int channelIndex) {
+    switch (channelIndex) {  // each case appends one table stage with its own copy of |fn|
+        case 0:
+            fElementsPipeline.append(SkRasterPipeline::table_r, this->copy(fn));
+            break;
+        case 1:
+            fElementsPipeline.append(SkRasterPipeline::table_g, this->copy(fn));
+            break;
+        case 2:
+            fElementsPipeline.append(SkRasterPipeline::table_b, this->copy(fn));
+            break;
+        case 3:
+            fElementsPipeline.append(SkRasterPipeline::table_a, this->copy(fn));
+            break;
+        default:  // only channel indices 0-3 are expected
+            SkASSERT(false);
+    }
+}
+
+void SkColorSpaceXform_A2B::addMatrix(const SkMatrix44& m44) {
+    auto m = fAlloc.makeArray<float>(12);  // NOTE(review): assuming get(row, col), column-major 3x4
+    m[0] = m44.get(0,0); m[ 1] = m44.get(1,0); m[ 2] = m44.get(2,0);
+    m[3] = m44.get(0,1); m[ 4] = m44.get(1,1); m[ 5] = m44.get(2,1);
+    m[6] = m44.get(0,2); m[ 7] = m44.get(1,2); m[ 8] = m44.get(2,2);
+    m[9] = m44.get(0,3); m[10] = m44.get(1,3); m[11] = m44.get(2,3);
+
+    SkASSERT(m44.get(3,0) == 0.0f);  // the four entries not copied above must be 0, 0, 0, 1
+    SkASSERT(m44.get(3,1) == 0.0f);
+    SkASSERT(m44.get(3,2) == 0.0f);
+    SkASSERT(m44.get(3,3) == 1.0f);
+
+    fElementsPipeline.append(SkRasterPipeline::matrix_3x4, m);
+    fElementsPipeline.append(SkRasterPipeline::clamp_0);  // matrix stage is followed by both clamps
+    fElementsPipeline.append(SkRasterPipeline::clamp_1);
+}
diff --git a/src/core/SkColorSpaceXform_A2B.h b/src/core/SkColorSpaceXform_A2B.h
new file mode 100644
index 0000000..65fbbfd
--- /dev/null
+++ b/src/core/SkColorSpaceXform_A2B.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright 2016 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+#ifndef SkColorSpaceXform_A2B_DEFINED
+#define SkColorSpaceXform_A2B_DEFINED
+
+#include "SkArenaAlloc.h"
+#include "SkColorSpaceXform.h"
+#include "SkRasterPipeline.h"
+
+class SkColorSpace_A2B;
+class SkColorSpace_XYZ;
+
+struct SkTableTransferFn {  // addTableFn() hands copies to table_* stages, read as SkJumper_TableCtx
+    const float* fData;  // table values; not owned by this struct
+    int          fSize;  // number of entries in fData
+};
+
+class SkColorSpaceXform_A2B : public SkColorSpaceXform {
+public:
+    SkColorSpaceXform_A2B(SkColorSpace_A2B* srcSpace, SkColorSpace_XYZ* dstSpace);
+
+    bool apply(ColorFormat dstFormat, void* dst, ColorFormat srcFormat, const void* src,
+               int count, SkAlphaType alphaType) const override;
+
+private:
+    void addTransferFn(const SkColorSpaceTransferFn& fn, int channelIndex);
+
+    bool buildTableFn(SkTableTransferFn* table);
+    void addTableFn(const SkTableTransferFn& table, int channelIndex);
+
+    void addMatrix(const SkMatrix44& matrix);
+
+    SkRasterPipeline fElementsPipeline;  // NOTE(review): given &fAlloc before fAlloc is constructed
+    bool             fLinearDstGamma;  // true when the destination gamma is kLinear_SkGammaNamed
+    SkArenaAlloc     fAlloc{128};  // TODO: tune?
+
+    template <typename T>
+    T* copy(const T& val) { return fAlloc.make<T>(val); }  // copy of |val| allocated from fAlloc
+};
+
+#endif
diff --git a/src/core/SkColorSpaceXform_Base.h b/src/core/SkColorSpaceXform_Base.h
new file mode 100644
index 0000000..2e02023
--- /dev/null
+++ b/src/core/SkColorSpaceXform_Base.h
@@ -0,0 +1,79 @@
+/*
+ * Copyright 2016 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+#ifndef SkColorSpaceXform_Base_DEFINED
+#define SkColorSpaceXform_Base_DEFINED
+
+#include "SkColorSpace.h"
+#include "SkColorSpaceXform.h"
+#include "SkTemplates.h"
+
+class SkColorSpace_XYZ;
+
+class SkColorSpaceXform_Base {  // declares only static members: a factory and a table builder
+public:
+    // A somewhat more powerful SkColorSpaceXform::New() that allows tweaking premulBehavior.
+    static std::unique_ptr<SkColorSpaceXform> New(SkColorSpace* srcSpace,
+                                                  SkColorSpace* dstSpace,
+                                                  SkTransferFunctionBehavior premulBehavior);
+
+    static constexpr int kDstGammaTableSize = 1024;  // callers size storage as numTables * this
+    static void BuildDstGammaTables(const uint8_t* outGammaTables[3],
+                                    uint8_t* gammaTableStorage,
+                                    const SkColorSpace_XYZ* space,
+                                    bool gammasAreMatching);
+};
+
+class SkColorSpaceXform_XYZ : public SkColorSpaceXform {
+public:
+    SkColorSpaceXform_XYZ(SkColorSpace_XYZ* src, SkColorSpace_XYZ* dst, SkTransferFunctionBehavior);
+
+    bool apply(ColorFormat dstFormat, void* dst,
+               ColorFormat srcFormat, const void* src,
+               int count, SkAlphaType alphaType) const override;
+
+    void pretendNotToBeIdentityForTesting() {  // called by SlowIdentityXform()
+        fSrcToDstIsIdentity = false;
+    }
+
+private:
+    enum SrcGamma {
+        kLinear_SrcGamma,
+        kTable_SrcGamma,
+        kSRGB_SrcGamma,
+    };
+
+    enum DstGamma {
+        kLinear_DstGamma,
+        kSRGB_DstGamma,
+        k2Dot2_DstGamma,
+        kTable_DstGamma,
+    };
+
+    // These table pointers may point into fSrcStorage/fDstStorage or into pre-baked tables.
+    const float*               fSrcGammaTables[3];
+    const uint8_t*             fDstGammaTables[3];
+    SkAutoTMalloc<float>       fSrcStorage;
+    sk_sp<SkData>              fDstStorage;
+
+    float                      fSrcToDst[12];
+    bool                       fSrcToDstIsIdentity;  // forced false for the test-only xform
+    bool                       fColorSpacesAreIdentical;
+    SrcGamma                   fSrcGamma;
+    DstGamma                   fDstGamma;
+    SkTransferFunctionBehavior fPremulBehavior;
+};
+
+// For testing.  Bypasses opts for when src and dst color spaces are equal.
+std::unique_ptr<SkColorSpaceXform> SlowIdentityXform(SkColorSpace_XYZ* space);
+
+#if defined(SK_USE_SKCMS)
+std::unique_ptr<SkColorSpaceXform> MakeSkcmsXform(SkColorSpace* src, SkColorSpace* dst,
+                                                  SkTransferFunctionBehavior premulBehavior);
+#endif
+
+#endif
diff --git a/src/core/SkColorSpaceXform_skcms.cpp b/src/core/SkColorSpaceXform_skcms.cpp
index 720a6c8..8696e4b 100644
--- a/src/core/SkColorSpaceXform_skcms.cpp
+++ b/src/core/SkColorSpaceXform_skcms.cpp
@@ -17,7 +17,15 @@
                             skcms_AlphaFormat premulFormat)
         : fSrcProfile(srcProfile)
         , fDstProfile(dstProfile)
-        , fPremulFormat(premulFormat) {}
+        , fPremulFormat(premulFormat) {
+    #ifndef SK_DONT_OPTIMIZE_SRC_PROFILES_FOR_SPEED
+        skcms_OptimizeForSpeed(&fSrcProfile);
+    #endif
+    #ifndef SK_DONT_OPTIMIZE_DST_PROFILES_FOR_SPEED
+        // (This doesn't do anything yet, but we'd sure like it to.)
+        skcms_OptimizeForSpeed(&fDstProfile);
+    #endif
+    }
 
     bool apply(ColorFormat, void*, ColorFormat, const void*, int, SkAlphaType) const override;
 
@@ -80,11 +88,10 @@
     }
 }
 
-std::unique_ptr<SkColorSpaceXform> SkMakeColorSpaceXform_skcms(SkColorSpace* src,
-                                                               SkColorSpace* dst,
-                                                               SkTransferFunctionBehavior premul) {
+std::unique_ptr<SkColorSpaceXform> MakeSkcmsXform(SkColorSpace* src, SkColorSpace* dst,
+                                                  SkTransferFunctionBehavior premulBehavior) {
     // Construct skcms_ICCProfiles from each color space. For now, support A2B and XYZ.
-    // Eventually, only need to support XYZ. Map premul to one of the two premul formats
+    // Eventually, only need to support XYZ. Map premulBehavior to one of the two premul formats
     // in skcms.
     skcms_ICCProfile srcProfile, dstProfile;
 
@@ -95,9 +102,8 @@
         return nullptr;
     }
 
-    skcms_AlphaFormat premulFormat = SkTransferFunctionBehavior::kRespect == premul
-            ? skcms_AlphaFormat_PremulLinear
-            : skcms_AlphaFormat_PremulAsEncoded;
+    skcms_AlphaFormat premulFormat = SkTransferFunctionBehavior::kRespect == premulBehavior
+            ? skcms_AlphaFormat_PremulLinear : skcms_AlphaFormat_PremulAsEncoded;
     return skstd::make_unique<SkColorSpaceXform_skcms>(srcProfile, dstProfile, premulFormat);
 }
 
diff --git a/src/core/SkColorSpaceXformer.cpp b/src/core/SkColorSpaceXformer.cpp
index 0b536ff8..14be534 100644
--- a/src/core/SkColorSpaceXformer.cpp
+++ b/src/core/SkColorSpaceXformer.cpp
@@ -7,7 +7,7 @@
 
 #include "SkColorFilter.h"
 #include "SkColorSpaceXformer.h"
-#include "SkColorSpaceXformPriv.h"
+#include "SkColorSpaceXform_Base.h"
 #include "SkDrawLooper.h"
 #include "SkGradientShader.h"
 #include "SkImage.h"
@@ -25,7 +25,7 @@
 SkColorSpaceXformer::~SkColorSpaceXformer() {}
 
 std::unique_ptr<SkColorSpaceXformer> SkColorSpaceXformer::Make(sk_sp<SkColorSpace> dst) {
-    std::unique_ptr<SkColorSpaceXform> fromSRGB = SkMakeColorSpaceXform(
+    std::unique_ptr<SkColorSpaceXform> fromSRGB = SkColorSpaceXform_Base::New(
             SkColorSpace::MakeSRGB().get(), dst.get(), SkTransferFunctionBehavior::kIgnore);
 
     return fromSRGB
diff --git a/src/core/SkColorSpace_XYZ.cpp b/src/core/SkColorSpace_XYZ.cpp
index 42b5b5d..6e5c7d8 100644
--- a/src/core/SkColorSpace_XYZ.cpp
+++ b/src/core/SkColorSpace_XYZ.cpp
@@ -7,7 +7,7 @@
 
 #include "SkColorSpace_XYZ.h"
 #include "SkColorSpacePriv.h"
-#include "SkColorSpaceXformPriv.h"
+#include "SkColorSpaceXform_Base.h"
 #include "SkOpts.h"
 
 SkColorSpace_XYZ::SkColorSpace_XYZ(SkGammaNamed gammaNamed, const SkMatrix44& toXYZD50)
@@ -103,6 +103,23 @@
     return sk_sp<SkColorSpace>(new SkColorSpace_XYZ(fGammaNamed, fGammas, spin, fProfileData));
 }
 
+void SkColorSpace_XYZ::toDstGammaTables(const uint8_t* tables[3], sk_sp<SkData>* storage,
+                                         int numTables) const {
+    fToDstGammaOnce([this, numTables] {  // NOTE(review): presumably run once; first numTables wins
+        const bool gammasAreMatching = numTables <= 1;
+        fDstStorage =
+                SkData::MakeUninitialized(numTables * SkColorSpaceXform_Base::kDstGammaTableSize);
+        SkColorSpaceXform_Base::BuildDstGammaTables(fToDstGammaTables,
+                                                    (uint8_t*) fDstStorage->writable_data(), this,
+                                                    gammasAreMatching);
+    });
+
+    *storage = fDstStorage;  // hand the caller a ref to the storage held in fDstStorage
+    tables[0] = fToDstGammaTables[0];
+    tables[1] = fToDstGammaTables[1];
+    tables[2] = fToDstGammaTables[2];
+}
+
 sk_sp<SkColorSpace> SkColorSpace_XYZ::makeNonlinearBlending() const {
     if (this->nonlinearBlending()) {
         return sk_ref_sp(const_cast<SkColorSpace_XYZ*>(this));
diff --git a/src/core/SkColorSpace_XYZ.h b/src/core/SkColorSpace_XYZ.h
index a6f6422..d0984ec 100644
--- a/src/core/SkColorSpace_XYZ.h
+++ b/src/core/SkColorSpace_XYZ.h
@@ -54,6 +54,10 @@
     mutable SkMatrix44     fFromXYZD50;
     mutable SkOnce         fFromXYZOnce;
 
+    mutable sk_sp<SkData>  fDstStorage;
+    mutable const uint8_t* fToDstGammaTables[3];
+    mutable SkOnce         fToDstGammaOnce;
+
     bool fNonlinearBlending = false;
 
     friend class SkColorSpace;
diff --git a/src/core/SkConvertPixels.cpp b/src/core/SkConvertPixels.cpp
index 1eafe8a..d984aca 100644
--- a/src/core/SkConvertPixels.cpp
+++ b/src/core/SkConvertPixels.cpp
@@ -5,6 +5,7 @@
  * found in the LICENSE file.
  */
 
+#include "SkColorSpaceXform_Base.h"
 #include "SkColorSpaceXformPriv.h"
 #include "SkColorSpacePriv.h"
 #include "SkColorTable.h"
@@ -148,7 +149,7 @@
     }
 
     std::unique_ptr<SkColorSpaceXform> xform =
-            SkMakeColorSpaceXform(srcInfo.colorSpace(), dstInfo.colorSpace(), behavior);
+            SkColorSpaceXform_Base::New(srcInfo.colorSpace(), dstInfo.colorSpace(), behavior);
     if (!xform) {
         return false;
     }
diff --git a/src/core/SkRasterPipeline.h b/src/core/SkRasterPipeline.h
index 40921f1..5eab723 100644
--- a/src/core/SkRasterPipeline.h
+++ b/src/core/SkRasterPipeline.h
@@ -40,7 +40,7 @@
     M(clamp_0) M(clamp_1) M(clamp_a) M(clamp_a_dst)                \
     M(unpremul) M(premul) M(premul_dst)                            \
     M(force_opaque) M(force_opaque_dst)                            \
-    M(set_rgb) M(swap_rb)                                          \
+    M(set_rgb) M(swap_rb) M(invert)                                \
     M(from_srgb) M(from_srgb_dst) M(to_srgb)                       \
     M(black_color) M(white_color) M(uniform_color)                 \
     M(seed_shader) M(dither)                                       \
@@ -54,7 +54,8 @@
     M(load_bgra) M(load_bgra_dst) M(store_bgra) M(gather_bgra)     \
     M(load_1010102) M(load_1010102_dst) M(store_1010102) M(gather_1010102) \
     M(bilerp_clamp_8888)                                           \
-    M(store_u16_be)                                                \
+    M(load_u16_be) M(load_rgb_u16_be) M(store_u16_be)              \
+    M(load_tables_u16_be) M(load_tables_rgb_u16_be) M(load_tables) \
     M(load_rgba) M(store_rgba)                                     \
     M(scale_u8) M(scale_565) M(scale_1_float)                      \
     M( lerp_u8) M( lerp_565) M( lerp_1_float)                      \
@@ -71,9 +72,11 @@
     M(matrix_perspective)                                          \
     M(parametric_r) M(parametric_g) M(parametric_b)                \
     M(parametric_a) M(gamma) M(gamma_dst)                          \
-    M(mirror_x)   M(repeat_x)                                      \
-    M(mirror_y)   M(repeat_y)                                      \
-    M(decal_x)    M(decal_y)   M(decal_x_and_y)                    \
+    M(table_r) M(table_g) M(table_b) M(table_a)                    \
+    M(lab_to_xyz)                                                  \
+                 M(mirror_x)   M(repeat_x)                         \
+                 M(mirror_y)   M(repeat_y)                         \
+                 M(decal_x)    M(decal_y)   M(decal_x_and_y)       \
     M(check_decal_mask)                                            \
     M(negate_x)                                                    \
     M(bilinear_nx) M(bilinear_px) M(bilinear_ny) M(bilinear_py)    \
@@ -95,8 +98,9 @@
     M(alter_2pt_conical_unswap)                                    \
     M(mask_2pt_conical_nan)                                        \
     M(mask_2pt_conical_degenerates) M(apply_vector_mask)           \
-    M(byte_tables)                                                 \
+    M(byte_tables) M(byte_tables_rgb)                              \
     M(rgb_to_hsl) M(hsl_to_rgb)                                    \
+    M(clut_3D) M(clut_4D)                                          \
     M(gauss_a_to_rgba)
 
 class SkRasterPipeline {
diff --git a/src/jumper/SkJumper.h b/src/jumper/SkJumper.h
index a826c5a..d4c4698 100644
--- a/src/jumper/SkJumper.h
+++ b/src/jumper/SkJumper.h
@@ -61,6 +61,21 @@
     float* read_from = rgba;
 };
 
+struct SkJumper_LoadTablesCtx {  // context for the load_tables* stages
+    const void* src;  // source pixels; the stages read it as uint32_t or uint16_t data
+    const float *r, *g, *b;  // per-channel tables the stages gather from
+};
+
+struct SkJumper_TableCtx {  // context for the table_r/g/b/a stages
+    const float* table;  // values gathered by the stage
+    int          size;  // entry count; the stage scales its input by size - 1 to form the index
+};
+
+struct SkJumper_ByteTablesRGBCtx {  // context for the byte_tables_rgb stage
+    const uint8_t *r, *g, *b;  // per-channel byte tables
+    int n;  // the stage scales each channel by n - 1 to form the table index
+};
+
 // This should line up with the memory layout of SkColorSpaceTransferFn.
 struct SkJumper_ParametricTransferFunction {
     float G, A,B,C,D,E,F;
@@ -84,4 +99,9 @@
     uint16_t rgba[4];  // [0,255] in a 16-bit lane.
 };
 
+struct SkJumper_ColorLookupTableCtx {  // context for the clut_3D / clut_4D stages
+    const float* table;  // SkColorSpaceXform_A2B sets this from the CLUT's table()
+    int limits[4];  // SkColorSpaceXform_A2B sets limits[i] to gridPoints(i) per input channel
+};
+
 #endif//SkJumper_DEFINED
diff --git a/src/opts/SkRasterPipeline_opts.h b/src/opts/SkRasterPipeline_opts.h
index 9fe3d94..34a0aa5 100644
--- a/src/opts/SkRasterPipeline_opts.h
+++ b/src/opts/SkRasterPipeline_opts.h
@@ -1275,6 +1275,12 @@
     r = b;
     b = tmp;
 }
+STAGE(invert, Ctx::None) {  // applies inv() to all four channels; NOTE(review): presumably 1 - x
+    r = inv(r);
+    g = inv(g);
+    b = inv(b);
+    a = inv(a);
+}
 
 STAGE(move_src_dst, Ctx::None) {
     dr = r;
@@ -1469,6 +1475,38 @@
     a = lerp(da, a, ca);
 }
 
+STAGE(load_tables, const SkJumper_LoadTablesCtx* c) {
+    auto px = load<U32>((const uint32_t*)c->src + dx, tail);
+    r = gather(c->r, (px      ) & 0xff);
+    g = gather(c->g, (px >>  8) & 0xff);
+    b = gather(c->b, (px >> 16) & 0xff);
+    a = cast(        (px >> 24)) * (1/255.0f);
+}
+STAGE(load_tables_u16_be, const SkJumper_LoadTablesCtx* c) {
+    auto ptr = (const uint16_t*)c->src + 4*dx;
+
+    U16 R,G,B,A;
+    load4(ptr, tail, &R,&G,&B,&A);
+
+    // c->src is big-endian, so & 0xff grabs the 8 most significant bits.
+    r = gather(c->r, expand(R) & 0xff);
+    g = gather(c->g, expand(G) & 0xff);
+    b = gather(c->b, expand(B) & 0xff);
+    a = (1/65535.0f) * cast(expand(bswap(A)));
+}
+STAGE(load_tables_rgb_u16_be, const SkJumper_LoadTablesCtx* c) {
+    auto ptr = (const uint16_t*)c->src + 3*dx;
+
+    U16 R,G,B;
+    load3(ptr, tail, &R,&G,&B);
+
+    // c->src is big-endian, so & 0xff grabs the 8 most significant bits.
+    r = gather(c->r, expand(R) & 0xff);
+    g = gather(c->g, expand(G) & 0xff);
+    b = gather(c->b, expand(B) & 0xff);
+    a = 1.0f;
+}
+
 STAGE(byte_tables, const void* ctx) {  // TODO: rename Tables SkJumper_ByteTablesCtx
     struct Tables { const uint8_t *r, *g, *b, *a; };
     auto tables = (const Tables*)ctx;
@@ -1479,6 +1517,21 @@
     a = from_byte(gather(tables->a, to_unorm(a, 255)));
 }
 
+STAGE(byte_tables_rgb, const SkJumper_ByteTablesRGBCtx* ctx) {
+    int scale = ctx->n - 1;
+    r = from_byte(gather(ctx->r, to_unorm(r, scale)));
+    g = from_byte(gather(ctx->g, to_unorm(g, scale)));
+    b = from_byte(gather(ctx->b, to_unorm(b, scale)));
+}
+
+SI F table(F v, const SkJumper_TableCtx* ctx) {
+    return gather(ctx->table, to_unorm(v, ctx->size - 1));
+}
+STAGE(table_r, const SkJumper_TableCtx* ctx) { r = table(r, ctx); }
+STAGE(table_g, const SkJumper_TableCtx* ctx) { g = table(g, ctx); }
+STAGE(table_b, const SkJumper_TableCtx* ctx) { b = table(b, ctx); }
+STAGE(table_a, const SkJumper_TableCtx* ctx) { a = table(a, ctx); }
+
 SI F parametric(F v, const SkJumper_ParametricTransferFunction* ctx) {
     F r = if_then_else(v <= ctx->D, mad(ctx->C, v, ctx->F)
                                   , approx_powf(mad(ctx->A, v, ctx->B), ctx->G) + ctx->E);
@@ -1500,6 +1553,25 @@
     db = approx_powf(db, *G);
 }
 
+STAGE(lab_to_xyz, Ctx::None) {
+    F L = r * 100.0f,
+      A = g * 255.0f - 128.0f,
+      B = b * 255.0f - 128.0f;
+
+    F Y = (L + 16.0f) * (1/116.0f),
+      X = Y + A*(1/500.0f),
+      Z = Y - B*(1/200.0f);
+
+    X = if_then_else(X*X*X > 0.008856f, X*X*X, (X - (16/116.0f)) * (1/7.787f));
+    Y = if_then_else(Y*Y*Y > 0.008856f, Y*Y*Y, (Y - (16/116.0f)) * (1/7.787f));
+    Z = if_then_else(Z*Z*Z > 0.008856f, Z*Z*Z, (Z - (16/116.0f)) * (1/7.787f));
+
+    // Adjust to D50 illuminant.
+    r = X * 0.96422f;
+    g = Y           ;
+    b = Z * 0.82521f;
+}
+
 STAGE(load_a8, const SkJumper_MemoryCtx* ctx) {
     auto ptr = ptr_at_xy<const uint8_t>(ctx, dx,dy);
 
@@ -1702,6 +1774,28 @@
                               , to_half(a));
 }
 
+STAGE(load_u16_be, const SkJumper_MemoryCtx* ctx) {
+    auto ptr = ptr_at_xy<const uint16_t>(ctx, 4*dx,dy);
+
+    U16 R,G,B,A;
+    load4(ptr,tail, &R,&G,&B,&A);
+
+    r = (1/65535.0f) * cast(expand(bswap(R)));
+    g = (1/65535.0f) * cast(expand(bswap(G)));
+    b = (1/65535.0f) * cast(expand(bswap(B)));
+    a = (1/65535.0f) * cast(expand(bswap(A)));
+}
+STAGE(load_rgb_u16_be, const SkJumper_MemoryCtx* ctx) {
+    auto ptr = ptr_at_xy<const uint16_t>(ctx, 3*dx,dy);
+
+    U16 R,G,B;
+    load3(ptr,tail, &R,&G,&B);
+
+    r = (1/65535.0f) * cast(expand(bswap(R)));
+    g = (1/65535.0f) * cast(expand(bswap(G)));
+    b = (1/65535.0f) * cast(expand(bswap(B)));
+    a = 1.0f;
+}
 STAGE(store_u16_be, const SkJumper_MemoryCtx* ctx) {
     auto ptr = ptr_at_xy<uint16_t>(ctx, 4*dx,dy);
 
@@ -2101,6 +2195,58 @@
     load4(c->read_from,0, &r,&g,&b,&a);
 }
 
+// Our general strategy is to recursively interpolate each dimension,
+// accumulating the index to sample at, and our current pixel stride to help accumulate the index.
+template <int dim>
+SI void color_lookup_table(const SkJumper_ColorLookupTableCtx* ctx,
+                           F& r, F& g, F& b, F a, U32 index, U32 stride) {
+    // We'd logically like to sample this dimension at x.
+    int limit = ctx->limits[dim-1];
+    F src;
+    switch(dim) {
+        case 1: src = r; break;
+        case 2: src = g; break;
+        case 3: src = b; break;
+        case 4: src = a; break;
+    }
+    F x = src * (limit - 1);
+
+    // We can't index an array by a float (darn) so we have to snap to nearby integers lo and hi.
+    U32 lo = trunc_(x          ),
+        hi = trunc_(x + 0.9999f);
+
+    // Recursively sample at lo and hi.
+    F lr = r, lg = g, lb = b,
+      hr = r, hg = g, hb = b;
+    color_lookup_table<dim-1>(ctx, lr,lg,lb,a, stride*lo + index, stride*limit);
+    color_lookup_table<dim-1>(ctx, hr,hg,hb,a, stride*hi + index, stride*limit);
+
+    // Linearly interpolate those colors based on their distance to x.
+    F t = x - cast(lo);
+    r = lerp(lr, hr, t);
+    g = lerp(lg, hg, t);
+    b = lerp(lb, hb, t);
+}
+
+// Bottom out our recursion at 0 dimensions, i.e. just return the colors at index.
+template<>
+inline void color_lookup_table<0>(const SkJumper_ColorLookupTableCtx* ctx,
+                                  F& r, F& g, F& b, F a, U32 index, U32 stride) {
+    r = gather(ctx->table, 3*index+0);
+    g = gather(ctx->table, 3*index+1);
+    b = gather(ctx->table, 3*index+2);
+}
+
+STAGE(clut_3D, const SkJumper_ColorLookupTableCtx* ctx) {
+    color_lookup_table<3>(ctx, r,g,b,a, 0,1);
+    // This 3D color lookup table leaves alpha alone.
+}
+STAGE(clut_4D, const SkJumper_ColorLookupTableCtx* ctx) {
+    color_lookup_table<4>(ctx, r,g,b,a, 0,1);
+    // "a" was really CMYK's K, so we just set alpha opaque.
+    a = 1.0f;
+}
+
 STAGE(gauss_a_to_rgba, Ctx::None) {
     // x = 1 - x;
     // exp(-x * x * 4) - 0.018f;
@@ -2551,6 +2697,13 @@
     a = da;
 }
 
+STAGE_PP(invert, Ctx::None) {
+    r = inv(r);
+    g = inv(g);
+    b = inv(b);
+    a = inv(a);
+}
+
 // ~~~~~~ Blend modes ~~~~~~ //
 
 // The same logic applied to all 4 channels.
@@ -3245,13 +3398,15 @@
         load_f16    , load_f16_dst    , store_f16    , gather_f16,
         load_f32    , load_f32_dst    , store_f32    , gather_f32,
         load_1010102, load_1010102_dst, store_1010102, gather_1010102,
-        store_u16_be,
-        byte_tables,
+        load_u16_be, load_rgb_u16_be, store_u16_be,
+        load_tables_u16_be, load_tables_rgb_u16_be,
+        load_tables, byte_tables, byte_tables_rgb,
         colorburn, colordodge, softlight, hue, saturation, color, luminosity,
         matrix_3x4, matrix_4x5, matrix_4x3,
         parametric_r, parametric_g, parametric_b, parametric_a,
+        table_r, table_g, table_b, table_a,
         gamma, gamma_dst,
-        rgb_to_hsl, hsl_to_rgb,
+        lab_to_xyz, rgb_to_hsl, hsl_to_rgb, clut_3D, clut_4D,
         gauss_a_to_rgba,
         mirror_x, repeat_x,
         mirror_y, repeat_y,