add virtuals to optimize composing colorfilters

BUG=skia:

Review URL: https://codereview.chromium.org/968993004
diff --git a/include/core/SkColorFilter.h b/include/core/SkColorFilter.h
index bb62805..31a4365 100644
--- a/include/core/SkColorFilter.h
+++ b/include/core/SkColorFilter.h
@@ -98,6 +98,15 @@
     virtual uint32_t getFlags() const { return 0; }
 
     /**
+     *  If this subclass can optimally create a composition with the inner filter, return it as
+     *  a new filter (which the caller must unref() when it is done). If no such optimization
+     *  is known, return NULL.
+     *
+     *  e.g. result(color) == this_filter(inner(color))
+     */
+    virtual SkColorFilter* newComposed(const SkColorFilter* /*inner*/) const { return NULL; }
+
+    /**
      *  Apply this colorfilter to the specified SkColor. This routine handles
      *  converting to SkPMColor, calling the filter, and then converting back
      *  to SkColor. This method is not virtual, but will call filterSpan()
diff --git a/include/effects/SkColorMatrix.h b/include/effects/SkColorMatrix.h
index c598a12..7ac4579 100644
--- a/include/effects/SkColorMatrix.h
+++ b/include/effects/SkColorMatrix.h
@@ -59,6 +59,9 @@
     }
 
     bool operator!=(const SkColorMatrix& other) const { return !((*this) == other); }
+
+    static bool NeedsClamping(const SkScalar[20]);
+    static void SetConcat(SkScalar result[20], const SkScalar outer[20], const SkScalar inner[20]);
 };
 
 #endif
diff --git a/include/effects/SkColorMatrixFilter.h b/include/effects/SkColorMatrixFilter.h
index b5ab689..ff80607 100644
--- a/include/effects/SkColorMatrixFilter.h
+++ b/include/effects/SkColorMatrixFilter.h
@@ -25,6 +25,8 @@
     void filterSpan16(const uint16_t src[], int count, uint16_t[]) const SK_OVERRIDE;
     uint32_t getFlags() const SK_OVERRIDE;
     bool asColorMatrix(SkScalar matrix[20]) const SK_OVERRIDE;
+    SkColorFilter* newComposed(const SkColorFilter*) const SK_OVERRIDE;
+
 #if SK_SUPPORT_GPU
     GrFragmentProcessor* asFragmentProcessor(GrContext*) const SK_OVERRIDE;
 #endif
diff --git a/src/core/SkColorFilter.cpp b/src/core/SkColorFilter.cpp
index d49fc52..2a78058 100644
--- a/src/core/SkColorFilter.cpp
+++ b/src/core/SkColorFilter.cpp
@@ -104,6 +104,8 @@
     return CreateComposeFilter(outer, inner);
 }
 
+///////////////////////////////////////////////////////////////////////////////////////////////////
+
 SkColorFilter* SkColorFilter::CreateComposeFilter(SkColorFilter* outer, SkColorFilter* inner) {
     if (!outer) {
         return SkSafeRef(inner);
@@ -111,7 +113,13 @@
     if (!inner) {
         return SkSafeRef(outer);
     }
-    return SkNEW_ARGS(SkComposeColorFilter, (outer, inner));
+
+    // Give the subclass a shot at a more optimal composition...
+    SkColorFilter* composition = outer->newComposed(inner);
+    if (NULL == composition) {
+        composition = SkNEW_ARGS(SkComposeColorFilter, (outer, inner));
+    }
+    return composition;
 }
 
 SK_DEFINE_FLATTENABLE_REGISTRAR_GROUP_START(SkColorFilter)
diff --git a/src/effects/SkColorFilterImageFilter.cpp b/src/effects/SkColorFilterImageFilter.cpp
index 0a4a693..fdf6de7 100755
--- a/src/effects/SkColorFilterImageFilter.cpp
+++ b/src/effects/SkColorFilterImageFilter.cpp
@@ -15,61 +15,6 @@
 #include "SkTableColorFilter.h"
 #include "SkWriteBuffer.h"
 
-namespace {
-
-void mult_color_matrix(SkScalar a[20], SkScalar b[20], SkScalar out[20]) {
-    for (int j = 0; j < 4; ++j) {
-        for (int i = 0; i < 5; ++i) {
-            out[i+j*5] = 4 == i ? a[4+j*5] : 0;
-            for (int k = 0; k < 4; ++k)
-                out[i+j*5] += a[k+j*5] * b[i+k*5];
-        }
-    }
-}
-
-// Combines the two lookup tables so that making a lookup using OUT has
-// the same effect as making a lookup through B then A.
-void combine_color_tables(const uint8_t a[4 * 256],
-                          const uint8_t b[4 * 256],
-                          uint8_t out[4 * 256]) {
-    for (int i = 0; i < 4; i++) {
-        for (int j = 0; j < 256; j++) {
-            out[i * 256 + j] = a[i * 256 + b[i * 256 + j]];
-        }
-    }
-}
-
-// To detect if we need to apply clamping after applying a matrix, we check if
-// any output component might go outside of [0, 255] for any combination of
-// input components in [0..255].
-// Each output component is an affine transformation of the input component, so
-// the minimum and maximum values are for any combination of minimum or maximum
-// values of input components (i.e. 0 or 255).
-// E.g. if R' = x*R + y*G + z*B + w*A + t
-// Then the maximum value will be for R=255 if x>0 or R=0 if x<0, and the
-// minimum value will be for R=0 if x>0 or R=255 if x<0.
-// Same goes for all components.
-bool component_needs_clamping(SkScalar row[5]) {
-    SkScalar maxValue = row[4] / 255;
-    SkScalar minValue = row[4] / 255;
-    for (int i = 0; i < 4; ++i) {
-        if (row[i] > 0)
-            maxValue += row[i];
-        else
-            minValue += row[i];
-    }
-    return (maxValue > 1) || (minValue < 0);
-}
-
-bool matrix_needs_clamping(SkScalar matrix[20]) {
-    return component_needs_clamping(matrix)
-        || component_needs_clamping(matrix+5)
-        || component_needs_clamping(matrix+10)
-        || component_needs_clamping(matrix+15);
-}
-
-};
-
 SkColorFilterImageFilter* SkColorFilterImageFilter::Create(SkColorFilter* cf,
         SkImageFilter* input, const CropRect* cropRect, uint32_t uniqueID) {
     if (NULL == cf) {
@@ -77,35 +22,10 @@
     }
 
     SkColorFilter* inputColorFilter;
-    if (input && input->asColorFilter(&inputColorFilter) && inputColorFilter) {
+    if (input && input->asColorFilter(&inputColorFilter)) {
         SkAutoUnref autoUnref(inputColorFilter);
-
-        // Try to collapse two consecutive matrix filters
-        SkScalar colorMatrix[20], inputMatrix[20];
-        if (cf->asColorMatrix(colorMatrix) && inputColorFilter->asColorMatrix(inputMatrix)
-                                           && !matrix_needs_clamping(inputMatrix)) {
-            SkScalar combinedMatrix[20];
-            mult_color_matrix(colorMatrix, inputMatrix, combinedMatrix);
-            SkAutoTUnref<SkColorFilter> newCF(SkColorMatrixFilter::Create(combinedMatrix));
-            return SkNEW_ARGS(SkColorFilterImageFilter, (newCF, input->getInput(0), cropRect, 0));
-        }
-
-        // Try to collapse two consecutive table filters
-        SkBitmap colorTable, inputTable;
-        if (cf->asComponentTable(&colorTable) && inputColorFilter->asComponentTable(&inputTable)) {
-            uint8_t combinedTable[4 * 256];
-            SkAutoLockPixels colorLock(colorTable);
-            SkAutoLockPixels inputLock(inputTable);
-
-            combine_color_tables(colorTable.getAddr8(0, 0), inputTable.getAddr8(0, 0),
-                                 combinedTable);
-            SkAutoTUnref<SkColorFilter> newCF(SkTableColorFilter::CreateARGB(
-                        &combinedTable[256 * 0],
-                        &combinedTable[256 * 1],
-                        &combinedTable[256 * 2],
-                        &combinedTable[256 * 3])
-            );
-
+        SkAutoTUnref<SkColorFilter> newCF(cf->newComposed(inputColorFilter));
+        if (newCF) {
             return SkNEW_ARGS(SkColorFilterImageFilter, (newCF, input->getInput(0), cropRect, 0));
         }
     }
diff --git a/src/effects/SkColorMatrix.cpp b/src/effects/SkColorMatrix.cpp
index 3842285..d99d51e 100644
--- a/src/effects/SkColorMatrix.cpp
+++ b/src/effects/SkColorMatrix.cpp
@@ -6,6 +6,68 @@
  */
 #include "SkColorMatrix.h"
 
+// To detect if we need to apply clamping after applying a matrix, we check if
+// any output component might go outside of [0, 255] for any combination of
+// input components in [0..255].
+// Each output component is an affine transformation of the input components, so
+// the minimum and maximum values are for any combination of minimum or maximum
+// values of input components (i.e. 0 or 255).
+// E.g. if R' = x*R + y*G + z*B + w*A + t
+// Then the maximum value will be for R=255 if x>0 or R=0 if x<0, and the
+// minimum value will be for R=0 if x>0 or R=255 if x<0.
+// Same goes for all components.
+static bool component_needs_clamping(const SkScalar row[5]) {
+    SkScalar maxValue = row[4] / 255;
+    SkScalar minValue = row[4] / 255;
+    for (int i = 0; i < 4; ++i) {
+        if (row[i] > 0)
+            maxValue += row[i];
+        else
+            minValue += row[i];
+    }
+    return (maxValue > 1) || (minValue < 0);
+}
+
+bool SkColorMatrix::NeedsClamping(const SkScalar matrix[20]) {
+    return component_needs_clamping(matrix)
+        || component_needs_clamping(matrix+5)
+        || component_needs_clamping(matrix+10)
+        || component_needs_clamping(matrix+15);
+}
+
+void SkColorMatrix::SetConcat(SkScalar result[20],
+                              const SkScalar outer[20], const SkScalar inner[20]) {
+    SkScalar    tmp[20];
+    SkScalar*   target;
+
+    if (outer == result || inner == result) {
+        target = tmp;   // will memcpy answer when we're done into result
+    } else {
+        target = result;
+    }
+
+    int index = 0;
+    for (int j = 0; j < 20; j += 5) {
+        for (int i = 0; i < 4; i++) {
+            target[index++] =   outer[j + 0] * inner[i + 0] +
+                                outer[j + 1] * inner[i + 5] +
+                                outer[j + 2] * inner[i + 10] +
+                                outer[j + 3] * inner[i + 15];
+        }
+        target[index++] =   outer[j + 0] * inner[4] +
+                            outer[j + 1] * inner[9] +
+                            outer[j + 2] * inner[14] +
+                            outer[j + 3] * inner[19] +
+                            outer[j + 4];
+    }
+
+    if (target != result) {
+        memcpy(result, target, 20 * sizeof(SkScalar));
+    }
+}
+
+///////////////////////////////////////////////////////////////////////////////
+
 void SkColorMatrix::setIdentity() {
     memset(fMat, 0, sizeof(fMat));
     fMat[kR_Scale] = fMat[kG_Scale] = fMat[kB_Scale] = fMat[kA_Scale] = 1;
@@ -67,38 +129,8 @@
     this->postConcat(tmp);
 }
 
-///////////////////////////////////////////////////////////////////////////////
-
-void SkColorMatrix::setConcat(const SkColorMatrix& matA,
-                              const SkColorMatrix& matB) {
-    SkScalar    tmp[20];
-    SkScalar*   result = fMat;
-
-    if (&matA == this || &matB == this) {
-        result = tmp;
-    }
-
-    const SkScalar* a = matA.fMat;
-    const SkScalar* b = matB.fMat;
-
-    int index = 0;
-    for (int j = 0; j < 20; j += 5) {
-        for (int i = 0; i < 4; i++) {
-            result[index++] =   SkScalarMul(a[j + 0], b[i + 0]) +
-                                SkScalarMul(a[j + 1], b[i + 5]) +
-                                SkScalarMul(a[j + 2], b[i + 10]) +
-                                SkScalarMul(a[j + 3], b[i + 15]);
-        }
-        result[index++] =   SkScalarMul(a[j + 0], b[4]) +
-                            SkScalarMul(a[j + 1], b[9]) +
-                            SkScalarMul(a[j + 2], b[14]) +
-                            SkScalarMul(a[j + 3], b[19]) +
-                            a[j + 4];
-    }
-
-    if (fMat != result) {
-        memcpy(fMat, result, sizeof(fMat));
-    }
+void SkColorMatrix::setConcat(const SkColorMatrix& matA, const SkColorMatrix& matB) {
+    SetConcat(fMat, matA.fMat, matB.fMat);
 }
 
 ///////////////////////////////////////////////////////////////////////////////
@@ -160,3 +192,4 @@
     setrow(fMat + 10, 1, kU2B, 0);
     fMat[kA_Scale] = 1;
 }
+
diff --git a/src/effects/SkColorMatrixFilter.cpp b/src/effects/SkColorMatrixFilter.cpp
index c9cb186..6ab4a67 100644
--- a/src/effects/SkColorMatrixFilter.cpp
+++ b/src/effects/SkColorMatrixFilter.cpp
@@ -322,6 +322,16 @@
     return true;
 }
 
+SkColorFilter* SkColorMatrixFilter::newComposed(const SkColorFilter* innerFilter) const {
+    SkScalar innerMatrix[20];
+    if (innerFilter->asColorMatrix(innerMatrix) && !SkColorMatrix::NeedsClamping(innerMatrix)) {
+        SkScalar concat[20];
+        SkColorMatrix::SetConcat(concat, fMatrix.fMat, innerMatrix);
+        return SkColorMatrixFilter::Create(concat);
+    }
+    return NULL;
+}
+
 #if SK_SUPPORT_GPU
 #include "GrFragmentProcessor.h"
 #include "GrInvariantOutput.h"
diff --git a/src/effects/SkTableColorFilter.cpp b/src/effects/SkTableColorFilter.cpp
index 37fdd90..d6c3e5f 100644
--- a/src/effects/SkTableColorFilter.cpp
+++ b/src/effects/SkTableColorFilter.cpp
@@ -41,13 +41,13 @@
     }
 
     bool asComponentTable(SkBitmap* table) const SK_OVERRIDE;
+    SkColorFilter* newComposed(const SkColorFilter* inner) const SK_OVERRIDE;
 
 #if SK_SUPPORT_GPU
     GrFragmentProcessor* asFragmentProcessor(GrContext* context) const SK_OVERRIDE;
 #endif
 
-    virtual void filterSpan(const SkPMColor src[], int count,
-                            SkPMColor dst[]) const SK_OVERRIDE;
+    void filterSpan(const SkPMColor src[], int count, SkPMColor dst[]) const SK_OVERRIDE;
 
     SK_TO_STRING_OVERRIDE()
 
@@ -109,8 +109,7 @@
     0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF
 };
 
-void SkTable_ColorFilter::filterSpan(const SkPMColor src[], int count,
-                                     SkPMColor dst[]) const {
+void SkTable_ColorFilter::filterSpan(const SkPMColor src[], int count, SkPMColor dst[]) const {
     const uint8_t* table = fStorage;
     const uint8_t* tableA = gIdentityTable;
     const uint8_t* tableR = gIdentityTable;
@@ -274,6 +273,56 @@
     return true;
 }
 
+// Combines the two lookup tables so that making a lookup using res[] has
+// the same effect as making a lookup through inner[] then outer[].
+static void combine_tables(uint8_t res[256], const uint8_t outer[256], const uint8_t inner[256]) {
+    for (int i = 0; i < 256; i++) {
+        res[i] = outer[inner[i]];
+    }
+}
+
+SkColorFilter* SkTable_ColorFilter::newComposed(const SkColorFilter* innerFilter) const {
+    SkBitmap innerBM;
+    if (!innerFilter->asComponentTable(&innerBM)) {
+        return NULL;
+    }
+
+    innerBM.lockPixels();
+    if (NULL == innerBM.getPixels()) {
+        return NULL;
+    }
+
+    const uint8_t* table = fStorage;
+    const uint8_t* tableA = gIdentityTable;
+    const uint8_t* tableR = gIdentityTable;
+    const uint8_t* tableG = gIdentityTable;
+    const uint8_t* tableB = gIdentityTable;
+    if (fFlags & kA_Flag) {
+        tableA = table; table += 256;
+    }
+    if (fFlags & kR_Flag) {
+        tableR = table; table += 256;
+    }
+    if (fFlags & kG_Flag) {
+        tableG = table; table += 256;
+    }
+    if (fFlags & kB_Flag) {
+        tableB = table;
+    }
+
+    uint8_t concatA[256];
+    uint8_t concatR[256];
+    uint8_t concatG[256];
+    uint8_t concatB[256];
+
+    combine_tables(concatA, tableA, innerBM.getAddr8(0, 0));
+    combine_tables(concatR, tableR, innerBM.getAddr8(0, 1));
+    combine_tables(concatG, tableG, innerBM.getAddr8(0, 2));
+    combine_tables(concatB, tableB, innerBM.getAddr8(0, 3));
+
+    return SkTableColorFilter::CreateARGB(concatA, concatR, concatG, concatB);
+}
+
 #if SK_SUPPORT_GPU
 
 #include "GrFragmentProcessor.h"