fix trivial matrix computation

Revert "Reverted 10056-10059"

This reverts commit bab4ebcaa7270c3b866a8e10917c39b055ebd51a.

Fix broken GM test, reintroduce image scaling.

BUG=

Review URL: https://codereview.chromium.org/18721006

git-svn-id: http://skia.googlecode.com/svn/trunk@10066 2bbb7eff-a529-9590-31e7-b0007b416f81
diff --git a/src/core/SkBitmapFilter.cpp b/src/core/SkBitmapFilter.cpp
index 3cbafd7..434ea9a 100644
--- a/src/core/SkBitmapFilter.cpp
+++ b/src/core/SkBitmapFilter.cpp
@@ -22,28 +22,28 @@
 
     while (count-- > 0) {
         SkPoint srcPt;
-        s.fInvProc(*s.fInvMatrix, SkFloatToScalar(x + 0.5f),
+        s.fInvProc(s.fInvMatrix, SkFloatToScalar(x + 0.5f),
                     SkFloatToScalar(y + 0.5f), &srcPt);
         srcPt.fX -= SK_ScalarHalf;
         srcPt.fY -= SK_ScalarHalf;
 
-        SkFixed weight = 0;
-        SkFixed fr = 0, fg = 0, fb = 0, fa = 0;
+        SkScalar weight = 0;
+        SkScalar fr = 0, fg = 0, fb = 0, fa = 0;
 
-        int y0 = SkClampMax(sk_float_ceil2int(SkScalarToFloat(srcPt.fY)-s.getBitmapFilter()->width()), maxY);
-        int y1 = SkClampMax(sk_float_floor2int(SkScalarToFloat(srcPt.fY)+s.getBitmapFilter()->width()), maxY);
-        int x0 = SkClampMax(sk_float_ceil2int(SkScalarToFloat(srcPt.fX)-s.getBitmapFilter()->width()), maxX);
-        int x1 = SkClampMax(sk_float_floor2int(SkScalarToFloat(srcPt.fX)+s.getBitmapFilter()->width()), maxX);
+        int y0 = SkClampMax(SkScalarCeilToInt(srcPt.fY-s.getBitmapFilter()->width()), maxY);
+        int y1 = SkClampMax(SkScalarFloorToInt(srcPt.fY+s.getBitmapFilter()->width()), maxY);
+        int x0 = SkClampMax(SkScalarCeilToInt(srcPt.fX-s.getBitmapFilter()->width()), maxX);
+        int x1 = SkClampMax(SkScalarFloorToInt(srcPt.fX+s.getBitmapFilter()->width()), maxX);
 
-        for (int src_y = y0; src_y <= y1; src_y++) {
-            SkFixed yweight = s.getBitmapFilter()->lookup((srcPt.fY - src_y));
+        for (int srcY = y0; srcY <= y1; srcY++) {
+            SkScalar yWeight = s.getBitmapFilter()->lookupScalar((srcPt.fY - srcY));
 
-            for (int src_x = x0; src_x <= x1 ; src_x++) {
-                SkFixed xweight = s.getBitmapFilter()->lookup((srcPt.fX - src_x));
+            for (int srcX = x0; srcX <= x1 ; srcX++) {
+                SkScalar xWeight = s.getBitmapFilter()->lookupScalar((srcPt.fX - srcX));
 
-                SkFixed combined_weight = SkFixedMul(xweight, yweight);
+                SkScalar combined_weight = SkScalarMul(xWeight, yWeight);
 
-                SkPMColor c = *s.fBitmap->getAddr32(src_x, src_y);
+                SkPMColor c = *s.fBitmap->getAddr32(srcX, srcY);
                 fr += combined_weight * SkGetPackedR32(c);
                 fg += combined_weight * SkGetPackedG32(c);
                 fb += combined_weight * SkGetPackedB32(c);
@@ -52,15 +52,15 @@
             }
         }
 
-        fr = SkFixedDiv(fr, weight);
-        fg = SkFixedDiv(fg, weight);
-        fb = SkFixedDiv(fb, weight);
-        fa = SkFixedDiv(fa, weight);
+        fr = SkScalarDiv(fr, weight);
+        fg = SkScalarDiv(fg, weight);
+        fb = SkScalarDiv(fb, weight);
+        fa = SkScalarDiv(fa, weight);
 
-        int a = SkClampMax(SkFixedRoundToInt(fa), 255);
-        int r = SkClampMax(SkFixedRoundToInt(fr), a);
-        int g = SkClampMax(SkFixedRoundToInt(fg), a);
-        int b = SkClampMax(SkFixedRoundToInt(fb), a);
+        int a = SkClampMax(SkScalarRoundToInt(fa), 255);
+        int r = SkClampMax(SkScalarRoundToInt(fr), a);
+        int g = SkClampMax(SkScalarRoundToInt(fg), a);
+        int b = SkClampMax(SkScalarRoundToInt(fb), a);
 
         *colors++ = SkPackARGB32(a, r, g, b);
 
@@ -75,33 +75,33 @@
 
      SkPoint srcPt;
 
-     s.fInvProc(*s.fInvMatrix, SkFloatToScalar(x + 0.5f),
+     s.fInvProc(s.fInvMatrix, SkFloatToScalar(x + 0.5f),
                  SkFloatToScalar(y + 0.5f), &srcPt);
      srcPt.fY -= SK_ScalarHalf;
-     int y0 = SkClampMax(sk_float_ceil2int(SkScalarToFloat(srcPt.fY)-s.getBitmapFilter()->width()), maxY);
-     int y1 = SkClampMax(sk_float_floor2int(SkScalarToFloat(srcPt.fY)+s.getBitmapFilter()->width()), maxY);
+     int y0 = SkClampMax(SkScalarCeilToInt(srcPt.fY-s.getBitmapFilter()->width()), maxY);
+     int y1 = SkClampMax(SkScalarFloorToInt(srcPt.fY+s.getBitmapFilter()->width()), maxY);
 
      while (count-- > 0) {
-         s.fInvProc(*s.fInvMatrix, SkFloatToScalar(x + 0.5f),
+         s.fInvProc(s.fInvMatrix, SkFloatToScalar(x + 0.5f),
                      SkFloatToScalar(y + 0.5f), &srcPt);
          srcPt.fX -= SK_ScalarHalf;
          srcPt.fY -= SK_ScalarHalf;
 
-         SkFixed weight = 0;
-         SkFixed fr = 0, fg = 0, fb = 0, fa = 0;
+         SkScalar weight = 0;
+         SkScalar fr = 0, fg = 0, fb = 0, fa = 0;
 
-         int x0 = SkClampMax(sk_float_ceil2int(SkScalarToFloat(srcPt.fX)-s.getBitmapFilter()->width()), maxX);
-         int x1 = SkClampMax(sk_float_floor2int(SkScalarToFloat(srcPt.fX)+s.getBitmapFilter()->width()), maxX);
+         int x0 = SkClampMax(SkScalarCeilToInt(srcPt.fX-s.getBitmapFilter()->width()), maxX);
+         int x1 = SkClampMax(SkScalarFloorToInt(srcPt.fX+s.getBitmapFilter()->width()), maxX);
 
-         for (int src_y = y0; src_y <= y1; src_y++) {
-             SkFixed yweight = s.getBitmapFilter()->lookup((srcPt.fY - src_y));
+         for (int srcY = y0; srcY <= y1; srcY++) {
+             SkScalar yWeight = s.getBitmapFilter()->lookupScalar((srcPt.fY - srcY));
 
-             for (int src_x = x0; src_x <= x1 ; src_x++) {
-                 SkFixed xweight = s.getBitmapFilter()->lookup((srcPt.fX - src_x));
+             for (int srcX = x0; srcX <= x1 ; srcX++) {
+                 SkScalar xWeight = s.getBitmapFilter()->lookupScalar((srcPt.fX - srcX));
 
-                 SkFixed combined_weight = SkFixedMul(xweight, yweight);
+                 SkScalar combined_weight = SkScalarMul(xWeight, yWeight);
 
-                 SkPMColor c = *s.fBitmap->getAddr32(src_x, src_y);
+                 SkPMColor c = *s.fBitmap->getAddr32(srcX, srcY);
                  fr += combined_weight * SkGetPackedR32(c);
                  fg += combined_weight * SkGetPackedG32(c);
                  fb += combined_weight * SkGetPackedB32(c);
@@ -110,15 +110,15 @@
              }
          }
 
-         fr = SkFixedDiv(fr, weight);
-         fg = SkFixedDiv(fg, weight);
-         fb = SkFixedDiv(fb, weight);
-         fa = SkFixedDiv(fa, weight);
+         fr = SkScalarDiv(fr, weight);
+         fg = SkScalarDiv(fg, weight);
+         fb = SkScalarDiv(fb, weight);
+         fa = SkScalarDiv(fa, weight);
 
-         int a = SkClampMax(SkFixedRoundToInt(fa), 255);
-         int r = SkClampMax(SkFixedRoundToInt(fr), a);
-         int g = SkClampMax(SkFixedRoundToInt(fg), a);
-         int b = SkClampMax(SkFixedRoundToInt(fb), a);
+         int a = SkClampMax(SkScalarRoundToInt(fa), 255);
+         int r = SkClampMax(SkScalarRoundToInt(fr), a);
+         int g = SkClampMax(SkScalarRoundToInt(fg), a);
+         int b = SkClampMax(SkScalarRoundToInt(fb), a);
 
          *colors++ = SkPackARGB32(a, r, g, b);
 
@@ -147,12 +147,13 @@
 }
 
 SkBitmapProcState::ShaderProc32
-SkBitmapProcState::chooseBitmapFilterProc(const SkPaint& paint) {
-    // we need to be requested
-    uint32_t mask = SkPaint::kFilterBitmap_Flag
-                  | SkPaint::kHighQualityFilterBitmap_Flag
-                  ;
-    if ((paint.getFlags() & mask) != mask) {
+SkBitmapProcState::chooseBitmapFilterProc() {
+
+    if (fFilterQuality != kHQ_BitmapFilter) {
+        return NULL;
+    }
+
+    if (fAlphaScale != 256) {
         return NULL;
     }
 
@@ -166,11 +167,6 @@
         return NULL;
     }
 
-    // TODO: support blending inside our procs
-    if (0xFF != paint.getAlpha()) {
-        return NULL;
-    }
-
     if (fInvType & (SkMatrix::kAffine_Mask | SkMatrix::kScale_Mask)) {
         fBitmapFilter = allocateBitmapFilter();
     }
@@ -184,51 +180,51 @@
     }
 }
 
-static void divideByWeights(SkFixed *sums, SkFixed *weights, SkBitmap *dst) {
+static void divideByWeights(SkScalar *sums, SkScalar *weights, SkBitmap *dst) {  // divides each pixel's 4 accumulated channel sums by that pixel's total weight and writes the packed colour into dst
     for (int y = 0 ; y < dst->height() ; y++) {
         for (int x = 0 ; x < dst->width() ; x++) {
-            SkFixed fr = SkFixedDiv(sums[4*(y*dst->width() + x) + 0], weights[y*dst->width() + x]);
-            SkFixed fg = SkFixedDiv(sums[4*(y*dst->width() + x) + 1], weights[y*dst->width() + x]);
-            SkFixed fb = SkFixedDiv(sums[4*(y*dst->width() + x) + 2], weights[y*dst->width() + x]);
-            SkFixed fa = SkFixedDiv(sums[4*(y*dst->width() + x) + 3], weights[y*dst->width() + x]);
-            int a = SkClampMax(SkFixedRoundToInt(fa), 255);
-            int r = SkClampMax(SkFixedRoundToInt(fr), a);
-            int g = SkClampMax(SkFixedRoundToInt(fg), a);
-            int b = SkClampMax(SkFixedRoundToInt(fb), a);
+            SkScalar fr = SkScalarDiv(sums[4*(y*dst->width() + x) + 0], weights[y*dst->width() + x]);  // NOTE(review): no guard for a zero total weight -- confirm every dst pixel receives a contribution
+            SkScalar fg = SkScalarDiv(sums[4*(y*dst->width() + x) + 1], weights[y*dst->width() + x]);
+            SkScalar fb = SkScalarDiv(sums[4*(y*dst->width() + x) + 2], weights[y*dst->width() + x]);
+            SkScalar fa = SkScalarDiv(sums[4*(y*dst->width() + x) + 3], weights[y*dst->width() + x]);
+            int a = SkClampMax(SkScalarRoundToInt(fa), 255);  // alpha is rounded and clamped with 255 as the upper bound first...
+            int r = SkClampMax(SkScalarRoundToInt(fr), a);  // ...then each colour channel is clamped with alpha as its upper bound
+            int g = SkClampMax(SkScalarRoundToInt(fg), a);
+            int b = SkClampMax(SkScalarRoundToInt(fb), a);
 
             *dst->getAddr32(x,y) = SkPackARGB32(a, r, g, b);
         }
     }
 }
 
-static void upScaleHoriz(const SkBitmap *src, SkBitmap *dst, float scale, SkBitmapFilter *filter) {
-    for (int y = 0 ; y < src->height() ; y++) {
+static void upScaleHorizTranspose(const SkBitmap *src, SkBitmap *dst, float scale, SkBitmapFilter *filter) {
+    for (int y = 0 ; y < dst->height() ; y++) {
         for (int x = 0 ; x < dst->width() ; x++) {
-            float sx = (x + 0.5f) / scale - 0.5f;
+            float sx = (y + 0.5f) / scale - 0.5f;
             int x0 = SkClampMax(sk_float_ceil2int(sx-filter->width()), src->width()-1);
             int x1 = SkClampMax(sk_float_floor2int(sx+filter->width()), src->width()-1);
 
-            SkFixed total_weight = 0;
-            SkFixed fr = 0, fg = 0, fb = 0, fa = 0;
+            SkScalar totalWeight = 0;
+            SkScalar fr = 0, fg = 0, fb = 0, fa = 0;
 
-            for (int src_x = x0 ; src_x <= x1 ; src_x++) {
-                SkFixed weight = filter->lookup(sx - src_x);
-                SkPMColor c = *src->getAddr32(src_x,y);
-                fr += weight * SkGetPackedR32(c);
-                fg += weight * SkGetPackedG32(c);
-                fb += weight * SkGetPackedB32(c);
-                fa += weight * SkGetPackedA32(c);
-                total_weight += weight;
+            for (int srcX = x0 ; srcX <= x1 ; srcX++) {
+                SkScalar weight = filter->lookupScalar(sx - srcX);
+                SkPMColor c = *src->getAddr32(srcX, x);
+                fr += SkScalarMul(weight,SkGetPackedR32(c));
+                fg += SkScalarMul(weight,SkGetPackedG32(c));
+                fb += SkScalarMul(weight,SkGetPackedB32(c));
+                fa += SkScalarMul(weight,SkGetPackedA32(c));
+                totalWeight += weight;
             }
-            fr = SkFixedDiv(fr, total_weight);
-            fg = SkFixedDiv(fg, total_weight);
-            fb = SkFixedDiv(fb, total_weight);
-            fa = SkFixedDiv(fa, total_weight);
+            fr = SkScalarDiv(fr,totalWeight);
+            fg = SkScalarDiv(fg,totalWeight);
+            fb = SkScalarDiv(fb,totalWeight);
+            fa = SkScalarDiv(fa,totalWeight);
 
-            int a = SkClampMax(SkFixedRoundToInt(fa), 255);
-            int r = SkClampMax(SkFixedRoundToInt(fr), a);
-            int g = SkClampMax(SkFixedRoundToInt(fg), a);
-            int b = SkClampMax(SkFixedRoundToInt(fb), a);
+            int a = SkClampMax(SkScalarRoundToInt(fa), 255);
+            int r = SkClampMax(SkScalarRoundToInt(fr), a);
+            int g = SkClampMax(SkScalarRoundToInt(fg), a);
+            int b = SkClampMax(SkScalarRoundToInt(fb), a);
 
             *dst->getAddr32(x,y) = SkPackARGB32(a, r, g, b);
         }
@@ -236,14 +232,14 @@
 }
 
 static void downScaleHoriz(const SkBitmap *src, SkBitmap *dst, float scale, SkBitmapFilter *filter) {
-    SkFixed *sums = SkNEW_ARRAY(SkFixed, dst->width() * dst->height() * 4);
-    SkFixed *weights = SkNEW_ARRAY(SkFixed, dst->width() * dst->height());
+    SkScalar *sums = SkNEW_ARRAY(SkScalar, dst->width() * dst->height() * 4);
+    SkScalar *weights = SkNEW_ARRAY(SkScalar, dst->width() * dst->height());
 
-    SkAutoTDeleteArray<SkFixed> ada1(sums);
-    SkAutoTDeleteArray<SkFixed> ada2(weights);
+    SkAutoTDeleteArray<SkScalar> ada1(sums);
+    SkAutoTDeleteArray<SkScalar> ada2(weights);
 
-    memset(sums, 0, dst->width() * dst->height() * sizeof(SkFixed) * 4);
-    memset(weights, 0, dst->width() * dst->height() * sizeof(SkFixed));
+    memset(sums, 0, dst->width() * dst->height() * sizeof(SkScalar) * 4);
+    memset(weights, 0, dst->width() * dst->height() * sizeof(SkScalar));
 
     for (int y = 0 ; y < src->height() ; y++) {
         for (int x = 0 ; x < src->width() ; x++) {
@@ -255,7 +251,7 @@
             SkPMColor c = *src->getAddr32(x,y);
 
             for (int dst_x = x0 ; dst_x <= x1 ; dst_x++) {
-                SkFixed weight = filter->lookup(dx - dst_x);
+                SkScalar weight = filter->lookup(dx - dst_x);
                 sums[4*(y*dst->width() + dst_x) + 0] += weight*SkGetPackedR32(c);
                 sums[4*(y*dst->width() + dst_x) + 1] += weight*SkGetPackedG32(c);
                 sums[4*(y*dst->width() + dst_x) + 2] += weight*SkGetPackedB32(c);
@@ -268,49 +264,15 @@
     divideByWeights(sums, weights, dst);
 }
 
-static void upScaleVert(const SkBitmap *src, SkBitmap *dst, float scale, SkBitmapFilter *filter) {
-    for (int y = 0 ; y < dst->height() ; y++) {
-        for (int x = 0 ; x < dst->width() ; x++) {
-            float sy = (y + 0.5f) / scale - 0.5f;
-            int y0 = SkClampMax(sk_float_ceil2int(sy-filter->width()), src->height()-1);
-            int y1 = SkClampMax(sk_float_floor2int(sy+filter->width()), src->height()-1);
-
-            SkFixed total_weight = 0;
-            SkFixed fr = 0, fg = 0, fb = 0, fa = 0;
-
-            for (int src_y = y0 ; src_y <= y1 ; src_y++) {
-                SkFixed weight = filter->lookup(sy - src_y);
-                SkPMColor c = *src->getAddr32(x,src_y);
-                fr += weight * SkGetPackedR32(c);
-                fg += weight * SkGetPackedG32(c);
-                fb += weight * SkGetPackedB32(c);
-                fa += weight * SkGetPackedA32(c);
-                total_weight += weight;
-            }
-            fr = SkFixedDiv(fr, total_weight);
-            fg = SkFixedDiv(fg, total_weight);
-            fb = SkFixedDiv(fb, total_weight);
-            fa = SkFixedDiv(fa, total_weight);
-
-            int a = SkClampMax(SkFixedRoundToInt(fa), 255);
-            int r = SkClampMax(SkFixedRoundToInt(fr), a);
-            int g = SkClampMax(SkFixedRoundToInt(fg), a);
-            int b = SkClampMax(SkFixedRoundToInt(fb), a);
-
-            *dst->getAddr32(x,y) = SkPackARGB32(a, r, g, b);
-        }
-    }
-}
-
 static void downScaleVert(const SkBitmap *src, SkBitmap *dst, float scale, SkBitmapFilter *filter) {
-    SkFixed *sums = SkNEW_ARRAY(SkFixed, dst->width() * dst->height() * 4);
-    SkFixed *weights = SkNEW_ARRAY(SkFixed, dst->width() * dst->height());
+    SkScalar *sums = SkNEW_ARRAY(SkScalar, dst->width() * dst->height() * 4);
+    SkScalar *weights = SkNEW_ARRAY(SkScalar, dst->width() * dst->height());
 
-    SkAutoTDeleteArray<SkFixed> ada1(sums);
-    SkAutoTDeleteArray<SkFixed> ada2(weights);
+    SkAutoTDeleteArray<SkScalar> ada1(sums);
+    SkAutoTDeleteArray<SkScalar> ada2(weights);
 
-    memset(sums, 0, dst->width() * dst->height() * sizeof(SkFixed) * 4);
-    memset(weights, 0, dst->width() * dst->height() * sizeof(SkFixed));
+    memset(sums, 0, dst->width() * dst->height() * sizeof(SkScalar) * 4);
+    memset(weights, 0, dst->width() * dst->height() * sizeof(SkScalar));
 
     for (int y = 0 ; y < src->height() ; y++) {
         for (int x = 0 ; x < src->width() ; x++) {
@@ -322,7 +284,7 @@
             SkPMColor c = *src->getAddr32(x,y);
 
             for (int dst_y = y0 ; dst_y <= y1 ; dst_y++) {
-                SkFixed weight = filter->lookup(dy - dst_y);
+                SkScalar weight = filter->lookupScalar(dy - dst_y);
                 sums[4*(dst_y*dst->width() + x) + 0] += weight*SkGetPackedR32(c);
                 sums[4*(dst_y*dst->width() + x) + 1] += weight*SkGetPackedG32(c);
                 sums[4*(dst_y*dst->width() + x) + 2] += weight*SkGetPackedB32(c);
@@ -337,31 +299,27 @@
 
 void SkBitmap::scale(SkBitmap *dst) const {
 
-    SkBitmap horiz_temp;
+    SkBitmap horizTemp;
 
-    horiz_temp.setConfig(SkBitmap::kARGB_8888_Config, dst->width(), height());
-    horiz_temp.allocPixels();
+    horizTemp.setConfig(SkBitmap::kARGB_8888_Config, height(), dst->width());
+    horizTemp.allocPixels();
 
     SkBitmapFilter *filter = allocateBitmapFilter();
 
-    float horiz_scale = float(dst->width()) / width();
+    float horizScale = float(dst->width()) / width();
 
-    if (horiz_scale == 1) {
-        this->copyPixelsTo(horiz_temp.getPixels(), getSize());
-    } else if (horiz_scale > 1) {
-        upScaleHoriz(this, &horiz_temp, horiz_scale, filter);
-    } else if (horiz_scale < 1) {
-        downScaleHoriz(this, &horiz_temp, horiz_scale, filter);
+    if (horizScale >= 1) {
+        upScaleHorizTranspose(this, &horizTemp, horizScale, filter);
+    } else if (horizScale < 1) {
+        downScaleHoriz(this, &horizTemp, horizScale, filter);
     }
 
-    float vert_scale = float(dst->height()) / height();
+    float vertScale = float(dst->height()) / height();
 
-    if (vert_scale == 1) {
-        horiz_temp.copyPixelsTo(dst->getPixels(), dst->getSize());
-    } else if (vert_scale > 1) {
-        upScaleVert(&horiz_temp, dst, vert_scale, filter);
-    } else if (vert_scale < 1) {
-        downScaleVert(&horiz_temp, dst, vert_scale, filter);
+    if (vertScale >= 1) {
+        upScaleHorizTranspose(&horizTemp, dst, vertScale, filter);
+    } else if (vertScale < 1) {
+        downScaleVert(&horizTemp, dst, vertScale, filter);
     }
 
     SkDELETE(filter);
diff --git a/src/core/SkBitmapFilter.h b/src/core/SkBitmapFilter.h
index 8273420..38c2448 100644
--- a/src/core/SkBitmapFilter.h
+++ b/src/core/SkBitmapFilter.h
@@ -22,23 +22,26 @@
   public:
       SkBitmapFilter(float width)
       : fWidth(width), fInvWidth(1.f/width) {
-          precomputed = false;
+          fPrecomputed = false;  // the lookup tables are filled lazily by the first lookup()/lookupScalar() call
+          fLookupMultiplier = this->invWidth() * (SKBITMAP_FILTER_TABLE_SIZE-1);  // scales |x| in [0, width] to a table index in [0, TABLE_SIZE-1]
       }
 
       SkFixed lookup( float x ) const {
-          if (!precomputed) {
+          if (!fPrecomputed) {  // build both tables on first use
               precomputeTable();
           }
-          int filter_idx = int(fabsf(x * invWidth() * SKBITMAP_FILTER_TABLE_SIZE));
-          return fFilterTable[ SkTMin(filter_idx, SKBITMAP_FILTER_TABLE_SIZE-1) ];
+          int filter_idx = int(sk_float_abs(x * fLookupMultiplier));  // index depends only on |x|, so the table covers one half of the kernel
+          SkASSERT(filter_idx < SKBITMAP_FILTER_TABLE_SIZE);  // NOTE(review): replaces the old SkTMin clamp; if SkASSERT is debug-only, |x| > width() would index past the table in release -- confirm callers bound x
+          return fFilterTable[ filter_idx ];  // fixed-point weight
       }
 
-      float lookupFloat( float x ) const {
-          if (!precomputed) {
+      SkScalar lookupScalar( float x ) const {  // same lookup as lookup(), but reads the SkScalar table
+          if (!fPrecomputed) {  // build both tables on first use
               precomputeTable();
           }
-          int filter_idx = int(fabsf(x * invWidth() * SKBITMAP_FILTER_TABLE_SIZE));
-          return fFilterTableFloat[ SkTMin(filter_idx, SKBITMAP_FILTER_TABLE_SIZE-1) ];
+          int filter_idx = int(sk_float_abs(x * fLookupMultiplier));  // index depends only on |x|
+          SkASSERT(filter_idx < SKBITMAP_FILTER_TABLE_SIZE);  // NOTE(review): replaces the old SkTMin clamp; if SkASSERT is debug-only, |x| > width() would index past the table in release -- confirm callers bound x
+          return fFilterTableScalar[ filter_idx ];
       }
 
       float width() const { return fWidth; }
@@ -49,18 +52,20 @@
       float fWidth;
       float fInvWidth;
 
-      mutable bool precomputed;
+      float fLookupMultiplier;
+
+      mutable bool fPrecomputed;
       mutable SkFixed fFilterTable[SKBITMAP_FILTER_TABLE_SIZE];
-      mutable float fFilterTableFloat[SKBITMAP_FILTER_TABLE_SIZE];
+      mutable SkScalar fFilterTableScalar[SKBITMAP_FILTER_TABLE_SIZE];
   private:
       void precomputeTable() const {
-          precomputed = true;
+          fPrecomputed = true;  // set so later lookup()/lookupScalar() calls skip the rebuild; NOTE(review): no synchronization is visible here -- confirm single-threaded use
           SkFixed *ftp = fFilterTable;
-          float *ftp_float = fFilterTableFloat;
+          SkScalar *ftpScalar = fFilterTableScalar;  // write cursor into the scalar table, advanced in step with ftp
           for (int x = 0; x < SKBITMAP_FILTER_TABLE_SIZE; ++x) {
               float fx = ((float)x + .5f) * this->width() / SKBITMAP_FILTER_TABLE_SIZE;
               float filter_value = evaluate(fx);
-              *ftp_float++ = filter_value;
+              *ftpScalar++ = SkFloatToScalar(filter_value);  // the same sample is stored in scalar form next to the fixed-point entry below
               *ftp++ = SkFloatToFixed(filter_value);
           }
       }
diff --git a/src/core/SkBitmapProcShader.cpp b/src/core/SkBitmapProcShader.cpp
index 96cfea6..76ccb51 100644
--- a/src/core/SkBitmapProcShader.cpp
+++ b/src/core/SkBitmapProcShader.cpp
@@ -142,6 +142,7 @@
 
 void SkBitmapProcShader::endContext() {
     fState.fOrigBitmap.unlockPixels();
+    fState.endContext();  // tear down per-context proc state (presumably SkBitmapProcState::endContext, which frees the bitmap filter and the scaled bitmap)
     this->INHERITED::endContext();
 }
 
diff --git a/src/core/SkBitmapProcState.cpp b/src/core/SkBitmapProcState.cpp
index a2c7f99..a8a9b03 100644
--- a/src/core/SkBitmapProcState.cpp
+++ b/src/core/SkBitmapProcState.cpp
@@ -90,220 +90,346 @@
     return (dimension & ~0x3FFF) == 0;
 }
 
+// When HQ filtering is requested, tries to prescale the source bitmap: either
+// resample it to the target size (upscale) or build/select a mip level
+// (downscale), updating fInvMatrix, fBitmap and fFilterQuality to match.
+// TODO -- pass the clip in so only the needed portion is scaled (complicates the cache interface).
+void SkBitmapProcState::possiblyScaleImage() {
+    // Nothing to do unless high-quality filtering is still requested.
+    if (fFilterQuality != kHQ_BitmapFilter) {
+        return;
+    }
+
+    // STEP 1: UPSAMPLE?
+
+    // If the inverse matrix is at most scale+translate and magnifies on at
+    // least one axis (inverse scale < 1), resample the 8888 source to the
+    // target size here and remove the scaling component from the matrix.
+
+    if (fInvMatrix.getType() <= (SkMatrix::kScale_Mask | SkMatrix::kTranslate_Mask) &&
+        (fInvMatrix.getScaleX() < 1 || fInvMatrix.getScaleY() < 1) &&
+        fOrigBitmap.config() == SkBitmap::kARGB_8888_Config) {
+        // NOTE(review): a negative (mirroring) or near-zero inverse scale also passes the test above and would give a negative or huge size below -- confirm callers exclude it.
+        // All the criteria are met; let's make a new bitmap.
+        fScaledBitmap.setConfig(SkBitmap::kARGB_8888_Config,
+                                (int)(fOrigBitmap.width() / fInvMatrix.getScaleX()),  // target size, truncated toward zero
+                                (int)(fOrigBitmap.height() / fInvMatrix.getScaleY()));
+        fScaledBitmap.allocPixels();  // NOTE(review): the allocation result is not checked here
+        fOrigBitmap.scale(&fScaledBitmap);  // SkBitmap::scale() resamples into the size configured above
+        fBitmap = &fScaledBitmap;
+
+        // The scaled bitmap is at destination resolution, so the inverse matrix reduces to a translate of t / s per axis.
+        // (The arguments are computed from the old matrix before setTranslate overwrites it.)
+        fInvMatrix.setTranslate( 1/fInvMatrix.getScaleX() * fInvMatrix.getTranslateX(),
+                                 1/fInvMatrix.getScaleY() * fInvMatrix.getTranslateY() );
+
+        // no need for any further filtering; we just did it!
+
+        fFilterQuality = kNone_BitmapFilter;
+
+        return;
+    }
+
+    if (!fOrigBitmap.hasMipMap()) {
+
+        // STEP 2: DOWNSAMPLE
+
+        // Check to see if the transformation matrix is scaling *down*.
+        // If so, automatically build mipmaps.
+
+        SkPoint v1, v2;
+
+        // conservatively estimate if the matrix is scaling down by seeing
+        // what its upper left 2x2 portion does to two unit vectors.
+
+        v1.fX = fInvMatrix.getScaleX();
+        v1.fY = fInvMatrix.getSkewY();
+
+        v2.fX = fInvMatrix.getSkewX();
+        v2.fY = fInvMatrix.getScaleY();
+        // A squared length above 1 means one destination step covers more than one source pixel on that axis.
+        if (v1.fX * v1.fX + v1.fY * v1.fY > 1 ||
+            v2.fX * v2.fX + v2.fY * v2.fY > 1) {
+            fOrigBitmap.buildMipMap();
+
+            // Now that we've built the mipmaps and we know we're downsampling,
+            // downgrade to bilinear interpolation for the mip level.
+
+            fFilterQuality = kBilerp_BitmapFilter;
+        }
+    }
+
+    if (fOrigBitmap.hasMipMap()) {
+
+        // STEP 3: We've got mipmaps, let's choose the closest level as our render
+        // source and adjust the matrix accordingly.
+        // The return value is used as a level shift, and 0 is treated as "no level chosen" -- semantics assumed from this usage; confirm against SkBitmap::extractMipLevel.
+        int shift = fOrigBitmap.extractMipLevel(&fScaledBitmap,
+                                                SkScalarToFixed(fInvMatrix.getScaleX()),
+                                                SkScalarToFixed(fInvMatrix.getSkewY()));
+
+        if (shift > 0) {
+            SkScalar scale = SkFixedToScalar(SK_Fixed1 >> shift);  // 1 / 2^shift
+            fInvMatrix.postScale(scale, scale);  // rescale the inverse map into the chosen level's coordinates
+            fBitmap = &fScaledBitmap;
+        }
+    }
+}
+
+void SkBitmapProcState::endContext() {  // releases the per-context resources: the HQ bitmap filter and the scaled bitmap
+    SkDELETE(fBitmapFilter);  // filter assigned from allocateBitmapFilter() in chooseBitmapFilterProc()
+    fBitmapFilter = NULL;  // cleared so no dangling pointer is left behind after the delete
+    fScaledBitmap.reset();  // NOTE(review): fBitmap may still point at fScaledBitmap after this -- confirm nothing samples it after endContext
+}
+
 bool SkBitmapProcState::chooseProcs(const SkMatrix& inv, const SkPaint& paint) {
     if (fOrigBitmap.width() == 0 || fOrigBitmap.height() == 0) {
         return false;
     }
 
-    const SkMatrix* m;
-    bool trivial_matrix = (inv.getType() & ~SkMatrix::kTranslate_Mask) == 0;
-    bool clamp_clamp = SkShader::kClamp_TileMode == fTileModeX &&
+    bool trivialMatrix = (inv.getType() & ~SkMatrix::kTranslate_Mask) == 0;
+    bool clampClamp = SkShader::kClamp_TileMode == fTileModeX &&
                        SkShader::kClamp_TileMode == fTileModeY;
 
-    if (clamp_clamp || trivial_matrix) {
-        m = &inv;
-    } else {
-        fUnitInvMatrix = inv;
-        fUnitInvMatrix.postIDiv(fOrigBitmap.width(), fOrigBitmap.height());
-        m = &fUnitInvMatrix;
+    fInvMatrix = inv;
+    if (!(clampClamp || trivialMatrix)) {
+        fInvMatrix.postIDiv(fOrigBitmap.width(), fOrigBitmap.height());
     }
 
     fBitmap = &fOrigBitmap;
-    if (fOrigBitmap.hasMipMap()) {
-        int shift = fOrigBitmap.extractMipLevel(&fMipBitmap,
-                                                SkScalarToFixed(m->getScaleX()),
-                                                SkScalarToFixed(m->getSkewY()));
 
-        if (shift > 0) {
-            if (m != &fUnitInvMatrix) {
-                fUnitInvMatrix = *m;
-                m = &fUnitInvMatrix;
-            }
+    // initialize our filter quality to the one requested by the caller.
+    // We may downgrade it later if we determine that we either don't need
+    // or can't provide as high a quality filtering as the user requested.
 
-            SkScalar scale = SkFixedToScalar(SK_Fixed1 >> shift);
-            fUnitInvMatrix.postScale(scale, scale);
-
-            // now point here instead of fOrigBitmap
-            fBitmap = &fMipBitmap;
+    fFilterQuality = kNone_BitmapFilter;
+    if (paint.isFilterBitmap()) {
+        if (paint.getFlags() & SkPaint::kHighQualityFilterBitmap_Flag) {
+            fFilterQuality = kHQ_BitmapFilter;
+        } else {
+            fFilterQuality = kBilerp_BitmapFilter;
         }
     }
 
-    // wack our matrix to exactly no-scale, if we're really close to begin with
-    if (matrix_only_scale_translate(*m)) {
+#ifndef SK_IGNORE_IMAGE_PRESCALE
+    // possiblyScaleImage will look to see if it can rescale the image as a
+    // preprocess; either by scaling up to the target size, or by selecting
+    // a nearby mipmap level.  If it does, it will adjust the working
+    // matrix as well as the working bitmap.  It may also adjust the filter
+    // quality to avoid re-filtering an already perfectly scaled image.
+
+    this->possiblyScaleImage();
+#endif
+
+    // Now that all possible changes to the matrix have taken place, check
+    // to see if we're really close to a no-scale matrix.  If so, explicitly
+    // set it to be so.  Subsequent code may inspect this matrix to choose
+    // a faster path in this case.
+
+    // This code will only execute if the matrix has some scale component;
+    // if it's already pure translate then we won't do this inversion.
+
+    if (matrix_only_scale_translate(fInvMatrix)) {
         SkMatrix forward;
-        if (m->invert(&forward)) {
-            if (clamp_clamp ? just_trans_clamp(forward, *fBitmap)
+        if (fInvMatrix.invert(&forward)) {
+            if (clampClamp ? just_trans_clamp(forward, *fBitmap)
                             : just_trans_general(forward)) {
                 SkScalar tx = -SkScalarRoundToScalar(forward.getTranslateX());
                 SkScalar ty = -SkScalarRoundToScalar(forward.getTranslateY());
-                fUnitInvMatrix.setTranslate(tx, ty);
-                m = &fUnitInvMatrix;
-                // now the following code will sniff m, and decide to take the
-                // fast case (since m is purely translate).
+                fInvMatrix.setTranslate(tx, ty);
+
             }
         }
     }
 
-    // Below this point, we should never refer to the inv parameter, since we
-    // may be using a munged version for "our" inverse.
-
-    fInvMatrix      = m;
-    fInvProc        = m->getMapXYProc();
-    fInvType        = m->getType();
-    fInvSx          = SkScalarToFixed(m->getScaleX());
-    fInvSxFractionalInt = SkScalarToFractionalInt(m->getScaleX());
-    fInvKy          = SkScalarToFixed(m->getSkewY());
-    fInvKyFractionalInt = SkScalarToFractionalInt(m->getSkewY());
+    fInvProc        = fInvMatrix.getMapXYProc();
+    fInvType        = fInvMatrix.getType();
+    fInvSx          = SkScalarToFixed(fInvMatrix.getScaleX());
+    fInvSxFractionalInt = SkScalarToFractionalInt(fInvMatrix.getScaleX());
+    fInvKy          = SkScalarToFixed(fInvMatrix.getSkewY());
+    fInvKyFractionalInt = SkScalarToFractionalInt(fInvMatrix.getSkewY());
 
     fAlphaScale = SkAlpha255To256(paint.getAlpha());
 
-    // pick-up filtering from the paint, but only if the matrix is
-    // more complex than identity/translate (i.e. no need to pay the cost
-    // of filtering if we're not scaled etc.).
-    // note: we explicitly check inv, since m might be scaled due to unitinv
-    //       trickery, but we don't want to see that for this test
-    fDoFilter = paint.isFilterBitmap() &&
-                (fInvType > SkMatrix::kTranslate_Mask &&
-                 valid_for_filtering(fBitmap->width() | fBitmap->height()));
-
     fShaderProc32 = NULL;
     fShaderProc16 = NULL;
     fSampleProc32 = NULL;
     fSampleProc16 = NULL;
 
-    fMatrixProc = this->chooseMatrixProc(trivial_matrix);
+    // recompute the triviality of the matrix here because we may have
+    // changed it!
+
+    trivialMatrix = (fInvMatrix.getType() & ~SkMatrix::kTranslate_Mask) == 0;
+
+    if (kHQ_BitmapFilter == fFilterQuality) {
+        // If this is still set, that means we wanted HQ sampling
+        // but couldn't do it as a preprocess.  Let's try to install
+        // the scanline version of the HQ sampler.  If that process fails,
+        // downgrade to bilerp.
+
+        // NOTE: Might need to be careful here in the future when we want
+        // to have the platform proc have a shot at this; it's possible that
+        // the chooseBitmapFilterProc will fail to install a shader but a
+        // platform-specific one might succeed, so it might be premature here
+        // to fall back to bilerp.  This needs thought.
+
+        SkASSERT(fInvType > SkMatrix::kTranslate_Mask);
+
+        fShaderProc32 = this->chooseBitmapFilterProc();
+        if (!fShaderProc32) {
+            fFilterQuality = kBilerp_BitmapFilter;
+        }
+    }
+
+    if (kBilerp_BitmapFilter == fFilterQuality) {
+        // Only try bilerp if the matrix is "interesting" and
+        // the image has a suitable size.
+
+        if (fInvType <= SkMatrix::kTranslate_Mask ||
+            !valid_for_filtering(fBitmap->width() | fBitmap->height())) {
+                 fFilterQuality = kNone_BitmapFilter;
+        }
+    }
+
+    // At this point, we know exactly what kind of sampling the per-scanline
+    // shader will perform.
+
+    fMatrixProc = this->chooseMatrixProc(trivialMatrix);
     if (NULL == fMatrixProc) {
         return false;
     }
 
     ///////////////////////////////////////////////////////////////////////
 
-    int index = 0;
-    if (fAlphaScale < 256) {  // note: this distinction is not used for D16
-        index |= 1;
-    }
-    if (fInvType <= (SkMatrix::kTranslate_Mask | SkMatrix::kScale_Mask)) {
-        index |= 2;
-    }
-    if (fDoFilter) {
-        index |= 4;
-    }
-    // bits 3,4,5 encoding the source bitmap format
-    switch (fBitmap->config()) {
-        case SkBitmap::kARGB_8888_Config:
-            index |= 0;
-            break;
-        case SkBitmap::kRGB_565_Config:
-            index |= 8;
-            break;
-        case SkBitmap::kIndex8_Config:
-            index |= 16;
-            break;
-        case SkBitmap::kARGB_4444_Config:
-            index |= 24;
-            break;
-        case SkBitmap::kA8_Config:
-            index |= 32;
-            fPaintPMColor = SkPreMultiplyColor(paint.getColor());
-            break;
-        default:
-            return false;
-    }
+    // No need to do this if we're doing HQ sampling; if filter quality is
+    // still set to HQ by the time we get here, then we must have installed
+    // the shader proc above and can skip all this.
 
-#if !SK_ARM_NEON_IS_ALWAYS
-    static const SampleProc32 gSkBitmapProcStateSample32[] = {
-        S32_opaque_D32_nofilter_DXDY,
-        S32_alpha_D32_nofilter_DXDY,
-        S32_opaque_D32_nofilter_DX,
-        S32_alpha_D32_nofilter_DX,
-        S32_opaque_D32_filter_DXDY,
-        S32_alpha_D32_filter_DXDY,
-        S32_opaque_D32_filter_DX,
-        S32_alpha_D32_filter_DX,
+    if (fFilterQuality < kHQ_BitmapFilter) {
 
-        S16_opaque_D32_nofilter_DXDY,
-        S16_alpha_D32_nofilter_DXDY,
-        S16_opaque_D32_nofilter_DX,
-        S16_alpha_D32_nofilter_DX,
-        S16_opaque_D32_filter_DXDY,
-        S16_alpha_D32_filter_DXDY,
-        S16_opaque_D32_filter_DX,
-        S16_alpha_D32_filter_DX,
-
-        SI8_opaque_D32_nofilter_DXDY,
-        SI8_alpha_D32_nofilter_DXDY,
-        SI8_opaque_D32_nofilter_DX,
-        SI8_alpha_D32_nofilter_DX,
-        SI8_opaque_D32_filter_DXDY,
-        SI8_alpha_D32_filter_DXDY,
-        SI8_opaque_D32_filter_DX,
-        SI8_alpha_D32_filter_DX,
-
-        S4444_opaque_D32_nofilter_DXDY,
-        S4444_alpha_D32_nofilter_DXDY,
-        S4444_opaque_D32_nofilter_DX,
-        S4444_alpha_D32_nofilter_DX,
-        S4444_opaque_D32_filter_DXDY,
-        S4444_alpha_D32_filter_DXDY,
-        S4444_opaque_D32_filter_DX,
-        S4444_alpha_D32_filter_DX,
-
-        // A8 treats alpha/opaque the same (equally efficient)
-        SA8_alpha_D32_nofilter_DXDY,
-        SA8_alpha_D32_nofilter_DXDY,
-        SA8_alpha_D32_nofilter_DX,
-        SA8_alpha_D32_nofilter_DX,
-        SA8_alpha_D32_filter_DXDY,
-        SA8_alpha_D32_filter_DXDY,
-        SA8_alpha_D32_filter_DX,
-        SA8_alpha_D32_filter_DX
-    };
-
-    static const SampleProc16 gSkBitmapProcStateSample16[] = {
-        S32_D16_nofilter_DXDY,
-        S32_D16_nofilter_DX,
-        S32_D16_filter_DXDY,
-        S32_D16_filter_DX,
-
-        S16_D16_nofilter_DXDY,
-        S16_D16_nofilter_DX,
-        S16_D16_filter_DXDY,
-        S16_D16_filter_DX,
-
-        SI8_D16_nofilter_DXDY,
-        SI8_D16_nofilter_DX,
-        SI8_D16_filter_DXDY,
-        SI8_D16_filter_DX,
-
-        // Don't support 4444 -> 565
-        NULL, NULL, NULL, NULL,
-        // Don't support A8 -> 565
-        NULL, NULL, NULL, NULL
-    };
-#endif
-
-    fSampleProc32 = SK_ARM_NEON_WRAP(gSkBitmapProcStateSample32)[index];
-    index >>= 1;    // shift away any opaque/alpha distinction
-    fSampleProc16 = SK_ARM_NEON_WRAP(gSkBitmapProcStateSample16)[index];
-
-    // our special-case shaderprocs
-    if (SK_ARM_NEON_WRAP(S16_D16_filter_DX) == fSampleProc16) {
-        if (clamp_clamp) {
-            fShaderProc16 = SK_ARM_NEON_WRAP(Clamp_S16_D16_filter_DX_shaderproc);
-        } else if (SkShader::kRepeat_TileMode == fTileModeX &&
-                   SkShader::kRepeat_TileMode == fTileModeY) {
-            fShaderProc16 = SK_ARM_NEON_WRAP(Repeat_S16_D16_filter_DX_shaderproc);
+        int index = 0;
+        if (fAlphaScale < 256) {  // note: this distinction is not used for D16
+            index |= 1;
         }
-    } else if (SK_ARM_NEON_WRAP(SI8_opaque_D32_filter_DX) == fSampleProc32 && clamp_clamp) {
-        fShaderProc32 = SK_ARM_NEON_WRAP(Clamp_SI8_opaque_D32_filter_DX_shaderproc);
-    }
+        if (fInvType <= (SkMatrix::kTranslate_Mask | SkMatrix::kScale_Mask)) {
+            index |= 2;
+        }
+        if (fFilterQuality != kNone_BitmapFilter) {
+            index |= 4;
+        }
+        // bits 3,4,5 encode the source bitmap format
+        switch (fBitmap->config()) {
+            case SkBitmap::kARGB_8888_Config:
+                index |= 0;
+                break;
+            case SkBitmap::kRGB_565_Config:
+                index |= 8;
+                break;
+            case SkBitmap::kIndex8_Config:
+                index |= 16;
+                break;
+            case SkBitmap::kARGB_4444_Config:
+                index |= 24;
+                break;
+            case SkBitmap::kA8_Config:
+                index |= 32;
+                fPaintPMColor = SkPreMultiplyColor(paint.getColor());
+                break;
+            default:
+                return false;
+        }
 
-    if (NULL == fShaderProc32) {
-        fShaderProc32 = this->chooseShaderProc32();
-    }
+    #if !SK_ARM_NEON_IS_ALWAYS
+        static const SampleProc32 gSkBitmapProcStateSample32[] = {
+            S32_opaque_D32_nofilter_DXDY,
+            S32_alpha_D32_nofilter_DXDY,
+            S32_opaque_D32_nofilter_DX,
+            S32_alpha_D32_nofilter_DX,
+            S32_opaque_D32_filter_DXDY,
+            S32_alpha_D32_filter_DXDY,
+            S32_opaque_D32_filter_DX,
+            S32_alpha_D32_filter_DX,
 
-    if (NULL == fShaderProc32) {
-        fShaderProc32 = this->chooseBitmapFilterProc(paint);
+            S16_opaque_D32_nofilter_DXDY,
+            S16_alpha_D32_nofilter_DXDY,
+            S16_opaque_D32_nofilter_DX,
+            S16_alpha_D32_nofilter_DX,
+            S16_opaque_D32_filter_DXDY,
+            S16_alpha_D32_filter_DXDY,
+            S16_opaque_D32_filter_DX,
+            S16_alpha_D32_filter_DX,
+
+            SI8_opaque_D32_nofilter_DXDY,
+            SI8_alpha_D32_nofilter_DXDY,
+            SI8_opaque_D32_nofilter_DX,
+            SI8_alpha_D32_nofilter_DX,
+            SI8_opaque_D32_filter_DXDY,
+            SI8_alpha_D32_filter_DXDY,
+            SI8_opaque_D32_filter_DX,
+            SI8_alpha_D32_filter_DX,
+
+            S4444_opaque_D32_nofilter_DXDY,
+            S4444_alpha_D32_nofilter_DXDY,
+            S4444_opaque_D32_nofilter_DX,
+            S4444_alpha_D32_nofilter_DX,
+            S4444_opaque_D32_filter_DXDY,
+            S4444_alpha_D32_filter_DXDY,
+            S4444_opaque_D32_filter_DX,
+            S4444_alpha_D32_filter_DX,
+
+            // A8 treats alpha/opaque the same (equally efficient)
+            SA8_alpha_D32_nofilter_DXDY,
+            SA8_alpha_D32_nofilter_DXDY,
+            SA8_alpha_D32_nofilter_DX,
+            SA8_alpha_D32_nofilter_DX,
+            SA8_alpha_D32_filter_DXDY,
+            SA8_alpha_D32_filter_DXDY,
+            SA8_alpha_D32_filter_DX,
+            SA8_alpha_D32_filter_DX
+        };
+
+        static const SampleProc16 gSkBitmapProcStateSample16[] = {
+            S32_D16_nofilter_DXDY,
+            S32_D16_nofilter_DX,
+            S32_D16_filter_DXDY,
+            S32_D16_filter_DX,
+
+            S16_D16_nofilter_DXDY,
+            S16_D16_nofilter_DX,
+            S16_D16_filter_DXDY,
+            S16_D16_filter_DX,
+
+            SI8_D16_nofilter_DXDY,
+            SI8_D16_nofilter_DX,
+            SI8_D16_filter_DXDY,
+            SI8_D16_filter_DX,
+
+            // Don't support 4444 -> 565
+            NULL, NULL, NULL, NULL,
+            // Don't support A8 -> 565
+            NULL, NULL, NULL, NULL
+        };
+    #endif
+
+        fSampleProc32 = SK_ARM_NEON_WRAP(gSkBitmapProcStateSample32)[index];
+        index >>= 1;    // shift away any opaque/alpha distinction
+        fSampleProc16 = SK_ARM_NEON_WRAP(gSkBitmapProcStateSample16)[index];
+
+        // our special-case shaderprocs
+        if (SK_ARM_NEON_WRAP(S16_D16_filter_DX) == fSampleProc16) {
+            if (clampClamp) {
+                fShaderProc16 = SK_ARM_NEON_WRAP(Clamp_S16_D16_filter_DX_shaderproc);
+            } else if (SkShader::kRepeat_TileMode == fTileModeX &&
+                       SkShader::kRepeat_TileMode == fTileModeY) {
+                fShaderProc16 = SK_ARM_NEON_WRAP(Repeat_S16_D16_filter_DX_shaderproc);
+            }
+        } else if (SK_ARM_NEON_WRAP(SI8_opaque_D32_filter_DX) == fSampleProc32 && clampClamp) {
+            fShaderProc32 = SK_ARM_NEON_WRAP(Clamp_SI8_opaque_D32_filter_DX_shaderproc);
+        }
+
+        if (NULL == fShaderProc32) {
+            fShaderProc32 = this->chooseShaderProc32();
+        }
     }
 
     // see if our platform has any accelerated overrides
@@ -319,7 +445,7 @@
     SkASSERT(((s.fInvType & ~SkMatrix::kTranslate_Mask)) == 0);
     SkASSERT(s.fInvKy == 0);
     SkASSERT(count > 0 && colors != NULL);
-    SkASSERT(!s.fDoFilter);
+    SkASSERT(SkBitmapProcState::kNone_BitmapFilter == s.fFilterQuality);
 
     const int maxX = s.fBitmap->width() - 1;
     const int maxY = s.fBitmap->height() - 1;
@@ -328,7 +454,7 @@
 #ifdef SK_DEBUG
     {
         SkPoint pt;
-        s.fInvProc(*s.fInvMatrix, SkIntToScalar(x) + SK_ScalarHalf,
+        s.fInvProc(s.fInvMatrix, SkIntToScalar(x) + SK_ScalarHalf,
                    SkIntToScalar(y) + SK_ScalarHalf, &pt);
         int iy2 = SkClampMax(SkScalarFloorToInt(pt.fY), maxY);
         int ix2 = SkScalarFloorToInt(pt.fX);
@@ -393,7 +519,7 @@
     SkASSERT(((s.fInvType & ~SkMatrix::kTranslate_Mask)) == 0);
     SkASSERT(s.fInvKy == 0);
     SkASSERT(count > 0 && colors != NULL);
-    SkASSERT(!s.fDoFilter);
+    SkASSERT(SkBitmapProcState::kNone_BitmapFilter == s.fFilterQuality);
 
     const int stopX = s.fBitmap->width();
     const int stopY = s.fBitmap->height();
@@ -402,7 +528,7 @@
 #ifdef SK_DEBUG
     {
         SkPoint pt;
-        s.fInvProc(*s.fInvMatrix, SkIntToScalar(x) + SK_ScalarHalf,
+        s.fInvProc(s.fInvMatrix, SkIntToScalar(x) + SK_ScalarHalf,
                    SkIntToScalar(y) + SK_ScalarHalf, &pt);
         int iy2 = sk_int_mod(SkScalarFloorToInt(pt.fY), stopY);
         int ix2 = SkScalarFloorToInt(pt.fX);
@@ -439,7 +565,7 @@
     int iY1   SK_INIT_TO_AVOID_WARNING;
     int iSubY SK_INIT_TO_AVOID_WARNING;
 
-    if (s.fDoFilter) {
+    if (s.fFilterQuality != SkBitmapProcState::kNone_BitmapFilter) {
         SkBitmapProcState::MatrixProc mproc = s.getMatrixProc();
         uint32_t xy[2];
 
@@ -453,7 +579,7 @@
 
         if (s.fInvType > SkMatrix::kTranslate_Mask) {
             SkPoint pt;
-            s.fInvProc(*s.fInvMatrix,
+            s.fInvProc(s.fInvMatrix,
                        SkIntToScalar(x) + SK_ScalarHalf,
                        SkIntToScalar(y) + SK_ScalarHalf,
                        &pt);
@@ -488,7 +614,7 @@
 #ifdef SK_DEBUG
         {
             SkPoint pt;
-            s.fInvProc(*s.fInvMatrix,
+            s.fInvProc(s.fInvMatrix,
                        SkIntToScalar(x) + SK_ScalarHalf,
                        SkIntToScalar(y) + SK_ScalarHalf,
                        &pt);
@@ -520,7 +646,7 @@
     const SkPMColor* row0 = s.fBitmap->getAddr32(0, iY0);
     SkPMColor color;
 
-    if (s.fDoFilter) {
+    if (s.fFilterQuality != SkBitmapProcState::kNone_BitmapFilter) {
         const SkPMColor* row1 = s.fBitmap->getAddr32(0, iY1);
 
         if (s.fAlphaScale < 256) {
@@ -547,7 +673,7 @@
 
 bool SkBitmapProcState::setupForTranslate() {
     SkPoint pt;
-    fInvProc(*fInvMatrix, SK_ScalarHalf, SK_ScalarHalf, &pt);
+    fInvProc(fInvMatrix, SK_ScalarHalf, SK_ScalarHalf, &pt);
 
     /*
      *  if the translate is larger than our ints, we can get random results, or
@@ -576,7 +702,9 @@
     static const unsigned kMask = SkMatrix::kTranslate_Mask | SkMatrix::kScale_Mask;
 
     if (1 == fBitmap->width() && 0 == (fInvType & ~kMask)) {
-        if (!fDoFilter && fInvType <= SkMatrix::kTranslate_Mask && !this->setupForTranslate()) {
+        if (kNone_BitmapFilter == fFilterQuality &&
+            fInvType <= SkMatrix::kTranslate_Mask &&
+            !this->setupForTranslate()) {
             return DoNothing_shaderproc;
         }
         return S32_D32_constX_shaderproc;
@@ -588,7 +716,7 @@
     if (fInvType > SkMatrix::kTranslate_Mask) {
         return NULL;
     }
-    if (fDoFilter) {
+    if (fFilterQuality != kNone_BitmapFilter) {
         return NULL;
     }
 
@@ -684,9 +812,9 @@
     //  scale -vs- affine
     //  filter -vs- nofilter
     if (state.fInvType <= (SkMatrix::kTranslate_Mask | SkMatrix::kScale_Mask)) {
-        proc = state.fDoFilter ? check_scale_filter : check_scale_nofilter;
+        proc = state.fFilterQuality != kNone_BitmapFilter ? check_scale_filter : check_scale_nofilter;
     } else {
-        proc = state.fDoFilter ? check_affine_filter : check_affine_nofilter;
+        proc = state.fFilterQuality != kNone_BitmapFilter ? check_affine_filter : check_affine_nofilter;
     }
     proc(bitmapXY, count, state.fBitmap->width(), state.fBitmap->height());
 }
@@ -721,7 +849,7 @@
         size >>= 2;
     }
 
-    if (fDoFilter) {
+    if (fFilterQuality != kNone_BitmapFilter) {
         size >>= 1;
     }
 
diff --git a/src/core/SkBitmapProcState.h b/src/core/SkBitmapProcState.h
index 5c6a415..a644dd1 100644
--- a/src/core/SkBitmapProcState.h
+++ b/src/core/SkBitmapProcState.h
@@ -64,8 +64,8 @@
     typedef U16CPU (*FixedTileLowBitsProc)(SkFixed, int);   // returns 0..0xF
     typedef U16CPU (*IntTileProc)(int value, int count);   // returns 0..count-1
 
-    const SkBitmap*     fBitmap;            // chooseProcs - orig or mip
-    const SkMatrix*     fInvMatrix;         // chooseProcs
+    const SkBitmap*     fBitmap;            // chooseProcs - orig or scaled
+    SkMatrix            fInvMatrix;         // chooseProcs
     SkMatrix::MapXYProc fInvProc;           // chooseProcs
 
     SkFractionalInt     fInvSxFractionalInt;
@@ -86,7 +86,18 @@
     uint8_t             fInvType;           // chooseProcs
     uint8_t             fTileModeX;         // CONSTRUCTOR
     uint8_t             fTileModeY;         // CONSTRUCTOR
-    SkBool8             fDoFilter;          // chooseProcs
+
+    enum {
+        kNone_BitmapFilter,     // no filtering: the "nofilter" sample procs are selected
+        kBilerp_BitmapFilter,   // bilerp: the "filter" sample procs are selected
+        kHQ_BitmapFilter        // HQ sampling via the proc from chooseBitmapFilterProc()
+    } fFilterQuality;          // chooseProcs; keep this order, chooseProcs compares with <
+
+    /** The shader will let us know when we can release some of our resources
+      * like scaled bitmaps.
+      */
+
+    void endContext();
 
     /** Platforms implement this, and can optionally overwrite only the
         following fields:
@@ -140,18 +151,18 @@
     SampleProc32        fSampleProc32;      // chooseProcs
     SampleProc16        fSampleProc16;      // chooseProcs
 
-    SkMatrix            fUnitInvMatrix;     // chooseProcs
     SkBitmap            fOrigBitmap;        // CONSTRUCTOR
-    SkBitmap            fMipBitmap;
+    SkBitmap            fScaledBitmap;      // chooseProcs
 
     MatrixProc chooseMatrixProc(bool trivial_matrix);
     bool chooseProcs(const SkMatrix& inv, const SkPaint&);
     ShaderProc32 chooseShaderProc32();
 
-    void buildFilterCoefficients(SkFixed dst[4], float t) const;
+    void possiblyScaleImage();
+
     SkBitmapFilter *fBitmapFilter;
 
-    ShaderProc32 chooseBitmapFilterProc(const SkPaint &paint);
+    ShaderProc32 chooseBitmapFilterProc();
 
     // Return false if we failed to setup for fast translate (e.g. overflow)
     bool setupForTranslate();
diff --git a/src/core/SkBitmapProcState_matrix.h b/src/core/SkBitmapProcState_matrix.h
index cc65c2d..d796d0b 100644
--- a/src/core/SkBitmapProcState_matrix.h
+++ b/src/core/SkBitmapProcState_matrix.h
@@ -55,7 +55,7 @@
     SkFractionalInt fx;
     {
         SkPoint pt;
-        s.fInvProc(*s.fInvMatrix, SkIntToScalar(x) + SK_ScalarHalf,
+        s.fInvProc(s.fInvMatrix, SkIntToScalar(x) + SK_ScalarHalf,
                                   SkIntToScalar(y) + SK_ScalarHalf, &pt);
         fx = SkScalarToFractionalInt(pt.fY);
         const unsigned maxY = s.fBitmap->height() - 1;
@@ -116,7 +116,7 @@
 
     PREAMBLE(s);
     SkPoint srcPt;
-    s.fInvProc(*s.fInvMatrix,
+    s.fInvProc(s.fInvMatrix,
                SkIntToScalar(x) + SK_ScalarHalf,
                SkIntToScalar(y) + SK_ScalarHalf, &srcPt);
 
@@ -143,7 +143,7 @@
     int maxX = s.fBitmap->width() - 1;
     int maxY = s.fBitmap->height() - 1;
 
-    SkPerspIter   iter(*s.fInvMatrix,
+    SkPerspIter   iter(s.fInvMatrix,
                        SkIntToScalar(x) + SK_ScalarHalf,
                        SkIntToScalar(y) + SK_ScalarHalf, count);
 
@@ -188,7 +188,7 @@
 
     {
         SkPoint pt;
-        s.fInvProc(*s.fInvMatrix, SkIntToScalar(x) + SK_ScalarHalf,
+        s.fInvProc(s.fInvMatrix, SkIntToScalar(x) + SK_ScalarHalf,
                                   SkIntToScalar(y) + SK_ScalarHalf, &pt);
         const SkFixed fy = SkScalarToFixed(pt.fY) - (s.fFilterOneY >> 1);
         const unsigned maxY = s.fBitmap->height() - 1;
@@ -222,7 +222,7 @@
 
     PREAMBLE(s);
     SkPoint srcPt;
-    s.fInvProc(*s.fInvMatrix,
+    s.fInvProc(s.fInvMatrix,
                SkIntToScalar(x) + SK_ScalarHalf,
                SkIntToScalar(y) + SK_ScalarHalf, &srcPt);
 
@@ -254,7 +254,7 @@
     SkFixed oneX = s.fFilterOneX;
     SkFixed oneY = s.fFilterOneY;
 
-    SkPerspIter   iter(*s.fInvMatrix,
+    SkPerspIter   iter(s.fInvMatrix,
                        SkIntToScalar(x) + SK_ScalarHalf,
                        SkIntToScalar(y) + SK_ScalarHalf, count);
 
diff --git a/src/core/SkBitmapProcState_matrixProcs.cpp b/src/core/SkBitmapProcState_matrixProcs.cpp
index 15c17b6..d3cd550 100644
--- a/src/core/SkBitmapProcState_matrixProcs.cpp
+++ b/src/core/SkBitmapProcState_matrixProcs.cpp
@@ -311,7 +311,7 @@
 static int nofilter_trans_preamble(const SkBitmapProcState& s, uint32_t** xy,
                                    int x, int y) {
     SkPoint pt;
-    s.fInvProc(*s.fInvMatrix, SkIntToScalar(x) + SK_ScalarHalf,
+    s.fInvProc(s.fInvMatrix, SkIntToScalar(x) + SK_ScalarHalf,
                SkIntToScalar(y) + SK_ScalarHalf, &pt);
     **xy = s.fIntTileProcY(SkScalarToFixed(pt.fY) >> 16,
                            s.fBitmap->height());
@@ -472,7 +472,7 @@
 //    test_int_tileprocs();
     // check for our special case when there is no scale/affine/perspective
     if (trivial_matrix) {
-        SkASSERT(!fDoFilter);
+        SkASSERT(kNone_BitmapFilter == fFilterQuality);
         fIntTileProcY = choose_int_tile_proc(fTileModeY);
         switch (fTileModeX) {
             case SkShader::kClamp_TileMode:
@@ -485,7 +485,7 @@
     }
 
     int index = 0;
-    if (fDoFilter) {
+    if (fFilterQuality != kNone_BitmapFilter) {
         index = 1;
     }
     if (fInvType & SkMatrix::kPerspective_Mask) {
diff --git a/src/core/SkBitmapProcState_sample.h b/src/core/SkBitmapProcState_sample.h
index ea377f2..ac14b96 100644
--- a/src/core/SkBitmapProcState_sample.h
+++ b/src/core/SkBitmapProcState_sample.h
@@ -42,7 +42,7 @@
                               const uint32_t* SK_RESTRICT xy,
                               int count, DSTTYPE* SK_RESTRICT colors) {
     SkASSERT(count > 0 && colors != NULL);
-    SkASSERT(s.fDoFilter == false);
+    SkASSERT(SkBitmapProcState::kNone_BitmapFilter == s.fFilterQuality);
     SkDEBUGCODE(CHECKSTATE(s);)
 
 #ifdef PREAMBLE
@@ -85,7 +85,7 @@
                             int count, DSTTYPE* SK_RESTRICT colors) {
     SkASSERT(count > 0 && colors != NULL);
     SkASSERT(s.fInvType <= (SkMatrix::kTranslate_Mask | SkMatrix::kScale_Mask));
-    SkASSERT(s.fDoFilter == false);
+    SkASSERT(SkBitmapProcState::kNone_BitmapFilter == s.fFilterQuality);
     SkDEBUGCODE(CHECKSTATE(s);)
 
 #ifdef PREAMBLE
@@ -139,7 +139,7 @@
                           const uint32_t* SK_RESTRICT xy,
                            int count, DSTTYPE* SK_RESTRICT colors) {
     SkASSERT(count > 0 && colors != NULL);
-    SkASSERT(s.fDoFilter);
+    SkASSERT(s.fFilterQuality != SkBitmapProcState::kNone_BitmapFilter);
     SkDEBUGCODE(CHECKSTATE(s);)
 
 #ifdef PREAMBLE
@@ -185,7 +185,7 @@
                             const uint32_t* SK_RESTRICT xy,
                             int count, DSTTYPE* SK_RESTRICT colors) {
     SkASSERT(count > 0 && colors != NULL);
-    SkASSERT(s.fDoFilter);
+    SkASSERT(s.fFilterQuality != SkBitmapProcState::kNone_BitmapFilter);
     SkDEBUGCODE(CHECKSTATE(s);)
 
 #ifdef PREAMBLE
diff --git a/src/core/SkBitmapProcState_shaderproc.h b/src/core/SkBitmapProcState_shaderproc.h
index 6b8f74a..d765b8e 100644
--- a/src/core/SkBitmapProcState_shaderproc.h
+++ b/src/core/SkBitmapProcState_shaderproc.h
@@ -21,7 +21,7 @@
                              SkMatrix::kScale_Mask)) == 0);
     SkASSERT(s.fInvKy == 0);
     SkASSERT(count > 0 && colors != NULL);
-    SkASSERT(s.fDoFilter);
+    SkASSERT(s.fFilterQuality != SkBitmapProcState::kNone_BitmapFilter);
     SkDEBUGCODE(CHECKSTATE(s);)
 
     const unsigned maxX = s.fBitmap->width() - 1;
@@ -34,7 +34,7 @@
 
     {
         SkPoint pt;
-        s.fInvProc(*s.fInvMatrix, SkIntToScalar(x) + SK_ScalarHalf,
+        s.fInvProc(s.fInvMatrix, SkIntToScalar(x) + SK_ScalarHalf,
                    SkIntToScalar(y) + SK_ScalarHalf, &pt);
         SkFixed fy = SkScalarToFixed(pt.fY) - (s.fFilterOneY >> 1);
         const unsigned maxY = s.fBitmap->height() - 1;
diff --git a/src/gpu/GrAAHairLinePathRenderer.cpp b/src/gpu/GrAAHairLinePathRenderer.cpp
index 85121bb..1895da9 100644
--- a/src/gpu/GrAAHairLinePathRenderer.cpp
+++ b/src/gpu/GrAAHairLinePathRenderer.cpp
@@ -1188,4 +1188,3 @@
 
     return true;
 }
-
diff --git a/src/opts/SkBitmapFilter_opts_SSE2.cpp b/src/opts/SkBitmapFilter_opts_SSE2.cpp
index 685ec77..f992bcb 100644
--- a/src/opts/SkBitmapFilter_opts_SSE2.cpp
+++ b/src/opts/SkBitmapFilter_opts_SSE2.cpp
@@ -55,7 +55,7 @@
 
     while (count-- > 0) {
         SkPoint srcPt;
-        s.fInvProc(*s.fInvMatrix, SkIntToScalar(x),
+        s.fInvProc(s.fInvMatrix, SkIntToScalar(x),
                     SkIntToScalar(y), &srcPt);
         srcPt.fX -= SK_ScalarHalf;
         srcPt.fY -= SK_ScalarHalf;
@@ -72,10 +72,10 @@
         int x1 = SkTMin(maxX, int(floor(sx+s.getBitmapFilter()->width() + 0.5f)));
 
         for (int src_y = y0; src_y <= y1; src_y++) {
-            float yweight = s.getBitmapFilter()->lookupFloat( (srcPt.fY - src_y) );
+            float yweight = SkScalarToFloat(s.getBitmapFilter()->lookupScalar(srcPt.fY - src_y));
 
             for (int src_x = x0; src_x <= x1 ; src_x++) {
-                float xweight = s.getBitmapFilter()->lookupFloat( (srcPt.fX - src_x) );
+                float xweight = SkScalarToFloat(s.getBitmapFilter()->lookupScalar(srcPt.fX - src_x));
 
                 float combined_weight = xweight * yweight;
 
@@ -118,7 +118,7 @@
     const int maxY = s.fBitmap->height() - 1;
 
     SkPoint srcPt;
-    s.fInvProc(*s.fInvMatrix, SkIntToScalar(x),
+    s.fInvProc(s.fInvMatrix, SkIntToScalar(x),
                 SkIntToScalar(y), &srcPt);
     srcPt.fY -= SK_ScalarHalf;
     int sy = SkScalarFloorToInt(srcPt.fY);
@@ -139,10 +139,10 @@
         int x1 = SkTMin(maxX, int(floor(sx+s.getBitmapFilter()->width() + 0.5f)));
 
         for (int src_y = y0; src_y <= y1; src_y++) {
-            float yweight = s.getBitmapFilter()->lookupFloat( (srcPt.fY - src_y) );
+            float yweight = SkScalarToFloat(s.getBitmapFilter()->lookupScalar(srcPt.fY - src_y));
 
             for (int src_x = x0; src_x <= x1 ; src_x++) {
-                float xweight = s.getBitmapFilter()->lookupFloat( (srcPt.fX - src_x) );
+                float xweight = SkScalarToFloat(s.getBitmapFilter()->lookupScalar(srcPt.fX - src_x));
 
                 float combined_weight = xweight * yweight;
 
@@ -175,7 +175,7 @@
 
         x++;
 
-        s.fInvProc(*s.fInvMatrix, SkIntToScalar(x),
+        s.fInvProc(s.fInvMatrix, SkIntToScalar(x),
                     SkIntToScalar(y), &srcPt);
 
     }
diff --git a/src/opts/SkBitmapProcState_matrix_clamp_neon.h b/src/opts/SkBitmapProcState_matrix_clamp_neon.h
index 5af7a52..23da547 100644
--- a/src/opts/SkBitmapProcState_matrix_clamp_neon.h
+++ b/src/opts/SkBitmapProcState_matrix_clamp_neon.h
@@ -67,8 +67,8 @@
     SkFixed fx;
     {
         SkPoint pt;
-        s.fInvProc(*s.fInvMatrix, SkIntToScalar(x) + SK_ScalarHalf,
-                                  SkIntToScalar(y) + SK_ScalarHalf, &pt);
+        s.fInvProc(s.fInvMatrix, SkIntToScalar(x) + SK_ScalarHalf,
+                                 SkIntToScalar(y) + SK_ScalarHalf, &pt);
         fx = SkScalarToFixed(pt.fY);
         const unsigned maxY = s.fBitmap->height() - 1;
         *xy++ = TILEY_PROCF(fx, maxY);
@@ -169,7 +169,7 @@
 
     PREAMBLE(s);
     SkPoint srcPt;
-    s.fInvProc(*s.fInvMatrix,
+    s.fInvProc(s.fInvMatrix,
                SkIntToScalar(x) + SK_ScalarHalf,
                SkIntToScalar(y) + SK_ScalarHalf, &srcPt);
 
@@ -282,7 +282,7 @@
     int maxX = s.fBitmap->width() - 1;
     int maxY = s.fBitmap->height() - 1;
 
-    SkPerspIter   iter(*s.fInvMatrix,
+    SkPerspIter   iter(s.fInvMatrix,
                        SkIntToScalar(x) + SK_ScalarHalf,
                        SkIntToScalar(y) + SK_ScalarHalf, count);
 
@@ -492,8 +492,8 @@
 
     {
         SkPoint pt;
-        s.fInvProc(*s.fInvMatrix, SkIntToScalar(x) + SK_ScalarHalf,
-                                  SkIntToScalar(y) + SK_ScalarHalf, &pt);
+        s.fInvProc(s.fInvMatrix, SkIntToScalar(x) + SK_ScalarHalf,
+                                 SkIntToScalar(y) + SK_ScalarHalf, &pt);
         const SkFixed fy = SkScalarToFixed(pt.fY) - (s.fFilterOneY >> 1);
         const unsigned maxY = s.fBitmap->height() - 1;
         // compute our two Y values up front
@@ -596,7 +596,7 @@
 
     PREAMBLE(s);
     SkPoint srcPt;
-    s.fInvProc(*s.fInvMatrix,
+    s.fInvProc(s.fInvMatrix,
                SkIntToScalar(x) + SK_ScalarHalf,
                SkIntToScalar(y) + SK_ScalarHalf, &srcPt);
 
@@ -757,7 +757,7 @@
     SkFixed oneX = s.fFilterOneX;
     SkFixed oneY = s.fFilterOneY;
 
-    SkPerspIter   iter(*s.fInvMatrix,
+    SkPerspIter   iter(s.fInvMatrix,
                        SkIntToScalar(x) + SK_ScalarHalf,
                        SkIntToScalar(y) + SK_ScalarHalf, count);
 
diff --git a/src/opts/SkBitmapProcState_matrix_repeat_neon.h b/src/opts/SkBitmapProcState_matrix_repeat_neon.h
index f57d20d..55e2997 100644
--- a/src/opts/SkBitmapProcState_matrix_repeat_neon.h
+++ b/src/opts/SkBitmapProcState_matrix_repeat_neon.h
@@ -65,8 +65,8 @@
     SkFixed fx;
     {
         SkPoint pt;
-        s.fInvProc(*s.fInvMatrix, SkIntToScalar(x) + SK_ScalarHalf,
-                                  SkIntToScalar(y) + SK_ScalarHalf, &pt);
+        s.fInvProc(s.fInvMatrix, SkIntToScalar(x) + SK_ScalarHalf,
+                                 SkIntToScalar(y) + SK_ScalarHalf, &pt);
         fx = SkScalarToFixed(pt.fY);
         const unsigned maxY = s.fBitmap->height() - 1;
         *xy++ = TILEY_PROCF(fx, maxY);
@@ -167,7 +167,7 @@
 
     PREAMBLE(s);
     SkPoint srcPt;
-    s.fInvProc(*s.fInvMatrix,
+    s.fInvProc(s.fInvMatrix,
                SkIntToScalar(x) + SK_ScalarHalf,
                SkIntToScalar(y) + SK_ScalarHalf, &srcPt);
 
@@ -284,7 +284,7 @@
     int maxX = s.fBitmap->width() - 1;
     int maxY = s.fBitmap->height() - 1;
 
-    SkPerspIter   iter(*s.fInvMatrix,
+    SkPerspIter   iter(s.fInvMatrix,
                        SkIntToScalar(x) + SK_ScalarHalf,
                        SkIntToScalar(y) + SK_ScalarHalf, count);
 
@@ -422,8 +422,8 @@
 
     {
         SkPoint pt;
-        s.fInvProc(*s.fInvMatrix, SkIntToScalar(x) + SK_ScalarHalf,
-                                  SkIntToScalar(y) + SK_ScalarHalf, &pt);
+        s.fInvProc(s.fInvMatrix, SkIntToScalar(x) + SK_ScalarHalf,
+                                 SkIntToScalar(y) + SK_ScalarHalf, &pt);
         const SkFixed fy = SkScalarToFixed(pt.fY) - (s.fFilterOneY >> 1);
         const unsigned maxY = s.fBitmap->height() - 1;
         // compute our two Y values up front
@@ -457,7 +457,7 @@
 
     PREAMBLE(s);
     SkPoint srcPt;
-    s.fInvProc(*s.fInvMatrix,
+    s.fInvProc(s.fInvMatrix,
                SkIntToScalar(x) + SK_ScalarHalf,
                SkIntToScalar(y) + SK_ScalarHalf, &srcPt);
 
@@ -493,7 +493,7 @@
 
 
 
-    SkPerspIter   iter(*s.fInvMatrix,
+    SkPerspIter   iter(s.fInvMatrix,
                        SkIntToScalar(x) + SK_ScalarHalf,
                        SkIntToScalar(y) + SK_ScalarHalf, count);
 
diff --git a/src/opts/SkBitmapProcState_opts_SSE2.cpp b/src/opts/SkBitmapProcState_opts_SSE2.cpp
index 4bba8c3..0c84d00 100644
--- a/src/opts/SkBitmapProcState_opts_SSE2.cpp
+++ b/src/opts/SkBitmapProcState_opts_SSE2.cpp
@@ -15,7 +15,7 @@
                                    const uint32_t* xy,
                                    int count, uint32_t* colors) {
     SkASSERT(count > 0 && colors != NULL);
-    SkASSERT(s.fDoFilter);
+    SkASSERT(s.fFilterQuality != SkBitmapProcState::kNone_BitmapFilter);
     SkASSERT(s.fBitmap->config() == SkBitmap::kARGB_8888_Config);
     SkASSERT(s.fAlphaScale == 256);
 
@@ -121,7 +121,7 @@
                                   const uint32_t* xy,
                                   int count, uint32_t* colors) {
     SkASSERT(count > 0 && colors != NULL);
-    SkASSERT(s.fDoFilter);
+    SkASSERT(s.fFilterQuality != SkBitmapProcState::kNone_BitmapFilter);
     SkASSERT(s.fBitmap->config() == SkBitmap::kARGB_8888_Config);
     SkASSERT(s.fAlphaScale < 256);
 
@@ -255,8 +255,8 @@
     SkFixed fx;
 
     SkPoint pt;
-    s.fInvProc(*s.fInvMatrix, SkIntToScalar(x) + SK_ScalarHalf,
-                                SkIntToScalar(y) + SK_ScalarHalf, &pt);
+    s.fInvProc(s.fInvMatrix, SkIntToScalar(x) + SK_ScalarHalf,
+                             SkIntToScalar(y) + SK_ScalarHalf, &pt);
     const SkFixed fy = SkScalarToFixed(pt.fY) - (s.fFilterOneY >> 1);
     const unsigned maxY = s.fBitmap->height() - 1;
     // compute our two Y values up front
@@ -376,8 +376,8 @@
     const unsigned maxX = s.fBitmap->width() - 1;
     SkFixed fx;
     SkPoint pt;
-    s.fInvProc(*s.fInvMatrix, SkIntToScalar(x) + SK_ScalarHalf,
-                                SkIntToScalar(y) + SK_ScalarHalf, &pt);
+    s.fInvProc(s.fInvMatrix, SkIntToScalar(x) + SK_ScalarHalf,
+                             SkIntToScalar(y) + SK_ScalarHalf, &pt);
     fx = SkScalarToFixed(pt.fY);
     const unsigned maxY = s.fBitmap->height() - 1;
     *xy++ = SkClampMax(fx >> 16, maxY);
@@ -490,7 +490,7 @@
 void ClampX_ClampY_filter_affine_SSE2(const SkBitmapProcState& s,
                                       uint32_t xy[], int count, int x, int y) {
     SkPoint srcPt;
-    s.fInvProc(*s.fInvMatrix,
+    s.fInvProc(s.fInvMatrix,
                SkIntToScalar(x) + SK_ScalarHalf,
                SkIntToScalar(y) + SK_ScalarHalf, &srcPt);
 
@@ -566,7 +566,7 @@
                              SkMatrix::kAffine_Mask)) == 0);
 
     SkPoint srcPt;
-    s.fInvProc(*s.fInvMatrix,
+    s.fInvProc(s.fInvMatrix,
                SkIntToScalar(x) + SK_ScalarHalf,
                SkIntToScalar(y) + SK_ScalarHalf, &srcPt);
 
@@ -641,7 +641,7 @@
                                    const uint32_t* xy,
                                    int count, uint16_t* colors) {
     SkASSERT(count > 0 && colors != NULL);
-    SkASSERT(s.fDoFilter);
+    SkASSERT(s.fFilterQuality != SkBitmapProcState::kNone_BitmapFilter);
     SkASSERT(s.fBitmap->config() == SkBitmap::kARGB_8888_Config);
     SkASSERT(s.fBitmap->isOpaque());
 
diff --git a/src/opts/SkBitmapProcState_opts_SSSE3.cpp b/src/opts/SkBitmapProcState_opts_SSSE3.cpp
index 1246b95..f18b7e1 100644
--- a/src/opts/SkBitmapProcState_opts_SSSE3.cpp
+++ b/src/opts/SkBitmapProcState_opts_SSSE3.cpp
@@ -385,7 +385,7 @@
                                      const uint32_t* xy,
                                      int count, uint32_t* colors) {
     SkASSERT(count > 0 && colors != NULL);
-    SkASSERT(s.fDoFilter);
+    SkASSERT(s.fFilterQuality != SkBitmapProcState::kNone_BitmapFilter);
     SkASSERT(s.fBitmap->config() == SkBitmap::kARGB_8888_Config);
     if (has_alpha) {
         SkASSERT(s.fAlphaScale < 256);
@@ -576,7 +576,7 @@
                                        const uint32_t* xy,
                                        int count, uint32_t* colors) {
     SkASSERT(count > 0 && colors != NULL);
-    SkASSERT(s.fDoFilter);
+    SkASSERT(s.fFilterQuality != SkBitmapProcState::kNone_BitmapFilter);
     SkASSERT(s.fBitmap->config() == SkBitmap::kARGB_8888_Config);
     if (has_alpha) {
         SkASSERT(s.fAlphaScale < 256);
diff --git a/src/opts/SkBitmapProcState_opts_arm.cpp b/src/opts/SkBitmapProcState_opts_arm.cpp
index e044ad8..99da0da 100644
--- a/src/opts/SkBitmapProcState_opts_arm.cpp
+++ b/src/opts/SkBitmapProcState_opts_arm.cpp
@@ -23,7 +23,7 @@
                              int count, uint16_t* SK_RESTRICT colors) {
     SkASSERT(count > 0 && colors != NULL);
     SkASSERT(s.fInvType <= (SkMatrix::kTranslate_Mask | SkMatrix::kScale_Mask));
-    SkASSERT(s.fDoFilter == false);
+    SkASSERT(SkBitmapProcState::kNone_BitmapFilter == s.fFilterQuality);
 
     const uint16_t* SK_RESTRICT table = s.fBitmap->getColorTable()->lock16BitCache();
     const uint8_t* SK_RESTRICT srcAddr = (const uint8_t*)s.fBitmap->getPixels();
@@ -114,7 +114,7 @@
                                     int count, SkPMColor* SK_RESTRICT colors) {
     SkASSERT(count > 0 && colors != NULL);
     SkASSERT(s.fInvType <= (SkMatrix::kTranslate_Mask | SkMatrix::kScale_Mask));
-    SkASSERT(s.fDoFilter == false);
+    SkASSERT(SkBitmapProcState::kNone_BitmapFilter == s.fFilterQuality);
 
     const SkPMColor* SK_RESTRICT table = s.fBitmap->getColorTable()->lockColors();
     const uint8_t* SK_RESTRICT srcAddr = (const uint8_t*)s.fBitmap->getPixels();
@@ -190,7 +190,6 @@
     otherwise the shader won't even look at the matrix/sampler
  */
 void SkBitmapProcState::platformProcs() {
-    bool doFilter = fDoFilter;
     bool isOpaque = 256 == fAlphaScale;
     bool justDx = false;
 
@@ -201,7 +200,7 @@
     switch (fBitmap->config()) {
         case SkBitmap::kIndex8_Config:
 #if SK_ARM_ARCH >= 6 && !defined(SK_CPU_BENDIAN)
-            if (justDx && !doFilter) {
+            if (justDx && kNone_BitmapFilter == fFilterQuality) {
 #if 0   /* crashing on android device */
                 fSampleProc16 = SI8_D16_nofilter_DX_arm;
                 fShaderProc16 = NULL;