diff --git a/src/effects/GrAlphaThresholdFragmentProcessor.cpp b/src/effects/GrAlphaThresholdFragmentProcessor.cpp
index 51869f3..ede2218 100644
--- a/src/effects/GrAlphaThresholdFragmentProcessor.cpp
+++ b/src/effects/GrAlphaThresholdFragmentProcessor.cpp
@@ -9,8 +9,8 @@
  * This file was autogenerated from GrAlphaThresholdFragmentProcessor.fp; do not modify.
  */
 #include "GrAlphaThresholdFragmentProcessor.h"
+#if SK_SUPPORT_GPU
 
-    #if SK_SUPPORT_GPU
     inline GrFragmentProcessor::OptimizationFlags GrAlphaThresholdFragmentProcessor::optFlags(
                                                                              float outerThreshold) {
         if (outerThreshold >= 1.0) {
@@ -38,7 +38,7 @@
         fOuterThresholdVar = args.fUniformHandler->addUniform(kFragment_GrShaderFlag, kFloat_GrSLType, kDefault_GrSLPrecision, "outerThreshold");
         SkSL::String sk_TransformedCoords2D_0 = fragBuilder->ensureCoords2D(args.fTransformedCoords[0]);
         SkSL::String sk_TransformedCoords2D_1 = fragBuilder->ensureCoords2D(args.fTransformedCoords[1]);
-        fragBuilder->codeAppendf("vec4 _tmp0;\nvec4 color = (_tmp0 = texture(%s, %s) , %s != mat4(1.0) ? vec4(clamp((%s * vec4(_tmp0.xyz, 1.0)).xyz, 0.0, _tmp0.w), _tmp0.w) : _tmp0);\nvec4 mask_color = texture(%s, %s);\nif (mask_color.w < 0.5) {\n    if (color.w > %s) {\n        float scale = %s / color.w;\n        color.xyz *= scale;\n        color.w = %s;\n    }\n} else if (color.w < %s) {\n    float scale = %s / max(0.001, color.w);\n    color.xyz *= scale;\n    color.w = %s;\n}\n%s = color;\n", fragBuilder->getProgramBuilder()->samplerVariable(args.fTexSamplers[0]).c_str(), sk_TransformedCoords2D_0.c_str(), fColorSpaceHelper.isValid() ? args.fUniformHandler->getUniformCStr(fColorSpaceHelper.gamutXformUniform()) : "mat4(1.0)", fColorSpaceHelper.isValid() ? args.fUniformHandler->getUniformCStr(fColorSpaceHelper.gamutXformUniform()) : "mat4(1.0)", fragBuilder->getProgramBuilder()->samplerVariable(args.fTexSamplers[1]).c_str(), sk_TransformedCoords2D_1.c_str(), args.fUniformHandler->getUniformCStr(fOuterThresholdVar), args.fUniformHandler->getUniformCStr(fOuterThresholdVar), args.fUniformHandler->getUniformCStr(fOuterThresholdVar), args.fUniformHandler->getUniformCStr(fInnerThresholdVar), args.fUniformHandler->getUniformCStr(fInnerThresholdVar), args.fUniformHandler->getUniformCStr(fInnerThresholdVar), args.fOutputColor);
+        fragBuilder->codeAppendf("vec4 _tmp0;\nvec4 color = (_tmp0 = texture(%s, %s).%s , %s != mat4(1.0) ? vec4(clamp((%s * vec4(_tmp0.xyz, 1.0)).xyz, 0.0, _tmp0.w), _tmp0.w) : _tmp0);\nvec4 mask_color = texture(%s, %s).%s;\nif (mask_color.w < 0.5) {\n    if (color.w > %s) {\n        float scale = %s / color.w;\n        color.xyz *= scale;\n        color.w = %s;\n    }\n} else if (color.w < %s) {\n    float scale = %s / max(0.001, color.w);\n    color.xyz *= scale;\n    color.w = %s;\n}\n%s = color;\n", fragBuilder->getProgramBuilder()->samplerVariable(args.fTexSamplers[0]).c_str(), sk_TransformedCoords2D_0.c_str(), fragBuilder->getProgramBuilder()->samplerSwizzle(args.fTexSamplers[0]).c_str(), fColorSpaceHelper.isValid() ? args.fUniformHandler->getUniformCStr(fColorSpaceHelper.gamutXformUniform()) : "mat4(1.0)", fColorSpaceHelper.isValid() ? args.fUniformHandler->getUniformCStr(fColorSpaceHelper.gamutXformUniform()) : "mat4(1.0)", fragBuilder->getProgramBuilder()->samplerVariable(args.fTexSamplers[1]).c_str(), sk_TransformedCoords2D_1.c_str(), fragBuilder->getProgramBuilder()->samplerSwizzle(args.fTexSamplers[1]).c_str(), args.fUniformHandler->getUniformCStr(fOuterThresholdVar), args.fUniformHandler->getUniformCStr(fOuterThresholdVar), args.fUniformHandler->getUniformCStr(fOuterThresholdVar), args.fUniformHandler->getUniformCStr(fInnerThresholdVar), args.fUniformHandler->getUniformCStr(fInnerThresholdVar), args.fUniformHandler->getUniformCStr(fInnerThresholdVar), args.fOutputColor);
     }
 private:
     void onSetData(const GrGLSLProgramDataManager& pdman, const GrFragmentProcessor& _proc) override {
@@ -98,5 +98,4 @@
                                 bounds);
 }
 #endif
-
-    #endif
+#endif
diff --git a/src/effects/GrAlphaThresholdFragmentProcessor.fp b/src/effects/GrAlphaThresholdFragmentProcessor.fp
index 1cc38c6..b576ecf 100644
--- a/src/effects/GrAlphaThresholdFragmentProcessor.fp
+++ b/src/effects/GrAlphaThresholdFragmentProcessor.fp
@@ -45,17 +45,10 @@
 }
 
 @header {
-    #include "SkTypes.h"
-    #if SK_SUPPORT_GPU
     #include "GrColorSpaceXform.h"
 }
 
-@headerEnd {
-    #endif
-}
-
 @cpp {
-    #if SK_SUPPORT_GPU
     inline GrFragmentProcessor::OptimizationFlags GrAlphaThresholdFragmentProcessor::optFlags(
                                                                              float outerThreshold) {
         if (outerThreshold >= 1.0) {
@@ -67,10 +60,6 @@
     }
 }
 
-@cppEnd {
-    #endif
-}
-
 void main() {
     vec4 color = texture(image, sk_TransformedCoords2D[0], colorXform);
     vec4 mask_color = texture(mask, sk_TransformedCoords2D[1]);
diff --git a/src/effects/GrAlphaThresholdFragmentProcessor.h b/src/effects/GrAlphaThresholdFragmentProcessor.h
index 80e4e0b..702b6ae 100644
--- a/src/effects/GrAlphaThresholdFragmentProcessor.h
+++ b/src/effects/GrAlphaThresholdFragmentProcessor.h
@@ -10,9 +10,9 @@
  */
 #ifndef GrAlphaThresholdFragmentProcessor_DEFINED
 #define GrAlphaThresholdFragmentProcessor_DEFINED
+#include "SkTypes.h"
+#if SK_SUPPORT_GPU
 
-    #include "SkTypes.h"
-    #if SK_SUPPORT_GPU
     #include "GrColorSpaceXform.h"
 #include "GrFragmentProcessor.h"
 #include "GrCoordTransform.h"
@@ -75,6 +75,5 @@
     float fOuterThreshold;
     typedef GrFragmentProcessor INHERITED;
 };
-
-    #endif
+#endif
 #endif
diff --git a/src/effects/GrCircleBlurFragmentProcessor.cpp b/src/effects/GrCircleBlurFragmentProcessor.cpp
index d99f0c7..5759fc4 100644
--- a/src/effects/GrCircleBlurFragmentProcessor.cpp
+++ b/src/effects/GrCircleBlurFragmentProcessor.cpp
@@ -1,359 +1,316 @@
 /*
- * Copyright 2015 Google Inc.
+ * Copyright 2017 Google Inc.
  *
  * Use of this source code is governed by a BSD-style license that can be
  * found in the LICENSE file.
  */
 
+/*
+ * This file was autogenerated from GrCircleBlurFragmentProcessor.fp; do not modify.
+ */
 #include "GrCircleBlurFragmentProcessor.h"
-
 #if SK_SUPPORT_GPU
 
-#include "GrContext.h"
-#include "GrResourceProvider.h"
-#include "glsl/GrGLSLFragmentProcessor.h"
-#include "glsl/GrGLSLFragmentShaderBuilder.h"
-#include "glsl/GrGLSLProgramDataManager.h"
-#include "glsl/GrGLSLUniformHandler.h"
+    #include "GrResourceProvider.h"
 
-#include "SkFixed.h"
-
-class GrCircleBlurFragmentProcessor::GLSLProcessor : public GrGLSLFragmentProcessor {
-public:
-    void emitCode(EmitArgs&) override;
-
-protected:
-    void onSetData(const GrGLSLProgramDataManager&, const GrFragmentProcessor&) override;
-
-private:
-    GrGLSLProgramDataManager::UniformHandle fDataUniform;
-
-    typedef GrGLSLFragmentProcessor INHERITED;
-};
-
-void GrCircleBlurFragmentProcessor::GLSLProcessor::emitCode(EmitArgs& args) {
-    const char *dataName;
-
-    // The data is formatted as:
-    // x,y  - the center of the circle
-    // z    - inner radius that should map to 0th entry in the texture.
-    // w    - the inverse of the distance over which the texture is stretched.
-    fDataUniform = args.fUniformHandler->addUniform(kFragment_GrShaderFlag,
-                                                    kVec4f_GrSLType,
-                                                    kDefault_GrSLPrecision,
-                                                    "data",
-                                                    &dataName);
-
-    GrGLSLFPFragmentBuilder* fragBuilder = args.fFragBuilder;
-
-    if (args.fInputColor) {
-        fragBuilder->codeAppendf("vec4 src=%s;", args.fInputColor);
-    } else {
-        fragBuilder->codeAppendf("vec4 src=vec4(1);");
-    }
-
-    // We just want to compute "(length(vec) - %s.z + 0.5) * %s.w" but need to rearrange
-    // for precision.
-    fragBuilder->codeAppendf("vec2 vec = vec2( (sk_FragCoord.x - %s.x) * %s.w, "
-                                              "(sk_FragCoord.y - %s.y) * %s.w );",
-                             dataName, dataName, dataName, dataName);
-    fragBuilder->codeAppendf("float dist = length(vec) + (0.5 - %s.z) * %s.w;",
-                             dataName, dataName);
-
-    fragBuilder->codeAppendf("float intensity = ");
-    fragBuilder->appendTextureLookup(args.fTexSamplers[0], "vec2(dist, 0.5)");
-    fragBuilder->codeAppend(".a;");
-
-    fragBuilder->codeAppendf("%s = src * intensity;\n", args.fOutputColor );
-}
-
-void GrCircleBlurFragmentProcessor::GLSLProcessor::onSetData(const GrGLSLProgramDataManager& pdman,
-                                                             const GrFragmentProcessor& proc) {
-    const GrCircleBlurFragmentProcessor& cbfp = proc.cast<GrCircleBlurFragmentProcessor>();
-    const SkRect& circle = cbfp.fCircle;
-
-    // The data is formatted as:
-    // x,y  - the center of the circle
-    // z    - inner radius that should map to 0th entry in the texture.
-    // w    - the inverse of the distance over which the profile texture is stretched.
-    pdman.set4f(fDataUniform, circle.centerX(), circle.centerY(), cbfp.fSolidRadius,
-                1.f / cbfp.fTextureRadius);
-}
-
-///////////////////////////////////////////////////////////////////////////////
-
-GrCircleBlurFragmentProcessor::GrCircleBlurFragmentProcessor(const SkRect& circle,
-                                                             float textureRadius,
-                                                             float solidRadius,
-                                                             sk_sp<GrTextureProxy> blurProfile)
-        : INHERITED(kCompatibleWithCoverageAsAlpha_OptimizationFlag)
-        , fCircle(circle)
-        , fSolidRadius(solidRadius)
-        , fTextureRadius(textureRadius)
-        , fBlurProfileSampler(std::move(blurProfile), GrSamplerParams::kBilerp_FilterMode) {
-    this->initClassID<GrCircleBlurFragmentProcessor>();
-    this->addTextureSampler(&fBlurProfileSampler);
-}
-
-GrGLSLFragmentProcessor* GrCircleBlurFragmentProcessor::onCreateGLSLInstance() const {
-    return new GLSLProcessor;
-}
-
-void GrCircleBlurFragmentProcessor::onGetGLSLProcessorKey(const GrShaderCaps& caps,
-                                                          GrProcessorKeyBuilder* b) const {
-    // The code for this processor is always the same so there is nothing to add to the key.
-    return;
-}
-
-// Computes an unnormalized half kernel (right side). Returns the summation of all the half kernel
-// values.
-static float make_unnormalized_half_kernel(float* halfKernel, int halfKernelSize, float sigma) {
-    const float invSigma = 1.f / sigma;
-    const float b = -0.5f * invSigma * invSigma;
-    float tot = 0.0f;
-    // Compute half kernel values at half pixel steps out from the center.
-    float t = 0.5f;
-    for (int i = 0; i < halfKernelSize; ++i) {
-        float value = expf(t * t * b);
-        tot += value;
-        halfKernel[i] = value;
-        t += 1.f;
-    }
-    return tot;
-}
-
-// Create a Gaussian half-kernel (right side) and a summed area table given a sigma and number of
-// discrete steps. The half kernel is normalized to sum to 0.5.
-static void make_half_kernel_and_summed_table(float* halfKernel, float* summedHalfKernel,
-                                              int halfKernelSize, float sigma) {
-    // The half kernel should sum to 0.5 not 1.0.
-    const float tot = 2.f * make_unnormalized_half_kernel(halfKernel, halfKernelSize, sigma);
-    float sum = 0.f;
-    for (int i = 0; i < halfKernelSize; ++i) {
-        halfKernel[i] /= tot;
-        sum += halfKernel[i];
-        summedHalfKernel[i] = sum;
-    }
-}
-
-// Applies the 1D half kernel vertically at points along the x axis to a circle centered at the
-// origin with radius circleR.
-void apply_kernel_in_y(float* results, int numSteps, float firstX, float circleR,
-                       int halfKernelSize, const float* summedHalfKernelTable) {
-    float x = firstX;
-    for (int i = 0; i < numSteps; ++i, x += 1.f) {
-        if (x < -circleR || x > circleR) {
-            results[i] = 0;
-            continue;
+    
+    
+    static float make_unnormalized_half_kernel(float* halfKernel, int halfKernelSize, float sigma) {
+        const float invSigma = 1.f / sigma;
+        const float b = -0.5f * invSigma * invSigma;
+        float tot = 0.0f;
+        
+        float t = 0.5f;
+        for (int i = 0; i < halfKernelSize; ++i) {
+            float value = expf(t * t * b);
+            tot += value;
+            halfKernel[i] = value;
+            t += 1.f;
         }
-        float y = sqrtf(circleR * circleR - x * x);
-        // In the column at x we exit the circle at +y and -y
-        // The summed table entry j is actually reflects an offset of j + 0.5.
-        y -= 0.5f;
-        int yInt = SkScalarFloorToInt(y);
-        SkASSERT(yInt >= -1);
-        if (y < 0) {
-            results[i] = (y + 0.5f) * summedHalfKernelTable[0];
-        } else if (yInt >= halfKernelSize - 1) {
-            results[i] = 0.5f;
+        return tot;
+    }
+
+    
+    
+    static void make_half_kernel_and_summed_table(float* halfKernel, float* summedHalfKernel,
+                                                  int halfKernelSize, float sigma) {
+        
+        const float tot = 2.f * make_unnormalized_half_kernel(halfKernel, halfKernelSize, sigma);
+        float sum = 0.f;
+        for (int i = 0; i < halfKernelSize; ++i) {
+            halfKernel[i] /= tot;
+            sum += halfKernel[i];
+            summedHalfKernel[i] = sum;
+        }
+    }
+
+    
+    
+    void apply_kernel_in_y(float* results, int numSteps, float firstX, float circleR,
+                           int halfKernelSize, const float* summedHalfKernelTable) {
+        float x = firstX;
+        for (int i = 0; i < numSteps; ++i, x += 1.f) {
+            if (x < -circleR || x > circleR) {
+                results[i] = 0;
+                continue;
+            }
+            float y = sqrtf(circleR * circleR - x * x);
+            
+            
+            y -= 0.5f;
+            int yInt = SkScalarFloorToInt(y);
+            SkASSERT(yInt >= -1);
+            if (y < 0) {
+                results[i] = (y + 0.5f) * summedHalfKernelTable[0];
+            } else if (yInt >= halfKernelSize - 1) {
+                results[i] = 0.5f;
+            } else {
+                float yFrac = y - yInt;
+                results[i] = (1.f - yFrac) * summedHalfKernelTable[yInt] +
+                             yFrac * summedHalfKernelTable[yInt + 1];
+            }
+        }
+    }
+
+    
+    
+    
+    
+    static uint8_t eval_at(float evalX, float circleR, const float* halfKernel, int halfKernelSize,
+                           const float* yKernelEvaluations) {
+        float acc = 0;
+
+        float x = evalX - halfKernelSize;
+        for (int i = 0; i < halfKernelSize; ++i, x += 1.f) {
+            if (x < -circleR || x > circleR) {
+                continue;
+            }
+            float verticalEval = yKernelEvaluations[i];
+            acc += verticalEval * halfKernel[halfKernelSize - i - 1];
+        }
+        for (int i = 0; i < halfKernelSize; ++i, x += 1.f) {
+            if (x < -circleR || x > circleR) {
+                continue;
+            }
+            float verticalEval = yKernelEvaluations[i + halfKernelSize];
+            acc += verticalEval * halfKernel[i];
+        }
+        
+        
+        return SkUnitScalarClampToByte(2.f * acc);
+    }
+
+    
+    
+    
+    
+    
+    
+    
+    static uint8_t* create_circle_profile(float sigma, float circleR, int profileTextureWidth) {
+        const int numSteps = profileTextureWidth;
+        uint8_t* weights = new uint8_t[numSteps];
+
+        
+        int halfKernelSize = SkScalarCeilToInt(6.0f*sigma);
+        
+        halfKernelSize = ((halfKernelSize + 1) & ~1) >> 1;
+
+        
+        int numYSteps = numSteps + 2 * halfKernelSize;
+
+        SkAutoTArray<float> bulkAlloc(halfKernelSize + halfKernelSize + numYSteps);
+        float* halfKernel = bulkAlloc.get();
+        float* summedKernel = bulkAlloc.get() + halfKernelSize;
+        float* yEvals = bulkAlloc.get() + 2 * halfKernelSize;
+        make_half_kernel_and_summed_table(halfKernel, summedKernel, halfKernelSize, sigma);
+
+        float firstX = -halfKernelSize + 0.5f;
+        apply_kernel_in_y(yEvals, numYSteps, firstX, circleR, halfKernelSize, summedKernel);
+
+        for (int i = 0; i < numSteps - 1; ++i) {
+            float evalX = i + 0.5f;
+            weights[i] = eval_at(evalX, circleR, halfKernel, halfKernelSize, yEvals + i);
+        }
+        
+        weights[numSteps - 1] = 0;
+        return weights;
+    }
+
+    static uint8_t* create_half_plane_profile(int profileWidth) {
+        SkASSERT(!(profileWidth & 0x1));
+        
+        float sigma = profileWidth / 6.f;
+        int halfKernelSize = profileWidth / 2;
+
+        SkAutoTArray<float> halfKernel(halfKernelSize);
+        uint8_t* profile = new uint8_t[profileWidth];
+
+        
+        const float tot = 2.f * make_unnormalized_half_kernel(halfKernel.get(), halfKernelSize,
+                                                              sigma);
+        float sum = 0.f;
+        
+        for (int i = 0; i < halfKernelSize; ++i) {
+            halfKernel[halfKernelSize - i - 1] /= tot;
+            sum += halfKernel[halfKernelSize - i - 1];
+            profile[profileWidth - i - 1] = SkUnitScalarClampToByte(sum);
+        }
+        
+        
+        for (int i = 0; i < halfKernelSize; ++i) {
+            sum += halfKernel[i];
+            profile[halfKernelSize - i - 1] = SkUnitScalarClampToByte(sum);
+        }
+        
+        profile[profileWidth - 1] = 0;
+        return profile;
+    }
+
+    static sk_sp<GrTextureProxy> create_profile_texture(GrResourceProvider* resourceProvider,
+                                                        const SkRect& circle,
+                                                        float sigma,
+                                                        float* solidRadius, float* textureRadius) {
+        float circleR = circle.width() / 2.0f;
+        
+        
+        SkScalar sigmaToCircleRRatio = sigma / circleR;
+        
+        
+        
+        
+        sigmaToCircleRRatio = SkTMin(sigmaToCircleRRatio, 8.f);
+        SkFixed sigmaToCircleRRatioFixed;
+        static const SkScalar kHalfPlaneThreshold = 0.1f;
+        bool useHalfPlaneApprox = false;
+        if (sigmaToCircleRRatio <= kHalfPlaneThreshold) {
+            useHalfPlaneApprox = true;
+            sigmaToCircleRRatioFixed = 0;
+            *solidRadius = circleR - 3 * sigma;
+            *textureRadius = 6 * sigma;
         } else {
-            float yFrac = y - yInt;
-            results[i] = (1.f - yFrac) * summedHalfKernelTable[yInt] +
-                         yFrac * summedHalfKernelTable[yInt + 1];
-        }
-    }
-}
-
-// Apply a Gaussian at point (evalX, 0) to a circle centered at the origin with radius circleR.
-// This relies on having a half kernel computed for the Gaussian and a table of applications of
-// the half kernel in y to columns at (evalX - halfKernel, evalX - halfKernel + 1, ..., evalX +
-// halfKernel) passed in as yKernelEvaluations.
-static uint8_t eval_at(float evalX, float circleR, const float* halfKernel, int halfKernelSize,
-                       const float* yKernelEvaluations) {
-    float acc = 0;
-
-    float x = evalX - halfKernelSize;
-    for (int i = 0; i < halfKernelSize; ++i, x += 1.f) {
-        if (x < -circleR || x > circleR) {
-            continue;
-        }
-        float verticalEval = yKernelEvaluations[i];
-        acc += verticalEval * halfKernel[halfKernelSize - i - 1];
-    }
-    for (int i = 0; i < halfKernelSize; ++i, x += 1.f) {
-        if (x < -circleR || x > circleR) {
-            continue;
-        }
-        float verticalEval = yKernelEvaluations[i + halfKernelSize];
-        acc += verticalEval * halfKernel[i];
-    }
-    // Since we applied a half kernel in y we multiply acc by 2 (the circle is symmetric about the
-    // x axis).
-    return SkUnitScalarClampToByte(2.f * acc);
-}
-
-// This function creates a profile of a blurred circle. It does this by computing a kernel for
-// half the Gaussian and a matching summed area table. The summed area table is used to compute
-// an array of vertical applications of the half kernel to the circle along the x axis. The table
-// of y evaluations has 2 * k + n entries where k is the size of the half kernel and n is the size
-// of the profile being computed. Then for each of the n profile entries we walk out k steps in each
-// horizontal direction multiplying the corresponding y evaluation by the half kernel entry and
-// sum these values to compute the profile entry.
-static uint8_t* create_circle_profile(float sigma, float circleR, int profileTextureWidth) {
-    const int numSteps = profileTextureWidth;
-    uint8_t* weights = new uint8_t[numSteps];
-
-    // The full kernel is 6 sigmas wide.
-    int halfKernelSize = SkScalarCeilToInt(6.0f*sigma);
-    // round up to next multiple of 2 and then divide by 2
-    halfKernelSize = ((halfKernelSize + 1) & ~1) >> 1;
-
-    // Number of x steps at which to apply kernel in y to cover all the profile samples in x.
-    int numYSteps = numSteps + 2 * halfKernelSize;
-
-    SkAutoTArray<float> bulkAlloc(halfKernelSize + halfKernelSize + numYSteps);
-    float* halfKernel = bulkAlloc.get();
-    float* summedKernel = bulkAlloc.get() + halfKernelSize;
-    float* yEvals = bulkAlloc.get() + 2 * halfKernelSize;
-    make_half_kernel_and_summed_table(halfKernel, summedKernel, halfKernelSize, sigma);
-
-    float firstX = -halfKernelSize + 0.5f;
-    apply_kernel_in_y(yEvals, numYSteps, firstX, circleR, halfKernelSize, summedKernel);
-
-    for (int i = 0; i < numSteps - 1; ++i) {
-        float evalX = i + 0.5f;
-        weights[i] = eval_at(evalX, circleR, halfKernel, halfKernelSize, yEvals + i);
-    }
-    // Ensure the tail of the Gaussian goes to zero.
-    weights[numSteps - 1] = 0;
-    return weights;
-}
-
-static uint8_t* create_half_plane_profile(int profileWidth) {
-    SkASSERT(!(profileWidth & 0x1));
-    // The full kernel is 6 sigmas wide.
-    float sigma = profileWidth / 6.f;
-    int halfKernelSize = profileWidth / 2;
-
-    SkAutoTArray<float> halfKernel(halfKernelSize);
-    uint8_t* profile = new uint8_t[profileWidth];
-
-    // The half kernel should sum to 0.5.
-    const float tot = 2.f * make_unnormalized_half_kernel(halfKernel.get(), halfKernelSize, sigma);
-    float sum = 0.f;
-    // Populate the profile from the right edge to the middle.
-    for (int i = 0; i < halfKernelSize; ++i) {
-        halfKernel[halfKernelSize - i - 1] /= tot;
-        sum += halfKernel[halfKernelSize - i - 1];
-        profile[profileWidth - i - 1] = SkUnitScalarClampToByte(sum);
-    }
-    // Populate the profile from the middle to the left edge (by flipping the half kernel and
-    // continuing the summation).
-    for (int i = 0; i < halfKernelSize; ++i) {
-        sum += halfKernel[i];
-        profile[halfKernelSize - i - 1] = SkUnitScalarClampToByte(sum);
-    }
-    // Ensure tail goes to 0.
-    profile[profileWidth - 1] = 0;
-    return profile;
-}
-
-static sk_sp<GrTextureProxy> create_profile_texture(GrResourceProvider* resourceProvider,
-                                                    const SkRect& circle,
-                                                    float sigma,
-                                                    float* solidRadius, float* textureRadius) {
-    float circleR = circle.width() / 2.0f;
-    // Profile textures are cached by the ratio of sigma to circle radius and by the size of the
-    // profile texture (binned by powers of 2).
-    SkScalar sigmaToCircleRRatio = sigma / circleR;
-    // When sigma is really small this becomes a equivalent to convolving a Gaussian with a half-
-    // plane. Similarly, in the extreme high ratio cases circle becomes a point WRT to the Guassian
-    // and the profile texture is a just a Gaussian evaluation. However, we haven't yet implemented
-    // this latter optimization.
-    sigmaToCircleRRatio = SkTMin(sigmaToCircleRRatio, 8.f);
-    SkFixed sigmaToCircleRRatioFixed;
-    static const SkScalar kHalfPlaneThreshold = 0.1f;
-    bool useHalfPlaneApprox = false;
-    if (sigmaToCircleRRatio <= kHalfPlaneThreshold) {
-        useHalfPlaneApprox = true;
-        sigmaToCircleRRatioFixed = 0;
-        *solidRadius = circleR - 3 * sigma;
-        *textureRadius = 6 * sigma;
-    } else {
-        // Convert to fixed point for the key.
-        sigmaToCircleRRatioFixed = SkScalarToFixed(sigmaToCircleRRatio);
-        // We shave off some bits to reduce the number of unique entries. We could probably shave
-        // off more than we do.
-        sigmaToCircleRRatioFixed &= ~0xff;
-        sigmaToCircleRRatio = SkFixedToScalar(sigmaToCircleRRatioFixed);
-        sigma = circleR * sigmaToCircleRRatio;
-        *solidRadius = 0;
-        *textureRadius = circleR + 3 * sigma;
-    }
-
-    static const GrUniqueKey::Domain kDomain = GrUniqueKey::GenerateDomain();
-    GrUniqueKey key;
-    GrUniqueKey::Builder builder(&key, kDomain, 1);
-    builder[0] = sigmaToCircleRRatioFixed;
-    builder.finish();
-
-    sk_sp<GrTextureProxy> blurProfile = resourceProvider->findProxyByUniqueKey(key);
-    if (!blurProfile) {
-        static constexpr int kProfileTextureWidth = 512;
-        GrSurfaceDesc texDesc;
-        texDesc.fWidth = kProfileTextureWidth;
-        texDesc.fHeight = 1;
-        texDesc.fConfig = kAlpha_8_GrPixelConfig;
-
-        std::unique_ptr<uint8_t[]> profile(nullptr);
-        if (useHalfPlaneApprox) {
-            profile.reset(create_half_plane_profile(kProfileTextureWidth));
-        } else {
-            // Rescale params to the size of the texture we're creating.
-            SkScalar scale = kProfileTextureWidth / *textureRadius;
-            profile.reset(create_circle_profile(sigma * scale, circleR * scale,
-                                                kProfileTextureWidth));
+            
+            sigmaToCircleRRatioFixed = SkScalarToFixed(sigmaToCircleRRatio);
+            
+            
+            sigmaToCircleRRatioFixed &= ~0xff;
+            sigmaToCircleRRatio = SkFixedToScalar(sigmaToCircleRRatioFixed);
+            sigma = circleR * sigmaToCircleRRatio;
+            *solidRadius = 0;
+            *textureRadius = circleR + 3 * sigma;
         }
 
-        blurProfile = GrSurfaceProxy::MakeDeferred(resourceProvider,
-                                                   texDesc, SkBudgeted::kYes, profile.get(), 0);
+        static const GrUniqueKey::Domain kDomain = GrUniqueKey::GenerateDomain();
+        GrUniqueKey key;
+        GrUniqueKey::Builder builder(&key, kDomain, 1);
+        builder[0] = sigmaToCircleRRatioFixed;
+        builder.finish();
+
+        sk_sp<GrTextureProxy> blurProfile = resourceProvider->findProxyByUniqueKey(key);
         if (!blurProfile) {
+            static constexpr int kProfileTextureWidth = 512;
+            GrSurfaceDesc texDesc;
+            texDesc.fWidth = kProfileTextureWidth;
+            texDesc.fHeight = 1;
+            texDesc.fConfig = kAlpha_8_GrPixelConfig;
+
+            std::unique_ptr<uint8_t[]> profile(nullptr);
+            if (useHalfPlaneApprox) {
+                profile.reset(create_half_plane_profile(kProfileTextureWidth));
+            } else {
+                
+                SkScalar scale = kProfileTextureWidth / *textureRadius;
+                profile.reset(create_circle_profile(sigma * scale, circleR * scale,
+                                                    kProfileTextureWidth));
+            }
+
+            blurProfile = GrSurfaceProxy::MakeDeferred(resourceProvider,
+                                                       texDesc, SkBudgeted::kYes, profile.get(), 0);
+            if (!blurProfile) {
+                return nullptr;
+            }
+
+            resourceProvider->assignUniqueKeyToProxy(key, blurProfile.get());
+        }
+
+        return blurProfile;
+    }
+
+    sk_sp<GrFragmentProcessor> GrCircleBlurFragmentProcessor::Make(
+                                                               GrResourceProvider* resourceProvider,
+                                                               const SkRect& circle,
+                                                               float sigma) {
+        float solidRadius;
+        float textureRadius;
+        sk_sp<GrTextureProxy> profile(create_profile_texture(resourceProvider, circle, sigma,
+                                                             &solidRadius, &textureRadius));
+        if (!profile) {
             return nullptr;
         }
-
-        resourceProvider->assignUniqueKeyToProxy(key, blurProfile.get());
+        return sk_sp<GrFragmentProcessor>(new GrCircleBlurFragmentProcessor(circle,
+                                                                            textureRadius,
+                                                                            solidRadius,
+                                                                            std::move(profile),
+                                                                            resourceProvider));
     }
-
-    return blurProfile;
-}
-
-//////////////////////////////////////////////////////////////////////////////
-
-sk_sp<GrFragmentProcessor> GrCircleBlurFragmentProcessor::Make(GrResourceProvider* resourceProvider,
-                                                               const SkRect& circle, float sigma) {
-    float solidRadius;
-    float textureRadius;
-    sk_sp<GrTextureProxy> profile(create_profile_texture(resourceProvider, circle, sigma,
-                                                         &solidRadius, &textureRadius));
-    if (!profile) {
-        return nullptr;
+#include "glsl/GrGLSLColorSpaceXformHelper.h"
+#include "glsl/GrGLSLFragmentProcessor.h"
+#include "glsl/GrGLSLFragmentShaderBuilder.h"
+#include "glsl/GrGLSLProgramBuilder.h"
+#include "SkSLCPP.h"
+#include "SkSLUtil.h"
+class GrGLSLCircleBlurFragmentProcessor : public GrGLSLFragmentProcessor {
+public:
+    GrGLSLCircleBlurFragmentProcessor() {}
+    void emitCode(EmitArgs& args) override {
+        GrGLSLFPFragmentBuilder* fragBuilder = args.fFragBuilder;
+        const GrCircleBlurFragmentProcessor& _outer = args.fFp.cast<GrCircleBlurFragmentProcessor>();
+        (void) _outer;
+        fCircleDataVar = args.fUniformHandler->addUniform(kFragment_GrShaderFlag, kVec4f_GrSLType, kDefault_GrSLPrecision, "circleData");
+        fragBuilder->codeAppendf("vec2 vec = vec2((sk_FragCoord.x - %s.x) * %s.w, (sk_FragCoord.y - %s.y) * %s.w);\nfloat dist = length(vec) + (0.5 - %s.z) * %s.w;\n%s = %s * texture(%s, vec2(dist, 0.5)).%s.w;\n", args.fUniformHandler->getUniformCStr(fCircleDataVar), args.fUniformHandler->getUniformCStr(fCircleDataVar), args.fUniformHandler->getUniformCStr(fCircleDataVar), args.fUniformHandler->getUniformCStr(fCircleDataVar), args.fUniformHandler->getUniformCStr(fCircleDataVar), args.fUniformHandler->getUniformCStr(fCircleDataVar), args.fOutputColor, args.fInputColor ? args.fInputColor : "vec4(1)", fragBuilder->getProgramBuilder()->samplerVariable(args.fTexSamplers[0]).c_str(), fragBuilder->getProgramBuilder()->samplerSwizzle(args.fTexSamplers[0]).c_str());
     }
-    return sk_sp<GrFragmentProcessor>(new GrCircleBlurFragmentProcessor(circle,
-                                                                        textureRadius, solidRadius,
-                                                                        std::move(profile)));
+private:
+    void onSetData(const GrGLSLProgramDataManager& data, const GrFragmentProcessor& _proc) override {
+        const GrCircleBlurFragmentProcessor& _outer = _proc.cast<GrCircleBlurFragmentProcessor>();
+        auto circleRect = _outer.circleRect();
+        (void) circleRect;
+        auto textureRadius = _outer.textureRadius();
+        (void) textureRadius;
+        auto solidRadius = _outer.solidRadius();
+        (void) solidRadius;
+        UniformHandle& blurProfileSampler = fBlurProfileSamplerVar;
+        (void) blurProfileSampler;
+        UniformHandle& circleData = fCircleDataVar;
+        (void) circleData;
+
+    data.set4f(circleData, circleRect.centerX(), circleRect.centerY(), solidRadius,
+               1.f / textureRadius);
+    }
+    UniformHandle fCircleDataVar;
+    UniformHandle fBlurProfileSamplerVar;
+};
+GrGLSLFragmentProcessor* GrCircleBlurFragmentProcessor::onCreateGLSLInstance() const {
+    return new GrGLSLCircleBlurFragmentProcessor();
 }
-
-//////////////////////////////////////////////////////////////////////////////
-
+void GrCircleBlurFragmentProcessor::onGetGLSLProcessorKey(const GrShaderCaps& caps, GrProcessorKeyBuilder* b) const {
+}
+bool GrCircleBlurFragmentProcessor::onIsEqual(const GrFragmentProcessor& other) const {
+    const GrCircleBlurFragmentProcessor& that = other.cast<GrCircleBlurFragmentProcessor>();
+    (void) that;
+    if (fCircleRect != that.fCircleRect) return false;
+    if (fTextureRadius != that.fTextureRadius) return false;
+    if (fSolidRadius != that.fSolidRadius) return false;
+    if (fBlurProfileSampler != that.fBlurProfileSampler) return false;
+    return true;
+}
 GR_DEFINE_FRAGMENT_PROCESSOR_TEST(GrCircleBlurFragmentProcessor);
-
 #if GR_TEST_UTILS
-sk_sp<GrFragmentProcessor> GrCircleBlurFragmentProcessor::TestCreate(GrProcessorTestData* d) {
-    SkScalar wh = d->fRandom->nextRangeScalar(100.f, 1000.f);
-    SkScalar sigma = d->fRandom->nextRangeF(1.f,10.f);
+sk_sp<GrFragmentProcessor> GrCircleBlurFragmentProcessor::TestCreate(GrProcessorTestData* testData) {
+
+    SkScalar wh = testData->fRandom->nextRangeScalar(100.f, 1000.f);
+    SkScalar sigma = testData->fRandom->nextRangeF(1.f,10.f);
     SkRect circle = SkRect::MakeWH(wh, wh);
-    return GrCircleBlurFragmentProcessor::Make(d->resourceProvider(), circle, sigma);
+    return GrCircleBlurFragmentProcessor::Make(testData->resourceProvider(), circle, sigma);
 }
 #endif
-
 #endif
diff --git a/src/effects/GrCircleBlurFragmentProcessor.fp b/src/effects/GrCircleBlurFragmentProcessor.fp
new file mode 100644
index 0000000..dec22e6
--- /dev/null
+++ b/src/effects/GrCircleBlurFragmentProcessor.fp
@@ -0,0 +1,289 @@
+in vec4 circleRect;  // bounds of the circle; @setData uploads only its center
+in float textureRadius;  // radial distance over which the profile texture is stretched
+in float solidRadius;  // radius that maps to the 0th entry of the profile texture
+in uniform sampler2D blurProfileSampler;  // blur profile, looked up by distance in main()
+
+// The data is formatted as:
+// x, y - the center of the circle
+// z    - inner radius that should map to 0th entry in the texture.
+// w    - the inverse of the distance over which the texture is stretched.
+uniform vec4 circleData;
+
+@optimizationFlags {
+    kCompatibleWithCoverageAsAlpha_OptimizationFlag
+}
+
+@constructorParams {
+    GrResourceProvider* resourceProvider
+}
+
+@make {
+    static sk_sp<GrFragmentProcessor> Make(GrResourceProvider* resourceProvider,
+                                           const SkRect& circle, float sigma);
+}
+
+@setData(data) {
+    data.set4f(circleData, circleRect.centerX(), circleRect.centerY(), solidRadius,
+               1.f / textureRadius);
+}
+
+@cpp {
+    #include "GrResourceProvider.h"
+
+    // Fills halfKernel[0..halfKernelSize) with the unnormalized right half of a Gaussian with the
+    // given sigma, i.e. exp(-t^2 / (2 sigma^2)) at t = i + 0.5. Returns the sum of those values.
+    static float make_unnormalized_half_kernel(float* halfKernel, int halfKernelSize, float sigma) {
+        const float invSigma = 1.f / sigma;
+        const float b = -0.5f * invSigma * invSigma;  // exponent scale: -1 / (2 * sigma^2)
+        float tot = 0.0f;
+        // Sample at pixel centers (t = 0.5, 1.5, ...) moving out from the kernel's center.
+        float t = 0.5f;
+        for (int i = 0; i < halfKernelSize; ++i) {
+            float value = expf(t * t * b);
+            tot += value;
+            halfKernel[i] = value;
+            t += 1.f;
+        }
+        return tot;
+    }
+
+    // Builds the right half of a Gaussian kernel in halfKernel, normalized to sum to 0.5, and
+    // writes its running sum to summedHalfKernel. Both arrays must hold halfKernelSize floats.
+    static void make_half_kernel_and_summed_table(float* halfKernel, float* summedHalfKernel,
+                                                  int halfKernelSize, float sigma) {
+        // Dividing by twice the raw sum makes the half kernel sum to 0.5 rather than 1.0.
+        const float tot = 2.f * make_unnormalized_half_kernel(halfKernel, halfKernelSize, sigma);
+        float sum = 0.f;
+        for (int i = 0; i < halfKernelSize; ++i) {
+            halfKernel[i] /= tot;
+            sum += halfKernel[i];
+            summedHalfKernel[i] = sum;  // inclusive prefix sum through entry i
+        }
+    }
+
+    // Applies the 1D half kernel vertically at numSteps x positions (firstX, firstX + 1, ...) to a
+    // circle of radius circleR centered at the origin. Columns that miss the circle evaluate to 0.
+    static void apply_kernel_in_y(float* results, int numSteps, float firstX, float circleR,
+                                  int halfKernelSize, const float* summedHalfKernelTable) {
+        float x = firstX;
+        for (int i = 0; i < numSteps; ++i, x += 1.f) {
+            if (x < -circleR || x > circleR) {
+                results[i] = 0;
+                continue;
+            }
+            float y = sqrtf(circleR * circleR - x * x);
+            // In the column at x we exit the circle at +y and -y.
+            // The summed table entry j actually reflects an offset of j + 0.5.
+            y -= 0.5f;
+            int yInt = SkScalarFloorToInt(y);
+            SkASSERT(yInt >= -1);
+            if (y < 0) {  // exit point was < 0.5 before the shift: scale the first table entry
+                results[i] = (y + 0.5f) * summedHalfKernelTable[0];
+            } else if (yInt >= halfKernelSize - 1) {  // column reaches past the end of the table
+                results[i] = 0.5f;
+            } else {
+                float yFrac = y - yInt;  // linearly interpolate between adjacent table entries
+                results[i] = (1.f - yFrac) * summedHalfKernelTable[yInt] +
+                             yFrac * summedHalfKernelTable[yInt + 1];
+            }
+        }
+    }
+
+    // Apply a Gaussian at point (evalX, 0) to a circle centered at the origin with radius circleR.
+    // This relies on having a half kernel computed for the Gaussian and a table of applications of
+    // the half kernel in y to columns at (evalX - halfKernel, evalX - halfKernel + 1, ..., evalX +
+    // halfKernel) passed in as yKernelEvaluations.
+    static uint8_t eval_at(float evalX, float circleR, const float* halfKernel, int halfKernelSize,
+                           const float* yKernelEvaluations) {
+        float acc = 0;
+
+        float x = evalX - halfKernelSize;
+        for (int i = 0; i < halfKernelSize; ++i, x += 1.f) {  // columns to the left of evalX
+            if (x < -circleR || x > circleR) {
+                continue;  // column misses the circle
+            }
+            float verticalEval = yKernelEvaluations[i];
+            acc += verticalEval * halfKernel[halfKernelSize - i - 1];  // mirrored kernel tap
+        }
+        for (int i = 0; i < halfKernelSize; ++i, x += 1.f) {  // columns to the right of evalX
+            if (x < -circleR || x > circleR) {
+                continue;  // column misses the circle
+            }
+            float verticalEval = yKernelEvaluations[i + halfKernelSize];
+            acc += verticalEval * halfKernel[i];
+        }
+        // Since we applied a half kernel in y we multiply acc by 2 (the circle is symmetric about
+        // the x axis).
+        return SkUnitScalarClampToByte(2.f * acc);
+    }
+
+    // This function creates a profile of a blurred circle. It does this by computing a kernel for
+    // half the Gaussian and a matching summed area table. The summed area table is used to compute
+    // an array of vertical applications of the half kernel to the circle along the x axis. The
+    // table of y evaluations has 2 * k + n entries where k is the size of the half kernel and n is
+    // the size of the profile being computed. Then for each of the n profile entries we walk out k
+    // steps in each horizontal direction multiplying the corresponding y evaluation by the half
+    // kernel entry and sum these values to compute the profile entry.
+    static uint8_t* create_circle_profile(float sigma, float circleR, int profileTextureWidth) {
+        const int numSteps = profileTextureWidth;
+        uint8_t* weights = new uint8_t[numSteps];  // returned to the caller, who must delete[] it
+
+        // The full kernel is 6 sigmas wide.
+        int halfKernelSize = SkScalarCeilToInt(6.0f*sigma);
+        // Round up to the next multiple of 2 and then divide by 2.
+        halfKernelSize = ((halfKernelSize + 1) & ~1) >> 1;
+
+        // Number of x steps at which to apply kernel in y to cover all the profile samples in x.
+        int numYSteps = numSteps + 2 * halfKernelSize;
+
+        SkAutoTArray<float> bulkAlloc(halfKernelSize + halfKernelSize + numYSteps);
+        float* halfKernel = bulkAlloc.get();  // layout: [halfKernel | summedKernel | yEvals]
+        float* summedKernel = bulkAlloc.get() + halfKernelSize;
+        float* yEvals = bulkAlloc.get() + 2 * halfKernelSize;
+        make_half_kernel_and_summed_table(halfKernel, summedKernel, halfKernelSize, sigma);
+
+        float firstX = -halfKernelSize + 0.5f;
+        apply_kernel_in_y(yEvals, numYSteps, firstX, circleR, halfKernelSize, summedKernel);
+
+        for (int i = 0; i < numSteps - 1; ++i) {  // the last entry is set explicitly below
+            float evalX = i + 0.5f;  // center of profile entry i
+            weights[i] = eval_at(evalX, circleR, halfKernel, halfKernelSize, yEvals + i);
+        }
+        // Ensure the tail of the Gaussian goes to zero.
+        weights[numSteps - 1] = 0;
+        return weights;
+    }
+
+    static uint8_t* create_half_plane_profile(int profileWidth) {  // caller must delete[] result
+        SkASSERT(!(profileWidth & 0x1));  // width must be even so it splits into two halves
+        // The full kernel is 6 sigmas wide.
+        float sigma = profileWidth / 6.f;
+        int halfKernelSize = profileWidth / 2;
+
+        SkAutoTArray<float> halfKernel(halfKernelSize);
+        uint8_t* profile = new uint8_t[profileWidth];
+
+        // The half kernel should sum to 0.5.
+        const float tot = 2.f * make_unnormalized_half_kernel(halfKernel.get(), halfKernelSize,
+                                                              sigma);
+        float sum = 0.f;
+        // Populate the profile from the right edge to the middle.
+        for (int i = 0; i < halfKernelSize; ++i) {
+            halfKernel[halfKernelSize - i - 1] /= tot;  // normalize while walking inward
+            sum += halfKernel[halfKernelSize - i - 1];
+            profile[profileWidth - i - 1] = SkUnitScalarClampToByte(sum);
+        }
+        // Populate the profile from the middle to the left edge (by flipping the half kernel and
+        // continuing the summation).
+        for (int i = 0; i < halfKernelSize; ++i) {
+            sum += halfKernel[i];
+            profile[halfKernelSize - i - 1] = SkUnitScalarClampToByte(sum);
+        }
+        // Ensure tail goes to 0.
+        profile[profileWidth - 1] = 0;
+        return profile;
+    }
+
+    static sk_sp<GrTextureProxy> create_profile_texture(GrResourceProvider* resourceProvider,
+                                                        const SkRect& circle,
+                                                        float sigma,
+                                                        float* solidRadius, float* textureRadius) {
+        float circleR = circle.width() / 2.0f;
+        // Profile textures are cached by the (quantized) ratio of sigma to circle radius. The
+        // texture width is fixed (kProfileTextureWidth), so it is not part of the key.
+        SkScalar sigmaToCircleRRatio = sigma / circleR;
+        // When sigma is really small this becomes equivalent to convolving a Gaussian with a
+        // half-plane. Similarly, in the extreme high ratio cases the circle becomes a point WRT the
+        // Gaussian and the profile texture is just a Gaussian evaluation. However, we haven't yet
+        // implemented this latter optimization.
+        sigmaToCircleRRatio = SkTMin(sigmaToCircleRRatio, 8.f);  // cap the ratio at 8
+        SkFixed sigmaToCircleRRatioFixed;
+        static const SkScalar kHalfPlaneThreshold = 0.1f;
+        bool useHalfPlaneApprox = false;
+        if (sigmaToCircleRRatio <= kHalfPlaneThreshold) {
+            useHalfPlaneApprox = true;
+            sigmaToCircleRRatioFixed = 0;  // every half-plane case shares key 0
+            *solidRadius = circleR - 3 * sigma;  // texture covers radii R - 3 sigma .. R + 3 sigma
+            *textureRadius = 6 * sigma;
+        } else {
+            // Convert to fixed point for the key.
+            sigmaToCircleRRatioFixed = SkScalarToFixed(sigmaToCircleRRatio);
+            // We shave off some bits to reduce the number of unique entries. We could probably
+            // shave off more than we do.
+            sigmaToCircleRRatioFixed &= ~0xff;
+            sigmaToCircleRRatio = SkFixedToScalar(sigmaToCircleRRatioFixed);
+            sigma = circleR * sigmaToCircleRRatio;  // re-derive sigma from the quantized ratio
+            *solidRadius = 0;  // texture covers radii 0 .. R + 3 sigma
+            *textureRadius = circleR + 3 * sigma;
+        }
+
+        static const GrUniqueKey::Domain kDomain = GrUniqueKey::GenerateDomain();
+        GrUniqueKey key;
+        GrUniqueKey::Builder builder(&key, kDomain, 1);
+        builder[0] = sigmaToCircleRRatioFixed;
+        builder.finish();
+
+        sk_sp<GrTextureProxy> blurProfile = resourceProvider->findProxyByUniqueKey(key);
+        if (!blurProfile) {
+            static constexpr int kProfileTextureWidth = 512;
+            GrSurfaceDesc texDesc;
+            texDesc.fWidth = kProfileTextureWidth;
+            texDesc.fHeight = 1;  // the profile is a single row
+            texDesc.fConfig = kAlpha_8_GrPixelConfig;
+
+            std::unique_ptr<uint8_t[]> profile(nullptr);  // owns the array the helpers allocate
+            if (useHalfPlaneApprox) {
+                profile.reset(create_half_plane_profile(kProfileTextureWidth));
+            } else {
+                // Rescale params to the size of the texture we're creating.
+                SkScalar scale = kProfileTextureWidth / *textureRadius;
+                profile.reset(create_circle_profile(sigma * scale, circleR * scale,
+                                                    kProfileTextureWidth));
+            }
+
+            blurProfile = GrSurfaceProxy::MakeDeferred(resourceProvider,
+                                                       texDesc, SkBudgeted::kYes, profile.get(), 0);
+            if (!blurProfile) {
+                return nullptr;
+            }
+
+            resourceProvider->assignUniqueKeyToProxy(key, blurProfile.get());
+        }
+
+        return blurProfile;
+    }
+
+    sk_sp<GrFragmentProcessor> GrCircleBlurFragmentProcessor::Make(
+                                                               GrResourceProvider* resourceProvider,
+                                                               const SkRect& circle,
+                                                               float sigma) {
+        float solidRadius;  // filled in by create_profile_texture() below
+        float textureRadius;  // filled in by create_profile_texture() below
+        sk_sp<GrTextureProxy> profile(create_profile_texture(resourceProvider, circle, sigma,
+                                                             &solidRadius, &textureRadius));
+        if (!profile) {  // no profile texture, so no processor can be built
+            return nullptr;
+        }
+        return sk_sp<GrFragmentProcessor>(new GrCircleBlurFragmentProcessor(circle,
+                                                                            textureRadius,
+                                                                            solidRadius,
+                                                                            std::move(profile),
+                                                                            resourceProvider));
+    }
+}
+
+void main() {
+    // We just want to compute "(length(vec) - circleData.z + 0.5) * circleData.w" but need to
+    // rearrange for precision.
+    vec2 vec = vec2((sk_FragCoord.x - circleData.x) * circleData.w,
+                    (sk_FragCoord.y - circleData.y) * circleData.w);
+    float dist = length(vec) + (0.5 - circleData.z) * circleData.w;  // profile lookup coordinate
+    sk_OutColor = sk_InColor * texture(blurProfileSampler, vec2(dist, 0.5)).a;  // scale by alpha
+}
+
+@test(testData) {
+    SkScalar wh = testData->fRandom->nextRangeScalar(100.f, 1000.f);
+    SkScalar sigma = testData->fRandom->nextRangeF(1.f,10.f);
+    SkRect circle = SkRect::MakeWH(wh, wh);
+    return GrCircleBlurFragmentProcessor::Make(testData->resourceProvider(), circle, sigma);
+}
\ No newline at end of file
diff --git a/src/effects/GrCircleBlurFragmentProcessor.h b/src/effects/GrCircleBlurFragmentProcessor.h
index 3ed4cfc..c01ea9b 100644
--- a/src/effects/GrCircleBlurFragmentProcessor.h
+++ b/src/effects/GrCircleBlurFragmentProcessor.h
@@ -1,73 +1,52 @@
 /*
- * Copyright 2015 Google Inc.
+ * Copyright 2017 Google Inc.
  *
  * Use of this source code is governed by a BSD-style license that can be
  * found in the LICENSE file.
  */
 
+/*
+ * This file was autogenerated from GrCircleBlurFragmentProcessor.fp; do not modify.
+ */
 #ifndef GrCircleBlurFragmentProcessor_DEFINED
 #define GrCircleBlurFragmentProcessor_DEFINED
-
-#include "SkString.h"
 #include "SkTypes.h"
-
 #if SK_SUPPORT_GPU
-
 #include "GrFragmentProcessor.h"
-#include "GrProcessorUnitTest.h"
-
-class GrResourceProvider;
-
-// This FP handles the special case of a blurred circle. It uses a 1D
-// profile that is just rotated about the origin of the circle.
+#include "GrCoordTransform.h"
+#include "effects/GrProxyMove.h"
 class GrCircleBlurFragmentProcessor : public GrFragmentProcessor {
 public:
-    static sk_sp<GrFragmentProcessor> Make(GrResourceProvider*, const SkRect& circle, float sigma);
+    SkRect circleRect() const { return fCircleRect; }
+    float textureRadius() const { return fTextureRadius; }
+    float solidRadius() const { return fSolidRadius; }
 
-    ~GrCircleBlurFragmentProcessor() override {}
-
-    const char* name() const override { return "CircleBlur"; }
-
-    SkString dumpInfo() const override {
-        SkString str;
-        str.appendf("Rect [L: %.2f, T: %.2f, R: %.2f, B: %.2f], solidR: %.2f, textureR: %.2f",
-                    fCircle.fLeft, fCircle.fTop, fCircle.fRight, fCircle.fBottom,
-                    fSolidRadius, fTextureRadius);
-        return str;
-    }
-
+    static sk_sp<GrFragmentProcessor> Make(GrResourceProvider* resourceProvider,
+                                           const SkRect& circle, float sigma);
+    const char* name() const override { return "CircleBlurFragmentProcessor"; }
 private:
-    // This nested GLSL processor implementation is defined in the cpp file.
-    class GLSLProcessor;
-
-    /**
-     * Creates a profile texture for the circle and sigma. The texture will have a height of 1.
-     * The x texture coord should map from 0 to 1 across the radius range of solidRadius to
-     * solidRadius + textureRadius.
-     */
-    GrCircleBlurFragmentProcessor(const SkRect& circle,
-                                  float textureRadius, float innerRadius,
-                                  sk_sp<GrTextureProxy> blurProfile);
-
-    GrGLSLFragmentProcessor* onCreateGLSLInstance() const override;
-
-    void onGetGLSLProcessorKey(const GrShaderCaps& caps, GrProcessorKeyBuilder* b) const override;
-
-    bool onIsEqual(const GrFragmentProcessor& other) const override {
-        const GrCircleBlurFragmentProcessor& cbfp = other.cast<GrCircleBlurFragmentProcessor>();
-        return fCircle == cbfp.fCircle && fSolidRadius == cbfp.fSolidRadius &&
-               fTextureRadius == cbfp.fTextureRadius;
+    GrCircleBlurFragmentProcessor(SkRect circleRect, float textureRadius, float solidRadius, sk_sp<GrTextureProxy> blurProfileSampler, 
+    GrResourceProvider* resourceProvider
+)
+    : INHERITED((OptimizationFlags) 
+    kCompatibleWithCoverageAsAlpha_OptimizationFlag
+)
+    , fCircleRect(circleRect)
+    , fTextureRadius(textureRadius)
+    , fSolidRadius(solidRadius)
+    , fBlurProfileSampler(std::move(blurProfileSampler)) {
+        this->addTextureSampler(&fBlurProfileSampler);
+        this->initClassID<GrCircleBlurFragmentProcessor>();
     }
-
-    SkRect              fCircle;
-    SkScalar            fSolidRadius;
-    float               fTextureRadius;
-    TextureSampler      fBlurProfileSampler;
-
+    GrGLSLFragmentProcessor* onCreateGLSLInstance() const override;
+    void onGetGLSLProcessorKey(const GrShaderCaps&,GrProcessorKeyBuilder*) const override;
+    bool onIsEqual(const GrFragmentProcessor&) const override;
     GR_DECLARE_FRAGMENT_PROCESSOR_TEST
-
+    SkRect fCircleRect;
+    float fTextureRadius;
+    float fSolidRadius;
+    TextureSampler fBlurProfileSampler;
     typedef GrFragmentProcessor INHERITED;
 };
-
 #endif
 #endif
diff --git a/src/gpu/effects/GrDitherEffect.cpp b/src/gpu/effects/GrDitherEffect.cpp
index 8c24d78..370e0ee 100644
--- a/src/gpu/effects/GrDitherEffect.cpp
+++ b/src/gpu/effects/GrDitherEffect.cpp
@@ -9,6 +9,7 @@
  * This file was autogenerated from GrDitherEffect.fp; do not modify.
  */
 #include "GrDitherEffect.h"
+#if SK_SUPPORT_GPU
 #include "glsl/GrGLSLColorSpaceXformHelper.h"
 #include "glsl/GrGLSLFragmentProcessor.h"
 #include "glsl/GrGLSLFragmentShaderBuilder.h"
@@ -45,3 +46,4 @@
     return GrDitherEffect::Make();
 }
 #endif
+#endif
diff --git a/src/gpu/effects/GrDitherEffect.h b/src/gpu/effects/GrDitherEffect.h
index cbb6264..d0bf9a9 100644
--- a/src/gpu/effects/GrDitherEffect.h
+++ b/src/gpu/effects/GrDitherEffect.h
@@ -10,6 +10,8 @@
  */
 #ifndef GrDitherEffect_DEFINED
 #define GrDitherEffect_DEFINED
+#include "SkTypes.h"
+#if SK_SUPPORT_GPU
 #include "GrFragmentProcessor.h"
 #include "GrCoordTransform.h"
 #include "effects/GrProxyMove.h"
@@ -31,3 +33,4 @@
     typedef GrFragmentProcessor INHERITED;
 };
 #endif
+#endif
diff --git a/src/sksl/SkSLCPPCodeGenerator.cpp b/src/sksl/SkSLCPPCodeGenerator.cpp
index 09622f3..72dcd01 100644
--- a/src/sksl/SkSLCPPCodeGenerator.cpp
+++ b/src/sksl/SkSLCPPCodeGenerator.cpp
@@ -161,6 +161,19 @@
     }
 }
 
+String CPPCodeGenerator::getSamplerHandle(const Variable& var) {  // C++ expr for var's sampler
+    int samplerCount = 0;  // number of sampler-typed parameters seen before the current one
+    for (const auto param : fSectionAndParameterHelper.fParameters) {
+        if (&var == param) {  // matched by identity, not by name
+            return "args.fTexSamplers[" + to_string(samplerCount) + "]";
+        }
+        if (param->fType.kind() == Type::kSampler_Kind) {
+            ++samplerCount;
+        }
+    }
+    ABORT("should have found sampler in parameters\n");  // var is not one of the parameters
+}
+
 void CPPCodeGenerator::writeVariableReference(const VariableReference& ref) {
     switch (ref.fVariable.fModifiers.fLayout.fBuiltin) {
         case SK_INCOLOR_BUILTIN:
@@ -173,20 +186,10 @@
             break;
         default:
             if (ref.fVariable.fType.kind() == Type::kSampler_Kind) {
-                int samplerCount = 0;
-                for (const auto param : fSectionAndParameterHelper.fParameters) {
-                    if (&ref.fVariable == param) {
-                        this->write("%s");
-                        fFormatArgs.push_back("fragBuilder->getProgramBuilder()->samplerVariable("
-                                              "args.fTexSamplers[" + to_string(samplerCount) +
-                                              "]).c_str()");
-                        return;
-                    }
-                    if (param->fType.kind() == Type::kSampler_Kind) {
-                        ++samplerCount;
-                    }
-                }
-                ABORT("should have found sampler in parameters\n");
+                this->write("%s");
+                fFormatArgs.push_back("fragBuilder->getProgramBuilder()->samplerVariable(" +
+                                      this->getSamplerHandle(ref.fVariable) + ").c_str()");
+                return;
             }
             if (ref.fVariable.fModifiers.fFlags & Modifiers::kUniform_Flag) {
                 this->write("%s");
@@ -222,6 +225,18 @@
     }
 }
 
+void CPPCodeGenerator::writeFunctionCall(const FunctionCall& c) {
+    INHERITED::writeFunctionCall(c);  // emit the call itself first
+    if (c.fFunction.fBuiltin && c.fFunction.fName == "texture") {
+        this->write(".%s");  // format slot for the swizzle, supplied through fFormatArgs below
+        ASSERT(c.fArguments.size() >= 1);
+        ASSERT(c.fArguments[0]->fKind == Expression::kVariableReference_Kind);  // cast relies on it
+        String sampler = this->getSamplerHandle(((VariableReference&) *c.fArguments[0]).fVariable);
+        fFormatArgs.push_back("fragBuilder->getProgramBuilder()->samplerSwizzle(" + sampler +
+                              ").c_str()");
+    }
+}
+
 void CPPCodeGenerator::writeFunction(const FunctionDefinition& f) {
     if (f.fDeclaration.fName == "main") {
         fFunctionHeader = "";
@@ -542,7 +557,8 @@
     const char* baseName = fName.c_str();
     const char* fullName = fFullName.c_str();
     this->writef(kFragmentProcessorHeader, fullName);
-    this->writef("#include \"%s.h\"\n", fullName);
+    this->writef("#include \"%s.h\"\n"
+                 "#if SK_SUPPORT_GPU\n", fullName);
     this->writeSection(CPP_SECTION);
     this->writef("#include \"glsl/GrGLSLColorSpaceXformHelper.h\"\n"
                  "#include \"glsl/GrGLSLFragmentProcessor.h\"\n"
@@ -593,6 +609,7 @@
                 "}\n");
     this->writeTest();
     this->writeSection(CPP_END_SECTION);
+    this->write("#endif\n");
     result &= 0 == fErrors.errorCount();
     return result;
 }
diff --git a/src/sksl/SkSLCPPCodeGenerator.h b/src/sksl/SkSLCPPCodeGenerator.h
index c7388ad..0f6da5f 100644
--- a/src/sksl/SkSLCPPCodeGenerator.h
+++ b/src/sksl/SkSLCPPCodeGenerator.h
@@ -39,6 +39,10 @@
 
     void writeVariableReference(const VariableReference& ref) override;
 
+    String getSamplerHandle(const Variable& var);
+
+    void writeFunctionCall(const FunctionCall& c) override;
+
     void writeFunction(const FunctionDefinition& f) override;
 
     void writeSetting(const Setting& s) override;
diff --git a/src/sksl/SkSLGLSLCodeGenerator.h b/src/sksl/SkSLGLSLCodeGenerator.h
index aaf0369..5716bde 100644
--- a/src/sksl/SkSLGLSLCodeGenerator.h
+++ b/src/sksl/SkSLGLSLCodeGenerator.h
@@ -128,7 +128,7 @@
 
     void writeMinAbsHack(Expression& absExpr, Expression& otherExpr);
 
-    void writeFunctionCall(const FunctionCall& c);
+    virtual void writeFunctionCall(const FunctionCall& c);
 
     void writeConstructor(const Constructor& c);
 
diff --git a/src/sksl/SkSLHCodeGenerator.cpp b/src/sksl/SkSLHCodeGenerator.cpp
index 405fb0e..cd3f7f2 100644
--- a/src/sksl/SkSLHCodeGenerator.cpp
+++ b/src/sksl/SkSLHCodeGenerator.cpp
@@ -201,6 +201,8 @@
                  "#define %s_DEFINED\n",
                  fFullName.c_str(),
                  fFullName.c_str());
+    this->writef("#include \"SkTypes.h\"\n"
+                 "#if SK_SUPPORT_GPU\n");
     this->writeSection(HEADER_SECTION);
     this->writef("#include \"GrFragmentProcessor.h\"\n"
                  "#include \"GrCoordTransform.h\"\n"
@@ -231,7 +233,8 @@
     this->writef("    typedef GrFragmentProcessor INHERITED;\n"
                 "};\n");
     this->writeSection(HEADER_END_SECTION);
-    this->writef("#endif\n");
+    this->writef("#endif\n"
+                 "#endif\n");
     return 0 == fErrors.errorCount();
 }
 
