Add MSAA and non-aa modes to GrFillRRect Op

Adds a non-aa mode and an MSAA mode that uses the sample mask. Also
adds a new cap to decide whether we prefer this new sample mask Op for
large round rects, or whether it's faster to just continue drawing
them as paths like before.

Bug: skia:
Change-Id: Ic344ace26e7889c312c3040ad345b4d9a717f96d
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/204135
Reviewed-by: Brian Salomon <bsalomon@google.com>
Commit-Queue: Chris Dalton <csmartdalton@google.com>
diff --git a/src/gpu/glsl/GrGLSLFragmentShaderBuilder.cpp b/src/gpu/glsl/GrGLSLFragmentShaderBuilder.cpp
index 891e973..5c4b1dc 100644
--- a/src/gpu/glsl/GrGLSLFragmentShaderBuilder.cpp
+++ b/src/gpu/glsl/GrGLSLFragmentShaderBuilder.cpp
@@ -91,7 +91,8 @@
     return "_sampleOffsets";
 }
 
-void GrGLSLFragmentShaderBuilder::maskOffMultisampleCoverage(const char* mask, Scope scope) {
+void GrGLSLFragmentShaderBuilder::maskOffMultisampleCoverage(
+        const char* mask, ScopeFlags scopeFlags) {
     const GrShaderCaps& shaderCaps = *fProgramBuilder->shaderCaps();
     if (!shaderCaps.sampleVariablesSupport()) {
         SkDEBUGFAIL("Attempted to mask sample coverage without support.");
@@ -101,18 +102,60 @@
         this->addFeature(1 << kSampleVariables_GLSLPrivateFeature, extension);
     }
 
-    if (!fHasInitializedSampleMask && Scope::kTopLevel == scope) {
-        this->codeAppendf("gl_SampleMask[0] = (%s);", mask);
-        fHasInitializedSampleMask = true;
-        return;
+    if (!fHasModifiedSampleMask) {
+        fHasModifiedSampleMask = true;
+        if (ScopeFlags::kTopLevel != scopeFlags) {
+            this->codePrependf("gl_SampleMask[0] = ~0;");
+        }
+        if (!(ScopeFlags::kInsideLoop & scopeFlags)) {
+            this->codeAppendf("gl_SampleMask[0] = (%s);", mask);
+            return;
+        }
     }
-    if (!fHasInitializedSampleMask) {
-        this->codePrependf("gl_SampleMask[0] = ~0;");
-        fHasInitializedSampleMask = true;
-    }
+
     this->codeAppendf("gl_SampleMask[0] &= (%s);", mask);
 }
 
+void GrGLSLFragmentShaderBuilder::applyFnToMultisampleMask(
+        const char* fn, const char* grad, ScopeFlags scopeFlags) {
+    SkASSERT(CustomFeatures::kSampleLocations & fProgramBuilder->header().processorFeatures());
+    SkDEBUGCODE(fUsedProcessorFeaturesThisStage_DebugOnly |= CustomFeatures::kSampleLocations);
+    SkDEBUGCODE(fUsedProcessorFeaturesAllStages_DebugOnly |= CustomFeatures::kSampleLocations);
+    // Emits code that evaluates |fn| (negative == inside) per sample and masks off the rest.
+    int sampleCnt = fProgramBuilder->effectiveSampleCnt();
+    SkASSERT(sampleCnt > 1);
+    // Scope the generated locals ("grad", "fnwidth", "mask") inside their own shader block.
+    this->codeAppendf("{");
+    // |grad| names the gradient of |fn|; when null, it is derived with HW derivatives instead.
+    if (!grad) {
+        SkASSERT(fProgramBuilder->shaderCaps()->shaderDerivativeSupport());
+        // In order to use HW derivatives, our neighbors within the same primitive must also be
+        // executing the same code. A per-pixel branch makes this pre-condition impossible to
+        // fulfill.
+        SkASSERT(!(ScopeFlags::kInsidePerPixelBranch & scopeFlags));
+        this->codeAppendf("float2 grad = float2(dFdx(%s), dFdy(%s));", fn, fn);
+        this->codeAppendf("float fnwidth = fwidth(%s);", fn);
+        grad = "grad";
+    } else {
+        this->codeAppendf("float fnwidth = abs(%s.x) + abs(%s.y);", grad, grad);
+    }
+    // Build the coverage mask: bit i is set when |fn|, extrapolated to sample i, is negative.
+    this->codeAppendf("int mask = 0;");
+    this->codeAppendf("if (%s*2 < fnwidth) {", fn);  // Are ANY samples inside the implicit fn?
+    this->codeAppendf(    "if (%s*-2 >= fnwidth) {", fn);  // Are ALL samples inside the implicit?
+    this->codeAppendf(        "mask = ~0;");
+    this->codeAppendf(    "} else for (int i = 0; i < %i; ++i) {", sampleCnt);
+    this->codeAppendf(        "float fnsample = dot(%s, _sampleOffsets[i]) + %s;", grad, fn);
+    this->codeAppendf(        "if (fnsample < 0) {");
+    this->codeAppendf(            "mask |= (1 << i);");
+    this->codeAppendf(        "}");
+    this->codeAppendf(    "}");
+    this->codeAppendf("}");
+    this->maskOffMultisampleCoverage("mask", scopeFlags);
+    // Close the block opened at the top of the generated code.
+    this->codeAppendf("}");
+}
+
 const char* GrGLSLFragmentShaderBuilder::dstColor() {
     SkDEBUGCODE(fHasReadDstColorThisStage_DebugOnly = true;)
 
@@ -217,10 +260,10 @@
                      == fUsedProcessorFeaturesAllStages_DebugOnly);
 
     if (CustomFeatures::kSampleLocations & fProgramBuilder->header().processorFeatures()) {
-        this->definitions().append("const float2 _sampleOffsets[] = float2[](");
         const GrPipeline& pipeline = fProgramBuilder->pipeline();
         const SkTArray<SkPoint>& sampleLocations =
                 fProgramBuilder->renderTarget()->renderTargetPriv().getSampleLocations(pipeline);
+        this->definitions().append("const float2 _sampleOffsets[] = float2[](");
         for (int i = 0; i < sampleLocations.count(); ++i) {
             SkPoint offset = sampleLocations[i] - SkPoint::Make(.5f, .5f);
             if (kBottomLeft_GrSurfaceOrigin == this->getSurfaceOrigin()) {