Implement stencil clipping in mixed sampled render targets

This change enables multisampled clipping for mixed sampled
render targets.  Previously clipping in mixed samples config
behaved the same as in the gpu config.

To retrofit non-MSAA draw methods, programmable sample locations
are used to colocate all samples at the pixel center (0.5, 0.5).
Requires support for NV_sample_locations or ARB_sample_locations.

BUG=skia:4399

Review URL: https://codereview.chromium.org/1232103002
diff --git a/include/gpu/GrCaps.h b/include/gpu/GrCaps.h
index 248135d..0f376f9 100644
--- a/include/gpu/GrCaps.h
+++ b/include/gpu/GrCaps.h
@@ -63,6 +63,7 @@
     bool dstReadInShaderSupport() const { return fDstReadInShaderSupport; }
     bool dualSourceBlendingSupport() const { return fDualSourceBlendingSupport; }
     bool mixedSamplesSupport() const { return fMixedSamplesSupport; }
+    bool programmableSampleLocationsSupport() const { return fProgrammableSampleLocationsSupport; }
 
     /**
     * Get the precision info for a variable of type kFloat_GrSLType, kVec2f_GrSLType, etc in a
@@ -93,6 +94,7 @@
     bool fDstReadInShaderSupport : 1;
     bool fDualSourceBlendingSupport : 1;
     bool fMixedSamplesSupport : 1;
+    bool fProgrammableSampleLocationsSupport : 1;
 
     bool fShaderPrecisionVaries;
     PrecisionInfo fFloatPrecisions[kGrShaderTypeCount][kGrSLPrecisionCount];
diff --git a/include/gpu/gl/GrGLFunctions.h b/include/gpu/gl/GrGLFunctions.h
index aea6d2c..7fe82d6 100644
--- a/include/gpu/gl/GrGLFunctions.h
+++ b/include/gpu/gl/GrGLFunctions.h
@@ -390,6 +390,8 @@
 typedef GrGLvoid (GR_GL_FUNCTION_TYPE* GrGLGetVertexArrayPointeri_vProc)(GrGLuint vaobj, GrGLuint index, GrGLenum pname, GrGLvoid **param);
 typedef GrGLvoid* (GR_GL_FUNCTION_TYPE* GrGLMapNamedBufferRangeProc)(GrGLuint buffer, GrGLintptr offset, GrGLsizeiptr length, GrGLbitfield access);
 typedef GrGLvoid (GR_GL_FUNCTION_TYPE* GrGLFlushMappedNamedBufferRangeProc)(GrGLuint buffer, GrGLintptr offset, GrGLsizeiptr length);
+// OpenGL 4.5
+typedef GrGLvoid (GR_GL_FUNCTION_TYPE* GrGLNamedFramebufferParameteriProc)(GrGLuint framebuffer, GrGLenum pname, GrGLint param);
 
 /* KHR_debug */
 typedef GrGLvoid (GR_GL_FUNCTION_TYPE* GrGLDebugMessageControlProc)(GrGLenum source, GrGLenum type, GrGLenum severity, GrGLsizei count, const GrGLuint* ids, GrGLboolean enabled);
diff --git a/include/gpu/gl/GrGLInterface.h b/include/gpu/gl/GrGLInterface.h
index bdbedd9..c4748c7 100644
--- a/include/gpu/gl/GrGLInterface.h
+++ b/include/gpu/gl/GrGLInterface.h
@@ -490,6 +490,8 @@
         GLPtr<GrGLGetVertexArrayPointeri_vProc> fGetVertexArrayPointeri_v;
         GLPtr<GrGLMapNamedBufferRangeProc> fMapNamedBufferRange;
         GLPtr<GrGLFlushMappedNamedBufferRangeProc> fFlushMappedNamedBufferRange;
+        // OpenGL 4.5
+        GLPtr<GrGLNamedFramebufferParameteriProc> fNamedFramebufferParameteri;
 
         /* KHR_debug */
         GLPtr<GrGLDebugMessageControlProc> fDebugMessageControl;
diff --git a/src/gpu/GrCaps.cpp b/src/gpu/GrCaps.cpp
index 1bcdb20..cab44b9 100644
--- a/src/gpu/GrCaps.cpp
+++ b/src/gpu/GrCaps.cpp
@@ -16,6 +16,7 @@
     fDstReadInShaderSupport = false;
     fDualSourceBlendingSupport = false;
     fMixedSamplesSupport = false;
+    fProgrammableSampleLocationsSupport = false;
     fShaderPrecisionVaries = false;
 }
 
@@ -46,14 +47,15 @@
 SkString GrShaderCaps::dump() const {
     SkString r;
     static const char* gNY[] = { "NO", "YES" };
-    r.appendf("Shader Derivative Support          : %s\n", gNY[fShaderDerivativeSupport]);
-    r.appendf("Geometry Shader Support            : %s\n", gNY[fGeometryShaderSupport]);
-    r.appendf("Path Rendering Support             : %s\n", gNY[fPathRenderingSupport]);
-    r.appendf("Dst Read In Shader Support         : %s\n", gNY[fDstReadInShaderSupport]);
-    r.appendf("Dual Source Blending Support       : %s\n", gNY[fDualSourceBlendingSupport]);
-    r.appendf("Mixed Samples Support              : %s\n", gNY[fMixedSamplesSupport]);
+    r.appendf("Shader Derivative Support             : %s\n", gNY[fShaderDerivativeSupport]);
+    r.appendf("Geometry Shader Support               : %s\n", gNY[fGeometryShaderSupport]);
+    r.appendf("Path Rendering Support                : %s\n", gNY[fPathRenderingSupport]);
+    r.appendf("Dst Read In Shader Support            : %s\n", gNY[fDstReadInShaderSupport]);
+    r.appendf("Dual Source Blending Support          : %s\n", gNY[fDualSourceBlendingSupport]);
+    r.appendf("Mixed Samples Support                 : %s\n", gNY[fMixedSamplesSupport]);
+    r.appendf("Programmable Sample Locations Support : %s\n", gNY[fProgrammableSampleLocationsSupport]);
 
-    r.appendf("Shader Float Precisions (varies: %s):\n", gNY[fShaderPrecisionVaries]);
+    r.appendf("Shader Float Precisions (varies: %s)  :\n", gNY[fShaderPrecisionVaries]);
 
     for (int s = 0; s < kGrShaderTypeCount; ++s) {
         GrShaderType shaderType = static_cast<GrShaderType>(s);
diff --git a/src/gpu/GrClipMaskManager.cpp b/src/gpu/GrClipMaskManager.cpp
index 0ab1600..0df74a6 100644
--- a/src/gpu/GrClipMaskManager.cpp
+++ b/src/gpu/GrClipMaskManager.cpp
@@ -306,7 +306,7 @@
     }
 
     // If MSAA is enabled we can do everything in the stencil buffer.
-    if (0 == rt->numColorSamples() && requiresAA) {
+    if (0 == rt->numStencilSamples() && requiresAA) {
         GrTexture* result = nullptr;
 
         // The top-left of the mask corresponds to the top-left corner of the bounds.
@@ -757,7 +757,7 @@
             pipelineBuilder.setDisableColorXPFactory();
 
             // if the target is MSAA then we want MSAA enabled when the clip is soft
-            if (rt->isUnifiedMultisampled()) {
+            if (rt->isStencilBufferMultisampled()) {
                 pipelineBuilder.setState(GrPipelineBuilder::kHWAntialias_Flag, element->isAA());
             }
 
diff --git a/src/gpu/gl/GrGLAssembleInterface.cpp b/src/gpu/gl/GrGLAssembleInterface.cpp
index f19a1cb..d7a0af8 100644
--- a/src/gpu/gl/GrGLAssembleInterface.cpp
+++ b/src/gpu/gl/GrGLAssembleInterface.cpp
@@ -450,6 +450,10 @@
         }
     }
 
+    if (glVer >= GR_GL_VER(4,5)) {
+        GET_PROC(NamedFramebufferParameteri);
+    }
+
     if (glVer >= GR_GL_VER(4,3) || extensions.has("GL_KHR_debug")) {
         // KHR_debug defines these methods to have no suffix in an OpenGL (not ES) context.
         GET_PROC(DebugMessageControl);
diff --git a/src/gpu/gl/GrGLCaps.cpp b/src/gpu/gl/GrGLCaps.cpp
index 6522e1b..b97022f 100644
--- a/src/gpu/gl/GrGLCaps.cpp
+++ b/src/gpu/gl/GrGLCaps.cpp
@@ -355,6 +355,10 @@
         fDiscardRenderTargetSupport = false;
         fInvalidateFBType = kNone_InvalidateFBType;
     }
+    glslCaps->fProgrammableSampleLocationsSupport =
+        ctxInfo.hasExtension("GL_NV_sample_locations") ||
+        ctxInfo.hasExtension("GL_ARB_sample_locations");
+
 
     /**************************************************************************
      * GrCaps fields
diff --git a/src/gpu/gl/GrGLDefines.h b/src/gpu/gl/GrGLDefines.h
index cc588b9..89ff56b 100644
--- a/src/gpu/gl/GrGLDefines.h
+++ b/src/gpu/gl/GrGLDefines.h
@@ -889,6 +889,9 @@
 #define GR_GL_MULTISAMPLE_RASTERIZATION_ALLOWED             0x932B
 #define GR_GL_EFFECTIVE_RASTER_SAMPLES                      0x932C
 
+/* GL_NV_sample_locations and GL_ARB_sample_locations */
+#define GR_GL_FRAMEBUFFER_PROGRAMMABLE_SAMPLE_LOCATIONS     0x9342
+
 /* GL_KHR_debug */
 #define GR_GL_DEBUG_OUTPUT                                  0x92E0
 #define GR_GL_DEBUG_OUTPUT_SYNCHRONOUS                      0x8242
diff --git a/src/gpu/gl/GrGLGpu.cpp b/src/gpu/gl/GrGLGpu.cpp
index 5388387..cd6bf39 100644
--- a/src/gpu/gl/GrGLGpu.cpp
+++ b/src/gpu/gl/GrGLGpu.cpp
@@ -1476,7 +1476,7 @@
     GrGLRenderTarget* glRT = static_cast<GrGLRenderTarget*>(pipeline.getRenderTarget());
     this->flushStencil(pipeline.getStencil());
     this->flushScissor(pipeline.getScissorState(), glRT->getViewport(), glRT->origin());
-    this->flushHWAAState(glRT, pipeline.isHWAntialiasState());
+    this->flushHWAAState(glRT, pipeline.isHWAntialiasState(), !pipeline.getStencil().isDisabled());
 
     // This must come after textures are flushed because a texture may need
     // to be msaa-resolved (which will modify bound FBO state).
@@ -1899,6 +1899,22 @@
     return true;
 }
 
+void GrGLGpu::setColocatedSampleLocations(GrRenderTarget* rt, bool useColocatedSampleLocations) {
+    GrGLRenderTarget* target = static_cast<GrGLRenderTarget*>(rt->asRenderTarget());
+    SkASSERT(0 != target->renderFBOID());
+
+    if (!rt->isStencilBufferMultisampled() ||
+        useColocatedSampleLocations == target->usesColocatedSampleLocations()) {
+        return;
+    }
+
+    GL_CALL(NamedFramebufferParameteri(target->renderFBOID(),
+                                       GR_GL_FRAMEBUFFER_PROGRAMMABLE_SAMPLE_LOCATIONS,
+                                       useColocatedSampleLocations));
+
+    target->flagAsUsingColocatedSampleLocations(useColocatedSampleLocations);
+}
+
 void GrGLGpu::flushRenderTarget(GrGLRenderTarget* target, const SkIRect* bound) {
 
     SkASSERT(target);
@@ -2146,9 +2162,19 @@
     }
 }
 
-void GrGLGpu::flushHWAAState(GrRenderTarget* rt, bool useHWAA) {
+void GrGLGpu::flushHWAAState(GrRenderTarget* rt, bool useHWAA, bool stencilEnabled) {
     SkASSERT(!useHWAA || rt->isStencilBufferMultisampled());
 
+    if (rt->hasMixedSamples() && stencilEnabled &&
+        this->glCaps().glslCaps()->programmableSampleLocationsSupport()) {
+        if (useHWAA) {
+            this->setColocatedSampleLocations(rt, false);
+        } else {
+            this->setColocatedSampleLocations(rt, true);
+        }
+        useHWAA = true;
+    }
+
     if (this->glCaps().multisampleDisableSupport()) {
         if (useHWAA) {
             if (kYes_TriState != fMSAAEnabled) {
@@ -2928,7 +2954,7 @@
     this->flushBlend(blendInfo);
     this->flushColorWrite(true);
     this->flushDrawFace(GrPipelineBuilder::kBoth_DrawFace);
-    this->flushHWAAState(dstRT, false);
+    this->flushHWAAState(dstRT, false, false);
     this->disableScissor();
     GrStencilSettings stencil;
     stencil.setDisabled();
diff --git a/src/gpu/gl/GrGLGpu.h b/src/gpu/gl/GrGLGpu.h
index 2387b44..dee2012 100644
--- a/src/gpu/gl/GrGLGpu.h
+++ b/src/gpu/gl/GrGLGpu.h
@@ -255,12 +255,16 @@
     // ensures that such operations don't negatively interact with tracking bound textures.
     void setScratchTextureUnit();
 
+    // Colocates all samples at the pixel center (or restores the default sample locations) for
+    // a render target with a multisampled stencil buffer; allows coverage-based AA draws in MSAA.
+    void setColocatedSampleLocations(GrRenderTarget* rt, bool useColocatedSampleLocations);
+
     // bounds is region that may be modified and therefore has to be resolved.
     // nullptr means whole target. Can be an empty rect.
     void flushRenderTarget(GrGLRenderTarget*, const SkIRect* bounds);
 
     void flushStencil(const GrStencilSettings&);
-    void flushHWAAState(GrRenderTarget* rt, bool useHWAA);
+    void flushHWAAState(GrRenderTarget* rt, bool useHWAA, bool stencilEnabled);
 
     bool configToGLFormats(GrPixelConfig config,
                            bool getSizedInternal,
diff --git a/src/gpu/gl/GrGLInterface.cpp b/src/gpu/gl/GrGLInterface.cpp
index ec51797..ecd4003 100644
--- a/src/gpu/gl/GrGLInterface.cpp
+++ b/src/gpu/gl/GrGLInterface.cpp
@@ -701,6 +701,12 @@
         }
     }
 
+    if (kGL_GrGLStandard == fStandard && glVer >= GR_GL_VER(4,5)) {
+        if (nullptr == fFunctions.fNamedFramebufferParameteri) {
+            RETURN_FALSE_INTERFACE
+        }
+    }
+
     if ((kGL_GrGLStandard == fStandard && glVer >= GR_GL_VER(4,3)) ||
         fExtensions.has("GL_KHR_debug")) {
         if (nullptr == fFunctions.fDebugMessageControl ||
diff --git a/src/gpu/gl/GrGLPathRendering.cpp b/src/gpu/gl/GrGLPathRendering.cpp
index 78f78ba..a74014b 100644
--- a/src/gpu/gl/GrGLPathRendering.cpp
+++ b/src/gpu/gl/GrGLPathRendering.cpp
@@ -100,7 +100,7 @@
     SkISize size = SkISize::Make(rt->width(), rt->height());
     this->setProjectionMatrix(*args.fViewMatrix, size, rt->origin());
     gpu->flushScissor(*args.fScissor, rt->getViewport(), rt->origin());
-    gpu->flushHWAAState(rt, args.fUseHWAA);
+    gpu->flushHWAAState(rt, args.fUseHWAA, true);
     gpu->flushRenderTarget(rt, nullptr);
 
     const GrGLPath* glPath = static_cast<const GrGLPath*>(path);
diff --git a/src/gpu/gl/GrGLRenderTarget.h b/src/gpu/gl/GrGLRenderTarget.h
index d1365ef..ce04ae0 100644
--- a/src/gpu/gl/GrGLRenderTarget.h
+++ b/src/gpu/gl/GrGLRenderTarget.h
@@ -70,6 +70,21 @@
     // components seperately.
     void dumpMemoryStatistics(SkTraceMemoryDump* traceMemoryDump) const override;
 
+    /**
+     * @return true if sample locations colocated at pixel center have been set for this
+     *         render target.  Requires NV_sample_locations or ARB_sample_locations.
+     */
+    bool usesColocatedSampleLocations() const {
+        return fUsesColocatedSampleLocations;
+    }
+
+    /**
+     * Flag render target as using or not using sample locations colocated at pixel center.
+     */
+    void flagAsUsingColocatedSampleLocations(bool useColocatedSampleLocations) {
+        fUsesColocatedSampleLocations = useColocatedSampleLocations;
+    }
+
 protected:
     // The public constructor registers this object with the cache. However, only the most derived
     // class should register with the cache. This constructor does not do the registration and
@@ -116,6 +131,10 @@
     // release zero out the IDs and the cache needs to know the size even after those actions.
     size_t      fGpuMemorySize;
 
+    // True if sample locations colocated at pixel center are currently in use, false if default
+    // sample locations are currently in use.
+    bool        fUsesColocatedSampleLocations;
+
     typedef GrRenderTarget INHERITED;
 };