Batch draws into the largest possible triangle fans when drawing convex
edge-antialiased (edge AA) polygons, so we minimize state changes and GL
calls.  This requires querying GL for the maximum number of fragment uniform
vectors.  The shader generator now also produces a custom shader for the
number of relevant edges, instead of always emitting a fixed six-edge shader.
This gives a ~5X speedup on the "Shapes" SampleApp.

Review URL:  http://codereview.appspot.com/4536070/



git-svn-id: http://skia.googlecode.com/svn/trunk@1380 2bbb7eff-a529-9590-31e7-b0007b416f81
diff --git a/gpu/src/GrDrawTarget.cpp b/gpu/src/GrDrawTarget.cpp
index efee91e..250fbe1 100644
--- a/gpu/src/GrDrawTarget.cpp
+++ b/gpu/src/GrDrawTarget.cpp
@@ -483,7 +483,7 @@
 
 bool GrDrawTarget::canDisableBlend() const {
     // If we're using edge antialiasing, we can't force blend off.
-    if (fCurrDrawState.fFlagBits & kEdgeAA_StateBit) {
+    if (fCurrDrawState.fEdgeAANumEdges > 0) {
         return false;
     }
 
@@ -535,8 +535,10 @@
 }
 
 ///////////////////////////////////////////////////////////////////////////////
-void GrDrawTarget::setEdgeAAData(const float edges[18]) {
-    memcpy(fCurrDrawState.fEdgeAAEdges, edges, sizeof(fCurrDrawState.fEdgeAAEdges));
+void GrDrawTarget::setEdgeAAData(const Edge* edges, int numEdges) {
+    GrAssert(numEdges <= kMaxEdges);
+    memcpy(fCurrDrawState.fEdgeAAEdges, edges, numEdges * sizeof(Edge));
+    fCurrDrawState.fEdgeAANumEdges = numEdges;
 }
 
 
diff --git a/gpu/src/GrGLProgram.cpp b/gpu/src/GrGLProgram.cpp
index 0f82d2a..9a9e3c2 100644
--- a/gpu/src/GrGLProgram.cpp
+++ b/gpu/src/GrGLProgram.cpp
@@ -361,11 +361,6 @@
         }
     }
 
-    if (fProgramDesc.fUsesEdgeAA) {
-        segments.fFSUnis.append("uniform vec3 " EDGES_UNI_NAME "[6];\n");
-        programData->fUniLocations.fEdgesUni = kUseUniform;
-    }
-
     if (fProgramDesc.fEmitsPointSize){
         segments.fVSCode.append("\tgl_PointSize = 1.0;\n");
     }
@@ -457,17 +452,36 @@
     // we will want to compute coverage for some blend when there is no
     // color (when dual source blending is enabled). But for now we have this if
     if (!wroteFragColorZero) {
-        if (fProgramDesc.fUsesEdgeAA) {
-            // FIXME:  put the a's in a loop
+        if (fProgramDesc.fEdgeAANumEdges > 0) {
+            segments.fFSUnis.append("uniform vec3 " EDGES_UNI_NAME "[");
+            segments.fFSUnis.appendS32(fProgramDesc.fEdgeAANumEdges);
+            segments.fFSUnis.append("];\n");
+            programData->fUniLocations.fEdgesUni = kUseUniform;
+            int count = fProgramDesc.fEdgeAANumEdges;
             segments.fFSCode.append(
-                "\tvec3 pos = vec3(gl_FragCoord.xy, 1);\n"
-                "\tfloat a0 = clamp(dot(uEdges[0], pos), 0.0, 1.0);\n"
-                "\tfloat a1 = clamp(dot(uEdges[1], pos), 0.0, 1.0);\n"
-                "\tfloat a2 = clamp(dot(uEdges[2], pos), 0.0, 1.0);\n"
-                "\tfloat a3 = clamp(dot(uEdges[3], pos), 0.0, 1.0);\n"
-                "\tfloat a4 = clamp(dot(uEdges[4], pos), 0.0, 1.0);\n"
-                "\tfloat a5 = clamp(dot(uEdges[5], pos), 0.0, 1.0);\n"
-                "\tfloat edgeAlpha = min(min(a0 * a1, a2 * a3), a4 * a5);\n");
+                "\tvec3 pos = vec3(gl_FragCoord.xy, 1);\n");
+            for (int i = 0; i < count; i++) {
+                segments.fFSCode.append("\tfloat a");
+                segments.fFSCode.appendS32(i);
+                segments.fFSCode.append(" = clamp(dot(" EDGES_UNI_NAME "[");
+                segments.fFSCode.appendS32(i);
+                segments.fFSCode.append("], pos), 0.0, 1.0);\n");
+            }
+            segments.fFSCode.append("\tfloat edgeAlpha = ");
+            for (int i = 0; i < count - 1; i++) {
+                segments.fFSCode.append("min(a");
+                segments.fFSCode.appendS32(i);
+                segments.fFSCode.append(" * a");
+                segments.fFSCode.appendS32(i + 1);
+                segments.fFSCode.append(", ");
+            }
+            segments.fFSCode.append("a");
+            segments.fFSCode.appendS32(count - 1);
+            segments.fFSCode.append(" * a0");
+            for (int i = 0; i < count - 1; i++) {
+                segments.fFSCode.append(")");
+            }
+            segments.fFSCode.append(";\n");
             inCoverage = "edgeAlpha";
             coverageIsScalar = true;
         }
diff --git a/gpu/src/GrGLProgram.h b/gpu/src/GrGLProgram.h
index e02d15b..d4a6406 100644
--- a/gpu/src/GrGLProgram.h
+++ b/gpu/src/GrGLProgram.h
@@ -96,7 +96,7 @@
 
         int  fFirstCoverageStage;
         bool fEmitsPointSize;
-        bool fUsesEdgeAA;
+        int fEdgeAANumEdges;
 
         SkXfermode::Mode fColorFilterXfermode;
 
diff --git a/gpu/src/GrGpuGL.cpp b/gpu/src/GrGpuGL.cpp
index e8c7afb..5a2d2bd 100644
--- a/gpu/src/GrGpuGL.cpp
+++ b/gpu/src/GrGpuGL.cpp
@@ -16,6 +16,7 @@
 
 #include "GrGpuGL.h"
 #include "GrMemory.h"
+#include "GrTypes.h"
 
 static const GrGLuint GR_MAX_GLUINT = ~0;
 static const GrGLint  GR_INVAL_GLINT = ~0;
@@ -201,6 +202,16 @@
         GR_GL_GetIntegerv(GR_GL_MAX_TEXTURE_UNITS, &maxTextureUnits);
         GrAssert(maxTextureUnits > kNumStages);
     }
+    if (GR_GL_SUPPORT_ES2) {
+        GR_GL_GetIntegerv(GR_GL_MAX_FRAGMENT_UNIFORM_VECTORS,
+                          &fMaxFragmentUniformVectors);
+    } else if (GR_GL_SUPPORT_DESKTOP) {
+        GrGLint max;
+        GR_GL_GetIntegerv(GR_GL_MAX_FRAGMENT_UNIFORM_COMPONENTS, &max);
+        fMaxFragmentUniformVectors = max / 4;
+    } else {
+        fMaxFragmentUniformVectors = 16;
+    }
 
     ////////////////////////////////////////////////////////////////////////////
     // Check for supported features.
@@ -2064,3 +2075,9 @@
         }
     }
 }
+
+int GrGpuGL::getMaxEdges() const {
+    // FIXME:  This is a pessimistic estimate based on how many other things
+    // want to add uniforms.  This should be centralized somewhere.
+    return GR_CT_MIN(fMaxFragmentUniformVectors - 8, kMaxEdges);
+}
diff --git a/gpu/src/GrGpuGL.h b/gpu/src/GrGpuGL.h
index da955cf..d48d69e 100644
--- a/gpu/src/GrGpuGL.h
+++ b/gpu/src/GrGpuGL.h
@@ -107,6 +107,7 @@
     virtual void flushScissor(const GrIRect* rect);
     void clearStencil(uint32_t value, uint32_t mask);
     virtual void clearStencilClip(const GrIRect& rect);
+    virtual int getMaxEdges() const;
 
     // binds texture unit in GL
     void setTextureUnit(int unitIdx);
@@ -189,6 +190,9 @@
     // Do we have stencil wrap ops.
     bool fHasStencilWrap;
 
+    // The maximum number of fragment uniform vectors (GLES has min. 16).
+    int fMaxFragmentUniformVectors;
+
     // ES requires an extension to support RGBA8 in RenderBufferStorage
     bool fRGBA8Renderbuffer;
 
diff --git a/gpu/src/GrGpuGLShaders.cpp b/gpu/src/GrGpuGLShaders.cpp
index 08845a9..bbf9719 100644
--- a/gpu/src/GrGpuGLShaders.cpp
+++ b/gpu/src/GrGpuGLShaders.cpp
@@ -193,7 +193,7 @@
         idx = (int)(random.nextF() * (kNumStages+1));
         pdesc.fFirstCoverageStage = idx;
 
-        pdesc.fUsesEdgeAA = (random.nextF() > .5f);
+        pdesc.fEdgeAANumEdges = (random.nextF() * (getMaxEdges() + 1));
 
         for (int s = 0; s < kNumStages; ++s) {
             // enable the stage?
@@ -442,16 +442,17 @@
 void GrGpuGLShaders::flushEdgeAAData() {
     const int& uni = fProgramData->fUniLocations.fEdgesUni;
     if (GrGLProgram::kUnusedUniform != uni) {
-        float edges[18];
-        memcpy(edges, fCurrDrawState.fEdgeAAEdges, sizeof(edges));
+        int count = fCurrDrawState.fEdgeAANumEdges;
+        Edge edges[kMaxEdges];
         // Flip the edges in Y
         float height = fCurrDrawState.fRenderTarget->height();
-        for (int i = 0; i < 6; ++i) {
-            float b = edges[i * 3 + 1];
-            edges[i * 3 + 1] = -b;
-            edges[i * 3 + 2] += b * height;
+        for (int i = 0; i < count; ++i) {
+            edges[i] = fCurrDrawState.fEdgeAAEdges[i];
+            float b = edges[i].fY;
+            edges[i].fY = -b;
+            edges[i].fZ += b * height;
         }
-        GR_GL(Uniform3fv(uni, 6, edges));
+        GR_GL(Uniform3fv(uni, count, &edges[0].fX));
     }
 }
 
@@ -701,7 +702,7 @@
         desc.fColorType = GrGLProgram::ProgramDesc::kAttribute_ColorType;
     }
 
-    desc.fUsesEdgeAA = fCurrDrawState.fFlagBits & kEdgeAA_StateBit;
+    desc.fEdgeAANumEdges = fCurrDrawState.fEdgeAANumEdges;
 
     for (int s = 0; s < kNumStages; ++s) {
         GrGLProgram::ProgramDesc::StageDesc& stage = desc.fStages[s];
diff --git a/gpu/src/GrTesselatedPathRenderer.cpp b/gpu/src/GrTesselatedPathRenderer.cpp
index da6da5c..8a33012 100644
--- a/gpu/src/GrTesselatedPathRenderer.cpp
+++ b/gpu/src/GrTesselatedPathRenderer.cpp
@@ -85,19 +85,7 @@
 GrTesselatedPathRenderer::GrTesselatedPathRenderer() {
 }
 
-class Edge {
-  public:
-    Edge() {}
-    Edge(float x, float y, float z) : fX(x), fY(y), fZ(z) {}
-    GrPoint intersect(const Edge& other) {
-        return GrPoint::Make(
-            (fY * other.fZ - other.fY * fZ) / (fX * other.fY - other.fX * fY),
-            (fX * other.fZ - other.fX * fZ) / (other.fX * fY - fX * other.fY));
-    }
-    float fX, fY, fZ;
-};
-
-typedef GrTDArray<Edge> EdgeArray;
+typedef GrTDArray<GrDrawTarget::Edge> EdgeArray;
 
 bool isCCW(const GrPoint* pts)
 {
@@ -121,15 +109,15 @@
         GrVec tangent = GrVec::Make(p.fY - q.fY, q.fX - p.fX);
         float scale = sign / tangent.length();
         float cross2 = p.fX * q.fY - q.fX * p.fY;
-        Edge edge(tangent.fX * scale,
+        GrDrawTarget::Edge edge(tangent.fX * scale,
                   tangent.fY * scale,
                   cross2 * scale + 0.5f);
         *edges->append() = edge;
         p = q;
     }
-    Edge prev_edge = *edges->back();
+    GrDrawTarget::Edge prev_edge = *edges->back();
     for (size_t i = 0; i < edges->count(); ++i) {
-        Edge edge = edges->at(i);
+        GrDrawTarget::Edge edge = edges->at(i);
         vertices[i] = prev_edge.intersect(edge);
         inverse.mapPoints(&vertices[i], 1);
         prev_edge = edge;
@@ -262,29 +250,30 @@
 
     if (subpathCnt == 1 && !inverted && path.isConvex()) {
         if (target->isAntialiasState()) {
-            target->enableState(GrDrawTarget::kEdgeAA_StateBit);
             EdgeArray edges;
             GrMatrix inverse, matrix = target->getViewMatrix();
             target->getViewInverse(&inverse);
 
             count = computeEdgesAndOffsetVertices(matrix, inverse, base, count, &edges);
-            GrPoint triangle[3];
-            triangle[0] = base[0];
-            Edge triangleEdges[6];
-            triangleEdges[0] = *edges.back();
-            triangleEdges[1] = edges[0];
-            for (size_t i = 1; i < count - 1; i++) {
-                triangle[1] = base[i];
-                triangle[2] = base[i + 1];
-                triangleEdges[2] = edges[i - 1];
-                triangleEdges[3] = edges[i];
-                triangleEdges[4] = edges[i];
-                triangleEdges[5] = edges[i + 1];
-                target->setVertexSourceToArray(layout, triangle, 3);
-                target->setEdgeAAData(&triangleEdges[0].fX);
-                target->drawNonIndexed(kTriangles_PrimitiveType, 0, 3);
+            int maxEdges = target->getMaxEdges();
+            if (count <= maxEdges) {
+                // All edges fit; upload all edges and draw all verts as a fan
+                target->setVertexSourceToArray(layout, base, count);
+                target->setEdgeAAData(&edges[0], count);
+                target->drawNonIndexed(kTriangleFan_PrimitiveType, 0, count);
+            } else {
+                // Upload "maxEdges" edges and verts at a time, and draw as
+                // separate fans
+                for (size_t i = 0; i < count - 2; i += maxEdges - 2) {
+                    edges[i] = edges[0];
+                    base[i] = base[0];
+                    int size = GR_CT_MIN(count - i, maxEdges);
+                    target->setVertexSourceToArray(layout, &base[i], size);
+                    target->setEdgeAAData(&edges[i], size);
+                    target->drawNonIndexed(kTriangleFan_PrimitiveType, 0, size);
+                }
             }
-            target->disableState(GrDrawTarget::kEdgeAA_StateBit);
+            target->setEdgeAAData(NULL, 0);
         } else {
             target->setVertexSourceToArray(layout, base, count);
             target->drawNonIndexed(kTriangleFan_PrimitiveType, 0, count);