Handle stroked single line special case in Ganesh

This CL roughly halves the time spent on the captured stroked-lines SKP.
On my Linux desktop it boosts the external benchmark from 2618 to 5007.

This is a companion to https://codereview.chromium.org/2019193002/ ("Add new GM to exercise stroked line special case").

The idea is to land the GM first so that any regressions introduced by this CL are visible.

GOLD_TRYBOT_URL= https://gold.skia.org/search?issue=2023693002

Review-Url: https://codereview.chromium.org/2023693002
diff --git a/src/gpu/SkGpuDevice.cpp b/src/gpu/SkGpuDevice.cpp
index 10bcecf..20afada 100644
--- a/src/gpu/SkGpuDevice.cpp
+++ b/src/gpu/SkGpuDevice.cpp
@@ -692,12 +692,78 @@
 #include "SkMaskFilter.h"
 
 ///////////////////////////////////////////////////////////////////////////////
+void SkGpuDevice::drawStrokedLine(const SkPoint points[2],
+                                  const SkDraw& draw,
+                                  const SkPaint& origPaint) {
+    ASSERT_SINGLE_OWNER
+    GR_CREATE_TRACE_MARKER_CONTEXT("SkGpuDevice", "drawStrokedLine", fContext);
+    CHECK_SHOULD_DRAW(draw);
+    // Draws the stroke of segment points[0]-points[1] as a single filled, rotated rect.
+    // Adding support for round capping would require a GrDrawContext::fillRRectWithLocalMatrix
+    // entry point
+    SkASSERT(SkPaint::kRound_Cap != origPaint.getStrokeCap());
+    SkASSERT(SkPaint::kStroke_Style == origPaint.getStyle());
+    SkASSERT(!origPaint.getPathEffect());
+    SkASSERT(!origPaint.getMaskFilter());
+
+    const SkScalar halfWidth = 0.5f * origPaint.getStrokeWidth();
+    SkASSERT(halfWidth > 0);
+    // Unit direction of the segment; a zero-length segment falls back to +x below.
+    SkVector v = points[1] - points[0];
+
+    SkScalar length = SkPoint::Normalize(&v);
+    if (!length) {
+        v.fX = 1.0f;
+        v.fY = 0.0f;
+    }
+    // Same paint as the caller's, but filled: the rect built below already is the stroke outline.
+    SkPaint newPaint(origPaint);
+    newPaint.setStyle(SkPaint::kFill_Style);
+    // Any cap other than butt lengthens both ends of the rect by halfWidth.
+    SkScalar xtraLength = 0.0f;
+    if (SkPaint::kButt_Cap != origPaint.getStrokeCap()) {
+        xtraLength = halfWidth;
+    }
+    // Midpoint of the segment: center of the rect and the pivot handed to setSinCos.
+    SkPoint mid = points[0] + points[1];
+    mid.scale(0.5f);
+    // Axis-aligned rect centered on mid, segment along y: 2*halfWidth by length + 2*xtraLength.
+    SkRect rect = SkRect::MakeLTRB(mid.fX-halfWidth, mid.fY - 0.5f*length - xtraLength,
+                                   mid.fX+halfWidth, mid.fY + 0.5f*length + xtraLength);
+    SkMatrix m;
+    m.setSinCos(v.fX, -v.fY, mid.fX, mid.fY);  // rotation about mid taking the y axis onto v's line
+    // Snapshot the bare rotation as the local matrix before *draw.fMatrix is folded into m.
+    SkMatrix local = m;
+
+    m.postConcat(*draw.fMatrix);
+
+    GrPaint grPaint;
+    if (!SkPaintToGrPaint(this->context(), newPaint, m,
+                          this->surfaceProps().isGammaCorrect(), &grPaint)) {
+        return;
+    }
+    // m = rotation followed by *draw.fMatrix; local = the rotation only.
+    fDrawContext->fillRectWithLocalMatrix(fClip, grPaint, m, rect, local);
+}
 
 void SkGpuDevice::drawPath(const SkDraw& draw, const SkPath& origSrcPath,
                            const SkPaint& paint, const SkMatrix* prePathMatrix,
                            bool pathIsMutable) {
     ASSERT_SINGLE_OWNER
     if (!origSrcPath.isInverseFillType() && !paint.getPathEffect() && !prePathMatrix) {
+        SkPoint points[2];
+        if (SkPaint::kStroke_Style == paint.getStyle() && paint.getStrokeWidth() > 0 &&
+            !paint.getMaskFilter() && SkPaint::kRound_Cap != paint.getStrokeCap() &&
+            draw.fMatrix->preservesRightAngles() && origSrcPath.isLine(points)) {
+            // Path-based stroking looks better for thin rects
+            SkScalar strokeWidth = draw.fMatrix->getMaxScale() * paint.getStrokeWidth();
+            if (strokeWidth > 0.9f) {
+                // Round capping support is currently disabled b.c. it would require
+                // a RRect batch that takes a localMatrix.
+                this->drawStrokedLine(points, draw, paint);
+                return;
+            }
+        }
         bool isClosed;
         SkRect rect;
         if (origSrcPath.isRect(&rect, &isClosed) && isClosed) {
diff --git a/src/gpu/SkGpuDevice.h b/src/gpu/SkGpuDevice.h
index 474a030..4c6a0f3 100644
--- a/src/gpu/SkGpuDevice.h
+++ b/src/gpu/SkGpuDevice.h
@@ -252,6 +252,7 @@
                           const SkRect& dst, const SkPaint&);
 
     bool drawDashLine(const SkPoint pts[2], const SkPaint& paint);
+    void drawStrokedLine(const SkPoint pts[2], const SkDraw&, const SkPaint&);
 
     static sk_sp<GrDrawContext> CreateDrawContext(GrContext*,
                                                   SkBudgeted,
diff --git a/src/gpu/batches/GrAAFillRectBatch.cpp b/src/gpu/batches/GrAAFillRectBatch.cpp
index 859328a..2c90b8b 100644
--- a/src/gpu/batches/GrAAFillRectBatch.cpp
+++ b/src/gpu/batches/GrAAFillRectBatch.cpp
@@ -84,10 +84,12 @@
     SkPoint* fan0Pos = reinterpret_cast<SkPoint*>(verts);
     SkPoint* fan1Pos = reinterpret_cast<SkPoint*>(verts + 4 * vertexStride);
 
-    SkScalar inset = SkMinScalar(devRect.width(), SK_Scalar1);
-    inset = SK_ScalarHalf * SkMinScalar(inset, devRect.height());
+    SkScalar inset;
 
     if (viewMatrix.rectStaysRect()) {
+        inset = SkMinScalar(devRect.width(), SK_Scalar1);
+        inset = SK_ScalarHalf * SkMinScalar(inset, devRect.height());
+
         set_inset_fan(fan0Pos, vertexStride, devRect, -SK_ScalarHalf, -SK_ScalarHalf);
         set_inset_fan(fan1Pos, vertexStride, devRect, inset,  inset);
     } else {
@@ -97,11 +99,14 @@
           { viewMatrix[SkMatrix::kMSkewX],  viewMatrix[SkMatrix::kMScaleY] }
         };
 
-        vec[0].normalize();
+        SkScalar len1 = SkPoint::Normalize(&vec[0]);
         vec[0].scale(SK_ScalarHalf);
-        vec[1].normalize();
+        SkScalar len2 = SkPoint::Normalize(&vec[1]);
         vec[1].scale(SK_ScalarHalf);
 
+        inset = SkMinScalar(len1 * rect.width(), SK_Scalar1);
+        inset = SK_ScalarHalf * SkMinScalar(inset, len2 * rect.height());
+
         // create the rotated rect
         fan0Pos->setRectFan(rect.fLeft, rect.fTop,
                             rect.fRight, rect.fBottom, vertexStride);