Crop the fast path dashed lines to the cull rect

Without:

maxrss  loops   min     median  mean    max     stddev  samples         config  bench
  56M   1       13.3ms  13.6ms  13.6ms  14.2ms  2%      Ooooo.....      8888    GM_dashing5_bw
  56M   13      390us   417us   416us   459us   5%      ooooO..o.o      gpu     GM_dashing5_bw
  56M   1       13.4ms  13.9ms  14.1ms  15ms    3%      Oooo..ooOo      8888    GM_dashing5_aa
  56M   13      402us   421us   416us   425us   2%      Ooo.ooOOOO      gpu     GM_dashing5_aa

With:

  40M   1       1.53ms  1.54ms  1.54ms  1.55ms  0%      oo.O...o..      8888    GM_dashing5_bw
  40M   12      407us   412us   415us   445us   3%      ...Oo.....      gpu     GM_dashing5_bw
  40M   1       1.7ms   1.7ms   1.7ms   1.72ms  0%      o.O.......      8888    GM_dashing5_aa
  43M   13      405us   409us   409us   415us   1%      ooo.Ooo..o      gpu     GM_dashing5_aa

The GM images (including the new one) are the same with and without this CL.

BUG=428296

Review URL: https://codereview.chromium.org/699623003
diff --git a/src/effects/SkDashPathEffect.cpp b/src/effects/SkDashPathEffect.cpp
index f9a56d0..a053066 100644
--- a/src/effects/SkDashPathEffect.cpp
+++ b/src/effects/SkDashPathEffect.cpp
@@ -41,6 +41,116 @@
                                       fInitialDashLength, fInitialDashIndex, fIntervalLength);
 }
 
+static void outset_for_stroke(SkRect* rect, const SkStrokeRec& rec) {  // pads *rect in place
+    SkScalar radius = SkScalarHalf(rec.getWidth());    // start from half the stroke width
+    if (0 == radius) {
+        radius = SK_Scalar1;    // zero width (hairline): pad by one unit instead
+    }
+    if (SkPaint::kMiter_Join == rec.getJoin()) {
+        radius = SkScalarMul(radius, rec.getMiter());  // miter joins: scale by rec's miter value
+    }
+    rect->outset(radius, radius);   // the same amount is passed for both axes
+}
+
+// Trim the axis-aligned segment pts[0]..pts[1] in place so it minimally covers
+// cullRect, keeping the dash phase. Returns false (pts untouched) to make the
+// caller give up: NULL cull rect, diagonal/degenerate line, bad ctm, or no overlap.
+static bool cull_line(SkPoint* pts, const SkStrokeRec& rec,
+                      const SkMatrix& ctm, const SkRect* cullRect,
+                      const SkScalar intervalLength) {
+    if (NULL == cullRect) {
+        SkASSERT(false); // Shouldn't ever occur in practice
+        return false;
+    }
+
+    SkScalar dx = pts[1].x() - pts[0].x();
+    SkScalar dy = pts[1].y() - pts[0].y();
+
+    if ((dx && dy) || (!dx && !dy)) {   // diagonal, or zero length (would trip the asserts below)
+        return false;
+    }
+
+    SkRect bounds = *cullRect;
+    outset_for_stroke(&bounds, rec);
+
+    // cullRect is in device space while pts are in the local coordinate system
+    // defined by the ctm. We want our answer in the local coordinate system.
+
+    SkASSERT(ctm.rectStaysRect());
+    SkMatrix inv;
+    if (!ctm.invert(&inv)) {
+        return false;
+    }
+
+    inv.mapRect(&bounds);   // bounds is now expressed in the same space as pts
+
+    if (dx) {
+        SkASSERT(dx && !dy);
+        SkScalar minX = pts[0].fX;
+        SkScalar maxX = pts[1].fX;
+
+        if (dx < 0) {
+            SkTSwap(minX, maxX);    // order the endpoints so minX < maxX
+        }
+
+        SkASSERT(minX < maxX);
+        if (maxX < bounds.fLeft || minX > bounds.fRight) {
+            return false;
+        }
+
+        // Now we actually perform the chop, removing the excess to the left and
+        // right of the bounds (keeping our new line "in phase" with the dash,
+        // hence the mod by intervalLength).
+
+        if (minX < bounds.fLeft) {
+            minX = bounds.fLeft - SkScalarMod(bounds.fLeft - minX, intervalLength);
+        }
+        if (maxX > bounds.fRight) {
+            maxX = bounds.fRight + SkScalarMod(maxX - bounds.fRight, intervalLength);
+        }
+
+        SkASSERT(maxX > minX);
+        if (dx < 0) {
+            SkTSwap(minX, maxX);    // restore the line's original direction
+        }
+        pts[0].fX = minX;
+        pts[1].fX = maxX;
+    } else {
+        SkASSERT(dy && !dx);
+        SkScalar minY = pts[0].fY;
+        SkScalar maxY = pts[1].fY;
+
+        if (dy < 0) {
+            SkTSwap(minY, maxY);    // order the endpoints so minY < maxY
+        }
+
+        SkASSERT(minY < maxY);
+        if (maxY < bounds.fTop || minY > bounds.fBottom) {
+            return false;
+        }
+
+        // Now we actually perform the chop, removing the excess to the top and
+        // bottom of the bounds (keeping our new line "in phase" with the dash,
+        // hence the mod by intervalLength).
+
+        if (minY < bounds.fTop) {
+            minY = bounds.fTop - SkScalarMod(bounds.fTop - minY, intervalLength);
+        }
+        if (maxY > bounds.fBottom) {
+            maxY = bounds.fBottom + SkScalarMod(maxY - bounds.fBottom, intervalLength);
+        }
+
+        SkASSERT(maxY > minY);
+        if (dy < 0) {
+            SkTSwap(minY, maxY);    // restore the line's original direction
+        }
+        pts[0].fY = minY;
+        pts[1].fY = maxY;
+    }
+
+    return true;
+}
+
 // Currently asPoints is more restrictive then it needs to be. In the future
 // we need to:
 //      allow kRound_Cap capping (could allow rotations in the matrix with this)
@@ -83,7 +193,12 @@
         return false;
     }
 
-    SkScalar        length = SkPoint::Distance(pts[1], pts[0]);
+    // See if the line can be limited to something plausible.
+    if (!cull_line(pts, rec, matrix, cullRect, fIntervalLength)) {
+        return false;
+    }
+
+    SkScalar length = SkPoint::Distance(pts[1], pts[0]);
 
     SkVector tangent = pts[1] - pts[0];
     if (tangent.isZero()) {
@@ -94,9 +209,11 @@
 
     // TODO: make this test for horizontal & vertical lines more robust
     bool isXAxis = true;
-    if (SK_Scalar1 == tangent.fX || -SK_Scalar1 == tangent.fX) {
+    if (SkScalarNearlyEqual(SK_Scalar1, tangent.fX) ||
+        SkScalarNearlyEqual(-SK_Scalar1, tangent.fX)) {
         results->fSize.set(SkScalarHalf(fIntervals[0]), SkScalarHalf(rec.getWidth()));
-    } else if (SK_Scalar1 == tangent.fY || -SK_Scalar1 == tangent.fY) {
+    } else if (SkScalarNearlyEqual(SK_Scalar1, tangent.fY) ||
+               SkScalarNearlyEqual(-SK_Scalar1, tangent.fY)) {
         results->fSize.set(SkScalarHalf(rec.getWidth()), SkScalarHalf(fIntervals[0]));
         isXAxis = false;
     } else if (SkPaint::kRound_Cap != rec.getCap()) {