Add SkChopCubicAt2 using Sk2s -- 2x faster than SkChopCubicAt

BUG=skia:
TBR=

Review URL: https://codereview.chromium.org/1036753002
diff --git a/bench/GeometryBench.cpp b/bench/GeometryBench.cpp
index 24d5c34..9178aa4 100644
--- a/bench/GeometryBench.cpp
+++ b/bench/GeometryBench.cpp
@@ -134,11 +134,11 @@
 
 class QuadBenchBase : public GeometryBench {
 protected:
-    SkPoint fPts[3];
+    SkPoint fPts[4];
 public:
     QuadBenchBase(const char name[]) : GeometryBench(name) {
         SkRandom rand;
-        for (int i = 0; i < 3; ++i) {
+        for (int i = 0; i < 4; ++i) {
             fPts[i].set(rand.nextUScalar1(), rand.nextUScalar1());
         }
     }
@@ -244,3 +244,35 @@
 };
 DEF_BENCH( return new ChopQuadAt1; )
 
+class ChopCubicAt0 : public QuadBenchBase {  // bench "chopcubicat0": times SkChopCubicAt
+public:
+    ChopCubicAt0() : QuadBenchBase("chopcubicat0") {}
+protected:
+    void onDraw(const int loops, SkCanvas* canvas) SK_OVERRIDE {  // canvas is not used here
+        SkPoint dst[7];  // output buffer; every call below overwrites it
+        for (int outer = 0; outer < loops; ++outer) {
+            SkChopCubicAt(fPts, dst, 0.5f);  // identical call repeated 4x per iteration, t = 0.5
+            SkChopCubicAt(fPts, dst, 0.5f);
+            SkChopCubicAt(fPts, dst, 0.5f);
+            SkChopCubicAt(fPts, dst, 0.5f);
+        }
+    }
+};
+DEF_BENCH( return new ChopCubicAt0; )
+
+class ChopCubicAt1 : public QuadBenchBase {  // bench "chopcubicat1": times SkChopCubicAt2
+public:
+    ChopCubicAt1() : QuadBenchBase("chopcubicat1") {}
+protected:
+    void onDraw(const int loops, SkCanvas* canvas) SK_OVERRIDE {  // canvas is not used here
+        SkPoint dst[7];  // output buffer; every call below overwrites it
+        for (int outer = 0; outer < loops; ++outer) {
+            SkChopCubicAt2(fPts, dst, 0.5f);  // identical call repeated 4x per iteration, t = 0.5
+            SkChopCubicAt2(fPts, dst, 0.5f);
+            SkChopCubicAt2(fPts, dst, 0.5f);
+            SkChopCubicAt2(fPts, dst, 0.5f);
+        }
+    }
+};
+DEF_BENCH( return new ChopCubicAt1; )
+
diff --git a/src/core/SkGeometry.cpp b/src/core/SkGeometry.cpp
index 0d64fb5..62c5903 100644
--- a/src/core/SkGeometry.cpp
+++ b/src/core/SkGeometry.cpp
@@ -479,6 +479,31 @@
     interp_cubic_coords(&src[0].fY, &dst[0].fY, t);
 }
 
+void SkChopCubicAt2(const SkPoint src[4], SkPoint dst[7], SkScalar t) {  // Sk2s-based chop of src at t into dst[0..6]
+    SkASSERT(t > 0 && t < SK_Scalar1);  // t must be strictly between 0 and 1
+
+    Sk2s    p0 = from_point(src[0]);  // each control point becomes one Sk2s (presumably x/y lanes -- confirm)
+    Sk2s    p1 = from_point(src[1]);
+    Sk2s    p2 = from_point(src[2]);
+    Sk2s    p3 = from_point(src[3]);
+    Sk2s    tt(t);  // t wrapped as an Sk2s so it can be handed to interp
+
+    Sk2s    ab = interp(p0, p1, tt);  // first level: interp between adjacent control points
+    Sk2s    bc = interp(p1, p2, tt);
+    Sk2s    cd = interp(p2, p3, tt);
+    Sk2s    abc = interp(ab, bc, tt);  // second level: interp between first-level results
+    Sk2s    bcd = interp(bc, cd, tt);
+    Sk2s    abcd = interp(abc, bcd, tt);  // third level: stored below as dst[3]
+    
+    dst[0] = src[0];  // first and last outputs are copied from src unchanged
+    dst[1] = to_point(ab);
+    dst[2] = to_point(abc);
+    dst[3] = to_point(abcd);  // middle point of the 7 outputs
+    dst[4] = to_point(bcd);
+    dst[5] = to_point(cd);
+    dst[6] = src[3];
+}
+
 /*  http://code.google.com/p/skia/issues/detail?id=32
 
     This test code would fail when we didn't check the return result of
diff --git a/src/core/SkGeometry.h b/src/core/SkGeometry.h
index 5a6dcb5..c8c41f9 100644
--- a/src/core/SkGeometry.h
+++ b/src/core/SkGeometry.h
@@ -95,6 +95,8 @@
     dst[0..3] and dst[3..6]
 */
 void SkChopCubicAt(const SkPoint src[4], SkPoint dst[7], SkScalar t);
+void SkChopCubicAt2(const SkPoint src[4], SkPoint dst[7], SkScalar t);  // Sk2s-based; same signature as SkChopCubicAt
+
 /** Given a src cubic bezier, chop it at the specified t values,
     where 0 < t < 1, and return the new cubics in dst:
     dst[0..3],dst[3..6],...,dst[3*t_count..3*(t_count+1)]
diff --git a/tests/GeometryTest.cpp b/tests/GeometryTest.cpp
index 8a7bfe3..4109c0a 100644
--- a/tests/GeometryTest.cpp
+++ b/tests/GeometryTest.cpp
@@ -58,12 +58,12 @@
             SkEvalQuadAt(pts, t, &r0);
             SkPoint r1 = SkEvalQuadAt(pts, t);
             check_pairs(reporter, i, t, "quad-pos", r0.fX, r0.fY, r1.fX, r1.fY);
-
+            
             SkVector v0;
             SkEvalQuadAt(pts, t, NULL, &v0);
             SkVector v1 = SkEvalQuadTangentAt(pts, t);
             check_pairs(reporter, i, t, "quad-tan", v0.fX, v0.fY, v1.fX, v1.fY);
-
+            
             SkPoint dst0[5], dst1[5];
             SkChopQuadAt(pts,  dst0, t);
             SkChopQuadAt2(pts, dst1, t);
@@ -71,7 +71,30 @@
                 check_pairs(reporter, i, t, "chop-quad",
                             dst0[k].fX, dst0[k].fY, dst1[k].fX, dst1[k].fY);
             }
+            
+            t += dt;
+        }
+    }
+}
 
+static void test_cubicat(skiatest::Reporter* reporter) {
+    SkRandom rand;
+    for (int i = 0; i < 1000; ++i) {
+        SkPoint pts[4];
+        for (int j = 0; j < 4; ++j) {
+            pts[j].set(rand.nextSScalar1() * 100, rand.nextSScalar1() * 100);
+        }
+        const SkScalar dt = SK_Scalar1 / 128;
+        SkScalar t = dt;
+        for (int j = 1; j < 128; ++j) {
+            SkPoint dst0[7], dst1[7];
+            SkChopCubicAt(pts,  dst0, t);
+            SkChopCubicAt2(pts, dst1, t);
+            for (int k = 0; k < 7; ++k) {
+                check_pairs(reporter, i, t, "chop-cubic",
+                            dst0[k].fX, dst0[k].fY, dst1[k].fX, dst1[k].fY);
+            }
+            
             t += dt;
         }
     }
@@ -156,4 +179,5 @@
     testChopCubic(reporter);
     test_evalquadat(reporter);
     test_conic(reporter);
+    test_cubicat(reporter);
 }