Optimized implementation of quickReject()

Impl Overview
(1) Keep the device clip bounds up to date.  This
    requires minimal additional work in a few places
    throughout canvas.
(2) Keep track of whether the CTM isScaleTranslate().
    Yes, there's a function that does this, but it's
    slow to call.
(3) Perform the src->device transform in quick reject,
    then check intersection/nan.

Other Notes:
(1) NaN and intersection checks are performed
    simultaneously.
(2) We no longer quick reject infinity.
(3) Affine and perspective are both handled in the slow
    case.
(4) SkRasterClip::isEmpty() is handled by the intersection
    check.

Performance on Nexus 6P:
93.2ms -> 59.8ms

Overall Android Jank Tests Performance Impact:
Should gain us a ms or two on some tests.

BUG=skia:
GOLD_TRYBOT_URL= https://gold.skia.org/search?issue=2225393002

Committed: https://skia.googlesource.com/skia/+/d22a817ff57986407facd16af36320fc86ce02da
Review-Url: https://codereview.chromium.org/2225393002
diff --git a/bench/QuickRejectBench.cpp b/bench/QuickRejectBench.cpp
new file mode 100644
index 0000000..1feade4
--- /dev/null
+++ b/bench/QuickRejectBench.cpp
@@ -0,0 +1,61 @@
+/*
+ * Copyright 2016 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+#include "Benchmark.h"
+#include "SkCanvas.h"
+#include "SkRandom.h"
+
+// Benchmark "quick_reject": calls SkCanvas::quickReject() on rects read out of a float array.
+class QuickRejectBench : public Benchmark {
+    enum { N = 1000000 };
+    float fFloats[N];  // filled by onDelayedSetup(); reinterpreted in place as SkRects by onDraw()
+    int   fInts  [N];  // one result marker per tested start index, written by onDraw()
+
+    const char* onGetName() override { return "quick_reject"; }
+    bool isSuitableFor(Backend backend) override { return backend != kNonRendering_Backend; }
+
+    void onDelayedSetup() override  {
+        SkRandom rng;
+        for (float& value : fFloats) {
+            value = 300.0f * (rng.nextSScalar1() + 0.5f);
+        }
+    }
+
+    // For each start in [0, N-4): views fFloats+start as an SkRect; stores 11 if rejected, else 24.
+    void onDraw(int loops, SkCanvas* canvas) override {
+        for (; loops > 0; --loops) {
+            for (int start = 0; start < N - 4; start++) {
+                // Windows overlap: consecutive start indices share floats with each other.
+                const SkRect& rect = *reinterpret_cast<const SkRect*>(fFloats + start);
+                fInts[start] = canvas->quickReject(rect) ? 11 : 24;
+            }
+        }
+    }
+};
+DEF_BENCH( return new QuickRejectBench; )
+
+// Benchmark "concat": each loop calls setMatrix() on the canvas, then concats fMatrix onto it.
+class ConcatBench : public Benchmark {
+    SkMatrix fMatrix;  // concat() operand; built once in onDelayedSetup()
+    const char* onGetName() override { return "concat"; }
+    bool isSuitableFor(Backend backend) override { return backend != kNonRendering_Backend; }
+
+    // Builds fMatrix: setScale(5, 5), then both translate components set to 10.
+    void onDelayedSetup() override  {
+        fMatrix.setScale(5.0f, 5.0f);
+        fMatrix.setTranslateX(10.0f);
+        fMatrix.setTranslateY(10.0f);
+    }
+
+    // Each iteration first calls setMatrix(MakeScale(3.0f)), then concat(fMatrix).
+    void onDraw(int loops, SkCanvas* canvas) override {
+        while (loops --> 0) {
+            canvas->setMatrix(SkMatrix::MakeScale(3.0f));
+            canvas->concat(fMatrix);
+        }
+    }
+};
+DEF_BENCH( return new ConcatBench; )