specialize arm64 allTrue()/anyTrue()

aarch64 added vector-wise add/mul/min/max instructions.
We can use min and max to implement allTrue() and anyTrue(),
respectively.

(This CL is mostly so I don't forget these intrinsics exist.)

In assembly, these actually compile to two instructions:
the folding operation into a vector register, then a move
from the vector register to a general-purpose register.

Change-Id: Ia6a999ac250740de765e871094e911979a8711c7
Reviewed-on: https://skia-review.googlesource.com/116482
Reviewed-by: Chris Dalton <csmartdalton@google.com>
Commit-Queue: Mike Klein <mtklein@chromium.org>
diff --git a/tests/SkNxTest.cpp b/tests/SkNxTest.cpp
index 9e2c27e..9901f7a 100644
--- a/tests/SkNxTest.cpp
+++ b/tests/SkNxTest.cpp
@@ -454,3 +454,17 @@
     REPORTER_ASSERT(r, dst[6] == 6);
     REPORTER_ASSERT(r, dst[7] == 7);
 }
+
+DEF_TEST(SkNf_anyTrue_allTrue, r) {  // anyTrue()/allTrue() on 2- and 4-lane '<' results
+    REPORTER_ASSERT(r,  (Sk2f{1,2} < Sk2f{3,4}).anyTrue());  // 1<3, 2<4: both lanes true
+    REPORTER_ASSERT(r,  (Sk2f{1,2} < Sk2f{3,4}).allTrue());
+    REPORTER_ASSERT(r,  (Sk2f{3,2} < Sk2f{1,4}).anyTrue());  // 3<1 false, 2<4 true: mixed
+    REPORTER_ASSERT(r, !(Sk2f{3,2} < Sk2f{1,4}).allTrue());
+    REPORTER_ASSERT(r, !(Sk2f{3,4} < Sk2f{1,2}).anyTrue());  // 3<1, 4<2: no lane true
+
+    REPORTER_ASSERT(r,  (Sk4f{1,2,3,4} < Sk4f{3,4,5,6}).anyTrue());  // every lane true
+    REPORTER_ASSERT(r,  (Sk4f{1,2,3,4} < Sk4f{3,4,5,6}).allTrue());
+    REPORTER_ASSERT(r,  (Sk4f{1,2,3,4} < Sk4f{1,4,1,1}).anyTrue());  // only 2<4 is true
+    REPORTER_ASSERT(r, !(Sk4f{1,2,3,4} < Sk4f{1,4,1,1}).allTrue());
+    REPORTER_ASSERT(r, !(Sk4f{3,4,5,6} < Sk4f{1,2,3,4}).anyTrue());  // no lane true
+}