Mike's radial gradient CL with better float -> int conversion.

patch from issue 1072303005 at patchset 40001 (http://crrev.com/1072303005#ps40001)

This looks quite launchable.  radial_gradient3, min of 100 samples:
  N5:  985µs -> 946µs
  MBP: 395µs -> 279µs

On my MBP, most of the remaining time now appears to be spent reading the cache and writing to dst one color at a time.  Could we do that in float math rather than with a lookup table?

BUG=skia:

CQ_EXTRA_TRYBOTS=client.skia.compile:Build-Mac10.8-Clang-Arm7-Debug-Android-Trybot,Build-Ubuntu-GCC-Arm7-Release-Android_NoNeon-Trybot

Committed: https://skia.googlesource.com/skia/+/abf6c5cf95e921fae59efb487480e5b5081cf0ec

Review URL: https://codereview.chromium.org/1109643002
diff --git a/tests/SkNxTest.cpp b/tests/SkNxTest.cpp
index af6918e..25ece38 100644
--- a/tests/SkNxTest.cpp
+++ b/tests/SkNxTest.cpp
@@ -89,6 +89,12 @@
           case 4: REPORTER_ASSERT(r, vals[2] == c && vals[3] == d);
           case 2: REPORTER_ASSERT(r, vals[0] == a && vals[1] == b);
         }
+        switch (N) {
+          case 8: REPORTER_ASSERT(r, v.template kth<4>() == e && v.template kth<5>() == f &&
+                                     v.template kth<6>() == g && v.template kth<7>() == h);
+          case 4: REPORTER_ASSERT(r, v.template kth<2>() == c && v.template kth<3>() == d);
+          case 2: REPORTER_ASSERT(r, v.template kth<0>() == a && v.template kth<1>() == b);
+        }
     };
 
     T vals[] = { 1,2,3,4,5,6,7,8 };
@@ -117,4 +123,8 @@
     test_Ni<2, uint16_t>(r);
     test_Ni<4, uint16_t>(r);
     test_Ni<8, uint16_t>(r);
+
+    test_Ni<2, int>(r);
+    test_Ni<4, int>(r);
+    test_Ni<8, int>(r);
 }