Replace _mm_cvtps_epi32(x) with _mm_cvttps_epi32(_mm_add_ps(0.5f, x)).

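We don't have control over which way _mm_cvtps_epi32 rounds: it follows the current MXCSR rounding mode (round-to-nearest-even by default), so .5 cases do not reliably round up.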

  - This makes the SSE SkPMFloat rounding consistent with _neon and _none.
  - Sk4f::cast<Sk4i>() is closer to (int)float's behavior.  (Correct when >=0).

Add tests that would fail at head.

BUG=skia:

Review URL: https://codereview.chromium.org/1029163002
diff --git a/tests/PMFloatTest.cpp b/tests/PMFloatTest.cpp
index c3d5e48..5b53fc6 100644
--- a/tests/PMFloatTest.cpp
+++ b/tests/PMFloatTest.cpp
@@ -11,8 +11,8 @@
     REPORTER_ASSERT(r, SkScalarNearlyEqual( 51.0f, pmf.b()));
     REPORTER_ASSERT(r, c == pmf.get());
 
-    // Test rounding.  (Don't bother testing .5... we don't care which way it goes.)
-    pmf = SkPMFloat(254.6f, 204.3f, 153.1f, 50.8f);
+    // Test rounding.
+    pmf = SkPMFloat(254.5f, 203.5f, 153.1f, 50.8f);
     REPORTER_ASSERT(r, c == pmf.get());
 
     // Test clamping.
diff --git a/tests/Sk4xTest.cpp b/tests/Sk4xTest.cpp
index d7a016c..8c3b977 100644
--- a/tests/Sk4xTest.cpp
+++ b/tests/Sk4xTest.cpp
@@ -59,6 +59,9 @@
     ASSERT_NE(twoi, twof.reinterpret<Sk4i>());
     ASSERT_EQ(twof, twoi.cast<Sk4f>());
     ASSERT_NE(twof, twoi.reinterpret<Sk4f>());
+
+    ASSERT_EQ(Sk4i(0,0,0,0), Sk4f(0.5f, 0.49f, 0.51f, 0.99f).cast<Sk4i>());
+    ASSERT_EQ(Sk4i(1,1,1,1), Sk4f(1.5f, 1.49f, 1.51f, 1.99f).cast<Sk4i>());
 }
 
 DEF_TEST(Sk4x_Bits, r) {