Fix memcpy32_sse2_unalign. The whole point of memcpy32_sse2_unalign is that we didn't align dst128 and src128. So it's not safe at all to cast them back to dst and src. That tells the compiler that dst/src are 128-bit aligned, and then it autovectorizes the cleanup while-loop using that (false) knowledge with aligned SSE instructions. This leads to crashes on memcpy32_sse2_unalign_10, which is small enough that we actually get non-16-byte aligned memory. The larger size benches could be crashing too, but they're big enough allocations that they're probably always 16-byte aligned anyway. BUG=skia:2589 R=fmalita@chromium.org, mtklein@google.com Author: mtklein@chromium.org Review URL: https://codereview.chromium.org/291893008 git-svn-id: http://skia.googlecode.com/svn/trunk@14851 2bbb7eff-a529-9590-31e7-b0007b416f81

commit: 0be2d8354b0c99bd099c4c6dd78a7fcb69955fa4 [log] [tgz]
author: commit-bot@chromium.org <commit-bot@chromium.org@2bbb7eff-a529-9590-31e7-b0007b416f81> Thu May 22 18:24:42 2014 +0000
committer: commit-bot@chromium.org <commit-bot@chromium.org@2bbb7eff-a529-9590-31e7-b0007b416f81> Thu May 22 18:24:42 2014 +0000
tree: fd779953e81a886bfabd8ff2f431fe6921c6420b
parent: 7cb5e4797ed7ecb200f90889687a72940efa6d98 [diff]
diff --git a/bench/MemcpyBench.cpp b/bench/MemcpyBench.cpp
index 0bb7b61..aec2a47 100644
--- a/bench/MemcpyBench.cpp
+++ b/bench/MemcpyBench.cpp

@@ -90,6 +90,8 @@
 
         __m128i* dst128 = reinterpret_cast<__m128i*>(dst);
         const __m128i* src128 = reinterpret_cast<const __m128i*>(src);
+        dst += 16 * (count / 16);
+        src += 16 * (count / 16);
         while (count >= 16) {
             __m128i a = _mm_loadu_si128(src128++);
             __m128i b = _mm_loadu_si128(src128++);
@@ -103,9 +105,6 @@
 
             count -= 16;
         }
-
-        dst = reinterpret_cast<uint32_t*>(dst128);
-        src = reinterpret_cast<const uint32_t*>(src128);
     }
 
     while (count --> 0) {
@@ -122,6 +121,8 @@
 static void memcpy32_sse2_unalign(uint32_t* dst, const uint32_t* src, int count) {
     __m128i* dst128 = reinterpret_cast<__m128i*>(dst);
     const __m128i* src128 = reinterpret_cast<const __m128i*>(src);
+    dst += 16 * (count / 16);
+    src += 16 * (count / 16);
     while (count >= 16) {
         __m128i a = _mm_loadu_si128(src128++);
         __m128i b = _mm_loadu_si128(src128++);
@@ -136,14 +137,11 @@
         count -= 16;
     }
 
-    dst = reinterpret_cast<uint32_t*>(dst128);
-    src = reinterpret_cast<const uint32_t*>(src128);
     while (count --> 0) {
         *dst++ = *src++;
     }
 }
-// skia:2589: Crashing on ChromeOS Alex bot.  TODO(mtklein): why?
-//BENCH(memcpy32_sse2_unalign, 10)
+BENCH(memcpy32_sse2_unalign, 10)
 BENCH(memcpy32_sse2_unalign, 100)
 BENCH(memcpy32_sse2_unalign, 1000)
 BENCH(memcpy32_sse2_unalign, 10000)
commit	0be2d8354b0c99bd099c4c6dd78a7fcb69955fa4	[log] [tgz]
author	commit-bot@chromium.org <commit-bot@chromium.org@2bbb7eff-a529-9590-31e7-b0007b416f81>	Thu May 22 18:24:42 2014 +0000
committer	commit-bot@chromium.org <commit-bot@chromium.org@2bbb7eff-a529-9590-31e7-b0007b416f81>	Thu May 22 18:24:42 2014 +0000
tree	fd779953e81a886bfabd8ff2f431fe6921c6420b
parent	7cb5e4797ed7ecb200f90889687a72940efa6d98 [diff]