Fix alignment problems in NEON Sk4b.
As written at head, the compiler can assume these loads and stores are
4-byte aligned [1]. We want Sk4b to load from and store to any 1-byte
aligned address, to prevent crashes like [2].
[1] https://llvm.org/bugs/show_bug.cgi?id=24421
[2] https://luci-milo.appspot.com/swarming/task/304079e125b1b910/steps/nanobench/0/stdout
BUG=skia:
GOLD_TRYBOT_URL= https://gold.skia.org/search?issue=2183133002
CQ_INCLUDE_TRYBOTS=master.client.skia:Test-Ubuntu-GCC-GCE-CPU-AVX2-x86_64-Release-SKNX_NO_SIMD-Trybot
Review-Url: https://codereview.chromium.org/2183133002
diff --git a/src/opts/SkNx_neon.h b/src/opts/SkNx_neon.h
index df11de3..23567b7 100644
--- a/src/opts/SkNx_neon.h
+++ b/src/opts/SkNx_neon.h
@@ -296,6 +296,8 @@
template <>
class SkNx<4, uint8_t> {
public:
+ typedef uint32_t __attribute__((aligned(1))) unaligned_uint32_t;
+
SkNx(const uint8x8_t& vec) : fVec(vec) {}
SkNx() {}
@@ -303,10 +305,10 @@
fVec = (uint8x8_t){a,b,c,d, 0,0,0,0};
}
static SkNx Load(const void* ptr) {
- return (uint8x8_t)vld1_dup_u32((const uint32_t*)ptr);
+ return (uint8x8_t)vld1_dup_u32((const unaligned_uint32_t*)ptr);
}
void store(void* ptr) const {
- return vst1_lane_u32((uint32_t*)ptr, (uint32x2_t)fVec, 0);
+ return vst1_lane_u32((unaligned_uint32_t*)ptr, (uint32x2_t)fVec, 0);
}
uint8_t operator[](int k) const {
SkASSERT(0 <= k && k < 4);