Add Store3 to Sk2f

Bug: skia:
Change-Id: I0377e6a1dd8259e944f7902a5c68af524fa588c7
Reviewed-on: https://skia-review.googlesource.com/79382
Reviewed-by: Mike Klein <mtklein@chromium.org>
Commit-Queue: Chris Dalton <csmartdalton@google.com>
diff --git a/src/core/SkNx.h b/src/core/SkNx.h
index 91c978b..14af118 100644
--- a/src/core/SkNx.h
+++ b/src/core/SkNx.h
@@ -92,6 +92,11 @@
         Half::Store4(ptr,                   a.fLo, b.fLo, c.fLo, d.fLo);
         Half::Store4(ptr + 4*N/2*sizeof(T), a.fHi, b.fHi, c.fHi, d.fHi);
     }
+    // Half::Store3 on the low halves at vptr, then on the high halves 3*N/2 elements of T later.
+    AI static void Store3(void* vptr, const SkNx& a, const SkNx& b, const SkNx& c) {
+        Half::Store3(vptr, a.fLo, b.fLo, c.fLo);
+        Half::Store3(static_cast<char*>(vptr) + 3*N/2*sizeof(T), a.fHi, b.fHi, c.fHi);
+    }
 
     AI bool anyTrue() const { return fLo.anyTrue() || fHi.anyTrue(); }
     AI bool allTrue() const { return fLo.allTrue() && fHi.allTrue(); }
@@ -189,6 +194,12 @@
         c.store(ptr + 2*sizeof(T));
         d.store(ptr + 3*sizeof(T));
     }
+    // Stores a, b, c back to back at vptr, each one sizeof(T) bytes after the previous.
+    AI static void Store3(void* vptr, const SkNx& a, const SkNx& b, const SkNx& c) {
+        a.store(static_cast<char*>(vptr));
+        b.store(static_cast<char*>(vptr) + sizeof(T));
+        c.store(static_cast<char*>(vptr) + 2*sizeof(T));
+    }
 
     AI bool anyTrue() const { return fVal != 0; }
     AI bool allTrue() const { return fVal != 0; }
diff --git a/src/opts/SkNx_neon.h b/src/opts/SkNx_neon.h
index b114f8f..8d1a249 100644
--- a/src/opts/SkNx_neon.h
+++ b/src/opts/SkNx_neon.h
@@ -34,6 +34,15 @@
     AI static SkNx Load(const void* ptr) { return vld1_f32((const float*)ptr); }
     AI void store(void* ptr) const { vst1_f32((float*)ptr, fVec); }
 
+    // Stores a, b and c to dst as six consecutive floats. vst3_f32 is a 3-way
+    // interleaving store, so memory receives a[0], b[0], c[0], a[1], b[1], c[1].
+    // The three vectors are first packed into a float32x2x3_t, which is the
+    // argument type vst3_f32 takes; dst is reinterpreted as float*.
+    AI static void Store3(void* dst, const SkNx& a, const SkNx& b, const SkNx& c) {
+        const float32x2x3_t lanes = {{ a.fVec, b.fVec, c.fVec }};
+        vst3_f32(static_cast<float*>(dst), lanes);
+    }
+
     AI SkNx invert() const {
         float32x2_t est0 = vrecpe_f32(fVec),
                     est1 = vmul_f32(vrecps_f32(est0, fVec), est0);
diff --git a/src/opts/SkNx_sse.h b/src/opts/SkNx_sse.h
index a543249..dc9a594 100644
--- a/src/opts/SkNx_sse.h
+++ b/src/opts/SkNx_sse.h
@@ -29,6 +29,13 @@
 
     AI void store(void* ptr) const { _mm_storel_pi((__m64*)ptr, fVec); }
 
+    AI static void Store3(void* dst, const SkNx& a, const SkNx& b, const SkNx& c) {
+        auto lo = _mm_setr_ps(a[0], b[0], c[0], a[1]),  // output floats 0..3, interleaved
+             hi = _mm_setr_ps(b[1], c[1],    0,    0);  // output floats 4..5; zeros are padding
+        _mm_storeu_ps((float*)dst, lo);                 // unaligned store of all four lanes
+        _mm_storel_pi(((__m64*)dst) + 2, hi);           // low two lanes only, 16 bytes past dst
+    }
+
     AI SkNx operator - () const { return _mm_xor_ps(_mm_set1_ps(-0.0f), fVec); }
 
     AI SkNx operator + (const SkNx& o) const { return _mm_add_ps(fVec, o.fVec); }
diff --git a/tests/SkNxTest.cpp b/tests/SkNxTest.cpp
index 069f1bc..349ffb6 100644
--- a/tests/SkNxTest.cpp
+++ b/tests/SkNxTest.cpp
@@ -423,3 +423,17 @@
     REPORTER_ASSERT(r, y[2] == 5);
     REPORTER_ASSERT(r, y[3] == 7);
 }
+
+DEF_TEST(Sk2f_Store3, r) {
+    // Inputs are chosen so that a correct 3-way interleave yields 0,1,2,3,4,5:
+    // lane 0 of each vector holds 0,1,2 and lane 1 holds 3,4,5.
+    const Sk2f a{0, 3}, b{1, 4}, c{2, 5};
+    float dst[6];
+    Sk2f::Store3(dst, a, b, c);
+    REPORTER_ASSERT(r, dst[0] == 0);
+    REPORTER_ASSERT(r, dst[1] == 1);
+    REPORTER_ASSERT(r, dst[2] == 2);
+    REPORTER_ASSERT(r, dst[3] == 3);
+    REPORTER_ASSERT(r, dst[4] == 4);
+    REPORTER_ASSERT(r, dst[5] == 5);
+}