blob: 0932c13f5fc5e47e9a6d99cb4a49a19401ac7bf5 [file] [log] [blame]
1// Copyright 2020 Google LLC
2//
3// This source code is licensed under the BSD-style license found in the
4// LICENSE file in the root directory of this source tree.
5
6$assert BATCH_TILE % 4 == 0
7$assert BATCH_TILE >= 4
8$ABC = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"
9#include <assert.h>
10#include <math.h>
11
12#include <wasm_simd128.h>
13
14#include <xnnpack/common.h>
15#include <xnnpack/vunary.h>
16
17
// Element-wise single-precision square root: reads floats from x and writes
// sqrt of each one to y, using WAsm SIMD for groups of four and sqrtf for the
// leftover elements.
//
//   n      - size of the input in BYTES (not elements); asserted to be
//            non-zero and a multiple of sizeof(float).
//   x      - input floats; advanced as elements are consumed.
//   y      - output floats; advanced as elements are produced.
//   params - not referenced anywhere in this body.
//
// Lines beginning with `$` and the `${...}` spans are not C; they appear to be
// directives/substitutions for a code generator that is expected to define
// BATCH_TILE (a multiple of 4, at least 4, per the `$assert` lines above).
// NOTE(review): the numeral fused to the start of every line looks like
// line-number residue from wherever this text was captured - confirm against
// the upstream template before using this file.
18void xnn_f32_vsqrt_ukernel__wasmsimd_sqrt_x${BATCH_TILE}(
19 size_t n,
20 const float* x,
21 float* y,
22 const union xnn_f32_sqrt_params params[restrict XNN_MIN_ELEMENTS(1)]) XNN_DISABLE_TSAN
23{
24 assert(n != 0);
25 assert(n % sizeof(float) == 0);
26
// Main loop, emitted only when BATCH_TILE > 4: each iteration loads BATCH_TILE
// floats as consecutive 4-lane vectors, takes wasm_f32x4_sqrt of each, and
// stores the results in the same order.
27 $if BATCH_TILE > 4:
28 for (; n >= ${BATCH_TILE} * sizeof(float); n -= ${BATCH_TILE} * sizeof(float)) {
29 const v128_t vx${ABC[0:4]} = wasm_v128_load(x);
30 $for N in range(4, BATCH_TILE, 4):
31 const v128_t vx${ABC[N:N+4]} = wasm_v128_load(x + ${N});
32 x += ${BATCH_TILE};
33
34 $for N in range(0, BATCH_TILE, 4):
35 const v128_t vy${ABC[N:N+4]} = wasm_f32x4_sqrt(vx${ABC[N:N+4]});
36
37 wasm_v128_store(y, vy${ABC[0:4]});
38 $for N in range(4, BATCH_TILE, 4):
39 wasm_v128_store(y + ${N}, vy${ABC[N:N+4]});
40 y += ${BATCH_TILE};
41 }
// Handle remaining full groups of four floats, one vector at a time.
42 for (; n >= 4 * sizeof(float); n -= 4 * sizeof(float)) {
43 const v128_t vx = wasm_v128_load(x);
44 x += 4;
45 const v128_t vy = wasm_f32x4_sqrt(vx);
46 wasm_v128_store(y, vy);
47 y += 4;
48 }
// Scalar tail: fewer than four floats are left here (given the multiple-of-
// sizeof(float) assertion), so each is processed individually with sqrtf and
// no vector-wide load or store touches memory past the n bytes.
49 if XNN_UNLIKELY(n != 0) {
50 do {
51 const float vx = *x++;
52 const float vy = sqrtf(vx);
53 *y++ = vy;
54 n -= sizeof(float);
55 } while (n != 0);
56 }
57}