blob: 4093e011d026021c88699f82ce28cd72fb8f9df5 [file] [log] [blame]
# Copyright 2020 Google LLC
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
#include <xnnpack/assembly.h>
# void xnn_f32_vrelu_ukernel__wasm32_shr_x4(
#     size_t n,                  0  (size in bytes)
#     const float* x,            1
#     float* y,                  2
#     const union params)        3  unused
# locals (declared as i32: the float bit patterns are processed as integers)
#     i32 value0                 4
#     i32 value1                 5
#     i32 value2                 6
#     i32 value3                 7
#     i32 mask0                  8
#     i32 mask1                  9
#     i32 mask2                  10
#     i32 mask3                  11
BEGIN_FUNCTION xnn_f32_vrelu_ukernel__wasm32_shr_x4
    .functype xnn_f32_vrelu_ukernel__wasm32_shr_x4 (i32, i32, i32, i32) -> ()
    .local   i32, i32, i32, i32, i32, i32, i32, i32

    # ReLU computed on raw 32-bit patterns: y = ((v >>> 31) - 1) & v.
    # The logical shift leaves only the sign bit (0 or 1). Subtracting 1 gives
    # an all-ones mask when the sign bit is clear and a zero mask when it is
    # set, so sign-bit-set inputs are stored as all-zero bits and every other
    # input is stored unchanged.
    #
    # Local 0 is the remaining size in bytes. It is a size_t, so every loop
    # guard uses the unsigned compare. The count is only decremented after a
    # guard has shown it is large enough, so it cannot wrap around.

    # Main loop: 4 values (16 bytes) per iteration.
    local.get    0
    i32.const    16
    i32.ge_u              # count >= 16 (unsigned)
    if
      loop
        local.get    1
        i32.load     0    # value0 = bits of src[0]
        local.set    4
        local.get    1
        i32.load     4    # value1 = bits of src[1]
        local.set    5
        local.get    1
        i32.load     8    # value2 = bits of src[2]
        local.set    6
        local.get    1
        i32.load     12   # value3 = bits of src[3]
        local.set    7

        local.get    4    # maskN = valueN >>> 31 (sign bit as 0 or 1)
        i32.const    31
        i32.shr_u
        local.set    8
        local.get    5
        i32.const    31
        i32.shr_u
        local.set    9
        local.get    6
        i32.const    31
        i32.shr_u
        local.set    10
        local.get    7
        i32.const    31
        i32.shr_u
        local.set    11

        local.get    8    # maskN -= 1 (0 -> all ones, 1 -> zero)
        i32.const    -1
        i32.add
        local.set    8
        local.get    9
        i32.const    -1
        i32.add
        local.set    9
        local.get    10
        i32.const    -1
        i32.add
        local.set    10
        local.get    11
        i32.const    -1
        i32.add
        local.set    11

        local.get    4    # valueN &= maskN
        local.get    8
        i32.and
        local.set    4
        local.get    5
        local.get    9
        i32.and
        local.set    5
        local.get    6
        local.get    10
        i32.and
        local.set    6
        local.get    7
        local.get    11
        i32.and
        local.set    7

        local.get    2
        local.get    4
        i32.store    0    # store the 4 results to dst[0..3]
        local.get    2
        local.get    5
        i32.store    4
        local.get    2
        local.get    6
        i32.store    8
        local.get    2
        local.get    7
        i32.store    12

        local.get    2    # dst += 16
        i32.const    16
        i32.add
        local.set    2
        local.get    1    # src += 16
        i32.const    16
        i32.add
        local.set    1
        local.get    0    # count -= 16
        i32.const    -16
        i32.add
        local.set    0

        local.get    0
        i32.const    16
        i32.ge_u          # count >= 16 (unsigned)
        br_if        0    # repeat main loop
      end_loop
    end_if

    # Tail loop: one value (4 bytes) per iteration. Any final 1-3 bytes are
    # left unprocessed.
    local.get    0
    i32.const    4
    i32.ge_u              # count >= 4 (unsigned)
    if
      loop
        local.get    1
        i32.load     0    # value0 = bits of src[0]
        local.set    4
        local.get    1    # src += 4
        i32.const    4
        i32.add
        local.set    1

        local.get    4    # value0 = ((value0 >>> 31) - 1) & value0
        i32.const    31
        i32.shr_u
        local.set    5
        local.get    5
        i32.const    -1
        i32.add
        local.set    5
        local.get    4
        local.get    5
        i32.and
        local.set    4

        local.get    2
        local.get    4
        i32.store    0    # dst[0] = value0
        local.get    2    # dst += 4
        i32.const    4
        i32.add
        local.set    2
        local.get    0    # count -= 4
        i32.const    -4
        i32.add
        local.set    0

        local.get    0
        i32.const    4
        i32.ge_u          # count >= 4 (unsigned)
        br_if        0    # repeat tail loop
      end_loop
    end_if
END_FUNCTION xnn_f32_vrelu_ukernel__wasm32_shr_x4