blob: 462bd26b5328e294b4cca231fb4fc381e9604785 [file] [log] [blame]
# Copyright 2020 Google LLC
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
#include <xnnpack/assembly.h>
# void xnn_f32_vrelu_ukernel__wasm32_shr_x2(
# size_t n, 0
# const float* x, 1
# float* y, 2
# const union params) 3 unused
# locals (all declared i32; float values are kept as raw 32-bit patterns)
# i32 value0 4
# i32 value1 5
# i32 mask0 6
# i32 mask1 7
BEGIN_FUNCTION xnn_f32_vrelu_ukernel__wasm32_shr_x2
    .functype xnn_f32_vrelu_ukernel__wasm32_shr_x2 (i32, i32, i32, i32) -> ()
    .local  i32, i32, i32, i32

    # ReLU computed with integer instructions only, on the raw 32-bit value v:
    #     result = ((v >>u 31) - 1) & v
    # Sign bit set:   (1 - 1) is 0, so the result is 0.
    # Sign bit clear: (0 - 1) is all ones, so the result is v unchanged.
    # Local 0 is the remaining byte count, local 1 the source pointer and
    # local 2 the destination pointer. Local 3 (params) is never read.
    # The count is a size_t, so each count comparison is unsigned (i32.ge_u);
    # a signed comparison would skip all work once the count reaches 2^31.

    local.get    0
    i32.const    8         # count >= 8 (unsigned): two elements available
    i32.ge_u
    if
      loop
        # Both inputs are loaded before anything is stored.
        local.get    1       # src
        i32.load     0       # value0 = bits at src
        local.set    4
        local.get    1       # src
        i32.load     4       # value1 = bits at src + 4
        local.set    5

        local.get    1       # src += 8
        i32.const    8
        i32.add
        local.set    1

        local.get    4       # mask0 = (value0 >> 31) - 1
        i32.const    31
        i32.shr_u
        i32.const    -1
        i32.add
        local.set    6
        local.get    5       # mask1 = (value1 >> 31) - 1
        i32.const    31
        i32.shr_u
        i32.const    -1
        i32.add
        local.set    7

        local.get    4       # value0 &= mask0
        local.get    6
        i32.and
        local.set    4
        local.get    5       # value1 &= mask1
        local.get    7
        i32.and
        local.set    5

        local.get    2       # dst
        local.get    4
        i32.store    0       # store 1st result at dst
        local.get    2       # dst
        local.get    5
        i32.store    4       # store 2nd result at dst + 4

        local.get    2       # dst += 8
        i32.const    8
        i32.add
        local.set    2

        local.get    0       # count -= 8
        i32.const    -8
        i32.add
        local.tee    0       # write back and keep the new count on the stack
        i32.const    8       # repeat while count >= 8 (unsigned)
        i32.ge_u
        br_if        0       # loop
      end_loop
    end_if

    local.get    0
    i32.const    4         # remainder: count >= 4 (unsigned), one element left
    i32.ge_u
    if
      local.get    1       # src
      i32.load     0       # value0 = bits at src
      local.set    4

      local.get    4       # mask = (value0 >> 31) - 1, kept in local 5
      i32.const    31
      i32.shr_u
      i32.const    -1
      i32.add
      local.set    5

      local.get    4       # value0 &= mask
      local.get    5
      i32.and
      local.set    4

      local.get    2       # dst
      local.get    4
      i32.store    0       # store result at dst
    end_if
END_FUNCTION xnn_f32_vrelu_ukernel__wasm32_shr_x2