blob: 78cbefe05333bb99d782d450692a1522eee311c1 [file] [log] [blame]
Marat Dukhan2dbb9442020-05-12 20:43:43 -07001// Copyright 2020 Google LLC
2//
3// This source code is licensed under the BSD-style license found in the
4// LICENSE file in the root directory of this source tree.
5
6#include <assert.h>
7#include <stddef.h>
8
9#include <smmintrin.h>
10
11#include <xnnpack/math-stubs.h>
12
13
14void xnn_math_f32_roundz__sse41(
15 size_t n,
16 const float* input,
17 float* output)
18{
19 assert(n % (4 * sizeof(float)) == 0);
20
21 for (; n != 0; n -= 4 * sizeof(float)) {
22 const __m128 vx = _mm_load_ps(input);
23 input += 4;
24
25 const __m128 vy = _mm_round_ps(vx, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
26
27 _mm_store_ps(output, vy);
28 output += 4;
29 }
30}