// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
5
6$assert BATCH_TILE >= 1
Marat Dukhanf32ae342020-03-10 19:21:17 -07007$ABC = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"
Marat Dukhan13bafb02020-06-05 00:43:11 -07008$assert OP in ["ADD", "DIV", "MAX", "MIN", "MUL", "SUB", "SQRDIFF"]
Frank Barchard674778d2020-08-08 10:17:25 -07009$assert ACTIVATION in ["LINEAR", "MINMAX", "RELU"]
Marat Dukhanc07cb7f2019-11-14 15:32:05 -080010#include <assert.h>
11
12#include <xnnpack/common.h>
13#include <xnnpack/math.h>
Marat Dukhan1e782c42019-11-21 17:02:40 -080014#include <xnnpack/vbinary.h>
Marat Dukhanc07cb7f2019-11-14 15:32:05 -080015
16
Marat Dukhan436ebe62019-12-04 15:10:12 -080017$MIN_F32 = "__builtin_wasm_min_f32" if WASM else "math_min_f32"
18$MAX_F32 = "__builtin_wasm_max_f32" if WASM else "math_max_f32"
Marat Dukhanc07cb7f2019-11-14 15:32:05 -080019$OP_FUNC = {
20$ "ADD": lambda x, y: "%s + %s" % (x, y),
Marat Dukhan77ca6302019-12-06 12:48:15 -080021$ "DIV": lambda x, y: "%s / %s" % (x, y),
Marat Dukhan403b7d42019-12-05 12:49:11 -080022$ "MAX": lambda x, y: "%s(%s, %s)" % (MAX_F32, x, y),
23$ "MIN": lambda x, y: "%s(%s, %s)" % (MIN_F32, x, y),
Marat Dukhanc07cb7f2019-11-14 15:32:05 -080024$ "MUL": lambda x, y: "%s * %s" % (x, y),
25$ "SUB": lambda x, y: "%s - %s" % (x, y),
Marat Dukhan13bafb02020-06-05 00:43:11 -070026$ "SQRDIFF": lambda x, y: "%s - %s" % (x, y),
Marat Dukhanc07cb7f2019-11-14 15:32:05 -080027$}[OP]
Frank Barchard674778d2020-08-08 10:17:25 -070028$SUFFIX = {"LINEAR": "", "RELU": "_relu", "MINMAX": "_minmax"}[ACTIVATION]
29$PARAMS = {"LINEAR": "xnn_f32_default_params", "RELU": "xnn_f32_relu_params", "MINMAX": "xnn_f32_minmax_params"}[ACTIVATION]
Marat Dukhan91cd2b72020-04-09 23:57:31 -070030void xnn_f32_v${OP.lower()}${SUFFIX}_ukernel__${"wasm" if WASM else "scalar"}_x${BATCH_TILE}(
Marat Dukhanc07cb7f2019-11-14 15:32:05 -080031 size_t n,
32 const float* a,
33 const float* b,
34 float* y,
Marat Dukhanf196d012020-04-15 11:50:03 -070035 const union ${PARAMS} params[restrict XNN_MIN_ELEMENTS(1)])
Marat Dukhanc07cb7f2019-11-14 15:32:05 -080036{
37 assert(n != 0);
38 assert(n % sizeof(float) == 0);
Frank Barchard0822dde2020-07-04 12:47:24 -070039 assert(a != NULL);
40 assert(b != NULL);
41 assert(y != NULL);
Marat Dukhanc07cb7f2019-11-14 15:32:05 -080042
Marat Dukhan91cd2b72020-04-09 23:57:31 -070043 $if ACTIVATION == "MINMAX":
44 const float vy_min = params->scalar.min;
45 const float vy_max = params->scalar.max;
Marat Dukhanc07cb7f2019-11-14 15:32:05 -080046
47 $if BATCH_TILE > 1:
48 for (; n >= ${BATCH_TILE} * sizeof(float); n -= ${BATCH_TILE} * sizeof(float)) {
49 $for N in range(BATCH_TILE):
50 const float va${ABC[N]} = a[${N}];
51 a += ${BATCH_TILE};
52
53 $for N in range(BATCH_TILE):
54 const float vb${ABC[N]} = b[${N}];
55 b += ${BATCH_TILE};
56
57 $for N in range(BATCH_TILE):
58 float vy${ABC[N]} = ${OP_FUNC("va" + ABC[N], "vb" + ABC[N])};
59
Marat Dukhan13bafb02020-06-05 00:43:11 -070060 $if OP == "SQRDIFF":
61 $for N in range(BATCH_TILE):
62 vy${ABC[N]} = vy${ABC[N]} * vy${ABC[N]};
63
Marat Dukhan91cd2b72020-04-09 23:57:31 -070064 $if ACTIVATION == "MINMAX":
65 $for N in range(BATCH_TILE):
66 vy${ABC[N]} = ${MAX_F32}(vy${ABC[N]}, vy_min);
Marat Dukhanc07cb7f2019-11-14 15:32:05 -080067
Marat Dukhan91cd2b72020-04-09 23:57:31 -070068 $for N in range(BATCH_TILE):
69 vy${ABC[N]} = ${MIN_F32}(vy${ABC[N]}, vy_max);
Frank Barchard674778d2020-08-08 10:17:25 -070070 $elif ACTIVATION == "RELU":
71 $for N in range(BATCH_TILE):
72 vy${ABC[N]} = ${MAX_F32}(vy${ABC[N]}, 0.0f);
Marat Dukhanc07cb7f2019-11-14 15:32:05 -080073
74 $for N in range(BATCH_TILE):
75 y[${N}] = vy${ABC[N]};
76 y += ${BATCH_TILE};
77 }
78 if XNN_UNLIKELY(n != 0) {
79 $if BATCH_TILE > 2:
80 do {
81 const float va = *a++;
82 const float vb = *b++;
83 float vy = ${OP_FUNC("va", "vb")};
Marat Dukhan13bafb02020-06-05 00:43:11 -070084 $if OP == "SQRDIFF":
85 vy = vy * vy;
Marat Dukhan91cd2b72020-04-09 23:57:31 -070086 $if ACTIVATION == "MINMAX":
87 vy = ${MAX_F32}(vy, vy_min);
88 vy = ${MIN_F32}(vy, vy_max);
Frank Barchard674778d2020-08-08 10:17:25 -070089 $elif ACTIVATION == "RELU":
90 vy = ${MAX_F32}(vy, 0.0f);
Marat Dukhanc07cb7f2019-11-14 15:32:05 -080091 *y++ = vy;
92 n -= sizeof(float);
93 } while (n != 0);
94 $else:
95 const float va = *a;
96 const float vb = *b;
97 float vy = ${OP_FUNC("va", "vb")};
Marat Dukhan13bafb02020-06-05 00:43:11 -070098 $if OP == "SQRDIFF":
99 vy = vy * vy;
Marat Dukhan91cd2b72020-04-09 23:57:31 -0700100 $if ACTIVATION == "MINMAX":
101 vy = ${MAX_F32}(vy, vy_min);
102 vy = ${MIN_F32}(vy, vy_max);
Frank Barchard674778d2020-08-08 10:17:25 -0700103 $elif ACTIVATION == "RELU":
104 vy = ${MAX_F32}(vy, 0.0f);
Marat Dukhanc07cb7f2019-11-14 15:32:05 -0800105 *y = vy;
106 }
107 $else:
108 for (; n >= sizeof(float); n -= sizeof(float)) {
109 const float va = *a++;
110 const float vb = *b++;
111 float vy = ${OP_FUNC("va", "vb")};
Marat Dukhan13bafb02020-06-05 00:43:11 -0700112 $if OP == "SQRDIFF":
113 vy = vy * vy;
Marat Dukhan91cd2b72020-04-09 23:57:31 -0700114 $if ACTIVATION == "MINMAX":
115 vy = ${MAX_F32}(vy, vy_min);
116 vy = ${MIN_F32}(vy, vy_max);
Frank Barchard674778d2020-08-08 10:17:25 -0700117 $elif ACTIVATION == "RELU":
118 vy = ${MAX_F32}(vy, 0.0f);
Marat Dukhanc07cb7f2019-11-14 15:32:05 -0800119 *y++ = vy;
120 }
121}