// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

Marat Dukhanf32ae342020-03-10 19:21:17 -07006$ABC = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"
XNNPACK Teamb455b122019-09-27 18:10:33 -07007#include <assert.h>
8
9#include <xnnpack/math.h>
10#include <xnnpack/spmm.h>
11
12
Marat Dukhan355ab432020-04-09 19:01:52 -070013void xnn_f32_spmm_minmax_ukernel_${MR}x${NR}__scalar_pipelined(
Marat Dukhanee2df512020-11-13 13:48:23 -080014 size_t batch_size,
15 size_t output_channels,
Marat Dukhan15301162020-07-23 21:30:50 -070016 const float*restrict input,
XNNPACK Teamb455b122019-09-27 18:10:33 -070017 const float*restrict weights,
18 const int32_t*restrict widx_dmap,
19 const uint32_t*restrict nidx_nnzmap,
Marat Dukhan15301162020-07-23 21:30:50 -070020 float*restrict output,
Marat Dukhanf196d012020-04-15 11:50:03 -070021 const union xnn_f32_minmax_params params[restrict XNN_MIN_ELEMENTS(1)])
XNNPACK Teamb455b122019-09-27 18:10:33 -070022{
Marat Dukhan15301162020-07-23 21:30:50 -070023 assert(batch_size != 0);
Marat Dukhane278a552020-11-14 16:14:58 -080024 assert(batch_size % sizeof(float) == 0);
Marat Dukhan17170752020-11-13 14:31:10 -080025 assert(output_channels != 0);
XNNPACK Teamb455b122019-09-27 18:10:33 -070026
27 const float vmin = params->scalar.min;
28 const float vmax = params->scalar.max;
Marat Dukhane278a552020-11-14 16:14:58 -080029 size_t output_decrement = batch_size * output_channels - ${MR} * sizeof(float);
Marat Dukhan15301162020-07-23 21:30:50 -070030 size_t n = batch_size;
Marat Dukhane278a552020-11-14 16:14:58 -080031 while XNN_LIKELY(n >= ${MR} * sizeof(float)) {
XNNPACK Teamb455b122019-09-27 18:10:33 -070032 const float*restrict w = weights;
33 const int32_t* dmap = widx_dmap;
34 const uint32_t* nnzmap = nidx_nnzmap;
35 float vw = *w++;
36 intptr_t diff = *dmap++;
37 $for M in range(MR):
Marat Dukhan15301162020-07-23 21:30:50 -070038 float vi${ABC[M]} = input[${M}];
39 size_t c = output_channels;
XNNPACK Teamb455b122019-09-27 18:10:33 -070040 do {
41 uint32_t nnz = *nnzmap++;
42 $for M in range(MR):
43 float vacc${ABC[M]} = vw;
44 vw = *w++;
45 if XNN_LIKELY(nnz != 0) {
46 do {
47 $for M in range(MR):
Marat Dukhan15301162020-07-23 21:30:50 -070048 vacc${ABC[M]} += vi${ABC[M]} * vw;
49 input = (const float*restrict) ((uintptr_t) input + (uintptr_t) diff);
XNNPACK Teamb455b122019-09-27 18:10:33 -070050
51 diff = *dmap++;
52 vw = *w++;
53 $for M in range(MR):
Marat Dukhan15301162020-07-23 21:30:50 -070054 vi${ABC[M]} = input[${M}];
XNNPACK Teamb455b122019-09-27 18:10:33 -070055 } while (--nnz != 0);
56 }
57 $for M in range(MR):
58 float vout${ABC[M]} = math_min_f32(vacc${ABC[M]}, vmax);
59 $for M in range(MR):
60 vout${ABC[M]} = math_max_f32(vout${ABC[M]}, vmin);
61 $for M in range(MR):
Marat Dukhan15301162020-07-23 21:30:50 -070062 output[${M}] = vout${ABC[M]};
Marat Dukhane278a552020-11-14 16:14:58 -080063 output = (float*restrict) ((uintptr_t) output + batch_size);
Marat Dukhan15301162020-07-23 21:30:50 -070064 } while (--c != 0);
Marat Dukhane278a552020-11-14 16:14:58 -080065 output = (float*restrict) ((uintptr_t) output - output_decrement);
Marat Dukhan15301162020-07-23 21:30:50 -070066 input += ${MR};
Marat Dukhane278a552020-11-14 16:14:58 -080067 n -= ${MR} * sizeof(float);
XNNPACK Teamb455b122019-09-27 18:10:33 -070068 }
Marat Dukhan15301162020-07-23 21:30:50 -070069 if XNN_UNLIKELY(n != 0) {
XNNPACK Teamb455b122019-09-27 18:10:33 -070070 $for LOG2M in reversed(range((MR - 1).bit_length())):
71 $SUBMR = 1 << LOG2M
Marat Dukhane278a552020-11-14 16:14:58 -080072 $if SUBMR * 2 >= MR:
73 output_decrement += ${MR - SUBMR} * sizeof(float);
74 $else:
75 output_decrement += ${SUBMR} * sizeof(float);
76 if (n & (${SUBMR} * sizeof(float))) {
XNNPACK Teamb455b122019-09-27 18:10:33 -070077 const float*restrict w = weights;
78 const int32_t* dmap = widx_dmap;
79 const uint32_t* nnzmap = nidx_nnzmap;
80 float vw = *w++;
81 intptr_t diff = *dmap++;
82 $for M in range(SUBMR):
Marat Dukhan15301162020-07-23 21:30:50 -070083 float vi${ABC[M]} = input[${M}];
84 size_t c = output_channels;
XNNPACK Teamb455b122019-09-27 18:10:33 -070085 do {
86 uint32_t nnz = *nnzmap++;
87 $for M in range(SUBMR):
88 float vacc${ABC[M]} = vw;
89 vw = *w++;
90 if XNN_LIKELY(nnz != 0) {
91 do {
92 $for M in range(SUBMR):
Marat Dukhan15301162020-07-23 21:30:50 -070093 vacc${ABC[M]} += vi${ABC[M]} * vw;
94 input = (const float*restrict) ((uintptr_t) input + (uintptr_t) diff);
XNNPACK Teamb455b122019-09-27 18:10:33 -070095
96 diff = *dmap++;
97 vw = *w++;
98 $for M in range(SUBMR):
Marat Dukhan15301162020-07-23 21:30:50 -070099 vi${ABC[M]} = input[${M}];
XNNPACK Teamb455b122019-09-27 18:10:33 -0700100 } while (--nnz != 0);
101 }
102 $for M in range(SUBMR):
103 float vout${ABC[M]} = math_min_f32(vacc${ABC[M]}, vmax);
104 $for M in range(SUBMR):
105 vout${ABC[M]} = math_max_f32(vout${ABC[M]}, vmin);
106 $for M in range(SUBMR):
Marat Dukhan15301162020-07-23 21:30:50 -0700107 output[${M}] = vout${ABC[M]};
Marat Dukhane278a552020-11-14 16:14:58 -0800108 output = (float*restrict) ((uintptr_t) output + batch_size);
Marat Dukhan15301162020-07-23 21:30:50 -0700109 } while (--c != 0);
Marat Dukhane278a552020-11-14 16:14:58 -0800110 const size_t output_decrement = batch_size * output_channels - ${SUBMR} * sizeof(float);
111 output = (float*restrict) ((uintptr_t) output - output_decrement);
Marat Dukhan15301162020-07-23 21:30:50 -0700112 input += ${SUBMR};
XNNPACK Teamb455b122019-09-27 18:10:33 -0700113 }
114 }
115}