// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

$ABC = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"
#include <assert.h>

#include <xnnpack/math.h>
#include <xnnpack/spmm.h>


Marat Dukhan355ab432020-04-09 19:01:52 -070013void xnn_f32_spmm_minmax_ukernel_${MR}x${NR}__scalar_pipelined(
Marat Dukhane8bfcc82020-11-16 12:28:13 -080014 size_t mc,
15 size_t nc,
Marat Dukhan15301162020-07-23 21:30:50 -070016 const float*restrict input,
XNNPACK Teamb455b122019-09-27 18:10:33 -070017 const float*restrict weights,
18 const int32_t*restrict widx_dmap,
19 const uint32_t*restrict nidx_nnzmap,
Marat Dukhan15301162020-07-23 21:30:50 -070020 float*restrict output,
Marat Dukhane8bfcc82020-11-16 12:28:13 -080021 size_t output_stride,
Marat Dukhanf196d012020-04-15 11:50:03 -070022 const union xnn_f32_minmax_params params[restrict XNN_MIN_ELEMENTS(1)])
XNNPACK Teamb455b122019-09-27 18:10:33 -070023{
Marat Dukhane8bfcc82020-11-16 12:28:13 -080024 assert(mc != 0);
25 assert(mc % sizeof(float) == 0);
26 assert(nc != 0);
XNNPACK Teamb455b122019-09-27 18:10:33 -070027
28 const float vmin = params->scalar.min;
29 const float vmax = params->scalar.max;
Marat Dukhane8bfcc82020-11-16 12:28:13 -080030 size_t output_decrement = output_stride * nc - ${MR} * sizeof(float);
31 while XNN_LIKELY(mc >= ${MR} * sizeof(float)) {
XNNPACK Teamb455b122019-09-27 18:10:33 -070032 const float*restrict w = weights;
33 const int32_t* dmap = widx_dmap;
34 const uint32_t* nnzmap = nidx_nnzmap;
35 float vw = *w++;
36 intptr_t diff = *dmap++;
37 $for M in range(MR):
Marat Dukhan15301162020-07-23 21:30:50 -070038 float vi${ABC[M]} = input[${M}];
Marat Dukhane8bfcc82020-11-16 12:28:13 -080039 size_t n = nc;
XNNPACK Teamb455b122019-09-27 18:10:33 -070040 do {
41 uint32_t nnz = *nnzmap++;
42 $for M in range(MR):
43 float vacc${ABC[M]} = vw;
44 vw = *w++;
45 if XNN_LIKELY(nnz != 0) {
46 do {
47 $for M in range(MR):
Marat Dukhan15301162020-07-23 21:30:50 -070048 vacc${ABC[M]} += vi${ABC[M]} * vw;
49 input = (const float*restrict) ((uintptr_t) input + (uintptr_t) diff);
XNNPACK Teamb455b122019-09-27 18:10:33 -070050
51 diff = *dmap++;
52 vw = *w++;
53 $for M in range(MR):
Marat Dukhan15301162020-07-23 21:30:50 -070054 vi${ABC[M]} = input[${M}];
XNNPACK Teamb455b122019-09-27 18:10:33 -070055 } while (--nnz != 0);
56 }
57 $for M in range(MR):
58 float vout${ABC[M]} = math_min_f32(vacc${ABC[M]}, vmax);
59 $for M in range(MR):
60 vout${ABC[M]} = math_max_f32(vout${ABC[M]}, vmin);
61 $for M in range(MR):
Marat Dukhan15301162020-07-23 21:30:50 -070062 output[${M}] = vout${ABC[M]};
Marat Dukhane8bfcc82020-11-16 12:28:13 -080063 output = (float*restrict) ((uintptr_t) output + output_stride);
64 } while (--n != 0);
Marat Dukhane278a552020-11-14 16:14:58 -080065 output = (float*restrict) ((uintptr_t) output - output_decrement);
Marat Dukhan15301162020-07-23 21:30:50 -070066 input += ${MR};
Marat Dukhane8bfcc82020-11-16 12:28:13 -080067 mc -= ${MR} * sizeof(float);
XNNPACK Teamb455b122019-09-27 18:10:33 -070068 }
Marat Dukhane8bfcc82020-11-16 12:28:13 -080069 if XNN_UNLIKELY(mc != 0) {
XNNPACK Teamb455b122019-09-27 18:10:33 -070070 $for LOG2M in reversed(range((MR - 1).bit_length())):
71 $SUBMR = 1 << LOG2M
Marat Dukhane278a552020-11-14 16:14:58 -080072 $if SUBMR * 2 >= MR:
73 output_decrement += ${MR - SUBMR} * sizeof(float);
74 $else:
75 output_decrement += ${SUBMR} * sizeof(float);
Marat Dukhane8bfcc82020-11-16 12:28:13 -080076 if (mc & (${SUBMR} * sizeof(float))) {
XNNPACK Teamb455b122019-09-27 18:10:33 -070077 const float*restrict w = weights;
78 const int32_t* dmap = widx_dmap;
79 const uint32_t* nnzmap = nidx_nnzmap;
80 float vw = *w++;
81 intptr_t diff = *dmap++;
82 $for M in range(SUBMR):
Marat Dukhan15301162020-07-23 21:30:50 -070083 float vi${ABC[M]} = input[${M}];
Marat Dukhane8bfcc82020-11-16 12:28:13 -080084 size_t n = nc;
XNNPACK Teamb455b122019-09-27 18:10:33 -070085 do {
86 uint32_t nnz = *nnzmap++;
87 $for M in range(SUBMR):
88 float vacc${ABC[M]} = vw;
89 vw = *w++;
90 if XNN_LIKELY(nnz != 0) {
91 do {
92 $for M in range(SUBMR):
Marat Dukhan15301162020-07-23 21:30:50 -070093 vacc${ABC[M]} += vi${ABC[M]} * vw;
94 input = (const float*restrict) ((uintptr_t) input + (uintptr_t) diff);
XNNPACK Teamb455b122019-09-27 18:10:33 -070095
96 diff = *dmap++;
97 vw = *w++;
98 $for M in range(SUBMR):
Marat Dukhan15301162020-07-23 21:30:50 -070099 vi${ABC[M]} = input[${M}];
XNNPACK Teamb455b122019-09-27 18:10:33 -0700100 } while (--nnz != 0);
101 }
102 $for M in range(SUBMR):
103 float vout${ABC[M]} = math_min_f32(vacc${ABC[M]}, vmax);
104 $for M in range(SUBMR):
105 vout${ABC[M]} = math_max_f32(vout${ABC[M]}, vmin);
106 $for M in range(SUBMR):
Marat Dukhan15301162020-07-23 21:30:50 -0700107 output[${M}] = vout${ABC[M]};
Marat Dukhane8bfcc82020-11-16 12:28:13 -0800108 output = (float*restrict) ((uintptr_t) output + output_stride);
109 } while (--n != 0);
Marat Dukhane278a552020-11-14 16:14:58 -0800110 output = (float*restrict) ((uintptr_t) output - output_decrement);
Marat Dukhan15301162020-07-23 21:30:50 -0700111 input += ${SUBMR};
XNNPACK Teamb455b122019-09-27 18:10:33 -0700112 }
113 }
114}