// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
| 5 | |
Marat Dukhan | f32ae34 | 2020-03-10 19:21:17 -0700 | [diff] [blame] | 6 | $ABC = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ" |
XNNPACK Team | b455b12 | 2019-09-27 18:10:33 -0700 | [diff] [blame] | 7 | #include <assert.h> |
| 8 | |
| 9 | #include <xnnpack/math.h> |
| 10 | #include <xnnpack/spmm.h> |
| 11 | |
| 12 | |
Marat Dukhan | 355ab43 | 2020-04-09 19:01:52 -0700 | [diff] [blame] | 13 | void xnn_f32_spmm_minmax_ukernel_${MR}x${NR}__scalar_pipelined( |
Marat Dukhan | ee2df51 | 2020-11-13 13:48:23 -0800 | [diff] [blame] | 14 | size_t batch_size, |
| 15 | size_t output_channels, |
Marat Dukhan | 1530116 | 2020-07-23 21:30:50 -0700 | [diff] [blame] | 16 | const float*restrict input, |
XNNPACK Team | b455b12 | 2019-09-27 18:10:33 -0700 | [diff] [blame] | 17 | const float*restrict weights, |
| 18 | const int32_t*restrict widx_dmap, |
| 19 | const uint32_t*restrict nidx_nnzmap, |
Marat Dukhan | 1530116 | 2020-07-23 21:30:50 -0700 | [diff] [blame] | 20 | float*restrict output, |
Marat Dukhan | f196d01 | 2020-04-15 11:50:03 -0700 | [diff] [blame] | 21 | const union xnn_f32_minmax_params params[restrict XNN_MIN_ELEMENTS(1)]) |
XNNPACK Team | b455b12 | 2019-09-27 18:10:33 -0700 | [diff] [blame] | 22 | { |
Marat Dukhan | 1530116 | 2020-07-23 21:30:50 -0700 | [diff] [blame] | 23 | assert(batch_size != 0); |
Marat Dukhan | e278a55 | 2020-11-14 16:14:58 -0800 | [diff] [blame^] | 24 | assert(batch_size % sizeof(float) == 0); |
Marat Dukhan | 1717075 | 2020-11-13 14:31:10 -0800 | [diff] [blame] | 25 | assert(output_channels != 0); |
XNNPACK Team | b455b12 | 2019-09-27 18:10:33 -0700 | [diff] [blame] | 26 | |
| 27 | const float vmin = params->scalar.min; |
| 28 | const float vmax = params->scalar.max; |
Marat Dukhan | e278a55 | 2020-11-14 16:14:58 -0800 | [diff] [blame^] | 29 | size_t output_decrement = batch_size * output_channels - ${MR} * sizeof(float); |
Marat Dukhan | 1530116 | 2020-07-23 21:30:50 -0700 | [diff] [blame] | 30 | size_t n = batch_size; |
Marat Dukhan | e278a55 | 2020-11-14 16:14:58 -0800 | [diff] [blame^] | 31 | while XNN_LIKELY(n >= ${MR} * sizeof(float)) { |
XNNPACK Team | b455b12 | 2019-09-27 18:10:33 -0700 | [diff] [blame] | 32 | const float*restrict w = weights; |
| 33 | const int32_t* dmap = widx_dmap; |
| 34 | const uint32_t* nnzmap = nidx_nnzmap; |
| 35 | float vw = *w++; |
| 36 | intptr_t diff = *dmap++; |
| 37 | $for M in range(MR): |
Marat Dukhan | 1530116 | 2020-07-23 21:30:50 -0700 | [diff] [blame] | 38 | float vi${ABC[M]} = input[${M}]; |
| 39 | size_t c = output_channels; |
XNNPACK Team | b455b12 | 2019-09-27 18:10:33 -0700 | [diff] [blame] | 40 | do { |
| 41 | uint32_t nnz = *nnzmap++; |
| 42 | $for M in range(MR): |
| 43 | float vacc${ABC[M]} = vw; |
| 44 | vw = *w++; |
| 45 | if XNN_LIKELY(nnz != 0) { |
| 46 | do { |
| 47 | $for M in range(MR): |
Marat Dukhan | 1530116 | 2020-07-23 21:30:50 -0700 | [diff] [blame] | 48 | vacc${ABC[M]} += vi${ABC[M]} * vw; |
| 49 | input = (const float*restrict) ((uintptr_t) input + (uintptr_t) diff); |
XNNPACK Team | b455b12 | 2019-09-27 18:10:33 -0700 | [diff] [blame] | 50 | |
| 51 | diff = *dmap++; |
| 52 | vw = *w++; |
| 53 | $for M in range(MR): |
Marat Dukhan | 1530116 | 2020-07-23 21:30:50 -0700 | [diff] [blame] | 54 | vi${ABC[M]} = input[${M}]; |
XNNPACK Team | b455b12 | 2019-09-27 18:10:33 -0700 | [diff] [blame] | 55 | } while (--nnz != 0); |
| 56 | } |
| 57 | $for M in range(MR): |
| 58 | float vout${ABC[M]} = math_min_f32(vacc${ABC[M]}, vmax); |
| 59 | $for M in range(MR): |
| 60 | vout${ABC[M]} = math_max_f32(vout${ABC[M]}, vmin); |
| 61 | $for M in range(MR): |
Marat Dukhan | 1530116 | 2020-07-23 21:30:50 -0700 | [diff] [blame] | 62 | output[${M}] = vout${ABC[M]}; |
Marat Dukhan | e278a55 | 2020-11-14 16:14:58 -0800 | [diff] [blame^] | 63 | output = (float*restrict) ((uintptr_t) output + batch_size); |
Marat Dukhan | 1530116 | 2020-07-23 21:30:50 -0700 | [diff] [blame] | 64 | } while (--c != 0); |
Marat Dukhan | e278a55 | 2020-11-14 16:14:58 -0800 | [diff] [blame^] | 65 | output = (float*restrict) ((uintptr_t) output - output_decrement); |
Marat Dukhan | 1530116 | 2020-07-23 21:30:50 -0700 | [diff] [blame] | 66 | input += ${MR}; |
Marat Dukhan | e278a55 | 2020-11-14 16:14:58 -0800 | [diff] [blame^] | 67 | n -= ${MR} * sizeof(float); |
XNNPACK Team | b455b12 | 2019-09-27 18:10:33 -0700 | [diff] [blame] | 68 | } |
Marat Dukhan | 1530116 | 2020-07-23 21:30:50 -0700 | [diff] [blame] | 69 | if XNN_UNLIKELY(n != 0) { |
XNNPACK Team | b455b12 | 2019-09-27 18:10:33 -0700 | [diff] [blame] | 70 | $for LOG2M in reversed(range((MR - 1).bit_length())): |
| 71 | $SUBMR = 1 << LOG2M |
Marat Dukhan | e278a55 | 2020-11-14 16:14:58 -0800 | [diff] [blame^] | 72 | $if SUBMR * 2 >= MR: |
| 73 | output_decrement += ${MR - SUBMR} * sizeof(float); |
| 74 | $else: |
| 75 | output_decrement += ${SUBMR} * sizeof(float); |
| 76 | if (n & (${SUBMR} * sizeof(float))) { |
XNNPACK Team | b455b12 | 2019-09-27 18:10:33 -0700 | [diff] [blame] | 77 | const float*restrict w = weights; |
| 78 | const int32_t* dmap = widx_dmap; |
| 79 | const uint32_t* nnzmap = nidx_nnzmap; |
| 80 | float vw = *w++; |
| 81 | intptr_t diff = *dmap++; |
| 82 | $for M in range(SUBMR): |
Marat Dukhan | 1530116 | 2020-07-23 21:30:50 -0700 | [diff] [blame] | 83 | float vi${ABC[M]} = input[${M}]; |
| 84 | size_t c = output_channels; |
XNNPACK Team | b455b12 | 2019-09-27 18:10:33 -0700 | [diff] [blame] | 85 | do { |
| 86 | uint32_t nnz = *nnzmap++; |
| 87 | $for M in range(SUBMR): |
| 88 | float vacc${ABC[M]} = vw; |
| 89 | vw = *w++; |
| 90 | if XNN_LIKELY(nnz != 0) { |
| 91 | do { |
| 92 | $for M in range(SUBMR): |
Marat Dukhan | 1530116 | 2020-07-23 21:30:50 -0700 | [diff] [blame] | 93 | vacc${ABC[M]} += vi${ABC[M]} * vw; |
| 94 | input = (const float*restrict) ((uintptr_t) input + (uintptr_t) diff); |
XNNPACK Team | b455b12 | 2019-09-27 18:10:33 -0700 | [diff] [blame] | 95 | |
| 96 | diff = *dmap++; |
| 97 | vw = *w++; |
| 98 | $for M in range(SUBMR): |
Marat Dukhan | 1530116 | 2020-07-23 21:30:50 -0700 | [diff] [blame] | 99 | vi${ABC[M]} = input[${M}]; |
XNNPACK Team | b455b12 | 2019-09-27 18:10:33 -0700 | [diff] [blame] | 100 | } while (--nnz != 0); |
| 101 | } |
| 102 | $for M in range(SUBMR): |
| 103 | float vout${ABC[M]} = math_min_f32(vacc${ABC[M]}, vmax); |
| 104 | $for M in range(SUBMR): |
| 105 | vout${ABC[M]} = math_max_f32(vout${ABC[M]}, vmin); |
| 106 | $for M in range(SUBMR): |
Marat Dukhan | 1530116 | 2020-07-23 21:30:50 -0700 | [diff] [blame] | 107 | output[${M}] = vout${ABC[M]}; |
Marat Dukhan | e278a55 | 2020-11-14 16:14:58 -0800 | [diff] [blame^] | 108 | output = (float*restrict) ((uintptr_t) output + batch_size); |
Marat Dukhan | 1530116 | 2020-07-23 21:30:50 -0700 | [diff] [blame] | 109 | } while (--c != 0); |
Marat Dukhan | e278a55 | 2020-11-14 16:14:58 -0800 | [diff] [blame^] | 110 | const size_t output_decrement = batch_size * output_channels - ${SUBMR} * sizeof(float); |
| 111 | output = (float*restrict) ((uintptr_t) output - output_decrement); |
Marat Dukhan | 1530116 | 2020-07-23 21:30:50 -0700 | [diff] [blame] | 112 | input += ${SUBMR}; |
XNNPACK Team | b455b12 | 2019-09-27 18:10:33 -0700 | [diff] [blame] | 113 | } |
| 114 | } |
| 115 | } |