blob: af60813441bfd6b3d736959e598bad45ecf422a2 [file] [log] [blame]
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
5
6#include <assert.h>
7
8#include <xnnpack/dwconv.h>
9#include <xnnpack/math.h>
10
11
12void xnn_f32_dwconv_spchw_ukernel_3x3p1__scalar(
13 size_t m,
14 size_t n,
15 const float* input,
16 const float* weights,
Erich Elsen4e5db3d2020-05-07 08:57:47 -070017 const float* zero,
Erich Elsen0cc2c532019-10-15 04:44:18 -070018 float* output,
Erich Elsen4e5db3d2020-05-07 08:57:47 -070019 uint32_t padding_top,
Erich Elsen0cc2c532019-10-15 04:44:18 -070020 size_t input_tuple_stride,
21 size_t output_tuple_stride,
22 size_t input_width_stride,
23 size_t output_width_stride,
Marat Dukhanf196d012020-04-15 11:50:03 -070024 const union xnn_f32_spchw_params params[restrict XNN_MIN_ELEMENTS(1)])
Erich Elsen0cc2c532019-10-15 04:44:18 -070025{
26 assert(n != 0);
Erich Elsen4e5db3d2020-05-07 08:57:47 -070027 assert(padding_top == 1);
Erich Elsen0cc2c532019-10-15 04:44:18 -070028
Erich Elsen4e5db3d2020-05-07 08:57:47 -070029 const size_t input_width_decrement = n * input_tuple_stride;
30 const size_t input_width_increment = input_width_stride - input_width_decrement;
Erich Elsen0cc2c532019-10-15 04:44:18 -070031 const size_t output_width_increment = output_width_stride - (n - 1) * output_tuple_stride;
32
33 const float params_min = params->scalar.min;
34 const float params_max = params->scalar.max;
35
Erich Elsen4e5db3d2020-05-07 08:57:47 -070036 const float* i0 = zero;
37 const float* i1 = input;
Erich Elsen0cc2c532019-10-15 04:44:18 -070038 const float* i2 = (const float*) ((uintptr_t) i1 + input_width_stride);
39
Erich Elsen4e5db3d2020-05-07 08:57:47 -070040 if (m == 1) {
41 i2 = zero;
42 }
43
Erich Elsen0cc2c532019-10-15 04:44:18 -070044 float* output0 = output;
45
46 const float vw0 = weights[0];
47 const float vw1 = weights[1];
48 const float vw2 = weights[2];
49 const float vw3 = weights[3];
50 const float vw4 = weights[4];
51 const float vw5 = weights[5];
52 const float vw6 = weights[6];
53 const float vw7 = weights[7];
54 const float vw8 = weights[8];
55 const float vw9 = weights[9];
56
57 while (m > 0) {
58 float vi0x0 = 0.0f;
59 float vi1x0 = 0.0f;
60 float vi2x0 = 0.0f;
61 float vi0x1 = *i0; i0 = (const float*) ((uintptr_t) i0 + input_tuple_stride);
62 float vi1x1 = *i1; i1 = (const float*) ((uintptr_t) i1 + input_tuple_stride);
63 float vi2x1 = *i2; i2 = (const float*) ((uintptr_t) i2 + input_tuple_stride);
64
65 size_t k = n;
66 for (; k > 1; k--) {
67 const float vi0x2 = *i0; i0 = (const float*) ((uintptr_t) i0 + input_tuple_stride);
68 const float vi1x2 = *i1; i1 = (const float*) ((uintptr_t) i1 + input_tuple_stride);
69 const float vi2x2 = *i2; i2 = (const float*) ((uintptr_t) i2 + input_tuple_stride);
70
71 const float vrow0_accum = vw1 * vi0x0 + vw2 * vi0x1 + vw3 * vi0x2;
72 vi0x0 = vi0x1;
73 vi0x1 = vi0x2;
74 const float vrow1_accum = vw4 * vi1x0 + vw5 * vi1x1 + vw6 * vi1x2;
75 vi1x0 = vi1x1;
76 vi1x1 = vi1x2;
77 const float vrow2_accum = vw7 * vi2x0 + vw8 * vi2x1 + vw9 * vi2x2;
78 vi2x0 = vi2x1;
79 vi2x1 = vi2x2;
80
81 float voutput = (vw0 + vrow0_accum) + (vrow1_accum + vrow2_accum);
82
83 voutput = math_max_f32(voutput, params_min);
84 voutput = math_min_f32(voutput, params_max);
85
86 *output0 = voutput; output0 = (float *) ((uintptr_t) output0 + output_tuple_stride);
87 }
88 // Always process the last pixel separately to account for right edge.
89 assert(k == 1);
90 {
91 const float vrow0_accum = vw1 * vi0x0 + vw2 * vi0x1;
92 const float vrow1_accum = vw4 * vi1x0 + vw5 * vi1x1;
93 const float vrow2_accum = vw7 * vi2x0 + vw8 * vi2x1;
94
95 float voutput = (vw0 + vrow0_accum) + (vrow1_accum + vrow2_accum);
96
97 voutput = math_max_f32(voutput, params_min);
98 voutput = math_min_f32(voutput, params_max);
99
100 *output0 = voutput;
101 }
102
Erich Elsen4e5db3d2020-05-07 08:57:47 -0700103 i0 = (const float*) ((uintptr_t) i1 - input_width_decrement);
Erich Elsen0cc2c532019-10-15 04:44:18 -0700104 i1 = (const float*) ((uintptr_t) i1 + input_width_increment);
105 i2 = (const float*) ((uintptr_t) i2 + input_width_increment);
106 output0 = (float*) ((uintptr_t) output0 + output_width_increment);
107 m--;
Erich Elsen4e5db3d2020-05-07 08:57:47 -0700108 if (m == 1) {
109 i2 = zero;
110 }
Erich Elsen0cc2c532019-10-15 04:44:18 -0700111 }
112}