blob: 5da3d70be281a2bea8a7dd29bd18eee10cb43b9b [file] [log] [blame]
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
5
6#include <assert.h>
7
8#include <xnnpack/dwconv.h>
9#include <xnnpack/math.h>
10
11
12void xnn_f32_dwconv_spchw_ukernel_3x3s2p1__scalar(
13 size_t m,
14 size_t n,
15 const float* input,
16 const float* weights,
Erich Elsen4e5db3d2020-05-07 08:57:47 -070017 const float* zero,
Erich Elsenac4de802019-10-16 04:35:30 -070018 float* output,
Erich Elsen4e5db3d2020-05-07 08:57:47 -070019 uint32_t padding_top,
Erich Elsenac4de802019-10-16 04:35:30 -070020 size_t input_tuple_stride,
21 size_t output_tuple_stride,
22 size_t input_width_stride,
23 size_t output_width_stride,
Marat Dukhanf196d012020-04-15 11:50:03 -070024 const union xnn_f32_spchw_params params[restrict XNN_MIN_ELEMENTS(1)])
Erich Elsenac4de802019-10-16 04:35:30 -070025{
26 assert(n != 0);
Erich Elsen4e5db3d2020-05-07 08:57:47 -070027 assert(padding_top <= 1);
Erich Elsenac4de802019-10-16 04:35:30 -070028
Erich Elsen4e5db3d2020-05-07 08:57:47 -070029 const size_t input_width_decrement_single = (n/2) * 2 * input_tuple_stride;;
30 const size_t input_width_increment = 2 * input_width_stride - input_width_decrement_single;
Erich Elsenac4de802019-10-16 04:35:30 -070031 const size_t output_width_increment = output_width_stride - (n/2) * output_tuple_stride;
32
33 const float params_min = params->scalar.min;
34 const float params_max = params->scalar.max;
35
Erich Elsen4e5db3d2020-05-07 08:57:47 -070036 const float* i0;
37 const float* i1;
38 const float* i2;
39
40 if (padding_top == 0) {
41 i0 = input;
42 i1 = (const float*) ((uintptr_t) i0 + input_width_stride);
43 i2 = (const float*) ((uintptr_t) i1 + input_width_stride);
44 if (m == 1) {
45 i2 = zero;
46 }
47 } else {
48 i0 = zero;
49 i1 = input;
50 i2 = (const float*) ((uintptr_t) i1 + input_width_stride);
51 if (m == 1) {
52 i2 = zero;
53 }
54 }
Erich Elsenac4de802019-10-16 04:35:30 -070055
56 float* output0 = output;
57
58 const float vw0 = weights[0];
59 const float vw1 = weights[1];
60 const float vw2 = weights[2];
61 const float vw3 = weights[3];
62 const float vw4 = weights[4];
63 const float vw5 = weights[5];
64 const float vw6 = weights[6];
65 const float vw7 = weights[7];
66 const float vw8 = weights[8];
67 const float vw9 = weights[9];
68
69 while (m > 0) {
70 float vi0x0 = 0.0f;
71 float vi1x0 = 0.0f;
72 float vi2x0 = 0.0f;
73
74 size_t k = n;
75 for (; k >= 2; k -= 2) {
76 const float vi0x1 = *i0; i0 = (const float*) ((uintptr_t) i0 + input_tuple_stride);
77 const float vi1x1 = *i1; i1 = (const float*) ((uintptr_t) i1 + input_tuple_stride);
78 const float vi2x1 = *i2; i2 = (const float*) ((uintptr_t) i2 + input_tuple_stride);
79 const float vi0x2 = *i0; i0 = (const float*) ((uintptr_t) i0 + input_tuple_stride);
80 const float vi1x2 = *i1; i1 = (const float*) ((uintptr_t) i1 + input_tuple_stride);
81 const float vi2x2 = *i2; i2 = (const float*) ((uintptr_t) i2 + input_tuple_stride);
82
83 const float vrow0_accum = vw1 * vi0x0 + vw2 * vi0x1 + vw3 * vi0x2;
84 vi0x0 = vi0x2;
85 const float vrow1_accum = vw4 * vi1x0 + vw5 * vi1x1 + vw6 * vi1x2;
86 vi1x0 = vi1x2;
87 const float vrow2_accum = vw7 * vi2x0 + vw8 * vi2x1 + vw9 * vi2x2;
88 vi2x0 = vi2x2;
89
90 float voutput = (vw0 + vrow0_accum) + (vrow1_accum + vrow2_accum);
91
92 voutput = math_max_f32(voutput, params_min);
93 voutput = math_min_f32(voutput, params_max);
94
95 *output0 = voutput; output0 = (float *) ((uintptr_t) output0 + output_tuple_stride);
96 }
97 // Possibly process the last pixel separately to account for right edge.
98 if (k == 1)
99 {
100 const float vi0x1 = i0[0];
101 const float vi1x1 = i1[0];
102 const float vi2x1 = i2[0];
103 const float vrow0_accum = vw1 * vi0x0 + vw2 * vi0x1;
104 const float vrow1_accum = vw4 * vi1x0 + vw5 * vi1x1;
105 const float vrow2_accum = vw7 * vi2x0 + vw8 * vi2x1;
106
107 float voutput = (vw0 + vrow0_accum) + (vrow1_accum + vrow2_accum);
108
109 voutput = math_max_f32(voutput, params_min);
110 voutput = math_min_f32(voutput, params_max);
111
112 *output0 = voutput;
113 }
114
Erich Elsen4e5db3d2020-05-07 08:57:47 -0700115 i0 = (const float*) ((uintptr_t) i2 - input_width_decrement_single);
Erich Elsenac4de802019-10-16 04:35:30 -0700116 i1 = (const float*) ((uintptr_t) i1 + input_width_increment);
117 i2 = (const float*) ((uintptr_t) i2 + input_width_increment);
118 output0 = (float*) ((uintptr_t) output0 + output_width_increment);
Erich Elsen4e5db3d2020-05-07 08:57:47 -0700119 m -= 1;
120 if (m == 1) {
121 i2 = zero;
122 }
Erich Elsenac4de802019-10-16 04:35:30 -0700123 }
124}