blob: 6792e6fee3df7df6784d1a3d6836d84e90ed379d [file] [log] [blame]
Erich Elsenac4de802019-10-16 04:35:30 -07001// Copyright 2019 Google LLC
2//
3// This source code is licensed under the BSD-style license found in the
4// LICENSE file in the root directory of this source tree.
5
6#include <assert.h>
7
Erich Elseneda9c112020-05-11 04:40:25 -07008#include <stdio.h>
9
Erich Elsenac4de802019-10-16 04:35:30 -070010#include <xnnpack/dwconv.h>
11#include <xnnpack/math.h>
12
13
Marat Dukhan1f29b802020-05-15 23:46:39 -070014void xnn_f32_dwconv_chw_ukernel_3x3s2p1__scalar(
Erich Elseneda9c112020-05-11 04:40:25 -070015 size_t input_height,
16 size_t input_width,
Erich Elsenac4de802019-10-16 04:35:30 -070017 const float* input,
18 const float* weights,
Erich Elsen4e5db3d2020-05-07 08:57:47 -070019 const float* zero,
Erich Elsenac4de802019-10-16 04:35:30 -070020 float* output,
Erich Elsen4e5db3d2020-05-07 08:57:47 -070021 uint32_t padding_top,
Marat Dukhan1f29b802020-05-15 23:46:39 -070022 const union xnn_f32_chw_params params[restrict XNN_MIN_ELEMENTS(1)])
Erich Elsenac4de802019-10-16 04:35:30 -070023{
Erich Elseneda9c112020-05-11 04:40:25 -070024 assert(input_height!= 0);
25 assert(input_width != 0);
26 assert(padding_top >= 0 && padding_top <= 1);
Erich Elsenac4de802019-10-16 04:35:30 -070027
Marat Dukhanae7e8b22020-10-20 17:51:51 -070028 const size_t input_tuple_stride = sizeof(float);
29 const size_t output_tuple_stride = sizeof(float);
30 const size_t input_width_stride = input_width * sizeof(float);
31 const size_t output_width = (input_width + 1) / 2;
32 const size_t output_width_stride = output_width * sizeof(float);
33
Erich Elseneda9c112020-05-11 04:40:25 -070034 const size_t padded_input_height = input_height + padding_top + 1 /* padding_bottom */;
35 const size_t output_height = (padded_input_height - 3) / 2 + 1;
36
37 const size_t input_width_decrement_single = (input_width/2) * 2 * input_tuple_stride;;
Erich Elsen4e5db3d2020-05-07 08:57:47 -070038 const size_t input_width_increment = 2 * input_width_stride - input_width_decrement_single;
Erich Elseneda9c112020-05-11 04:40:25 -070039 const size_t output_width_increment = output_width_stride - (input_width/2) * output_tuple_stride;
Erich Elsenac4de802019-10-16 04:35:30 -070040
41 const float params_min = params->scalar.min;
42 const float params_max = params->scalar.max;
43
Erich Elsen4e5db3d2020-05-07 08:57:47 -070044 const float* i0;
45 const float* i1;
46 const float* i2;
47
48 if (padding_top == 0) {
49 i0 = input;
50 i1 = (const float*) ((uintptr_t) i0 + input_width_stride);
51 i2 = (const float*) ((uintptr_t) i1 + input_width_stride);
Erich Elseneda9c112020-05-11 04:40:25 -070052 if (input_height <= 2) {
Erich Elsen4e5db3d2020-05-07 08:57:47 -070053 i2 = zero;
54 }
Erich Elseneda9c112020-05-11 04:40:25 -070055 if (input_height == 1) {
56 i1 = zero;
57 }
Erich Elsen4e5db3d2020-05-07 08:57:47 -070058 } else {
59 i0 = zero;
60 i1 = input;
61 i2 = (const float*) ((uintptr_t) i1 + input_width_stride);
Erich Elseneda9c112020-05-11 04:40:25 -070062 if (input_height == 1) {
Erich Elsen4e5db3d2020-05-07 08:57:47 -070063 i2 = zero;
64 }
65 }
Erich Elsenac4de802019-10-16 04:35:30 -070066
67 float* output0 = output;
68
69 const float vw0 = weights[0];
70 const float vw1 = weights[1];
71 const float vw2 = weights[2];
72 const float vw3 = weights[3];
73 const float vw4 = weights[4];
74 const float vw5 = weights[5];
75 const float vw6 = weights[6];
76 const float vw7 = weights[7];
77 const float vw8 = weights[8];
78 const float vw9 = weights[9];
79
Erich Elseneda9c112020-05-11 04:40:25 -070080 size_t m = output_height;
Erich Elsenac4de802019-10-16 04:35:30 -070081 while (m > 0) {
82 float vi0x0 = 0.0f;
83 float vi1x0 = 0.0f;
84 float vi2x0 = 0.0f;
85
Erich Elseneda9c112020-05-11 04:40:25 -070086 size_t k = input_width;
Erich Elsenac4de802019-10-16 04:35:30 -070087 for (; k >= 2; k -= 2) {
88 const float vi0x1 = *i0; i0 = (const float*) ((uintptr_t) i0 + input_tuple_stride);
89 const float vi1x1 = *i1; i1 = (const float*) ((uintptr_t) i1 + input_tuple_stride);
90 const float vi2x1 = *i2; i2 = (const float*) ((uintptr_t) i2 + input_tuple_stride);
91 const float vi0x2 = *i0; i0 = (const float*) ((uintptr_t) i0 + input_tuple_stride);
92 const float vi1x2 = *i1; i1 = (const float*) ((uintptr_t) i1 + input_tuple_stride);
93 const float vi2x2 = *i2; i2 = (const float*) ((uintptr_t) i2 + input_tuple_stride);
94
95 const float vrow0_accum = vw1 * vi0x0 + vw2 * vi0x1 + vw3 * vi0x2;
96 vi0x0 = vi0x2;
97 const float vrow1_accum = vw4 * vi1x0 + vw5 * vi1x1 + vw6 * vi1x2;
98 vi1x0 = vi1x2;
99 const float vrow2_accum = vw7 * vi2x0 + vw8 * vi2x1 + vw9 * vi2x2;
100 vi2x0 = vi2x2;
101
102 float voutput = (vw0 + vrow0_accum) + (vrow1_accum + vrow2_accum);
103
104 voutput = math_max_f32(voutput, params_min);
105 voutput = math_min_f32(voutput, params_max);
106
107 *output0 = voutput; output0 = (float *) ((uintptr_t) output0 + output_tuple_stride);
108 }
109 // Possibly process the last pixel separately to account for right edge.
110 if (k == 1)
111 {
112 const float vi0x1 = i0[0];
113 const float vi1x1 = i1[0];
114 const float vi2x1 = i2[0];
115 const float vrow0_accum = vw1 * vi0x0 + vw2 * vi0x1;
116 const float vrow1_accum = vw4 * vi1x0 + vw5 * vi1x1;
117 const float vrow2_accum = vw7 * vi2x0 + vw8 * vi2x1;
118
119 float voutput = (vw0 + vrow0_accum) + (vrow1_accum + vrow2_accum);
120
121 voutput = math_max_f32(voutput, params_min);
122 voutput = math_min_f32(voutput, params_max);
123
124 *output0 = voutput;
125 }
126
Erich Elsen4e5db3d2020-05-07 08:57:47 -0700127 i0 = (const float*) ((uintptr_t) i2 - input_width_decrement_single);
Erich Elsenac4de802019-10-16 04:35:30 -0700128 i1 = (const float*) ((uintptr_t) i1 + input_width_increment);
129 i2 = (const float*) ((uintptr_t) i2 + input_width_increment);
130 output0 = (float*) ((uintptr_t) output0 + output_width_increment);
Erich Elsen4e5db3d2020-05-07 08:57:47 -0700131 m -= 1;
Erich Elseneda9c112020-05-11 04:40:25 -0700132 if (m == 1 && padding_top == input_height % 2) {
133 // to mimic the following code with only one if, we do some small
134 // shenanigans...
135 // if (padding_top == 0 && input_height % 2 == 0) {
136 // i2 = zero;
137 // } else if (padding_top == 1 && input_height % 2 == 1) {
138 // i2 = zero;
139 // }
Erich Elsen4e5db3d2020-05-07 08:57:47 -0700140 i2 = zero;
141 }
Erich Elsenac4de802019-10-16 04:35:30 -0700142 }
143}