blob: 607cbf78189be2ef868595bd90460315aaee0945 [file] [log] [blame]
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
5
6#include <assert.h>
7
8#include <xnnpack/dwconv.h>
9#include <xnnpack/math.h>
10
11
12void xnn_f32_dwconv_spchw_ukernel_3x3p1__scalar(
Erich Elseneda9c112020-05-11 04:40:25 -070013 size_t input_height,
14 size_t input_width,
Erich Elsen0cc2c532019-10-15 04:44:18 -070015 const float* input,
16 const float* weights,
Erich Elsen4e5db3d2020-05-07 08:57:47 -070017 const float* zero,
Erich Elsen0cc2c532019-10-15 04:44:18 -070018 float* output,
Erich Elsen4e5db3d2020-05-07 08:57:47 -070019 uint32_t padding_top,
Erich Elsen0cc2c532019-10-15 04:44:18 -070020 size_t input_tuple_stride,
21 size_t output_tuple_stride,
22 size_t input_width_stride,
23 size_t output_width_stride,
Marat Dukhanf196d012020-04-15 11:50:03 -070024 const union xnn_f32_spchw_params params[restrict XNN_MIN_ELEMENTS(1)])
Erich Elsen0cc2c532019-10-15 04:44:18 -070025{
Erich Elseneda9c112020-05-11 04:40:25 -070026 assert(input_width != 0);
27 assert(input_height != 0);
Erich Elsen4e5db3d2020-05-07 08:57:47 -070028 assert(padding_top == 1);
Erich Elsen0cc2c532019-10-15 04:44:18 -070029
Erich Elseneda9c112020-05-11 04:40:25 -070030 const size_t padded_input_height = input_height + padding_top + 1 /* padding_bottom */;
31 const size_t output_height = padded_input_height - 3 + 1;
32
33 const size_t input_width_decrement = input_width * input_tuple_stride;
Erich Elsen4e5db3d2020-05-07 08:57:47 -070034 const size_t input_width_increment = input_width_stride - input_width_decrement;
Erich Elseneda9c112020-05-11 04:40:25 -070035 const size_t output_width_increment = output_width_stride - (input_width - 1) * output_tuple_stride;
Erich Elsen0cc2c532019-10-15 04:44:18 -070036
37 const float params_min = params->scalar.min;
38 const float params_max = params->scalar.max;
39
Erich Elsen4e5db3d2020-05-07 08:57:47 -070040 const float* i0 = zero;
41 const float* i1 = input;
Erich Elsen0cc2c532019-10-15 04:44:18 -070042 const float* i2 = (const float*) ((uintptr_t) i1 + input_width_stride);
43
Erich Elseneda9c112020-05-11 04:40:25 -070044 if (input_height == 1) {
Erich Elsen4e5db3d2020-05-07 08:57:47 -070045 i2 = zero;
46 }
47
Erich Elsen0cc2c532019-10-15 04:44:18 -070048 float* output0 = output;
49
50 const float vw0 = weights[0];
51 const float vw1 = weights[1];
52 const float vw2 = weights[2];
53 const float vw3 = weights[3];
54 const float vw4 = weights[4];
55 const float vw5 = weights[5];
56 const float vw6 = weights[6];
57 const float vw7 = weights[7];
58 const float vw8 = weights[8];
59 const float vw9 = weights[9];
60
Erich Elseneda9c112020-05-11 04:40:25 -070061 size_t m = output_height;
Erich Elsen0cc2c532019-10-15 04:44:18 -070062 while (m > 0) {
63 float vi0x0 = 0.0f;
64 float vi1x0 = 0.0f;
65 float vi2x0 = 0.0f;
66 float vi0x1 = *i0; i0 = (const float*) ((uintptr_t) i0 + input_tuple_stride);
67 float vi1x1 = *i1; i1 = (const float*) ((uintptr_t) i1 + input_tuple_stride);
68 float vi2x1 = *i2; i2 = (const float*) ((uintptr_t) i2 + input_tuple_stride);
69
Erich Elseneda9c112020-05-11 04:40:25 -070070 size_t k = input_width;
Erich Elsen0cc2c532019-10-15 04:44:18 -070071 for (; k > 1; k--) {
72 const float vi0x2 = *i0; i0 = (const float*) ((uintptr_t) i0 + input_tuple_stride);
73 const float vi1x2 = *i1; i1 = (const float*) ((uintptr_t) i1 + input_tuple_stride);
74 const float vi2x2 = *i2; i2 = (const float*) ((uintptr_t) i2 + input_tuple_stride);
75
76 const float vrow0_accum = vw1 * vi0x0 + vw2 * vi0x1 + vw3 * vi0x2;
77 vi0x0 = vi0x1;
78 vi0x1 = vi0x2;
79 const float vrow1_accum = vw4 * vi1x0 + vw5 * vi1x1 + vw6 * vi1x2;
80 vi1x0 = vi1x1;
81 vi1x1 = vi1x2;
82 const float vrow2_accum = vw7 * vi2x0 + vw8 * vi2x1 + vw9 * vi2x2;
83 vi2x0 = vi2x1;
84 vi2x1 = vi2x2;
85
86 float voutput = (vw0 + vrow0_accum) + (vrow1_accum + vrow2_accum);
87
88 voutput = math_max_f32(voutput, params_min);
89 voutput = math_min_f32(voutput, params_max);
90
91 *output0 = voutput; output0 = (float *) ((uintptr_t) output0 + output_tuple_stride);
92 }
93 // Always process the last pixel separately to account for right edge.
94 assert(k == 1);
95 {
96 const float vrow0_accum = vw1 * vi0x0 + vw2 * vi0x1;
97 const float vrow1_accum = vw4 * vi1x0 + vw5 * vi1x1;
98 const float vrow2_accum = vw7 * vi2x0 + vw8 * vi2x1;
99
100 float voutput = (vw0 + vrow0_accum) + (vrow1_accum + vrow2_accum);
101
102 voutput = math_max_f32(voutput, params_min);
103 voutput = math_min_f32(voutput, params_max);
104
105 *output0 = voutput;
106 }
107
Erich Elsen4e5db3d2020-05-07 08:57:47 -0700108 i0 = (const float*) ((uintptr_t) i1 - input_width_decrement);
Erich Elsen0cc2c532019-10-15 04:44:18 -0700109 i1 = (const float*) ((uintptr_t) i1 + input_width_increment);
110 i2 = (const float*) ((uintptr_t) i2 + input_width_increment);
111 output0 = (float*) ((uintptr_t) output0 + output_width_increment);
112 m--;
Erich Elsen4e5db3d2020-05-07 08:57:47 -0700113 if (m == 1) {
114 i2 = zero;
115 }
Erich Elsen0cc2c532019-10-15 04:44:18 -0700116 }
117}