// Copyright (c) Facebook, Inc. and its affiliates.
// All rights reserved.
//
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

#include <assert.h>
#include <math.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

#include <fxdiv.h>

#include <xnnpack/indirection.h>
#include <xnnpack/operator.h>
#include <xnnpack/math.h>

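// Sets up the indirection buffer for a 2D convolution: for every output pixel
// (rounded up to a multiple of output_tile_size) and every kernel tap, it stores
// a pointer to the contributing input pixel, or to the operator's zero buffer
// when the tap falls into the padding. The tiled layout is
//   index = output_tile_start * kernel_size + kernel_index * output_tile_size + output_tile_offset
// Out-of-range taps are detected via unsigned wrap-around: input_y/input_x
// underflow to huge values when the tap lies above or left of the input, so a
// single `< input_height` / `< input_width` comparison covers both sides.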
void xnn_indirection_init_conv2d(
  xnn_operator_t op,
  size_t output_tile_size,
  uint32_t log2_element_size)
{
  const void** indirection_buffer = op->indirection_buffer;
  const void* input = op->input;
  const void* zero = op->zero_buffer;
  const size_t input_pixel_stride = op->input_pixel_stride << log2_element_size;
  const size_t input_height = op->input_height;
  const size_t input_width = op->input_width;
  const size_t output_height = op->output_height;
  const size_t output_width = op->output_width;
  const size_t kernel_height = op->kernel_height;
  const size_t kernel_width = op->kernel_width;
  const size_t stride_height = op->stride_height;
  const size_t stride_width = op->stride_width;
  const size_t dilation_height = op->dilation_height;
  const size_t dilation_width = op->dilation_width;
  const size_t input_padding_top = op->padding_top;
  const size_t input_padding_left = op->padding_left;

  const size_t output_size = output_height * output_width;
  const size_t tiled_output_size = round_up(output_size, output_tile_size);
  const size_t kernel_size = kernel_height * kernel_width;

  const struct fxdiv_divisor_size_t output_width_divisor = fxdiv_init_size_t(output_width);

  for (size_t output_tile_start = 0; output_tile_start < tiled_output_size; output_tile_start += output_tile_size) {
    for (size_t output_tile_offset = 0; output_tile_offset < output_tile_size; output_tile_offset++) {
      const size_t output_index = min(output_tile_start + output_tile_offset, output_size - 1);
      const struct fxdiv_result_size_t output_y_x = fxdiv_divide_size_t(output_index, output_width_divisor);
      const size_t output_x = output_y_x.remainder;
      const size_t output_y = output_y_x.quotient;
      for (size_t kernel_y = 0; kernel_y < kernel_height; kernel_y++) {
        const size_t input_y = output_y * stride_height + kernel_y * dilation_height - input_padding_top;
        if (input_y < input_height) {
          for (size_t kernel_x = 0; kernel_x < kernel_width; kernel_x++) {
            const size_t input_x = output_x * stride_width + kernel_x * dilation_width - input_padding_left;
            const size_t kernel_index = kernel_y * kernel_width + kernel_x;
            const size_t index = output_tile_start * kernel_size + kernel_index * output_tile_size + output_tile_offset;
            if (input_x < input_width) {
              indirection_buffer[index] = (const void*)
                ((uintptr_t) input + (input_y * input_width + input_x) * input_pixel_stride);
            } else {
              indirection_buffer[index] = zero;
            }
          }
        } else {
          for (size_t kernel_x = 0; kernel_x < kernel_width; kernel_x++) {
            const size_t kernel_index = kernel_y * kernel_width + kernel_x;
            const size_t index = output_tile_start * kernel_size + kernel_index * output_tile_size + output_tile_offset;
            indirection_buffer[index] = zero;
          }
        }
      }
    }
  }
}

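// Sets up the indirection buffer for a 2D deconvolution (transposed convolution).
// For each output pixel and kernel tap, the contributing input pixel is
// (output + padding - kernel * dilation) / stride; the pointer is set to the
// zero buffer unless that offset is exactly divisible by the stride and lands
// inside the input. Divisions by the strides use precomputed fxdiv divisors.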
void xnn_indirection_init_deconv2d(
  xnn_operator_t op,
  size_t output_tile_size,
  uint32_t log2_element_size)
{
  const void** indirection_buffer = op->indirection_buffer;
  const void* input = op->input;
  const size_t input_pixel_stride = op->input_pixel_stride << log2_element_size;
  const void* zero = op->zero_buffer;
  const size_t input_height = op->input_height;
  const size_t input_width = op->input_width;
  const size_t output_height = op->output_height;
  const size_t output_width = op->output_width;
  const size_t kernel_height = op->kernel_height;
  const size_t kernel_width = op->kernel_width;
  const size_t stride_height = op->stride_height;
  const size_t stride_width = op->stride_width;
  const size_t dilation_height = op->dilation_height;
  const size_t dilation_width = op->dilation_width;
  const size_t padding_top = op->padding_top;
  const size_t padding_left = op->padding_left;

  const size_t output_size = output_height * output_width;
  const size_t tiled_output_size = round_up(output_size, output_tile_size);
  const size_t kernel_size = kernel_height * kernel_width;

  const struct fxdiv_divisor_size_t output_width_divisor = fxdiv_init_size_t(output_width);
  const struct fxdiv_divisor_size_t stride_height_divisor = fxdiv_init_size_t(stride_height);
  const struct fxdiv_divisor_size_t stride_width_divisor = fxdiv_init_size_t(stride_width);

  for (size_t output_tile_start = 0; output_tile_start < tiled_output_size; output_tile_start += output_tile_size) {
    for (size_t output_tile_offset = 0; output_tile_offset < output_tile_size; output_tile_offset++) {
      const size_t output_index = min(output_tile_start + output_tile_offset, output_size - 1);
      const struct fxdiv_result_size_t output_y_x = fxdiv_divide_size_t(output_index, output_width_divisor);
      const size_t output_x = output_y_x.remainder;
      const size_t output_y = output_y_x.quotient;
      for (size_t kernel_y = 0; kernel_y < kernel_height; kernel_y++) {
        const size_t y = output_y + padding_top - kernel_y * dilation_height;
        const size_t input_y = fxdiv_quotient_size_t(y, stride_height_divisor);
        for (size_t kernel_x = 0; kernel_x < kernel_width; kernel_x++) {
          const size_t x = output_x + padding_left - kernel_x * dilation_width;
          const size_t input_x = fxdiv_quotient_size_t(x, stride_width_divisor);
          const size_t kernel_index = kernel_y * kernel_width + kernel_x;
          const size_t index = output_tile_start * kernel_size + kernel_index * output_tile_size + output_tile_offset;
          if (input_y * stride_height == y && input_y < input_height && input_x * stride_width == x && input_x < input_width) {
            indirection_buffer[index] = (const void*) ((uintptr_t) input + (input_y * input_width + input_x) * input_pixel_stride);
          } else {
            indirection_buffer[index] = zero;
          }
        }
      }
    }
  }
}

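// Sets up indirection buffers for a deconvolution decomposed into
// stride_height * stride_width subconvolutions, one per (offset_y, offset_x)
// phase of the output. Each subconvolution's params record the start of its
// slice of the indirection buffer and its y-stride, derived from the x-stride,
// which is assumed to have been filled in by the caller; the pointers for that
// slice are then written sequentially.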
void xnn_indirection_init_subconv2d(
  xnn_operator_t op,
  size_t output_tile_size,
  uint32_t log2_element_size)
{
  const void** indirection_buffer = op->indirection_buffer;
  struct subconvolution_params* subconvolution_params = op->subconvolution_buffer;
  const void* input = op->input;
  const size_t input_pixel_stride = op->input_pixel_stride << log2_element_size;
  const void* zero = op->zero_buffer;
  const size_t input_height = op->input_height;
  const size_t input_width = op->input_width;
  const size_t output_height = op->output_height;
  const size_t output_width = op->output_width;
  const size_t kernel_height = op->kernel_height;
  const size_t kernel_width = op->kernel_width;
  const size_t stride_height = op->stride_height;
  const size_t stride_width = op->stride_width;
  const size_t padding_top = op->padding_top;
  const size_t padding_left = op->padding_left;

  const size_t modulo_padding_top = padding_top % stride_height;
  const size_t modulo_padding_left = padding_left % stride_width;
  for (size_t offset_y = 0; offset_y < stride_height; offset_y++) {
    const size_t output_y_start = subtract_modulo(offset_y, modulo_padding_top, stride_height);
    for (size_t offset_x = 0; offset_x < stride_width; offset_x++) {
      const size_t output_x_start = subtract_modulo(offset_x, modulo_padding_left, stride_width);
      const size_t sliced_output_width = divide_round_up(output_width - output_x_start, stride_width);

      subconvolution_params->indirection_buffer = indirection_buffer;
      subconvolution_params->indirection_y_stride =
        subconvolution_params->indirection_x_stride * round_up(sliced_output_width, output_tile_size);
      ++subconvolution_params;

      for (size_t output_y = output_y_start; output_y < output_height; output_y += stride_height) {
        for (size_t output_tile_start = 0; output_tile_start < sliced_output_width; output_tile_start += output_tile_size) {
          for (size_t kernel_y = offset_y; kernel_y < kernel_height; kernel_y += stride_height) {
            assert(doz(output_y + padding_top, kernel_y) % stride_height == 0);
            const size_t y = output_y + padding_top - kernel_y;
            const size_t input_y = y / stride_height;

            for (size_t kernel_x = offset_x; kernel_x < kernel_width; kernel_x += stride_width) {
              for (size_t output_tile_offset = 0; output_tile_offset < output_tile_size; output_tile_offset++) {
                const size_t sliced_output_x = min(output_tile_start + output_tile_offset, sliced_output_width - 1);
                const size_t output_x = output_x_start + sliced_output_x * stride_width;

                assert(doz(output_x + padding_left, kernel_x) % stride_width == 0);
                const size_t x = output_x + padding_left - kernel_x;
                const size_t input_x = x / stride_width;

                if (input_y < input_height && input_x < input_width) {
                  *indirection_buffer++ =
                    (const void*) ((uintptr_t) input + (input_y * input_width + input_x) * input_pixel_stride);
                } else {
                  *indirection_buffer++ = zero;
                }
              }
            }
          }
        }
      }
    }
  }
}

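// Sets up the indirection buffer for a 2D depthwise convolution. Pointers are
// grouped per output row (stride step_height) and per output pixel (stride
// step_width * kernel_height), with the kernel window stored column by column
// (kernel_y varies fastest). Taps in the padding region point at the zero buffer.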
void xnn_indirection_init_dwconv2d(
  xnn_operator_t op,
  size_t step_height,
  size_t step_width,
  uint32_t log2_element_size)
{
  const void** indirection_buffer = op->indirection_buffer;
  const void* input = op->input;
  const size_t input_pixel_stride = op->input_pixel_stride << log2_element_size;
  const void* zero = op->zero_buffer;
  const size_t input_height = op->input_height;
  const size_t input_width = op->input_width;
  const size_t output_height = op->output_height;
  const size_t output_width = op->output_width;
  const size_t kernel_height = op->kernel_height;
  const size_t kernel_width = op->kernel_width;
  const size_t stride_height = op->stride_height;
  const size_t stride_width = op->stride_width;
  const size_t dilation_height = op->dilation_height;
  const size_t dilation_width = op->dilation_width;
  const size_t input_padding_top = op->padding_top;
  const size_t input_padding_left = op->padding_left;

  for (size_t output_y = 0; output_y < output_height; output_y++) {
    for (size_t kernel_y = 0; kernel_y < kernel_height; kernel_y++) {
      const size_t input_y = output_y * stride_height + kernel_y * dilation_height - input_padding_top;
      if (input_y < input_height) {
        for (size_t output_x = 0; output_x < output_width; output_x++) {
          for (size_t kernel_x = 0; kernel_x < kernel_width; kernel_x++) {
            const size_t input_x = output_x * stride_width + kernel_x * dilation_width - input_padding_left;
            const size_t index = output_y * step_height + output_x * step_width * kernel_height + kernel_x * kernel_height + kernel_y;
            if (input_x < input_width) {
              indirection_buffer[index] =
                (const void*) ((uintptr_t) input + (input_y * input_width + input_x) * input_pixel_stride);
            } else {
              indirection_buffer[index] = zero;
            }
          }
        }
      } else {
        for (size_t output_x = 0; output_x < output_width; output_x++) {
          for (size_t kernel_x = 0; kernel_x < kernel_width; kernel_x++) {
            const size_t index = output_y * step_height + output_x * step_width * kernel_height + kernel_x * kernel_height + kernel_y;
            indirection_buffer[index] = zero;
          }
        }
      }
    }
  }
}

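// Sets up the indirection buffer for 2D max pooling. Max pooling has no zero
// buffer: taps that fall into the padding are clamped to the nearest valid
// input pixel rather than redirected to a zero buffer. With dilation, border
// clamping could leave the dilated sampling grid, so an in-bounds tap on the
// same grid is substituted instead.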
void xnn_indirection_init_maxpool2d(
  xnn_operator_t op,
  size_t step_height,
  size_t step_width,
  uint32_t log2_element_size)
{
  const void** indirection_buffer = op->indirection_buffer;
  const void* input = op->input;
  const size_t input_pixel_stride = op->input_pixel_stride << log2_element_size;
  const size_t input_height = op->input_height;
  const size_t input_width = op->input_width;
  const size_t output_height = op->output_height;
  const size_t output_width = op->output_width;
  const size_t pooling_height = op->kernel_height;
  const size_t pooling_width = op->kernel_width;
  const size_t stride_height = op->stride_height;
  const size_t stride_width = op->stride_width;
  const size_t dilation_height = op->dilation_height;
  const size_t dilation_width = op->dilation_width;
  const size_t input_padding_top = op->padding_top;
  const size_t input_padding_left = op->padding_left;

  const bool any_dilation = (dilation_height | dilation_width) > 1;

  if (any_dilation) {
    // Clamping to the border doesn't work for pooling with dilation.
    const size_t adjusted_padding_top = input_padding_top % dilation_height;
    const size_t adjusted_padding_left = input_padding_left % dilation_width;
    for (size_t output_y = 0; output_y < output_height; output_y++) {
      for (size_t pooling_y = 0; pooling_y < pooling_height; pooling_y++) {
        size_t safe_input_y = output_y * stride_height;
        if XNN_UNPREDICTABLE(safe_input_y < adjusted_padding_top) {
          safe_input_y += dilation_height;
        }
        safe_input_y -= adjusted_padding_top;

        size_t input_y = output_y * stride_height + pooling_y * dilation_height - input_padding_top;
        if XNN_UNPREDICTABLE(input_y >= input_height) {
          input_y = safe_input_y;
        }

        for (size_t output_x = 0; output_x < output_width; output_x++) {
          for (size_t pooling_x = 0; pooling_x < pooling_width; pooling_x++) {
            size_t safe_input_x = output_x * stride_width;
            if XNN_UNPREDICTABLE(safe_input_x < adjusted_padding_left) {
              safe_input_x += dilation_width;
            }
            safe_input_x -= adjusted_padding_left;

            size_t input_x = output_x * stride_width + pooling_x * dilation_width - input_padding_left;
            if XNN_UNPREDICTABLE(input_x >= input_width) {
              input_x = safe_input_x;
            }

            const size_t index = output_y * step_height + output_x * step_width * pooling_height + pooling_x * pooling_height + pooling_y;
            indirection_buffer[index] = (const void*) ((uintptr_t) input + (input_y * input_width + input_x) * input_pixel_stride);
          }
        }
      }
    }
  } else {
    const size_t input_x_max = input_width - 1;
    const size_t input_y_max = input_height - 1;
    for (size_t output_y = 0; output_y < output_height; output_y++) {
      for (size_t pooling_y = 0; pooling_y < pooling_height; pooling_y++) {
        const size_t input_y = min(doz(output_y * stride_height + pooling_y * dilation_height, input_padding_top), input_y_max);
        for (size_t output_x = 0; output_x < output_width; output_x++) {
          for (size_t pooling_x = 0; pooling_x < pooling_width; pooling_x++) {
            const size_t input_x = min(doz(output_x * stride_width + pooling_x * dilation_width, input_padding_left), input_x_max);
            const size_t index = output_y * step_height + output_x * step_width * pooling_height + pooling_x * pooling_height + pooling_y;
            indirection_buffer[index] = (const void*) ((uintptr_t) input + (input_y * input_width + input_x) * input_pixel_stride);
          }
        }
      }
    }
  }
}

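// Sets up the indirection buffer and packed weights for bilinear resizing of an
// HWC tensor. For each output pixel it stores four pointers (top-left, top-right,
// bottom-left, bottom-right input pixels) and two interpolation weights
// (alpha_x, alpha_y) in packed_weights. The tensorflow_legacy/align_corners path
// maps output coordinates directly through the scale; the default path applies
// the half-pixel offset (0.5 * scale - 0.5) and clamps to the input edges.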
void xnn_indirection_init_resize_bilinear2d_hwc_f32(
  size_t input_pixel_stride,
  size_t input_height,
  size_t input_width,
  size_t output_height,
  size_t output_width,
  const void* input,
  const void** indirection_buffer,
  float* packed_weights,
  bool align_corners,
  bool tensorflow_legacy)
{
  assert(input_height != 0);
  assert(input_height < 16777216 /* 2**24 */);
  assert(input_width != 0);
  assert(input_width < 16777216 /* 2**24 */);
  assert(output_height != 0);
  assert(output_height < 16777216 /* 2**24 */);
  assert(output_width != 0);
  assert(output_width < 16777216 /* 2**24 */);

  const int32_t width_adjustment = (int32_t) (align_corners && output_width != 1);
  const int32_t height_adjustment = (int32_t) (align_corners && output_height != 1);
  const float width_scale =
    (float) ((int32_t) input_width - width_adjustment) / (float) ((int32_t) output_width - width_adjustment);
  const float height_scale =
    (float) ((int32_t) input_height - height_adjustment) / (float) ((int32_t) output_height - height_adjustment);

  const uint32_t input_y_max = (uint32_t) input_height - 1;
  const uint32_t input_x_max = (uint32_t) input_width - 1;
  if (tensorflow_legacy || align_corners) {
    for (size_t output_y = 0; output_y < output_height; output_y++) {
      const float input_y = (float) (int32_t) output_y * height_scale;
      assert(input_y >= 0.0f);
      assert(input_y < (float) input_height);

      const uint32_t input_y_top = (uint32_t) (int32_t) input_y;
      const uint32_t input_y_bottom = math_min_u32(input_y_top + 1, input_y_max);
      const float alpha_y = input_y - (float) input_y_top;
      for (size_t output_x = 0; output_x < output_width; output_x++) {
        const float input_x = (float) (int32_t) output_x * width_scale;
        assert(input_x >= 0.0f);
        assert(input_x < (float) input_width);

        const uint32_t input_x_left = (uint32_t) (int32_t) input_x;
        const uint32_t input_x_right = math_min_u32(input_x_left + 1, input_x_max);
        const float alpha_x = input_x - (float) input_x_left;
        indirection_buffer[0] =
          (void*) ((uintptr_t) input + (input_y_top * input_width + input_x_left) * input_pixel_stride);
        indirection_buffer[1] =
          (void*) ((uintptr_t) input + (input_y_top * input_width + input_x_right) * input_pixel_stride);
        indirection_buffer[2] =
          (void*) ((uintptr_t) input + (input_y_bottom * input_width + input_x_left) * input_pixel_stride);
        indirection_buffer[3] =
          (void*) ((uintptr_t) input + (input_y_bottom * input_width + input_x_right) * input_pixel_stride);
        packed_weights[0] = alpha_x;
        packed_weights[1] = alpha_y;
        indirection_buffer += 4;
        packed_weights += 2;
      }
    }
  } else {
    const float height_offset = 0.5f * height_scale - 0.5f;
    const float width_offset = 0.5f * width_scale - 0.5f;
    for (size_t output_y = 0; output_y < output_height; output_y++) {
      float input_y = (float) (int32_t) output_y * height_scale + height_offset;
      input_y = math_min_f32(math_max_f32(input_y, 0.0f), (float) input_y_max);
      const uint32_t input_y_top = (uint32_t) (int32_t) input_y;
      assert((int32_t) input_y_top >= 0);
      const uint32_t input_y_bottom = math_min_u32(input_y_top + 1, input_y_max);
      const float alpha_y = input_y - (float) input_y_top;
      for (size_t output_x = 0; output_x < output_width; output_x++) {
        float input_x = (float) (int32_t) output_x * width_scale + width_offset;
        input_x = math_min_f32(math_max_f32(input_x, 0.0f), (float) input_x_max);
        const uint32_t input_x_left = (uint32_t) (int32_t) input_x;
        assert((int32_t) input_x_left >= 0);
        const uint32_t input_x_right = math_min_u32(input_x_left + 1, input_x_max);
        const float alpha_x = input_x - (float) input_x_left;
        indirection_buffer[0] =
          (void*) ((uintptr_t) input + (input_y_top * input_width + input_x_left) * input_pixel_stride);
        indirection_buffer[1] =
          (void*) ((uintptr_t) input + (input_y_top * input_width + input_x_right) * input_pixel_stride);
        indirection_buffer[2] =
          (void*) ((uintptr_t) input + (input_y_bottom * input_width + input_x_left) * input_pixel_stride);
        indirection_buffer[3] =
          (void*) ((uintptr_t) input + (input_y_bottom * input_width + input_x_right) * input_pixel_stride);
        packed_weights[0] = alpha_x;
        packed_weights[1] = alpha_y;
        indirection_buffer += 4;
        packed_weights += 2;
      }
    }
  }
}

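// Same as xnn_indirection_init_resize_bilinear2d_hwc_f32, except that the
// interpolation weights are packed as signed Q11 fixed-point values
// (alpha rounded to alpha * 2^11), matching the int16_t packed_weights buffer.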
void xnn_indirection_init_resize_bilinear2d_hwc_q11(
  size_t input_pixel_stride,
  size_t input_height,
  size_t input_width,
  size_t output_height,
  size_t output_width,
  const void* input,
  const void** indirection_buffer,
  int16_t* packed_weights,
  bool align_corners,
  bool tensorflow_legacy)
{
  assert(input_height != 0);
  assert(input_height < 16777216 /* 2**24 */);
  assert(input_width != 0);
  assert(input_width < 16777216 /* 2**24 */);
  assert(output_height != 0);
  assert(output_height < 16777216 /* 2**24 */);
  assert(output_width != 0);
  assert(output_width < 16777216 /* 2**24 */);

  const int32_t width_adjustment = (int32_t) (align_corners && output_width != 1);
  const int32_t height_adjustment = (int32_t) (align_corners && output_height != 1);
  const float width_scale =
    (float) ((int32_t) input_width - width_adjustment) / (float) ((int32_t) output_width - width_adjustment);
  const float height_scale =
    (float) ((int32_t) input_height - height_adjustment) / (float) ((int32_t) output_height - height_adjustment);

  const uint32_t input_y_max = (uint32_t) input_height - 1;
  const uint32_t input_x_max = (uint32_t) input_width - 1;
  if (tensorflow_legacy || align_corners) {
    for (size_t output_y = 0; output_y < output_height; output_y++) {
      const float input_y = (float) (int32_t) output_y * height_scale;
      assert(input_y >= 0.0f);
      assert(input_y < (float) input_height);

      const uint32_t input_y_top = (uint32_t) (int32_t) input_y;
      const uint32_t input_y_bottom = math_min_u32(input_y_top + 1, input_y_max);
      const float alpha_y = input_y - (float) input_y_top;
      for (size_t output_x = 0; output_x < output_width; output_x++) {
        const float input_x = (float) (int32_t) output_x * width_scale;
        assert(input_x >= 0.0f);
        assert(input_x < (float) input_width);

        const uint32_t input_x_left = (uint32_t) (int32_t) input_x;
        const uint32_t input_x_right = math_min_u32(input_x_left + 1, input_x_max);
        const float alpha_x = input_x - (float) input_x_left;
        indirection_buffer[0] =
          (void*) ((uintptr_t) input + (input_y_top * input_width + input_x_left) * input_pixel_stride);
        indirection_buffer[1] =
          (void*) ((uintptr_t) input + (input_y_top * input_width + input_x_right) * input_pixel_stride);
        indirection_buffer[2] =
          (void*) ((uintptr_t) input + (input_y_bottom * input_width + input_x_left) * input_pixel_stride);
        indirection_buffer[3] =
          (void*) ((uintptr_t) input + (input_y_bottom * input_width + input_x_right) * input_pixel_stride);
        packed_weights[0] = (int16_t) lrintf(alpha_x * 0x1.0p+11f);
        packed_weights[1] = (int16_t) lrintf(alpha_y * 0x1.0p+11f);
        indirection_buffer += 4;
        packed_weights += 2;
      }
    }
  } else {
    const float height_offset = 0.5f * height_scale - 0.5f;
    const float width_offset = 0.5f * width_scale - 0.5f;
    for (size_t output_y = 0; output_y < output_height; output_y++) {
      float input_y = (float) (int32_t) output_y * height_scale + height_offset;
      input_y = math_min_f32(math_max_f32(input_y, 0.0f), (float) input_y_max);
      const uint32_t input_y_top = (uint32_t) (int32_t) input_y;
      assert((int32_t) input_y_top >= 0);
      const uint32_t input_y_bottom = math_min_u32(input_y_top + 1, input_y_max);
      const float alpha_y = input_y - (float) input_y_top;
      for (size_t output_x = 0; output_x < output_width; output_x++) {
        float input_x = (float) (int32_t) output_x * width_scale + width_offset;
        input_x = math_min_f32(math_max_f32(input_x, 0.0f), (float) input_x_max);
        const uint32_t input_x_left = (uint32_t) (int32_t) input_x;
        assert((int32_t) input_x_left >= 0);
        const uint32_t input_x_right = math_min_u32(input_x_left + 1, input_x_max);
        const float alpha_x = input_x - (float) input_x_left;
        indirection_buffer[0] =
          (void*) ((uintptr_t) input + (input_y_top * input_width + input_x_left) * input_pixel_stride);
        indirection_buffer[1] =
          (void*) ((uintptr_t) input + (input_y_top * input_width + input_x_right) * input_pixel_stride);
        indirection_buffer[2] =
          (void*) ((uintptr_t) input + (input_y_bottom * input_width + input_x_left) * input_pixel_stride);
        indirection_buffer[3] =
          (void*) ((uintptr_t) input + (input_y_bottom * input_width + input_x_right) * input_pixel_stride);
        packed_weights[0] = (int16_t) lrintf(alpha_x * 0x1.0p+11f);
        packed_weights[1] = (int16_t) lrintf(alpha_y * 0x1.0p+11f);
        indirection_buffer += 4;
        packed_weights += 2;
      }
    }
  }
}

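// Sets up the indirection buffer and packed weights for bilinear resizing of a
// CHW tensor. Only two pointers per output pixel are stored (the top and bottom
// pixels of the left column); the CHW kernels read the right neighbors at a
// fixed offset from those pointers. When the left column would be the last
// input column, the pointers step back by one column and alpha_x is forced to
// 1.0 so the interpolated value is unchanged.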
void xnn_indirection_init_resize_bilinear2d_chw_f32(
  size_t input_pixel_stride,
  size_t input_height,
  size_t input_width,
  size_t output_height,
  size_t output_width,
  const void* input,
  const void** indirection_buffer,
  float* packed_weights,
  bool align_corners,
  bool tensorflow_legacy)
{
  assert(input_height > 1);
  assert(input_height < 16777216 /* 2**24 */);
  assert(input_width > 1);
  assert(input_width < 16777216 /* 2**24 */);
  assert(output_height != 0);
  assert(output_height < 16777216 /* 2**24 */);
  assert(output_width != 0);
  assert(output_width < 16777216 /* 2**24 */);

  const int32_t width_adjustment = (int32_t) (align_corners && output_width != 1);
  const int32_t height_adjustment = (int32_t) (align_corners && output_height != 1);
  const float width_scale =
    (float) ((int32_t) input_width - width_adjustment) / (float) ((int32_t) output_width - width_adjustment);
  const float height_scale =
    (float) ((int32_t) input_height - height_adjustment) / (float) ((int32_t) output_height - height_adjustment);

  const uint32_t input_y_max = (uint32_t) input_height - 1;
  const uint32_t input_x_max = (uint32_t) input_width - 1;
  if (tensorflow_legacy || align_corners) {
    for (size_t output_y = 0; output_y < output_height; output_y++) {
      const float input_y = (float) (int32_t) output_y * height_scale;
      assert(input_y >= 0.0f);
      assert(input_y < (float) input_height);

      const uint32_t input_y_top = (uint32_t) (int32_t) input_y;
      const uint32_t input_y_bottom = math_min_u32(input_y_top + 1, input_y_max);
      const float alpha_y = input_y - (float) input_y_top;
      for (size_t output_x = 0; output_x < output_width; output_x++) {
        const float input_x = (float) (int32_t) output_x * width_scale;
        assert(input_x >= 0.0f);
        assert(input_x < (float) input_width);

        uint32_t input_x_left = (uint32_t) (int32_t) input_x;

        float alpha_x = input_x - (float) input_x_left;
        if (input_x_left == input_x_max) {
          // Ensure that there is a pixel to the right of the one pointed at,
          // as required by some CHW kernels.
          --input_x_left;
          alpha_x = 1.0f;
        }
        indirection_buffer[0] =
          (void*) ((uintptr_t) input + (input_y_top * input_width + input_x_left) * input_pixel_stride);
        indirection_buffer[1] =
          (void*) ((uintptr_t) input + (input_y_bottom * input_width + input_x_left) * input_pixel_stride);
        packed_weights[0] = alpha_x;
        packed_weights[1] = alpha_y;
        indirection_buffer += 2;
        packed_weights += 2;
      }
    }
  } else {
    const float height_offset = 0.5f * height_scale - 0.5f;
    const float width_offset = 0.5f * width_scale - 0.5f;
    for (size_t output_y = 0; output_y < output_height; output_y++) {
      float input_y = (float) (int32_t) output_y * height_scale + height_offset;
      input_y = math_min_f32(math_max_f32(input_y, 0.0f), (float) input_y_max);
      const uint32_t input_y_top = (uint32_t) (int32_t) input_y;
      assert((int32_t) input_y_top >= 0);
      const uint32_t input_y_bottom = math_min_u32(input_y_top + 1, input_y_max);
      const float alpha_y = input_y - (float) input_y_top;
      for (size_t output_x = 0; output_x < output_width; output_x++) {
        float input_x = (float) (int32_t) output_x * width_scale + width_offset;
        input_x = math_min_f32(math_max_f32(input_x, 0.0f), (float) input_x_max);
        uint32_t input_x_left = (uint32_t) (int32_t) input_x;
        assert((int32_t) input_x_left >= 0);

        float alpha_x = input_x - (float) input_x_left;
        if (input_x_left == input_x_max) {
          // Ensure that there is a pixel to the right of the one pointed at,
          // as required by some CHW kernels.
          --input_x_left;
          alpha_x = 1.0f;
        }

        indirection_buffer[0] =
          (void*) ((uintptr_t) input + (input_y_top * input_width + input_x_left) * input_pixel_stride);
        indirection_buffer[1] =
          (void*) ((uintptr_t) input + (input_y_bottom * input_width + input_x_left) * input_pixel_stride);
        packed_weights[0] = alpha_x;
        packed_weights[1] = alpha_y;
        indirection_buffer += 2;
        packed_weights += 2;
      }
    }
  }
}

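// Sets up the indirection buffer for 2D max unpooling. Here the pointers refer
// to the output tensor: for every input pixel and pooling element (starting at
// batch_start) the buffer records where the corresponding output pixel lives,
// clamped to the output boundaries to absorb the output padding.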
void xnn_indirection_init_unpool2d(
  xnn_operator_t op,
  size_t batch_start,
  uint32_t log2_element_size)
{
  const void** indirection_buffer = op->indirection_buffer;
  const void* output = op->output;
  const size_t output_pixel_stride = op->output_pixel_stride << log2_element_size;
  const size_t batch_size = op->batch_size;
  const size_t input_height = op->input_height;
  const size_t input_width = op->input_width;
  const size_t output_height = op->output_height;
  const size_t output_width = op->output_width;
  const size_t pooling_height = op->kernel_height;
  const size_t pooling_width = op->kernel_width;
  const size_t output_padding_top = op->padding_top;
  const size_t output_padding_left = op->padding_left;

  for (size_t image = batch_start; image < batch_size; image++) {
    for (size_t input_y = 0; input_y < input_height; input_y++) {
      for (size_t pooling_y = 0; pooling_y < pooling_height; pooling_y++) {
        const size_t output_y = min(doz(input_y * pooling_height + pooling_y, output_padding_top), output_height - 1);
        for (size_t input_x = 0; input_x < input_width; input_x++) {
          for (size_t pooling_x = 0; pooling_x < pooling_width; pooling_x++) {
            const size_t output_x = min(doz(input_x * pooling_width + pooling_x, output_padding_left), output_width - 1);
            indirection_buffer[(((image * input_height + input_y) * input_width + input_x) * pooling_width + pooling_x) * pooling_height + pooling_y] =
              (const void*) ((uintptr_t) output + ((image * output_height + output_y) * output_width + output_x) * output_pixel_stride);
          }
        }
      }
    }
  }
}