blob: 69756af84303d52c703fd5c9a5a1ea932f0857c4 [file] [log] [blame]
XNNPACK Teamb455b122019-09-27 18:10:33 -07001// Copyright (c) Facebook, Inc. and its affiliates.
2// All rights reserved.
3//
4// Copyright 2019 Google LLC
5//
6// This source code is licensed under the BSD-style license found in the
7// LICENSE file in the root directory of this source tree.
8
#include <assert.h>
#include <inttypes.h>
#include <math.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

#include <xnnpack.h>
#include <xnnpack/allocator.h>
#include <xnnpack/common.h>
#include <xnnpack/indirection.h>
#include <xnnpack/log.h>
#include <xnnpack/math.h>
#include <xnnpack/operator.h>
#include <xnnpack/params-init.h>
#include <xnnpack/params.h>
26
27
// Number of kernel placements along one (already padded) input dimension:
// one placement at offset 0, plus one more per full stride that still
// leaves room for the pooling window.
static inline size_t compute_output_dimension(
    size_t padded_input_dimension,
    size_t pooling_dimension,
    size_t stride_dimension)
{
  const size_t last_window_offset = padded_input_dimension - pooling_dimension;
  return last_window_offset / stride_dimension + 1;
}
35
// Output extent under TensorFlow SAME padding: ceil(input / stride).
// SAME padding is chosen at setup time precisely so that this holds,
// independent of the pooling window size.
static inline size_t compute_output_dimension_with_tf_same_padding(
    size_t input_dimension,
    size_t stride_dimension)
{
  const size_t output_dimension = divide_round_up(input_dimension, stride_dimension);
  return output_dimension;
}
42
// Creates a 2D average pooling operator for quantized uint8 (Q8) tensors in
// NHWC layout.
//
// Only validates parameters and precomputes the fixed-point requantization
// state; tensor shapes are bound later in xnn_setup_average_pooling2d_nhwc_q8.
// On success the new operator is stored in *average_pooling_op_out; on any
// failure the partially-constructed operator is deleted and an error status
// is returned.
enum xnn_status xnn_create_average_pooling2d_nhwc_q8(
    uint32_t input_padding_top,
    uint32_t input_padding_right,
    uint32_t input_padding_bottom,
    uint32_t input_padding_left,
    uint32_t pooling_height,
    uint32_t pooling_width,
    uint32_t stride_height,
    uint32_t stride_width,
    size_t channels,
    size_t input_pixel_stride,
    size_t output_pixel_stride,
    uint8_t input_zero_point,
    float input_scale,
    uint8_t output_zero_point,
    float output_scale,
    uint8_t output_min,
    uint8_t output_max,
    uint32_t flags,
    xnn_operator_t* average_pooling_op_out)
{
  xnn_operator_t average_pooling_op = NULL;
  enum xnn_status status = xnn_status_uninitialized;

  if (!xnn_params.initialized) {
    xnn_log_error("failed to create Average Pooling operator: XNNPACK is not initialized");
    goto error;
  }

  // --- Parameter validation ---
  status = xnn_status_invalid_parameter;

  const uint32_t pooling_size = pooling_height * pooling_width;
  if (pooling_size == 0) {
    xnn_log_error(
      "failed to create Average Pooling operator with %" PRIu32 "x%" PRIu32 " pooling size: "
      "pooling size dimensions must be non-zero",
      pooling_width, pooling_height);
    goto error;
  }

  if (pooling_size == 1) {
    xnn_log_error(
      "failed to create Average Pooling operator with 1 pooling element: 1x1 pooling is meaningless");
    goto error;
  }

  if (stride_height == 0 || stride_width == 0) {
    xnn_log_error(
      "failed to create Average Pooling operator with %" PRIu32 "x%" PRIu32 " stride: "
      "stride dimensions must be non-zero",
      stride_width, stride_height);
    goto error;
  }

  if (channels == 0) {
    xnn_log_error(
      "failed to create Average Pooling operator with %zu channels: number of channels must be non-zero",
      channels);
    goto error;
  }

  if (input_pixel_stride < channels) {
    xnn_log_error(
      "failed to create Average Pooling operator with input pixel stride of %zu: "
      "stride must be at least as large as the number of channels (%zu)",
      input_pixel_stride, channels);
    goto error;
  }

  if (output_pixel_stride < channels) {
    xnn_log_error(
      "failed to create Average Pooling operator with output pixel stride of %zu: "
      "stride must be at least as large as the number of channels (%zu)",
      output_pixel_stride, channels);
    goto error;
  }

  if (input_scale <= 0.0f || !isnormal(input_scale)) {
    xnn_log_error(
      "failed to create Average Pooling operator with %.7g input scale: "
      "scale must be finite, normalized, and positive",
      input_scale);
    goto error;
  }

  if (output_scale <= 0.0f || !isnormal(output_scale)) {
    xnn_log_error(
      "failed to create Average Pooling operator with %.7g output scale: "
      "scale must be finite, normalized, and positive",
      output_scale);
    goto error;
  }

  if (output_min >= output_max) {
    xnn_log_error(
      "failed to create Average Pooling operator with [%" PRIu8 ", %" PRIu8 "] output range: "
      "range min must be below range max",
      output_min, output_max);
    goto error;
  }

  // TF SAME padding is computed from the input size at setup time, so the
  // flag is mutually exclusive with any explicit padding specification.
  const bool any_padding = (input_padding_left | input_padding_top | input_padding_right | input_padding_bottom) != 0;
  if ((flags & XNN_FLAG_TENSORFLOW_SAME_PADDING) != 0) {
    if (any_padding) {
      xnn_log_error(
        "failed to create Average Pooling operator with %" PRIu32 "+%" PRIu32 "x%" PRIu32 "+%" PRIu32" padding: "
        "TensorFlow SAME padding can't be combined with explicit padding specification",
        input_padding_top, input_padding_left, input_padding_bottom, input_padding_right);
      goto error;
    }
  }

  status = xnn_status_unsupported_parameter;

  // The fixed-point requantization used by the Q8 micro-kernels only supports
  // input-to-output scale ratios in [2**-8, 2**8).
  const float input_output_scale = input_scale / output_scale;
  if (input_output_scale < 0x1.0p-8f || input_output_scale >= 0x1.0p+8f) {
    xnn_log_error(
      "failed to create Average Pooling operator with %.7g input scale and %.7g output scale: "
      "input-to-output scale ratio (%.7f) must be in [2**-8, 2**8) range",
      input_scale, output_scale, input_output_scale);
    goto error;
  }

  // Reject pooling areas of 2**24 elements or more — presumably to keep the
  // accumulated sum of 8-bit values representable in the requantization
  // pipeline; TODO confirm the exact limit rationale.
  if (pooling_size >= 16777216) {
    xnn_log_error(
      "failed to create Average Pooling operator with %"PRIu32" (%" PRIu32 "x%" PRIu32 ") pooling elements: "
      "the number of elements in the pooling area must be below 2**24",
      pooling_size, pooling_width, pooling_height);
    goto error;
  }

  status = xnn_status_out_of_memory;

  average_pooling_op = xnn_allocate_zero_simd_memory(sizeof(struct xnn_operator));
  if (average_pooling_op == NULL) {
    xnn_log_error("failed to allocate %zu bytes for Average Pooling operator descriptor", sizeof(struct xnn_operator));
    goto error;
  }

  const uint32_t mr = xnn_params.q8.avgpool.mr;
  const uint32_t qr = xnn_params.q8.avgpool.qr;
  const bool tf_same_padding = (flags & XNN_FLAG_TENSORFLOW_SAME_PADDING) != 0;
  // A zero buffer is needed whenever the indirection buffer may reference
  // out-of-bounds (padded) rows, or when the multipass kernel reads extra
  // rows because pooling_size is not of the form mr + k*qr. It is filled
  // with input_zero_point so that, combined with the bias below
  // (-input_zero_point * nrows), padding rows contribute exactly zero.
  if (any_padding || tf_same_padding || pooling_size < mr || (pooling_size - mr) % qr != 0) {
    void* zero_buffer = xnn_allocate_simd_memory(channels * sizeof(uint8_t) + XNN_EXTRA_BYTES);
    if (zero_buffer == NULL) {
      xnn_log_error("failed to allocate %zu bytes for Average Pooling zero padding",
        channels * sizeof(uint8_t) + XNN_EXTRA_BYTES);
      goto error;
    }
    memset(zero_buffer, input_zero_point, channels * sizeof(uint8_t));
    average_pooling_op->zero_buffer = zero_buffer;
  }

  average_pooling_op->padding_top = input_padding_top;
  average_pooling_op->padding_right = input_padding_right;
  average_pooling_op->padding_bottom = input_padding_bottom;
  average_pooling_op->padding_left = input_padding_left;

  average_pooling_op->kernel_height = pooling_height;
  average_pooling_op->kernel_width = pooling_width;
  average_pooling_op->stride_height = stride_height;
  average_pooling_op->stride_width = stride_width;
  average_pooling_op->dilation_height = 1;
  average_pooling_op->dilation_width = 1;
  average_pooling_op->channels = channels;
  average_pooling_op->input_pixel_stride = input_pixel_stride;
  average_pooling_op->output_pixel_stride = output_pixel_stride;

  // Number of rows read in the micro-kernel.
  // The first pass reads mr rows and each subsequent pass reads qr rows, so
  // the total rows actually read is doz(pooling_size, mr) rounded up to a
  // multiple of qr, plus the initial mr; the zero-point bias must account
  // for every row read, including zero-buffer rows.
  const size_t nrows = round_up(doz(pooling_size, mr), qr) + mr;
  average_pooling_op->q8_avgpool_params =
    xnn_init_q8_avgpool_params(
      (int32_t) -((uint32_t) input_zero_point * (uint32_t) nrows),
      input_scale / (output_scale * (float) pooling_size),
      output_zero_point, output_min, output_max);

  average_pooling_op->type = xnn_operator_type_average_pooling_nhwc_q8;
  // NOTE(review): unlike the F32 path, Q8 has only the uniform-divisor
  // average pooling kernel; with TF SAME padding, edge windows still divide
  // by the full pooling_size (padding counted as zero-point) — confirm this
  // matches the intended TF semantics.
  average_pooling_op->ukernel.type = xnn_ukernel_type_average_pooling;
  average_pooling_op->flags = flags;

  *average_pooling_op_out = average_pooling_op;
  return xnn_status_success;

error:
  // average_pooling_op may still be NULL here; xnn_delete_operator must
  // tolerate that (it is called on every failure path).
  xnn_delete_operator(average_pooling_op);
  return status;
}
230
231enum xnn_status xnn_create_average_pooling2d_nhwc_f32(
232 uint32_t input_padding_top,
233 uint32_t input_padding_right,
234 uint32_t input_padding_bottom,
235 uint32_t input_padding_left,
236 uint32_t pooling_height,
237 uint32_t pooling_width,
238 uint32_t stride_height,
239 uint32_t stride_width,
240 size_t channels,
241 size_t input_pixel_stride,
242 size_t output_pixel_stride,
243 float output_min,
244 float output_max,
245 uint32_t flags,
246 xnn_operator_t* average_pooling_op_out)
247{
248 xnn_operator_t average_pooling_op = NULL;
249 enum xnn_status status = xnn_status_uninitialized;
250
251 if (!xnn_params.initialized) {
252 xnn_log_error("failed to create Average Pooling operator: XNNPACK is not initialized");
253 goto error;
254 }
255
256 status = xnn_status_invalid_parameter;
257
258 const uint32_t pooling_size = pooling_height * pooling_width;
259 if (pooling_size == 0) {
260 xnn_log_error(
261 "failed to create Average Pooling operator with %" PRIu32 "x%" PRIu32 " pooling size: "
262 "pooling size dimensions must be non-zero",
263 pooling_width, pooling_height);
264 goto error;
265 }
266
267 if (pooling_size == 1) {
268 xnn_log_error(
269 "failed to create Average Pooling operator with 1 pooling element: 1x1 pooling is meaningless");
270 goto error;
271 }
272
273 if (stride_height == 0 || stride_width == 0) {
274 xnn_log_error(
275 "failed to create Average Pooling operator with %" PRIu32 "x%" PRIu32 " stride: "
276 "stride dimensions must be non-zero",
277 stride_width, stride_height);
278 goto error;
279 }
280
281 if (channels == 0) {
282 xnn_log_error(
283 "failed to create Average Pooling operator with %zu channels: number of channels must be non-zero",
284 channels);
285 goto error;
286 }
287
288 if (input_pixel_stride < channels) {
289 xnn_log_error(
290 "failed to create Average Pooling operator with input pixel stride of %zu: "
291 "stride must be at least as large as the number of channels (%zu)",
292 input_pixel_stride, channels);
293 goto error;
294 }
295
296 if (output_pixel_stride < channels) {
297 xnn_log_error(
298 "failed to create Average Pooling operator with output pixel stride of %zu: "
299 "stride must be at least as large as the number of channels (%zu)",
300 output_pixel_stride, channels);
301 goto error;
302 }
303
304 if (isnan(output_min)) {
305 xnn_log_error(
306 "failed to create Average Pooling operator with NaN output lower bound: lower bound must be non-NaN");
307 goto error;
308 }
309
310 if (isnan(output_max)) {
311 xnn_log_error(
312 "failed to create Average Pooling operator with NaN output upper bound: upper bound must be non-NaN");
313 goto error;
314 }
315
316 if (output_min >= output_max) {
317 xnn_log_error(
318 "failed to create Average Pooling operator with [%.7g, %.7g] output range: lower bound must be below upper bound",
319 output_min, output_max);
320 goto error;
321 }
322
Marat Dukhan466da752020-02-28 02:00:49 -0800323 const bool any_padding = (input_padding_left | input_padding_top | input_padding_right | input_padding_bottom) != 0;
324 if ((flags & XNN_FLAG_TENSORFLOW_SAME_PADDING) != 0) {
325 if (any_padding) {
326 xnn_log_error(
327 "failed to create Average Pooling operator with %" PRIu32 "+%" PRIu32 "x%" PRIu32 "+%" PRIu32" padding: "
328 "TensorFlow SAME padding can't be combined with explicit padding specification",
329 input_padding_top, input_padding_left, input_padding_bottom, input_padding_right);
330 goto error;
331 }
332 }
333
XNNPACK Teamb455b122019-09-27 18:10:33 -0700334 status = xnn_status_out_of_memory;
335
Marat Dukhan04f03be2019-11-19 12:36:47 -0800336 average_pooling_op = xnn_allocate_zero_simd_memory(sizeof(struct xnn_operator));
XNNPACK Teamb455b122019-09-27 18:10:33 -0700337 if (average_pooling_op == NULL) {
338 xnn_log_error("failed to allocate %zu bytes for Average Pooling operator descriptor", sizeof(struct xnn_operator));
339 goto error;
340 }
341
XNNPACK Teamb455b122019-09-27 18:10:33 -0700342 const uint32_t mr = xnn_params.f32.avgpool.mr;
343 const uint32_t qr = xnn_params.f32.avgpool.qr;
Marat Dukhan466da752020-02-28 02:00:49 -0800344 const bool tf_same_padding = (flags & XNN_FLAG_TENSORFLOW_SAME_PADDING) != 0;
345 if (any_padding || tf_same_padding || pooling_size < mr || (pooling_size - mr) % qr != 0) {
Marat Dukhan04f03be2019-11-19 12:36:47 -0800346 void* zero_buffer = xnn_allocate_zero_simd_memory(channels * sizeof(float) + XNN_EXTRA_BYTES);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700347 if (zero_buffer == NULL) {
348 xnn_log_error("failed to allocate %zu bytes for Average Pooling zero padding",
349 channels * sizeof(float) + XNN_EXTRA_BYTES);
350 goto error;
351 }
XNNPACK Teamb455b122019-09-27 18:10:33 -0700352 average_pooling_op->zero_buffer = zero_buffer;
353 }
354
355 average_pooling_op->padding_top = input_padding_top;
356 average_pooling_op->padding_right = input_padding_right;
357 average_pooling_op->padding_bottom = input_padding_bottom;
358 average_pooling_op->padding_left = input_padding_left;
359
360 average_pooling_op->kernel_height = pooling_height;
361 average_pooling_op->kernel_width = pooling_width;
362 average_pooling_op->stride_height = stride_height;
363 average_pooling_op->stride_width = stride_width;
364 average_pooling_op->dilation_height = 1;
365 average_pooling_op->dilation_width = 1;
366 average_pooling_op->channels = channels;
367 average_pooling_op->input_pixel_stride = input_pixel_stride;
368 average_pooling_op->output_pixel_stride = output_pixel_stride;
369
Marat Dukhanefc47b82019-11-18 09:25:38 -0800370 average_pooling_op->type = xnn_operator_type_average_pooling_nhwc_f32;
XNNPACK Teamb455b122019-09-27 18:10:33 -0700371 if (any_padding) {
372 average_pooling_op->f32_output_params =
Marat Dukhaneeaa7bd2019-10-25 17:31:25 -0700373 xnn_init_f32_output_params(output_min, output_max);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700374
375 average_pooling_op->ukernel.type = xnn_ukernel_type_pixelwise_average_pooling;
376 } else {
377 average_pooling_op->f32_avgpool_params =
Marat Dukhaneeaa7bd2019-10-25 17:31:25 -0700378 xnn_init_f32_avgpool_params(1.0f / (float) pooling_size, output_min, output_max);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700379
380 average_pooling_op->ukernel.type = xnn_ukernel_type_average_pooling;
381 }
Marat Dukhan466da752020-02-28 02:00:49 -0800382 average_pooling_op->flags = flags;
XNNPACK Teamb455b122019-09-27 18:10:33 -0700383
384 *average_pooling_op_out = average_pooling_op;
385 return xnn_status_success;
386
387error:
388 xnn_delete_operator(average_pooling_op);
389 return status;
390}
391
392enum xnn_status xnn_setup_average_pooling2d_nhwc_q8(
393 xnn_operator_t average_pooling_op,
394 size_t batch_size,
395 size_t input_height,
396 size_t input_width,
397 const uint8_t* input,
398 uint8_t* output,
399 pthreadpool_t threadpool)
400{
Marat Dukhanefc47b82019-11-18 09:25:38 -0800401 if (average_pooling_op->type != xnn_operator_type_average_pooling_nhwc_q8) {
XNNPACK Teamb455b122019-09-27 18:10:33 -0700402 xnn_log_error("failed to setup Average Pooling (Q8) operator: operator type mismatch");
403 return xnn_status_invalid_parameter;
404 }
405 average_pooling_op->state = xnn_run_state_invalid;
406
407 if (!xnn_params.initialized) {
408 xnn_log_error("failed to setup Average Pooling operator: XNNPACK is not initialized");
409 return xnn_status_uninitialized;
410 }
411
412 if (input_width == 0 || input_height == 0) {
413 xnn_log_error(
414 "failed to setup Average Pooling operator with %zux%zu input: input dimensions must be non-zero",
415 input_width, input_height);
416 return xnn_status_invalid_parameter;
417 }
418
419 if (batch_size == 0) {
420 average_pooling_op->state = xnn_run_state_skip;
421 return xnn_status_success;
422 }
423
XNNPACK Teamb455b122019-09-27 18:10:33 -0700424 average_pooling_op->input_height = input_height;
425 average_pooling_op->input_width = input_width;
426 average_pooling_op->input = input;
427
Marat Dukhan466da752020-02-28 02:00:49 -0800428 if (average_pooling_op->flags & XNN_FLAG_TENSORFLOW_SAME_PADDING) {
429 average_pooling_op->output_height = compute_output_dimension_with_tf_same_padding(
430 input_height, average_pooling_op->stride_height);
431 average_pooling_op->output_width = compute_output_dimension_with_tf_same_padding(
432 input_width, average_pooling_op->stride_width);
433
434 const uint32_t effective_kernel_height = (average_pooling_op->kernel_height - 1) * average_pooling_op->dilation_height + 1;
435 const uint32_t effective_kernel_width = (average_pooling_op->kernel_width - 1) * average_pooling_op->dilation_width + 1;
436 const uint32_t total_padding_height =
437 (average_pooling_op->output_height - 1) * average_pooling_op->stride_height + effective_kernel_height - input_height;
438 const uint32_t total_padding_width =
439 (average_pooling_op->output_width - 1) * average_pooling_op->stride_width + effective_kernel_width - input_width;
440 average_pooling_op->padding_top = total_padding_height / 2;
441 average_pooling_op->padding_left = total_padding_width / 2;
442 average_pooling_op->padding_bottom = total_padding_height - average_pooling_op->padding_top;
443 average_pooling_op->padding_right = total_padding_width - average_pooling_op->padding_left;
444 } else {
445 average_pooling_op->output_height = compute_output_dimension(
446 average_pooling_op->padding_top + input_height + average_pooling_op->padding_bottom,
447 average_pooling_op->kernel_height,
448 average_pooling_op->stride_height);
449 average_pooling_op->output_width = compute_output_dimension(
450 average_pooling_op->padding_left + input_width + average_pooling_op->padding_right,
451 average_pooling_op->kernel_width,
452 average_pooling_op->stride_width);
453 }
XNNPACK Teamb455b122019-09-27 18:10:33 -0700454 average_pooling_op->output = output;
455
XNNPACK Teamb455b122019-09-27 18:10:33 -0700456 const size_t pooling_height = average_pooling_op->kernel_height;
457 const size_t pooling_width = average_pooling_op->kernel_width;
458 const size_t pooling_size = pooling_height * pooling_width;
459 const size_t output_height = average_pooling_op->output_height;
460 const size_t output_width = average_pooling_op->output_width;
461 // Micro-kernel may read up to (mr - 1) elements after the end of indirection buffer.
462 const uint32_t mr = xnn_params.q8.avgpool.mr;
463
464 const size_t step_width = min(average_pooling_op->stride_width, pooling_width);
Marat Dukhanbd8a9622019-12-06 01:05:35 -0800465 const size_t step_height = pooling_size + (output_width - 1) * step_width * pooling_height;
XNNPACK Teamb455b122019-09-27 18:10:33 -0700466
Marat Dukhan96171aa2020-02-27 18:26:48 -0800467 const size_t last_input_height = average_pooling_op->last_input_height;
468 const size_t last_input_width = average_pooling_op->last_input_width;
469 if (input_height != last_input_height || input_width != last_input_width) {
470 // Micro-kernel may read up to (mr - 1) elements after the end of indirection buffer.
471 const size_t indirection_buffer_size = sizeof(void*) * ((mr - 1) + batch_size * output_height * step_height);
472
473 const void** indirection_buffer = (const void**) xnn_reallocate_memory(average_pooling_op->indirection_buffer, indirection_buffer_size);
474 if (indirection_buffer == NULL) {
475 xnn_log_error("failed to allocate %zu bytes for indirection buffer", indirection_buffer_size);
476 return xnn_status_out_of_memory;
477 }
478 average_pooling_op->indirection_buffer = indirection_buffer;
479
480 // Indirection buffer always setup for batch size 1, larger batch size supported through input_offset argument
481 average_pooling_op->batch_size = 1;
482 xnn_indirection_init_dwconv2d(
483 average_pooling_op, 0, step_height, step_width, 0 /* log2(sizeof(uint8_t)) */);
484
485 average_pooling_op->last_input = input;
486 average_pooling_op->last_input_height = input_height;
487 average_pooling_op->last_input_width = input_width;
XNNPACK Teamb455b122019-09-27 18:10:33 -0700488 }
XNNPACK Teamb455b122019-09-27 18:10:33 -0700489
490 const uint32_t qr = xnn_params.q8.avgpool.qr;
491 const size_t channels = average_pooling_op->channels;
492
XNNPACK Teamb455b122019-09-27 18:10:33 -0700493 const size_t output_width_stride = average_pooling_op->output_pixel_stride * sizeof(uint8_t);
494 const size_t output_height_stride = output_width * output_width_stride;
495
496 const size_t multipass_adjustment =
497 pooling_size > mr ? round_up(pooling_size - mr, qr) + mr - qr : 0;
498 average_pooling_op->context.average_pooling = (struct average_pooling_context) {
Marat Dukhan96171aa2020-02-27 18:26:48 -0800499 .indirect_input = average_pooling_op->indirection_buffer,
500 .indirect_input_height_stride = step_height * sizeof(void*),
501 .input_offset = (size_t) ((uintptr_t) input - (uintptr_t) average_pooling_op->last_input),
502 .input_batch_stride = input_height * input_width * average_pooling_op->input_pixel_stride * sizeof(uint8_t),
503 .output = output,
504 .output_batch_stride = output_height * output_height_stride,
505 .output_height_stride = output_height_stride,
506 .output_width = output_width,
507 .pooling_size = pooling_size,
508 .channels = channels,
509 .zero = average_pooling_op->zero_buffer,
510 .input_increment = (pooling_height * step_width - multipass_adjustment) * sizeof(void*),
511 .output_increment = output_width_stride - channels * sizeof(uint8_t),
512 .params.q8 = average_pooling_op->q8_avgpool_params,
XNNPACK Teamb455b122019-09-27 18:10:33 -0700513 };
XNNPACK Teamb455b122019-09-27 18:10:33 -0700514 if (pooling_size <= mr) {
515 average_pooling_op->context.average_pooling.unipass_ukernel = xnn_params.q8.avgpool.up;
516 average_pooling_op->compute.task_2d = (pthreadpool_task_2d_t) xnn_compute_average_pooling_unipass;
517 } else {
518 average_pooling_op->context.average_pooling.multipass_ukernel = xnn_params.q8.avgpool.mp;
519 average_pooling_op->compute.task_2d = (pthreadpool_task_2d_t) xnn_compute_average_pooling_multipass;
520 }
Marat Dukhan96171aa2020-02-27 18:26:48 -0800521 average_pooling_op->compute.type = xnn_parallelization_type_2d;
522 average_pooling_op->compute.range[0] = batch_size;
523 average_pooling_op->compute.range[1] = output_height;
XNNPACK Teamb455b122019-09-27 18:10:33 -0700524 average_pooling_op->state = xnn_run_state_ready;
525
XNNPACK Teamb455b122019-09-27 18:10:33 -0700526 return xnn_status_success;
527}
528
529enum xnn_status xnn_setup_average_pooling2d_nhwc_f32(
530 xnn_operator_t average_pooling_op,
531 size_t batch_size,
532 size_t input_height,
533 size_t input_width,
534 const float* input,
535 float* output,
536 pthreadpool_t threadpool)
537{
Marat Dukhanefc47b82019-11-18 09:25:38 -0800538 if (average_pooling_op->type != xnn_operator_type_average_pooling_nhwc_f32) {
XNNPACK Teamb455b122019-09-27 18:10:33 -0700539 xnn_log_error("failed to setup Average Pooling (F32) operator: operator type mismatch");
540 return xnn_status_invalid_parameter;
541 }
542 average_pooling_op->state = xnn_run_state_invalid;
543
544 if (!xnn_params.initialized) {
545 xnn_log_error("failed to setup Average Pooling operator: XNNPACK is not initialized");
546 return xnn_status_uninitialized;
547 }
548
549 if (input_width == 0 || input_height == 0) {
550 xnn_log_error(
551 "failed to setup Average Pooling operator with %zux%zu input: input dimensions must be non-zero",
552 input_width, input_height);
553 return xnn_status_invalid_parameter;
554 }
555
556 if (batch_size == 0) {
557 average_pooling_op->state = xnn_run_state_skip;
558 return xnn_status_success;
559 }
560
XNNPACK Teamb455b122019-09-27 18:10:33 -0700561 average_pooling_op->input_height = input_height;
562 average_pooling_op->input_width = input_width;
563 average_pooling_op->input = input;
564
Marat Dukhan466da752020-02-28 02:00:49 -0800565 if (average_pooling_op->flags & XNN_FLAG_TENSORFLOW_SAME_PADDING) {
566 average_pooling_op->output_height = compute_output_dimension_with_tf_same_padding(
567 input_height, average_pooling_op->stride_height);
568 average_pooling_op->output_width = compute_output_dimension_with_tf_same_padding(
569 input_width, average_pooling_op->stride_width);
570
571 const uint32_t effective_kernel_height = (average_pooling_op->kernel_height - 1) * average_pooling_op->dilation_height + 1;
572 const uint32_t effective_kernel_width = (average_pooling_op->kernel_width - 1) * average_pooling_op->dilation_width + 1;
573 const uint32_t total_padding_height =
574 (average_pooling_op->output_height - 1) * average_pooling_op->stride_height + effective_kernel_height - input_height;
575 const uint32_t total_padding_width =
576 (average_pooling_op->output_width - 1) * average_pooling_op->stride_width + effective_kernel_width - input_width;
577 average_pooling_op->padding_top = total_padding_height / 2;
578 average_pooling_op->padding_left = total_padding_width / 2;
579 average_pooling_op->padding_bottom = total_padding_height - average_pooling_op->padding_top;
580 average_pooling_op->padding_right = total_padding_width - average_pooling_op->padding_left;
581 } else {
582 average_pooling_op->output_height = compute_output_dimension(
583 average_pooling_op->padding_top + input_height + average_pooling_op->padding_bottom,
584 average_pooling_op->kernel_height,
585 average_pooling_op->stride_height);
586 average_pooling_op->output_width = compute_output_dimension(
587 average_pooling_op->padding_left + input_width + average_pooling_op->padding_right,
588 average_pooling_op->kernel_width,
589 average_pooling_op->stride_width);
590 }
XNNPACK Teamb455b122019-09-27 18:10:33 -0700591 average_pooling_op->output = output;
592
XNNPACK Teamb455b122019-09-27 18:10:33 -0700593 const size_t pooling_height = average_pooling_op->kernel_height;
594 const size_t pooling_width = average_pooling_op->kernel_width;
595 const size_t pooling_size = pooling_height * pooling_width;
596 const size_t output_height = average_pooling_op->output_height;
597 const size_t output_width = average_pooling_op->output_width;
598 // Micro-kernel may read up to (mr - 1) elements after the end of indirection buffer.
599 const uint32_t mr = xnn_params.f32.avgpool.mr;
600 assert(mr == xnn_params.f32.pavgpool.mr);
601
602 const size_t step_width = min(average_pooling_op->stride_width, pooling_width);
Marat Dukhanbd8a9622019-12-06 01:05:35 -0800603 const size_t step_height = pooling_size + (output_width - 1) * step_width * pooling_height;
XNNPACK Teamb455b122019-09-27 18:10:33 -0700604
Marat Dukhan96171aa2020-02-27 18:26:48 -0800605 const size_t last_input_height = average_pooling_op->last_input_height;
606 const size_t last_input_width = average_pooling_op->last_input_width;
607 if (input_height != last_input_height || input_width != last_input_width) {
608 // Micro-kernel may read up to (mr - 1) elements after the end of indirection buffer.
609 const size_t indirection_buffer_size = sizeof(void*) * ((mr - 1) + batch_size * output_height * step_height);
610
611 const void** indirection_buffer = (const void**) xnn_reallocate_memory(average_pooling_op->indirection_buffer, indirection_buffer_size);
612 if (indirection_buffer == NULL) {
613 xnn_log_error("failed to allocate %zu bytes for indirection buffer", indirection_buffer_size);
614 return xnn_status_out_of_memory;
615 }
616 average_pooling_op->indirection_buffer = indirection_buffer;
617
618 // Indirection buffer always setup for batch size 1, larger batch size supported through input_offset argument
619 average_pooling_op->batch_size = 1;
620 xnn_indirection_init_dwconv2d(
621 average_pooling_op, 0, step_height, step_width, 2 /* log2(sizeof(float)) */);
622
623 average_pooling_op->last_input = input;
624 average_pooling_op->last_input_height = input_height;
625 average_pooling_op->last_input_width = input_width;
XNNPACK Teamb455b122019-09-27 18:10:33 -0700626 }
XNNPACK Teamb455b122019-09-27 18:10:33 -0700627
628 const size_t channels = average_pooling_op->channels;
629
630 const size_t indirect_input_height_stride = step_height * sizeof(void*);
631 const size_t output_width_stride = average_pooling_op->output_pixel_stride * sizeof(float);
632 const size_t output_height_stride = output_width * output_width_stride;
633
634 switch (average_pooling_op->ukernel.type) {
635 case xnn_ukernel_type_average_pooling:
636 {
637 const uint32_t qr = xnn_params.f32.avgpool.qr;
638 const size_t multipass_adjustment =
639 pooling_size > mr ? round_up(pooling_size - mr, qr) + mr - qr : 0;
640 average_pooling_op->context.average_pooling = (struct average_pooling_context) {
Marat Dukhan96171aa2020-02-27 18:26:48 -0800641 .indirect_input = average_pooling_op->indirection_buffer,
XNNPACK Teamb455b122019-09-27 18:10:33 -0700642 .indirect_input_height_stride = indirect_input_height_stride,
Marat Dukhan96171aa2020-02-27 18:26:48 -0800643 .input_offset = (size_t) ((uintptr_t) input - (uintptr_t) average_pooling_op->last_input),
644 .input_batch_stride = input_height * input_width * average_pooling_op->input_pixel_stride * sizeof(float),
XNNPACK Teamb455b122019-09-27 18:10:33 -0700645 .output = output,
646 .output_batch_stride = output_height * output_height_stride,
647 .output_height_stride = output_height_stride,
648 .output_width = output_width,
649 .pooling_size = pooling_size,
650 .channels = channels,
651 .zero = average_pooling_op->zero_buffer,
652 .input_increment = (pooling_height * step_width - multipass_adjustment) * sizeof(void*),
653 .output_increment = output_width_stride - channels * sizeof(float),
654 .params.f32 = average_pooling_op->f32_avgpool_params,
655 };
656 if (pooling_size <= mr) {
657 average_pooling_op->context.average_pooling.unipass_ukernel = xnn_params.f32.avgpool.up;
658 average_pooling_op->compute.task_2d = (pthreadpool_task_2d_t) xnn_compute_average_pooling_unipass;
659 } else {
660 average_pooling_op->context.average_pooling.multipass_ukernel = xnn_params.f32.avgpool.mp;
661 average_pooling_op->compute.task_2d = (pthreadpool_task_2d_t) xnn_compute_average_pooling_multipass;
662 }
663 break;
664 }
665 case xnn_ukernel_type_pixelwise_average_pooling:
666 {
Marat Dukhan96171aa2020-02-27 18:26:48 -0800667 if (input_height != last_input_height || input_width != last_input_width) {
XNNPACK Teamb455b122019-09-27 18:10:33 -0700668 const size_t pixelwise_buffer_size = output_height * output_width * sizeof(float);
Marat Dukhan04f03be2019-11-19 12:36:47 -0800669 float* pixelwise_buffer = (float*) xnn_reallocate_memory(average_pooling_op->pixelwise_buffer, pixelwise_buffer_size);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700670 if (pixelwise_buffer == NULL) {
671 xnn_log_error("failed to allocate %zu bytes for pixelwise buffer", pixelwise_buffer_size);
672 return xnn_status_out_of_memory;
673 }
674 average_pooling_op->pixelwise_buffer = pixelwise_buffer;
675
676 float* pixelwise_pointer = pixelwise_buffer;
677 for (size_t output_y = 0; output_y < output_height; output_y++) {
678 const size_t input_y_start = doz(output_y * average_pooling_op->stride_height, average_pooling_op->padding_top);
679 const size_t input_y_end =
680 min(doz(output_y * average_pooling_op->stride_height + average_pooling_op->kernel_height, average_pooling_op->padding_top), input_height);
681 const uint32_t input_y_range = (uint32_t) (input_y_end - input_y_start);
682 for (size_t output_x = 0; output_x < output_width; output_x++) {
683 const size_t input_x_start = doz(output_x * average_pooling_op->stride_width, average_pooling_op->padding_left);
684 const size_t input_x_end =
685 min(doz(output_x * average_pooling_op->stride_width + average_pooling_op->kernel_width, average_pooling_op->padding_left), input_width);
686 const uint32_t input_x_range = (uint32_t) (input_x_end - input_x_start);
687 *pixelwise_pointer++ = 1.0f / ((float) (int32_t) (input_y_range * input_x_range));
688 }
689 }
690 }
691
692 const uint32_t qr = xnn_params.f32.pavgpool.qr;
693 const size_t multipass_adjustment =
694 pooling_size > mr ? round_up(pooling_size - mr, qr) + mr - qr : 0;
695 average_pooling_op->context.pixelwise_average_pooling = (struct pixelwise_average_pooling_context) {
Marat Dukhan96171aa2020-02-27 18:26:48 -0800696 .indirect_input = average_pooling_op->indirection_buffer,
XNNPACK Teamb455b122019-09-27 18:10:33 -0700697 .indirect_input_height_stride = indirect_input_height_stride,
Marat Dukhan96171aa2020-02-27 18:26:48 -0800698 .input_batch_stride = input_height * input_width * average_pooling_op->input_pixel_stride * sizeof(float),
699 .input_offset = (size_t) ((uintptr_t) input - (uintptr_t) average_pooling_op->last_input),
XNNPACK Teamb455b122019-09-27 18:10:33 -0700700 .pixelwise_buffer = average_pooling_op->pixelwise_buffer,
701 .pixelwise_buffer_height_stride = output_width * sizeof(float),
702 .output = output,
703 .output_batch_stride = output_height * output_height_stride,
704 .output_height_stride = output_height_stride,
705 .output_width = output_width,
706 .pooling_size = pooling_size,
707 .channels = channels,
708 .zero = average_pooling_op->zero_buffer,
709 .input_increment = (pooling_height * step_width - multipass_adjustment) * sizeof(void*),
710 .output_increment = output_width_stride - channels * sizeof(float),
711 .params.f32 = average_pooling_op->f32_output_params,
712 };
713 if (pooling_size <= mr) {
714 average_pooling_op->context.pixelwise_average_pooling.unipass_ukernel = xnn_params.f32.pavgpool.up;
715 average_pooling_op->compute.task_2d = (pthreadpool_task_2d_t) xnn_compute_pixelwise_average_pooling_unipass;
716 } else {
717 average_pooling_op->context.pixelwise_average_pooling.multipass_ukernel = xnn_params.f32.pavgpool.mp;
718 average_pooling_op->compute.task_2d = (pthreadpool_task_2d_t) xnn_compute_pixelwise_average_pooling_multipass;
719 }
720 break;
721 }
722 default:
723 XNN_UNREACHABLE;
724 }
725 average_pooling_op->compute.type = xnn_parallelization_type_2d;
726 average_pooling_op->compute.range[0] = batch_size;
727 average_pooling_op->compute.range[1] = output_height;
728 average_pooling_op->state = xnn_run_state_ready;
729
XNNPACK Teamb455b122019-09-27 18:10:33 -0700730 return xnn_status_success;
731}