blob: e586b19a2ad19a1302f93e8a20edc3801772393b [file] [log] [blame]
XNNPACK Teamb455b122019-09-27 18:10:33 -07001// Copyright (c) Facebook, Inc. and its affiliates.
2// All rights reserved.
3//
4// Copyright 2019 Google LLC
5//
6// This source code is licensed under the BSD-style license found in the
7// LICENSE file in the root directory of this source tree.
8
#include <assert.h>
#include <inttypes.h>
#include <math.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

#include <xnnpack.h>
#include <xnnpack/allocator.h>
#include <xnnpack/common.h>
#include <xnnpack/indirection.h>
#include <xnnpack/log.h>
#include <xnnpack/math.h>
#include <xnnpack/operator.h>
#include <xnnpack/params-init.h>
#include <xnnpack/params.h>
26
27
// Number of valid pooling-window placements along one spatial axis:
// one window at offset zero, plus one per full stride that still fits
// inside the padded input extent.
static inline size_t compute_output_dimension(
    size_t padded_input_dimension,
    size_t pooling_dimension,
    size_t stride_dimension)
{
  const size_t stride_range = padded_input_dimension - pooling_dimension;
  return stride_range / stride_dimension + 1;
}
35
// Output extent under TensorFlow SAME padding: ceil(input / stride).
// (Equivalent to divide_round_up from <xnnpack/math.h>, written out inline.)
static inline size_t compute_output_dimension_with_tf_same_padding(
    size_t input_dimension,
    size_t stride_dimension)
{
  const size_t quotient = input_dimension / stride_dimension;
  return input_dimension % stride_dimension == 0 ? quotient : quotient + 1;
}
42
// Creates a 2-D average pooling operator for quantized uint8 (Q8) data in NHWC layout.
//
// Validates every user-supplied parameter, allocates the operator descriptor,
// precomputes the fixed-point requantization parameters that map accumulated
// input sums into the output quantization domain, and returns the operator
// through *average_pooling_op_out.
//
// Returns xnn_status_success on success; on failure returns a status describing
// the first rejected parameter (or the allocation failure) and creates nothing.
enum xnn_status xnn_create_average_pooling2d_nhwc_q8(
    uint32_t input_padding_top,
    uint32_t input_padding_right,
    uint32_t input_padding_bottom,
    uint32_t input_padding_left,
    uint32_t pooling_height,
    uint32_t pooling_width,
    uint32_t stride_height,
    uint32_t stride_width,
    size_t channels,
    size_t input_pixel_stride,
    size_t output_pixel_stride,
    uint8_t input_zero_point,
    float input_scale,
    uint8_t output_zero_point,
    float output_scale,
    uint8_t output_min,
    uint8_t output_max,
    uint32_t flags,
    xnn_operator_t* average_pooling_op_out)
{
  xnn_operator_t average_pooling_op = NULL;
  enum xnn_status status = xnn_status_uninitialized;

  if (!xnn_params.initialized) {
    xnn_log_error("failed to create Average Pooling operator: XNNPACK is not initialized");
    goto error;
  }

  // --- Parameter validation ---
  status = xnn_status_invalid_parameter;

  const uint32_t pooling_size = pooling_height * pooling_width;
  if (pooling_size == 0) {
    xnn_log_error(
      "failed to create Average Pooling operator with %" PRIu32 "x%" PRIu32 " pooling size: "
      "pooling size dimensions must be non-zero",
      pooling_width, pooling_height);
    goto error;
  }

  if (pooling_size == 1) {
    // A 1x1 average pool is the identity; reject it as a likely caller error.
    xnn_log_error(
      "failed to create Average Pooling operator with 1 pooling element: 1x1 pooling is meaningless");
    goto error;
  }

  if (stride_height == 0 || stride_width == 0) {
    xnn_log_error(
      "failed to create Average Pooling operator with %" PRIu32 "x%" PRIu32 " stride: "
      "stride dimensions must be non-zero",
      stride_width, stride_height);
    goto error;
  }

  if (channels == 0) {
    xnn_log_error(
      "failed to create Average Pooling operator with %zu channels: number of channels must be non-zero",
      channels);
    goto error;
  }

  // Pixel strides are in elements and must be able to hold one whole pixel.
  if (input_pixel_stride < channels) {
    xnn_log_error(
      "failed to create Average Pooling operator with input pixel stride of %zu: "
      "stride must be at least as large as the number of channels (%zu)",
      input_pixel_stride, channels);
    goto error;
  }

  if (output_pixel_stride < channels) {
    xnn_log_error(
      "failed to create Average Pooling operator with output pixel stride of %zu: "
      "stride must be at least as large as the number of channels (%zu)",
      output_pixel_stride, channels);
    goto error;
  }

  if (input_scale <= 0.0f || !isnormal(input_scale)) {
    xnn_log_error(
      "failed to create Average Pooling operator with %.7g input scale: "
      "scale must be finite, normalized, and positive",
      input_scale);
    goto error;
  }

  if (output_scale <= 0.0f || !isnormal(output_scale)) {
    xnn_log_error(
      "failed to create Average Pooling operator with %.7g output scale: "
      "scale must be finite, normalized, and positive",
      output_scale);
    goto error;
  }

  if (output_min >= output_max) {
    xnn_log_error(
      "failed to create Average Pooling operator with [%" PRIu8 ", %" PRIu8 "] output range: "
      "range min must be below range max",
      output_min, output_max);
    goto error;
  }

  // TF SAME padding is computed implicitly at setup time from the input size,
  // so it cannot be combined with explicit padding.
  const bool any_padding = (input_padding_left | input_padding_top | input_padding_right | input_padding_bottom) != 0;
  if ((flags & XNN_FLAG_TENSORFLOW_SAME_PADDING) != 0) {
    if (any_padding) {
      xnn_log_error(
        "failed to create Average Pooling operator with %" PRIu32 "+%" PRIu32 "x%" PRIu32 "+%" PRIu32" padding: "
        "TensorFlow SAME padding can't be combined with explicit padding specification",
        input_padding_top, input_padding_left, input_padding_bottom, input_padding_right);
      goto error;
    }
  }

  status = xnn_status_unsupported_parameter;

  // The requantization scheme only supports input/output scale ratios in [2**-8, 2**8).
  const float input_output_scale = input_scale / output_scale;
  if (input_output_scale < 0x1.0p-8f || input_output_scale >= 0x1.0p+8f) {
    xnn_log_error(
      "failed to create Average Pooling operator with %.7g input scale and %.7g output scale: "
      "input-to-output scale ratio (%.7f) must be in [2**-8, 2**8) range",
      input_scale, output_scale, input_output_scale);
    goto error;
  }

  if (pooling_size >= 16777216) {
    // 2**24 elements: beyond this the accumulated sum could exceed the
    // micro-kernel's supported range.
    xnn_log_error(
      "failed to create Average Pooling operator with %"PRIu32" (%" PRIu32 "x%" PRIu32 ") pooling elements: "
      "the number of elements in the pooling area must be below 2**24",
      pooling_size, pooling_width, pooling_height);
    goto error;
  }

  // --- Allocation and initialization ---
  status = xnn_status_out_of_memory;

  average_pooling_op = xnn_allocate_zero_simd_memory(sizeof(struct xnn_operator));
  if (average_pooling_op == NULL) {
    xnn_log_error("failed to allocate %zu bytes for Average Pooling operator descriptor", sizeof(struct xnn_operator));
    goto error;
  }

  const uint32_t mr = xnn_params.q8.avgpool.mr;
  const uint32_t qr = xnn_params.q8.avgpool.qr;
  const bool tf_same_padding = (flags & XNN_FLAG_TENSORFLOW_SAME_PADDING) != 0;
  // A zero buffer is needed whenever the micro-kernel may read out-of-bounds
  // rows: explicit/implicit padding, or a pooling size that doesn't match the
  // micro-kernel's mr/qr tiling exactly. It is filled with the input zero point
  // so padded positions contribute "zero" in the quantized domain.
  if (any_padding || tf_same_padding || pooling_size < mr || (pooling_size - mr) % qr != 0) {
    void* zero_buffer = xnn_allocate_simd_memory(channels * sizeof(uint8_t) + XNN_EXTRA_BYTES);
    if (zero_buffer == NULL) {
      xnn_log_error("failed to allocate %zu bytes for Average Pooling zero padding",
        channels * sizeof(uint8_t) + XNN_EXTRA_BYTES);
      goto error;
    }
    memset(zero_buffer, input_zero_point, channels * sizeof(uint8_t));
    average_pooling_op->zero_buffer = zero_buffer;
  }

  average_pooling_op->padding_top = input_padding_top;
  average_pooling_op->padding_right = input_padding_right;
  average_pooling_op->padding_bottom = input_padding_bottom;
  average_pooling_op->padding_left = input_padding_left;

  average_pooling_op->kernel_height = pooling_height;
  average_pooling_op->kernel_width = pooling_width;
  average_pooling_op->stride_height = stride_height;
  average_pooling_op->stride_width = stride_width;
  average_pooling_op->dilation_height = 1;
  average_pooling_op->dilation_width = 1;
  average_pooling_op->channels = channels;
  average_pooling_op->input_pixel_stride = input_pixel_stride;
  average_pooling_op->output_pixel_stride = output_pixel_stride;

  // Number of rows read in the micro-kernel (rounded up to the mr/qr tiling);
  // the bias below cancels the zero-point contribution of all of them.
  const size_t nrows = round_up(doz(pooling_size, mr), qr) + mr;
  average_pooling_op->q8_avgpool_params =
    xnn_init_q8_avgpool_params(
      (int32_t) -((uint32_t) input_zero_point * (uint32_t) nrows),
      input_scale / (output_scale * (float) pooling_size),
      output_zero_point, output_min, output_max);

  average_pooling_op->type = xnn_operator_type_average_pooling_nhwc_q8;
  average_pooling_op->ukernel.type = xnn_ukernel_type_average_pooling;
  average_pooling_op->flags = flags;

  *average_pooling_op_out = average_pooling_op;
  return xnn_status_success;

error:
  xnn_delete_operator(average_pooling_op);
  return status;
}
230
231enum xnn_status xnn_create_average_pooling2d_nhwc_f32(
232 uint32_t input_padding_top,
233 uint32_t input_padding_right,
234 uint32_t input_padding_bottom,
235 uint32_t input_padding_left,
236 uint32_t pooling_height,
237 uint32_t pooling_width,
238 uint32_t stride_height,
239 uint32_t stride_width,
240 size_t channels,
241 size_t input_pixel_stride,
242 size_t output_pixel_stride,
243 float output_min,
244 float output_max,
245 uint32_t flags,
246 xnn_operator_t* average_pooling_op_out)
247{
248 xnn_operator_t average_pooling_op = NULL;
249 enum xnn_status status = xnn_status_uninitialized;
250
251 if (!xnn_params.initialized) {
252 xnn_log_error("failed to create Average Pooling operator: XNNPACK is not initialized");
253 goto error;
254 }
255
256 status = xnn_status_invalid_parameter;
257
258 const uint32_t pooling_size = pooling_height * pooling_width;
259 if (pooling_size == 0) {
260 xnn_log_error(
261 "failed to create Average Pooling operator with %" PRIu32 "x%" PRIu32 " pooling size: "
262 "pooling size dimensions must be non-zero",
263 pooling_width, pooling_height);
264 goto error;
265 }
266
267 if (pooling_size == 1) {
268 xnn_log_error(
269 "failed to create Average Pooling operator with 1 pooling element: 1x1 pooling is meaningless");
270 goto error;
271 }
272
273 if (stride_height == 0 || stride_width == 0) {
274 xnn_log_error(
275 "failed to create Average Pooling operator with %" PRIu32 "x%" PRIu32 " stride: "
276 "stride dimensions must be non-zero",
277 stride_width, stride_height);
278 goto error;
279 }
280
281 if (channels == 0) {
282 xnn_log_error(
283 "failed to create Average Pooling operator with %zu channels: number of channels must be non-zero",
284 channels);
285 goto error;
286 }
287
288 if (input_pixel_stride < channels) {
289 xnn_log_error(
290 "failed to create Average Pooling operator with input pixel stride of %zu: "
291 "stride must be at least as large as the number of channels (%zu)",
292 input_pixel_stride, channels);
293 goto error;
294 }
295
296 if (output_pixel_stride < channels) {
297 xnn_log_error(
298 "failed to create Average Pooling operator with output pixel stride of %zu: "
299 "stride must be at least as large as the number of channels (%zu)",
300 output_pixel_stride, channels);
301 goto error;
302 }
303
304 if (isnan(output_min)) {
305 xnn_log_error(
306 "failed to create Average Pooling operator with NaN output lower bound: lower bound must be non-NaN");
307 goto error;
308 }
309
310 if (isnan(output_max)) {
311 xnn_log_error(
312 "failed to create Average Pooling operator with NaN output upper bound: upper bound must be non-NaN");
313 goto error;
314 }
315
316 if (output_min >= output_max) {
317 xnn_log_error(
318 "failed to create Average Pooling operator with [%.7g, %.7g] output range: lower bound must be below upper bound",
319 output_min, output_max);
320 goto error;
321 }
322
Marat Dukhan466da752020-02-28 02:00:49 -0800323 const bool any_padding = (input_padding_left | input_padding_top | input_padding_right | input_padding_bottom) != 0;
324 if ((flags & XNN_FLAG_TENSORFLOW_SAME_PADDING) != 0) {
325 if (any_padding) {
326 xnn_log_error(
327 "failed to create Average Pooling operator with %" PRIu32 "+%" PRIu32 "x%" PRIu32 "+%" PRIu32" padding: "
328 "TensorFlow SAME padding can't be combined with explicit padding specification",
329 input_padding_top, input_padding_left, input_padding_bottom, input_padding_right);
330 goto error;
331 }
332 }
333
XNNPACK Teamb455b122019-09-27 18:10:33 -0700334 status = xnn_status_out_of_memory;
335
Marat Dukhan04f03be2019-11-19 12:36:47 -0800336 average_pooling_op = xnn_allocate_zero_simd_memory(sizeof(struct xnn_operator));
XNNPACK Teamb455b122019-09-27 18:10:33 -0700337 if (average_pooling_op == NULL) {
338 xnn_log_error("failed to allocate %zu bytes for Average Pooling operator descriptor", sizeof(struct xnn_operator));
339 goto error;
340 }
341
XNNPACK Teamb455b122019-09-27 18:10:33 -0700342 const uint32_t mr = xnn_params.f32.avgpool.mr;
343 const uint32_t qr = xnn_params.f32.avgpool.qr;
Marat Dukhan466da752020-02-28 02:00:49 -0800344 const bool tf_same_padding = (flags & XNN_FLAG_TENSORFLOW_SAME_PADDING) != 0;
345 if (any_padding || tf_same_padding || pooling_size < mr || (pooling_size - mr) % qr != 0) {
Marat Dukhan04f03be2019-11-19 12:36:47 -0800346 void* zero_buffer = xnn_allocate_zero_simd_memory(channels * sizeof(float) + XNN_EXTRA_BYTES);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700347 if (zero_buffer == NULL) {
348 xnn_log_error("failed to allocate %zu bytes for Average Pooling zero padding",
349 channels * sizeof(float) + XNN_EXTRA_BYTES);
350 goto error;
351 }
XNNPACK Teamb455b122019-09-27 18:10:33 -0700352 average_pooling_op->zero_buffer = zero_buffer;
353 }
354
355 average_pooling_op->padding_top = input_padding_top;
356 average_pooling_op->padding_right = input_padding_right;
357 average_pooling_op->padding_bottom = input_padding_bottom;
358 average_pooling_op->padding_left = input_padding_left;
359
360 average_pooling_op->kernel_height = pooling_height;
361 average_pooling_op->kernel_width = pooling_width;
362 average_pooling_op->stride_height = stride_height;
363 average_pooling_op->stride_width = stride_width;
364 average_pooling_op->dilation_height = 1;
365 average_pooling_op->dilation_width = 1;
366 average_pooling_op->channels = channels;
367 average_pooling_op->input_pixel_stride = input_pixel_stride;
368 average_pooling_op->output_pixel_stride = output_pixel_stride;
369
Marat Dukhanefc47b82019-11-18 09:25:38 -0800370 average_pooling_op->type = xnn_operator_type_average_pooling_nhwc_f32;
XNNPACK Teamb455b122019-09-27 18:10:33 -0700371 if (any_padding) {
372 average_pooling_op->f32_output_params =
Marat Dukhaneeaa7bd2019-10-25 17:31:25 -0700373 xnn_init_f32_output_params(output_min, output_max);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700374
375 average_pooling_op->ukernel.type = xnn_ukernel_type_pixelwise_average_pooling;
376 } else {
377 average_pooling_op->f32_avgpool_params =
Marat Dukhaneeaa7bd2019-10-25 17:31:25 -0700378 xnn_init_f32_avgpool_params(1.0f / (float) pooling_size, output_min, output_max);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700379
380 average_pooling_op->ukernel.type = xnn_ukernel_type_average_pooling;
381 }
Marat Dukhan466da752020-02-28 02:00:49 -0800382 average_pooling_op->flags = flags;
XNNPACK Teamb455b122019-09-27 18:10:33 -0700383
384 *average_pooling_op_out = average_pooling_op;
385 return xnn_status_success;
386
387error:
388 xnn_delete_operator(average_pooling_op);
389 return status;
390}
391
// Shared per-run setup for Q8 and F32 NHWC average pooling operators.
//
// Computes output dimensions (deriving implicit padding when the operator was
// created with TF SAME padding), rebuilds the indirection buffer (and, for the
// pixelwise variant, the per-pixel reciprocal buffer) when the input size
// changed, and fills in the compute context and 2-D parallelization plan.
//
// avgpool  - parameters of the regular average pooling micro-kernels (required).
// pavgpool - parameters of the pixelwise micro-kernels; may be NULL when
//            is_pixelwise is false.
// params   - micro-kernel parameters; params_size bytes are copied into the
//            compute context.
// num_threads - accepted for interface symmetry; not referenced in this
//            implementation.
static enum xnn_status setup_average_pooling2d(
  xnn_operator_t average_pooling_op,
  size_t batch_size,
  size_t input_height,
  size_t input_width,
  const void* input,
  void* output,
  uint32_t log2_input_element_size,
  uint32_t log2_output_element_size,
  struct avgpool_parameters avgpool[restrict static 1],
  struct pavgpool_parameters pavgpool[restrict 1],
  const void* params,
  size_t params_size,
  size_t num_threads,
  bool is_pixelwise)
{
  assert(!is_pixelwise || pavgpool != NULL);

  // Invalidate first so an early error leaves the operator non-runnable.
  average_pooling_op->state = xnn_run_state_invalid;

  if (!xnn_params.initialized) {
    xnn_log_error("failed to setup Average Pooling operator: XNNPACK is not initialized");
    return xnn_status_uninitialized;
  }

  if (input_width == 0 || input_height == 0) {
    xnn_log_error(
      "failed to setup Average Pooling operator with %zux%zu input: input dimensions must be non-zero",
      input_width, input_height);
    return xnn_status_invalid_parameter;
  }

  if (batch_size == 0) {
    // An empty batch is a successful no-op.
    average_pooling_op->state = xnn_run_state_skip;
    return xnn_status_success;
  }

  average_pooling_op->input_height = input_height;
  average_pooling_op->input_width = input_width;
  average_pooling_op->input = input;

  if (average_pooling_op->flags & XNN_FLAG_TENSORFLOW_SAME_PADDING) {
    // TF SAME: output size depends only on input size and stride; the total
    // padding is then back-computed and split with the extra element going to
    // the bottom/right side, matching TensorFlow's convention.
    average_pooling_op->output_height = compute_output_dimension_with_tf_same_padding(
      input_height, average_pooling_op->stride_height);
    average_pooling_op->output_width = compute_output_dimension_with_tf_same_padding(
      input_width, average_pooling_op->stride_width);

    const uint32_t effective_kernel_height = (average_pooling_op->kernel_height - 1) * average_pooling_op->dilation_height + 1;
    const uint32_t effective_kernel_width = (average_pooling_op->kernel_width - 1) * average_pooling_op->dilation_width + 1;
    const uint32_t total_padding_height =
      (average_pooling_op->output_height - 1) * average_pooling_op->stride_height + effective_kernel_height - input_height;
    const uint32_t total_padding_width =
      (average_pooling_op->output_width - 1) * average_pooling_op->stride_width + effective_kernel_width - input_width;
    average_pooling_op->padding_top = total_padding_height / 2;
    average_pooling_op->padding_left = total_padding_width / 2;
    average_pooling_op->padding_bottom = total_padding_height - average_pooling_op->padding_top;
    average_pooling_op->padding_right = total_padding_width - average_pooling_op->padding_left;
  } else {
    // Explicit padding: standard (padded - kernel) / stride + 1 formula.
    average_pooling_op->output_height = compute_output_dimension(
      average_pooling_op->padding_top + input_height + average_pooling_op->padding_bottom,
      average_pooling_op->kernel_height,
      average_pooling_op->stride_height);
    average_pooling_op->output_width = compute_output_dimension(
      average_pooling_op->padding_left + input_width + average_pooling_op->padding_right,
      average_pooling_op->kernel_width,
      average_pooling_op->stride_width);
  }
  average_pooling_op->output = output;

  const size_t pooling_height = average_pooling_op->kernel_height;
  const size_t pooling_width = average_pooling_op->kernel_width;
  const size_t pooling_size = pooling_height * pooling_width;
  const size_t output_height = average_pooling_op->output_height;
  const size_t output_width = average_pooling_op->output_width;

  const uint32_t mr = is_pixelwise ? pavgpool->mr : avgpool->mr;

  // Indirection buffer layout: step_width pointer columns are shared between
  // horizontally adjacent output pixels; step_height is the number of pointers
  // per output row.
  const size_t step_width = min(average_pooling_op->stride_width, pooling_width);
  const size_t step_height = pooling_size + (output_width - 1) * step_width * pooling_height;

  // Rebuild the indirection buffer only when the input geometry changed since
  // the previous setup; otherwise the cached buffer remains valid.
  const size_t last_input_height = average_pooling_op->last_input_height;
  const size_t last_input_width = average_pooling_op->last_input_width;
  if (input_height != last_input_height || input_width != last_input_width) {
    // Micro-kernel may read up to (mr - 1) elements after the end of indirection buffer.
    const size_t indirection_buffer_size = sizeof(void*) * ((mr - 1) + batch_size * output_height * step_height);

    const void** indirection_buffer = (const void**) xnn_reallocate_memory(average_pooling_op->indirection_buffer, indirection_buffer_size);
    if (indirection_buffer == NULL) {
      xnn_log_error("failed to allocate %zu bytes for indirection buffer", indirection_buffer_size);
      return xnn_status_out_of_memory;
    }
    average_pooling_op->indirection_buffer = indirection_buffer;

    // Indirection buffer always setup for batch size 1, larger batch size supported through input_offset argument
    average_pooling_op->batch_size = 1;
    xnn_indirection_init_dwconv2d(
      average_pooling_op, 0, step_height, step_width, log2_input_element_size);

    // Remember the input pointer/geometry the buffer was built against; later
    // runs with the same geometry only apply a relative input_offset.
    average_pooling_op->last_input = input;
    average_pooling_op->last_input_height = input_height;
    average_pooling_op->last_input_width = input_width;
  }

  const size_t channels = average_pooling_op->channels;

  const size_t indirect_input_height_stride = step_height * sizeof(void*);
  const size_t output_width_stride = average_pooling_op->output_pixel_stride << log2_output_element_size;
  const size_t output_height_stride = output_width * output_width_stride;

  if (is_pixelwise) {
    if (input_height != last_input_height || input_width != last_input_width) {
      // Per-output-pixel multiplier buffer: 1 / (number of valid input
      // elements under the window), so padded positions are excluded from
      // the average.
      const size_t pixelwise_buffer_size = output_height * output_width * sizeof(float);
      float* pixelwise_buffer = (float*) xnn_reallocate_memory(average_pooling_op->pixelwise_buffer, pixelwise_buffer_size);
      if (pixelwise_buffer == NULL) {
        xnn_log_error("failed to allocate %zu bytes for pixelwise buffer", pixelwise_buffer_size);
        return xnn_status_out_of_memory;
      }
      average_pooling_op->pixelwise_buffer = pixelwise_buffer;

      float* pixelwise_pointer = pixelwise_buffer;
      for (size_t output_y = 0; output_y < output_height; output_y++) {
        // Clamp the window's vertical extent to the valid input rows.
        const size_t input_y_start = doz(output_y * average_pooling_op->stride_height, average_pooling_op->padding_top);
        const size_t input_y_end =
          min(doz(output_y * average_pooling_op->stride_height + average_pooling_op->kernel_height, average_pooling_op->padding_top), input_height);
        const uint32_t input_y_range = (uint32_t) (input_y_end - input_y_start);
        for (size_t output_x = 0; output_x < output_width; output_x++) {
          // Clamp the window's horizontal extent to the valid input columns.
          const size_t input_x_start = doz(output_x * average_pooling_op->stride_width, average_pooling_op->padding_left);
          const size_t input_x_end =
            min(doz(output_x * average_pooling_op->stride_width + average_pooling_op->kernel_width, average_pooling_op->padding_left), input_width);
          const uint32_t input_x_range = (uint32_t) (input_x_end - input_x_start);
          *pixelwise_pointer++ = 1.0f / ((float) (int32_t) (input_y_range * input_x_range));
        }
      }
    }

    // Multipass kernels process mr rows first, then qr rows at a time; adjust
    // the per-row input increment for the rounded-up row count.
    const uint32_t qr = pavgpool->qr;
    const size_t multipass_adjustment =
      pooling_size > mr ? round_up(pooling_size - mr, qr) + mr - qr : 0;
    average_pooling_op->context.pixelwise_average_pooling = (struct pixelwise_average_pooling_context) {
      .indirect_input = average_pooling_op->indirection_buffer,
      .indirect_input_height_stride = indirect_input_height_stride,
      .input_batch_stride = input_height * input_width * average_pooling_op->input_pixel_stride << log2_input_element_size,
      .input_offset = (size_t) ((uintptr_t) input - (uintptr_t) average_pooling_op->last_input),
      .pixelwise_buffer = average_pooling_op->pixelwise_buffer,
      .pixelwise_buffer_height_stride = output_width * sizeof(float),
      .output = output,
      .output_batch_stride = output_height * output_height_stride,
      .output_height_stride = output_height_stride,
      .output_width = output_width,
      .pooling_size = pooling_size,
      .channels = channels,
      .zero = average_pooling_op->zero_buffer,
      .input_increment = (pooling_height * step_width - multipass_adjustment) * sizeof(void*),
      .output_increment = output_width_stride - (channels << log2_output_element_size),
    };
    memcpy(&average_pooling_op->context.pixelwise_average_pooling.params, params, params_size);
    if (pooling_size <= mr) {
      average_pooling_op->context.pixelwise_average_pooling.unipass_ukernel = pavgpool->up;
      average_pooling_op->compute.task_2d = (pthreadpool_task_2d_t) xnn_compute_pixelwise_average_pooling_unipass;
    } else {
      average_pooling_op->context.pixelwise_average_pooling.multipass_ukernel = pavgpool->mp;
      average_pooling_op->compute.task_2d = (pthreadpool_task_2d_t) xnn_compute_pixelwise_average_pooling_multipass;
    }
  } else {
    const uint32_t qr = avgpool->qr;
    const size_t multipass_adjustment =
      pooling_size > mr ? round_up(pooling_size - mr, qr) + mr - qr : 0;
    average_pooling_op->context.average_pooling = (struct average_pooling_context) {
      .indirect_input = average_pooling_op->indirection_buffer,
      .indirect_input_height_stride = indirect_input_height_stride,
      .input_offset = (size_t) ((uintptr_t) input - (uintptr_t) average_pooling_op->last_input),
      .input_batch_stride = input_height * input_width * average_pooling_op->input_pixel_stride << log2_input_element_size,
      .output = output,
      .output_batch_stride = output_height * output_height_stride,
      .output_height_stride = output_height_stride,
      .output_width = output_width,
      .pooling_size = pooling_size,
      .channels = channels,
      .zero = average_pooling_op->zero_buffer,
      .input_increment = (pooling_height * step_width - multipass_adjustment) * sizeof(void*),
      .output_increment = output_width_stride - (channels << log2_output_element_size),
      // NOTE(review): this initializer is immediately overwritten by the
      // memcpy below (which also handles the Q8 case) — likely redundant.
      .params.f32 = average_pooling_op->f32_avgpool_params,
    };
    memcpy(&average_pooling_op->context.average_pooling.params, params, params_size);
    if (pooling_size <= mr) {
      average_pooling_op->context.average_pooling.unipass_ukernel = avgpool->up;
      average_pooling_op->compute.task_2d = (pthreadpool_task_2d_t) xnn_compute_average_pooling_unipass;
    } else {
      average_pooling_op->context.average_pooling.multipass_ukernel = avgpool->mp;
      average_pooling_op->compute.task_2d = (pthreadpool_task_2d_t) xnn_compute_average_pooling_multipass;
    }
  }
  // Parallelize over (batch, output row).
  average_pooling_op->compute.type = xnn_parallelization_type_2d;
  average_pooling_op->compute.range[0] = batch_size;
  average_pooling_op->compute.range[1] = output_height;
  average_pooling_op->state = xnn_run_state_ready;

  return xnn_status_success;
}
591
Marat Dukhand6a54632020-03-11 06:22:45 -0700592enum xnn_status xnn_setup_average_pooling2d_nhwc_q8(
593 xnn_operator_t average_pooling_op,
594 size_t batch_size,
595 size_t input_height,
596 size_t input_width,
597 const uint8_t* input,
598 uint8_t* output,
599 pthreadpool_t threadpool)
600{
601 if (average_pooling_op->type != xnn_operator_type_average_pooling_nhwc_q8) {
602 xnn_log_error("failed to setup Average Pooling (Q8) operator: operator type mismatch");
603 return xnn_status_invalid_parameter;
604 }
605
606 assert(average_pooling_op->ukernel.type == xnn_ukernel_type_average_pooling);
607
608 return setup_average_pooling2d(
609 average_pooling_op,
610 batch_size, input_height, input_width,
611 input, output,
612 0 /* log2(sizeof(input element)) = log2(sizeof(uint8_t)) */,
613 0 /* log2(sizeof(output element)) = log2(sizeof(uint8_t)) */,
614 &xnn_params.q8.avgpool,
615 NULL /* pavgpool */,
616 &average_pooling_op->q8_avgpool_params,
617 sizeof(average_pooling_op->q8_avgpool_params),
618 pthreadpool_get_threads_count(threadpool),
619 false /* pixelwise not supported */);
620}
621
XNNPACK Teamb455b122019-09-27 18:10:33 -0700622enum xnn_status xnn_setup_average_pooling2d_nhwc_f32(
623 xnn_operator_t average_pooling_op,
624 size_t batch_size,
625 size_t input_height,
626 size_t input_width,
627 const float* input,
628 float* output,
629 pthreadpool_t threadpool)
630{
Marat Dukhanefc47b82019-11-18 09:25:38 -0800631 if (average_pooling_op->type != xnn_operator_type_average_pooling_nhwc_f32) {
XNNPACK Teamb455b122019-09-27 18:10:33 -0700632 xnn_log_error("failed to setup Average Pooling (F32) operator: operator type mismatch");
633 return xnn_status_invalid_parameter;
634 }
XNNPACK Teamb455b122019-09-27 18:10:33 -0700635
Marat Dukhand6a54632020-03-11 06:22:45 -0700636 assert(average_pooling_op->ukernel.type == xnn_ukernel_type_average_pooling ||
637 average_pooling_op->ukernel.type == xnn_ukernel_type_pixelwise_average_pooling);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700638
Marat Dukhand6a54632020-03-11 06:22:45 -0700639 return setup_average_pooling2d(
640 average_pooling_op,
641 batch_size, input_height, input_width,
642 input, output,
643 2 /* log2(sizeof(input element)) = log2(sizeof(float)) */,
644 2 /* log2(sizeof(output element)) = log2(sizeof(float)) */,
645 &xnn_params.f32.avgpool,
646 &xnn_params.f32.pavgpool,
647 &average_pooling_op->f32_avgpool_params,
648 sizeof(average_pooling_op->f32_avgpool_params),
649 pthreadpool_get_threads_count(threadpool),
650 average_pooling_op->ukernel.type == xnn_ukernel_type_pixelwise_average_pooling);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700651}