Avoid batch-replication of indirection buffer in DW Conv and Avg Pooling

- Indirection buffer is no longer proportional to batch size
- No need to reinitialize indirection buffer after change in batch size

PiperOrigin-RevId: 337318984

commit: c79427c0678606e1eeabea288cc757012f2d1be4 [log] [tgz]
author: Marat Dukhan <maratek@google.com> Thu Oct 15 09:04:21 2020 -0700
committer: XNNPACK Team <xnnpack-github-robot@google.com> Thu Oct 15 09:04:55 2020 -0700
tree: 1357b3e28872bc06805e33db51626bef0eee4a6a
parent: dc2b29cdba1e5c324450148b81b59684778192bd [diff]
diff --git a/bench/f16-dwconv.cc b/bench/f16-dwconv.cc
index 58c9e3d..836da0b 100644
--- a/bench/f16-dwconv.cc
+++ b/bench/f16-dwconv.cc

@@ -102,7 +102,6 @@
   convolution_op.input              = a.data();
   convolution_op.input_pixel_stride = channels;
   convolution_op.zero_buffer        = z.data();
-  convolution_op.batch_size         = 1;
   convolution_op.input_height       = input_height;
   convolution_op.input_width        = input_width;
   convolution_op.output_height      = output_height;
@@ -116,7 +115,7 @@
   convolution_op.padding_top        = padding_top;
   convolution_op.padding_left       = padding_left;
 
-  xnn_indirection_init_dwconv2d(&convolution_op, 0, step_height, step_width, 1 /* log2(sizeof(uint16_t)) */);
+  xnn_indirection_init_dwconv2d(&convolution_op, step_height, step_width, 1 /* log2(sizeof(uint16_t)) */);
   for (size_t n = 1; n < num_buffers; n++) {
     std::copy(i.cbegin(), i.cbegin() + i_elements, i.begin() + n * i_elements);
   }
@@ -134,7 +133,7 @@
     buffer_index = (buffer_index + 1) % num_buffers;
     state.ResumeTiming();
 
-    for (uint32_t y = 0; y < output_height; y++) {
+    for (size_t y = 0; y < output_height; y++) {
       dwconv(channels, output_width,
         reinterpret_cast<const void**>(i.data() + buffer_index * i_elements + step_height * y),
         w.data() + buffer_index * w_elements,

diff --git a/bench/f32-dwconv.cc b/bench/f32-dwconv.cc
index b679c2f..648cf34 100644
--- a/bench/f32-dwconv.cc
+++ b/bench/f32-dwconv.cc

@@ -100,7 +100,6 @@
   convolution_op.input              = a.data();
   convolution_op.input_pixel_stride = channels;
   convolution_op.zero_buffer        = z.data();
-  convolution_op.batch_size         = 1;
   convolution_op.input_height       = input_height;
   convolution_op.input_width        = input_width;
   convolution_op.output_height      = output_height;
@@ -114,7 +113,7 @@
   convolution_op.padding_top        = padding_top;
   convolution_op.padding_left       = padding_left;
 
-  xnn_indirection_init_dwconv2d(&convolution_op, 0, step_height, step_width, 2 /* log2(sizeof(float)) */);
+  xnn_indirection_init_dwconv2d(&convolution_op, step_height, step_width, 2 /* log2(sizeof(float)) */);
   for (size_t n = 1; n < num_buffers; n++) {
     std::copy(i.cbegin(), i.cbegin() + i_elements, i.begin() + n * i_elements);
   }
@@ -132,7 +131,7 @@
     buffer_index = (buffer_index + 1) % num_buffers;
     state.ResumeTiming();
 
-    for (uint32_t y = 0; y < output_height; y++) {
+    for (size_t y = 0; y < output_height; y++) {
       dwconv(channels, output_width,
         i.data() + buffer_index * i_elements + step_height * y,
         w.data() + buffer_index * w_elements,
commit	c79427c0678606e1eeabea288cc757012f2d1be4	[log] [tgz]
author	Marat Dukhan <maratek@google.com>	Thu Oct 15 09:04:21 2020 -0700
committer	XNNPACK Team <xnnpack-github-robot@google.com>	Thu Oct 15 09:04:55 2020 -0700
tree	1357b3e28872bc06805e33db51626bef0eee4a6a
parent	dc2b29cdba1e5c324450148b81b59684778192bd [diff]