Avoid batch-replication of the indirection buffer in depthwise convolution (DW Conv) and average pooling
- The size of the indirection buffer is no longer proportional to the batch size
- No need to reinitialize indirection buffer after change in batch size
PiperOrigin-RevId: 337318984
diff --git a/bench/f16-dwconv.cc b/bench/f16-dwconv.cc
index 58c9e3d..836da0b 100644
--- a/bench/f16-dwconv.cc
+++ b/bench/f16-dwconv.cc
@@ -102,7 +102,6 @@
convolution_op.input = a.data();
convolution_op.input_pixel_stride = channels;
convolution_op.zero_buffer = z.data();
- convolution_op.batch_size = 1;
convolution_op.input_height = input_height;
convolution_op.input_width = input_width;
convolution_op.output_height = output_height;
@@ -116,7 +115,7 @@
convolution_op.padding_top = padding_top;
convolution_op.padding_left = padding_left;
- xnn_indirection_init_dwconv2d(&convolution_op, 0, step_height, step_width, 1 /* log2(sizeof(uint16_t)) */);
+ xnn_indirection_init_dwconv2d(&convolution_op, step_height, step_width, 1 /* log2(sizeof(uint16_t)) */);
for (size_t n = 1; n < num_buffers; n++) {
std::copy(i.cbegin(), i.cbegin() + i_elements, i.begin() + n * i_elements);
}
@@ -134,7 +133,7 @@
buffer_index = (buffer_index + 1) % num_buffers;
state.ResumeTiming();
- for (uint32_t y = 0; y < output_height; y++) {
+ for (size_t y = 0; y < output_height; y++) {
dwconv(channels, output_width,
reinterpret_cast<const void**>(i.data() + buffer_index * i_elements + step_height * y),
w.data() + buffer_index * w_elements,
diff --git a/bench/f32-dwconv.cc b/bench/f32-dwconv.cc
index b679c2f..648cf34 100644
--- a/bench/f32-dwconv.cc
+++ b/bench/f32-dwconv.cc
@@ -100,7 +100,6 @@
convolution_op.input = a.data();
convolution_op.input_pixel_stride = channels;
convolution_op.zero_buffer = z.data();
- convolution_op.batch_size = 1;
convolution_op.input_height = input_height;
convolution_op.input_width = input_width;
convolution_op.output_height = output_height;
@@ -114,7 +113,7 @@
convolution_op.padding_top = padding_top;
convolution_op.padding_left = padding_left;
- xnn_indirection_init_dwconv2d(&convolution_op, 0, step_height, step_width, 2 /* log2(sizeof(float)) */);
+ xnn_indirection_init_dwconv2d(&convolution_op, step_height, step_width, 2 /* log2(sizeof(float)) */);
for (size_t n = 1; n < num_buffers; n++) {
std::copy(i.cbegin(), i.cbegin() + i_elements, i.begin() + n * i_elements);
}
@@ -132,7 +131,7 @@
buffer_index = (buffer_index + 1) % num_buffers;
state.ResumeTiming();
- for (uint32_t y = 0; y < output_height; y++) {
+ for (size_t y = 0; y < output_height; y++) {
dwconv(channels, output_width,
i.data() + buffer_index * i_elements + step_height * y,
w.data() + buffer_index * w_elements,