Fix F32 IGEMM benchmark loop to not require capping NC to NR
PiperOrigin-RevId: 426226711
diff --git a/bench/f32-igemm.cc b/bench/f32-igemm.cc
index 8066259..0c41e07 100644
--- a/bench/f32-igemm.cc
+++ b/bench/f32-igemm.cc
@@ -130,15 +130,12 @@
for (uint32_t m = 0; m < output_size; m += mr) {
const uint32_t mb = min(output_size - m, mr);
- for (uint32_t n = 0; n < group_output_channels; n += nr) {
- const uint32_t nb = min(group_output_channels - n, nr);
- f32_igemm(
- mb, nb, group_input_channels * sizeof(float), kernel_size * mr * sizeof(void*),
- i.data() + buffer_index * i_elements + m,
- w.data() + buffer_index * w_elements + n * (kc_stride * kernel_size + 1),
- c.data() + buffer_index * c_elements + m * group_output_channels + n, group_output_channels * sizeof(float), nr * sizeof(float),
- 0, z.data(), &params);
- }
+ f32_igemm(
+ mb, group_output_channels, group_input_channels * sizeof(float), kernel_size * mr * sizeof(void*),
+ i.data() + buffer_index * i_elements + m,
+ w.data() + buffer_index * w_elements,
+ c.data() + buffer_index * c_elements + m * group_output_channels, group_output_channels * sizeof(float), nr * sizeof(float),
+ 0, z.data(), &params);
}
}