DWCONV add input_offset and zero parameters

PiperOrigin-RevId: 311991960
diff --git a/test/f32-dwconv-minmax.cc b/test/f32-dwconv-minmax.cc
index cc9cff2..cae6fd8 100644
--- a/test/f32-dwconv-minmax.cc
+++ b/test/f32-dwconv-minmax.cc
@@ -176,6 +176,33 @@
         .Test(xnn_f32_dwconv_minmax_ukernel_up4x9__aarch64_neonfma);
     }
   }
+
+  TEST(F32_DWCONV_MINMAX_UP4X9__AARCH64_NEONFMA, input_offset) {
+    TEST_REQUIRES_ARM_NEON_FMA;
+    for (uint32_t channels = 8; channels < 64; channels += 12) {
+      DWConvMicrokernelTester()
+        .cr(4)
+        .kr(9)
+        .channels(channels)
+        .input_offset(112)
+        .Test(xnn_f32_dwconv_minmax_ukernel_up4x9__aarch64_neonfma);
+    }
+  }
+
+  TEST(F32_DWCONV_MINMAX_UP4X9__AARCH64_NEONFMA, zero) {
+    TEST_REQUIRES_ARM_NEON_FMA;
+    for (uint32_t mz = 0; mz < 9; mz++) {
+      for (uint32_t channels = 8; channels < 64; channels += 12) {
+        DWConvMicrokernelTester()
+          .cr(4)
+          .kr(9)
+          .channels(channels)
+          .input_offset(112)
+          .zero_index(mz)
+          .Test(xnn_f32_dwconv_minmax_ukernel_up4x9__aarch64_neonfma);
+      }
+    }
+  }
 #endif  // XNN_ARCH_ARM64
 
 
@@ -344,6 +371,33 @@
         .Test(xnn_f32_dwconv_minmax_ukernel_up4x9__aarch64_neonfma_cortex_a55);
     }
   }
+
+  TEST(F32_DWCONV_MINMAX_UP4X9__AARCH64_NEONFMA_CORTEX_A55, input_offset) {
+    TEST_REQUIRES_ARM_NEON_FMA;
+    for (uint32_t channels = 12; channels < 64; channels += 12) {
+      DWConvMicrokernelTester()
+        .cr(4)
+        .kr(9)
+        .channels(channels)
+        .input_offset(112)
+        .Test(xnn_f32_dwconv_minmax_ukernel_up4x9__aarch64_neonfma_cortex_a55);
+    }
+  }
+
+  TEST(F32_DWCONV_MINMAX_UP4X9__AARCH64_NEONFMA_CORTEX_A55, zero) {
+    TEST_REQUIRES_ARM_NEON_FMA;
+    for (uint32_t mz = 0; mz < 9; mz++) {
+      for (uint32_t channels = 12; channels < 64; channels += 12) {
+        DWConvMicrokernelTester()
+          .cr(4)
+          .kr(9)
+          .channels(channels)
+          .input_offset(112)
+          .zero_index(mz)
+          .Test(xnn_f32_dwconv_minmax_ukernel_up4x9__aarch64_neonfma_cortex_a55);
+      }
+    }
+  }
 #endif  // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
 
 
@@ -503,6 +557,33 @@
         .Test(xnn_f32_dwconv_minmax_ukernel_up4x25__neonfma);
     }
   }
+
+  TEST(F32_DWCONV_MINMAX_UP4X25__NEONFMA, input_offset) {
+    TEST_REQUIRES_ARM_NEON_FMA;
+    for (uint32_t channels = 8; channels < 64; channels += 12) {
+      DWConvMicrokernelTester()
+        .cr(4)
+        .kr(25)
+        .channels(channels)
+        .input_offset(112)
+        .Test(xnn_f32_dwconv_minmax_ukernel_up4x25__neonfma);
+    }
+  }
+
+  TEST(F32_DWCONV_MINMAX_UP4X25__NEONFMA, zero) {
+    TEST_REQUIRES_ARM_NEON_FMA;
+    for (uint32_t mz = 0; mz < 25; mz++) {
+      for (uint32_t channels = 8; channels < 64; channels += 12) {
+        DWConvMicrokernelTester()
+          .cr(4)
+          .kr(25)
+          .channels(channels)
+          .input_offset(112)
+          .zero_index(mz)
+          .Test(xnn_f32_dwconv_minmax_ukernel_up4x25__neonfma);
+      }
+    }
+  }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
@@ -662,6 +743,33 @@
         .Test(xnn_f32_dwconv_minmax_ukernel_up4x25__neonfma_acc2);
     }
   }
+
+  TEST(F32_DWCONV_MINMAX_UP4X25__NEONFMA_ACC2, input_offset) {
+    TEST_REQUIRES_ARM_NEON_FMA;
+    for (uint32_t channels = 8; channels < 64; channels += 12) {
+      DWConvMicrokernelTester()
+        .cr(4)
+        .kr(25)
+        .channels(channels)
+        .input_offset(112)
+        .Test(xnn_f32_dwconv_minmax_ukernel_up4x25__neonfma_acc2);
+    }
+  }
+
+  TEST(F32_DWCONV_MINMAX_UP4X25__NEONFMA_ACC2, zero) {
+    TEST_REQUIRES_ARM_NEON_FMA;
+    for (uint32_t mz = 0; mz < 25; mz++) {
+      for (uint32_t channels = 8; channels < 64; channels += 12) {
+        DWConvMicrokernelTester()
+          .cr(4)
+          .kr(25)
+          .channels(channels)
+          .input_offset(112)
+          .zero_index(mz)
+          .Test(xnn_f32_dwconv_minmax_ukernel_up4x25__neonfma_acc2);
+      }
+    }
+  }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
@@ -821,6 +929,33 @@
         .Test(xnn_f32_dwconv_minmax_ukernel_up8x25__neonfma);
     }
   }
+
+  TEST(F32_DWCONV_MINMAX_UP8X25__NEONFMA, input_offset) {
+    TEST_REQUIRES_ARM_NEON_FMA;
+    for (uint32_t channels = 16; channels < 128; channels += 24) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(25)
+        .channels(channels)
+        .input_offset(176)
+        .Test(xnn_f32_dwconv_minmax_ukernel_up8x25__neonfma);
+    }
+  }
+
+  TEST(F32_DWCONV_MINMAX_UP8X25__NEONFMA, zero) {
+    TEST_REQUIRES_ARM_NEON_FMA;
+    for (uint32_t mz = 0; mz < 25; mz++) {
+      for (uint32_t channels = 16; channels < 128; channels += 24) {
+        DWConvMicrokernelTester()
+          .cr(8)
+          .kr(25)
+          .channels(channels)
+          .input_offset(176)
+          .zero_index(mz)
+          .Test(xnn_f32_dwconv_minmax_ukernel_up8x25__neonfma);
+      }
+    }
+  }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
@@ -980,6 +1115,33 @@
         .Test(xnn_f32_dwconv_minmax_ukernel_up8x25__neonfma_acc2);
     }
   }
+
+  TEST(F32_DWCONV_MINMAX_UP8X25__NEONFMA_ACC2, input_offset) {
+    TEST_REQUIRES_ARM_NEON_FMA;
+    for (uint32_t channels = 16; channels < 128; channels += 24) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(25)
+        .channels(channels)
+        .input_offset(176)
+        .Test(xnn_f32_dwconv_minmax_ukernel_up8x25__neonfma_acc2);
+    }
+  }
+
+  TEST(F32_DWCONV_MINMAX_UP8X25__NEONFMA_ACC2, zero) {
+    TEST_REQUIRES_ARM_NEON_FMA;
+    for (uint32_t mz = 0; mz < 25; mz++) {
+      for (uint32_t channels = 16; channels < 128; channels += 24) {
+        DWConvMicrokernelTester()
+          .cr(8)
+          .kr(25)
+          .channels(channels)
+          .input_offset(176)
+          .zero_index(mz)
+          .Test(xnn_f32_dwconv_minmax_ukernel_up8x25__neonfma_acc2);
+      }
+    }
+  }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
@@ -1139,6 +1301,33 @@
         .Test(xnn_f32_dwconv_minmax_ukernel_up4x9__neonfma);
     }
   }
+
+  TEST(F32_DWCONV_MINMAX_UP4X9__NEONFMA, input_offset) {
+    TEST_REQUIRES_ARM_NEON_FMA;
+    for (uint32_t channels = 8; channels < 64; channels += 12) {
+      DWConvMicrokernelTester()
+        .cr(4)
+        .kr(9)
+        .channels(channels)
+        .input_offset(112)
+        .Test(xnn_f32_dwconv_minmax_ukernel_up4x9__neonfma);
+    }
+  }
+
+  TEST(F32_DWCONV_MINMAX_UP4X9__NEONFMA, zero) {
+    TEST_REQUIRES_ARM_NEON_FMA;
+    for (uint32_t mz = 0; mz < 9; mz++) {
+      for (uint32_t channels = 8; channels < 64; channels += 12) {
+        DWConvMicrokernelTester()
+          .cr(4)
+          .kr(9)
+          .channels(channels)
+          .input_offset(112)
+          .zero_index(mz)
+          .Test(xnn_f32_dwconv_minmax_ukernel_up4x9__neonfma);
+      }
+    }
+  }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
@@ -1298,6 +1487,33 @@
         .Test(xnn_f32_dwconv_minmax_ukernel_up4x9__neonfma_acc2);
     }
   }
+
+  TEST(F32_DWCONV_MINMAX_UP4X9__NEONFMA_ACC2, input_offset) {
+    TEST_REQUIRES_ARM_NEON_FMA;
+    for (uint32_t channels = 8; channels < 64; channels += 12) {
+      DWConvMicrokernelTester()
+        .cr(4)
+        .kr(9)
+        .channels(channels)
+        .input_offset(112)
+        .Test(xnn_f32_dwconv_minmax_ukernel_up4x9__neonfma_acc2);
+    }
+  }
+
+  TEST(F32_DWCONV_MINMAX_UP4X9__NEONFMA_ACC2, zero) {
+    TEST_REQUIRES_ARM_NEON_FMA;
+    for (uint32_t mz = 0; mz < 9; mz++) {
+      for (uint32_t channels = 8; channels < 64; channels += 12) {
+        DWConvMicrokernelTester()
+          .cr(4)
+          .kr(9)
+          .channels(channels)
+          .input_offset(112)
+          .zero_index(mz)
+          .Test(xnn_f32_dwconv_minmax_ukernel_up4x9__neonfma_acc2);
+      }
+    }
+  }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
@@ -1457,6 +1673,33 @@
         .Test(xnn_f32_dwconv_minmax_ukernel_up8x9__neonfma);
     }
   }
+
+  TEST(F32_DWCONV_MINMAX_UP8X9__NEONFMA, input_offset) {
+    TEST_REQUIRES_ARM_NEON_FMA;
+    for (uint32_t channels = 16; channels < 128; channels += 24) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(9)
+        .channels(channels)
+        .input_offset(176)
+        .Test(xnn_f32_dwconv_minmax_ukernel_up8x9__neonfma);
+    }
+  }
+
+  TEST(F32_DWCONV_MINMAX_UP8X9__NEONFMA, zero) {
+    TEST_REQUIRES_ARM_NEON_FMA;
+    for (uint32_t mz = 0; mz < 9; mz++) {
+      for (uint32_t channels = 16; channels < 128; channels += 24) {
+        DWConvMicrokernelTester()
+          .cr(8)
+          .kr(9)
+          .channels(channels)
+          .input_offset(176)
+          .zero_index(mz)
+          .Test(xnn_f32_dwconv_minmax_ukernel_up8x9__neonfma);
+      }
+    }
+  }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
@@ -1616,6 +1859,33 @@
         .Test(xnn_f32_dwconv_minmax_ukernel_up8x9__neonfma_acc2);
     }
   }
+
+  TEST(F32_DWCONV_MINMAX_UP8X9__NEONFMA_ACC2, input_offset) {
+    TEST_REQUIRES_ARM_NEON_FMA;
+    for (uint32_t channels = 16; channels < 128; channels += 24) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(9)
+        .channels(channels)
+        .input_offset(176)
+        .Test(xnn_f32_dwconv_minmax_ukernel_up8x9__neonfma_acc2);
+    }
+  }
+
+  TEST(F32_DWCONV_MINMAX_UP8X9__NEONFMA_ACC2, zero) {
+    TEST_REQUIRES_ARM_NEON_FMA;
+    for (uint32_t mz = 0; mz < 9; mz++) {
+      for (uint32_t channels = 16; channels < 128; channels += 24) {
+        DWConvMicrokernelTester()
+          .cr(8)
+          .kr(9)
+          .channels(channels)
+          .input_offset(176)
+          .zero_index(mz)
+          .Test(xnn_f32_dwconv_minmax_ukernel_up8x9__neonfma_acc2);
+      }
+    }
+  }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
@@ -1775,6 +2045,33 @@
         .Test(xnn_f32_dwconv_minmax_ukernel_up4x4__neonfma);
     }
   }
+
+  TEST(F32_DWCONV_MINMAX_UP4X4__NEONFMA, input_offset) {
+    TEST_REQUIRES_ARM_NEON_FMA;
+    for (uint32_t channels = 8; channels < 64; channels += 12) {
+      DWConvMicrokernelTester()
+        .cr(4)
+        .kr(4)
+        .channels(channels)
+        .input_offset(112)
+        .Test(xnn_f32_dwconv_minmax_ukernel_up4x4__neonfma);
+    }
+  }
+
+  TEST(F32_DWCONV_MINMAX_UP4X4__NEONFMA, zero) {
+    TEST_REQUIRES_ARM_NEON_FMA;
+    for (uint32_t mz = 0; mz < 4; mz++) {
+      for (uint32_t channels = 8; channels < 64; channels += 12) {
+        DWConvMicrokernelTester()
+          .cr(4)
+          .kr(4)
+          .channels(channels)
+          .input_offset(112)
+          .zero_index(mz)
+          .Test(xnn_f32_dwconv_minmax_ukernel_up4x4__neonfma);
+      }
+    }
+  }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
@@ -1934,6 +2231,33 @@
         .Test(xnn_f32_dwconv_minmax_ukernel_up4x4__neonfma_acc2);
     }
   }
+
+  TEST(F32_DWCONV_MINMAX_UP4X4__NEONFMA_ACC2, input_offset) {
+    TEST_REQUIRES_ARM_NEON_FMA;
+    for (uint32_t channels = 8; channels < 64; channels += 12) {
+      DWConvMicrokernelTester()
+        .cr(4)
+        .kr(4)
+        .channels(channels)
+        .input_offset(112)
+        .Test(xnn_f32_dwconv_minmax_ukernel_up4x4__neonfma_acc2);
+    }
+  }
+
+  TEST(F32_DWCONV_MINMAX_UP4X4__NEONFMA_ACC2, zero) {
+    TEST_REQUIRES_ARM_NEON_FMA;
+    for (uint32_t mz = 0; mz < 4; mz++) {
+      for (uint32_t channels = 8; channels < 64; channels += 12) {
+        DWConvMicrokernelTester()
+          .cr(4)
+          .kr(4)
+          .channels(channels)
+          .input_offset(112)
+          .zero_index(mz)
+          .Test(xnn_f32_dwconv_minmax_ukernel_up4x4__neonfma_acc2);
+      }
+    }
+  }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
@@ -2093,6 +2417,33 @@
         .Test(xnn_f32_dwconv_minmax_ukernel_up8x4__neonfma);
     }
   }
+
+  TEST(F32_DWCONV_MINMAX_UP8X4__NEONFMA, input_offset) {
+    TEST_REQUIRES_ARM_NEON_FMA;
+    for (uint32_t channels = 16; channels < 128; channels += 24) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(4)
+        .channels(channels)
+        .input_offset(176)
+        .Test(xnn_f32_dwconv_minmax_ukernel_up8x4__neonfma);
+    }
+  }
+
+  TEST(F32_DWCONV_MINMAX_UP8X4__NEONFMA, zero) {
+    TEST_REQUIRES_ARM_NEON_FMA;
+    for (uint32_t mz = 0; mz < 4; mz++) {
+      for (uint32_t channels = 16; channels < 128; channels += 24) {
+        DWConvMicrokernelTester()
+          .cr(8)
+          .kr(4)
+          .channels(channels)
+          .input_offset(176)
+          .zero_index(mz)
+          .Test(xnn_f32_dwconv_minmax_ukernel_up8x4__neonfma);
+      }
+    }
+  }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
@@ -2252,6 +2603,33 @@
         .Test(xnn_f32_dwconv_minmax_ukernel_up8x4__neonfma_acc2);
     }
   }
+
+  TEST(F32_DWCONV_MINMAX_UP8X4__NEONFMA_ACC2, input_offset) {
+    TEST_REQUIRES_ARM_NEON_FMA;
+    for (uint32_t channels = 16; channels < 128; channels += 24) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(4)
+        .channels(channels)
+        .input_offset(176)
+        .Test(xnn_f32_dwconv_minmax_ukernel_up8x4__neonfma_acc2);
+    }
+  }
+
+  TEST(F32_DWCONV_MINMAX_UP8X4__NEONFMA_ACC2, zero) {
+    TEST_REQUIRES_ARM_NEON_FMA;
+    for (uint32_t mz = 0; mz < 4; mz++) {
+      for (uint32_t channels = 16; channels < 128; channels += 24) {
+        DWConvMicrokernelTester()
+          .cr(8)
+          .kr(4)
+          .channels(channels)
+          .input_offset(176)
+          .zero_index(mz)
+          .Test(xnn_f32_dwconv_minmax_ukernel_up8x4__neonfma_acc2);
+      }
+    }
+  }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
@@ -2411,6 +2789,33 @@
         .Test(xnn_f32_dwconv_minmax_ukernel_up4x25__neon);
     }
   }
+
+  TEST(F32_DWCONV_MINMAX_UP4X25__NEON, input_offset) {
+    TEST_REQUIRES_ARM_NEON;
+    for (uint32_t channels = 8; channels < 64; channels += 12) {
+      DWConvMicrokernelTester()
+        .cr(4)
+        .kr(25)
+        .channels(channels)
+        .input_offset(112)
+        .Test(xnn_f32_dwconv_minmax_ukernel_up4x25__neon);
+    }
+  }
+
+  TEST(F32_DWCONV_MINMAX_UP4X25__NEON, zero) {
+    TEST_REQUIRES_ARM_NEON;
+    for (uint32_t mz = 0; mz < 25; mz++) {
+      for (uint32_t channels = 8; channels < 64; channels += 12) {
+        DWConvMicrokernelTester()
+          .cr(4)
+          .kr(25)
+          .channels(channels)
+          .input_offset(112)
+          .zero_index(mz)
+          .Test(xnn_f32_dwconv_minmax_ukernel_up4x25__neon);
+      }
+    }
+  }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
@@ -2570,6 +2975,33 @@
         .Test(xnn_f32_dwconv_minmax_ukernel_up4x25__neon_acc2);
     }
   }
+
+  TEST(F32_DWCONV_MINMAX_UP4X25__NEON_ACC2, input_offset) {
+    TEST_REQUIRES_ARM_NEON;
+    for (uint32_t channels = 8; channels < 64; channels += 12) {
+      DWConvMicrokernelTester()
+        .cr(4)
+        .kr(25)
+        .channels(channels)
+        .input_offset(112)
+        .Test(xnn_f32_dwconv_minmax_ukernel_up4x25__neon_acc2);
+    }
+  }
+
+  TEST(F32_DWCONV_MINMAX_UP4X25__NEON_ACC2, zero) {
+    TEST_REQUIRES_ARM_NEON;
+    for (uint32_t mz = 0; mz < 25; mz++) {
+      for (uint32_t channels = 8; channels < 64; channels += 12) {
+        DWConvMicrokernelTester()
+          .cr(4)
+          .kr(25)
+          .channels(channels)
+          .input_offset(112)
+          .zero_index(mz)
+          .Test(xnn_f32_dwconv_minmax_ukernel_up4x25__neon_acc2);
+      }
+    }
+  }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
@@ -2729,6 +3161,33 @@
         .Test(xnn_f32_dwconv_minmax_ukernel_up8x25__neon);
     }
   }
+
+  TEST(F32_DWCONV_MINMAX_UP8X25__NEON, input_offset) {
+    TEST_REQUIRES_ARM_NEON;
+    for (uint32_t channels = 16; channels < 128; channels += 24) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(25)
+        .channels(channels)
+        .input_offset(176)
+        .Test(xnn_f32_dwconv_minmax_ukernel_up8x25__neon);
+    }
+  }
+
+  TEST(F32_DWCONV_MINMAX_UP8X25__NEON, zero) {
+    TEST_REQUIRES_ARM_NEON;
+    for (uint32_t mz = 0; mz < 25; mz++) {
+      for (uint32_t channels = 16; channels < 128; channels += 24) {
+        DWConvMicrokernelTester()
+          .cr(8)
+          .kr(25)
+          .channels(channels)
+          .input_offset(176)
+          .zero_index(mz)
+          .Test(xnn_f32_dwconv_minmax_ukernel_up8x25__neon);
+      }
+    }
+  }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
@@ -2888,6 +3347,33 @@
         .Test(xnn_f32_dwconv_minmax_ukernel_up8x25__neon_acc2);
     }
   }
+
+  TEST(F32_DWCONV_MINMAX_UP8X25__NEON_ACC2, input_offset) {
+    TEST_REQUIRES_ARM_NEON;
+    for (uint32_t channels = 16; channels < 128; channels += 24) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(25)
+        .channels(channels)
+        .input_offset(176)
+        .Test(xnn_f32_dwconv_minmax_ukernel_up8x25__neon_acc2);
+    }
+  }
+
+  TEST(F32_DWCONV_MINMAX_UP8X25__NEON_ACC2, zero) {
+    TEST_REQUIRES_ARM_NEON;
+    for (uint32_t mz = 0; mz < 25; mz++) {
+      for (uint32_t channels = 16; channels < 128; channels += 24) {
+        DWConvMicrokernelTester()
+          .cr(8)
+          .kr(25)
+          .channels(channels)
+          .input_offset(176)
+          .zero_index(mz)
+          .Test(xnn_f32_dwconv_minmax_ukernel_up8x25__neon_acc2);
+      }
+    }
+  }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
@@ -3047,6 +3533,33 @@
         .Test(xnn_f32_dwconv_minmax_ukernel_up4x9__neon);
     }
   }
+
+  TEST(F32_DWCONV_MINMAX_UP4X9__NEON, input_offset) {
+    TEST_REQUIRES_ARM_NEON;
+    for (uint32_t channels = 8; channels < 64; channels += 12) {
+      DWConvMicrokernelTester()
+        .cr(4)
+        .kr(9)
+        .channels(channels)
+        .input_offset(112)
+        .Test(xnn_f32_dwconv_minmax_ukernel_up4x9__neon);
+    }
+  }
+
+  TEST(F32_DWCONV_MINMAX_UP4X9__NEON, zero) {
+    TEST_REQUIRES_ARM_NEON;
+    for (uint32_t mz = 0; mz < 9; mz++) {
+      for (uint32_t channels = 8; channels < 64; channels += 12) {
+        DWConvMicrokernelTester()
+          .cr(4)
+          .kr(9)
+          .channels(channels)
+          .input_offset(112)
+          .zero_index(mz)
+          .Test(xnn_f32_dwconv_minmax_ukernel_up4x9__neon);
+      }
+    }
+  }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
@@ -3206,6 +3719,33 @@
         .Test(xnn_f32_dwconv_minmax_ukernel_up4x9__neon_acc2);
     }
   }
+
+  TEST(F32_DWCONV_MINMAX_UP4X9__NEON_ACC2, input_offset) {
+    TEST_REQUIRES_ARM_NEON;
+    for (uint32_t channels = 8; channels < 64; channels += 12) {
+      DWConvMicrokernelTester()
+        .cr(4)
+        .kr(9)
+        .channels(channels)
+        .input_offset(112)
+        .Test(xnn_f32_dwconv_minmax_ukernel_up4x9__neon_acc2);
+    }
+  }
+
+  TEST(F32_DWCONV_MINMAX_UP4X9__NEON_ACC2, zero) {
+    TEST_REQUIRES_ARM_NEON;
+    for (uint32_t mz = 0; mz < 9; mz++) {
+      for (uint32_t channels = 8; channels < 64; channels += 12) {
+        DWConvMicrokernelTester()
+          .cr(4)
+          .kr(9)
+          .channels(channels)
+          .input_offset(112)
+          .zero_index(mz)
+          .Test(xnn_f32_dwconv_minmax_ukernel_up4x9__neon_acc2);
+      }
+    }
+  }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
@@ -3365,6 +3905,33 @@
         .Test(xnn_f32_dwconv_minmax_ukernel_up8x9__neon);
     }
   }
+
+  TEST(F32_DWCONV_MINMAX_UP8X9__NEON, input_offset) {
+    TEST_REQUIRES_ARM_NEON;
+    for (uint32_t channels = 16; channels < 128; channels += 24) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(9)
+        .channels(channels)
+        .input_offset(176)
+        .Test(xnn_f32_dwconv_minmax_ukernel_up8x9__neon);
+    }
+  }
+
+  TEST(F32_DWCONV_MINMAX_UP8X9__NEON, zero) {
+    TEST_REQUIRES_ARM_NEON;
+    for (uint32_t mz = 0; mz < 9; mz++) {
+      for (uint32_t channels = 16; channels < 128; channels += 24) {
+        DWConvMicrokernelTester()
+          .cr(8)
+          .kr(9)
+          .channels(channels)
+          .input_offset(176)
+          .zero_index(mz)
+          .Test(xnn_f32_dwconv_minmax_ukernel_up8x9__neon);
+      }
+    }
+  }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
@@ -3524,6 +4091,33 @@
         .Test(xnn_f32_dwconv_minmax_ukernel_up8x9__neon_acc2);
     }
   }
+
+  TEST(F32_DWCONV_MINMAX_UP8X9__NEON_ACC2, input_offset) {
+    TEST_REQUIRES_ARM_NEON;
+    for (uint32_t channels = 16; channels < 128; channels += 24) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(9)
+        .channels(channels)
+        .input_offset(176)
+        .Test(xnn_f32_dwconv_minmax_ukernel_up8x9__neon_acc2);
+    }
+  }
+
+  TEST(F32_DWCONV_MINMAX_UP8X9__NEON_ACC2, zero) {
+    TEST_REQUIRES_ARM_NEON;
+    for (uint32_t mz = 0; mz < 9; mz++) {
+      for (uint32_t channels = 16; channels < 128; channels += 24) {
+        DWConvMicrokernelTester()
+          .cr(8)
+          .kr(9)
+          .channels(channels)
+          .input_offset(176)
+          .zero_index(mz)
+          .Test(xnn_f32_dwconv_minmax_ukernel_up8x9__neon_acc2);
+      }
+    }
+  }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
@@ -3683,6 +4277,33 @@
         .Test(xnn_f32_dwconv_minmax_ukernel_up4x4__neon);
     }
   }
+
+  TEST(F32_DWCONV_MINMAX_UP4X4__NEON, input_offset) {
+    TEST_REQUIRES_ARM_NEON;
+    for (uint32_t channels = 8; channels < 64; channels += 12) {
+      DWConvMicrokernelTester()
+        .cr(4)
+        .kr(4)
+        .channels(channels)
+        .input_offset(112)
+        .Test(xnn_f32_dwconv_minmax_ukernel_up4x4__neon);
+    }
+  }
+
+  TEST(F32_DWCONV_MINMAX_UP4X4__NEON, zero) {
+    TEST_REQUIRES_ARM_NEON;
+    for (uint32_t mz = 0; mz < 4; mz++) {
+      for (uint32_t channels = 8; channels < 64; channels += 12) {
+        DWConvMicrokernelTester()
+          .cr(4)
+          .kr(4)
+          .channels(channels)
+          .input_offset(112)
+          .zero_index(mz)
+          .Test(xnn_f32_dwconv_minmax_ukernel_up4x4__neon);
+      }
+    }
+  }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
@@ -3842,6 +4463,33 @@
         .Test(xnn_f32_dwconv_minmax_ukernel_up4x4__neon_acc2);
     }
   }
+
+  TEST(F32_DWCONV_MINMAX_UP4X4__NEON_ACC2, input_offset) {
+    TEST_REQUIRES_ARM_NEON;
+    for (uint32_t channels = 8; channels < 64; channels += 12) {
+      DWConvMicrokernelTester()
+        .cr(4)
+        .kr(4)
+        .channels(channels)
+        .input_offset(112)
+        .Test(xnn_f32_dwconv_minmax_ukernel_up4x4__neon_acc2);
+    }
+  }
+
+  TEST(F32_DWCONV_MINMAX_UP4X4__NEON_ACC2, zero) {
+    TEST_REQUIRES_ARM_NEON;
+    for (uint32_t mz = 0; mz < 4; mz++) {
+      for (uint32_t channels = 8; channels < 64; channels += 12) {
+        DWConvMicrokernelTester()
+          .cr(4)
+          .kr(4)
+          .channels(channels)
+          .input_offset(112)
+          .zero_index(mz)
+          .Test(xnn_f32_dwconv_minmax_ukernel_up4x4__neon_acc2);
+      }
+    }
+  }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
@@ -4001,6 +4649,33 @@
         .Test(xnn_f32_dwconv_minmax_ukernel_up8x4__neon);
     }
   }
+
+  TEST(F32_DWCONV_MINMAX_UP8X4__NEON, input_offset) {
+    TEST_REQUIRES_ARM_NEON;
+    for (uint32_t channels = 16; channels < 128; channels += 24) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(4)
+        .channels(channels)
+        .input_offset(176)
+        .Test(xnn_f32_dwconv_minmax_ukernel_up8x4__neon);
+    }
+  }
+
+  TEST(F32_DWCONV_MINMAX_UP8X4__NEON, zero) {
+    TEST_REQUIRES_ARM_NEON;
+    for (uint32_t mz = 0; mz < 4; mz++) {
+      for (uint32_t channels = 16; channels < 128; channels += 24) {
+        DWConvMicrokernelTester()
+          .cr(8)
+          .kr(4)
+          .channels(channels)
+          .input_offset(176)
+          .zero_index(mz)
+          .Test(xnn_f32_dwconv_minmax_ukernel_up8x4__neon);
+      }
+    }
+  }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
@@ -4160,6 +4835,33 @@
         .Test(xnn_f32_dwconv_minmax_ukernel_up8x4__neon_acc2);
     }
   }
+
+  TEST(F32_DWCONV_MINMAX_UP8X4__NEON_ACC2, input_offset) {
+    TEST_REQUIRES_ARM_NEON;
+    for (uint32_t channels = 16; channels < 128; channels += 24) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(4)
+        .channels(channels)
+        .input_offset(176)
+        .Test(xnn_f32_dwconv_minmax_ukernel_up8x4__neon_acc2);
+    }
+  }
+
+  TEST(F32_DWCONV_MINMAX_UP8X4__NEON_ACC2, zero) {
+    TEST_REQUIRES_ARM_NEON;
+    for (uint32_t mz = 0; mz < 4; mz++) {
+      for (uint32_t channels = 16; channels < 128; channels += 24) {
+        DWConvMicrokernelTester()
+          .cr(8)
+          .kr(4)
+          .channels(channels)
+          .input_offset(176)
+          .zero_index(mz)
+          .Test(xnn_f32_dwconv_minmax_ukernel_up8x4__neon_acc2);
+      }
+    }
+  }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
@@ -4319,6 +5021,33 @@
         .Test(xnn_f32_dwconv_minmax_ukernel_up4x25__sse);
     }
   }
+
+  TEST(F32_DWCONV_MINMAX_UP4X25__SSE, input_offset) {
+    TEST_REQUIRES_X86_SSE;
+    for (uint32_t channels = 8; channels < 64; channels += 12) {
+      DWConvMicrokernelTester()
+        .cr(4)
+        .kr(25)
+        .channels(channels)
+        .input_offset(112)
+        .Test(xnn_f32_dwconv_minmax_ukernel_up4x25__sse);
+    }
+  }
+
+  TEST(F32_DWCONV_MINMAX_UP4X25__SSE, zero) {
+    TEST_REQUIRES_X86_SSE;
+    for (uint32_t mz = 0; mz < 25; mz++) {
+      for (uint32_t channels = 8; channels < 64; channels += 12) {
+        DWConvMicrokernelTester()
+          .cr(4)
+          .kr(25)
+          .channels(channels)
+          .input_offset(112)
+          .zero_index(mz)
+          .Test(xnn_f32_dwconv_minmax_ukernel_up4x25__sse);
+      }
+    }
+  }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
@@ -4478,6 +5207,33 @@
         .Test(xnn_f32_dwconv_minmax_ukernel_up4x25__sse_acc2);
     }
   }
+
+  TEST(F32_DWCONV_MINMAX_UP4X25__SSE_ACC2, input_offset) {
+    TEST_REQUIRES_X86_SSE;
+    for (uint32_t channels = 8; channels < 64; channels += 12) {
+      DWConvMicrokernelTester()
+        .cr(4)
+        .kr(25)
+        .channels(channels)
+        .input_offset(112)
+        .Test(xnn_f32_dwconv_minmax_ukernel_up4x25__sse_acc2);
+    }
+  }
+
+  TEST(F32_DWCONV_MINMAX_UP4X25__SSE_ACC2, zero) {
+    TEST_REQUIRES_X86_SSE;
+    for (uint32_t mz = 0; mz < 25; mz++) {
+      for (uint32_t channels = 8; channels < 64; channels += 12) {
+        DWConvMicrokernelTester()
+          .cr(4)
+          .kr(25)
+          .channels(channels)
+          .input_offset(112)
+          .zero_index(mz)
+          .Test(xnn_f32_dwconv_minmax_ukernel_up4x25__sse_acc2);
+      }
+    }
+  }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
@@ -4637,6 +5393,33 @@
         .Test(xnn_f32_dwconv_minmax_ukernel_up8x25__sse);
     }
   }
+
+  TEST(F32_DWCONV_MINMAX_UP8X25__SSE, input_offset) {
+    TEST_REQUIRES_X86_SSE;
+    for (uint32_t channels = 16; channels < 128; channels += 24) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(25)
+        .channels(channels)
+        .input_offset(176)
+        .Test(xnn_f32_dwconv_minmax_ukernel_up8x25__sse);
+    }
+  }
+
+  TEST(F32_DWCONV_MINMAX_UP8X25__SSE, zero) {
+    TEST_REQUIRES_X86_SSE;
+    for (uint32_t mz = 0; mz < 25; mz++) {
+      for (uint32_t channels = 16; channels < 128; channels += 24) {
+        DWConvMicrokernelTester()
+          .cr(8)
+          .kr(25)
+          .channels(channels)
+          .input_offset(176)
+          .zero_index(mz)
+          .Test(xnn_f32_dwconv_minmax_ukernel_up8x25__sse);
+      }
+    }
+  }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
@@ -4796,6 +5579,33 @@
         .Test(xnn_f32_dwconv_minmax_ukernel_up8x25__sse_acc2);
     }
   }
+
+  TEST(F32_DWCONV_MINMAX_UP8X25__SSE_ACC2, input_offset) {
+    TEST_REQUIRES_X86_SSE;
+    for (uint32_t channels = 16; channels < 128; channels += 24) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(25)
+        .channels(channels)
+        .input_offset(176)
+        .Test(xnn_f32_dwconv_minmax_ukernel_up8x25__sse_acc2);
+    }
+  }
+
+  TEST(F32_DWCONV_MINMAX_UP8X25__SSE_ACC2, zero) {
+    TEST_REQUIRES_X86_SSE;
+    for (uint32_t mz = 0; mz < 25; mz++) {
+      for (uint32_t channels = 16; channels < 128; channels += 24) {
+        DWConvMicrokernelTester()
+          .cr(8)
+          .kr(25)
+          .channels(channels)
+          .input_offset(176)
+          .zero_index(mz)
+          .Test(xnn_f32_dwconv_minmax_ukernel_up8x25__sse_acc2);
+      }
+    }
+  }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
@@ -4955,6 +5765,33 @@
         .Test(xnn_f32_dwconv_minmax_ukernel_up4x9__sse);
     }
   }
+
+  TEST(F32_DWCONV_MINMAX_UP4X9__SSE, input_offset) {
+    TEST_REQUIRES_X86_SSE;
+    for (uint32_t channels = 8; channels < 64; channels += 12) {
+      DWConvMicrokernelTester()
+        .cr(4)
+        .kr(9)
+        .channels(channels)
+        .input_offset(112)
+        .Test(xnn_f32_dwconv_minmax_ukernel_up4x9__sse);
+    }
+  }
+
+  TEST(F32_DWCONV_MINMAX_UP4X9__SSE, zero) {
+    TEST_REQUIRES_X86_SSE;
+    for (uint32_t mz = 0; mz < 9; mz++) {
+      for (uint32_t channels = 8; channels < 64; channels += 12) {
+        DWConvMicrokernelTester()
+          .cr(4)
+          .kr(9)
+          .channels(channels)
+          .input_offset(112)
+          .zero_index(mz)
+          .Test(xnn_f32_dwconv_minmax_ukernel_up4x9__sse);
+      }
+    }
+  }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
@@ -5114,6 +5951,33 @@
         .Test(xnn_f32_dwconv_minmax_ukernel_up4x9__sse_acc2);
     }
   }
+
+  TEST(F32_DWCONV_MINMAX_UP4X9__SSE_ACC2, input_offset) {
+    TEST_REQUIRES_X86_SSE;
+    for (uint32_t channels = 8; channels < 64; channels += 12) {
+      DWConvMicrokernelTester()
+        .cr(4)
+        .kr(9)
+        .channels(channels)
+        .input_offset(112)
+        .Test(xnn_f32_dwconv_minmax_ukernel_up4x9__sse_acc2);
+    }
+  }
+
+  TEST(F32_DWCONV_MINMAX_UP4X9__SSE_ACC2, zero) {
+    TEST_REQUIRES_X86_SSE;
+    for (uint32_t mz = 0; mz < 9; mz++) {
+      for (uint32_t channels = 8; channels < 64; channels += 12) {
+        DWConvMicrokernelTester()
+          .cr(4)
+          .kr(9)
+          .channels(channels)
+          .input_offset(112)
+          .zero_index(mz)
+          .Test(xnn_f32_dwconv_minmax_ukernel_up4x9__sse_acc2);
+      }
+    }
+  }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
@@ -5273,6 +6137,33 @@
         .Test(xnn_f32_dwconv_minmax_ukernel_up8x9__sse);
     }
   }
+
+  TEST(F32_DWCONV_MINMAX_UP8X9__SSE, input_offset) {
+    TEST_REQUIRES_X86_SSE;
+    for (uint32_t channels = 16; channels < 128; channels += 24) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(9)
+        .channels(channels)
+        .input_offset(176)
+        .Test(xnn_f32_dwconv_minmax_ukernel_up8x9__sse);
+    }
+  }
+
+  TEST(F32_DWCONV_MINMAX_UP8X9__SSE, zero) {
+    TEST_REQUIRES_X86_SSE;
+    for (uint32_t mz = 0; mz < 9; mz++) {
+      for (uint32_t channels = 16; channels < 128; channels += 24) {
+        DWConvMicrokernelTester()
+          .cr(8)
+          .kr(9)
+          .channels(channels)
+          .input_offset(176)
+          .zero_index(mz)
+          .Test(xnn_f32_dwconv_minmax_ukernel_up8x9__sse);
+      }
+    }
+  }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
@@ -5432,6 +6323,33 @@
         .Test(xnn_f32_dwconv_minmax_ukernel_up8x9__sse_acc2);
     }
   }
+
+  TEST(F32_DWCONV_MINMAX_UP8X9__SSE_ACC2, input_offset) {
+    TEST_REQUIRES_X86_SSE;
+    for (uint32_t channels = 16; channels < 128; channels += 24) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(9)
+        .channels(channels)
+        .input_offset(176)
+        .Test(xnn_f32_dwconv_minmax_ukernel_up8x9__sse_acc2);
+    }
+  }
+
+  TEST(F32_DWCONV_MINMAX_UP8X9__SSE_ACC2, zero) {
+    TEST_REQUIRES_X86_SSE;
+    for (uint32_t mz = 0; mz < 9; mz++) {
+      for (uint32_t channels = 16; channels < 128; channels += 24) {
+        DWConvMicrokernelTester()
+          .cr(8)
+          .kr(9)
+          .channels(channels)
+          .input_offset(176)
+          .zero_index(mz)
+          .Test(xnn_f32_dwconv_minmax_ukernel_up8x9__sse_acc2);
+      }
+    }
+  }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
@@ -5591,6 +6509,33 @@
         .Test(xnn_f32_dwconv_minmax_ukernel_up4x4__sse);
     }
   }
+
+  TEST(F32_DWCONV_MINMAX_UP4X4__SSE, input_offset) {
+    TEST_REQUIRES_X86_SSE;
+    for (uint32_t channels = 8; channels < 64; channels += 12) {
+      DWConvMicrokernelTester()
+        .cr(4)
+        .kr(4)
+        .channels(channels)
+        .input_offset(112)
+        .Test(xnn_f32_dwconv_minmax_ukernel_up4x4__sse);
+    }
+  }
+
+  TEST(F32_DWCONV_MINMAX_UP4X4__SSE, zero) {
+    TEST_REQUIRES_X86_SSE;
+    for (uint32_t mz = 0; mz < 4; mz++) {
+      for (uint32_t channels = 8; channels < 64; channels += 12) {
+        DWConvMicrokernelTester()
+          .cr(4)
+          .kr(4)
+          .channels(channels)
+          .input_offset(112)
+          .zero_index(mz)
+          .Test(xnn_f32_dwconv_minmax_ukernel_up4x4__sse);
+      }
+    }
+  }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
@@ -5750,6 +6695,33 @@
         .Test(xnn_f32_dwconv_minmax_ukernel_up4x4__sse_acc2);
     }
   }
+
+  TEST(F32_DWCONV_MINMAX_UP4X4__SSE_ACC2, input_offset) {
+    TEST_REQUIRES_X86_SSE;
+    for (uint32_t channels = 8; channels < 64; channels += 12) {
+      DWConvMicrokernelTester()
+        .cr(4)
+        .kr(4)
+        .channels(channels)
+        .input_offset(112)
+        .Test(xnn_f32_dwconv_minmax_ukernel_up4x4__sse_acc2);
+    }
+  }
+
+  TEST(F32_DWCONV_MINMAX_UP4X4__SSE_ACC2, zero) {
+    TEST_REQUIRES_X86_SSE;
+    for (uint32_t mz = 0; mz < 4; mz++) {
+      for (uint32_t channels = 8; channels < 64; channels += 12) {
+        DWConvMicrokernelTester()
+          .cr(4)
+          .kr(4)
+          .channels(channels)
+          .input_offset(112)
+          .zero_index(mz)
+          .Test(xnn_f32_dwconv_minmax_ukernel_up4x4__sse_acc2);
+      }
+    }
+  }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
@@ -5909,6 +6881,33 @@
         .Test(xnn_f32_dwconv_minmax_ukernel_up8x4__sse);
     }
   }
+
+  TEST(F32_DWCONV_MINMAX_UP8X4__SSE, input_offset) {
+    TEST_REQUIRES_X86_SSE;
+    for (uint32_t channels = 16; channels < 128; channels += 24) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(4)
+        .channels(channels)
+        .input_offset(176)
+        .Test(xnn_f32_dwconv_minmax_ukernel_up8x4__sse);
+    }
+  }
+
+  TEST(F32_DWCONV_MINMAX_UP8X4__SSE, zero) {
+    TEST_REQUIRES_X86_SSE;
+    for (uint32_t mz = 0; mz < 4; mz++) {
+      for (uint32_t channels = 16; channels < 128; channels += 24) {
+        DWConvMicrokernelTester()
+          .cr(8)
+          .kr(4)
+          .channels(channels)
+          .input_offset(176)
+          .zero_index(mz)
+          .Test(xnn_f32_dwconv_minmax_ukernel_up8x4__sse);
+      }
+    }
+  }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
@@ -6068,6 +7067,33 @@
         .Test(xnn_f32_dwconv_minmax_ukernel_up8x4__sse_acc2);
     }
   }
+
+  TEST(F32_DWCONV_MINMAX_UP8X4__SSE_ACC2, input_offset) {
+    TEST_REQUIRES_X86_SSE;
+    for (uint32_t channels = 16; channels < 128; channels += 24) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(4)
+        .channels(channels)
+        .input_offset(176)
+        .Test(xnn_f32_dwconv_minmax_ukernel_up8x4__sse_acc2);
+    }
+  }
+
+  TEST(F32_DWCONV_MINMAX_UP8X4__SSE_ACC2, zero) {
+    TEST_REQUIRES_X86_SSE;
+    for (uint32_t mz = 0; mz < 4; mz++) {
+      for (uint32_t channels = 16; channels < 128; channels += 24) {
+        DWConvMicrokernelTester()
+          .cr(8)
+          .kr(4)
+          .channels(channels)
+          .input_offset(176)
+          .zero_index(mz)
+          .Test(xnn_f32_dwconv_minmax_ukernel_up8x4__sse_acc2);
+      }
+    }
+  }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
@@ -6227,6 +7253,33 @@
         .Test(xnn_f32_dwconv_minmax_ukernel_up8x25__avx);
     }
   }
+
+  TEST(F32_DWCONV_MINMAX_UP8X25__AVX, input_offset) {
+    TEST_REQUIRES_X86_AVX;
+    for (uint32_t channels = 16; channels < 128; channels += 24) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(25)
+        .channels(channels)
+        .input_offset(176)
+        .Test(xnn_f32_dwconv_minmax_ukernel_up8x25__avx);
+    }
+  }
+
+  TEST(F32_DWCONV_MINMAX_UP8X25__AVX, zero) {
+    TEST_REQUIRES_X86_AVX;
+    for (uint32_t mz = 0; mz < 25; mz++) {
+      for (uint32_t channels = 16; channels < 128; channels += 24) {
+        DWConvMicrokernelTester()
+          .cr(8)
+          .kr(25)
+          .channels(channels)
+          .input_offset(176)
+          .zero_index(mz)
+          .Test(xnn_f32_dwconv_minmax_ukernel_up8x25__avx);
+      }
+    }
+  }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
@@ -6386,6 +7439,33 @@
         .Test(xnn_f32_dwconv_minmax_ukernel_up8x25__avx_acc2);
     }
   }
+
+  TEST(F32_DWCONV_MINMAX_UP8X25__AVX_ACC2, input_offset) {
+    TEST_REQUIRES_X86_AVX;
+    for (uint32_t channels = 16; channels < 128; channels += 24) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(25)
+        .channels(channels)
+        .input_offset(176)
+        .Test(xnn_f32_dwconv_minmax_ukernel_up8x25__avx_acc2);
+    }
+  }
+
+  TEST(F32_DWCONV_MINMAX_UP8X25__AVX_ACC2, zero) {
+    TEST_REQUIRES_X86_AVX;
+    for (uint32_t mz = 0; mz < 25; mz++) {
+      for (uint32_t channels = 16; channels < 128; channels += 24) {
+        DWConvMicrokernelTester()
+          .cr(8)
+          .kr(25)
+          .channels(channels)
+          .input_offset(176)
+          .zero_index(mz)
+          .Test(xnn_f32_dwconv_minmax_ukernel_up8x25__avx_acc2);
+      }
+    }
+  }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
@@ -6545,6 +7625,33 @@
         .Test(xnn_f32_dwconv_minmax_ukernel_up16x25__avx);
     }
   }
+
+  TEST(F32_DWCONV_MINMAX_UP16X25__AVX, input_offset) {
+    TEST_REQUIRES_X86_AVX;
+    for (uint32_t channels = 32; channels < 256; channels += 48) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(channels)
+        .input_offset(304)
+        .Test(xnn_f32_dwconv_minmax_ukernel_up16x25__avx);
+    }
+  }
+
+  TEST(F32_DWCONV_MINMAX_UP16X25__AVX, zero) {
+    TEST_REQUIRES_X86_AVX;
+    for (uint32_t mz = 0; mz < 25; mz++) {
+      for (uint32_t channels = 32; channels < 256; channels += 48) {
+        DWConvMicrokernelTester()
+          .cr(16)
+          .kr(25)
+          .channels(channels)
+          .input_offset(304)
+          .zero_index(mz)
+          .Test(xnn_f32_dwconv_minmax_ukernel_up16x25__avx);
+      }
+    }
+  }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
@@ -6704,6 +7811,33 @@
         .Test(xnn_f32_dwconv_minmax_ukernel_up16x25__avx_acc2);
     }
   }
+
+  TEST(F32_DWCONV_MINMAX_UP16X25__AVX_ACC2, input_offset) {
+    TEST_REQUIRES_X86_AVX;
+    for (uint32_t channels = 32; channels < 256; channels += 48) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(channels)
+        .input_offset(304)
+        .Test(xnn_f32_dwconv_minmax_ukernel_up16x25__avx_acc2);
+    }
+  }
+
+  TEST(F32_DWCONV_MINMAX_UP16X25__AVX_ACC2, zero) {
+    TEST_REQUIRES_X86_AVX;
+    for (uint32_t mz = 0; mz < 25; mz++) {
+      for (uint32_t channels = 32; channels < 256; channels += 48) {
+        DWConvMicrokernelTester()
+          .cr(16)
+          .kr(25)
+          .channels(channels)
+          .input_offset(304)
+          .zero_index(mz)
+          .Test(xnn_f32_dwconv_minmax_ukernel_up16x25__avx_acc2);
+      }
+    }
+  }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
@@ -6863,6 +7997,33 @@
         .Test(xnn_f32_dwconv_minmax_ukernel_up8x9__avx);
     }
   }
+
+  TEST(F32_DWCONV_MINMAX_UP8X9__AVX, input_offset) {
+    TEST_REQUIRES_X86_AVX;
+    for (uint32_t channels = 16; channels < 128; channels += 24) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(9)
+        .channels(channels)
+        .input_offset(176)
+        .Test(xnn_f32_dwconv_minmax_ukernel_up8x9__avx);
+    }
+  }
+
+  TEST(F32_DWCONV_MINMAX_UP8X9__AVX, zero) {
+    TEST_REQUIRES_X86_AVX;
+    for (uint32_t mz = 0; mz < 9; mz++) {
+      for (uint32_t channels = 16; channels < 128; channels += 24) {
+        DWConvMicrokernelTester()
+          .cr(8)
+          .kr(9)
+          .channels(channels)
+          .input_offset(176)
+          .zero_index(mz)
+          .Test(xnn_f32_dwconv_minmax_ukernel_up8x9__avx);
+      }
+    }
+  }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
@@ -7022,6 +8183,33 @@
         .Test(xnn_f32_dwconv_minmax_ukernel_up8x9__avx_acc2);
     }
   }
+
+  TEST(F32_DWCONV_MINMAX_UP8X9__AVX_ACC2, input_offset) {
+    TEST_REQUIRES_X86_AVX;
+    for (uint32_t channels = 16; channels < 128; channels += 24) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(9)
+        .channels(channels)
+        .input_offset(176)
+        .Test(xnn_f32_dwconv_minmax_ukernel_up8x9__avx_acc2);
+    }
+  }
+
+  TEST(F32_DWCONV_MINMAX_UP8X9__AVX_ACC2, zero) {
+    TEST_REQUIRES_X86_AVX;
+    for (uint32_t mz = 0; mz < 9; mz++) {
+      for (uint32_t channels = 16; channels < 128; channels += 24) {
+        DWConvMicrokernelTester()
+          .cr(8)
+          .kr(9)
+          .channels(channels)
+          .input_offset(176)
+          .zero_index(mz)
+          .Test(xnn_f32_dwconv_minmax_ukernel_up8x9__avx_acc2);
+      }
+    }
+  }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
@@ -7181,6 +8369,33 @@
         .Test(xnn_f32_dwconv_minmax_ukernel_up16x9__avx);
     }
   }
+
+  TEST(F32_DWCONV_MINMAX_UP16X9__AVX, input_offset) {
+    TEST_REQUIRES_X86_AVX;
+    for (uint32_t channels = 32; channels < 256; channels += 48) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(channels)
+        .input_offset(304)
+        .Test(xnn_f32_dwconv_minmax_ukernel_up16x9__avx);
+    }
+  }
+
+  TEST(F32_DWCONV_MINMAX_UP16X9__AVX, zero) {
+    TEST_REQUIRES_X86_AVX;
+    for (uint32_t mz = 0; mz < 9; mz++) {
+      for (uint32_t channels = 32; channels < 256; channels += 48) {
+        DWConvMicrokernelTester()
+          .cr(16)
+          .kr(9)
+          .channels(channels)
+          .input_offset(304)
+          .zero_index(mz)
+          .Test(xnn_f32_dwconv_minmax_ukernel_up16x9__avx);
+      }
+    }
+  }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
@@ -7340,6 +8555,33 @@
         .Test(xnn_f32_dwconv_minmax_ukernel_up16x9__avx_acc2);
     }
   }
+
+  TEST(F32_DWCONV_MINMAX_UP16X9__AVX_ACC2, input_offset) {
+    TEST_REQUIRES_X86_AVX;
+    for (uint32_t channels = 32; channels < 256; channels += 48) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(channels)
+        .input_offset(304)
+        .Test(xnn_f32_dwconv_minmax_ukernel_up16x9__avx_acc2);
+    }
+  }
+
+  TEST(F32_DWCONV_MINMAX_UP16X9__AVX_ACC2, zero) {
+    TEST_REQUIRES_X86_AVX;
+    for (uint32_t mz = 0; mz < 9; mz++) {
+      for (uint32_t channels = 32; channels < 256; channels += 48) {
+        DWConvMicrokernelTester()
+          .cr(16)
+          .kr(9)
+          .channels(channels)
+          .input_offset(304)
+          .zero_index(mz)
+          .Test(xnn_f32_dwconv_minmax_ukernel_up16x9__avx_acc2);
+      }
+    }
+  }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
@@ -7499,6 +8741,33 @@
         .Test(xnn_f32_dwconv_minmax_ukernel_up8x4__avx);
     }
   }
+
+  TEST(F32_DWCONV_MINMAX_UP8X4__AVX, input_offset) {
+    TEST_REQUIRES_X86_AVX;
+    for (uint32_t channels = 16; channels < 128; channels += 24) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(4)
+        .channels(channels)
+        .input_offset(176)
+        .Test(xnn_f32_dwconv_minmax_ukernel_up8x4__avx);
+    }
+  }
+
+  TEST(F32_DWCONV_MINMAX_UP8X4__AVX, zero) {
+    TEST_REQUIRES_X86_AVX;
+    for (uint32_t mz = 0; mz < 4; mz++) {
+      for (uint32_t channels = 16; channels < 128; channels += 24) {
+        DWConvMicrokernelTester()
+          .cr(8)
+          .kr(4)
+          .channels(channels)
+          .input_offset(176)
+          .zero_index(mz)
+          .Test(xnn_f32_dwconv_minmax_ukernel_up8x4__avx);
+      }
+    }
+  }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
@@ -7658,6 +8927,33 @@
         .Test(xnn_f32_dwconv_minmax_ukernel_up8x4__avx_acc2);
     }
   }
+
+  TEST(F32_DWCONV_MINMAX_UP8X4__AVX_ACC2, input_offset) {
+    TEST_REQUIRES_X86_AVX;
+    for (uint32_t channels = 16; channels < 128; channels += 24) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(4)
+        .channels(channels)
+        .input_offset(176)
+        .Test(xnn_f32_dwconv_minmax_ukernel_up8x4__avx_acc2);
+    }
+  }
+
+  TEST(F32_DWCONV_MINMAX_UP8X4__AVX_ACC2, zero) {
+    TEST_REQUIRES_X86_AVX;
+    for (uint32_t mz = 0; mz < 4; mz++) {
+      for (uint32_t channels = 16; channels < 128; channels += 24) {
+        DWConvMicrokernelTester()
+          .cr(8)
+          .kr(4)
+          .channels(channels)
+          .input_offset(176)
+          .zero_index(mz)
+          .Test(xnn_f32_dwconv_minmax_ukernel_up8x4__avx_acc2);
+      }
+    }
+  }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
@@ -7817,6 +9113,33 @@
         .Test(xnn_f32_dwconv_minmax_ukernel_up16x4__avx);
     }
   }
+
+  TEST(F32_DWCONV_MINMAX_UP16X4__AVX, input_offset) {
+    TEST_REQUIRES_X86_AVX;
+    for (uint32_t channels = 32; channels < 256; channels += 48) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(4)
+        .channels(channels)
+        .input_offset(304)
+        .Test(xnn_f32_dwconv_minmax_ukernel_up16x4__avx);
+    }
+  }
+
+  TEST(F32_DWCONV_MINMAX_UP16X4__AVX, zero) {
+    TEST_REQUIRES_X86_AVX;
+    for (uint32_t mz = 0; mz < 4; mz++) {
+      for (uint32_t channels = 32; channels < 256; channels += 48) {
+        DWConvMicrokernelTester()
+          .cr(16)
+          .kr(4)
+          .channels(channels)
+          .input_offset(304)
+          .zero_index(mz)
+          .Test(xnn_f32_dwconv_minmax_ukernel_up16x4__avx);
+      }
+    }
+  }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
@@ -7976,6 +9299,33 @@
         .Test(xnn_f32_dwconv_minmax_ukernel_up16x4__avx_acc2);
     }
   }
+
+  TEST(F32_DWCONV_MINMAX_UP16X4__AVX_ACC2, input_offset) {
+    TEST_REQUIRES_X86_AVX;
+    for (uint32_t channels = 32; channels < 256; channels += 48) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(4)
+        .channels(channels)
+        .input_offset(304)
+        .Test(xnn_f32_dwconv_minmax_ukernel_up16x4__avx_acc2);
+    }
+  }
+
+  TEST(F32_DWCONV_MINMAX_UP16X4__AVX_ACC2, zero) {
+    TEST_REQUIRES_X86_AVX;
+    for (uint32_t mz = 0; mz < 4; mz++) {
+      for (uint32_t channels = 32; channels < 256; channels += 48) {
+        DWConvMicrokernelTester()
+          .cr(16)
+          .kr(4)
+          .channels(channels)
+          .input_offset(304)
+          .zero_index(mz)
+          .Test(xnn_f32_dwconv_minmax_ukernel_up16x4__avx_acc2);
+      }
+    }
+  }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
@@ -8135,6 +9485,33 @@
         .Test(xnn_f32_dwconv_minmax_ukernel_up8x25__fma3);
     }
   }
+
+  TEST(F32_DWCONV_MINMAX_UP8X25__FMA3, input_offset) {
+    TEST_REQUIRES_X86_FMA3;
+    for (uint32_t channels = 16; channels < 128; channels += 24) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(25)
+        .channels(channels)
+        .input_offset(176)
+        .Test(xnn_f32_dwconv_minmax_ukernel_up8x25__fma3);
+    }
+  }
+
+  TEST(F32_DWCONV_MINMAX_UP8X25__FMA3, zero) {
+    TEST_REQUIRES_X86_FMA3;
+    for (uint32_t mz = 0; mz < 25; mz++) {
+      for (uint32_t channels = 16; channels < 128; channels += 24) {
+        DWConvMicrokernelTester()
+          .cr(8)
+          .kr(25)
+          .channels(channels)
+          .input_offset(176)
+          .zero_index(mz)
+          .Test(xnn_f32_dwconv_minmax_ukernel_up8x25__fma3);
+      }
+    }
+  }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
@@ -8294,6 +9671,33 @@
         .Test(xnn_f32_dwconv_minmax_ukernel_up8x25__fma3_acc2);
     }
   }
+
+  TEST(F32_DWCONV_MINMAX_UP8X25__FMA3_ACC2, input_offset) {
+    TEST_REQUIRES_X86_FMA3;
+    for (uint32_t channels = 16; channels < 128; channels += 24) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(25)
+        .channels(channels)
+        .input_offset(176)
+        .Test(xnn_f32_dwconv_minmax_ukernel_up8x25__fma3_acc2);
+    }
+  }
+
+  TEST(F32_DWCONV_MINMAX_UP8X25__FMA3_ACC2, zero) {
+    TEST_REQUIRES_X86_FMA3;
+    for (uint32_t mz = 0; mz < 25; mz++) {
+      for (uint32_t channels = 16; channels < 128; channels += 24) {
+        DWConvMicrokernelTester()
+          .cr(8)
+          .kr(25)
+          .channels(channels)
+          .input_offset(176)
+          .zero_index(mz)
+          .Test(xnn_f32_dwconv_minmax_ukernel_up8x25__fma3_acc2);
+      }
+    }
+  }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
@@ -8453,6 +9857,33 @@
         .Test(xnn_f32_dwconv_minmax_ukernel_up16x25__fma3);
     }
   }
+
+  TEST(F32_DWCONV_MINMAX_UP16X25__FMA3, input_offset) {
+    TEST_REQUIRES_X86_FMA3;
+    for (uint32_t channels = 32; channels < 256; channels += 48) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(channels)
+        .input_offset(304)
+        .Test(xnn_f32_dwconv_minmax_ukernel_up16x25__fma3);
+    }
+  }
+
+  TEST(F32_DWCONV_MINMAX_UP16X25__FMA3, zero) {
+    TEST_REQUIRES_X86_FMA3;
+    for (uint32_t mz = 0; mz < 25; mz++) {
+      for (uint32_t channels = 32; channels < 256; channels += 48) {
+        DWConvMicrokernelTester()
+          .cr(16)
+          .kr(25)
+          .channels(channels)
+          .input_offset(304)
+          .zero_index(mz)
+          .Test(xnn_f32_dwconv_minmax_ukernel_up16x25__fma3);
+      }
+    }
+  }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
@@ -8612,6 +10043,33 @@
         .Test(xnn_f32_dwconv_minmax_ukernel_up16x25__fma3_acc2);
     }
   }
+
+  TEST(F32_DWCONV_MINMAX_UP16X25__FMA3_ACC2, input_offset) {
+    TEST_REQUIRES_X86_FMA3;
+    for (uint32_t channels = 32; channels < 256; channels += 48) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(channels)
+        .input_offset(304)
+        .Test(xnn_f32_dwconv_minmax_ukernel_up16x25__fma3_acc2);
+    }
+  }
+
+  TEST(F32_DWCONV_MINMAX_UP16X25__FMA3_ACC2, zero) {
+    TEST_REQUIRES_X86_FMA3;
+    for (uint32_t mz = 0; mz < 25; mz++) {
+      for (uint32_t channels = 32; channels < 256; channels += 48) {
+        DWConvMicrokernelTester()
+          .cr(16)
+          .kr(25)
+          .channels(channels)
+          .input_offset(304)
+          .zero_index(mz)
+          .Test(xnn_f32_dwconv_minmax_ukernel_up16x25__fma3_acc2);
+      }
+    }
+  }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
@@ -8771,6 +10229,33 @@
         .Test(xnn_f32_dwconv_minmax_ukernel_up8x9__fma3);
     }
   }
+
+  TEST(F32_DWCONV_MINMAX_UP8X9__FMA3, input_offset) {
+    TEST_REQUIRES_X86_FMA3;
+    for (uint32_t channels = 16; channels < 128; channels += 24) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(9)
+        .channels(channels)
+        .input_offset(176)
+        .Test(xnn_f32_dwconv_minmax_ukernel_up8x9__fma3);
+    }
+  }
+
+  TEST(F32_DWCONV_MINMAX_UP8X9__FMA3, zero) {
+    TEST_REQUIRES_X86_FMA3;
+    for (uint32_t mz = 0; mz < 9; mz++) {
+      for (uint32_t channels = 16; channels < 128; channels += 24) {
+        DWConvMicrokernelTester()
+          .cr(8)
+          .kr(9)
+          .channels(channels)
+          .input_offset(176)
+          .zero_index(mz)
+          .Test(xnn_f32_dwconv_minmax_ukernel_up8x9__fma3);
+      }
+    }
+  }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
@@ -8930,6 +10415,33 @@
         .Test(xnn_f32_dwconv_minmax_ukernel_up8x9__fma3_acc2);
     }
   }
+
+  TEST(F32_DWCONV_MINMAX_UP8X9__FMA3_ACC2, input_offset) {
+    TEST_REQUIRES_X86_FMA3;
+    for (uint32_t channels = 16; channels < 128; channels += 24) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(9)
+        .channels(channels)
+        .input_offset(176)
+        .Test(xnn_f32_dwconv_minmax_ukernel_up8x9__fma3_acc2);
+    }
+  }
+
+  TEST(F32_DWCONV_MINMAX_UP8X9__FMA3_ACC2, zero) {
+    TEST_REQUIRES_X86_FMA3;
+    for (uint32_t mz = 0; mz < 9; mz++) {
+      for (uint32_t channels = 16; channels < 128; channels += 24) {
+        DWConvMicrokernelTester()
+          .cr(8)
+          .kr(9)
+          .channels(channels)
+          .input_offset(176)
+          .zero_index(mz)
+          .Test(xnn_f32_dwconv_minmax_ukernel_up8x9__fma3_acc2);
+      }
+    }
+  }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
@@ -9089,6 +10601,33 @@
         .Test(xnn_f32_dwconv_minmax_ukernel_up16x9__fma3);
     }
   }
+
+  TEST(F32_DWCONV_MINMAX_UP16X9__FMA3, input_offset) {
+    TEST_REQUIRES_X86_FMA3;
+    for (uint32_t channels = 32; channels < 256; channels += 48) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(channels)
+        .input_offset(304)
+        .Test(xnn_f32_dwconv_minmax_ukernel_up16x9__fma3);
+    }
+  }
+
+  TEST(F32_DWCONV_MINMAX_UP16X9__FMA3, zero) {
+    TEST_REQUIRES_X86_FMA3;
+    for (uint32_t mz = 0; mz < 9; mz++) {
+      for (uint32_t channels = 32; channels < 256; channels += 48) {
+        DWConvMicrokernelTester()
+          .cr(16)
+          .kr(9)
+          .channels(channels)
+          .input_offset(304)
+          .zero_index(mz)
+          .Test(xnn_f32_dwconv_minmax_ukernel_up16x9__fma3);
+      }
+    }
+  }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
@@ -9248,6 +10787,33 @@
         .Test(xnn_f32_dwconv_minmax_ukernel_up16x9__fma3_acc2);
     }
   }
+
+  TEST(F32_DWCONV_MINMAX_UP16X9__FMA3_ACC2, input_offset) {
+    TEST_REQUIRES_X86_FMA3;
+    for (uint32_t channels = 32; channels < 256; channels += 48) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(channels)
+        .input_offset(304)
+        .Test(xnn_f32_dwconv_minmax_ukernel_up16x9__fma3_acc2);
+    }
+  }
+
+  TEST(F32_DWCONV_MINMAX_UP16X9__FMA3_ACC2, zero) {
+    TEST_REQUIRES_X86_FMA3;
+    for (uint32_t mz = 0; mz < 9; mz++) {
+      for (uint32_t channels = 32; channels < 256; channels += 48) {
+        DWConvMicrokernelTester()
+          .cr(16)
+          .kr(9)
+          .channels(channels)
+          .input_offset(304)
+          .zero_index(mz)
+          .Test(xnn_f32_dwconv_minmax_ukernel_up16x9__fma3_acc2);
+      }
+    }
+  }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
@@ -9407,6 +10973,33 @@
         .Test(xnn_f32_dwconv_minmax_ukernel_up8x4__fma3);
     }
   }
+
+  TEST(F32_DWCONV_MINMAX_UP8X4__FMA3, input_offset) {
+    TEST_REQUIRES_X86_FMA3;
+    for (uint32_t channels = 16; channels < 128; channels += 24) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(4)
+        .channels(channels)
+        .input_offset(176)
+        .Test(xnn_f32_dwconv_minmax_ukernel_up8x4__fma3);
+    }
+  }
+
+  TEST(F32_DWCONV_MINMAX_UP8X4__FMA3, zero) {
+    TEST_REQUIRES_X86_FMA3;
+    for (uint32_t mz = 0; mz < 4; mz++) {
+      for (uint32_t channels = 16; channels < 128; channels += 24) {
+        DWConvMicrokernelTester()
+          .cr(8)
+          .kr(4)
+          .channels(channels)
+          .input_offset(176)
+          .zero_index(mz)
+          .Test(xnn_f32_dwconv_minmax_ukernel_up8x4__fma3);
+      }
+    }
+  }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
@@ -9566,6 +11159,33 @@
         .Test(xnn_f32_dwconv_minmax_ukernel_up8x4__fma3_acc2);
     }
   }
+
+  TEST(F32_DWCONV_MINMAX_UP8X4__FMA3_ACC2, input_offset) {
+    TEST_REQUIRES_X86_FMA3;
+    for (uint32_t channels = 16; channels < 128; channels += 24) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(4)
+        .channels(channels)
+        .input_offset(176)
+        .Test(xnn_f32_dwconv_minmax_ukernel_up8x4__fma3_acc2);
+    }
+  }
+
+  TEST(F32_DWCONV_MINMAX_UP8X4__FMA3_ACC2, zero) {
+    TEST_REQUIRES_X86_FMA3;
+    for (uint32_t mz = 0; mz < 4; mz++) {
+      for (uint32_t channels = 16; channels < 128; channels += 24) {
+        DWConvMicrokernelTester()
+          .cr(8)
+          .kr(4)
+          .channels(channels)
+          .input_offset(176)
+          .zero_index(mz)
+          .Test(xnn_f32_dwconv_minmax_ukernel_up8x4__fma3_acc2);
+      }
+    }
+  }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
@@ -9725,6 +11345,33 @@
         .Test(xnn_f32_dwconv_minmax_ukernel_up16x4__fma3);
     }
   }
+
+  TEST(F32_DWCONV_MINMAX_UP16X4__FMA3, input_offset) {
+    TEST_REQUIRES_X86_FMA3;
+    for (uint32_t channels = 32; channels < 256; channels += 48) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(4)
+        .channels(channels)
+        .input_offset(304)
+        .Test(xnn_f32_dwconv_minmax_ukernel_up16x4__fma3);
+    }
+  }
+
+  TEST(F32_DWCONV_MINMAX_UP16X4__FMA3, zero) {
+    TEST_REQUIRES_X86_FMA3;
+    for (uint32_t mz = 0; mz < 4; mz++) {
+      for (uint32_t channels = 32; channels < 256; channels += 48) {
+        DWConvMicrokernelTester()
+          .cr(16)
+          .kr(4)
+          .channels(channels)
+          .input_offset(304)
+          .zero_index(mz)
+          .Test(xnn_f32_dwconv_minmax_ukernel_up16x4__fma3);
+      }
+    }
+  }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
@@ -9884,6 +11531,33 @@
         .Test(xnn_f32_dwconv_minmax_ukernel_up16x4__fma3_acc2);
     }
   }
+
+  TEST(F32_DWCONV_MINMAX_UP16X4__FMA3_ACC2, input_offset) {
+    TEST_REQUIRES_X86_FMA3;
+    for (uint32_t channels = 32; channels < 256; channels += 48) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(4)
+        .channels(channels)
+        .input_offset(304)
+        .Test(xnn_f32_dwconv_minmax_ukernel_up16x4__fma3_acc2);
+    }
+  }
+
+  TEST(F32_DWCONV_MINMAX_UP16X4__FMA3_ACC2, zero) {
+    TEST_REQUIRES_X86_FMA3;
+    for (uint32_t mz = 0; mz < 4; mz++) {
+      for (uint32_t channels = 32; channels < 256; channels += 48) {
+        DWConvMicrokernelTester()
+          .cr(16)
+          .kr(4)
+          .channels(channels)
+          .input_offset(304)
+          .zero_index(mz)
+          .Test(xnn_f32_dwconv_minmax_ukernel_up16x4__fma3_acc2);
+      }
+    }
+  }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
@@ -10043,6 +11717,33 @@
         .Test(xnn_f32_dwconv_minmax_ukernel_up16x25__avx512f);
     }
   }
+
+  TEST(F32_DWCONV_MINMAX_UP16X25__AVX512F, input_offset) {
+    TEST_REQUIRES_X86_AVX512F;
+    for (uint32_t channels = 32; channels < 256; channels += 48) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(channels)
+        .input_offset(304)
+        .Test(xnn_f32_dwconv_minmax_ukernel_up16x25__avx512f);
+    }
+  }
+
+  TEST(F32_DWCONV_MINMAX_UP16X25__AVX512F, zero) {
+    TEST_REQUIRES_X86_AVX512F;
+    for (uint32_t mz = 0; mz < 25; mz++) {
+      for (uint32_t channels = 32; channels < 256; channels += 48) {
+        DWConvMicrokernelTester()
+          .cr(16)
+          .kr(25)
+          .channels(channels)
+          .input_offset(304)
+          .zero_index(mz)
+          .Test(xnn_f32_dwconv_minmax_ukernel_up16x25__avx512f);
+      }
+    }
+  }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
@@ -10202,6 +11903,33 @@
         .Test(xnn_f32_dwconv_minmax_ukernel_up16x25__avx512f_acc2);
     }
   }
+
+  TEST(F32_DWCONV_MINMAX_UP16X25__AVX512F_ACC2, input_offset) {
+    TEST_REQUIRES_X86_AVX512F;
+    for (uint32_t channels = 32; channels < 256; channels += 48) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(channels)
+        .input_offset(304)
+        .Test(xnn_f32_dwconv_minmax_ukernel_up16x25__avx512f_acc2);
+    }
+  }
+
+  TEST(F32_DWCONV_MINMAX_UP16X25__AVX512F_ACC2, zero) {
+    TEST_REQUIRES_X86_AVX512F;
+    for (uint32_t mz = 0; mz < 25; mz++) {
+      for (uint32_t channels = 32; channels < 256; channels += 48) {
+        DWConvMicrokernelTester()
+          .cr(16)
+          .kr(25)
+          .channels(channels)
+          .input_offset(304)
+          .zero_index(mz)
+          .Test(xnn_f32_dwconv_minmax_ukernel_up16x25__avx512f_acc2);
+      }
+    }
+  }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
@@ -10361,6 +12089,33 @@
         .Test(xnn_f32_dwconv_minmax_ukernel_up32x25__avx512f);
     }
   }
+
+  TEST(F32_DWCONV_MINMAX_UP32X25__AVX512F, input_offset) {
+    TEST_REQUIRES_X86_AVX512F;
+    for (uint32_t channels = 64; channels < 512; channels += 96) {
+      DWConvMicrokernelTester()
+        .cr(32)
+        .kr(25)
+        .channels(channels)
+        .input_offset(592)
+        .Test(xnn_f32_dwconv_minmax_ukernel_up32x25__avx512f);
+    }
+  }
+
+  TEST(F32_DWCONV_MINMAX_UP32X25__AVX512F, zero) {
+    TEST_REQUIRES_X86_AVX512F;
+    for (uint32_t mz = 0; mz < 25; mz++) {
+      for (uint32_t channels = 64; channels < 512; channels += 96) {
+        DWConvMicrokernelTester()
+          .cr(32)
+          .kr(25)
+          .channels(channels)
+          .input_offset(592)
+          .zero_index(mz)
+          .Test(xnn_f32_dwconv_minmax_ukernel_up32x25__avx512f);
+      }
+    }
+  }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
@@ -10520,6 +12275,33 @@
         .Test(xnn_f32_dwconv_minmax_ukernel_up32x25__avx512f_acc2);
     }
   }
+
+  TEST(F32_DWCONV_MINMAX_UP32X25__AVX512F_ACC2, input_offset) {
+    TEST_REQUIRES_X86_AVX512F;
+    for (uint32_t channels = 64; channels < 512; channels += 96) {
+      DWConvMicrokernelTester()
+        .cr(32)
+        .kr(25)
+        .channels(channels)
+        .input_offset(592)
+        .Test(xnn_f32_dwconv_minmax_ukernel_up32x25__avx512f_acc2);
+    }
+  }
+
+  TEST(F32_DWCONV_MINMAX_UP32X25__AVX512F_ACC2, zero) {
+    TEST_REQUIRES_X86_AVX512F;
+    for (uint32_t mz = 0; mz < 25; mz++) {
+      for (uint32_t channels = 64; channels < 512; channels += 96) {
+        DWConvMicrokernelTester()
+          .cr(32)
+          .kr(25)
+          .channels(channels)
+          .input_offset(592)
+          .zero_index(mz)
+          .Test(xnn_f32_dwconv_minmax_ukernel_up32x25__avx512f_acc2);
+      }
+    }
+  }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
@@ -10679,6 +12461,33 @@
         .Test(xnn_f32_dwconv_minmax_ukernel_up16x9__avx512f);
     }
   }
+
+  TEST(F32_DWCONV_MINMAX_UP16X9__AVX512F, input_offset) {
+    TEST_REQUIRES_X86_AVX512F;
+    for (uint32_t channels = 32; channels < 256; channels += 48) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(channels)
+        .input_offset(304)
+        .Test(xnn_f32_dwconv_minmax_ukernel_up16x9__avx512f);
+    }
+  }
+
+  TEST(F32_DWCONV_MINMAX_UP16X9__AVX512F, zero) {
+    TEST_REQUIRES_X86_AVX512F;
+    for (uint32_t mz = 0; mz < 9; mz++) {
+      for (uint32_t channels = 32; channels < 256; channels += 48) {
+        DWConvMicrokernelTester()
+          .cr(16)
+          .kr(9)
+          .channels(channels)
+          .input_offset(304)
+          .zero_index(mz)
+          .Test(xnn_f32_dwconv_minmax_ukernel_up16x9__avx512f);
+      }
+    }
+  }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
@@ -10838,6 +12647,33 @@
         .Test(xnn_f32_dwconv_minmax_ukernel_up16x9__avx512f_acc2);
     }
   }
+
+  TEST(F32_DWCONV_MINMAX_UP16X9__AVX512F_ACC2, input_offset) {
+    TEST_REQUIRES_X86_AVX512F;
+    for (uint32_t channels = 32; channels < 256; channels += 48) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(channels)
+        .input_offset(304)
+        .Test(xnn_f32_dwconv_minmax_ukernel_up16x9__avx512f_acc2);
+    }
+  }
+
+  TEST(F32_DWCONV_MINMAX_UP16X9__AVX512F_ACC2, zero) {
+    TEST_REQUIRES_X86_AVX512F;
+    for (uint32_t mz = 0; mz < 9; mz++) {
+      for (uint32_t channels = 32; channels < 256; channels += 48) {
+        DWConvMicrokernelTester()
+          .cr(16)
+          .kr(9)
+          .channels(channels)
+          .input_offset(304)
+          .zero_index(mz)
+          .Test(xnn_f32_dwconv_minmax_ukernel_up16x9__avx512f_acc2);
+      }
+    }
+  }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
@@ -10997,6 +12833,33 @@
         .Test(xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f);
     }
   }
+
+  TEST(F32_DWCONV_MINMAX_UP32X9__AVX512F, input_offset) {
+    TEST_REQUIRES_X86_AVX512F;
+    for (uint32_t channels = 64; channels < 512; channels += 96) {
+      DWConvMicrokernelTester()
+        .cr(32)
+        .kr(9)
+        .channels(channels)
+        .input_offset(592)
+        .Test(xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f);
+    }
+  }
+
+  TEST(F32_DWCONV_MINMAX_UP32X9__AVX512F, zero) {
+    TEST_REQUIRES_X86_AVX512F;
+    for (uint32_t mz = 0; mz < 9; mz++) {
+      for (uint32_t channels = 64; channels < 512; channels += 96) {
+        DWConvMicrokernelTester()
+          .cr(32)
+          .kr(9)
+          .channels(channels)
+          .input_offset(592)
+          .zero_index(mz)
+          .Test(xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f);
+      }
+    }
+  }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
@@ -11156,6 +13019,33 @@
         .Test(xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f_acc2);
     }
   }
+
+  TEST(F32_DWCONV_MINMAX_UP32X9__AVX512F_ACC2, input_offset) {
+    TEST_REQUIRES_X86_AVX512F;
+    for (uint32_t channels = 64; channels < 512; channels += 96) {
+      DWConvMicrokernelTester()
+        .cr(32)
+        .kr(9)
+        .channels(channels)
+        .input_offset(592)
+        .Test(xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f_acc2);
+    }
+  }
+
+  TEST(F32_DWCONV_MINMAX_UP32X9__AVX512F_ACC2, zero) {
+    TEST_REQUIRES_X86_AVX512F;
+    for (uint32_t mz = 0; mz < 9; mz++) {
+      for (uint32_t channels = 64; channels < 512; channels += 96) {
+        DWConvMicrokernelTester()
+          .cr(32)
+          .kr(9)
+          .channels(channels)
+          .input_offset(592)
+          .zero_index(mz)
+          .Test(xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f_acc2);
+      }
+    }
+  }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
@@ -11315,6 +13205,33 @@
         .Test(xnn_f32_dwconv_minmax_ukernel_up16x4__avx512f);
     }
   }
+
+  TEST(F32_DWCONV_MINMAX_UP16X4__AVX512F, input_offset) {
+    TEST_REQUIRES_X86_AVX512F;
+    for (uint32_t channels = 32; channels < 256; channels += 48) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(4)
+        .channels(channels)
+        .input_offset(304)
+        .Test(xnn_f32_dwconv_minmax_ukernel_up16x4__avx512f);
+    }
+  }
+
+  TEST(F32_DWCONV_MINMAX_UP16X4__AVX512F, zero) {
+    TEST_REQUIRES_X86_AVX512F;
+    for (uint32_t mz = 0; mz < 4; mz++) {
+      for (uint32_t channels = 32; channels < 256; channels += 48) {
+        DWConvMicrokernelTester()
+          .cr(16)
+          .kr(4)
+          .channels(channels)
+          .input_offset(304)
+          .zero_index(mz)
+          .Test(xnn_f32_dwconv_minmax_ukernel_up16x4__avx512f);
+      }
+    }
+  }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
@@ -11474,6 +13391,33 @@
         .Test(xnn_f32_dwconv_minmax_ukernel_up16x4__avx512f_acc2);
     }
   }
+
+  TEST(F32_DWCONV_MINMAX_UP16X4__AVX512F_ACC2, input_offset) {
+    TEST_REQUIRES_X86_AVX512F;
+    for (uint32_t channels = 32; channels < 256; channels += 48) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(4)
+        .channels(channels)
+        .input_offset(304)
+        .Test(xnn_f32_dwconv_minmax_ukernel_up16x4__avx512f_acc2);
+    }
+  }
+
+  TEST(F32_DWCONV_MINMAX_UP16X4__AVX512F_ACC2, zero) {
+    TEST_REQUIRES_X86_AVX512F;
+    for (uint32_t mz = 0; mz < 4; mz++) {
+      for (uint32_t channels = 32; channels < 256; channels += 48) {
+        DWConvMicrokernelTester()
+          .cr(16)
+          .kr(4)
+          .channels(channels)
+          .input_offset(304)
+          .zero_index(mz)
+          .Test(xnn_f32_dwconv_minmax_ukernel_up16x4__avx512f_acc2);
+      }
+    }
+  }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
@@ -11633,6 +13577,33 @@
         .Test(xnn_f32_dwconv_minmax_ukernel_up32x4__avx512f);
     }
   }
+
+  TEST(F32_DWCONV_MINMAX_UP32X4__AVX512F, input_offset) {
+    TEST_REQUIRES_X86_AVX512F;
+    for (uint32_t channels = 64; channels < 512; channels += 96) {
+      DWConvMicrokernelTester()
+        .cr(32)
+        .kr(4)
+        .channels(channels)
+        .input_offset(592)
+        .Test(xnn_f32_dwconv_minmax_ukernel_up32x4__avx512f);
+    }
+  }
+
+  TEST(F32_DWCONV_MINMAX_UP32X4__AVX512F, zero) {
+    TEST_REQUIRES_X86_AVX512F;
+    for (uint32_t mz = 0; mz < 4; mz++) {
+      for (uint32_t channels = 64; channels < 512; channels += 96) {
+        DWConvMicrokernelTester()
+          .cr(32)
+          .kr(4)
+          .channels(channels)
+          .input_offset(592)
+          .zero_index(mz)
+          .Test(xnn_f32_dwconv_minmax_ukernel_up32x4__avx512f);
+      }
+    }
+  }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
@@ -11792,6 +13763,33 @@
         .Test(xnn_f32_dwconv_minmax_ukernel_up32x4__avx512f_acc2);
     }
   }
+
+  TEST(F32_DWCONV_MINMAX_UP32X4__AVX512F_ACC2, input_offset) {
+    TEST_REQUIRES_X86_AVX512F;
+    for (uint32_t channels = 64; channels < 512; channels += 96) {
+      DWConvMicrokernelTester()
+        .cr(32)
+        .kr(4)
+        .channels(channels)
+        .input_offset(592)
+        .Test(xnn_f32_dwconv_minmax_ukernel_up32x4__avx512f_acc2);
+    }
+  }
+
+  TEST(F32_DWCONV_MINMAX_UP32X4__AVX512F_ACC2, zero) {
+    TEST_REQUIRES_X86_AVX512F;
+    for (uint32_t mz = 0; mz < 4; mz++) {
+      for (uint32_t channels = 64; channels < 512; channels += 96) {
+        DWConvMicrokernelTester()
+          .cr(32)
+          .kr(4)
+          .channels(channels)
+          .input_offset(592)
+          .zero_index(mz)
+          .Test(xnn_f32_dwconv_minmax_ukernel_up32x4__avx512f_acc2);
+      }
+    }
+  }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
@@ -11951,6 +13949,33 @@
         .Test(xnn_f32_dwconv_minmax_ukernel_up4x25__psimd, DWConvMicrokernelTester::Variant::Scalar);
     }
   }
+
+  TEST(F32_DWCONV_MINMAX_UP4X25__PSIMD, input_offset) {
+    TEST_REQUIRES_PSIMD;
+    for (uint32_t channels = 8; channels < 64; channels += 12) {
+      DWConvMicrokernelTester()
+        .cr(4)
+        .kr(25)
+        .channels(channels)
+        .input_offset(112)
+        .Test(xnn_f32_dwconv_minmax_ukernel_up4x25__psimd, DWConvMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  TEST(F32_DWCONV_MINMAX_UP4X25__PSIMD, zero) {
+    TEST_REQUIRES_PSIMD;
+    for (uint32_t mz = 0; mz < 25; mz++) {
+      for (uint32_t channels = 8; channels < 64; channels += 12) {
+        DWConvMicrokernelTester()
+          .cr(4)
+          .kr(25)
+          .channels(channels)
+          .input_offset(112)
+          .zero_index(mz)
+          .Test(xnn_f32_dwconv_minmax_ukernel_up4x25__psimd, DWConvMicrokernelTester::Variant::Scalar);
+      }
+    }
+  }
 #endif  // !XNN_ARCH_ASMJS && !XNN_ARCH_WASM && !XNN_COMPILER_MSVC && !XNN_COMPILER_ICC
 
 
@@ -12110,6 +14135,33 @@
         .Test(xnn_f32_dwconv_minmax_ukernel_up4x25__psimd_acc2, DWConvMicrokernelTester::Variant::Scalar);
     }
   }
+
+  TEST(F32_DWCONV_MINMAX_UP4X25__PSIMD_ACC2, input_offset) {
+    TEST_REQUIRES_PSIMD;
+    for (uint32_t channels = 8; channels < 64; channels += 12) {
+      DWConvMicrokernelTester()
+        .cr(4)
+        .kr(25)
+        .channels(channels)
+        .input_offset(112)
+        .Test(xnn_f32_dwconv_minmax_ukernel_up4x25__psimd_acc2, DWConvMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  TEST(F32_DWCONV_MINMAX_UP4X25__PSIMD_ACC2, zero) {
+    TEST_REQUIRES_PSIMD;
+    for (uint32_t mz = 0; mz < 25; mz++) {
+      for (uint32_t channels = 8; channels < 64; channels += 12) {
+        DWConvMicrokernelTester()
+          .cr(4)
+          .kr(25)
+          .channels(channels)
+          .input_offset(112)
+          .zero_index(mz)
+          .Test(xnn_f32_dwconv_minmax_ukernel_up4x25__psimd_acc2, DWConvMicrokernelTester::Variant::Scalar);
+      }
+    }
+  }
 #endif  // !XNN_ARCH_ASMJS && !XNN_ARCH_WASM && !XNN_COMPILER_MSVC && !XNN_COMPILER_ICC
 
 
@@ -12269,6 +14321,33 @@
         .Test(xnn_f32_dwconv_minmax_ukernel_up8x25__psimd, DWConvMicrokernelTester::Variant::Scalar);
     }
   }
+
+  TEST(F32_DWCONV_MINMAX_UP8X25__PSIMD, input_offset) {
+    TEST_REQUIRES_PSIMD;
+    for (uint32_t channels = 16; channels < 128; channels += 24) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(25)
+        .channels(channels)
+        .input_offset(176)
+        .Test(xnn_f32_dwconv_minmax_ukernel_up8x25__psimd, DWConvMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  TEST(F32_DWCONV_MINMAX_UP8X25__PSIMD, zero) {
+    TEST_REQUIRES_PSIMD;
+    for (uint32_t mz = 0; mz < 25; mz++) {
+      for (uint32_t channels = 16; channels < 128; channels += 24) {
+        DWConvMicrokernelTester()
+          .cr(8)
+          .kr(25)
+          .channels(channels)
+          .input_offset(176)
+          .zero_index(mz)
+          .Test(xnn_f32_dwconv_minmax_ukernel_up8x25__psimd, DWConvMicrokernelTester::Variant::Scalar);
+      }
+    }
+  }
 #endif  // !XNN_ARCH_ASMJS && !XNN_ARCH_WASM && !XNN_COMPILER_MSVC && !XNN_COMPILER_ICC
 
 
@@ -12428,6 +14507,33 @@
         .Test(xnn_f32_dwconv_minmax_ukernel_up8x25__psimd_acc2, DWConvMicrokernelTester::Variant::Scalar);
     }
   }
+
+  TEST(F32_DWCONV_MINMAX_UP8X25__PSIMD_ACC2, input_offset) {
+    TEST_REQUIRES_PSIMD;
+    for (uint32_t channels = 16; channels < 128; channels += 24) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(25)
+        .channels(channels)
+        .input_offset(176)
+        .Test(xnn_f32_dwconv_minmax_ukernel_up8x25__psimd_acc2, DWConvMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  TEST(F32_DWCONV_MINMAX_UP8X25__PSIMD_ACC2, zero) {
+    TEST_REQUIRES_PSIMD;
+    for (uint32_t mz = 0; mz < 25; mz++) {
+      for (uint32_t channels = 16; channels < 128; channels += 24) {
+        DWConvMicrokernelTester()
+          .cr(8)
+          .kr(25)
+          .channels(channels)
+          .input_offset(176)
+          .zero_index(mz)
+          .Test(xnn_f32_dwconv_minmax_ukernel_up8x25__psimd_acc2, DWConvMicrokernelTester::Variant::Scalar);
+      }
+    }
+  }
 #endif  // !XNN_ARCH_ASMJS && !XNN_ARCH_WASM && !XNN_COMPILER_MSVC && !XNN_COMPILER_ICC
 
 
@@ -12587,6 +14693,33 @@
         .Test(xnn_f32_dwconv_minmax_ukernel_up4x9__psimd, DWConvMicrokernelTester::Variant::Scalar);
     }
   }
+
+  TEST(F32_DWCONV_MINMAX_UP4X9__PSIMD, input_offset) {
+    TEST_REQUIRES_PSIMD;
+    for (uint32_t channels = 8; channels < 64; channels += 12) {
+      DWConvMicrokernelTester()
+        .cr(4)
+        .kr(9)
+        .channels(channels)
+        .input_offset(112)
+        .Test(xnn_f32_dwconv_minmax_ukernel_up4x9__psimd, DWConvMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  TEST(F32_DWCONV_MINMAX_UP4X9__PSIMD, zero) {
+    TEST_REQUIRES_PSIMD;
+    for (uint32_t mz = 0; mz < 9; mz++) {
+      for (uint32_t channels = 8; channels < 64; channels += 12) {
+        DWConvMicrokernelTester()
+          .cr(4)
+          .kr(9)
+          .channels(channels)
+          .input_offset(112)
+          .zero_index(mz)
+          .Test(xnn_f32_dwconv_minmax_ukernel_up4x9__psimd, DWConvMicrokernelTester::Variant::Scalar);
+      }
+    }
+  }
 #endif  // !XNN_ARCH_ASMJS && !XNN_ARCH_WASM && !XNN_COMPILER_MSVC && !XNN_COMPILER_ICC
 
 
@@ -12746,6 +14879,33 @@
         .Test(xnn_f32_dwconv_minmax_ukernel_up4x9__psimd_acc2, DWConvMicrokernelTester::Variant::Scalar);
     }
   }
+
+  TEST(F32_DWCONV_MINMAX_UP4X9__PSIMD_ACC2, input_offset) {
+    TEST_REQUIRES_PSIMD;
+    for (uint32_t channels = 8; channels < 64; channels += 12) {
+      DWConvMicrokernelTester()
+        .cr(4)
+        .kr(9)
+        .channels(channels)
+        .input_offset(112)
+        .Test(xnn_f32_dwconv_minmax_ukernel_up4x9__psimd_acc2, DWConvMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  TEST(F32_DWCONV_MINMAX_UP4X9__PSIMD_ACC2, zero) {
+    TEST_REQUIRES_PSIMD;
+    for (uint32_t mz = 0; mz < 9; mz++) {
+      for (uint32_t channels = 8; channels < 64; channels += 12) {
+        DWConvMicrokernelTester()
+          .cr(4)
+          .kr(9)
+          .channels(channels)
+          .input_offset(112)
+          .zero_index(mz)
+          .Test(xnn_f32_dwconv_minmax_ukernel_up4x9__psimd_acc2, DWConvMicrokernelTester::Variant::Scalar);
+      }
+    }
+  }
 #endif  // !XNN_ARCH_ASMJS && !XNN_ARCH_WASM && !XNN_COMPILER_MSVC && !XNN_COMPILER_ICC
 
 
@@ -12905,6 +15065,33 @@
         .Test(xnn_f32_dwconv_minmax_ukernel_up8x9__psimd, DWConvMicrokernelTester::Variant::Scalar);
     }
   }
+
+  TEST(F32_DWCONV_MINMAX_UP8X9__PSIMD, input_offset) {
+    TEST_REQUIRES_PSIMD;
+    for (uint32_t channels = 16; channels < 128; channels += 24) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(9)
+        .channels(channels)
+        .input_offset(176)
+        .Test(xnn_f32_dwconv_minmax_ukernel_up8x9__psimd, DWConvMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  TEST(F32_DWCONV_MINMAX_UP8X9__PSIMD, zero) {
+    TEST_REQUIRES_PSIMD;
+    for (uint32_t mz = 0; mz < 9; mz++) {
+      for (uint32_t channels = 16; channels < 128; channels += 24) {
+        DWConvMicrokernelTester()
+          .cr(8)
+          .kr(9)
+          .channels(channels)
+          .input_offset(176)
+          .zero_index(mz)
+          .Test(xnn_f32_dwconv_minmax_ukernel_up8x9__psimd, DWConvMicrokernelTester::Variant::Scalar);
+      }
+    }
+  }
 #endif  // !XNN_ARCH_ASMJS && !XNN_ARCH_WASM && !XNN_COMPILER_MSVC && !XNN_COMPILER_ICC
 
 
@@ -13064,6 +15251,33 @@
         .Test(xnn_f32_dwconv_minmax_ukernel_up8x9__psimd_acc2, DWConvMicrokernelTester::Variant::Scalar);
     }
   }
+
+  TEST(F32_DWCONV_MINMAX_UP8X9__PSIMD_ACC2, input_offset) {
+    TEST_REQUIRES_PSIMD;
+    for (uint32_t channels = 16; channels < 128; channels += 24) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(9)
+        .channels(channels)
+        .input_offset(176)
+        .Test(xnn_f32_dwconv_minmax_ukernel_up8x9__psimd_acc2, DWConvMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  TEST(F32_DWCONV_MINMAX_UP8X9__PSIMD_ACC2, zero) {
+    TEST_REQUIRES_PSIMD;
+    for (uint32_t mz = 0; mz < 9; mz++) {
+      for (uint32_t channels = 16; channels < 128; channels += 24) {
+        DWConvMicrokernelTester()
+          .cr(8)
+          .kr(9)
+          .channels(channels)
+          .input_offset(176)
+          .zero_index(mz)
+          .Test(xnn_f32_dwconv_minmax_ukernel_up8x9__psimd_acc2, DWConvMicrokernelTester::Variant::Scalar);
+      }
+    }
+  }
 #endif  // !XNN_ARCH_ASMJS && !XNN_ARCH_WASM && !XNN_COMPILER_MSVC && !XNN_COMPILER_ICC
 
 
@@ -13223,6 +15437,33 @@
         .Test(xnn_f32_dwconv_minmax_ukernel_up4x4__psimd, DWConvMicrokernelTester::Variant::Scalar);
     }
   }
+
+  TEST(F32_DWCONV_MINMAX_UP4X4__PSIMD, input_offset) {
+    TEST_REQUIRES_PSIMD;
+    for (uint32_t channels = 8; channels < 64; channels += 12) {
+      DWConvMicrokernelTester()
+        .cr(4)
+        .kr(4)
+        .channels(channels)
+        .input_offset(112)
+        .Test(xnn_f32_dwconv_minmax_ukernel_up4x4__psimd, DWConvMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  TEST(F32_DWCONV_MINMAX_UP4X4__PSIMD, zero) {
+    TEST_REQUIRES_PSIMD;
+    for (uint32_t mz = 0; mz < 4; mz++) {
+      for (uint32_t channels = 8; channels < 64; channels += 12) {
+        DWConvMicrokernelTester()
+          .cr(4)
+          .kr(4)
+          .channels(channels)
+          .input_offset(112)
+          .zero_index(mz)
+          .Test(xnn_f32_dwconv_minmax_ukernel_up4x4__psimd, DWConvMicrokernelTester::Variant::Scalar);
+      }
+    }
+  }
 #endif  // !XNN_ARCH_ASMJS && !XNN_ARCH_WASM && !XNN_COMPILER_MSVC && !XNN_COMPILER_ICC
 
 
@@ -13382,6 +15623,33 @@
         .Test(xnn_f32_dwconv_minmax_ukernel_up4x4__psimd_acc2, DWConvMicrokernelTester::Variant::Scalar);
     }
   }
+
+  TEST(F32_DWCONV_MINMAX_UP4X4__PSIMD_ACC2, input_offset) {
+    TEST_REQUIRES_PSIMD;
+    for (uint32_t channels = 8; channels < 64; channels += 12) {
+      DWConvMicrokernelTester()
+        .cr(4)
+        .kr(4)
+        .channels(channels)
+        .input_offset(112)
+        .Test(xnn_f32_dwconv_minmax_ukernel_up4x4__psimd_acc2, DWConvMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  TEST(F32_DWCONV_MINMAX_UP4X4__PSIMD_ACC2, zero) {
+    TEST_REQUIRES_PSIMD;
+    for (uint32_t mz = 0; mz < 4; mz++) {
+      for (uint32_t channels = 8; channels < 64; channels += 12) {
+        DWConvMicrokernelTester()
+          .cr(4)
+          .kr(4)
+          .channels(channels)
+          .input_offset(112)
+          .zero_index(mz)
+          .Test(xnn_f32_dwconv_minmax_ukernel_up4x4__psimd_acc2, DWConvMicrokernelTester::Variant::Scalar);
+      }
+    }
+  }
 #endif  // !XNN_ARCH_ASMJS && !XNN_ARCH_WASM && !XNN_COMPILER_MSVC && !XNN_COMPILER_ICC
 
 
@@ -13541,6 +15809,33 @@
         .Test(xnn_f32_dwconv_minmax_ukernel_up8x4__psimd, DWConvMicrokernelTester::Variant::Scalar);
     }
   }
+
+  TEST(F32_DWCONV_MINMAX_UP8X4__PSIMD, input_offset) {
+    TEST_REQUIRES_PSIMD;
+    for (uint32_t channels = 16; channels < 128; channels += 24) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(4)
+        .channels(channels)
+        .input_offset(176)
+        .Test(xnn_f32_dwconv_minmax_ukernel_up8x4__psimd, DWConvMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  TEST(F32_DWCONV_MINMAX_UP8X4__PSIMD, zero) {
+    TEST_REQUIRES_PSIMD;
+    for (uint32_t mz = 0; mz < 4; mz++) {
+      for (uint32_t channels = 16; channels < 128; channels += 24) {
+        DWConvMicrokernelTester()
+          .cr(8)
+          .kr(4)
+          .channels(channels)
+          .input_offset(176)
+          .zero_index(mz)
+          .Test(xnn_f32_dwconv_minmax_ukernel_up8x4__psimd, DWConvMicrokernelTester::Variant::Scalar);
+      }
+    }
+  }
 #endif  // !XNN_ARCH_ASMJS && !XNN_ARCH_WASM && !XNN_COMPILER_MSVC && !XNN_COMPILER_ICC
 
 
@@ -13700,6 +15995,33 @@
         .Test(xnn_f32_dwconv_minmax_ukernel_up8x4__psimd_acc2, DWConvMicrokernelTester::Variant::Scalar);
     }
   }
+
+  TEST(F32_DWCONV_MINMAX_UP8X4__PSIMD_ACC2, input_offset) {
+    TEST_REQUIRES_PSIMD;
+    for (uint32_t channels = 16; channels < 128; channels += 24) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(4)
+        .channels(channels)
+        .input_offset(176)
+        .Test(xnn_f32_dwconv_minmax_ukernel_up8x4__psimd_acc2, DWConvMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  TEST(F32_DWCONV_MINMAX_UP8X4__PSIMD_ACC2, zero) {
+    TEST_REQUIRES_PSIMD;
+    for (uint32_t mz = 0; mz < 4; mz++) {
+      for (uint32_t channels = 16; channels < 128; channels += 24) {
+        DWConvMicrokernelTester()
+          .cr(8)
+          .kr(4)
+          .channels(channels)
+          .input_offset(176)
+          .zero_index(mz)
+          .Test(xnn_f32_dwconv_minmax_ukernel_up8x4__psimd_acc2, DWConvMicrokernelTester::Variant::Scalar);
+      }
+    }
+  }
 #endif  // !XNN_ARCH_ASMJS && !XNN_ARCH_WASM && !XNN_COMPILER_MSVC && !XNN_COMPILER_ICC
 
 
@@ -13804,6 +16126,31 @@
         .Test(xnn_f32_dwconv_minmax_ukernel_up1x4__wasm, DWConvMicrokernelTester::Variant::Scalar);
     }
   }
+
+  TEST(F32_DWCONV_MINMAX_UP1X4__WASM, input_offset) {
+    for (uint32_t channels = 2; channels < 16; channels += 3) {
+      DWConvMicrokernelTester()
+        .cr(1)
+        .kr(4)
+        .channels(channels)
+        .input_offset(48)
+        .Test(xnn_f32_dwconv_minmax_ukernel_up1x4__wasm, DWConvMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  TEST(F32_DWCONV_MINMAX_UP1X4__WASM, zero) {
+    for (uint32_t mz = 0; mz < 4; mz++) {
+      for (uint32_t channels = 2; channels < 16; channels += 3) {
+        DWConvMicrokernelTester()
+          .cr(1)
+          .kr(4)
+          .channels(channels)
+          .input_offset(48)
+          .zero_index(mz)
+          .Test(xnn_f32_dwconv_minmax_ukernel_up1x4__wasm, DWConvMicrokernelTester::Variant::Scalar);
+      }
+    }
+  }
 #endif  // XNN_ARCH_WASM
 
 
@@ -13908,6 +16255,31 @@
         .Test(xnn_f32_dwconv_minmax_ukernel_up1x4__wasm_acc2, DWConvMicrokernelTester::Variant::Scalar);
     }
   }
+
+  TEST(F32_DWCONV_MINMAX_UP1X4__WASM_ACC2, input_offset) {
+    for (uint32_t channels = 2; channels < 16; channels += 3) {
+      DWConvMicrokernelTester()
+        .cr(1)
+        .kr(4)
+        .channels(channels)
+        .input_offset(48)
+        .Test(xnn_f32_dwconv_minmax_ukernel_up1x4__wasm_acc2, DWConvMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  TEST(F32_DWCONV_MINMAX_UP1X4__WASM_ACC2, zero) {
+    for (uint32_t mz = 0; mz < 4; mz++) {
+      for (uint32_t channels = 2; channels < 16; channels += 3) {
+        DWConvMicrokernelTester()
+          .cr(1)
+          .kr(4)
+          .channels(channels)
+          .input_offset(48)
+          .zero_index(mz)
+          .Test(xnn_f32_dwconv_minmax_ukernel_up1x4__wasm_acc2, DWConvMicrokernelTester::Variant::Scalar);
+      }
+    }
+  }
 #endif  // XNN_ARCH_WASM
 
 
@@ -14054,6 +16426,31 @@
         .Test(xnn_f32_dwconv_minmax_ukernel_up2x4__wasm, DWConvMicrokernelTester::Variant::Scalar);
     }
   }
+
+  TEST(F32_DWCONV_MINMAX_UP2X4__WASM, input_offset) {
+    for (uint32_t channels = 4; channels < 32; channels += 6) {
+      DWConvMicrokernelTester()
+        .cr(2)
+        .kr(4)
+        .channels(channels)
+        .input_offset(80)
+        .Test(xnn_f32_dwconv_minmax_ukernel_up2x4__wasm, DWConvMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  TEST(F32_DWCONV_MINMAX_UP2X4__WASM, zero) {
+    for (uint32_t mz = 0; mz < 4; mz++) {
+      for (uint32_t channels = 4; channels < 32; channels += 6) {
+        DWConvMicrokernelTester()
+          .cr(2)
+          .kr(4)
+          .channels(channels)
+          .input_offset(80)
+          .zero_index(mz)
+          .Test(xnn_f32_dwconv_minmax_ukernel_up2x4__wasm, DWConvMicrokernelTester::Variant::Scalar);
+      }
+    }
+  }
 #endif  // XNN_ARCH_WASM
 
 
@@ -14200,6 +16597,31 @@
         .Test(xnn_f32_dwconv_minmax_ukernel_up2x4__wasm_acc2, DWConvMicrokernelTester::Variant::Scalar);
     }
   }
+
+  TEST(F32_DWCONV_MINMAX_UP2X4__WASM_ACC2, input_offset) {
+    for (uint32_t channels = 4; channels < 32; channels += 6) {
+      DWConvMicrokernelTester()
+        .cr(2)
+        .kr(4)
+        .channels(channels)
+        .input_offset(80)
+        .Test(xnn_f32_dwconv_minmax_ukernel_up2x4__wasm_acc2, DWConvMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  TEST(F32_DWCONV_MINMAX_UP2X4__WASM_ACC2, zero) {
+    for (uint32_t mz = 0; mz < 4; mz++) {
+      for (uint32_t channels = 4; channels < 32; channels += 6) {
+        DWConvMicrokernelTester()
+          .cr(2)
+          .kr(4)
+          .channels(channels)
+          .input_offset(80)
+          .zero_index(mz)
+          .Test(xnn_f32_dwconv_minmax_ukernel_up2x4__wasm_acc2, DWConvMicrokernelTester::Variant::Scalar);
+      }
+    }
+  }
 #endif  // XNN_ARCH_WASM
 
 
@@ -14304,6 +16726,31 @@
         .Test(xnn_f32_dwconv_minmax_ukernel_up1x9__wasm, DWConvMicrokernelTester::Variant::Scalar);
     }
   }
+
+  TEST(F32_DWCONV_MINMAX_UP1X9__WASM, input_offset) {
+    for (uint32_t channels = 2; channels < 16; channels += 3) {
+      DWConvMicrokernelTester()
+        .cr(1)
+        .kr(9)
+        .channels(channels)
+        .input_offset(48)
+        .Test(xnn_f32_dwconv_minmax_ukernel_up1x9__wasm, DWConvMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  TEST(F32_DWCONV_MINMAX_UP1X9__WASM, zero) {
+    for (uint32_t mz = 0; mz < 9; mz++) {
+      for (uint32_t channels = 2; channels < 16; channels += 3) {
+        DWConvMicrokernelTester()
+          .cr(1)
+          .kr(9)
+          .channels(channels)
+          .input_offset(48)
+          .zero_index(mz)
+          .Test(xnn_f32_dwconv_minmax_ukernel_up1x9__wasm, DWConvMicrokernelTester::Variant::Scalar);
+      }
+    }
+  }
 #endif  // XNN_ARCH_WASM
 
 
@@ -14408,6 +16855,31 @@
         .Test(xnn_f32_dwconv_minmax_ukernel_up1x9__wasm_acc2, DWConvMicrokernelTester::Variant::Scalar);
     }
   }
+
+  TEST(F32_DWCONV_MINMAX_UP1X9__WASM_ACC2, input_offset) {
+    for (uint32_t channels = 2; channels < 16; channels += 3) {
+      DWConvMicrokernelTester()
+        .cr(1)
+        .kr(9)
+        .channels(channels)
+        .input_offset(48)
+        .Test(xnn_f32_dwconv_minmax_ukernel_up1x9__wasm_acc2, DWConvMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  TEST(F32_DWCONV_MINMAX_UP1X9__WASM_ACC2, zero) {
+    for (uint32_t mz = 0; mz < 9; mz++) {
+      for (uint32_t channels = 2; channels < 16; channels += 3) {
+        DWConvMicrokernelTester()
+          .cr(1)
+          .kr(9)
+          .channels(channels)
+          .input_offset(48)
+          .zero_index(mz)
+          .Test(xnn_f32_dwconv_minmax_ukernel_up1x9__wasm_acc2, DWConvMicrokernelTester::Variant::Scalar);
+      }
+    }
+  }
 #endif  // XNN_ARCH_WASM
 
 
@@ -14554,6 +17026,31 @@
         .Test(xnn_f32_dwconv_minmax_ukernel_up2x9__wasm, DWConvMicrokernelTester::Variant::Scalar);
     }
   }
+
+  TEST(F32_DWCONV_MINMAX_UP2X9__WASM, input_offset) {
+    for (uint32_t channels = 4; channels < 32; channels += 6) {
+      DWConvMicrokernelTester()
+        .cr(2)
+        .kr(9)
+        .channels(channels)
+        .input_offset(80)
+        .Test(xnn_f32_dwconv_minmax_ukernel_up2x9__wasm, DWConvMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  TEST(F32_DWCONV_MINMAX_UP2X9__WASM, zero) {
+    for (uint32_t mz = 0; mz < 9; mz++) {
+      for (uint32_t channels = 4; channels < 32; channels += 6) {
+        DWConvMicrokernelTester()
+          .cr(2)
+          .kr(9)
+          .channels(channels)
+          .input_offset(80)
+          .zero_index(mz)
+          .Test(xnn_f32_dwconv_minmax_ukernel_up2x9__wasm, DWConvMicrokernelTester::Variant::Scalar);
+      }
+    }
+  }
 #endif  // XNN_ARCH_WASM
 
 
@@ -14700,6 +17197,31 @@
         .Test(xnn_f32_dwconv_minmax_ukernel_up2x9__wasm_acc2, DWConvMicrokernelTester::Variant::Scalar);
     }
   }
+
+  TEST(F32_DWCONV_MINMAX_UP2X9__WASM_ACC2, input_offset) {
+    for (uint32_t channels = 4; channels < 32; channels += 6) {
+      DWConvMicrokernelTester()
+        .cr(2)
+        .kr(9)
+        .channels(channels)
+        .input_offset(80)
+        .Test(xnn_f32_dwconv_minmax_ukernel_up2x9__wasm_acc2, DWConvMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  TEST(F32_DWCONV_MINMAX_UP2X9__WASM_ACC2, zero) {
+    for (uint32_t mz = 0; mz < 9; mz++) {
+      for (uint32_t channels = 4; channels < 32; channels += 6) {
+        DWConvMicrokernelTester()
+          .cr(2)
+          .kr(9)
+          .channels(channels)
+          .input_offset(80)
+          .zero_index(mz)
+          .Test(xnn_f32_dwconv_minmax_ukernel_up2x9__wasm_acc2, DWConvMicrokernelTester::Variant::Scalar);
+      }
+    }
+  }
 #endif  // XNN_ARCH_WASM
 
 
@@ -14804,6 +17326,31 @@
         .Test(xnn_f32_dwconv_minmax_ukernel_up1x25__wasm, DWConvMicrokernelTester::Variant::Scalar);
     }
   }
+
+  TEST(F32_DWCONV_MINMAX_UP1X25__WASM, input_offset) {
+    for (uint32_t channels = 2; channels < 16; channels += 3) {
+      DWConvMicrokernelTester()
+        .cr(1)
+        .kr(25)
+        .channels(channels)
+        .input_offset(48)
+        .Test(xnn_f32_dwconv_minmax_ukernel_up1x25__wasm, DWConvMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  TEST(F32_DWCONV_MINMAX_UP1X25__WASM, zero) {
+    for (uint32_t mz = 0; mz < 25; mz++) {
+      for (uint32_t channels = 2; channels < 16; channels += 3) {
+        DWConvMicrokernelTester()
+          .cr(1)
+          .kr(25)
+          .channels(channels)
+          .input_offset(48)
+          .zero_index(mz)
+          .Test(xnn_f32_dwconv_minmax_ukernel_up1x25__wasm, DWConvMicrokernelTester::Variant::Scalar);
+      }
+    }
+  }
 #endif  // XNN_ARCH_WASM
 
 
@@ -14908,6 +17455,31 @@
         .Test(xnn_f32_dwconv_minmax_ukernel_up1x25__wasm_acc2, DWConvMicrokernelTester::Variant::Scalar);
     }
   }
+
+  TEST(F32_DWCONV_MINMAX_UP1X25__WASM_ACC2, input_offset) {
+    for (uint32_t channels = 2; channels < 16; channels += 3) {
+      DWConvMicrokernelTester()
+        .cr(1)
+        .kr(25)
+        .channels(channels)
+        .input_offset(48)
+        .Test(xnn_f32_dwconv_minmax_ukernel_up1x25__wasm_acc2, DWConvMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  TEST(F32_DWCONV_MINMAX_UP1X25__WASM_ACC2, zero) {
+    for (uint32_t mz = 0; mz < 25; mz++) {
+      for (uint32_t channels = 2; channels < 16; channels += 3) {
+        DWConvMicrokernelTester()
+          .cr(1)
+          .kr(25)
+          .channels(channels)
+          .input_offset(48)
+          .zero_index(mz)
+          .Test(xnn_f32_dwconv_minmax_ukernel_up1x25__wasm_acc2, DWConvMicrokernelTester::Variant::Scalar);
+      }
+    }
+  }
 #endif  // XNN_ARCH_WASM
 
 
@@ -15054,6 +17626,31 @@
         .Test(xnn_f32_dwconv_minmax_ukernel_up2x25__wasm, DWConvMicrokernelTester::Variant::Scalar);
     }
   }
+
+  TEST(F32_DWCONV_MINMAX_UP2X25__WASM, input_offset) {
+    for (uint32_t channels = 4; channels < 32; channels += 6) {
+      DWConvMicrokernelTester()
+        .cr(2)
+        .kr(25)
+        .channels(channels)
+        .input_offset(80)
+        .Test(xnn_f32_dwconv_minmax_ukernel_up2x25__wasm, DWConvMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  TEST(F32_DWCONV_MINMAX_UP2X25__WASM, zero) {
+    for (uint32_t mz = 0; mz < 25; mz++) {
+      for (uint32_t channels = 4; channels < 32; channels += 6) {
+        DWConvMicrokernelTester()
+          .cr(2)
+          .kr(25)
+          .channels(channels)
+          .input_offset(80)
+          .zero_index(mz)
+          .Test(xnn_f32_dwconv_minmax_ukernel_up2x25__wasm, DWConvMicrokernelTester::Variant::Scalar);
+      }
+    }
+  }
 #endif  // XNN_ARCH_WASM
 
 
@@ -15200,6 +17797,31 @@
         .Test(xnn_f32_dwconv_minmax_ukernel_up2x25__wasm_acc2, DWConvMicrokernelTester::Variant::Scalar);
     }
   }
+
+  TEST(F32_DWCONV_MINMAX_UP2X25__WASM_ACC2, input_offset) {
+    for (uint32_t channels = 4; channels < 32; channels += 6) {
+      DWConvMicrokernelTester()
+        .cr(2)
+        .kr(25)
+        .channels(channels)
+        .input_offset(80)
+        .Test(xnn_f32_dwconv_minmax_ukernel_up2x25__wasm_acc2, DWConvMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  TEST(F32_DWCONV_MINMAX_UP2X25__WASM_ACC2, zero) {
+    for (uint32_t mz = 0; mz < 25; mz++) {
+      for (uint32_t channels = 4; channels < 32; channels += 6) {
+        DWConvMicrokernelTester()
+          .cr(2)
+          .kr(25)
+          .channels(channels)
+          .input_offset(80)
+          .zero_index(mz)
+          .Test(xnn_f32_dwconv_minmax_ukernel_up2x25__wasm_acc2, DWConvMicrokernelTester::Variant::Scalar);
+      }
+    }
+  }
 #endif  // XNN_ARCH_WASM
 
 
@@ -15304,6 +17926,30 @@
   }
 }
 
+TEST(F32_DWCONV_MINMAX_UP1X4__SCALAR, input_offset) {
+  for (uint32_t channels = 2; channels < 16; channels += 3) {
+    DWConvMicrokernelTester()
+      .cr(1)
+      .kr(4)
+      .channels(channels)
+      .input_offset(48)
+      .Test(xnn_f32_dwconv_minmax_ukernel_up1x4__scalar, DWConvMicrokernelTester::Variant::Scalar);
+  }
+}
+
+TEST(F32_DWCONV_MINMAX_UP1X4__SCALAR, zero) {
+  for (uint32_t mz = 0; mz < 4; mz++) {
+    for (uint32_t channels = 2; channels < 16; channels += 3) {
+      DWConvMicrokernelTester()
+        .cr(1)
+        .kr(4)
+        .channels(channels)
+        .input_offset(48)
+        .zero_index(mz)
+        .Test(xnn_f32_dwconv_minmax_ukernel_up1x4__scalar, DWConvMicrokernelTester::Variant::Scalar);
+    }
+  }
+}
 
 TEST(F32_DWCONV_MINMAX_UP1X4__SCALAR_ACC2, c_eq_1) {
   DWConvMicrokernelTester()
@@ -15406,6 +18052,30 @@
   }
 }
 
+TEST(F32_DWCONV_MINMAX_UP1X4__SCALAR_ACC2, input_offset) {
+  for (uint32_t channels = 2; channels < 16; channels += 3) {
+    DWConvMicrokernelTester()
+      .cr(1)
+      .kr(4)
+      .channels(channels)
+      .input_offset(48)
+      .Test(xnn_f32_dwconv_minmax_ukernel_up1x4__scalar_acc2, DWConvMicrokernelTester::Variant::Scalar);
+  }
+}
+
+TEST(F32_DWCONV_MINMAX_UP1X4__SCALAR_ACC2, zero) {
+  for (uint32_t mz = 0; mz < 4; mz++) {
+    for (uint32_t channels = 2; channels < 16; channels += 3) {
+      DWConvMicrokernelTester()
+        .cr(1)
+        .kr(4)
+        .channels(channels)
+        .input_offset(48)
+        .zero_index(mz)
+        .Test(xnn_f32_dwconv_minmax_ukernel_up1x4__scalar_acc2, DWConvMicrokernelTester::Variant::Scalar);
+    }
+  }
+}
 
 TEST(F32_DWCONV_MINMAX_UP2X4__SCALAR, c_eq_2) {
   DWConvMicrokernelTester()
@@ -15550,6 +18220,30 @@
   }
 }
 
+TEST(F32_DWCONV_MINMAX_UP2X4__SCALAR, input_offset) {
+  for (uint32_t channels = 4; channels < 32; channels += 6) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(4)
+      .channels(channels)
+      .input_offset(80)
+      .Test(xnn_f32_dwconv_minmax_ukernel_up2x4__scalar, DWConvMicrokernelTester::Variant::Scalar);
+  }
+}
+
+TEST(F32_DWCONV_MINMAX_UP2X4__SCALAR, zero) {
+  for (uint32_t mz = 0; mz < 4; mz++) {
+    for (uint32_t channels = 4; channels < 32; channels += 6) {
+      DWConvMicrokernelTester()
+        .cr(2)
+        .kr(4)
+        .channels(channels)
+        .input_offset(80)
+        .zero_index(mz)
+        .Test(xnn_f32_dwconv_minmax_ukernel_up2x4__scalar, DWConvMicrokernelTester::Variant::Scalar);
+    }
+  }
+}
 
 TEST(F32_DWCONV_MINMAX_UP2X4__SCALAR_ACC2, c_eq_2) {
   DWConvMicrokernelTester()
@@ -15694,6 +18388,30 @@
   }
 }
 
+TEST(F32_DWCONV_MINMAX_UP2X4__SCALAR_ACC2, input_offset) {
+  for (uint32_t channels = 4; channels < 32; channels += 6) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(4)
+      .channels(channels)
+      .input_offset(80)
+      .Test(xnn_f32_dwconv_minmax_ukernel_up2x4__scalar_acc2, DWConvMicrokernelTester::Variant::Scalar);
+  }
+}
+
+TEST(F32_DWCONV_MINMAX_UP2X4__SCALAR_ACC2, zero) {
+  for (uint32_t mz = 0; mz < 4; mz++) {
+    for (uint32_t channels = 4; channels < 32; channels += 6) {
+      DWConvMicrokernelTester()
+        .cr(2)
+        .kr(4)
+        .channels(channels)
+        .input_offset(80)
+        .zero_index(mz)
+        .Test(xnn_f32_dwconv_minmax_ukernel_up2x4__scalar_acc2, DWConvMicrokernelTester::Variant::Scalar);
+    }
+  }
+}
 
 TEST(F32_DWCONV_MINMAX_UP1X9__SCALAR, c_eq_1) {
   DWConvMicrokernelTester()
@@ -15796,6 +18514,30 @@
   }
 }
 
+TEST(F32_DWCONV_MINMAX_UP1X9__SCALAR, input_offset) {
+  for (uint32_t channels = 2; channels < 16; channels += 3) {
+    DWConvMicrokernelTester()
+      .cr(1)
+      .kr(9)
+      .channels(channels)
+      .input_offset(48)
+      .Test(xnn_f32_dwconv_minmax_ukernel_up1x9__scalar, DWConvMicrokernelTester::Variant::Scalar);
+  }
+}
+
+TEST(F32_DWCONV_MINMAX_UP1X9__SCALAR, zero) {
+  for (uint32_t mz = 0; mz < 9; mz++) {
+    for (uint32_t channels = 2; channels < 16; channels += 3) {
+      DWConvMicrokernelTester()
+        .cr(1)
+        .kr(9)
+        .channels(channels)
+        .input_offset(48)
+        .zero_index(mz)
+        .Test(xnn_f32_dwconv_minmax_ukernel_up1x9__scalar, DWConvMicrokernelTester::Variant::Scalar);
+    }
+  }
+}
 
 TEST(F32_DWCONV_MINMAX_UP1X9__SCALAR_ACC2, c_eq_1) {
   DWConvMicrokernelTester()
@@ -15898,6 +18640,30 @@
   }
 }
 
+TEST(F32_DWCONV_MINMAX_UP1X9__SCALAR_ACC2, input_offset) {
+  for (uint32_t channels = 2; channels < 16; channels += 3) {
+    DWConvMicrokernelTester()
+      .cr(1)
+      .kr(9)
+      .channels(channels)
+      .input_offset(48)
+      .Test(xnn_f32_dwconv_minmax_ukernel_up1x9__scalar_acc2, DWConvMicrokernelTester::Variant::Scalar);
+  }
+}
+
+TEST(F32_DWCONV_MINMAX_UP1X9__SCALAR_ACC2, zero) {
+  for (uint32_t mz = 0; mz < 9; mz++) {
+    for (uint32_t channels = 2; channels < 16; channels += 3) {
+      DWConvMicrokernelTester()
+        .cr(1)
+        .kr(9)
+        .channels(channels)
+        .input_offset(48)
+        .zero_index(mz)
+        .Test(xnn_f32_dwconv_minmax_ukernel_up1x9__scalar_acc2, DWConvMicrokernelTester::Variant::Scalar);
+    }
+  }
+}
 
 TEST(F32_DWCONV_MINMAX_UP2X9__SCALAR, c_eq_2) {
   DWConvMicrokernelTester()
@@ -16042,6 +18808,30 @@
   }
 }
 
+TEST(F32_DWCONV_MINMAX_UP2X9__SCALAR, input_offset) {
+  for (uint32_t channels = 4; channels < 32; channels += 6) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(9)
+      .channels(channels)
+      .input_offset(80)
+      .Test(xnn_f32_dwconv_minmax_ukernel_up2x9__scalar, DWConvMicrokernelTester::Variant::Scalar);
+  }
+}
+
+TEST(F32_DWCONV_MINMAX_UP2X9__SCALAR, zero) {
+  for (uint32_t mz = 0; mz < 9; mz++) {
+    for (uint32_t channels = 4; channels < 32; channels += 6) {
+      DWConvMicrokernelTester()
+        .cr(2)
+        .kr(9)
+        .channels(channels)
+        .input_offset(80)
+        .zero_index(mz)
+        .Test(xnn_f32_dwconv_minmax_ukernel_up2x9__scalar, DWConvMicrokernelTester::Variant::Scalar);
+    }
+  }
+}
 
 TEST(F32_DWCONV_MINMAX_UP2X9__SCALAR_ACC2, c_eq_2) {
   DWConvMicrokernelTester()
@@ -16186,6 +18976,30 @@
   }
 }
 
+TEST(F32_DWCONV_MINMAX_UP2X9__SCALAR_ACC2, input_offset) {
+  for (uint32_t channels = 4; channels < 32; channels += 6) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(9)
+      .channels(channels)
+      .input_offset(80)
+      .Test(xnn_f32_dwconv_minmax_ukernel_up2x9__scalar_acc2, DWConvMicrokernelTester::Variant::Scalar);
+  }
+}
+
+TEST(F32_DWCONV_MINMAX_UP2X9__SCALAR_ACC2, zero) {
+  for (uint32_t mz = 0; mz < 9; mz++) {
+    for (uint32_t channels = 4; channels < 32; channels += 6) {
+      DWConvMicrokernelTester()
+        .cr(2)
+        .kr(9)
+        .channels(channels)
+        .input_offset(80)
+        .zero_index(mz)
+        .Test(xnn_f32_dwconv_minmax_ukernel_up2x9__scalar_acc2, DWConvMicrokernelTester::Variant::Scalar);
+    }
+  }
+}
 
 TEST(F32_DWCONV_MINMAX_UP1X25__SCALAR, c_eq_1) {
   DWConvMicrokernelTester()
@@ -16288,6 +19102,30 @@
   }
 }
 
+TEST(F32_DWCONV_MINMAX_UP1X25__SCALAR, input_offset) {
+  for (uint32_t channels = 2; channels < 16; channels += 3) {
+    DWConvMicrokernelTester()
+      .cr(1)
+      .kr(25)
+      .channels(channels)
+      .input_offset(48)
+      .Test(xnn_f32_dwconv_minmax_ukernel_up1x25__scalar, DWConvMicrokernelTester::Variant::Scalar);
+  }
+}
+
+TEST(F32_DWCONV_MINMAX_UP1X25__SCALAR, zero) {
+  for (uint32_t mz = 0; mz < 25; mz++) {
+    for (uint32_t channels = 2; channels < 16; channels += 3) {
+      DWConvMicrokernelTester()
+        .cr(1)
+        .kr(25)
+        .channels(channels)
+        .input_offset(48)
+        .zero_index(mz)
+        .Test(xnn_f32_dwconv_minmax_ukernel_up1x25__scalar, DWConvMicrokernelTester::Variant::Scalar);
+    }
+  }
+}
 
 TEST(F32_DWCONV_MINMAX_UP1X25__SCALAR_ACC2, c_eq_1) {
   DWConvMicrokernelTester()
@@ -16390,6 +19228,30 @@
   }
 }
 
+TEST(F32_DWCONV_MINMAX_UP1X25__SCALAR_ACC2, input_offset) {
+  for (uint32_t channels = 2; channels < 16; channels += 3) {
+    DWConvMicrokernelTester()
+      .cr(1)
+      .kr(25)
+      .channels(channels)
+      .input_offset(48)
+      .Test(xnn_f32_dwconv_minmax_ukernel_up1x25__scalar_acc2, DWConvMicrokernelTester::Variant::Scalar);
+  }
+}
+
+TEST(F32_DWCONV_MINMAX_UP1X25__SCALAR_ACC2, zero) {
+  for (uint32_t mz = 0; mz < 25; mz++) {
+    for (uint32_t channels = 2; channels < 16; channels += 3) {
+      DWConvMicrokernelTester()
+        .cr(1)
+        .kr(25)
+        .channels(channels)
+        .input_offset(48)
+        .zero_index(mz)
+        .Test(xnn_f32_dwconv_minmax_ukernel_up1x25__scalar_acc2, DWConvMicrokernelTester::Variant::Scalar);
+    }
+  }
+}
 
 TEST(F32_DWCONV_MINMAX_UP2X25__SCALAR, c_eq_2) {
   DWConvMicrokernelTester()
@@ -16534,6 +19396,30 @@
   }
 }
 
+TEST(F32_DWCONV_MINMAX_UP2X25__SCALAR, input_offset) {
+  for (uint32_t channels = 4; channels < 32; channels += 6) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(25)
+      .channels(channels)
+      .input_offset(80)
+      .Test(xnn_f32_dwconv_minmax_ukernel_up2x25__scalar, DWConvMicrokernelTester::Variant::Scalar);
+  }
+}
+
+TEST(F32_DWCONV_MINMAX_UP2X25__SCALAR, zero) {
+  for (uint32_t mz = 0; mz < 25; mz++) {
+    for (uint32_t channels = 4; channels < 32; channels += 6) {
+      DWConvMicrokernelTester()
+        .cr(2)
+        .kr(25)
+        .channels(channels)
+        .input_offset(80)
+        .zero_index(mz)
+        .Test(xnn_f32_dwconv_minmax_ukernel_up2x25__scalar, DWConvMicrokernelTester::Variant::Scalar);
+    }
+  }
+}
 
 TEST(F32_DWCONV_MINMAX_UP2X25__SCALAR_ACC2, c_eq_2) {
   DWConvMicrokernelTester()
@@ -16677,3 +19563,28 @@
       .Test(xnn_f32_dwconv_minmax_ukernel_up2x25__scalar_acc2, DWConvMicrokernelTester::Variant::Scalar);
   }
 }
+
+TEST(F32_DWCONV_MINMAX_UP2X25__SCALAR_ACC2, input_offset) {
+  for (uint32_t channels = 4; channels < 32; channels += 6) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(25)
+      .channels(channels)
+      .input_offset(80)
+      .Test(xnn_f32_dwconv_minmax_ukernel_up2x25__scalar_acc2, DWConvMicrokernelTester::Variant::Scalar);
+  }
+}
+
+TEST(F32_DWCONV_MINMAX_UP2X25__SCALAR_ACC2, zero) {
+  for (uint32_t mz = 0; mz < 25; mz++) {
+    for (uint32_t channels = 4; channels < 32; channels += 6) {
+      DWConvMicrokernelTester()
+        .cr(2)
+        .kr(25)
+        .channels(channels)
+        .input_offset(80)
+        .zero_index(mz)
+        .Test(xnn_f32_dwconv_minmax_ukernel_up2x25__scalar_acc2, DWConvMicrokernelTester::Variant::Scalar);
+    }
+  }
+}
\ No newline at end of file