NEON/NEONFMA RAddStoreExpMinusMax micro-kernels

PiperOrigin-RevId: 291547227
diff --git a/test/f32-raddstoreexpminusmax.cc b/test/f32-raddstoreexpminusmax.cc
index 4d4f3dd..57b2b6f 100644
--- a/test/f32-raddstoreexpminusmax.cc
+++ b/test/f32-raddstoreexpminusmax.cc
@@ -17,6 +17,1782 @@
 #include "raddstoreexpminusmax-microkernel-tester.h"
 
 
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64  // neon_p5_x4 kernel tests; built only when XNN_ARCH_ARM or XNN_ARCH_ARM64 is non-zero
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEON_P5_X4, elements_eq_4) {  // one run with exactly 4 elements
+    TEST_REQUIRES_ARM_NEON;  // NOTE(review): presumably skips the test when the CPU lacks NEON; macro is defined outside this patch, confirm
+    RAddStoreExpMinusMaxMicrokernelTester()
+      .elements(4)
+      .Test(xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x4);
+  }
+
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEON_P5_X4, elements_div_4) {  // multiples of 4 above 4
+    TEST_REQUIRES_ARM_NEON;
+    for (size_t elements = 8; elements < 40; elements += 4) {  // elements = 8, 12, ..., 36
+      RAddStoreExpMinusMaxMicrokernelTester()
+        .elements(elements)
+        .Test(xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x4);
+    }
+  }
+
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEON_P5_X4, elements_lt_4) {  // every count from 1 to 3
+    TEST_REQUIRES_ARM_NEON;
+    for (size_t elements = 1; elements < 4; elements++) {
+      RAddStoreExpMinusMaxMicrokernelTester()
+        .elements(elements)
+        .Test(xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x4);
+    }
+  }
+
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEON_P5_X4, elements_gt_4) {  // every count from 5 to 7
+    TEST_REQUIRES_ARM_NEON;
+    for (size_t elements = 5; elements < 8; elements++) {
+      RAddStoreExpMinusMaxMicrokernelTester()
+        .elements(elements)
+        .Test(xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x4);
+    }
+  }
+#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
+
+
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64  // neon_p5_x8 kernel tests; built only when XNN_ARCH_ARM or XNN_ARCH_ARM64 is non-zero
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEON_P5_X8, elements_eq_8) {  // one run with exactly 8 elements
+    TEST_REQUIRES_ARM_NEON;  // NOTE(review): presumably skips the test when the CPU lacks NEON; macro is defined outside this patch, confirm
+    RAddStoreExpMinusMaxMicrokernelTester()
+      .elements(8)
+      .Test(xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x8);
+  }
+
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEON_P5_X8, elements_div_8) {  // multiples of 8 above 8
+    TEST_REQUIRES_ARM_NEON;
+    for (size_t elements = 16; elements < 80; elements += 8) {  // elements = 16, 24, ..., 72
+      RAddStoreExpMinusMaxMicrokernelTester()
+        .elements(elements)
+        .Test(xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x8);
+    }
+  }
+
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEON_P5_X8, elements_lt_8) {  // every count from 1 to 7
+    TEST_REQUIRES_ARM_NEON;
+    for (size_t elements = 1; elements < 8; elements++) {
+      RAddStoreExpMinusMaxMicrokernelTester()
+        .elements(elements)
+        .Test(xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x8);
+    }
+  }
+
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEON_P5_X8, elements_gt_8) {  // every count from 9 to 15
+    TEST_REQUIRES_ARM_NEON;
+    for (size_t elements = 9; elements < 16; elements++) {
+      RAddStoreExpMinusMaxMicrokernelTester()
+        .elements(elements)
+        .Test(xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x8);
+    }
+  }
+#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
+
+
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64  // neon_p5_x8_acc2 kernel tests; built only when XNN_ARCH_ARM or XNN_ARCH_ARM64 is non-zero
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEON_P5_X8_ACC2, elements_eq_8) {  // one run with exactly 8 elements
+    TEST_REQUIRES_ARM_NEON;  // NOTE(review): presumably skips the test when the CPU lacks NEON; macro is defined outside this patch, confirm
+    RAddStoreExpMinusMaxMicrokernelTester()
+      .elements(8)
+      .Test(xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x8_acc2);
+  }
+
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEON_P5_X8_ACC2, elements_div_8) {  // multiples of 8 above 8
+    TEST_REQUIRES_ARM_NEON;
+    for (size_t elements = 16; elements < 80; elements += 8) {  // elements = 16, 24, ..., 72
+      RAddStoreExpMinusMaxMicrokernelTester()
+        .elements(elements)
+        .Test(xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x8_acc2);
+    }
+  }
+
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEON_P5_X8_ACC2, elements_lt_8) {  // every count from 1 to 7
+    TEST_REQUIRES_ARM_NEON;
+    for (size_t elements = 1; elements < 8; elements++) {
+      RAddStoreExpMinusMaxMicrokernelTester()
+        .elements(elements)
+        .Test(xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x8_acc2);
+    }
+  }
+
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEON_P5_X8_ACC2, elements_gt_8) {  // every count from 9 to 15
+    TEST_REQUIRES_ARM_NEON;
+    for (size_t elements = 9; elements < 16; elements++) {
+      RAddStoreExpMinusMaxMicrokernelTester()
+        .elements(elements)
+        .Test(xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x8_acc2);
+    }
+  }
+#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
+
+
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64  // neon_p5_x12 kernel tests; built only when XNN_ARCH_ARM or XNN_ARCH_ARM64 is non-zero
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEON_P5_X12, elements_eq_12) {  // one run with exactly 12 elements
+    TEST_REQUIRES_ARM_NEON;  // NOTE(review): presumably skips the test when the CPU lacks NEON; macro is defined outside this patch, confirm
+    RAddStoreExpMinusMaxMicrokernelTester()
+      .elements(12)
+      .Test(xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12);
+  }
+
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEON_P5_X12, elements_div_12) {  // multiples of 12 above 12
+    TEST_REQUIRES_ARM_NEON;
+    for (size_t elements = 24; elements < 120; elements += 12) {  // elements = 24, 36, ..., 108
+      RAddStoreExpMinusMaxMicrokernelTester()
+        .elements(elements)
+        .Test(xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12);
+    }
+  }
+
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEON_P5_X12, elements_lt_12) {  // every count from 1 to 11
+    TEST_REQUIRES_ARM_NEON;
+    for (size_t elements = 1; elements < 12; elements++) {
+      RAddStoreExpMinusMaxMicrokernelTester()
+        .elements(elements)
+        .Test(xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12);
+    }
+  }
+
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEON_P5_X12, elements_gt_12) {  // every count from 13 to 23
+    TEST_REQUIRES_ARM_NEON;
+    for (size_t elements = 13; elements < 24; elements++) {
+      RAddStoreExpMinusMaxMicrokernelTester()
+        .elements(elements)
+        .Test(xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12);
+    }
+  }
+#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
+
+
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64  // neon_p5_x12_acc2 kernel tests; built only when XNN_ARCH_ARM or XNN_ARCH_ARM64 is non-zero
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEON_P5_X12_ACC2, elements_eq_12) {  // one run with exactly 12 elements
+    TEST_REQUIRES_ARM_NEON;  // NOTE(review): presumably skips the test when the CPU lacks NEON; macro is defined outside this patch, confirm
+    RAddStoreExpMinusMaxMicrokernelTester()
+      .elements(12)
+      .Test(xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12_acc2);
+  }
+
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEON_P5_X12_ACC2, elements_div_12) {  // multiples of 12 above 12
+    TEST_REQUIRES_ARM_NEON;
+    for (size_t elements = 24; elements < 120; elements += 12) {  // elements = 24, 36, ..., 108
+      RAddStoreExpMinusMaxMicrokernelTester()
+        .elements(elements)
+        .Test(xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12_acc2);
+    }
+  }
+
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEON_P5_X12_ACC2, elements_lt_12) {  // every count from 1 to 11
+    TEST_REQUIRES_ARM_NEON;
+    for (size_t elements = 1; elements < 12; elements++) {
+      RAddStoreExpMinusMaxMicrokernelTester()
+        .elements(elements)
+        .Test(xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12_acc2);
+    }
+  }
+
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEON_P5_X12_ACC2, elements_gt_12) {  // every count from 13 to 23
+    TEST_REQUIRES_ARM_NEON;
+    for (size_t elements = 13; elements < 24; elements++) {
+      RAddStoreExpMinusMaxMicrokernelTester()
+        .elements(elements)
+        .Test(xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12_acc2);
+    }
+  }
+#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
+
+
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64  // neon_p5_x12_acc3 kernel tests; built only when XNN_ARCH_ARM or XNN_ARCH_ARM64 is non-zero
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEON_P5_X12_ACC3, elements_eq_12) {  // one run with exactly 12 elements
+    TEST_REQUIRES_ARM_NEON;  // NOTE(review): presumably skips the test when the CPU lacks NEON; macro is defined outside this patch, confirm
+    RAddStoreExpMinusMaxMicrokernelTester()
+      .elements(12)
+      .Test(xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12_acc3);
+  }
+
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEON_P5_X12_ACC3, elements_div_12) {  // multiples of 12 above 12
+    TEST_REQUIRES_ARM_NEON;
+    for (size_t elements = 24; elements < 120; elements += 12) {  // elements = 24, 36, ..., 108
+      RAddStoreExpMinusMaxMicrokernelTester()
+        .elements(elements)
+        .Test(xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12_acc3);
+    }
+  }
+
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEON_P5_X12_ACC3, elements_lt_12) {  // every count from 1 to 11
+    TEST_REQUIRES_ARM_NEON;
+    for (size_t elements = 1; elements < 12; elements++) {
+      RAddStoreExpMinusMaxMicrokernelTester()
+        .elements(elements)
+        .Test(xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12_acc3);
+    }
+  }
+
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEON_P5_X12_ACC3, elements_gt_12) {  // every count from 13 to 23
+    TEST_REQUIRES_ARM_NEON;
+    for (size_t elements = 13; elements < 24; elements++) {
+      RAddStoreExpMinusMaxMicrokernelTester()
+        .elements(elements)
+        .Test(xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12_acc3);
+    }
+  }
+#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
+
+
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64  // neon_p5_x16 kernel tests; built only when XNN_ARCH_ARM or XNN_ARCH_ARM64 is non-zero
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEON_P5_X16, elements_eq_16) {  // one run with exactly 16 elements
+    TEST_REQUIRES_ARM_NEON;  // NOTE(review): presumably skips the test when the CPU lacks NEON; macro is defined outside this patch, confirm
+    RAddStoreExpMinusMaxMicrokernelTester()
+      .elements(16)
+      .Test(xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x16);
+  }
+
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEON_P5_X16, elements_div_16) {  // multiples of 16 above 16
+    TEST_REQUIRES_ARM_NEON;
+    for (size_t elements = 32; elements < 160; elements += 16) {  // elements = 32, 48, ..., 144
+      RAddStoreExpMinusMaxMicrokernelTester()
+        .elements(elements)
+        .Test(xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x16);
+    }
+  }
+
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEON_P5_X16, elements_lt_16) {  // every count from 1 to 15
+    TEST_REQUIRES_ARM_NEON;
+    for (size_t elements = 1; elements < 16; elements++) {
+      RAddStoreExpMinusMaxMicrokernelTester()
+        .elements(elements)
+        .Test(xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x16);
+    }
+  }
+
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEON_P5_X16, elements_gt_16) {  // every count from 17 to 31
+    TEST_REQUIRES_ARM_NEON;
+    for (size_t elements = 17; elements < 32; elements++) {
+      RAddStoreExpMinusMaxMicrokernelTester()
+        .elements(elements)
+        .Test(xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x16);
+    }
+  }
+#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
+
+
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64  // neon_p5_x16_acc2 kernel tests; built only when XNN_ARCH_ARM or XNN_ARCH_ARM64 is non-zero
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEON_P5_X16_ACC2, elements_eq_16) {  // one run with exactly 16 elements
+    TEST_REQUIRES_ARM_NEON;  // NOTE(review): presumably skips the test when the CPU lacks NEON; macro is defined outside this patch, confirm
+    RAddStoreExpMinusMaxMicrokernelTester()
+      .elements(16)
+      .Test(xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x16_acc2);
+  }
+
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEON_P5_X16_ACC2, elements_div_16) {  // multiples of 16 above 16
+    TEST_REQUIRES_ARM_NEON;
+    for (size_t elements = 32; elements < 160; elements += 16) {  // elements = 32, 48, ..., 144
+      RAddStoreExpMinusMaxMicrokernelTester()
+        .elements(elements)
+        .Test(xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x16_acc2);
+    }
+  }
+
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEON_P5_X16_ACC2, elements_lt_16) {  // every count from 1 to 15
+    TEST_REQUIRES_ARM_NEON;
+    for (size_t elements = 1; elements < 16; elements++) {
+      RAddStoreExpMinusMaxMicrokernelTester()
+        .elements(elements)
+        .Test(xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x16_acc2);
+    }
+  }
+
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEON_P5_X16_ACC2, elements_gt_16) {  // every count from 17 to 31
+    TEST_REQUIRES_ARM_NEON;
+    for (size_t elements = 17; elements < 32; elements++) {
+      RAddStoreExpMinusMaxMicrokernelTester()
+        .elements(elements)
+        .Test(xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x16_acc2);
+    }
+  }
+#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
+
+
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64  // neon_p5_x16_acc4 kernel tests; built only when XNN_ARCH_ARM or XNN_ARCH_ARM64 is non-zero
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEON_P5_X16_ACC4, elements_eq_16) {  // one run with exactly 16 elements
+    TEST_REQUIRES_ARM_NEON;  // NOTE(review): presumably skips the test when the CPU lacks NEON; macro is defined outside this patch, confirm
+    RAddStoreExpMinusMaxMicrokernelTester()
+      .elements(16)
+      .Test(xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x16_acc4);
+  }
+
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEON_P5_X16_ACC4, elements_div_16) {  // multiples of 16 above 16
+    TEST_REQUIRES_ARM_NEON;
+    for (size_t elements = 32; elements < 160; elements += 16) {  // elements = 32, 48, ..., 144
+      RAddStoreExpMinusMaxMicrokernelTester()
+        .elements(elements)
+        .Test(xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x16_acc4);
+    }
+  }
+
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEON_P5_X16_ACC4, elements_lt_16) {  // every count from 1 to 15
+    TEST_REQUIRES_ARM_NEON;
+    for (size_t elements = 1; elements < 16; elements++) {
+      RAddStoreExpMinusMaxMicrokernelTester()
+        .elements(elements)
+        .Test(xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x16_acc4);
+    }
+  }
+
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEON_P5_X16_ACC4, elements_gt_16) {  // every count from 17 to 31
+    TEST_REQUIRES_ARM_NEON;
+    for (size_t elements = 17; elements < 32; elements++) {
+      RAddStoreExpMinusMaxMicrokernelTester()
+        .elements(elements)
+        .Test(xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x16_acc4);
+    }
+  }
+#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
+
+
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64  // neon_p5_x20 kernel tests; built only when XNN_ARCH_ARM or XNN_ARCH_ARM64 is non-zero
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEON_P5_X20, elements_eq_20) {  // one run with exactly 20 elements
+    TEST_REQUIRES_ARM_NEON;  // NOTE(review): presumably skips the test when the CPU lacks NEON; macro is defined outside this patch, confirm
+    RAddStoreExpMinusMaxMicrokernelTester()
+      .elements(20)
+      .Test(xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x20);
+  }
+
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEON_P5_X20, elements_div_20) {  // multiples of 20 above 20
+    TEST_REQUIRES_ARM_NEON;
+    for (size_t elements = 40; elements < 200; elements += 20) {  // elements = 40, 60, ..., 180
+      RAddStoreExpMinusMaxMicrokernelTester()
+        .elements(elements)
+        .Test(xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x20);
+    }
+  }
+
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEON_P5_X20, elements_lt_20) {  // every count from 1 to 19
+    TEST_REQUIRES_ARM_NEON;
+    for (size_t elements = 1; elements < 20; elements++) {
+      RAddStoreExpMinusMaxMicrokernelTester()
+        .elements(elements)
+        .Test(xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x20);
+    }
+  }
+
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEON_P5_X20, elements_gt_20) {  // every count from 21 to 39
+    TEST_REQUIRES_ARM_NEON;
+    for (size_t elements = 21; elements < 40; elements++) {
+      RAddStoreExpMinusMaxMicrokernelTester()
+        .elements(elements)
+        .Test(xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x20);
+    }
+  }
+#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
+
+
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64  // neon_p5_x20_acc2 kernel tests; built only when XNN_ARCH_ARM or XNN_ARCH_ARM64 is non-zero
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEON_P5_X20_ACC2, elements_eq_20) {  // one run with exactly 20 elements
+    TEST_REQUIRES_ARM_NEON;  // NOTE(review): presumably skips the test when the CPU lacks NEON; macro is defined outside this patch, confirm
+    RAddStoreExpMinusMaxMicrokernelTester()
+      .elements(20)
+      .Test(xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x20_acc2);
+  }
+
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEON_P5_X20_ACC2, elements_div_20) {  // multiples of 20 above 20
+    TEST_REQUIRES_ARM_NEON;
+    for (size_t elements = 40; elements < 200; elements += 20) {  // elements = 40, 60, ..., 180
+      RAddStoreExpMinusMaxMicrokernelTester()
+        .elements(elements)
+        .Test(xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x20_acc2);
+    }
+  }
+
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEON_P5_X20_ACC2, elements_lt_20) {  // every count from 1 to 19
+    TEST_REQUIRES_ARM_NEON;
+    for (size_t elements = 1; elements < 20; elements++) {
+      RAddStoreExpMinusMaxMicrokernelTester()
+        .elements(elements)
+        .Test(xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x20_acc2);
+    }
+  }
+
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEON_P5_X20_ACC2, elements_gt_20) {  // every count from 21 to 39
+    TEST_REQUIRES_ARM_NEON;
+    for (size_t elements = 21; elements < 40; elements++) {
+      RAddStoreExpMinusMaxMicrokernelTester()
+        .elements(elements)
+        .Test(xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x20_acc2);
+    }
+  }
+#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
+
+
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64  // neon_p5_x20_acc5 kernel tests; built only when XNN_ARCH_ARM or XNN_ARCH_ARM64 is non-zero
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEON_P5_X20_ACC5, elements_eq_20) {  // one run with exactly 20 elements
+    TEST_REQUIRES_ARM_NEON;  // NOTE(review): presumably skips the test when the CPU lacks NEON; macro is defined outside this patch, confirm
+    RAddStoreExpMinusMaxMicrokernelTester()
+      .elements(20)
+      .Test(xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x20_acc5);
+  }
+
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEON_P5_X20_ACC5, elements_div_20) {  // multiples of 20 above 20
+    TEST_REQUIRES_ARM_NEON;
+    for (size_t elements = 40; elements < 200; elements += 20) {  // elements = 40, 60, ..., 180
+      RAddStoreExpMinusMaxMicrokernelTester()
+        .elements(elements)
+        .Test(xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x20_acc5);
+    }
+  }
+
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEON_P5_X20_ACC5, elements_lt_20) {  // every count from 1 to 19
+    TEST_REQUIRES_ARM_NEON;
+    for (size_t elements = 1; elements < 20; elements++) {
+      RAddStoreExpMinusMaxMicrokernelTester()
+        .elements(elements)
+        .Test(xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x20_acc5);
+    }
+  }
+
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEON_P5_X20_ACC5, elements_gt_20) {  // every count from 21 to 39
+    TEST_REQUIRES_ARM_NEON;
+    for (size_t elements = 21; elements < 40; elements++) {
+      RAddStoreExpMinusMaxMicrokernelTester()
+        .elements(elements)
+        .Test(xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x20_acc5);
+    }
+  }
+#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
+
+
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64  // neon_lut64_p2_x4 kernel tests; built only when XNN_ARCH_ARM or XNN_ARCH_ARM64 is non-zero
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEON_LUT64_P2_X4, elements_eq_4) {  // one run with exactly 4 elements
+    TEST_REQUIRES_ARM_NEON;  // NOTE(review): presumably skips the test when the CPU lacks NEON; macro is defined outside this patch, confirm
+    RAddStoreExpMinusMaxMicrokernelTester()
+      .elements(4)
+      .Test(xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x4);
+  }
+
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEON_LUT64_P2_X4, elements_div_4) {  // multiples of 4 above 4
+    TEST_REQUIRES_ARM_NEON;
+    for (size_t elements = 8; elements < 40; elements += 4) {  // elements = 8, 12, ..., 36
+      RAddStoreExpMinusMaxMicrokernelTester()
+        .elements(elements)
+        .Test(xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x4);
+    }
+  }
+
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEON_LUT64_P2_X4, elements_lt_4) {  // every count from 1 to 3
+    TEST_REQUIRES_ARM_NEON;
+    for (size_t elements = 1; elements < 4; elements++) {
+      RAddStoreExpMinusMaxMicrokernelTester()
+        .elements(elements)
+        .Test(xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x4);
+    }
+  }
+
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEON_LUT64_P2_X4, elements_gt_4) {  // every count from 5 to 7
+    TEST_REQUIRES_ARM_NEON;
+    for (size_t elements = 5; elements < 8; elements++) {
+      RAddStoreExpMinusMaxMicrokernelTester()
+        .elements(elements)
+        .Test(xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x4);
+    }
+  }
+#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
+
+
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64  // neon_lut64_p2_x8 kernel tests; built only when XNN_ARCH_ARM or XNN_ARCH_ARM64 is non-zero
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEON_LUT64_P2_X8, elements_eq_8) {  // one run with exactly 8 elements
+    TEST_REQUIRES_ARM_NEON;  // NOTE(review): presumably skips the test when the CPU lacks NEON; macro is defined outside this patch, confirm
+    RAddStoreExpMinusMaxMicrokernelTester()
+      .elements(8)
+      .Test(xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x8);
+  }
+
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEON_LUT64_P2_X8, elements_div_8) {  // multiples of 8 above 8
+    TEST_REQUIRES_ARM_NEON;
+    for (size_t elements = 16; elements < 80; elements += 8) {  // elements = 16, 24, ..., 72
+      RAddStoreExpMinusMaxMicrokernelTester()
+        .elements(elements)
+        .Test(xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x8);
+    }
+  }
+
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEON_LUT64_P2_X8, elements_lt_8) {  // every count from 1 to 7
+    TEST_REQUIRES_ARM_NEON;
+    for (size_t elements = 1; elements < 8; elements++) {
+      RAddStoreExpMinusMaxMicrokernelTester()
+        .elements(elements)
+        .Test(xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x8);
+    }
+  }
+
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEON_LUT64_P2_X8, elements_gt_8) {  // every count from 9 to 15
+    TEST_REQUIRES_ARM_NEON;
+    for (size_t elements = 9; elements < 16; elements++) {
+      RAddStoreExpMinusMaxMicrokernelTester()
+        .elements(elements)
+        .Test(xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x8);
+    }
+  }
+#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
+
+
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64  // neon_lut64_p2_x8_acc2 kernel tests; built only when XNN_ARCH_ARM or XNN_ARCH_ARM64 is non-zero
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEON_LUT64_P2_X8_ACC2, elements_eq_8) {  // one run with exactly 8 elements
+    TEST_REQUIRES_ARM_NEON;  // NOTE(review): presumably skips the test when the CPU lacks NEON; macro is defined outside this patch, confirm
+    RAddStoreExpMinusMaxMicrokernelTester()
+      .elements(8)
+      .Test(xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x8_acc2);
+  }
+
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEON_LUT64_P2_X8_ACC2, elements_div_8) {  // multiples of 8 above 8
+    TEST_REQUIRES_ARM_NEON;
+    for (size_t elements = 16; elements < 80; elements += 8) {  // elements = 16, 24, ..., 72
+      RAddStoreExpMinusMaxMicrokernelTester()
+        .elements(elements)
+        .Test(xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x8_acc2);
+    }
+  }
+
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEON_LUT64_P2_X8_ACC2, elements_lt_8) {  // every count from 1 to 7
+    TEST_REQUIRES_ARM_NEON;
+    for (size_t elements = 1; elements < 8; elements++) {
+      RAddStoreExpMinusMaxMicrokernelTester()
+        .elements(elements)
+        .Test(xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x8_acc2);
+    }
+  }
+
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEON_LUT64_P2_X8_ACC2, elements_gt_8) {  // every count from 9 to 15
+    TEST_REQUIRES_ARM_NEON;
+    for (size_t elements = 9; elements < 16; elements++) {
+      RAddStoreExpMinusMaxMicrokernelTester()
+        .elements(elements)
+        .Test(xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x8_acc2);
+    }
+  }
+#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
+
+
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64  // neon_lut64_p2_x12 kernel tests; built only when XNN_ARCH_ARM or XNN_ARCH_ARM64 is non-zero
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEON_LUT64_P2_X12, elements_eq_12) {  // one run with exactly 12 elements
+    TEST_REQUIRES_ARM_NEON;  // NOTE(review): presumably skips the test when the CPU lacks NEON; macro is defined outside this patch, confirm
+    RAddStoreExpMinusMaxMicrokernelTester()
+      .elements(12)
+      .Test(xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x12);
+  }
+
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEON_LUT64_P2_X12, elements_div_12) {  // multiples of 12 above 12
+    TEST_REQUIRES_ARM_NEON;
+    for (size_t elements = 24; elements < 120; elements += 12) {  // elements = 24, 36, ..., 108
+      RAddStoreExpMinusMaxMicrokernelTester()
+        .elements(elements)
+        .Test(xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x12);
+    }
+  }
+
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEON_LUT64_P2_X12, elements_lt_12) {  // every count from 1 to 11
+    TEST_REQUIRES_ARM_NEON;
+    for (size_t elements = 1; elements < 12; elements++) {
+      RAddStoreExpMinusMaxMicrokernelTester()
+        .elements(elements)
+        .Test(xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x12);
+    }
+  }
+
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEON_LUT64_P2_X12, elements_gt_12) {  // every count from 13 to 23
+    TEST_REQUIRES_ARM_NEON;
+    for (size_t elements = 13; elements < 24; elements++) {
+      RAddStoreExpMinusMaxMicrokernelTester()
+        .elements(elements)
+        .Test(xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x12);
+    }
+  }
+#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
+
+
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64  // neon_lut64_p2_x12_acc2 kernel tests; built only when XNN_ARCH_ARM or XNN_ARCH_ARM64 is non-zero
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEON_LUT64_P2_X12_ACC2, elements_eq_12) {  // one run with exactly 12 elements
+    TEST_REQUIRES_ARM_NEON;  // NOTE(review): presumably skips the test when the CPU lacks NEON; macro is defined outside this patch, confirm
+    RAddStoreExpMinusMaxMicrokernelTester()
+      .elements(12)
+      .Test(xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x12_acc2);
+  }
+
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEON_LUT64_P2_X12_ACC2, elements_div_12) {  // multiples of 12 above 12
+    TEST_REQUIRES_ARM_NEON;
+    for (size_t elements = 24; elements < 120; elements += 12) {  // elements = 24, 36, ..., 108
+      RAddStoreExpMinusMaxMicrokernelTester()
+        .elements(elements)
+        .Test(xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x12_acc2);
+    }
+  }
+
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEON_LUT64_P2_X12_ACC2, elements_lt_12) {  // every count from 1 to 11
+    TEST_REQUIRES_ARM_NEON;
+    for (size_t elements = 1; elements < 12; elements++) {
+      RAddStoreExpMinusMaxMicrokernelTester()
+        .elements(elements)
+        .Test(xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x12_acc2);
+    }
+  }
+
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEON_LUT64_P2_X12_ACC2, elements_gt_12) {  // every count from 13 to 23
+    TEST_REQUIRES_ARM_NEON;
+    for (size_t elements = 13; elements < 24; elements++) {
+      RAddStoreExpMinusMaxMicrokernelTester()
+        .elements(elements)
+        .Test(xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x12_acc2);
+    }
+  }
+#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
+
+
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64  // neon_lut64_p2_x12_acc3 kernel tests; built only when XNN_ARCH_ARM or XNN_ARCH_ARM64 is non-zero
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEON_LUT64_P2_X12_ACC3, elements_eq_12) {  // one run with exactly 12 elements
+    TEST_REQUIRES_ARM_NEON;  // NOTE(review): presumably skips the test when the CPU lacks NEON; macro is defined outside this patch, confirm
+    RAddStoreExpMinusMaxMicrokernelTester()
+      .elements(12)
+      .Test(xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x12_acc3);
+  }
+
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEON_LUT64_P2_X12_ACC3, elements_div_12) {  // multiples of 12 above 12
+    TEST_REQUIRES_ARM_NEON;
+    for (size_t elements = 24; elements < 120; elements += 12) {  // elements = 24, 36, ..., 108
+      RAddStoreExpMinusMaxMicrokernelTester()
+        .elements(elements)
+        .Test(xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x12_acc3);
+    }
+  }
+
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEON_LUT64_P2_X12_ACC3, elements_lt_12) {  // every count from 1 to 11
+    TEST_REQUIRES_ARM_NEON;
+    for (size_t elements = 1; elements < 12; elements++) {
+      RAddStoreExpMinusMaxMicrokernelTester()
+        .elements(elements)
+        .Test(xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x12_acc3);
+    }
+  }
+
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEON_LUT64_P2_X12_ACC3, elements_gt_12) {  // every count from 13 to 23
+    TEST_REQUIRES_ARM_NEON;
+    for (size_t elements = 13; elements < 24; elements++) {
+      RAddStoreExpMinusMaxMicrokernelTester()
+        .elements(elements)
+        .Test(xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x12_acc3);
+    }
+  }
+#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
+
+
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64  // neon_lut64_p2_x16 kernel tests; built only when XNN_ARCH_ARM or XNN_ARCH_ARM64 is non-zero
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEON_LUT64_P2_X16, elements_eq_16) {  // one run with exactly 16 elements
+    TEST_REQUIRES_ARM_NEON;  // NOTE(review): presumably skips the test when the CPU lacks NEON; macro is defined outside this patch, confirm
+    RAddStoreExpMinusMaxMicrokernelTester()
+      .elements(16)
+      .Test(xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x16);
+  }
+
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEON_LUT64_P2_X16, elements_div_16) {  // multiples of 16 above 16
+    TEST_REQUIRES_ARM_NEON;
+    for (size_t elements = 32; elements < 160; elements += 16) {  // elements = 32, 48, ..., 144
+      RAddStoreExpMinusMaxMicrokernelTester()
+        .elements(elements)
+        .Test(xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x16);
+    }
+  }
+
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEON_LUT64_P2_X16, elements_lt_16) {  // every count from 1 to 15
+    TEST_REQUIRES_ARM_NEON;
+    for (size_t elements = 1; elements < 16; elements++) {
+      RAddStoreExpMinusMaxMicrokernelTester()
+        .elements(elements)
+        .Test(xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x16);
+    }
+  }
+
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEON_LUT64_P2_X16, elements_gt_16) {  // every count from 17 to 31
+    TEST_REQUIRES_ARM_NEON;
+    for (size_t elements = 17; elements < 32; elements++) {
+      RAddStoreExpMinusMaxMicrokernelTester()
+        .elements(elements)
+        .Test(xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x16);
+    }
+  }
+#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
+
+
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64  // neon_lut64_p2_x16_acc2 kernel tests; built only when XNN_ARCH_ARM or XNN_ARCH_ARM64 is non-zero
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEON_LUT64_P2_X16_ACC2, elements_eq_16) {  // one run with exactly 16 elements
+    TEST_REQUIRES_ARM_NEON;  // NOTE(review): presumably skips the test when the CPU lacks NEON; macro is defined outside this patch, confirm
+    RAddStoreExpMinusMaxMicrokernelTester()
+      .elements(16)
+      .Test(xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x16_acc2);
+  }
+
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEON_LUT64_P2_X16_ACC2, elements_div_16) {  // multiples of 16 above 16
+    TEST_REQUIRES_ARM_NEON;
+    for (size_t elements = 32; elements < 160; elements += 16) {  // elements = 32, 48, ..., 144
+      RAddStoreExpMinusMaxMicrokernelTester()
+        .elements(elements)
+        .Test(xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x16_acc2);
+    }
+  }
+
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEON_LUT64_P2_X16_ACC2, elements_lt_16) {  // every count from 1 to 15
+    TEST_REQUIRES_ARM_NEON;
+    for (size_t elements = 1; elements < 16; elements++) {
+      RAddStoreExpMinusMaxMicrokernelTester()
+        .elements(elements)
+        .Test(xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x16_acc2);
+    }
+  }
+
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEON_LUT64_P2_X16_ACC2, elements_gt_16) {  // every count from 17 to 31
+    TEST_REQUIRES_ARM_NEON;
+    for (size_t elements = 17; elements < 32; elements++) {
+      RAddStoreExpMinusMaxMicrokernelTester()
+        .elements(elements)
+        .Test(xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x16_acc2);
+    }
+  }
+#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
+
+
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64  // neon_lut64_p2_x16_acc4 kernel tests; built only when XNN_ARCH_ARM or XNN_ARCH_ARM64 is non-zero
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEON_LUT64_P2_X16_ACC4, elements_eq_16) {  // one run with exactly 16 elements
+    TEST_REQUIRES_ARM_NEON;  // NOTE(review): presumably skips the test when the CPU lacks NEON; macro is defined outside this patch, confirm
+    RAddStoreExpMinusMaxMicrokernelTester()
+      .elements(16)
+      .Test(xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x16_acc4);
+  }
+
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEON_LUT64_P2_X16_ACC4, elements_div_16) {  // multiples of 16 above 16
+    TEST_REQUIRES_ARM_NEON;
+    for (size_t elements = 32; elements < 160; elements += 16) {  // elements = 32, 48, ..., 144
+      RAddStoreExpMinusMaxMicrokernelTester()
+        .elements(elements)
+        .Test(xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x16_acc4);
+    }
+  }
+
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEON_LUT64_P2_X16_ACC4, elements_lt_16) {  // every count from 1 to 15
+    TEST_REQUIRES_ARM_NEON;
+    for (size_t elements = 1; elements < 16; elements++) {
+      RAddStoreExpMinusMaxMicrokernelTester()
+        .elements(elements)
+        .Test(xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x16_acc4);
+    }
+  }
+
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEON_LUT64_P2_X16_ACC4, elements_gt_16) {  // every count from 17 to 31
+    TEST_REQUIRES_ARM_NEON;
+    for (size_t elements = 17; elements < 32; elements++) {
+      RAddStoreExpMinusMaxMicrokernelTester()
+        .elements(elements)
+        .Test(xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x16_acc4);
+    }
+  }
+#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
+
+
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64  // neon_lut64_p2_x20 kernel tests; built only when XNN_ARCH_ARM or XNN_ARCH_ARM64 is non-zero
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEON_LUT64_P2_X20, elements_eq_20) {  // one run with exactly 20 elements
+    TEST_REQUIRES_ARM_NEON;  // NOTE(review): presumably skips the test when the CPU lacks NEON; macro is defined outside this patch, confirm
+    RAddStoreExpMinusMaxMicrokernelTester()
+      .elements(20)
+      .Test(xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x20);
+  }
+
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEON_LUT64_P2_X20, elements_div_20) {  // multiples of 20 above 20
+    TEST_REQUIRES_ARM_NEON;
+    for (size_t elements = 40; elements < 200; elements += 20) {  // elements = 40, 60, ..., 180
+      RAddStoreExpMinusMaxMicrokernelTester()
+        .elements(elements)
+        .Test(xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x20);
+    }
+  }
+
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEON_LUT64_P2_X20, elements_lt_20) {  // every count from 1 to 19
+    TEST_REQUIRES_ARM_NEON;
+    for (size_t elements = 1; elements < 20; elements++) {
+      RAddStoreExpMinusMaxMicrokernelTester()
+        .elements(elements)
+        .Test(xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x20);
+    }
+  }
+
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEON_LUT64_P2_X20, elements_gt_20) {  // every count from 21 to 39
+    TEST_REQUIRES_ARM_NEON;
+    for (size_t elements = 21; elements < 40; elements++) {
+      RAddStoreExpMinusMaxMicrokernelTester()
+        .elements(elements)
+        .Test(xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x20);
+    }
+  }
+#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
+
+
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64  // neon_lut64_p2_x20_acc2 kernel tests; built only when XNN_ARCH_ARM or XNN_ARCH_ARM64 is non-zero
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEON_LUT64_P2_X20_ACC2, elements_eq_20) {  // one run with exactly 20 elements
+    TEST_REQUIRES_ARM_NEON;  // NOTE(review): presumably skips the test when the CPU lacks NEON; macro is defined outside this patch, confirm
+    RAddStoreExpMinusMaxMicrokernelTester()
+      .elements(20)
+      .Test(xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x20_acc2);
+  }
+
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEON_LUT64_P2_X20_ACC2, elements_div_20) {  // multiples of 20 above 20
+    TEST_REQUIRES_ARM_NEON;
+    for (size_t elements = 40; elements < 200; elements += 20) {  // elements = 40, 60, ..., 180
+      RAddStoreExpMinusMaxMicrokernelTester()
+        .elements(elements)
+        .Test(xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x20_acc2);
+    }
+  }
+
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEON_LUT64_P2_X20_ACC2, elements_lt_20) {  // every count from 1 to 19
+    TEST_REQUIRES_ARM_NEON;
+    for (size_t elements = 1; elements < 20; elements++) {
+      RAddStoreExpMinusMaxMicrokernelTester()
+        .elements(elements)
+        .Test(xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x20_acc2);
+    }
+  }
+
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEON_LUT64_P2_X20_ACC2, elements_gt_20) {  // every count from 21 to 39
+    TEST_REQUIRES_ARM_NEON;
+    for (size_t elements = 21; elements < 40; elements++) {
+      RAddStoreExpMinusMaxMicrokernelTester()
+        .elements(elements)
+        .Test(xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x20_acc2);
+    }
+  }
+#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
+
+
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64  // neon_lut64_p2_x20_acc5 kernel tests; built only when XNN_ARCH_ARM or XNN_ARCH_ARM64 is non-zero
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEON_LUT64_P2_X20_ACC5, elements_eq_20) {  // one run with exactly 20 elements
+    TEST_REQUIRES_ARM_NEON;  // NOTE(review): presumably skips the test when the CPU lacks NEON; macro is defined outside this patch, confirm
+    RAddStoreExpMinusMaxMicrokernelTester()
+      .elements(20)
+      .Test(xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x20_acc5);
+  }
+
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEON_LUT64_P2_X20_ACC5, elements_div_20) {  // multiples of 20 above 20
+    TEST_REQUIRES_ARM_NEON;
+    for (size_t elements = 40; elements < 200; elements += 20) {  // elements = 40, 60, ..., 180
+      RAddStoreExpMinusMaxMicrokernelTester()
+        .elements(elements)
+        .Test(xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x20_acc5);
+    }
+  }
+
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEON_LUT64_P2_X20_ACC5, elements_lt_20) {  // every count from 1 to 19
+    TEST_REQUIRES_ARM_NEON;
+    for (size_t elements = 1; elements < 20; elements++) {
+      RAddStoreExpMinusMaxMicrokernelTester()
+        .elements(elements)
+        .Test(xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x20_acc5);
+    }
+  }
+
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEON_LUT64_P2_X20_ACC5, elements_gt_20) {  // every count from 21 to 39
+    TEST_REQUIRES_ARM_NEON;
+    for (size_t elements = 21; elements < 40; elements++) {
+      RAddStoreExpMinusMaxMicrokernelTester()
+        .elements(elements)
+        .Test(xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x20_acc5);
+    }
+  }
+#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
+
+
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEONFMA_P5_X4, elements_eq_4) {
+    TEST_REQUIRES_ARM_NEON_FMA;
+    RAddStoreExpMinusMaxMicrokernelTester()
+      .elements(4)
+      .Test(xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x4);
+  }
+
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEONFMA_P5_X4, elements_div_4) {
+    TEST_REQUIRES_ARM_NEON_FMA;
+    // Multiples of 4: 8, 12, ..., 36.
+    for (size_t n = 8; n <= 36; n += 4) {
+      RAddStoreExpMinusMaxMicrokernelTester().elements(n).Test(
+          xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x4);
+    }
+  }
+
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEONFMA_P5_X4, elements_lt_4) {
+    TEST_REQUIRES_ARM_NEON_FMA;
+    // Every element count from 1 through 3.
+    for (size_t n = 1; n <= 3; ++n) {
+      RAddStoreExpMinusMaxMicrokernelTester().elements(n).Test(
+          xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x4);
+    }
+  }
+
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEONFMA_P5_X4, elements_gt_4) {
+    TEST_REQUIRES_ARM_NEON_FMA;
+    // Every element count from 5 through 7.
+    for (size_t n = 5; n <= 7; ++n) {
+      RAddStoreExpMinusMaxMicrokernelTester().elements(n).Test(
+          xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x4);
+    }
+  }
+#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
+
+
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEONFMA_P5_X8, elements_eq_8) {
+    TEST_REQUIRES_ARM_NEON_FMA;
+    // One run with exactly 8 elements.
+    RAddStoreExpMinusMaxMicrokernelTester().elements(8).Test(
+        xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x8);
+  }
+
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEONFMA_P5_X8, elements_div_8) {
+    TEST_REQUIRES_ARM_NEON_FMA;
+    // Multiples of 8: 16, 24, ..., 72.
+    for (size_t n = 16; n <= 72; n += 8) {
+      RAddStoreExpMinusMaxMicrokernelTester().elements(n).Test(
+          xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x8);
+    }
+  }
+
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEONFMA_P5_X8, elements_lt_8) {
+    TEST_REQUIRES_ARM_NEON_FMA;
+    // Every element count from 1 through 7.
+    for (size_t n = 1; n <= 7; ++n) {
+      RAddStoreExpMinusMaxMicrokernelTester().elements(n).Test(
+          xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x8);
+    }
+  }
+
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEONFMA_P5_X8, elements_gt_8) {
+    TEST_REQUIRES_ARM_NEON_FMA;
+    // Every element count from 9 through 15.
+    for (size_t n = 9; n <= 15; ++n) {
+      RAddStoreExpMinusMaxMicrokernelTester().elements(n).Test(
+          xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x8);
+    }
+  }
+#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
+
+
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEONFMA_P5_X8_ACC2, elements_eq_8) {
+    TEST_REQUIRES_ARM_NEON_FMA;
+    // One run with exactly 8 elements.
+    RAddStoreExpMinusMaxMicrokernelTester().elements(8).Test(
+        xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x8_acc2);
+  }
+
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEONFMA_P5_X8_ACC2, elements_div_8) {
+    TEST_REQUIRES_ARM_NEON_FMA;
+    // Multiples of 8: 16, 24, ..., 72.
+    for (size_t n = 16; n <= 72; n += 8) {
+      RAddStoreExpMinusMaxMicrokernelTester().elements(n).Test(
+          xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x8_acc2);
+    }
+  }
+
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEONFMA_P5_X8_ACC2, elements_lt_8) {
+    TEST_REQUIRES_ARM_NEON_FMA;
+    // Every element count from 1 through 7.
+    for (size_t n = 1; n <= 7; ++n) {
+      RAddStoreExpMinusMaxMicrokernelTester().elements(n).Test(
+          xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x8_acc2);
+    }
+  }
+
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEONFMA_P5_X8_ACC2, elements_gt_8) {
+    TEST_REQUIRES_ARM_NEON_FMA;
+    // Every element count from 9 through 15.
+    for (size_t n = 9; n <= 15; ++n) {
+      RAddStoreExpMinusMaxMicrokernelTester().elements(n).Test(
+          xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x8_acc2);
+    }
+  }
+#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
+
+
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEONFMA_P5_X12, elements_eq_12) {
+    TEST_REQUIRES_ARM_NEON_FMA;
+    // One run with exactly 12 elements.
+    RAddStoreExpMinusMaxMicrokernelTester().elements(12).Test(
+        xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x12);
+  }
+
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEONFMA_P5_X12, elements_div_12) {
+    TEST_REQUIRES_ARM_NEON_FMA;
+    // Multiples of 12: 24, 36, ..., 108.
+    for (size_t n = 24; n <= 108; n += 12) {
+      RAddStoreExpMinusMaxMicrokernelTester().elements(n).Test(
+          xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x12);
+    }
+  }
+
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEONFMA_P5_X12, elements_lt_12) {
+    TEST_REQUIRES_ARM_NEON_FMA;
+    // Every element count from 1 through 11.
+    for (size_t n = 1; n <= 11; ++n) {
+      RAddStoreExpMinusMaxMicrokernelTester().elements(n).Test(
+          xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x12);
+    }
+  }
+
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEONFMA_P5_X12, elements_gt_12) {
+    TEST_REQUIRES_ARM_NEON_FMA;
+    // Every element count from 13 through 23.
+    for (size_t n = 13; n <= 23; ++n) {
+      RAddStoreExpMinusMaxMicrokernelTester().elements(n).Test(
+          xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x12);
+    }
+  }
+#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
+
+
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEONFMA_P5_X12_ACC2, elements_eq_12) {
+    TEST_REQUIRES_ARM_NEON_FMA;
+    // One run with exactly 12 elements.
+    RAddStoreExpMinusMaxMicrokernelTester().elements(12).Test(
+        xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x12_acc2);
+  }
+
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEONFMA_P5_X12_ACC2, elements_div_12) {
+    TEST_REQUIRES_ARM_NEON_FMA;
+    // Multiples of 12: 24, 36, ..., 108.
+    for (size_t n = 24; n <= 108; n += 12) {
+      RAddStoreExpMinusMaxMicrokernelTester().elements(n).Test(
+          xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x12_acc2);
+    }
+  }
+
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEONFMA_P5_X12_ACC2, elements_lt_12) {
+    TEST_REQUIRES_ARM_NEON_FMA;
+    // Every element count from 1 through 11.
+    for (size_t n = 1; n <= 11; ++n) {
+      RAddStoreExpMinusMaxMicrokernelTester().elements(n).Test(
+          xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x12_acc2);
+    }
+  }
+
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEONFMA_P5_X12_ACC2, elements_gt_12) {
+    TEST_REQUIRES_ARM_NEON_FMA;
+    // Every element count from 13 through 23.
+    for (size_t n = 13; n <= 23; ++n) {
+      RAddStoreExpMinusMaxMicrokernelTester().elements(n).Test(
+          xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x12_acc2);
+    }
+  }
+#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
+
+
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEONFMA_P5_X12_ACC3, elements_eq_12) {
+    TEST_REQUIRES_ARM_NEON_FMA;
+    // One run with exactly 12 elements.
+    RAddStoreExpMinusMaxMicrokernelTester().elements(12).Test(
+        xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x12_acc3);
+  }
+
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEONFMA_P5_X12_ACC3, elements_div_12) {
+    TEST_REQUIRES_ARM_NEON_FMA;
+    // Multiples of 12: 24, 36, ..., 108.
+    for (size_t n = 24; n <= 108; n += 12) {
+      RAddStoreExpMinusMaxMicrokernelTester().elements(n).Test(
+          xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x12_acc3);
+    }
+  }
+
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEONFMA_P5_X12_ACC3, elements_lt_12) {
+    TEST_REQUIRES_ARM_NEON_FMA;
+    // Every element count from 1 through 11.
+    for (size_t n = 1; n <= 11; ++n) {
+      RAddStoreExpMinusMaxMicrokernelTester().elements(n).Test(
+          xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x12_acc3);
+    }
+  }
+
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEONFMA_P5_X12_ACC3, elements_gt_12) {
+    TEST_REQUIRES_ARM_NEON_FMA;
+    // Every element count from 13 through 23.
+    for (size_t n = 13; n <= 23; ++n) {
+      RAddStoreExpMinusMaxMicrokernelTester().elements(n).Test(
+          xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x12_acc3);
+    }
+  }
+#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
+
+
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEONFMA_P5_X16, elements_eq_16) {
+    TEST_REQUIRES_ARM_NEON_FMA;
+    // One run with exactly 16 elements.
+    RAddStoreExpMinusMaxMicrokernelTester().elements(16).Test(
+        xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x16);
+  }
+
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEONFMA_P5_X16, elements_div_16) {
+    TEST_REQUIRES_ARM_NEON_FMA;
+    // Multiples of 16: 32, 48, ..., 144.
+    for (size_t n = 32; n <= 144; n += 16) {
+      RAddStoreExpMinusMaxMicrokernelTester().elements(n).Test(
+          xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x16);
+    }
+  }
+
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEONFMA_P5_X16, elements_lt_16) {
+    TEST_REQUIRES_ARM_NEON_FMA;
+    // Every element count from 1 through 15.
+    for (size_t n = 1; n <= 15; ++n) {
+      RAddStoreExpMinusMaxMicrokernelTester().elements(n).Test(
+          xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x16);
+    }
+  }
+
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEONFMA_P5_X16, elements_gt_16) {
+    TEST_REQUIRES_ARM_NEON_FMA;
+    // Every element count from 17 through 31.
+    for (size_t n = 17; n <= 31; ++n) {
+      RAddStoreExpMinusMaxMicrokernelTester().elements(n).Test(
+          xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x16);
+    }
+  }
+#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
+
+
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEONFMA_P5_X16_ACC2, elements_eq_16) {
+    TEST_REQUIRES_ARM_NEON_FMA;
+    // One run with exactly 16 elements.
+    RAddStoreExpMinusMaxMicrokernelTester().elements(16).Test(
+        xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x16_acc2);
+  }
+
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEONFMA_P5_X16_ACC2, elements_div_16) {
+    TEST_REQUIRES_ARM_NEON_FMA;
+    // Multiples of 16: 32, 48, ..., 144.
+    for (size_t n = 32; n <= 144; n += 16) {
+      RAddStoreExpMinusMaxMicrokernelTester().elements(n).Test(
+          xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x16_acc2);
+    }
+  }
+
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEONFMA_P5_X16_ACC2, elements_lt_16) {
+    TEST_REQUIRES_ARM_NEON_FMA;
+    // Every element count from 1 through 15.
+    for (size_t n = 1; n <= 15; ++n) {
+      RAddStoreExpMinusMaxMicrokernelTester().elements(n).Test(
+          xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x16_acc2);
+    }
+  }
+
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEONFMA_P5_X16_ACC2, elements_gt_16) {
+    TEST_REQUIRES_ARM_NEON_FMA;
+    // Every element count from 17 through 31.
+    for (size_t n = 17; n <= 31; ++n) {
+      RAddStoreExpMinusMaxMicrokernelTester().elements(n).Test(
+          xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x16_acc2);
+    }
+  }
+#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
+
+
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEONFMA_P5_X16_ACC4, elements_eq_16) {
+    TEST_REQUIRES_ARM_NEON_FMA;
+    // One run with exactly 16 elements.
+    RAddStoreExpMinusMaxMicrokernelTester().elements(16).Test(
+        xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x16_acc4);
+  }
+
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEONFMA_P5_X16_ACC4, elements_div_16) {
+    TEST_REQUIRES_ARM_NEON_FMA;
+    // Multiples of 16: 32, 48, ..., 144.
+    for (size_t n = 32; n <= 144; n += 16) {
+      RAddStoreExpMinusMaxMicrokernelTester().elements(n).Test(
+          xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x16_acc4);
+    }
+  }
+
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEONFMA_P5_X16_ACC4, elements_lt_16) {
+    TEST_REQUIRES_ARM_NEON_FMA;
+    // Every element count from 1 through 15.
+    for (size_t n = 1; n <= 15; ++n) {
+      RAddStoreExpMinusMaxMicrokernelTester().elements(n).Test(
+          xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x16_acc4);
+    }
+  }
+
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEONFMA_P5_X16_ACC4, elements_gt_16) {
+    TEST_REQUIRES_ARM_NEON_FMA;
+    // Every element count from 17 through 31.
+    for (size_t n = 17; n <= 31; ++n) {
+      RAddStoreExpMinusMaxMicrokernelTester().elements(n).Test(
+          xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x16_acc4);
+    }
+  }
+#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
+
+
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEONFMA_P5_X20, elements_eq_20) {
+    TEST_REQUIRES_ARM_NEON_FMA;
+    // One run with exactly 20 elements.
+    RAddStoreExpMinusMaxMicrokernelTester().elements(20).Test(
+        xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x20);
+  }
+
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEONFMA_P5_X20, elements_div_20) {
+    TEST_REQUIRES_ARM_NEON_FMA;
+    // Multiples of 20: 40, 60, ..., 180.
+    for (size_t n = 40; n <= 180; n += 20) {
+      RAddStoreExpMinusMaxMicrokernelTester().elements(n).Test(
+          xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x20);
+    }
+  }
+
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEONFMA_P5_X20, elements_lt_20) {
+    TEST_REQUIRES_ARM_NEON_FMA;
+    // Every element count from 1 through 19.
+    for (size_t n = 1; n <= 19; ++n) {
+      RAddStoreExpMinusMaxMicrokernelTester().elements(n).Test(
+          xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x20);
+    }
+  }
+
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEONFMA_P5_X20, elements_gt_20) {
+    TEST_REQUIRES_ARM_NEON_FMA;
+    // Every element count from 21 through 39.
+    for (size_t n = 21; n <= 39; ++n) {
+      RAddStoreExpMinusMaxMicrokernelTester().elements(n).Test(
+          xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x20);
+    }
+  }
+#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
+
+
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEONFMA_P5_X20_ACC2, elements_eq_20) {
+    TEST_REQUIRES_ARM_NEON_FMA;
+    // One run with exactly 20 elements.
+    RAddStoreExpMinusMaxMicrokernelTester().elements(20).Test(
+        xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x20_acc2);
+  }
+
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEONFMA_P5_X20_ACC2, elements_div_20) {
+    TEST_REQUIRES_ARM_NEON_FMA;
+    // Multiples of 20: 40, 60, ..., 180.
+    for (size_t n = 40; n <= 180; n += 20) {
+      RAddStoreExpMinusMaxMicrokernelTester().elements(n).Test(
+          xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x20_acc2);
+    }
+  }
+
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEONFMA_P5_X20_ACC2, elements_lt_20) {
+    TEST_REQUIRES_ARM_NEON_FMA;
+    // Every element count from 1 through 19.
+    for (size_t n = 1; n <= 19; ++n) {
+      RAddStoreExpMinusMaxMicrokernelTester().elements(n).Test(
+          xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x20_acc2);
+    }
+  }
+
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEONFMA_P5_X20_ACC2, elements_gt_20) {
+    TEST_REQUIRES_ARM_NEON_FMA;
+    // Every element count from 21 through 39.
+    for (size_t n = 21; n <= 39; ++n) {
+      RAddStoreExpMinusMaxMicrokernelTester().elements(n).Test(
+          xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x20_acc2);
+    }
+  }
+#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
+
+
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEONFMA_P5_X20_ACC5, elements_eq_20) {
+    TEST_REQUIRES_ARM_NEON_FMA;
+    // One run with exactly 20 elements.
+    RAddStoreExpMinusMaxMicrokernelTester().elements(20).Test(
+        xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x20_acc5);
+  }
+
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEONFMA_P5_X20_ACC5, elements_div_20) {
+    TEST_REQUIRES_ARM_NEON_FMA;
+    // Multiples of 20: 40, 60, ..., 180.
+    for (size_t n = 40; n <= 180; n += 20) {
+      RAddStoreExpMinusMaxMicrokernelTester().elements(n).Test(
+          xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x20_acc5);
+    }
+  }
+
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEONFMA_P5_X20_ACC5, elements_lt_20) {
+    TEST_REQUIRES_ARM_NEON_FMA;
+    // Every element count from 1 through 19.
+    for (size_t n = 1; n <= 19; ++n) {
+      RAddStoreExpMinusMaxMicrokernelTester().elements(n).Test(
+          xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x20_acc5);
+    }
+  }
+
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEONFMA_P5_X20_ACC5, elements_gt_20) {
+    TEST_REQUIRES_ARM_NEON_FMA;
+    // Every element count from 21 through 39.
+    for (size_t n = 21; n <= 39; ++n) {
+      RAddStoreExpMinusMaxMicrokernelTester().elements(n).Test(
+          xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x20_acc5);
+    }
+  }
+#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
+
+
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEONFMA_LUT64_P2_X4, elements_eq_4) {
+    TEST_REQUIRES_ARM_NEON_FMA;
+    // One run with exactly 4 elements.
+    RAddStoreExpMinusMaxMicrokernelTester().elements(4).Test(
+        xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x4);
+  }
+
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEONFMA_LUT64_P2_X4, elements_div_4) {
+    TEST_REQUIRES_ARM_NEON_FMA;
+    // Multiples of 4: 8, 12, ..., 36.
+    for (size_t n = 8; n <= 36; n += 4) {
+      RAddStoreExpMinusMaxMicrokernelTester().elements(n).Test(
+          xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x4);
+    }
+  }
+
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEONFMA_LUT64_P2_X4, elements_lt_4) {
+    TEST_REQUIRES_ARM_NEON_FMA;
+    // Every element count from 1 through 3.
+    for (size_t n = 1; n <= 3; ++n) {
+      RAddStoreExpMinusMaxMicrokernelTester().elements(n).Test(
+          xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x4);
+    }
+  }
+
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEONFMA_LUT64_P2_X4, elements_gt_4) {
+    TEST_REQUIRES_ARM_NEON_FMA;
+    // Every element count from 5 through 7.
+    for (size_t n = 5; n <= 7; ++n) {
+      RAddStoreExpMinusMaxMicrokernelTester().elements(n).Test(
+          xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x4);
+    }
+  }
+#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
+
+
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEONFMA_LUT64_P2_X8, elements_eq_8) {
+    TEST_REQUIRES_ARM_NEON_FMA;
+    // One run with exactly 8 elements.
+    RAddStoreExpMinusMaxMicrokernelTester().elements(8).Test(
+        xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x8);
+  }
+
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEONFMA_LUT64_P2_X8, elements_div_8) {
+    TEST_REQUIRES_ARM_NEON_FMA;
+    // Multiples of 8: 16, 24, ..., 72.
+    for (size_t n = 16; n <= 72; n += 8) {
+      RAddStoreExpMinusMaxMicrokernelTester().elements(n).Test(
+          xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x8);
+    }
+  }
+
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEONFMA_LUT64_P2_X8, elements_lt_8) {
+    TEST_REQUIRES_ARM_NEON_FMA;
+    // Every element count from 1 through 7.
+    for (size_t n = 1; n <= 7; ++n) {
+      RAddStoreExpMinusMaxMicrokernelTester().elements(n).Test(
+          xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x8);
+    }
+  }
+
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEONFMA_LUT64_P2_X8, elements_gt_8) {
+    TEST_REQUIRES_ARM_NEON_FMA;
+    // Every element count from 9 through 15.
+    for (size_t n = 9; n <= 15; ++n) {
+      RAddStoreExpMinusMaxMicrokernelTester().elements(n).Test(
+          xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x8);
+    }
+  }
+#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
+
+
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEONFMA_LUT64_P2_X8_ACC2, elements_eq_8) {
+    TEST_REQUIRES_ARM_NEON_FMA;
+    // One run with exactly 8 elements.
+    RAddStoreExpMinusMaxMicrokernelTester().elements(8).Test(
+        xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x8_acc2);
+  }
+
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEONFMA_LUT64_P2_X8_ACC2, elements_div_8) {
+    TEST_REQUIRES_ARM_NEON_FMA;
+    // Multiples of 8: 16, 24, ..., 72.
+    for (size_t n = 16; n <= 72; n += 8) {
+      RAddStoreExpMinusMaxMicrokernelTester().elements(n).Test(
+          xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x8_acc2);
+    }
+  }
+
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEONFMA_LUT64_P2_X8_ACC2, elements_lt_8) {
+    TEST_REQUIRES_ARM_NEON_FMA;
+    // Every element count from 1 through 7.
+    for (size_t n = 1; n <= 7; ++n) {
+      RAddStoreExpMinusMaxMicrokernelTester().elements(n).Test(
+          xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x8_acc2);
+    }
+  }
+
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEONFMA_LUT64_P2_X8_ACC2, elements_gt_8) {
+    TEST_REQUIRES_ARM_NEON_FMA;
+    // Every element count from 9 through 15.
+    for (size_t n = 9; n <= 15; ++n) {
+      RAddStoreExpMinusMaxMicrokernelTester().elements(n).Test(
+          xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x8_acc2);
+    }
+  }
+#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
+
+
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEONFMA_LUT64_P2_X12, elements_eq_12) {
+    TEST_REQUIRES_ARM_NEON_FMA;
+    // One run with exactly 12 elements.
+    RAddStoreExpMinusMaxMicrokernelTester().elements(12).Test(
+        xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x12);
+  }
+
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEONFMA_LUT64_P2_X12, elements_div_12) {
+    TEST_REQUIRES_ARM_NEON_FMA;
+    // Multiples of 12: 24, 36, ..., 108.
+    for (size_t n = 24; n <= 108; n += 12) {
+      RAddStoreExpMinusMaxMicrokernelTester().elements(n).Test(
+          xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x12);
+    }
+  }
+
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEONFMA_LUT64_P2_X12, elements_lt_12) {
+    TEST_REQUIRES_ARM_NEON_FMA;
+    // Every element count from 1 through 11.
+    for (size_t n = 1; n <= 11; ++n) {
+      RAddStoreExpMinusMaxMicrokernelTester().elements(n).Test(
+          xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x12);
+    }
+  }
+
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEONFMA_LUT64_P2_X12, elements_gt_12) {
+    TEST_REQUIRES_ARM_NEON_FMA;
+    // Every element count from 13 through 23.
+    for (size_t n = 13; n <= 23; ++n) {
+      RAddStoreExpMinusMaxMicrokernelTester().elements(n).Test(
+          xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x12);
+    }
+  }
+#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
+
+
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEONFMA_LUT64_P2_X12_ACC2, elements_eq_12) {
+    TEST_REQUIRES_ARM_NEON_FMA;
+    // One run with exactly 12 elements.
+    RAddStoreExpMinusMaxMicrokernelTester().elements(12).Test(
+        xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x12_acc2);
+  }
+
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEONFMA_LUT64_P2_X12_ACC2, elements_div_12) {
+    TEST_REQUIRES_ARM_NEON_FMA;
+    // Multiples of 12: 24, 36, ..., 108.
+    for (size_t n = 24; n <= 108; n += 12) {
+      RAddStoreExpMinusMaxMicrokernelTester().elements(n).Test(
+          xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x12_acc2);
+    }
+  }
+
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEONFMA_LUT64_P2_X12_ACC2, elements_lt_12) {
+    TEST_REQUIRES_ARM_NEON_FMA;
+    // Every element count from 1 through 11.
+    for (size_t n = 1; n <= 11; ++n) {
+      RAddStoreExpMinusMaxMicrokernelTester().elements(n).Test(
+          xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x12_acc2);
+    }
+  }
+
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEONFMA_LUT64_P2_X12_ACC2, elements_gt_12) {
+    TEST_REQUIRES_ARM_NEON_FMA;
+    // Every element count from 13 through 23.
+    for (size_t n = 13; n <= 23; ++n) {
+      RAddStoreExpMinusMaxMicrokernelTester().elements(n).Test(
+          xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x12_acc2);
+    }
+  }
+#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
+
+
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEONFMA_LUT64_P2_X12_ACC3, elements_eq_12) {
+    TEST_REQUIRES_ARM_NEON_FMA;
+    // One run with exactly 12 elements.
+    RAddStoreExpMinusMaxMicrokernelTester().elements(12).Test(
+        xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x12_acc3);
+  }
+
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEONFMA_LUT64_P2_X12_ACC3, elements_div_12) {
+    TEST_REQUIRES_ARM_NEON_FMA;
+    // Multiples of 12: 24, 36, ..., 108.
+    for (size_t n = 24; n <= 108; n += 12) {
+      RAddStoreExpMinusMaxMicrokernelTester().elements(n).Test(
+          xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x12_acc3);
+    }
+  }
+
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEONFMA_LUT64_P2_X12_ACC3, elements_lt_12) {
+    TEST_REQUIRES_ARM_NEON_FMA;
+    // Every element count from 1 through 11.
+    for (size_t n = 1; n <= 11; ++n) {
+      RAddStoreExpMinusMaxMicrokernelTester().elements(n).Test(
+          xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x12_acc3);
+    }
+  }
+
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEONFMA_LUT64_P2_X12_ACC3, elements_gt_12) {
+    TEST_REQUIRES_ARM_NEON_FMA;
+    // Every element count from 13 through 23.
+    for (size_t n = 13; n <= 23; ++n) {
+      RAddStoreExpMinusMaxMicrokernelTester().elements(n).Test(
+          xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x12_acc3);
+    }
+  }
+#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
+
+
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEONFMA_LUT64_P2_X16, elements_eq_16) {
+    TEST_REQUIRES_ARM_NEON_FMA;
+    // One run with exactly 16 elements.
+    RAddStoreExpMinusMaxMicrokernelTester().elements(16).Test(
+        xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x16);
+  }
+
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEONFMA_LUT64_P2_X16, elements_div_16) {
+    TEST_REQUIRES_ARM_NEON_FMA;
+    // Multiples of 16: 32, 48, ..., 144.
+    for (size_t n = 32; n <= 144; n += 16) {
+      RAddStoreExpMinusMaxMicrokernelTester().elements(n).Test(
+          xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x16);
+    }
+  }
+
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEONFMA_LUT64_P2_X16, elements_lt_16) {
+    TEST_REQUIRES_ARM_NEON_FMA;
+    // Every element count from 1 through 15.
+    for (size_t n = 1; n <= 15; ++n) {
+      RAddStoreExpMinusMaxMicrokernelTester().elements(n).Test(
+          xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x16);
+    }
+  }
+
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEONFMA_LUT64_P2_X16, elements_gt_16) {
+    TEST_REQUIRES_ARM_NEON_FMA;
+    // Every element count from 17 through 31.
+    for (size_t n = 17; n <= 31; ++n) {
+      RAddStoreExpMinusMaxMicrokernelTester().elements(n).Test(
+          xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x16);
+    }
+  }
+#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
+
+
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEONFMA_LUT64_P2_X16_ACC2, elements_eq_16) {
+    TEST_REQUIRES_ARM_NEON_FMA;
+    // One run with exactly 16 elements.
+    RAddStoreExpMinusMaxMicrokernelTester().elements(16).Test(
+        xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x16_acc2);
+  }
+
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEONFMA_LUT64_P2_X16_ACC2, elements_div_16) {
+    TEST_REQUIRES_ARM_NEON_FMA;
+    // Multiples of 16: 32, 48, ..., 144.
+    for (size_t n = 32; n <= 144; n += 16) {
+      RAddStoreExpMinusMaxMicrokernelTester().elements(n).Test(
+          xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x16_acc2);
+    }
+  }
+
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEONFMA_LUT64_P2_X16_ACC2, elements_lt_16) {
+    TEST_REQUIRES_ARM_NEON_FMA;
+    // Every element count from 1 through 15.
+    for (size_t n = 1; n <= 15; ++n) {
+      RAddStoreExpMinusMaxMicrokernelTester().elements(n).Test(
+          xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x16_acc2);
+    }
+  }
+
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEONFMA_LUT64_P2_X16_ACC2, elements_gt_16) {
+    TEST_REQUIRES_ARM_NEON_FMA;
+    // Every element count from 17 through 31.
+    for (size_t n = 17; n <= 31; ++n) {
+      RAddStoreExpMinusMaxMicrokernelTester().elements(n).Test(
+          xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x16_acc2);
+    }
+  }
+#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
+
+
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEONFMA_LUT64_P2_X16_ACC4, elements_eq_16) {
+    TEST_REQUIRES_ARM_NEON_FMA;
+    // One run with exactly 16 elements.
+    RAddStoreExpMinusMaxMicrokernelTester().elements(16).Test(
+        xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x16_acc4);
+  }
+
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEONFMA_LUT64_P2_X16_ACC4, elements_div_16) {
+    TEST_REQUIRES_ARM_NEON_FMA;
+    // Multiples of 16: 32, 48, ..., 144.
+    for (size_t n = 32; n <= 144; n += 16) {
+      RAddStoreExpMinusMaxMicrokernelTester().elements(n).Test(
+          xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x16_acc4);
+    }
+  }
+
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEONFMA_LUT64_P2_X16_ACC4, elements_lt_16) {
+    TEST_REQUIRES_ARM_NEON_FMA;
+    // Every element count from 1 through 15.
+    for (size_t n = 1; n <= 15; ++n) {
+      RAddStoreExpMinusMaxMicrokernelTester().elements(n).Test(
+          xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x16_acc4);
+    }
+  }
+
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEONFMA_LUT64_P2_X16_ACC4, elements_gt_16) {
+    TEST_REQUIRES_ARM_NEON_FMA;
+    // Every element count from 17 through 31.
+    for (size_t n = 17; n <= 31; ++n) {
+      RAddStoreExpMinusMaxMicrokernelTester().elements(n).Test(
+          xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x16_acc4);
+    }
+  }
+#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
+
+
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEONFMA_LUT64_P2_X20, elements_eq_20) {
+    TEST_REQUIRES_ARM_NEON_FMA;
+    // One run with exactly 20 elements.
+    RAddStoreExpMinusMaxMicrokernelTester().elements(20).Test(
+        xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x20);
+  }
+
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEONFMA_LUT64_P2_X20, elements_div_20) {
+    TEST_REQUIRES_ARM_NEON_FMA;
+    // Multiples of 20: 40, 60, ..., 180.
+    for (size_t n = 40; n <= 180; n += 20) {
+      RAddStoreExpMinusMaxMicrokernelTester().elements(n).Test(
+          xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x20);
+    }
+  }
+
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEONFMA_LUT64_P2_X20, elements_lt_20) {
+    TEST_REQUIRES_ARM_NEON_FMA;
+    // Every element count from 1 through 19.
+    for (size_t n = 1; n <= 19; ++n) {
+      RAddStoreExpMinusMaxMicrokernelTester().elements(n).Test(
+          xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x20);
+    }
+  }
+
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEONFMA_LUT64_P2_X20, elements_gt_20) {
+    TEST_REQUIRES_ARM_NEON_FMA;
+    // Every element count from 21 through 39.
+    for (size_t n = 21; n <= 39; ++n) {
+      RAddStoreExpMinusMaxMicrokernelTester().elements(n).Test(
+          xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x20);
+    }
+  }
+#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
+
+
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEONFMA_LUT64_P2_X20_ACC2, elements_eq_20) {
+    TEST_REQUIRES_ARM_NEON_FMA;
+    // One run with exactly 20 elements.
+    RAddStoreExpMinusMaxMicrokernelTester().elements(20).Test(
+        xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x20_acc2);
+  }
+
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEONFMA_LUT64_P2_X20_ACC2, elements_div_20) {
+    TEST_REQUIRES_ARM_NEON_FMA;
+    // Multiples of 20: 40, 60, ..., 180.
+    for (size_t n = 40; n <= 180; n += 20) {
+      RAddStoreExpMinusMaxMicrokernelTester().elements(n).Test(
+          xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x20_acc2);
+    }
+  }
+
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEONFMA_LUT64_P2_X20_ACC2, elements_lt_20) {
+    TEST_REQUIRES_ARM_NEON_FMA;
+    // Every element count from 1 through 19.
+    for (size_t n = 1; n <= 19; ++n) {
+      RAddStoreExpMinusMaxMicrokernelTester().elements(n).Test(
+          xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x20_acc2);
+    }
+  }
+
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEONFMA_LUT64_P2_X20_ACC2, elements_gt_20) {
+    TEST_REQUIRES_ARM_NEON_FMA;
+    // Every element count from 21 through 39.
+    for (size_t n = 21; n <= 39; ++n) {
+      RAddStoreExpMinusMaxMicrokernelTester().elements(n).Test(
+          xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x20_acc2);
+    }
+  }
+#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
+
+
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEONFMA_LUT64_P2_X20_ACC5, elements_eq_20) {
+    TEST_REQUIRES_ARM_NEON_FMA;
+    // One run with exactly 20 elements.
+    RAddStoreExpMinusMaxMicrokernelTester().elements(20).Test(
+        xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x20_acc5);
+  }
+
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEONFMA_LUT64_P2_X20_ACC5, elements_div_20) {
+    TEST_REQUIRES_ARM_NEON_FMA;
+    // Multiples of 20: 40, 60, ..., 180.
+    for (size_t n = 40; n <= 180; n += 20) {
+      RAddStoreExpMinusMaxMicrokernelTester().elements(n).Test(
+          xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x20_acc5);
+    }
+  }
+
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEONFMA_LUT64_P2_X20_ACC5, elements_lt_20) {
+    TEST_REQUIRES_ARM_NEON_FMA;
+    // Every element count from 1 through 19.
+    for (size_t n = 1; n <= 19; ++n) {
+      RAddStoreExpMinusMaxMicrokernelTester().elements(n).Test(
+          xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x20_acc5);
+    }
+  }
+
+  TEST(F32_RADDSTOREEXPMINUSMAX__NEONFMA_LUT64_P2_X20_ACC5, elements_gt_20) {
+    TEST_REQUIRES_ARM_NEON_FMA;
+    // Every element count from 21 through 39.
+    for (size_t n = 21; n <= 39; ++n) {
+      RAddStoreExpMinusMaxMicrokernelTester().elements(n).Test(
+          xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x20_acc5);
+    }
+  }
+#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
+
+
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
   TEST(F32_RADDSTOREEXPMINUSMAX__SSE2_P5_X4, elements_eq_4) {
     TEST_REQUIRES_X86_SSE2;