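Limit direct dependencies on cpuinfo

Stop including <cpuinfo.h> directly in the affected sources: include
<xnnpack/common.h> instead and guard architecture-specific sections with
the XNN_ARCH_* macros rather than the CPUINFO_ARCH_* macros.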

PiperOrigin-RevId: 272245408
diff --git a/test/f16-gemm.cc b/test/f16-gemm.cc
index 725313d..09a344d 100644
--- a/test/f16-gemm.cc
+++ b/test/f16-gemm.cc
@@ -11,18 +11,18 @@
 //   Generator: tools/generate-gemm-test.py
 
 
-#include <cpuinfo.h>
 #include <gtest/gtest.h>
 
+#include <xnnpack/common.h>
+#include <xnnpack/isa-checks.h>
+
 #include <xnnpack/gemm.h>
 #include <xnnpack/igemm.h>
 #include <xnnpack/ppmm.h>
-#include <xnnpack/isa-checks.h>
-
 #include "gemm-microkernel-tester.h"
 
 
-#if CPUINFO_ARCH_ARM64
+#if XNN_ARCH_ARM64
   TEST(F16_GEMM_4X8__NEONFP16ARITH_LD64, k_eq_4) {
     TEST_REQUIRES_ARM_NEON_FP16_ARITH;
     GemmMicrokernelTester()
@@ -475,10 +475,10 @@
       .cm_stride(11)
       .Test(xnn_f16_gemm_ukernel_4x8__neonfp16arith_ld64);
   }
-#endif  // CPUINFO_ARCH_ARM64
+#endif  // XNN_ARCH_ARM64
 
 
-#if CPUINFO_ARCH_ARM64
+#if XNN_ARCH_ARM64
   TEST(F16_GEMM_6X8__NEONFP16ARITH_LD64, k_eq_4) {
     TEST_REQUIRES_ARM_NEON_FP16_ARITH;
     GemmMicrokernelTester()
@@ -931,10 +931,10 @@
       .cm_stride(11)
       .Test(xnn_f16_gemm_ukernel_6x8__neonfp16arith_ld64);
   }
-#endif  // CPUINFO_ARCH_ARM64
+#endif  // XNN_ARCH_ARM64
 
 
-#if CPUINFO_ARCH_ARM64
+#if XNN_ARCH_ARM64
   TEST(F16_GEMM_8X8__NEONFP16ARITH_LD64, k_eq_4) {
     TEST_REQUIRES_ARM_NEON_FP16_ARITH;
     GemmMicrokernelTester()
@@ -1387,4 +1387,4 @@
       .cm_stride(11)
       .Test(xnn_f16_gemm_ukernel_8x8__neonfp16arith_ld64);
   }
-#endif  // CPUINFO_ARCH_ARM64
+#endif  // XNN_ARCH_ARM64
diff --git a/test/f32-argmaxpool.cc b/test/f32-argmaxpool.cc
index 9ff68e3..c1a8a91 100644
--- a/test/f32-argmaxpool.cc
+++ b/test/f32-argmaxpool.cc
@@ -3,16 +3,16 @@
 // This source code is licensed under the BSD-style license found in the
 // LICENSE file in the root directory of this source tree.
 
-#include <cpuinfo.h>
 #include <gtest/gtest.h>
 
+#include <xnnpack/common.h>
 #include <xnnpack/isa-checks.h>
-#include <xnnpack/argmaxpool.h>
 
+#include <xnnpack/argmaxpool.h>
 #include "argmaxpool-microkernel-tester.h"
 
 
-#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
   TEST(F32_ARGMAXPOOL_UP4__SSE2, kc_eq_4_fulltile) {
     TEST_REQUIRES_X86_SSE2;
     auto tester = ArgmaxPoolMicrokernelTester()
@@ -1321,10 +1321,10 @@
       }
     }
   }
-#endif  // CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
-#if !CPUINFO_ARCH_WASM && !CPUINFO_ARCH_ASMJS
+#if !XNN_ARCH_WASM && !XNN_ARCH_ASMJS
   TEST(F32_ARGMAXPOOL_UP4__PSIMD, kc_eq_4_fulltile) {
     TEST_REQUIRES_PSIMD;
     auto tester = ArgmaxPoolMicrokernelTester()
@@ -2633,7 +2633,7 @@
       }
     }
   }
-#endif  // !CPUINFO_ARCH_WASM && !CPUINFO_ARCH_ASMJS
+#endif  // !XNN_ARCH_WASM && !XNN_ARCH_ASMJS
 
 
 TEST(F32_ARGMAXPOOL_UP4__SCALAR, kc_eq_1_fulltile) {
diff --git a/test/f32-avgpool.cc b/test/f32-avgpool.cc
index 24186aa..4fd1e95 100644
--- a/test/f32-avgpool.cc
+++ b/test/f32-avgpool.cc
@@ -3,16 +3,16 @@
 // This source code is licensed under the BSD-style license found in the
 // LICENSE file in the root directory of this source tree.
 
-#include <cpuinfo.h>
 #include <gtest/gtest.h>
 
+#include <xnnpack/common.h>
 #include <xnnpack/isa-checks.h>
-#include <xnnpack/avgpool.h>
 
+#include <xnnpack/avgpool.h>
 #include "avgpool-microkernel-tester.h"
 
 
-#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
   TEST(F32_AVGPOOL_UP9__NEON, kc_eq_4_fulltile) {
     TEST_REQUIRES_ARM_NEON;
     auto tester = AvgPoolMicrokernelTester()
@@ -942,10 +942,10 @@
       }
     }
   }
-#endif  // CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
-#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
   TEST(F32_AVGPOOL_UP9__SSE2, kc_eq_4_fulltile) {
     TEST_REQUIRES_X86_SSE2;
     auto tester = AvgPoolMicrokernelTester()
@@ -1875,10 +1875,10 @@
       }
     }
   }
-#endif  // CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
-#if !CPUINFO_ARCH_WASM && !CPUINFO_ARCH_ASMJS
+#if !XNN_ARCH_WASM && !XNN_ARCH_ASMJS
   TEST(F32_AVGPOOL_UP9__PSIMD, kc_eq_4_fulltile) {
     TEST_REQUIRES_PSIMD;
     auto tester = AvgPoolMicrokernelTester()
@@ -2808,7 +2808,7 @@
       }
     }
   }
-#endif  // !CPUINFO_ARCH_WASM && !CPUINFO_ARCH_ASMJS
+#endif  // !XNN_ARCH_WASM && !XNN_ARCH_ASMJS
 
 
 TEST(F32_AVGPOOL_UP9__SCALAR, kc_eq_1_fulltile) {
diff --git a/test/f32-clamp.cc b/test/f32-clamp.cc
index a1536dd..6abcc53 100644
--- a/test/f32-clamp.cc
+++ b/test/f32-clamp.cc
@@ -3,16 +3,16 @@
 // This source code is licensed under the BSD-style license found in the
 // LICENSE file in the root directory of this source tree.
 
-#include <cpuinfo.h>
 #include <gtest/gtest.h>
 
+#include <xnnpack/common.h>
 #include <xnnpack/isa-checks.h>
-#include <xnnpack/clamp.h>
 
+#include <xnnpack/clamp.h>
 #include "clamp-microkernel-tester.h"
 
 
-#if !CPUINFO_ARCH_WASM && !CPUINFO_ARCH_ASMJS
+#if !XNN_ARCH_WASM && !XNN_ARCH_ASMJS
   TEST(F32_CLAMP__PSIMD, n_eq_4) {
     TEST_REQUIRES_PSIMD;
     ClampMicrokernelTester()
@@ -85,7 +85,7 @@
       }
     }
   }
-#endif  // !CPUINFO_ARCH_WASM && !CPUINFO_ARCH_ASMJS
+#endif  // !XNN_ARCH_WASM && !XNN_ARCH_ASMJS
 
 
 TEST(F32_CLAMP__SCALAR, n_eq_2) {
@@ -154,7 +154,7 @@
   }
 }
 
-#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
   TEST(F32_CLAMP__NEON, n_eq_4) {
     TEST_REQUIRES_ARM_NEON;
     ClampMicrokernelTester()
@@ -227,9 +227,9 @@
       }
     }
   }
-#endif  // CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
-#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
   TEST(F32_CLAMP__SSE, n_eq_4) {
     TEST_REQUIRES_X86_SSE2;
     ClampMicrokernelTester()
@@ -302,4 +302,4 @@
       }
     }
   }
-#endif  // CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
diff --git a/test/f32-conv-hwc.cc b/test/f32-conv-hwc.cc
index b44dbe1..94179ff 100644
--- a/test/f32-conv-hwc.cc
+++ b/test/f32-conv-hwc.cc
@@ -3,16 +3,16 @@
 // This source code is licensed under the BSD-style license found in the
 // LICENSE file in the root directory of this source tree.
 
-#include <cpuinfo.h>
 #include <gtest/gtest.h>
 
-#include <xnnpack/conv.h>
+#include <xnnpack/common.h>
 #include <xnnpack/isa-checks.h>
 
+#include <xnnpack/conv.h>
 #include "conv-hwc-microkernel-tester.h"
 
 
-#if CPUINFO_ARCH_ARM64
+#if XNN_ARCH_ARM64
   TEST(F32_CONV_3X3S2P1C3X8__NEONFMA_2X2, input_width_eq_4) {
     TEST_REQUIRES_ARM_NEON_FMA;
     ConvHWCMicrokernelTester()
@@ -290,10 +290,10 @@
       }
     }
   }
-#endif  // CPUINFO_ARCH_ARM64
+#endif  // XNN_ARCH_ARM64
 
 
-#if CPUINFO_ARCH_ARM64
+#if XNN_ARCH_ARM64
   TEST(F32_CONV_3X3S2P1C3X4__NEONFMA_2X2, input_width_eq_4) {
     TEST_REQUIRES_ARM_NEON_FMA;
     ConvHWCMicrokernelTester()
@@ -571,4 +571,4 @@
       }
     }
   }
-#endif  // CPUINFO_ARCH_ARM64
+#endif  // XNN_ARCH_ARM64
diff --git a/test/f32-conv-hwc2spchw.cc b/test/f32-conv-hwc2spchw.cc
index 6d79a46..51530bb 100644
--- a/test/f32-conv-hwc2spchw.cc
+++ b/test/f32-conv-hwc2spchw.cc
@@ -3,16 +3,16 @@
 // This source code is licensed under the BSD-style license found in the
 // LICENSE file in the root directory of this source tree.
 
-#include <cpuinfo.h>
 #include <gtest/gtest.h>
 
-#include <xnnpack/conv.h>
+#include <xnnpack/common.h>
 #include <xnnpack/isa-checks.h>
 
+#include <xnnpack/conv.h>
 #include "conv-hwc2spchw-microkernel-tester.h"
 
 
-#if CPUINFO_ARCH_ARM64
+#if XNN_ARCH_ARM64
   TEST(F32_CONV_HWC2SPCHW_3X3S2P1C3X4__NEONFMA_2X2, input_width_eq_4) {
     TEST_REQUIRES_ARM_NEON_FMA;
     ConvHWC2SpCHWMicrokernelTester()
@@ -290,4 +290,4 @@
       }
     }
   }
-#endif  // CPUINFO_ARCH_ARM64
+#endif  // XNN_ARCH_ARM64
diff --git a/test/f32-dwconv-spchw.cc b/test/f32-dwconv-spchw.cc
index 76fdcd1..914b1cc 100644
--- a/test/f32-dwconv-spchw.cc
+++ b/test/f32-dwconv-spchw.cc
@@ -3,16 +3,16 @@
 // This source code is licensed under the BSD-style license found in the
 // LICENSE file in the root directory of this source tree.
 
-#include <cpuinfo.h>
 #include <gtest/gtest.h>
 
-#include <xnnpack/dwconv.h>
+#include <xnnpack/common.h>
 #include <xnnpack/isa-checks.h>
 
+#include <xnnpack/dwconv.h>
 #include "dwconv-spchw-microkernel-tester.h"
 
 
-#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
   TEST(F32_DWCONV_SPCHW_3X3P1__SSE, input_width_eq_4) {
     TEST_REQUIRES_X86_SSE;
     DWConvSpCHWMicrokernelTester()
@@ -180,9 +180,9 @@
         .Test(xnn_f32_dwconv_spchw_ukernel_3x3p1__sse);
     }
   }
-#endif  // CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
-#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
   TEST(F32_DWCONV_SPCHW_3X3S2P1__SSE, input_width_eq_4) {
     TEST_REQUIRES_X86_SSE;
     DWConvSpCHWMicrokernelTester()
@@ -360,10 +360,10 @@
         .Test(xnn_f32_dwconv_spchw_ukernel_3x3s2p1__sse);
     }
   }
-#endif  // CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
-#if CPUINFO_ARCH_ARM64
+#if XNN_ARCH_ARM64
   TEST(F32_DWCONV_SPCHW_3X3P1__NEONFMA, input_width_eq_4) {
     TEST_REQUIRES_ARM_NEON_FMA;
     DWConvSpCHWMicrokernelTester()
@@ -531,10 +531,10 @@
         .Test(xnn_f32_dwconv_spchw_ukernel_3x3p1__neonfma);
     }
   }
-#endif  // CPUINFO_ARCH_ARM64
+#endif  // XNN_ARCH_ARM64
 
 
-#if CPUINFO_ARCH_ARM64
+#if XNN_ARCH_ARM64
   TEST(F32_DWCONV_SPCHW_3X3S2P1__NEONFMA, input_width_eq_4) {
     TEST_REQUIRES_ARM_NEON_FMA;
     DWConvSpCHWMicrokernelTester()
@@ -712,10 +712,10 @@
         .Test(xnn_f32_dwconv_spchw_ukernel_3x3s2p1__neonfma);
     }
   }
-#endif  // CPUINFO_ARCH_ARM64
+#endif  // XNN_ARCH_ARM64
 
 
-#if CPUINFO_ARCH_ARM64
+#if XNN_ARCH_ARM64
   TEST(F32_DWCONV_SPCHW_5X5P2__NEONFMA, input_width_eq_4) {
     TEST_REQUIRES_ARM_NEON_FMA;
     DWConvSpCHWMicrokernelTester()
@@ -885,10 +885,10 @@
       }
     }
   }
-#endif  // CPUINFO_ARCH_ARM64
+#endif  // XNN_ARCH_ARM64
 
 
-#if CPUINFO_ARCH_ARM64
+#if XNN_ARCH_ARM64
   TEST(F32_DWCONV_SPCHW_5X5S2P2__NEONFMA, input_width_eq_8) {
     TEST_REQUIRES_ARM_NEON_FMA;
     DWConvSpCHWMicrokernelTester()
@@ -1066,4 +1066,4 @@
         .Test(xnn_f32_dwconv_spchw_ukernel_5x5s2p2__neonfma);
     }
   }
-#endif  // CPUINFO_ARCH_ARM64
+#endif  // XNN_ARCH_ARM64
diff --git a/test/f32-dwconv.cc b/test/f32-dwconv.cc
index f692a72..c9df99c 100644
--- a/test/f32-dwconv.cc
+++ b/test/f32-dwconv.cc
@@ -11,16 +11,16 @@
 //   Generator: tools/generate-dwconv-test.py
 
 
-#include <cpuinfo.h>
 #include <gtest/gtest.h>
 
-#include <xnnpack/dwconv.h>
+#include <xnnpack/common.h>
 #include <xnnpack/isa-checks.h>
 
+#include <xnnpack/dwconv.h>
 #include "dwconv-microkernel-tester.h"
 
 
-#if CPUINFO_ARCH_ARM64
+#if XNN_ARCH_ARM64
   TEST(F32_DWCONV_UP4X9__AARCH64_NEONFMA, c_eq_4) {
     TEST_REQUIRES_ARM_NEON_FMA;
     DWConvMicrokernelTester()
@@ -176,10 +176,10 @@
         .Test(xnn_f32_dwconv_ukernel_up4x9__aarch64_neonfma);
     }
   }
-#endif  // CPUINFO_ARCH_ARM64
+#endif  // XNN_ARCH_ARM64
 
 
-#if CPUINFO_ARCH_ARM64
+#if XNN_ARCH_ARM64
   TEST(F32_DWCONV_UP4X9__AARCH64_NEONFMA_CORTEX_A55, c_eq_4) {
     TEST_REQUIRES_ARM_NEON_FMA;
     DWConvMicrokernelTester()
@@ -344,10 +344,10 @@
         .Test(xnn_f32_dwconv_ukernel_up4x9__aarch64_neonfma_cortex_a55);
     }
   }
-#endif  // CPUINFO_ARCH_ARM64
+#endif  // XNN_ARCH_ARM64
 
 
-#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
   TEST(F32_DWCONV_UP4X9__NEONFMA, c_eq_4) {
     TEST_REQUIRES_ARM_NEON_FMA;
     DWConvMicrokernelTester()
@@ -503,10 +503,10 @@
         .Test(xnn_f32_dwconv_ukernel_up4x9__neonfma);
     }
   }
-#endif  // CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
-#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
   TEST(F32_DWCONV_UP4X9__NEON, c_eq_4) {
     TEST_REQUIRES_ARM_NEON;
     DWConvMicrokernelTester()
@@ -662,10 +662,10 @@
         .Test(xnn_f32_dwconv_ukernel_up4x9__neon);
     }
   }
-#endif  // CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
-#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
   TEST(F32_DWCONV_UP4X25__SSE, c_eq_4) {
     TEST_REQUIRES_X86_SSE;
     DWConvMicrokernelTester()
@@ -821,10 +821,10 @@
         .Test(xnn_f32_dwconv_ukernel_up4x25__sse);
     }
   }
-#endif  // CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
-#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
   TEST(F32_DWCONV_UP4X9__SSE, c_eq_4) {
     TEST_REQUIRES_X86_SSE;
     DWConvMicrokernelTester()
@@ -980,10 +980,10 @@
         .Test(xnn_f32_dwconv_ukernel_up4x9__sse);
     }
   }
-#endif  // CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
-#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
   TEST(F32_DWCONV_UP4X4__SSE, c_eq_4) {
     TEST_REQUIRES_X86_SSE;
     DWConvMicrokernelTester()
@@ -1139,10 +1139,10 @@
         .Test(xnn_f32_dwconv_ukernel_up4x4__sse);
     }
   }
-#endif  // CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
-#if !CPUINFO_ARCH_ASMJS && !CPUINFO_ARCH_WASM
+#if !XNN_ARCH_ASMJS && !XNN_ARCH_WASM
   TEST(F32_DWCONV_UP4X25__PSIMD, c_eq_4) {
     TEST_REQUIRES_PSIMD;
     DWConvMicrokernelTester()
@@ -1298,10 +1298,10 @@
         .Test(xnn_f32_dwconv_ukernel_up4x25__psimd, DWConvMicrokernelTester::Variant::Scalar);
     }
   }
-#endif  // !CPUINFO_ARCH_ASMJS && !CPUINFO_ARCH_WASM
+#endif  // !XNN_ARCH_ASMJS && !XNN_ARCH_WASM
 
 
-#if !CPUINFO_ARCH_ASMJS && !CPUINFO_ARCH_WASM
+#if !XNN_ARCH_ASMJS && !XNN_ARCH_WASM
   TEST(F32_DWCONV_UP4X9__PSIMD, c_eq_4) {
     TEST_REQUIRES_PSIMD;
     DWConvMicrokernelTester()
@@ -1457,10 +1457,10 @@
         .Test(xnn_f32_dwconv_ukernel_up4x9__psimd, DWConvMicrokernelTester::Variant::Scalar);
     }
   }
-#endif  // !CPUINFO_ARCH_ASMJS && !CPUINFO_ARCH_WASM
+#endif  // !XNN_ARCH_ASMJS && !XNN_ARCH_WASM
 
 
-#if !CPUINFO_ARCH_ASMJS && !CPUINFO_ARCH_WASM
+#if !XNN_ARCH_ASMJS && !XNN_ARCH_WASM
   TEST(F32_DWCONV_UP4X4__PSIMD, c_eq_4) {
     TEST_REQUIRES_PSIMD;
     DWConvMicrokernelTester()
@@ -1616,7 +1616,7 @@
         .Test(xnn_f32_dwconv_ukernel_up4x4__psimd, DWConvMicrokernelTester::Variant::Scalar);
     }
   }
-#endif  // !CPUINFO_ARCH_ASMJS && !CPUINFO_ARCH_WASM
+#endif  // !XNN_ARCH_ASMJS && !XNN_ARCH_WASM
 
 
 TEST(F32_DWCONV_UP1X4__SCALAR, c_eq_1) {
diff --git a/test/f32-gavgpool-spchw.cc b/test/f32-gavgpool-spchw.cc
index f12e8e4..89ee4ea 100644
--- a/test/f32-gavgpool-spchw.cc
+++ b/test/f32-gavgpool-spchw.cc
@@ -3,16 +3,16 @@
 // This source code is licensed under the BSD-style license found in the
 // LICENSE file in the root directory of this source tree.
 
-#include <cpuinfo.h>
 #include <gtest/gtest.h>
 
+#include <xnnpack/common.h>
 #include <xnnpack/isa-checks.h>
-#include <xnnpack/gavgpool.h>
 
+#include <xnnpack/gavgpool.h>
 #include "gavgpool-spchw-microkernel-tester.h"
 
 
-#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
   TEST(F32_GAVGPOOL_SPCHW__NEON_X4, elements_eq_4) {
     TEST_REQUIRES_ARM_NEON;
     GAvgPoolSpCHWMicrokernelTester()
@@ -108,10 +108,10 @@
         .Test(xnn_f32_gavgpool_spchw_ukernel__neon_x4);
     }
   }
-#endif  // CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
-#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
   TEST(F32_GAVGPOOL_SPCHW__SSE_X4, elements_eq_4) {
     TEST_REQUIRES_X86_SSE;
     GAvgPoolSpCHWMicrokernelTester()
@@ -207,4 +207,4 @@
         .Test(xnn_f32_gavgpool_spchw_ukernel__sse_x4);
     }
   }
-#endif  // CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
diff --git a/test/f32-gavgpool.cc b/test/f32-gavgpool.cc
index 95e44cc..95c3f5c 100644
--- a/test/f32-gavgpool.cc
+++ b/test/f32-gavgpool.cc
@@ -3,16 +3,16 @@
 // This source code is licensed under the BSD-style license found in the
 // LICENSE file in the root directory of this source tree.
 
-#include <cpuinfo.h>
 #include <gtest/gtest.h>
 
+#include <xnnpack/common.h>
 #include <xnnpack/isa-checks.h>
-#include <xnnpack/gavgpool.h>
 
+#include <xnnpack/gavgpool.h>
 #include "gavgpool-microkernel-tester.h"
 
 
-#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
   TEST(F32_GAVGPOOL_UP7__NEON, n_eq_4_fulltile) {
     TEST_REQUIRES_ARM_NEON;
     GAvgPoolMicrokernelTester()
@@ -428,10 +428,10 @@
       } 
     }
   }
-#endif  // CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
-#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
   TEST(F32_GAVGPOOL_UP7__SSE2, n_eq_4_fulltile) {
     TEST_REQUIRES_X86_SSE2;
     GAvgPoolMicrokernelTester()
@@ -847,10 +847,10 @@
       } 
     }
   }
-#endif  // CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
-#if !CPUINFO_ARCH_WASM && !CPUINFO_ARCH_ASMJS
+#if !XNN_ARCH_WASM && !XNN_ARCH_ASMJS
   TEST(F32_GAVGPOOL_UP7__PSIMD, n_eq_4_fulltile) {
     TEST_REQUIRES_PSIMD;
     GAvgPoolMicrokernelTester()
@@ -1266,7 +1266,7 @@
       } 
     }
   }
-#endif  // !CPUINFO_ARCH_WASM && !CPUINFO_ARCH_ASMJS
+#endif  // !XNN_ARCH_WASM && !XNN_ARCH_ASMJS
 
 
 TEST(F32_GAVGPOOL_UP7__SCALAR, n_eq_1_fulltile) {
diff --git a/test/f32-gemm.cc b/test/f32-gemm.cc
index 9bcb043..e8858ec 100644
--- a/test/f32-gemm.cc
+++ b/test/f32-gemm.cc
@@ -11,18 +11,18 @@
 //   Generator: tools/generate-gemm-test.py
 
 
-#include <cpuinfo.h>
 #include <gtest/gtest.h>
 
+#include <xnnpack/common.h>
+#include <xnnpack/isa-checks.h>
+
 #include <xnnpack/gemm.h>
 #include <xnnpack/igemm.h>
 #include <xnnpack/ppmm.h>
-#include <xnnpack/isa-checks.h>
-
 #include "gemm-microkernel-tester.h"
 
 
-#if CPUINFO_ARCH_ARM64
+#if XNN_ARCH_ARM64
   TEST(F32_GEMM_1X8__AARCH64_NEONFMA_CORTEX_A57, k_eq_8) {
     TEST_REQUIRES_ARM_NEON_FMA;
     GemmMicrokernelTester()
@@ -520,10 +520,10 @@
       .cm_stride(11)
       .Test(xnn_f32_gemm_ukernel_1x8__aarch64_neonfma_cortex_a57);
   }
-#endif  // CPUINFO_ARCH_ARM64
+#endif  // XNN_ARCH_ARM64
 
 
-#if CPUINFO_ARCH_ARM64
+#if XNN_ARCH_ARM64
   TEST(F32_GEMM_1X8__AARCH64_NEONFMA_CORTEX_A75, k_eq_8) {
     TEST_REQUIRES_ARM_NEON_FMA;
     GemmMicrokernelTester()
@@ -1021,10 +1021,10 @@
       .cm_stride(11)
       .Test(xnn_f32_gemm_ukernel_1x8__aarch64_neonfma_cortex_a75);
   }
-#endif  // CPUINFO_ARCH_ARM64
+#endif  // XNN_ARCH_ARM64
 
 
-#if CPUINFO_ARCH_ARM64
+#if XNN_ARCH_ARM64
   TEST(F32_GEMM_4X8__AARCH64_NEONFMA_CORTEX_A57, k_eq_8) {
     TEST_REQUIRES_ARM_NEON_FMA;
     GemmMicrokernelTester()
@@ -1522,10 +1522,10 @@
       .cm_stride(11)
       .Test(xnn_f32_gemm_ukernel_4x8__aarch64_neonfma_cortex_a57);
   }
-#endif  // CPUINFO_ARCH_ARM64
+#endif  // XNN_ARCH_ARM64
 
 
-#if CPUINFO_ARCH_ARM64
+#if XNN_ARCH_ARM64
   TEST(F32_GEMM_4X8__AARCH64_NEONFMA_CORTEX_A75, k_eq_8) {
     TEST_REQUIRES_ARM_NEON_FMA;
     GemmMicrokernelTester()
@@ -2023,10 +2023,10 @@
       .cm_stride(11)
       .Test(xnn_f32_gemm_ukernel_4x8__aarch64_neonfma_cortex_a75);
   }
-#endif  // CPUINFO_ARCH_ARM64
+#endif  // XNN_ARCH_ARM64
 
 
-#if CPUINFO_ARCH_ARM64
+#if XNN_ARCH_ARM64
   TEST(F32_GEMM_5X8__AARCH64_NEONFMA_CORTEX_A75, k_eq_8) {
     TEST_REQUIRES_ARM_NEON_FMA;
     GemmMicrokernelTester()
@@ -2524,10 +2524,10 @@
       .cm_stride(11)
       .Test(xnn_f32_gemm_ukernel_5x8__aarch64_neonfma_cortex_a75);
   }
-#endif  // CPUINFO_ARCH_ARM64
+#endif  // XNN_ARCH_ARM64
 
 
-#if CPUINFO_ARCH_ARM64
+#if XNN_ARCH_ARM64
   TEST(F32_GEMM_6X8__AARCH64_NEONFMA_CORTEX_A57, k_eq_8) {
     TEST_REQUIRES_ARM_NEON_FMA;
     GemmMicrokernelTester()
@@ -3025,10 +3025,10 @@
       .cm_stride(11)
       .Test(xnn_f32_gemm_ukernel_6x8__aarch64_neonfma_cortex_a57);
   }
-#endif  // CPUINFO_ARCH_ARM64
+#endif  // XNN_ARCH_ARM64
 
 
-#if CPUINFO_ARCH_ARM64
+#if XNN_ARCH_ARM64
   TEST(F32_GEMM_6X8__AARCH64_NEONFMA_CORTEX_A73, k_eq_8) {
     TEST_REQUIRES_ARM_NEON_FMA;
     GemmMicrokernelTester()
@@ -3526,10 +3526,10 @@
       .cm_stride(11)
       .Test(xnn_f32_gemm_ukernel_6x8__aarch64_neonfma_cortex_a73);
   }
-#endif  // CPUINFO_ARCH_ARM64
+#endif  // XNN_ARCH_ARM64
 
 
-#if CPUINFO_ARCH_ARM64
+#if XNN_ARCH_ARM64
   TEST(F32_GEMM_6X8__AARCH64_NEONFMA_CORTEX_A75, k_eq_8) {
     TEST_REQUIRES_ARM_NEON_FMA;
     GemmMicrokernelTester()
@@ -4027,10 +4027,10 @@
       .cm_stride(11)
       .Test(xnn_f32_gemm_ukernel_6x8__aarch64_neonfma_cortex_a75);
   }
-#endif  // CPUINFO_ARCH_ARM64
+#endif  // XNN_ARCH_ARM64
 
 
-#if CPUINFO_ARCH_ARM64
+#if XNN_ARCH_ARM64
   TEST(F32_GEMM_1X12__AARCH64_NEONFMA_CORTEX_A53, k_eq_4) {
     TEST_REQUIRES_ARM_NEON_FMA;
     GemmMicrokernelTester()
@@ -4528,10 +4528,10 @@
       .cm_stride(17)
       .Test(xnn_f32_gemm_ukernel_1x12__aarch64_neonfma_cortex_a53);
   }
-#endif  // CPUINFO_ARCH_ARM64
+#endif  // XNN_ARCH_ARM64
 
 
-#if CPUINFO_ARCH_ARM64
+#if XNN_ARCH_ARM64
   TEST(F32_GEMM_4X12__AARCH64_NEONFMA_CORTEX_A53, k_eq_4) {
     TEST_REQUIRES_ARM_NEON_FMA;
     GemmMicrokernelTester()
@@ -5029,10 +5029,10 @@
       .cm_stride(17)
       .Test(xnn_f32_gemm_ukernel_4x12__aarch64_neonfma_cortex_a53);
   }
-#endif  // CPUINFO_ARCH_ARM64
+#endif  // XNN_ARCH_ARM64
 
 
-#if CPUINFO_ARCH_ARM64
+#if XNN_ARCH_ARM64
   TEST(F32_GEMM_4X8__AARCH64_NEONFMA_LD64, k_eq_2) {
     TEST_REQUIRES_ARM_NEON_FMA;
     GemmMicrokernelTester()
@@ -5485,10 +5485,10 @@
       .cm_stride(11)
       .Test(xnn_f32_gemm_ukernel_4x8__aarch64_neonfma_ld64);
   }
-#endif  // CPUINFO_ARCH_ARM64
+#endif  // XNN_ARCH_ARM64
 
 
-#if CPUINFO_ARCH_ARM64
+#if XNN_ARCH_ARM64
   TEST(F32_GEMM_4X8__AARCH64_NEONFMA_LD128, k_eq_4) {
     TEST_REQUIRES_ARM_NEON_FMA;
     GemmMicrokernelTester()
@@ -5941,10 +5941,10 @@
       .cm_stride(11)
       .Test(xnn_f32_gemm_ukernel_4x8__aarch64_neonfma_ld128);
   }
-#endif  // CPUINFO_ARCH_ARM64
+#endif  // XNN_ARCH_ARM64
 
 
-#if CPUINFO_ARCH_ARM64
+#if XNN_ARCH_ARM64
   TEST(F32_GEMM_6X8__AARCH64_NEONFMA_LD64, k_eq_2) {
     TEST_REQUIRES_ARM_NEON_FMA;
     GemmMicrokernelTester()
@@ -6397,10 +6397,10 @@
       .cm_stride(11)
       .Test(xnn_f32_gemm_ukernel_6x8__aarch64_neonfma_ld64);
   }
-#endif  // CPUINFO_ARCH_ARM64
+#endif  // XNN_ARCH_ARM64
 
 
-#if CPUINFO_ARCH_ARM64
+#if XNN_ARCH_ARM64
   TEST(F32_GEMM_6X8__AARCH64_NEONFMA_LD128, k_eq_4) {
     TEST_REQUIRES_ARM_NEON_FMA;
     GemmMicrokernelTester()
@@ -6853,10 +6853,10 @@
       .cm_stride(11)
       .Test(xnn_f32_gemm_ukernel_6x8__aarch64_neonfma_ld128);
   }
-#endif  // CPUINFO_ARCH_ARM64
+#endif  // XNN_ARCH_ARM64
 
 
-#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
   TEST(F32_GEMM_4X12__NEON_LD64, k_eq_2) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
@@ -7309,10 +7309,10 @@
       .cm_stride(17)
       .Test(xnn_f32_gemm_ukernel_4x12__neon_ld64);
   }
-#endif  // CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
-#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
   TEST(F32_GEMM_1X8__NEON_LD64, k_eq_2) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
@@ -7765,10 +7765,10 @@
       .cm_stride(11)
       .Test(xnn_f32_gemm_ukernel_1x8__neon_ld64);
   }
-#endif  // CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
-#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
   TEST(F32_GEMM_4X2__NEON_LD64, k_eq_2) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
@@ -8221,10 +8221,10 @@
       .cm_stride(5)
       .Test(xnn_f32_gemm_ukernel_4x2__neon_ld64);
   }
-#endif  // CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
-#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
   TEST(F32_GEMM_4X8__NEON_LD64, k_eq_2) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
@@ -8677,10 +8677,10 @@
       .cm_stride(11)
       .Test(xnn_f32_gemm_ukernel_4x8__neon_ld64);
   }
-#endif  // CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
-#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
   TEST(F32_GEMM_4X8__NEON_LD128, k_eq_4) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
@@ -9133,10 +9133,10 @@
       .cm_stride(11)
       .Test(xnn_f32_gemm_ukernel_4x8__neon_ld128);
   }
-#endif  // CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
-#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
   TEST(F32_GEMM_5X8__NEON_LD64, k_eq_2) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
@@ -9589,10 +9589,10 @@
       .cm_stride(11)
       .Test(xnn_f32_gemm_ukernel_5x8__neon_ld64);
   }
-#endif  // CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
-#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
   TEST(F32_GEMM_6X8__NEON_LD64, k_eq_2) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
@@ -10045,10 +10045,10 @@
       .cm_stride(11)
       .Test(xnn_f32_gemm_ukernel_6x8__neon_ld64);
   }
-#endif  // CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
-#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
   TEST(F32_GEMM_1X8__NEONFMA_LD64, k_eq_2) {
     TEST_REQUIRES_ARM_NEON_FMA;
     GemmMicrokernelTester()
@@ -10501,10 +10501,10 @@
       .cm_stride(11)
       .Test(xnn_f32_gemm_ukernel_1x8__neonfma_ld64);
   }
-#endif  // CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
-#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
   TEST(F32_GEMM_4X12__NEONFMA_LD64, k_eq_2) {
     TEST_REQUIRES_ARM_NEON_FMA;
     GemmMicrokernelTester()
@@ -10957,10 +10957,10 @@
       .cm_stride(17)
       .Test(xnn_f32_gemm_ukernel_4x12__neonfma_ld64);
   }
-#endif  // CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
-#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
   TEST(F32_GEMM_4X8__NEONFMA_LD64, k_eq_2) {
     TEST_REQUIRES_ARM_NEON_FMA;
     GemmMicrokernelTester()
@@ -11413,10 +11413,10 @@
       .cm_stride(11)
       .Test(xnn_f32_gemm_ukernel_4x8__neonfma_ld64);
   }
-#endif  // CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
-#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
   TEST(F32_GEMM_4X8__NEONFMA_LD128, k_eq_4) {
     TEST_REQUIRES_ARM_NEON_FMA;
     GemmMicrokernelTester()
@@ -11869,10 +11869,10 @@
       .cm_stride(11)
       .Test(xnn_f32_gemm_ukernel_4x8__neonfma_ld128);
   }
-#endif  // CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
-#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
   TEST(F32_GEMM_5X8__NEONFMA_LD64, k_eq_2) {
     TEST_REQUIRES_ARM_NEON_FMA;
     GemmMicrokernelTester()
@@ -12325,10 +12325,10 @@
       .cm_stride(11)
       .Test(xnn_f32_gemm_ukernel_5x8__neonfma_ld64);
   }
-#endif  // CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
-#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
   TEST(F32_GEMM_6X8__NEONFMA_LD64, k_eq_2) {
     TEST_REQUIRES_ARM_NEON_FMA;
     GemmMicrokernelTester()
@@ -12781,10 +12781,10 @@
       .cm_stride(11)
       .Test(xnn_f32_gemm_ukernel_6x8__neonfma_ld64);
   }
-#endif  // CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
-#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
   TEST(F32_GEMM_1X8__SSE_LOAD1, k_eq_1) {
     TEST_REQUIRES_X86_SSE;
     GemmMicrokernelTester()
@@ -13135,10 +13135,10 @@
       .cm_stride(11)
       .Test(xnn_f32_gemm_ukernel_1x8__sse_load1);
   }
-#endif  // CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
-#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
   TEST(F32_GEMM_4X8__SSE_LOAD1, k_eq_1) {
     TEST_REQUIRES_X86_SSE;
     GemmMicrokernelTester()
@@ -13489,10 +13489,10 @@
       .cm_stride(11)
       .Test(xnn_f32_gemm_ukernel_4x8__sse_load1);
   }
-#endif  // CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
-#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
   TEST(F32_GEMM_1X8__SSE_DUP, k_eq_4) {
     TEST_REQUIRES_X86_SSE;
     GemmMicrokernelTester()
@@ -13945,10 +13945,10 @@
       .cm_stride(11)
       .Test(xnn_f32_gemm_ukernel_1x8__sse_dup);
   }
-#endif  // CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
-#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
   TEST(F32_GEMM_4X8__SSE_DUP, k_eq_4) {
     TEST_REQUIRES_X86_SSE;
     GemmMicrokernelTester()
@@ -14401,10 +14401,10 @@
       .cm_stride(11)
       .Test(xnn_f32_gemm_ukernel_4x8__sse_dup);
   }
-#endif  // CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
-#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
   TEST(F32_GEMM_1X8S4__SSE, k_eq_4) {
     TEST_REQUIRES_X86_SSE;
     GemmMicrokernelTester()
@@ -14857,10 +14857,10 @@
       .cm_stride(11)
       .Test(xnn_f32_gemm_ukernel_1x8s4__sse);
   }
-#endif  // CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
-#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
   TEST(F32_GEMM_4X8S4__SSE, k_eq_4) {
     TEST_REQUIRES_X86_SSE;
     GemmMicrokernelTester()
@@ -15313,10 +15313,10 @@
       .cm_stride(11)
       .Test(xnn_f32_gemm_ukernel_4x8s4__sse);
   }
-#endif  // CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
-#if !CPUINFO_ARCH_ASMJS && !CPUINFO_ARCH_WASM
+#if !XNN_ARCH_ASMJS && !XNN_ARCH_WASM
   TEST(F32_GEMM_1X8__PSIMD_LOADSPLAT, k_eq_1) {
     TEST_REQUIRES_PSIMD;
     GemmMicrokernelTester()
@@ -15667,10 +15667,10 @@
       .cm_stride(11)
       .Test(xnn_f32_gemm_ukernel_1x8__psimd_loadsplat, GemmMicrokernelTester::Variant::Scalar);
   }
-#endif  // !CPUINFO_ARCH_ASMJS && !CPUINFO_ARCH_WASM
+#endif  // !XNN_ARCH_ASMJS && !XNN_ARCH_WASM
 
 
-#if !CPUINFO_ARCH_ASMJS && !CPUINFO_ARCH_WASM
+#if !XNN_ARCH_ASMJS && !XNN_ARCH_WASM
   TEST(F32_GEMM_4X8__PSIMD_LOADSPLAT, k_eq_1) {
     TEST_REQUIRES_PSIMD;
     GemmMicrokernelTester()
@@ -16021,10 +16021,10 @@
       .cm_stride(11)
       .Test(xnn_f32_gemm_ukernel_4x8__psimd_loadsplat, GemmMicrokernelTester::Variant::Scalar);
   }
-#endif  // !CPUINFO_ARCH_ASMJS && !CPUINFO_ARCH_WASM
+#endif  // !XNN_ARCH_ASMJS && !XNN_ARCH_WASM
 
 
-#if !CPUINFO_ARCH_ASMJS && !CPUINFO_ARCH_WASM
+#if !XNN_ARCH_ASMJS && !XNN_ARCH_WASM
   TEST(F32_GEMM_6X8__PSIMD_LOADSPLAT, k_eq_1) {
     TEST_REQUIRES_PSIMD;
     GemmMicrokernelTester()
@@ -16375,10 +16375,10 @@
       .cm_stride(11)
       .Test(xnn_f32_gemm_ukernel_6x8__psimd_loadsplat, GemmMicrokernelTester::Variant::Scalar);
   }
-#endif  // !CPUINFO_ARCH_ASMJS && !CPUINFO_ARCH_WASM
+#endif  // !XNN_ARCH_ASMJS && !XNN_ARCH_WASM
 
 
-#if !CPUINFO_ARCH_ASMJS && !CPUINFO_ARCH_WASM
+#if !XNN_ARCH_ASMJS && !XNN_ARCH_WASM
   TEST(F32_GEMM_1X8__PSIMD_SPLAT, k_eq_4) {
     TEST_REQUIRES_PSIMD;
     GemmMicrokernelTester()
@@ -16831,10 +16831,10 @@
       .cm_stride(11)
       .Test(xnn_f32_gemm_ukernel_1x8__psimd_splat, GemmMicrokernelTester::Variant::Scalar);
   }
-#endif  // !CPUINFO_ARCH_ASMJS && !CPUINFO_ARCH_WASM
+#endif  // !XNN_ARCH_ASMJS && !XNN_ARCH_WASM
 
 
-#if !CPUINFO_ARCH_ASMJS && !CPUINFO_ARCH_WASM
+#if !XNN_ARCH_ASMJS && !XNN_ARCH_WASM
   TEST(F32_GEMM_4X8__PSIMD_SPLAT, k_eq_4) {
     TEST_REQUIRES_PSIMD;
     GemmMicrokernelTester()
@@ -17287,10 +17287,10 @@
       .cm_stride(11)
       .Test(xnn_f32_gemm_ukernel_4x8__psimd_splat, GemmMicrokernelTester::Variant::Scalar);
   }
-#endif  // !CPUINFO_ARCH_ASMJS && !CPUINFO_ARCH_WASM
+#endif  // !XNN_ARCH_ASMJS && !XNN_ARCH_WASM
 
 
-#if !CPUINFO_ARCH_ASMJS && !CPUINFO_ARCH_WASM
+#if !XNN_ARCH_ASMJS && !XNN_ARCH_WASM
   TEST(F32_GEMM_6X8__PSIMD_SPLAT, k_eq_4) {
     TEST_REQUIRES_PSIMD;
     GemmMicrokernelTester()
@@ -17743,10 +17743,10 @@
       .cm_stride(11)
       .Test(xnn_f32_gemm_ukernel_6x8__psimd_splat, GemmMicrokernelTester::Variant::Scalar);
   }
-#endif  // !CPUINFO_ARCH_ASMJS && !CPUINFO_ARCH_WASM
+#endif  // !XNN_ARCH_ASMJS && !XNN_ARCH_WASM
 
 
-#if !CPUINFO_ARCH_ASMJS && !CPUINFO_ARCH_WASM
+#if !XNN_ARCH_ASMJS && !XNN_ARCH_WASM
   TEST(F32_GEMM_1X8S4__PSIMD, k_eq_4) {
     TEST_REQUIRES_PSIMD;
     GemmMicrokernelTester()
@@ -18199,10 +18199,10 @@
       .cm_stride(11)
       .Test(xnn_f32_gemm_ukernel_1x8s4__psimd, GemmMicrokernelTester::Variant::Scalar);
   }
-#endif  // !CPUINFO_ARCH_ASMJS && !CPUINFO_ARCH_WASM
+#endif  // !XNN_ARCH_ASMJS && !XNN_ARCH_WASM
 
 
-#if !CPUINFO_ARCH_ASMJS && !CPUINFO_ARCH_WASM
+#if !XNN_ARCH_ASMJS && !XNN_ARCH_WASM
   TEST(F32_GEMM_4X8S4__PSIMD, k_eq_4) {
     TEST_REQUIRES_PSIMD;
     GemmMicrokernelTester()
@@ -18655,10 +18655,10 @@
       .cm_stride(11)
       .Test(xnn_f32_gemm_ukernel_4x8s4__psimd, GemmMicrokernelTester::Variant::Scalar);
   }
-#endif  // !CPUINFO_ARCH_ASMJS && !CPUINFO_ARCH_WASM
+#endif  // !XNN_ARCH_ASMJS && !XNN_ARCH_WASM
 
 
-#if !CPUINFO_ARCH_ASMJS && !CPUINFO_ARCH_WASM
+#if !XNN_ARCH_ASMJS && !XNN_ARCH_WASM
   TEST(F32_GEMM_6X8S4__PSIMD, k_eq_4) {
     TEST_REQUIRES_PSIMD;
     GemmMicrokernelTester()
@@ -19111,7 +19111,7 @@
       .cm_stride(11)
       .Test(xnn_f32_gemm_ukernel_6x8s4__psimd, GemmMicrokernelTester::Variant::Scalar);
   }
-#endif  // !CPUINFO_ARCH_ASMJS && !CPUINFO_ARCH_WASM
+#endif  // !XNN_ARCH_ASMJS && !XNN_ARCH_WASM
 
 
 TEST(F32_GEMM_1X4__SCALAR, k_eq_1) {
diff --git a/test/f32-gemminc.cc b/test/f32-gemminc.cc
index ade257d..f7774a1 100644
--- a/test/f32-gemminc.cc
+++ b/test/f32-gemminc.cc
@@ -11,18 +11,18 @@
 //   Generator: tools/generate-gemm-test.py
 
 
-#include <cpuinfo.h>
 #include <gtest/gtest.h>
 
+#include <xnnpack/common.h>
+#include <xnnpack/isa-checks.h>
+
 #include <xnnpack/gemm.h>
 #include <xnnpack/igemm.h>
 #include <xnnpack/ppmm.h>
-#include <xnnpack/isa-checks.h>
-
 #include "gemm-microkernel-tester.h"
 
 
-#if CPUINFO_ARCH_ARM64
+#if XNN_ARCH_ARM64
   TEST(F32_GEMMINC_1X8__AARCH64_NEONFMA_CORTEX_A57, k_eq_8) {
     TEST_REQUIRES_ARM_NEON_FMA;
     GemmMicrokernelTester()
@@ -520,10 +520,10 @@
       .cm_stride(11)
       .Test(xnn_f32_gemminc_ukernel_1x8__aarch64_neonfma_cortex_a57);
   }
-#endif  // CPUINFO_ARCH_ARM64
+#endif  // XNN_ARCH_ARM64
 
 
-#if CPUINFO_ARCH_ARM64
+#if XNN_ARCH_ARM64
   TEST(F32_GEMMINC_1X8__AARCH64_NEONFMA_CORTEX_A75, k_eq_8) {
     TEST_REQUIRES_ARM_NEON_FMA;
     GemmMicrokernelTester()
@@ -1021,10 +1021,10 @@
       .cm_stride(11)
       .Test(xnn_f32_gemminc_ukernel_1x8__aarch64_neonfma_cortex_a75);
   }
-#endif  // CPUINFO_ARCH_ARM64
+#endif  // XNN_ARCH_ARM64
 
 
-#if CPUINFO_ARCH_ARM64
+#if XNN_ARCH_ARM64
   TEST(F32_GEMMINC_4X8__AARCH64_NEONFMA_CORTEX_A57, k_eq_8) {
     TEST_REQUIRES_ARM_NEON_FMA;
     GemmMicrokernelTester()
@@ -1522,10 +1522,10 @@
       .cm_stride(11)
       .Test(xnn_f32_gemminc_ukernel_4x8__aarch64_neonfma_cortex_a57);
   }
-#endif  // CPUINFO_ARCH_ARM64
+#endif  // XNN_ARCH_ARM64
 
 
-#if CPUINFO_ARCH_ARM64
+#if XNN_ARCH_ARM64
   TEST(F32_GEMMINC_4X8__AARCH64_NEONFMA_CORTEX_A75, k_eq_8) {
     TEST_REQUIRES_ARM_NEON_FMA;
     GemmMicrokernelTester()
@@ -2023,10 +2023,10 @@
       .cm_stride(11)
       .Test(xnn_f32_gemminc_ukernel_4x8__aarch64_neonfma_cortex_a75);
   }
-#endif  // CPUINFO_ARCH_ARM64
+#endif  // XNN_ARCH_ARM64
 
 
-#if CPUINFO_ARCH_ARM64
+#if XNN_ARCH_ARM64
   TEST(F32_GEMMINC_5X8__AARCH64_NEONFMA_CORTEX_A75, k_eq_8) {
     TEST_REQUIRES_ARM_NEON_FMA;
     GemmMicrokernelTester()
@@ -2524,10 +2524,10 @@
       .cm_stride(11)
       .Test(xnn_f32_gemminc_ukernel_5x8__aarch64_neonfma_cortex_a75);
   }
-#endif  // CPUINFO_ARCH_ARM64
+#endif  // XNN_ARCH_ARM64
 
 
-#if CPUINFO_ARCH_ARM64
+#if XNN_ARCH_ARM64
   TEST(F32_GEMMINC_6X8__AARCH64_NEONFMA_CORTEX_A57, k_eq_8) {
     TEST_REQUIRES_ARM_NEON_FMA;
     GemmMicrokernelTester()
@@ -3025,10 +3025,10 @@
       .cm_stride(11)
       .Test(xnn_f32_gemminc_ukernel_6x8__aarch64_neonfma_cortex_a57);
   }
-#endif  // CPUINFO_ARCH_ARM64
+#endif  // XNN_ARCH_ARM64
 
 
-#if CPUINFO_ARCH_ARM64
+#if XNN_ARCH_ARM64
   TEST(F32_GEMMINC_6X8__AARCH64_NEONFMA_CORTEX_A73, k_eq_8) {
     TEST_REQUIRES_ARM_NEON_FMA;
     GemmMicrokernelTester()
@@ -3526,10 +3526,10 @@
       .cm_stride(11)
       .Test(xnn_f32_gemminc_ukernel_6x8__aarch64_neonfma_cortex_a73);
   }
-#endif  // CPUINFO_ARCH_ARM64
+#endif  // XNN_ARCH_ARM64
 
 
-#if CPUINFO_ARCH_ARM64
+#if XNN_ARCH_ARM64
   TEST(F32_GEMMINC_6X8__AARCH64_NEONFMA_CORTEX_A75, k_eq_8) {
     TEST_REQUIRES_ARM_NEON_FMA;
     GemmMicrokernelTester()
@@ -4027,10 +4027,10 @@
       .cm_stride(11)
       .Test(xnn_f32_gemminc_ukernel_6x8__aarch64_neonfma_cortex_a75);
   }
-#endif  // CPUINFO_ARCH_ARM64
+#endif  // XNN_ARCH_ARM64
 
 
-#if CPUINFO_ARCH_ARM64
+#if XNN_ARCH_ARM64
   TEST(F32_GEMMINC_1X12__AARCH64_NEONFMA_CORTEX_A53, k_eq_4) {
     TEST_REQUIRES_ARM_NEON_FMA;
     GemmMicrokernelTester()
@@ -4528,10 +4528,10 @@
       .cm_stride(17)
       .Test(xnn_f32_gemminc_ukernel_1x12__aarch64_neonfma_cortex_a53);
   }
-#endif  // CPUINFO_ARCH_ARM64
+#endif  // XNN_ARCH_ARM64
 
 
-#if CPUINFO_ARCH_ARM64
+#if XNN_ARCH_ARM64
   TEST(F32_GEMMINC_4X12__AARCH64_NEONFMA_CORTEX_A53, k_eq_4) {
     TEST_REQUIRES_ARM_NEON_FMA;
     GemmMicrokernelTester()
@@ -5029,10 +5029,10 @@
       .cm_stride(17)
       .Test(xnn_f32_gemminc_ukernel_4x12__aarch64_neonfma_cortex_a53);
   }
-#endif  // CPUINFO_ARCH_ARM64
+#endif  // XNN_ARCH_ARM64
 
 
-#if CPUINFO_ARCH_ARM64
+#if XNN_ARCH_ARM64
   TEST(F32_GEMMINC_4X8__AARCH64_NEONFMA_LD64, k_eq_2) {
     TEST_REQUIRES_ARM_NEON_FMA;
     GemmMicrokernelTester()
@@ -5485,10 +5485,10 @@
       .cm_stride(11)
       .Test(xnn_f32_gemminc_ukernel_4x8__aarch64_neonfma_ld64);
   }
-#endif  // CPUINFO_ARCH_ARM64
+#endif  // XNN_ARCH_ARM64
 
 
-#if CPUINFO_ARCH_ARM64
+#if XNN_ARCH_ARM64
   TEST(F32_GEMMINC_4X8__AARCH64_NEONFMA_LD128, k_eq_4) {
     TEST_REQUIRES_ARM_NEON_FMA;
     GemmMicrokernelTester()
@@ -5941,10 +5941,10 @@
       .cm_stride(11)
       .Test(xnn_f32_gemminc_ukernel_4x8__aarch64_neonfma_ld128);
   }
-#endif  // CPUINFO_ARCH_ARM64
+#endif  // XNN_ARCH_ARM64
 
 
-#if CPUINFO_ARCH_ARM64
+#if XNN_ARCH_ARM64
   TEST(F32_GEMMINC_6X8__AARCH64_NEONFMA_LD64, k_eq_2) {
     TEST_REQUIRES_ARM_NEON_FMA;
     GemmMicrokernelTester()
@@ -6397,10 +6397,10 @@
       .cm_stride(11)
       .Test(xnn_f32_gemminc_ukernel_6x8__aarch64_neonfma_ld64);
   }
-#endif  // CPUINFO_ARCH_ARM64
+#endif  // XNN_ARCH_ARM64
 
 
-#if CPUINFO_ARCH_ARM64
+#if XNN_ARCH_ARM64
   TEST(F32_GEMMINC_6X8__AARCH64_NEONFMA_LD128, k_eq_4) {
     TEST_REQUIRES_ARM_NEON_FMA;
     GemmMicrokernelTester()
@@ -6853,10 +6853,10 @@
       .cm_stride(11)
       .Test(xnn_f32_gemminc_ukernel_6x8__aarch64_neonfma_ld128);
   }
-#endif  // CPUINFO_ARCH_ARM64
+#endif  // XNN_ARCH_ARM64
 
 
-#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
   TEST(F32_GEMMINC_4X12__NEON_LD64, k_eq_2) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
@@ -7309,10 +7309,10 @@
       .cm_stride(17)
       .Test(xnn_f32_gemminc_ukernel_4x12__neon_ld64);
   }
-#endif  // CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
-#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
   TEST(F32_GEMMINC_1X8__NEON_LD64, k_eq_2) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
@@ -7765,10 +7765,10 @@
       .cm_stride(11)
       .Test(xnn_f32_gemminc_ukernel_1x8__neon_ld64);
   }
-#endif  // CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
-#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
   TEST(F32_GEMMINC_4X8__NEON_LD64, k_eq_2) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
@@ -8221,10 +8221,10 @@
       .cm_stride(11)
       .Test(xnn_f32_gemminc_ukernel_4x8__neon_ld64);
   }
-#endif  // CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
-#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
   TEST(F32_GEMMINC_4X8__NEON_LD128, k_eq_4) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
@@ -8677,10 +8677,10 @@
       .cm_stride(11)
       .Test(xnn_f32_gemminc_ukernel_4x8__neon_ld128);
   }
-#endif  // CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
-#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
   TEST(F32_GEMMINC_5X8__NEON_LD64, k_eq_2) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
@@ -9133,10 +9133,10 @@
       .cm_stride(11)
       .Test(xnn_f32_gemminc_ukernel_5x8__neon_ld64);
   }
-#endif  // CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
-#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
   TEST(F32_GEMMINC_6X8__NEON_LD64, k_eq_2) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
@@ -9589,10 +9589,10 @@
       .cm_stride(11)
       .Test(xnn_f32_gemminc_ukernel_6x8__neon_ld64);
   }
-#endif  // CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
-#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
   TEST(F32_GEMMINC_1X8__NEONFMA_LD64, k_eq_2) {
     TEST_REQUIRES_ARM_NEON_FMA;
     GemmMicrokernelTester()
@@ -10045,10 +10045,10 @@
       .cm_stride(11)
       .Test(xnn_f32_gemminc_ukernel_1x8__neonfma_ld64);
   }
-#endif  // CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
-#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
   TEST(F32_GEMMINC_4X12__NEONFMA_LD64, k_eq_2) {
     TEST_REQUIRES_ARM_NEON_FMA;
     GemmMicrokernelTester()
@@ -10501,10 +10501,10 @@
       .cm_stride(17)
       .Test(xnn_f32_gemminc_ukernel_4x12__neonfma_ld64);
   }
-#endif  // CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
-#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
   TEST(F32_GEMMINC_4X8__NEONFMA_LD64, k_eq_2) {
     TEST_REQUIRES_ARM_NEON_FMA;
     GemmMicrokernelTester()
@@ -10957,10 +10957,10 @@
       .cm_stride(11)
       .Test(xnn_f32_gemminc_ukernel_4x8__neonfma_ld64);
   }
-#endif  // CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
-#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
   TEST(F32_GEMMINC_4X8__NEONFMA_LD128, k_eq_4) {
     TEST_REQUIRES_ARM_NEON_FMA;
     GemmMicrokernelTester()
@@ -11413,10 +11413,10 @@
       .cm_stride(11)
       .Test(xnn_f32_gemminc_ukernel_4x8__neonfma_ld128);
   }
-#endif  // CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
-#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
   TEST(F32_GEMMINC_5X8__NEONFMA_LD64, k_eq_2) {
     TEST_REQUIRES_ARM_NEON_FMA;
     GemmMicrokernelTester()
@@ -11869,10 +11869,10 @@
       .cm_stride(11)
       .Test(xnn_f32_gemminc_ukernel_5x8__neonfma_ld64);
   }
-#endif  // CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
-#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
   TEST(F32_GEMMINC_6X8__NEONFMA_LD64, k_eq_2) {
     TEST_REQUIRES_ARM_NEON_FMA;
     GemmMicrokernelTester()
@@ -12325,10 +12325,10 @@
       .cm_stride(11)
       .Test(xnn_f32_gemminc_ukernel_6x8__neonfma_ld64);
   }
-#endif  // CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
-#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
   TEST(F32_GEMMINC_1X8__SSE_LOAD1, k_eq_1) {
     TEST_REQUIRES_X86_SSE;
     GemmMicrokernelTester()
@@ -12679,10 +12679,10 @@
       .cm_stride(11)
       .Test(xnn_f32_gemminc_ukernel_1x8__sse_load1);
   }
-#endif  // CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
-#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
   TEST(F32_GEMMINC_4X8__SSE_LOAD1, k_eq_1) {
     TEST_REQUIRES_X86_SSE;
     GemmMicrokernelTester()
@@ -13033,10 +13033,10 @@
       .cm_stride(11)
       .Test(xnn_f32_gemminc_ukernel_4x8__sse_load1);
   }
-#endif  // CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
-#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
   TEST(F32_GEMMINC_1X8__SSE_DUP, k_eq_4) {
     TEST_REQUIRES_X86_SSE;
     GemmMicrokernelTester()
@@ -13489,10 +13489,10 @@
       .cm_stride(11)
       .Test(xnn_f32_gemminc_ukernel_1x8__sse_dup);
   }
-#endif  // CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
-#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
   TEST(F32_GEMMINC_4X8__SSE_DUP, k_eq_4) {
     TEST_REQUIRES_X86_SSE;
     GemmMicrokernelTester()
@@ -13945,10 +13945,10 @@
       .cm_stride(11)
       .Test(xnn_f32_gemminc_ukernel_4x8__sse_dup);
   }
-#endif  // CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
-#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
   TEST(F32_GEMMINC_1X8S4__SSE, k_eq_4) {
     TEST_REQUIRES_X86_SSE;
     GemmMicrokernelTester()
@@ -14401,10 +14401,10 @@
       .cm_stride(11)
       .Test(xnn_f32_gemminc_ukernel_1x8s4__sse);
   }
-#endif  // CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
-#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
   TEST(F32_GEMMINC_4X8S4__SSE, k_eq_4) {
     TEST_REQUIRES_X86_SSE;
     GemmMicrokernelTester()
@@ -14857,10 +14857,10 @@
       .cm_stride(11)
       .Test(xnn_f32_gemminc_ukernel_4x8s4__sse);
   }
-#endif  // CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
-#if !CPUINFO_ARCH_ASMJS && !CPUINFO_ARCH_WASM
+#if !XNN_ARCH_ASMJS && !XNN_ARCH_WASM
   TEST(F32_GEMMINC_1X8__PSIMD_LOADSPLAT, k_eq_1) {
     TEST_REQUIRES_PSIMD;
     GemmMicrokernelTester()
@@ -15211,10 +15211,10 @@
       .cm_stride(11)
       .Test(xnn_f32_gemminc_ukernel_1x8__psimd_loadsplat, GemmMicrokernelTester::Variant::Scalar);
   }
-#endif  // !CPUINFO_ARCH_ASMJS && !CPUINFO_ARCH_WASM
+#endif  // !XNN_ARCH_ASMJS && !XNN_ARCH_WASM
 
 
-#if !CPUINFO_ARCH_ASMJS && !CPUINFO_ARCH_WASM
+#if !XNN_ARCH_ASMJS && !XNN_ARCH_WASM
   TEST(F32_GEMMINC_4X8__PSIMD_LOADSPLAT, k_eq_1) {
     TEST_REQUIRES_PSIMD;
     GemmMicrokernelTester()
@@ -15565,10 +15565,10 @@
       .cm_stride(11)
       .Test(xnn_f32_gemminc_ukernel_4x8__psimd_loadsplat, GemmMicrokernelTester::Variant::Scalar);
   }
-#endif  // !CPUINFO_ARCH_ASMJS && !CPUINFO_ARCH_WASM
+#endif  // !XNN_ARCH_ASMJS && !XNN_ARCH_WASM
 
 
-#if !CPUINFO_ARCH_ASMJS && !CPUINFO_ARCH_WASM
+#if !XNN_ARCH_ASMJS && !XNN_ARCH_WASM
   TEST(F32_GEMMINC_6X8__PSIMD_LOADSPLAT, k_eq_1) {
     TEST_REQUIRES_PSIMD;
     GemmMicrokernelTester()
@@ -15919,10 +15919,10 @@
       .cm_stride(11)
       .Test(xnn_f32_gemminc_ukernel_6x8__psimd_loadsplat, GemmMicrokernelTester::Variant::Scalar);
   }
-#endif  // !CPUINFO_ARCH_ASMJS && !CPUINFO_ARCH_WASM
+#endif  // !XNN_ARCH_ASMJS && !XNN_ARCH_WASM
 
 
-#if !CPUINFO_ARCH_ASMJS && !CPUINFO_ARCH_WASM
+#if !XNN_ARCH_ASMJS && !XNN_ARCH_WASM
   TEST(F32_GEMMINC_1X8__PSIMD_SPLAT, k_eq_4) {
     TEST_REQUIRES_PSIMD;
     GemmMicrokernelTester()
@@ -16375,10 +16375,10 @@
       .cm_stride(11)
       .Test(xnn_f32_gemminc_ukernel_1x8__psimd_splat, GemmMicrokernelTester::Variant::Scalar);
   }
-#endif  // !CPUINFO_ARCH_ASMJS && !CPUINFO_ARCH_WASM
+#endif  // !XNN_ARCH_ASMJS && !XNN_ARCH_WASM
 
 
-#if !CPUINFO_ARCH_ASMJS && !CPUINFO_ARCH_WASM
+#if !XNN_ARCH_ASMJS && !XNN_ARCH_WASM
   TEST(F32_GEMMINC_4X8__PSIMD_SPLAT, k_eq_4) {
     TEST_REQUIRES_PSIMD;
     GemmMicrokernelTester()
@@ -16831,10 +16831,10 @@
       .cm_stride(11)
       .Test(xnn_f32_gemminc_ukernel_4x8__psimd_splat, GemmMicrokernelTester::Variant::Scalar);
   }
-#endif  // !CPUINFO_ARCH_ASMJS && !CPUINFO_ARCH_WASM
+#endif  // !XNN_ARCH_ASMJS && !XNN_ARCH_WASM
 
 
-#if !CPUINFO_ARCH_ASMJS && !CPUINFO_ARCH_WASM
+#if !XNN_ARCH_ASMJS && !XNN_ARCH_WASM
   TEST(F32_GEMMINC_6X8__PSIMD_SPLAT, k_eq_4) {
     TEST_REQUIRES_PSIMD;
     GemmMicrokernelTester()
@@ -17287,10 +17287,10 @@
       .cm_stride(11)
       .Test(xnn_f32_gemminc_ukernel_6x8__psimd_splat, GemmMicrokernelTester::Variant::Scalar);
   }
-#endif  // !CPUINFO_ARCH_ASMJS && !CPUINFO_ARCH_WASM
+#endif  // !XNN_ARCH_ASMJS && !XNN_ARCH_WASM
 
 
-#if !CPUINFO_ARCH_ASMJS && !CPUINFO_ARCH_WASM
+#if !XNN_ARCH_ASMJS && !XNN_ARCH_WASM
   TEST(F32_GEMMINC_1X8S4__PSIMD, k_eq_4) {
     TEST_REQUIRES_PSIMD;
     GemmMicrokernelTester()
@@ -17743,10 +17743,10 @@
       .cm_stride(11)
       .Test(xnn_f32_gemminc_ukernel_1x8s4__psimd, GemmMicrokernelTester::Variant::Scalar);
   }
-#endif  // !CPUINFO_ARCH_ASMJS && !CPUINFO_ARCH_WASM
+#endif  // !XNN_ARCH_ASMJS && !XNN_ARCH_WASM
 
 
-#if !CPUINFO_ARCH_ASMJS && !CPUINFO_ARCH_WASM
+#if !XNN_ARCH_ASMJS && !XNN_ARCH_WASM
   TEST(F32_GEMMINC_4X8S4__PSIMD, k_eq_4) {
     TEST_REQUIRES_PSIMD;
     GemmMicrokernelTester()
@@ -18199,10 +18199,10 @@
       .cm_stride(11)
       .Test(xnn_f32_gemminc_ukernel_4x8s4__psimd, GemmMicrokernelTester::Variant::Scalar);
   }
-#endif  // !CPUINFO_ARCH_ASMJS && !CPUINFO_ARCH_WASM
+#endif  // !XNN_ARCH_ASMJS && !XNN_ARCH_WASM
 
 
-#if !CPUINFO_ARCH_ASMJS && !CPUINFO_ARCH_WASM
+#if !XNN_ARCH_ASMJS && !XNN_ARCH_WASM
   TEST(F32_GEMMINC_6X8S4__PSIMD, k_eq_4) {
     TEST_REQUIRES_PSIMD;
     GemmMicrokernelTester()
@@ -18655,7 +18655,7 @@
       .cm_stride(11)
       .Test(xnn_f32_gemminc_ukernel_6x8s4__psimd, GemmMicrokernelTester::Variant::Scalar);
   }
-#endif  // !CPUINFO_ARCH_ASMJS && !CPUINFO_ARCH_WASM
+#endif  // !XNN_ARCH_ASMJS && !XNN_ARCH_WASM
 
 
 TEST(F32_GEMMINC_1X4__SCALAR, k_eq_1) {
diff --git a/test/f32-hswish.cc b/test/f32-hswish.cc
index 97eee7b..05a3861 100644
--- a/test/f32-hswish.cc
+++ b/test/f32-hswish.cc
@@ -3,16 +3,16 @@
 // This source code is licensed under the BSD-style license found in the
 // LICENSE file in the root directory of this source tree.
 
-#include <cpuinfo.h>
 #include <gtest/gtest.h>
 
+#include <xnnpack/common.h>
 #include <xnnpack/isa-checks.h>
-#include <xnnpack/hswish.h>
 
+#include <xnnpack/hswish.h>
 #include "hswish-microkernel-tester.h"
 
 
-#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
   TEST(F32_HSWISH__NEON, n_eq_4) {
     TEST_REQUIRES_ARM_NEON;
     HSwishMicrokernelTester()
@@ -102,10 +102,10 @@
         .Test(xnn_f32_hswish_ukernel__neonfma);
     }
   }
-#endif  // CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
-#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
   TEST(F32_HSWISH__SSE, n_eq_4) {
     TEST_REQUIRES_X86_SSE2;
     HSwishMicrokernelTester()
@@ -150,10 +150,10 @@
         .Test(xnn_f32_hswish_ukernel__sse);
     }
   }
-#endif  // CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
-#if !CPUINFO_ARCH_WASM && !CPUINFO_ARCH_ASMJS
+#if !XNN_ARCH_WASM && !XNN_ARCH_ASMJS
   TEST(F32_HSWISH__PSIMD, n_eq_4) {
     TEST_REQUIRES_PSIMD;
     HSwishMicrokernelTester()
@@ -198,7 +198,7 @@
         .Test(xnn_f32_hswish_ukernel__psimd, HSwishMicrokernelTester::Variant::Scalar);
     }
   }
-#endif  // !CPUINFO_ARCH_WASM && !CPUINFO_ARCH_ASMJS
+#endif  // !XNN_ARCH_WASM && !XNN_ARCH_ASMJS
 
 
 TEST(F32_HSWISH__SCALAR, n_eq_1) {
diff --git a/test/f32-igemm.cc b/test/f32-igemm.cc
index c665194..a64615d 100644
--- a/test/f32-igemm.cc
+++ b/test/f32-igemm.cc
@@ -11,18 +11,18 @@
 //   Generator: tools/generate-gemm-test.py
 
 
-#include <cpuinfo.h>
 #include <gtest/gtest.h>
 
+#include <xnnpack/common.h>
+#include <xnnpack/isa-checks.h>
+
 #include <xnnpack/gemm.h>
 #include <xnnpack/igemm.h>
 #include <xnnpack/ppmm.h>
-#include <xnnpack/isa-checks.h>
-
 #include "gemm-microkernel-tester.h"
 
 
-#if CPUINFO_ARCH_ARM64
+#if XNN_ARCH_ARM64
   TEST(F32_IGEMM_1X8__AARCH64_NEONFMA_CORTEX_A57, k_eq_8) {
     TEST_REQUIRES_ARM_NEON_FMA;
     GemmMicrokernelTester()
@@ -518,10 +518,10 @@
       .cm_stride(11)
       .Test(xnn_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a57);
   }
-#endif  // CPUINFO_ARCH_ARM64
+#endif  // XNN_ARCH_ARM64
 
 
-#if CPUINFO_ARCH_ARM64
+#if XNN_ARCH_ARM64
   TEST(F32_IGEMM_1X8__AARCH64_NEONFMA_CORTEX_A75, k_eq_8) {
     TEST_REQUIRES_ARM_NEON_FMA;
     GemmMicrokernelTester()
@@ -1017,10 +1017,10 @@
       .cm_stride(11)
       .Test(xnn_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a75);
   }
-#endif  // CPUINFO_ARCH_ARM64
+#endif  // XNN_ARCH_ARM64
 
 
-#if CPUINFO_ARCH_ARM64
+#if XNN_ARCH_ARM64
   TEST(F32_IGEMM_4X8__AARCH64_NEONFMA_CORTEX_A75, k_eq_8) {
     TEST_REQUIRES_ARM_NEON_FMA;
     GemmMicrokernelTester()
@@ -1516,10 +1516,10 @@
       .cm_stride(11)
       .Test(xnn_f32_igemm_ukernel_4x8__aarch64_neonfma_cortex_a75);
   }
-#endif  // CPUINFO_ARCH_ARM64
+#endif  // XNN_ARCH_ARM64
 
 
-#if CPUINFO_ARCH_ARM64
+#if XNN_ARCH_ARM64
   TEST(F32_IGEMM_5X8__AARCH64_NEONFMA_CORTEX_A75, k_eq_8) {
     TEST_REQUIRES_ARM_NEON_FMA;
     GemmMicrokernelTester()
@@ -2015,10 +2015,10 @@
       .cm_stride(11)
       .Test(xnn_f32_igemm_ukernel_5x8__aarch64_neonfma_cortex_a75);
   }
-#endif  // CPUINFO_ARCH_ARM64
+#endif  // XNN_ARCH_ARM64
 
 
-#if CPUINFO_ARCH_ARM64
+#if XNN_ARCH_ARM64
   TEST(F32_IGEMM_6X8__AARCH64_NEONFMA_CORTEX_A73, k_eq_8) {
     TEST_REQUIRES_ARM_NEON_FMA;
     GemmMicrokernelTester()
@@ -2514,10 +2514,10 @@
       .cm_stride(11)
       .Test(xnn_f32_igemm_ukernel_6x8__aarch64_neonfma_cortex_a73);
   }
-#endif  // CPUINFO_ARCH_ARM64
+#endif  // XNN_ARCH_ARM64
 
 
-#if CPUINFO_ARCH_ARM64
+#if XNN_ARCH_ARM64
   TEST(F32_IGEMM_6X8__AARCH64_NEONFMA_CORTEX_A57, k_eq_8) {
     TEST_REQUIRES_ARM_NEON_FMA;
     GemmMicrokernelTester()
@@ -3013,10 +3013,10 @@
       .cm_stride(11)
       .Test(xnn_f32_igemm_ukernel_6x8__aarch64_neonfma_cortex_a57);
   }
-#endif  // CPUINFO_ARCH_ARM64
+#endif  // XNN_ARCH_ARM64
 
 
-#if CPUINFO_ARCH_ARM64
+#if XNN_ARCH_ARM64
   TEST(F32_IGEMM_6X8__AARCH64_NEONFMA_CORTEX_A75, k_eq_8) {
     TEST_REQUIRES_ARM_NEON_FMA;
     GemmMicrokernelTester()
@@ -3512,10 +3512,10 @@
       .cm_stride(11)
       .Test(xnn_f32_igemm_ukernel_6x8__aarch64_neonfma_cortex_a75);
   }
-#endif  // CPUINFO_ARCH_ARM64
+#endif  // XNN_ARCH_ARM64
 
 
-#if CPUINFO_ARCH_ARM64
+#if XNN_ARCH_ARM64
   TEST(F32_IGEMM_1X12__AARCH64_NEONFMA_CORTEX_A53, k_eq_4) {
     TEST_REQUIRES_ARM_NEON_FMA;
     GemmMicrokernelTester()
@@ -4011,10 +4011,10 @@
       .cm_stride(17)
       .Test(xnn_f32_igemm_ukernel_1x12__aarch64_neonfma_cortex_a53);
   }
-#endif  // CPUINFO_ARCH_ARM64
+#endif  // XNN_ARCH_ARM64
 
 
-#if CPUINFO_ARCH_ARM64
+#if XNN_ARCH_ARM64
   TEST(F32_IGEMM_4X12__AARCH64_NEONFMA_CORTEX_A53, k_eq_4) {
     TEST_REQUIRES_ARM_NEON_FMA;
     GemmMicrokernelTester()
@@ -4510,10 +4510,10 @@
       .cm_stride(17)
       .Test(xnn_f32_igemm_ukernel_4x12__aarch64_neonfma_cortex_a53);
   }
-#endif  // CPUINFO_ARCH_ARM64
+#endif  // XNN_ARCH_ARM64
 
 
-#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
   TEST(F32_IGEMM_4X12__NEON_LD64, k_eq_2) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
@@ -4978,10 +4978,10 @@
       .cm_stride(17)
       .Test(xnn_f32_igemm_ukernel_4x12__neon_ld64);
   }
-#endif  // CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
-#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
   TEST(F32_IGEMM_1X8__NEON_LD64, k_eq_2) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
@@ -5446,10 +5446,10 @@
       .cm_stride(11)
       .Test(xnn_f32_igemm_ukernel_1x8__neon_ld64);
   }
-#endif  // CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
-#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
   TEST(F32_IGEMM_4X2__NEON_LD64, k_eq_2) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
@@ -5914,10 +5914,10 @@
       .cm_stride(5)
       .Test(xnn_f32_igemm_ukernel_4x2__neon_ld64);
   }
-#endif  // CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
-#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
   TEST(F32_IGEMM_4X4__NEON_LD64, k_eq_2) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
@@ -6382,10 +6382,10 @@
       .cm_stride(7)
       .Test(xnn_f32_igemm_ukernel_4x4__neon_ld64);
   }
-#endif  // CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
-#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
   TEST(F32_IGEMM_4X8__NEON_LD128, k_eq_4) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
@@ -6850,10 +6850,10 @@
       .cm_stride(11)
       .Test(xnn_f32_igemm_ukernel_4x8__neon_ld128);
   }
-#endif  // CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
-#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
   TEST(F32_IGEMM_4X8__NEON_LD64, k_eq_2) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
@@ -7318,10 +7318,10 @@
       .cm_stride(11)
       .Test(xnn_f32_igemm_ukernel_4x8__neon_ld64);
   }
-#endif  // CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
-#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
   TEST(F32_IGEMM_6X8__NEON_LD64, k_eq_2) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
@@ -7786,10 +7786,10 @@
       .cm_stride(11)
       .Test(xnn_f32_igemm_ukernel_6x8__neon_ld64);
   }
-#endif  // CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
-#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
   TEST(F32_IGEMM_4X12__NEONFMA_LD64, k_eq_2) {
     TEST_REQUIRES_ARM_NEON_FMA;
     GemmMicrokernelTester()
@@ -8254,10 +8254,10 @@
       .cm_stride(17)
       .Test(xnn_f32_igemm_ukernel_4x12__neonfma_ld64);
   }
-#endif  // CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
-#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
   TEST(F32_IGEMM_4X2__NEONFMA_LD64, k_eq_2) {
     TEST_REQUIRES_ARM_NEON_FMA;
     GemmMicrokernelTester()
@@ -8722,10 +8722,10 @@
       .cm_stride(5)
       .Test(xnn_f32_igemm_ukernel_4x2__neonfma_ld64);
   }
-#endif  // CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
-#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
   TEST(F32_IGEMM_4X4__NEONFMA_LD64, k_eq_2) {
     TEST_REQUIRES_ARM_NEON_FMA;
     GemmMicrokernelTester()
@@ -9190,10 +9190,10 @@
       .cm_stride(7)
       .Test(xnn_f32_igemm_ukernel_4x4__neonfma_ld64);
   }
-#endif  // CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
-#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
   TEST(F32_IGEMM_4X8__NEONFMA_LD128, k_eq_4) {
     TEST_REQUIRES_ARM_NEON_FMA;
     GemmMicrokernelTester()
@@ -9658,10 +9658,10 @@
       .cm_stride(11)
       .Test(xnn_f32_igemm_ukernel_4x8__neonfma_ld128);
   }
-#endif  // CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
-#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
   TEST(F32_IGEMM_4X8__NEONFMA_LD64, k_eq_2) {
     TEST_REQUIRES_ARM_NEON_FMA;
     GemmMicrokernelTester()
@@ -10126,10 +10126,10 @@
       .cm_stride(11)
       .Test(xnn_f32_igemm_ukernel_4x8__neonfma_ld64);
   }
-#endif  // CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
-#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
   TEST(F32_IGEMM_6X8__NEONFMA_LD64, k_eq_2) {
     TEST_REQUIRES_ARM_NEON_FMA;
     GemmMicrokernelTester()
@@ -10594,10 +10594,10 @@
       .cm_stride(11)
       .Test(xnn_f32_igemm_ukernel_6x8__neonfma_ld64);
   }
-#endif  // CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
-#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
   TEST(F32_IGEMM_1X8__SSE_LOAD1, k_eq_1) {
     TEST_REQUIRES_X86_SSE;
     GemmMicrokernelTester()
@@ -10992,10 +10992,10 @@
       .cm_stride(11)
       .Test(xnn_f32_igemm_ukernel_1x8__sse_load1);
   }
-#endif  // CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
-#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
   TEST(F32_IGEMM_4X8__SSE_LOAD1, k_eq_1) {
     TEST_REQUIRES_X86_SSE;
     GemmMicrokernelTester()
@@ -11390,10 +11390,10 @@
       .cm_stride(11)
       .Test(xnn_f32_igemm_ukernel_4x8__sse_load1);
   }
-#endif  // CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
-#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
   TEST(F32_IGEMM_1X8__SSE_DUP, k_eq_4) {
     TEST_REQUIRES_X86_SSE;
     GemmMicrokernelTester()
@@ -11858,10 +11858,10 @@
       .cm_stride(11)
       .Test(xnn_f32_igemm_ukernel_1x8__sse_dup);
   }
-#endif  // CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
-#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
   TEST(F32_IGEMM_4X8__SSE_DUP, k_eq_4) {
     TEST_REQUIRES_X86_SSE;
     GemmMicrokernelTester()
@@ -12326,10 +12326,10 @@
       .cm_stride(11)
       .Test(xnn_f32_igemm_ukernel_4x8__sse_dup);
   }
-#endif  // CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
-#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
   TEST(F32_IGEMM_4X2C4__SSE, k_eq_4) {
     TEST_REQUIRES_X86_SSE;
     GemmMicrokernelTester()
@@ -12794,10 +12794,10 @@
       .cm_stride(5)
       .Test(xnn_f32_igemm_ukernel_4x2c4__sse);
   }
-#endif  // CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
-#if !CPUINFO_ARCH_ASMJS && !CPUINFO_ARCH_WASM
+#if !XNN_ARCH_ASMJS && !XNN_ARCH_WASM
   TEST(F32_IGEMM_4X2C4__PSIMD, k_eq_4) {
     TEST_REQUIRES_PSIMD;
     GemmMicrokernelTester()
@@ -13262,10 +13262,10 @@
       .cm_stride(5)
       .Test(xnn_f32_igemm_ukernel_4x2c4__psimd, GemmMicrokernelTester::Variant::Scalar);
   }
-#endif  // !CPUINFO_ARCH_ASMJS && !CPUINFO_ARCH_WASM
+#endif  // !XNN_ARCH_ASMJS && !XNN_ARCH_WASM
 
 
-#if !CPUINFO_ARCH_ASMJS && !CPUINFO_ARCH_WASM
+#if !XNN_ARCH_ASMJS && !XNN_ARCH_WASM
   TEST(F32_IGEMM_1X8__PSIMD_LOADSPLAT, k_eq_1) {
     TEST_REQUIRES_PSIMD;
     GemmMicrokernelTester()
@@ -13660,10 +13660,10 @@
       .cm_stride(11)
       .Test(xnn_f32_igemm_ukernel_1x8__psimd_loadsplat, GemmMicrokernelTester::Variant::Scalar);
   }
-#endif  // !CPUINFO_ARCH_ASMJS && !CPUINFO_ARCH_WASM
+#endif  // !XNN_ARCH_ASMJS && !XNN_ARCH_WASM
 
 
-#if !CPUINFO_ARCH_ASMJS && !CPUINFO_ARCH_WASM
+#if !XNN_ARCH_ASMJS && !XNN_ARCH_WASM
   TEST(F32_IGEMM_4X8__PSIMD_LOADSPLAT, k_eq_1) {
     TEST_REQUIRES_PSIMD;
     GemmMicrokernelTester()
@@ -14058,10 +14058,10 @@
       .cm_stride(11)
       .Test(xnn_f32_igemm_ukernel_4x8__psimd_loadsplat, GemmMicrokernelTester::Variant::Scalar);
   }
-#endif  // !CPUINFO_ARCH_ASMJS && !CPUINFO_ARCH_WASM
+#endif  // !XNN_ARCH_ASMJS && !XNN_ARCH_WASM
 
 
-#if !CPUINFO_ARCH_ASMJS && !CPUINFO_ARCH_WASM
+#if !XNN_ARCH_ASMJS && !XNN_ARCH_WASM
   TEST(F32_IGEMM_6X8__PSIMD_LOADSPLAT, k_eq_1) {
     TEST_REQUIRES_PSIMD;
     GemmMicrokernelTester()
@@ -14456,10 +14456,10 @@
       .cm_stride(11)
       .Test(xnn_f32_igemm_ukernel_6x8__psimd_loadsplat, GemmMicrokernelTester::Variant::Scalar);
   }
-#endif  // !CPUINFO_ARCH_ASMJS && !CPUINFO_ARCH_WASM
+#endif  // !XNN_ARCH_ASMJS && !XNN_ARCH_WASM
 
 
-#if !CPUINFO_ARCH_ASMJS && !CPUINFO_ARCH_WASM
+#if !XNN_ARCH_ASMJS && !XNN_ARCH_WASM
   TEST(F32_IGEMM_1X8__PSIMD_SPLAT, k_eq_4) {
     TEST_REQUIRES_PSIMD;
     GemmMicrokernelTester()
@@ -14924,10 +14924,10 @@
       .cm_stride(11)
       .Test(xnn_f32_igemm_ukernel_1x8__psimd_splat, GemmMicrokernelTester::Variant::Scalar);
   }
-#endif  // !CPUINFO_ARCH_ASMJS && !CPUINFO_ARCH_WASM
+#endif  // !XNN_ARCH_ASMJS && !XNN_ARCH_WASM
 
 
-#if !CPUINFO_ARCH_ASMJS && !CPUINFO_ARCH_WASM
+#if !XNN_ARCH_ASMJS && !XNN_ARCH_WASM
   TEST(F32_IGEMM_4X8__PSIMD_SPLAT, k_eq_4) {
     TEST_REQUIRES_PSIMD;
     GemmMicrokernelTester()
@@ -15392,10 +15392,10 @@
       .cm_stride(11)
       .Test(xnn_f32_igemm_ukernel_4x8__psimd_splat, GemmMicrokernelTester::Variant::Scalar);
   }
-#endif  // !CPUINFO_ARCH_ASMJS && !CPUINFO_ARCH_WASM
+#endif  // !XNN_ARCH_ASMJS && !XNN_ARCH_WASM
 
 
-#if !CPUINFO_ARCH_ASMJS && !CPUINFO_ARCH_WASM
+#if !XNN_ARCH_ASMJS && !XNN_ARCH_WASM
   TEST(F32_IGEMM_6X8__PSIMD_SPLAT, k_eq_4) {
     TEST_REQUIRES_PSIMD;
     GemmMicrokernelTester()
@@ -15860,10 +15860,10 @@
       .cm_stride(11)
       .Test(xnn_f32_igemm_ukernel_6x8__psimd_splat, GemmMicrokernelTester::Variant::Scalar);
   }
-#endif  // !CPUINFO_ARCH_ASMJS && !CPUINFO_ARCH_WASM
+#endif  // !XNN_ARCH_ASMJS && !XNN_ARCH_WASM
 
 
-#if !CPUINFO_ARCH_ASMJS && !CPUINFO_ARCH_WASM
+#if !XNN_ARCH_ASMJS && !XNN_ARCH_WASM
   TEST(F32_IGEMM_1X8S4__PSIMD, k_eq_4) {
     TEST_REQUIRES_PSIMD;
     GemmMicrokernelTester()
@@ -16328,10 +16328,10 @@
       .cm_stride(11)
       .Test(xnn_f32_igemm_ukernel_1x8s4__psimd, GemmMicrokernelTester::Variant::Scalar);
   }
-#endif  // !CPUINFO_ARCH_ASMJS && !CPUINFO_ARCH_WASM
+#endif  // !XNN_ARCH_ASMJS && !XNN_ARCH_WASM
 
 
-#if !CPUINFO_ARCH_ASMJS && !CPUINFO_ARCH_WASM
+#if !XNN_ARCH_ASMJS && !XNN_ARCH_WASM
   TEST(F32_IGEMM_4X8S4__PSIMD, k_eq_4) {
     TEST_REQUIRES_PSIMD;
     GemmMicrokernelTester()
@@ -16796,10 +16796,10 @@
       .cm_stride(11)
       .Test(xnn_f32_igemm_ukernel_4x8s4__psimd, GemmMicrokernelTester::Variant::Scalar);
   }
-#endif  // !CPUINFO_ARCH_ASMJS && !CPUINFO_ARCH_WASM
+#endif  // !XNN_ARCH_ASMJS && !XNN_ARCH_WASM
 
 
-#if !CPUINFO_ARCH_ASMJS && !CPUINFO_ARCH_WASM
+#if !XNN_ARCH_ASMJS && !XNN_ARCH_WASM
   TEST(F32_IGEMM_6X8S4__PSIMD, k_eq_4) {
     TEST_REQUIRES_PSIMD;
     GemmMicrokernelTester()
@@ -17264,7 +17264,7 @@
       .cm_stride(11)
       .Test(xnn_f32_igemm_ukernel_6x8s4__psimd, GemmMicrokernelTester::Variant::Scalar);
   }
-#endif  // !CPUINFO_ARCH_ASMJS && !CPUINFO_ARCH_WASM
+#endif  // !XNN_ARCH_ASMJS && !XNN_ARCH_WASM
 
 
 TEST(F32_IGEMM_1X4__SCALAR, k_eq_1) {
diff --git a/test/f32-maxpool.cc b/test/f32-maxpool.cc
index da38a88..159a907 100644
--- a/test/f32-maxpool.cc
+++ b/test/f32-maxpool.cc
@@ -3,16 +3,16 @@
 // This source code is licensed under the BSD-style license found in the
 // LICENSE file in the root directory of this source tree.
 
-#include <cpuinfo.h>
 #include <gtest/gtest.h>
 
+#include <xnnpack/common.h>
 #include <xnnpack/isa-checks.h>
-#include <xnnpack/maxpool.h>
 
+#include <xnnpack/maxpool.h>
 #include "maxpool-microkernel-tester.h"
 
 
-#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
   TEST(SMAXPOOL_9P8Q__SSE, kc_eq_4_unipass_fulltile) {
     TEST_REQUIRES_X86_SSE;
     auto tester = MaxPoolMicrokernelTester()
@@ -1214,10 +1214,10 @@
       }
     }
   }
-#endif  // CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
-#if !CPUINFO_ARCH_WASM && !CPUINFO_ARCH_ASMJS
+#if !XNN_ARCH_WASM && !XNN_ARCH_ASMJS
   TEST(SMAXPOOL_9P8Q__PSIMD, kc_eq_4_unipass_fulltile) {
     TEST_REQUIRES_PSIMD;
     auto tester = MaxPoolMicrokernelTester()
@@ -2419,7 +2419,7 @@
       }
     }
   }
-#endif  // !CPUINFO_ARCH_WASM && !CPUINFO_ARCH_ASMJS
+#endif  // !XNN_ARCH_WASM && !XNN_ARCH_ASMJS
 
 
 TEST(SMAXPOOL_9P8Q__SCALAR, kc_eq_1_unipass_fulltile) {
diff --git a/test/f32-pavgpool.cc b/test/f32-pavgpool.cc
index c4c2f90..4c7be12 100644
--- a/test/f32-pavgpool.cc
+++ b/test/f32-pavgpool.cc
@@ -3,16 +3,16 @@
 // This source code is licensed under the BSD-style license found in the
 // LICENSE file in the root directory of this source tree.
 
-#include <cpuinfo.h>
 #include <gtest/gtest.h>
 
+#include <xnnpack/common.h>
 #include <xnnpack/isa-checks.h>
-#include <xnnpack/pavgpool.h>
 
+#include <xnnpack/pavgpool.h>
 #include "avgpool-microkernel-tester.h"
 
 
-#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
   TEST(F32_PAVGPOOL_UP9__NEON, kc_eq_4_fulltile) {
     TEST_REQUIRES_ARM_NEON;
     auto tester = AvgPoolMicrokernelTester()
@@ -942,10 +942,10 @@
       }
     }
   }
-#endif  // CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
-#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
   TEST(F32_PAVGPOOL_UP9__SSE2, kc_eq_4_fulltile) {
     TEST_REQUIRES_X86_SSE2;
     auto tester = AvgPoolMicrokernelTester()
@@ -1875,10 +1875,10 @@
       }
     }
   }
-#endif  // CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
-#if !CPUINFO_ARCH_WASM && !CPUINFO_ARCH_ASMJS
+#if !XNN_ARCH_WASM && !XNN_ARCH_ASMJS
   TEST(F32_PAVGPOOL_UP9__PSIMD, kc_eq_4_fulltile) {
     TEST_REQUIRES_PSIMD;
     auto tester = AvgPoolMicrokernelTester()
@@ -2808,7 +2808,7 @@
       }
     }
   }
-#endif  // !CPUINFO_ARCH_WASM && !CPUINFO_ARCH_ASMJS
+#endif  // !XNN_ARCH_WASM && !XNN_ARCH_ASMJS
 
 
 TEST(F32_PAVGPOOL_UP9__SCALAR, kc_eq_1_fulltile) {
diff --git a/test/f32-ppmm.cc b/test/f32-ppmm.cc
index 659dd52..773806b 100644
--- a/test/f32-ppmm.cc
+++ b/test/f32-ppmm.cc
@@ -11,18 +11,18 @@
 //   Generator: tools/generate-gemm-test.py
 
 
-#include <cpuinfo.h>
 #include <gtest/gtest.h>
 
+#include <xnnpack/common.h>
+#include <xnnpack/isa-checks.h>
+
 #include <xnnpack/gemm.h>
 #include <xnnpack/igemm.h>
 #include <xnnpack/ppmm.h>
-#include <xnnpack/isa-checks.h>
-
 #include "gemm-microkernel-tester.h"
 
 
-#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
   TEST(F32_PPMM_4X8__NEON, k_eq_1) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
@@ -357,10 +357,10 @@
       .cm_stride(11)
       .Test(xnn_f32_ppmm_ukernel_4x8__neon);
   }
-#endif  // CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
-#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
   TEST(F32_PPMM_4X8__NEONFMA, k_eq_1) {
     TEST_REQUIRES_ARM_NEON_FMA;
     GemmMicrokernelTester()
@@ -695,10 +695,10 @@
       .cm_stride(11)
       .Test(xnn_f32_ppmm_ukernel_4x8__neonfma);
   }
-#endif  // CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
-#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
   TEST(F32_PPMM_8X8__NEON, k_eq_1) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
@@ -1033,10 +1033,10 @@
       .cm_stride(11)
       .Test(xnn_f32_ppmm_ukernel_8x8__neon);
   }
-#endif  // CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
-#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
   TEST(F32_PPMM_8X8__NEONFMA, k_eq_1) {
     TEST_REQUIRES_ARM_NEON_FMA;
     GemmMicrokernelTester()
@@ -1371,10 +1371,10 @@
       .cm_stride(11)
       .Test(xnn_f32_ppmm_ukernel_8x8__neonfma);
   }
-#endif  // CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
-#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
   TEST(F32_PPMM_4X8__SSE, k_eq_1) {
     TEST_REQUIRES_X86_SSE;
     GemmMicrokernelTester()
@@ -1709,10 +1709,10 @@
       .cm_stride(11)
       .Test(xnn_f32_ppmm_ukernel_4x8__sse);
   }
-#endif  // CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
-#if !CPUINFO_ARCH_ASMJS && !CPUINFO_ARCH_WASM
+#if !XNN_ARCH_ASMJS && !XNN_ARCH_WASM
   TEST(F32_PPMM_4X8__PSIMD, k_eq_1) {
     TEST_REQUIRES_PSIMD;
     GemmMicrokernelTester()
@@ -2047,7 +2047,7 @@
       .cm_stride(11)
       .Test(xnn_f32_ppmm_ukernel_4x8__psimd, GemmMicrokernelTester::Variant::Scalar);
   }
-#endif  // !CPUINFO_ARCH_ASMJS && !CPUINFO_ARCH_WASM
+#endif  // !XNN_ARCH_ASMJS && !XNN_ARCH_WASM
 
 
 TEST(F32_PPMM_4X2__SCALAR, k_eq_1) {
diff --git a/test/f32-prelu.cc b/test/f32-prelu.cc
index 43e248d..306601d 100644
--- a/test/f32-prelu.cc
+++ b/test/f32-prelu.cc
@@ -3,16 +3,16 @@
 // This source code is licensed under the BSD-style license found in the
 // LICENSE file in the root directory of this source tree.
 
-#include <cpuinfo.h>
 #include <gtest/gtest.h>
 
+#include <xnnpack/common.h>
 #include <xnnpack/isa-checks.h>
-#include <xnnpack/prelu.h>
 
+#include <xnnpack/prelu.h>
 #include "prelu-microkernel-tester.h"
 
 
-#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
   TEST(F32_PRELU_X4__SSE2, fulltile_n_eq_4) {
     TEST_REQUIRES_X86_SSE2;
     PReLUMicrokernelTester()
@@ -181,10 +181,10 @@
       }
     }
   }
-#endif  // CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
-#if !CPUINFO_ARCH_WASM && !CPUINFO_ARCH_ASMJS
+#if !XNN_ARCH_WASM && !XNN_ARCH_ASMJS
   TEST(F32_PRELU_X4__PSIMD, fulltile_n_eq_4) {
     TEST_REQUIRES_PSIMD;
     PReLUMicrokernelTester()
@@ -353,7 +353,7 @@
       }
     }
   }
-#endif  // !CPUINFO_ARCH_WASM && !CPUINFO_ARCH_ASMJS
+#endif  // !XNN_ARCH_WASM && !XNN_ARCH_ASMJS
 
 
 TEST(F32_PRELU_X4__SCALAR, fulltile_n_eq_1) {
diff --git a/test/f32-rmax.cc b/test/f32-rmax.cc
index 5661f32..5d8bf06 100644
--- a/test/f32-rmax.cc
+++ b/test/f32-rmax.cc
@@ -3,16 +3,16 @@
 // This source code is licensed under the BSD-style license found in the
 // LICENSE file in the root directory of this source tree.
 
-#include <cpuinfo.h>
 #include <gtest/gtest.h>
 
+#include <xnnpack/common.h>
 #include <xnnpack/isa-checks.h>
-#include <xnnpack/rmax.h>
 
+#include <xnnpack/rmax.h>
 #include "rmax-microkernel-tester.h"
 
 
-#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
   TEST(F32_RMAX__NEON, n_lt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t n = 1; n < 16; n++) {
@@ -46,9 +46,9 @@
         .Test(xnn_f32_rmax_ukernel__neon);
     }
   }
-#endif  // CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
-#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
   TEST(F32_RMAX__SSE, n_lt_16) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t n = 1; n < 16; n++) {
@@ -150,7 +150,7 @@
         .Test(xnn_f32_rmax_ukernel__avx512f);
     }
   }
-#endif  // CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 TEST(F32_RMAX__SCALAR, n_lt_4) {
   for (size_t n = 1; n < 4; n++) {
diff --git a/test/f32-spmm.cc b/test/f32-spmm.cc
index 535457e..cb3f72a 100644
--- a/test/f32-spmm.cc
+++ b/test/f32-spmm.cc
@@ -8,16 +8,16 @@
 //   Generator: tools/generate-spmm-test.py
 
 
-#include <cpuinfo.h>
 #include <gtest/gtest.h>
 
-#include <xnnpack/spmm.h>
+#include <xnnpack/common.h>
 #include <xnnpack/isa-checks.h>
 
+#include <xnnpack/spmm.h>
 #include "spmm-microkernel-tester.h"
 
 
-#if CPUINFO_ARCH_ARM64
+#if XNN_ARCH_ARM64
   TEST(F32_SPMM_4X1__NEONFMA, k_eq_1) {
     TEST_REQUIRES_ARM_NEON_FMA;
     SpMMMicrokernelTester()
@@ -179,10 +179,10 @@
       }
     }
   }
-#endif  // CPUINFO_ARCH_ARM64
+#endif  // XNN_ARCH_ARM64
 
 
-#if CPUINFO_ARCH_ARM64
+#if XNN_ARCH_ARM64
   TEST(F32_SPMM_4X2__NEONFMA, k_eq_1) {
     TEST_REQUIRES_ARM_NEON_FMA;
     SpMMMicrokernelTester()
@@ -389,10 +389,10 @@
       }
     }
   }
-#endif  // CPUINFO_ARCH_ARM64
+#endif  // XNN_ARCH_ARM64
 
 
-#if CPUINFO_ARCH_ARM64
+#if XNN_ARCH_ARM64
   TEST(F32_SPMM_4X4__NEONFMA, k_eq_1) {
     TEST_REQUIRES_ARM_NEON_FMA;
     SpMMMicrokernelTester()
@@ -599,10 +599,10 @@
       }
     }
   }
-#endif  // CPUINFO_ARCH_ARM64
+#endif  // XNN_ARCH_ARM64
 
 
-#if CPUINFO_ARCH_ARM64
+#if XNN_ARCH_ARM64
   TEST(F32_SPMM_4X1__NEONFMA_PIPELINED, k_eq_1) {
     TEST_REQUIRES_ARM_NEON_FMA;
     SpMMMicrokernelTester()
@@ -764,10 +764,10 @@
       }
     }
   }
-#endif  // CPUINFO_ARCH_ARM64
+#endif  // XNN_ARCH_ARM64
 
 
-#if CPUINFO_ARCH_ARM64
+#if XNN_ARCH_ARM64
   TEST(F32_SPMM_4X1__NEONFMA_UNROLL2, k_eq_2) {
     TEST_REQUIRES_ARM_NEON_FMA;
     SpMMMicrokernelTester()
@@ -957,10 +957,10 @@
       }
     }
   }
-#endif  // CPUINFO_ARCH_ARM64
+#endif  // XNN_ARCH_ARM64
 
 
-#if CPUINFO_ARCH_ARM64
+#if XNN_ARCH_ARM64
   TEST(F32_SPMM_8X1__NEONFMA, k_eq_1) {
     TEST_REQUIRES_ARM_NEON_FMA;
     SpMMMicrokernelTester()
@@ -1122,10 +1122,10 @@
       }
     }
   }
-#endif  // CPUINFO_ARCH_ARM64
+#endif  // XNN_ARCH_ARM64
 
 
-#if CPUINFO_ARCH_ARM64
+#if XNN_ARCH_ARM64
   TEST(F32_SPMM_8X2__NEONFMA, k_eq_1) {
     TEST_REQUIRES_ARM_NEON_FMA;
     SpMMMicrokernelTester()
@@ -1332,10 +1332,10 @@
       }
     }
   }
-#endif  // CPUINFO_ARCH_ARM64
+#endif  // XNN_ARCH_ARM64
 
 
-#if CPUINFO_ARCH_ARM64
+#if XNN_ARCH_ARM64
   TEST(F32_SPMM_8X4__NEONFMA, k_eq_1) {
     TEST_REQUIRES_ARM_NEON_FMA;
     SpMMMicrokernelTester()
@@ -1542,10 +1542,10 @@
       }
     }
   }
-#endif  // CPUINFO_ARCH_ARM64
+#endif  // XNN_ARCH_ARM64
 
 
-#if CPUINFO_ARCH_ARM64
+#if XNN_ARCH_ARM64
   TEST(F32_SPMM_8X1__NEONFMA_PIPELINED, k_eq_1) {
     TEST_REQUIRES_ARM_NEON_FMA;
     SpMMMicrokernelTester()
@@ -1707,10 +1707,10 @@
       }
     }
   }
-#endif  // CPUINFO_ARCH_ARM64
+#endif  // XNN_ARCH_ARM64
 
 
-#if CPUINFO_ARCH_ARM64
+#if XNN_ARCH_ARM64
   TEST(F32_SPMM_8X1__NEONFMA_UNROLL2, k_eq_2) {
     TEST_REQUIRES_ARM_NEON_FMA;
     SpMMMicrokernelTester()
@@ -1900,10 +1900,10 @@
       }
     }
   }
-#endif  // CPUINFO_ARCH_ARM64
+#endif  // XNN_ARCH_ARM64
 
 
-#if CPUINFO_ARCH_ARM64
+#if XNN_ARCH_ARM64
   TEST(F32_SPMM_12X1__NEONFMA, k_eq_1) {
     TEST_REQUIRES_ARM_NEON_FMA;
     SpMMMicrokernelTester()
@@ -2065,10 +2065,10 @@
       }
     }
   }
-#endif  // CPUINFO_ARCH_ARM64
+#endif  // XNN_ARCH_ARM64
 
 
-#if CPUINFO_ARCH_ARM64
+#if XNN_ARCH_ARM64
   TEST(F32_SPMM_12X2__NEONFMA, k_eq_1) {
     TEST_REQUIRES_ARM_NEON_FMA;
     SpMMMicrokernelTester()
@@ -2275,10 +2275,10 @@
       }
     }
   }
-#endif  // CPUINFO_ARCH_ARM64
+#endif  // XNN_ARCH_ARM64
 
 
-#if CPUINFO_ARCH_ARM64
+#if XNN_ARCH_ARM64
   TEST(F32_SPMM_12X4__NEONFMA, k_eq_1) {
     TEST_REQUIRES_ARM_NEON_FMA;
     SpMMMicrokernelTester()
@@ -2485,10 +2485,10 @@
       }
     }
   }
-#endif  // CPUINFO_ARCH_ARM64
+#endif  // XNN_ARCH_ARM64
 
 
-#if CPUINFO_ARCH_ARM64
+#if XNN_ARCH_ARM64
   TEST(F32_SPMM_16X1__NEONFMA, k_eq_1) {
     TEST_REQUIRES_ARM_NEON_FMA;
     SpMMMicrokernelTester()
@@ -2650,10 +2650,10 @@
       }
     }
   }
-#endif  // CPUINFO_ARCH_ARM64
+#endif  // XNN_ARCH_ARM64
 
 
-#if CPUINFO_ARCH_ARM64
+#if XNN_ARCH_ARM64
   TEST(F32_SPMM_16X2__NEONFMA, k_eq_1) {
     TEST_REQUIRES_ARM_NEON_FMA;
     SpMMMicrokernelTester()
@@ -2860,10 +2860,10 @@
       }
     }
   }
-#endif  // CPUINFO_ARCH_ARM64
+#endif  // XNN_ARCH_ARM64
 
 
-#if CPUINFO_ARCH_ARM64
+#if XNN_ARCH_ARM64
   TEST(F32_SPMM_16X4__NEONFMA, k_eq_1) {
     TEST_REQUIRES_ARM_NEON_FMA;
     SpMMMicrokernelTester()
@@ -3070,10 +3070,10 @@
       }
     }
   }
-#endif  // CPUINFO_ARCH_ARM64
+#endif  // XNN_ARCH_ARM64
 
 
-#if CPUINFO_ARCH_ARM64
+#if XNN_ARCH_ARM64
   TEST(F32_SPMM_16X1__NEONFMA_PIPELINED, k_eq_1) {
     TEST_REQUIRES_ARM_NEON_FMA;
     SpMMMicrokernelTester()
@@ -3235,10 +3235,10 @@
       }
     }
   }
-#endif  // CPUINFO_ARCH_ARM64
+#endif  // XNN_ARCH_ARM64
 
 
-#if CPUINFO_ARCH_ARM64
+#if XNN_ARCH_ARM64
   TEST(F32_SPMM_16X1__NEONFMA_UNROLL2, k_eq_2) {
     TEST_REQUIRES_ARM_NEON_FMA;
     SpMMMicrokernelTester()
@@ -3428,10 +3428,10 @@
       }
     }
   }
-#endif  // CPUINFO_ARCH_ARM64
+#endif  // XNN_ARCH_ARM64
 
 
-#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
   TEST(F32_SPMM_4X1__SSE, k_eq_1) {
     TEST_REQUIRES_X86_SSE;
     SpMMMicrokernelTester()
@@ -3593,10 +3593,10 @@
       }
     }
   }
-#endif  // CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
-#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
   TEST(F32_SPMM_8X1__SSE, k_eq_1) {
     TEST_REQUIRES_X86_SSE;
     SpMMMicrokernelTester()
@@ -3758,7 +3758,7 @@
       }
     }
   }
-#endif  // CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 TEST(F32_SPMM_1X1__SCALAR, k_eq_1) {
diff --git a/test/f32-vadd.cc b/test/f32-vadd.cc
index de89b23..cc91e5e 100644
--- a/test/f32-vadd.cc
+++ b/test/f32-vadd.cc
@@ -3,16 +3,16 @@
 // This source code is licensed under the BSD-style license found in the
 // LICENSE file in the root directory of this source tree.
 
-#include <cpuinfo.h>
 #include <gtest/gtest.h>
 
+#include <xnnpack/common.h>
 #include <xnnpack/isa-checks.h>
-#include <xnnpack/vadd.h>
 
+#include <xnnpack/vadd.h>
 #include "vadd-microkernel-tester.h"
 
 
-#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
   TEST(F32_VADD__SSE, n_eq_4) {
     TEST_REQUIRES_X86_SSE;
     VAddMicrokernelTester()
@@ -116,10 +116,10 @@
         .Test(xnn_f32_vadd_ukernel__sse);
     }
   }
-#endif  // CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
-#if !CPUINFO_ARCH_WASM && !CPUINFO_ARCH_ASMJS
+#if !XNN_ARCH_WASM && !XNN_ARCH_ASMJS
   TEST(F32_VADD__PSIMD, n_eq_4) {
     TEST_REQUIRES_PSIMD;
     VAddMicrokernelTester()
@@ -223,7 +223,7 @@
         .Test(xnn_f32_vadd_ukernel__psimd, VAddMicrokernelTester::Variant::Scalar);
     }
   }
-#endif  // !CPUINFO_ARCH_WASM && !CPUINFO_ARCH_ASMJS
+#endif  // !XNN_ARCH_WASM && !XNN_ARCH_ASMJS
 
 
 TEST(F32_VADD__SCALAR, n_eq_1) {
diff --git a/test/f32-vmul.cc b/test/f32-vmul.cc
index 81c6200..6230023 100644
--- a/test/f32-vmul.cc
+++ b/test/f32-vmul.cc
@@ -3,16 +3,16 @@
 // This source code is licensed under the BSD-style license found in the
 // LICENSE file in the root directory of this source tree.
 
-#include <cpuinfo.h>
 #include <gtest/gtest.h>
 
+#include <xnnpack/common.h>
 #include <xnnpack/isa-checks.h>
-#include <xnnpack/vmul.h>
 
+#include <xnnpack/vmul.h>
 #include "vmul-microkernel-tester.h"
 
 
-#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
   TEST(F32_VMUL__SSE, n_eq_4) {
     TEST_REQUIRES_X86_SSE;
     VMulMicrokernelTester()
@@ -116,10 +116,10 @@
         .Test(xnn_f32_vmul_ukernel__sse);
     }
   }
-#endif  // CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
-#if !CPUINFO_ARCH_WASM && !CPUINFO_ARCH_ASMJS
+#if !XNN_ARCH_WASM && !XNN_ARCH_ASMJS
   TEST(F32_VMUL__PSIMD, n_eq_4) {
     TEST_REQUIRES_PSIMD;
     VMulMicrokernelTester()
@@ -223,7 +223,7 @@
         .Test(xnn_f32_vmul_ukernel__psimd, VMulMicrokernelTester::Variant::Scalar);
     }
   }
-#endif  // !CPUINFO_ARCH_WASM && !CPUINFO_ARCH_ASMJS
+#endif  // !XNN_ARCH_WASM && !XNN_ARCH_ASMJS
 
 
 TEST(F32_VMUL__SCALAR, n_eq_1) {
diff --git a/test/f32-vmulcaddc.cc b/test/f32-vmulcaddc.cc
index b925e24..ca1ac82 100644
--- a/test/f32-vmulcaddc.cc
+++ b/test/f32-vmulcaddc.cc
@@ -8,16 +8,16 @@
 //   Generator: tools/generate-vmulcaddc-test.py
 
 
-#include <cpuinfo.h>
 #include <gtest/gtest.h>
 
-#include <xnnpack/vmulcaddc.h>
+#include <xnnpack/common.h>
 #include <xnnpack/isa-checks.h>
 
+#include <xnnpack/vmulcaddc.h>
 #include "vmulcaddc-microkernel-tester.h"
 
 
-#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
   TEST(F32_VMULCADDC_C4__NEONFMA_X2, c_eq_4) {
     TEST_REQUIRES_ARM_NEON_FMA;
     VMulCAddCMicrokernelTester()
@@ -155,10 +155,10 @@
       }
     }
   }
-#endif  // CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
-#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
   TEST(F32_VMULCADDC_C4__NEON_X2, c_eq_4) {
     TEST_REQUIRES_ARM_NEON;
     VMulCAddCMicrokernelTester()
@@ -296,10 +296,10 @@
       }
     }
   }
-#endif  // CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
-#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
   TEST(F32_VMULCADDC_C4__SSE_X2, c_eq_4) {
     TEST_REQUIRES_X86_SSE;
     VMulCAddCMicrokernelTester()
@@ -437,10 +437,10 @@
       }
     }
   }
-#endif  // CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
-#if !CPUINFO_ARCH_ASMJS && !CPUINFO_ARCH_WASM
+#if !XNN_ARCH_ASMJS && !XNN_ARCH_WASM
   TEST(F32_VMULCADDC_C4__PSIMD_X2, c_eq_4) {
     TEST_REQUIRES_PSIMD;
     VMulCAddCMicrokernelTester()
@@ -578,7 +578,7 @@
       }
     }
   }
-#endif  // !CPUINFO_ARCH_ASMJS && !CPUINFO_ARCH_WASM
+#endif  // !XNN_ARCH_ASMJS && !XNN_ARCH_WASM
 
 
 TEST(F32_VMULCADDC_C1__SCALAR_X2, c_eq_1) {
diff --git a/test/f32-vsub.cc b/test/f32-vsub.cc
index 65cd8db..9f7b1a8 100644
--- a/test/f32-vsub.cc
+++ b/test/f32-vsub.cc
@@ -3,16 +3,16 @@
 // This source code is licensed under the BSD-style license found in the
 // LICENSE file in the root directory of this source tree.
 
-#include <cpuinfo.h>
 #include <gtest/gtest.h>
 
+#include <xnnpack/common.h>
 #include <xnnpack/isa-checks.h>
-#include <xnnpack/vsub.h>
 
+#include <xnnpack/vsub.h>
 #include "vsub-microkernel-tester.h"
 
 
-#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
   TEST(F32_VSUB__SSE, n_eq_4) {
     TEST_REQUIRES_X86_SSE;
     VSubMicrokernelTester()
@@ -104,10 +104,10 @@
         .Test(xnn_f32_vsub_ukernel__sse);
     }
   }
-#endif  // CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
-#if !CPUINFO_ARCH_WASM && !CPUINFO_ARCH_ASMJS
+#if !XNN_ARCH_WASM && !XNN_ARCH_ASMJS
   TEST(F32_VSUB__PSIMD, n_eq_4) {
     TEST_REQUIRES_PSIMD;
     VSubMicrokernelTester()
@@ -199,7 +199,7 @@
         .Test(xnn_f32_vsub_ukernel__psimd, VSubMicrokernelTester::Variant::Scalar);
     }
   }
-#endif  // !CPUINFO_ARCH_WASM && !CPUINFO_ARCH_ASMJS
+#endif  // !XNN_ARCH_WASM && !XNN_ARCH_ASMJS
 
 
 TEST(F32_VSUB__SCALAR, n_eq_1) {
diff --git a/test/q8-avgpool.cc b/test/q8-avgpool.cc
index 1c7a8d9..86104cd 100644
--- a/test/q8-avgpool.cc
+++ b/test/q8-avgpool.cc
@@ -6,16 +6,16 @@
 // This source code is licensed under the BSD-style license found in the
 // LICENSE file in the root directory of this source tree.
 
-#include <cpuinfo.h>
 #include <gtest/gtest.h>
 
+#include <xnnpack/common.h>
 #include <xnnpack/isa-checks.h>
-#include <xnnpack/avgpool.h>
 
+#include <xnnpack/avgpool.h>
 #include "avgpool-microkernel-tester.h"
 
 
-#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
   TEST(Q8_AVGPOOL_UP9__NEON, kc_eq_8_fulltile) {
     TEST_REQUIRES_ARM_NEON;
     auto tester = AvgPoolMicrokernelTester()
@@ -1117,9 +1117,9 @@
       }
     }
   }
-#endif  // CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
-#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
   TEST(Q8_AVGPOOL_UP9__SSE2, kc_eq_8_fulltile) {
     TEST_REQUIRES_X86_SSE2;
     auto tester = AvgPoolMicrokernelTester()
@@ -2221,7 +2221,7 @@
       }
     }
   }
-#endif  // CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 TEST(Q8_AVGPOOL_UP9__SCALAR, kc_eq_1_fulltile) {
   auto tester = AvgPoolMicrokernelTester()
diff --git a/test/q8-dwconv.cc b/test/q8-dwconv.cc
index 4786906..8a0c41a 100644
--- a/test/q8-dwconv.cc
+++ b/test/q8-dwconv.cc
@@ -11,16 +11,16 @@
 //   Generator: tools/generate-dwconv-test.py
 
 
-#include <cpuinfo.h>
 #include <gtest/gtest.h>
 
-#include <xnnpack/dwconv.h>
+#include <xnnpack/common.h>
 #include <xnnpack/isa-checks.h>
 
+#include <xnnpack/dwconv.h>
 #include "dwconv-microkernel-tester.h"
 
 
-#if CPUINFO_ARCH_ARM
+#if XNN_ARCH_ARM
   TEST(Q8_DWCONV_UP8X9__AARCH32_NEON, c_eq_8) {
     TEST_REQUIRES_ARM_NEON;
     DWConvMicrokernelTester()
@@ -204,10 +204,10 @@
         .Test(xnn_q8_dwconv_ukernel_up8x9__aarch32_neon);
     }
   }
-#endif  // CPUINFO_ARCH_ARM
+#endif  // XNN_ARCH_ARM
 
 
-#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
   TEST(Q8_DWCONV_UP8X9__NEON, c_eq_8) {
     TEST_REQUIRES_ARM_NEON;
     DWConvMicrokernelTester()
@@ -391,10 +391,10 @@
         .Test(xnn_q8_dwconv_ukernel_up8x9__neon);
     }
   }
-#endif  // CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
-#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
   TEST(Q8_DWCONV_UP8X9__SSE2, c_eq_8) {
     TEST_REQUIRES_X86_SSE2;
     DWConvMicrokernelTester()
@@ -578,7 +578,7 @@
         .Test(xnn_q8_dwconv_ukernel_up8x9__sse2);
     }
   }
-#endif  // CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 TEST(Q8_DWCONV_UP1X9__SCALAR, c_eq_1) {
diff --git a/test/q8-gavgpool.cc b/test/q8-gavgpool.cc
index 779702d..79a27bf 100644
--- a/test/q8-gavgpool.cc
+++ b/test/q8-gavgpool.cc
@@ -6,16 +6,16 @@
 // This source code is licensed under the BSD-style license found in the
 // LICENSE file in the root directory of this source tree.
 
-#include <cpuinfo.h>
 #include <gtest/gtest.h>
 
+#include <xnnpack/common.h>
 #include <xnnpack/isa-checks.h>
-#include <xnnpack/gavgpool.h>
 
+#include <xnnpack/gavgpool.h>
 #include "gavgpool-microkernel-tester.h"
 
 
-#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
   TEST(Q8_GAVGPOOL_UP7__NEON, n_eq_8_fulltile) {
     TEST_REQUIRES_ARM_NEON;
     GAvgPoolMicrokernelTester()
@@ -775,9 +775,9 @@
       } 
     }
   }
-#endif  // CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
-#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
   TEST(Q8_GAVGPOOL_UP7__SSE2, n_eq_8_fulltile) {
     TEST_REQUIRES_X86_SSE2;
     GAvgPoolMicrokernelTester()
@@ -1537,7 +1537,7 @@
       } 
     }
   }
-#endif  // CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 TEST(Q8_GAVGPOOL_UP7__SCALAR, n_eq_1_fulltile) {
   GAvgPoolMicrokernelTester()
diff --git a/test/q8-gemm.cc b/test/q8-gemm.cc
index 796cb7d..847e9c2 100644
--- a/test/q8-gemm.cc
+++ b/test/q8-gemm.cc
@@ -11,18 +11,18 @@
 //   Generator: tools/generate-gemm-test.py
 
 
-#include <cpuinfo.h>
 #include <gtest/gtest.h>
 
+#include <xnnpack/common.h>
+#include <xnnpack/isa-checks.h>
+
 #include <xnnpack/gemm.h>
 #include <xnnpack/igemm.h>
 #include <xnnpack/ppmm.h>
-#include <xnnpack/isa-checks.h>
-
 #include "gemm-microkernel-tester.h"
 
 
-#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
   TEST(Q8_GEMM_4X8__NEON, k_eq_8) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
@@ -524,10 +524,10 @@
         .Test(xnn_q8_gemm_ukernel_4x8__neon);
     }
   }
-#endif  // CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
-#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
   TEST(Q8_GEMM_8X8__NEON, k_eq_8) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
@@ -1029,10 +1029,10 @@
         .Test(xnn_q8_gemm_ukernel_8x8__neon);
     }
   }
-#endif  // CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
-#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
   TEST(Q8_GEMM_2X4C8__SSE2, k_eq_8) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
@@ -1534,10 +1534,10 @@
         .Test(xnn_q8_gemm_ukernel_2x4c8__sse2);
     }
   }
-#endif  // CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
-#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
   TEST(Q8_GEMM_4X4C2__SSE2, k_eq_8) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
@@ -2039,7 +2039,7 @@
         .Test(xnn_q8_gemm_ukernel_4x4c2__sse2);
     }
   }
-#endif  // CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 TEST(Q8_GEMM_2X2__SCALAR, k_eq_1) {
diff --git a/test/q8-igemm.cc b/test/q8-igemm.cc
index bc8f45a..e10829c 100644
--- a/test/q8-igemm.cc
+++ b/test/q8-igemm.cc
@@ -11,18 +11,18 @@
 //   Generator: tools/generate-gemm-test.py
 
 
-#include <cpuinfo.h>
 #include <gtest/gtest.h>
 
+#include <xnnpack/common.h>
+#include <xnnpack/isa-checks.h>
+
 #include <xnnpack/gemm.h>
 #include <xnnpack/igemm.h>
 #include <xnnpack/ppmm.h>
-#include <xnnpack/isa-checks.h>
-
 #include "gemm-microkernel-tester.h"
 
 
-#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
   TEST(Q8_IGEMM_4X8__NEON, k_eq_8) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
@@ -536,10 +536,10 @@
         .Test(xnn_q8_igemm_ukernel_4x8__neon);
     }
   }
-#endif  // CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
-#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
   TEST(Q8_IGEMM_8X8__NEON, k_eq_8) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
@@ -1053,10 +1053,10 @@
         .Test(xnn_q8_igemm_ukernel_8x8__neon);
     }
   }
-#endif  // CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
-#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
   TEST(Q8_IGEMM_4X4C2__SSE2, k_eq_8) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
@@ -1570,7 +1570,7 @@
         .Test(xnn_q8_igemm_ukernel_4x4c2__sse2);
     }
   }
-#endif  // CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 TEST(Q8_IGEMM_2X2__SCALAR, k_eq_1) {
diff --git a/test/q8-vadd.cc b/test/q8-vadd.cc
index b075792..5cd7715 100644
--- a/test/q8-vadd.cc
+++ b/test/q8-vadd.cc
@@ -6,16 +6,16 @@
 // This source code is licensed under the BSD-style license found in the
 // LICENSE file in the root directory of this source tree.
 
-#include <cpuinfo.h>
 #include <gtest/gtest.h>
 
+#include <xnnpack/common.h>
 #include <xnnpack/isa-checks.h>
-#include <xnnpack/vadd.h>
 
+#include <xnnpack/vadd.h>
 #include "vadd-microkernel-tester.h"
 
 
-#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
   TEST(Q8_VADD__SSE2, n_eq_8) {
     TEST_REQUIRES_X86_SSE2;
     VAddMicrokernelTester()
@@ -183,9 +183,9 @@
         .Test(xnn_q8_vadd_ukernel__sse2);
     }
   }
-#endif  // CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
-#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
   TEST(Q8_VADD__NEON, n_eq_8) {
     TEST_REQUIRES_ARM_NEON;
     VAddMicrokernelTester()
@@ -353,7 +353,7 @@
         .Test(xnn_q8_vadd_ukernel__neon);
     }
   }
-#endif  // CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 TEST(Q8_VADD__SCALAR, n_eq_1) {
   VAddMicrokernelTester()
diff --git a/test/requantization.cc b/test/requantization.cc
index a5a4edf..2337722 100644
--- a/test/requantization.cc
+++ b/test/requantization.cc
@@ -10,10 +10,11 @@
 #include <cstddef>
 #include <cstdlib>
 
-#include <cpuinfo.h>
 #include <gtest/gtest.h>
-#include <xnnpack/requantization-stubs.h>
 
+#include <xnnpack/common.h>
+
+#include <xnnpack/requantization-stubs.h>
 #include "requantization-tester.h"
 
 
@@ -398,750 +399,746 @@
 }
 
 
-#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
+  /*
+   * Precise SSE2 implementation using floating-point shuffle.
+   */
 
-/*
- * Precise SSE2 implementation using floating-point shuffle.
- */
-
-TEST(PRECISE__SSE2, exact_divide_by_po2) {
-  for (uint32_t s = 1; s < 32; s++) {
-    RequantizationTester()
-      .s(s)
-      .testExactDivideByPO2(xnn_requantize_precise__sse2);
-  }
-}
-
-TEST(PRECISE__SSE2, exact_divide_by_po2_with_zero_point) {
-  for (int32_t zeroPoint = 1; zeroPoint < 256; zeroPoint++) {
+  TEST(PRECISE__SSE2, exact_divide_by_po2) {
     for (uint32_t s = 1; s < 32; s++) {
       RequantizationTester()
-        .zeroPoint(zeroPoint)
         .s(s)
         .testExactDivideByPO2(xnn_requantize_precise__sse2);
     }
   }
-}
 
-TEST(PRECISE__SSE2, divide_by_po2_with_rounding_up) {
-  for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
-    for (uint32_t s = 1; s < 32; s++) {
-      RequantizationTester()
-        .zeroPoint(zeroPoint)
-        .s(s)
-        .testDivideByPO2WithRoundingUp(xnn_requantize_precise__sse2);
+  TEST(PRECISE__SSE2, exact_divide_by_po2_with_zero_point) {
+    for (int32_t zeroPoint = 1; zeroPoint < 256; zeroPoint++) {
+      for (uint32_t s = 1; s < 32; s++) {
+        RequantizationTester()
+          .zeroPoint(zeroPoint)
+          .s(s)
+          .testExactDivideByPO2(xnn_requantize_precise__sse2);
+      }
     }
   }
-}
 
-TEST(PRECISE__SSE2, divide_by_po2_with_rounding_down) {
-  for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
-    for (uint32_t s = 1; s < 32; s++) {
-      RequantizationTester()
-        .zeroPoint(zeroPoint)
-        .s(s)
-        .testDivideByPO2WithRoundingDown(xnn_requantize_precise__sse2);
+  TEST(PRECISE__SSE2, divide_by_po2_with_rounding_up) {
+    for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
+      for (uint32_t s = 1; s < 32; s++) {
+        RequantizationTester()
+          .zeroPoint(zeroPoint)
+          .s(s)
+          .testDivideByPO2WithRoundingUp(xnn_requantize_precise__sse2);
+      }
     }
   }
-}
 
-TEST(PRECISE__SSE2, divide_by_po2_with_rounding_away) {
-  for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
-    for (uint32_t s = 1; s < 32; s++) {
-      RequantizationTester()
-        .zeroPoint(zeroPoint)
-        .s(s)
-        .testDivideByPO2WithRoundingAway(xnn_requantize_precise__sse2);
+  TEST(PRECISE__SSE2, divide_by_po2_with_rounding_down) {
+    for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
+      for (uint32_t s = 1; s < 32; s++) {
+        RequantizationTester()
+          .zeroPoint(zeroPoint)
+          .s(s)
+          .testDivideByPO2WithRoundingDown(xnn_requantize_precise__sse2);
+      }
     }
   }
-}
 
-TEST(PRECISE__SSE2, special_cases) {
-  RequantizationTester()
-    .testSpecialCases(xnn_requantize_precise__sse2);
-}
+  TEST(PRECISE__SSE2, divide_by_po2_with_rounding_away) {
+    for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
+      for (uint32_t s = 1; s < 32; s++) {
+        RequantizationTester()
+          .zeroPoint(zeroPoint)
+          .s(s)
+          .testDivideByPO2WithRoundingAway(xnn_requantize_precise__sse2);
+      }
+    }
+  }
 
-TEST(PRECISE__SSE2, random_cases) {
-  RequantizationTester()
-    .iterations(100)
-    .testRandomCasesPrecise(xnn_requantize_precise__sse2);
-}
-
-
-/*
- * Precise SSSE3 implementation using floating-point shuffle.
- */
-
-TEST(PRECISE__SSSE3, exact_divide_by_po2) {
-  for (uint32_t s = 1; s < 32; s++) {
+  TEST(PRECISE__SSE2, special_cases) {
     RequantizationTester()
-      .s(s)
-      .testExactDivideByPO2(xnn_requantize_precise__ssse3);
+      .testSpecialCases(xnn_requantize_precise__sse2);
   }
-}
 
-TEST(PRECISE__SSSE3, exact_divide_by_po2_with_zero_point) {
-  for (int32_t zeroPoint = 1; zeroPoint < 256; zeroPoint++) {
+  TEST(PRECISE__SSE2, random_cases) {
+    RequantizationTester()
+      .iterations(100)
+      .testRandomCasesPrecise(xnn_requantize_precise__sse2);
+  }
+
+
+  /*
+   * Precise SSSE3 implementation using floating-point shuffle.
+   */
+
+  TEST(PRECISE__SSSE3, exact_divide_by_po2) {
     for (uint32_t s = 1; s < 32; s++) {
       RequantizationTester()
-        .zeroPoint(zeroPoint)
         .s(s)
         .testExactDivideByPO2(xnn_requantize_precise__ssse3);
     }
   }
-}
 
-TEST(PRECISE__SSSE3, divide_by_po2_with_rounding_up) {
-  for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
-    for (uint32_t s = 1; s < 32; s++) {
-      RequantizationTester()
-        .zeroPoint(zeroPoint)
-        .s(s)
-        .testDivideByPO2WithRoundingUp(xnn_requantize_precise__ssse3);
+  TEST(PRECISE__SSSE3, exact_divide_by_po2_with_zero_point) {
+    for (int32_t zeroPoint = 1; zeroPoint < 256; zeroPoint++) {
+      for (uint32_t s = 1; s < 32; s++) {
+        RequantizationTester()
+          .zeroPoint(zeroPoint)
+          .s(s)
+          .testExactDivideByPO2(xnn_requantize_precise__ssse3);
+      }
     }
   }
-}
 
-TEST(PRECISE__SSSE3, divide_by_po2_with_rounding_down) {
-  for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
-    for (uint32_t s = 1; s < 32; s++) {
-      RequantizationTester()
-        .zeroPoint(zeroPoint)
-        .s(s)
-        .testDivideByPO2WithRoundingDown(xnn_requantize_precise__ssse3);
+  TEST(PRECISE__SSSE3, divide_by_po2_with_rounding_up) {
+    for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
+      for (uint32_t s = 1; s < 32; s++) {
+        RequantizationTester()
+          .zeroPoint(zeroPoint)
+          .s(s)
+          .testDivideByPO2WithRoundingUp(xnn_requantize_precise__ssse3);
+      }
     }
   }
-}
 
-TEST(PRECISE__SSSE3, divide_by_po2_with_rounding_away) {
-  for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
-    for (uint32_t s = 1; s < 32; s++) {
-      RequantizationTester()
-        .zeroPoint(zeroPoint)
-        .s(s)
-        .testDivideByPO2WithRoundingAway(xnn_requantize_precise__ssse3);
+  TEST(PRECISE__SSSE3, divide_by_po2_with_rounding_down) {
+    for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
+      for (uint32_t s = 1; s < 32; s++) {
+        RequantizationTester()
+          .zeroPoint(zeroPoint)
+          .s(s)
+          .testDivideByPO2WithRoundingDown(xnn_requantize_precise__ssse3);
+      }
     }
   }
-}
 
-TEST(PRECISE__SSSE3, special_cases) {
-  RequantizationTester()
-    .testSpecialCases(xnn_requantize_precise__ssse3);
-}
+  TEST(PRECISE__SSSE3, divide_by_po2_with_rounding_away) {
+    for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
+      for (uint32_t s = 1; s < 32; s++) {
+        RequantizationTester()
+          .zeroPoint(zeroPoint)
+          .s(s)
+          .testDivideByPO2WithRoundingAway(xnn_requantize_precise__ssse3);
+      }
+    }
+  }
 
-TEST(PRECISE__SSSE3, random_cases) {
-  RequantizationTester()
-    .iterations(100)
-    .testRandomCasesPrecise(xnn_requantize_precise__ssse3);
-}
-
-
-/*
- * Precise SSE4.1 implementation using static blend instruction.
- */
-
-TEST(PRECISE__SSE4, exact_divide_by_po2) {
-  for (uint32_t s = 1; s < 32; s++) {
+  TEST(PRECISE__SSSE3, special_cases) {
     RequantizationTester()
-      .s(s)
-      .testExactDivideByPO2(xnn_requantize_precise__sse4);
+      .testSpecialCases(xnn_requantize_precise__ssse3);
   }
-}
 
-TEST(PRECISE__SSE4, exact_divide_by_po2_with_zero_point) {
-  for (int32_t zeroPoint = 1; zeroPoint < 256; zeroPoint++) {
+  TEST(PRECISE__SSSE3, random_cases) {
+    RequantizationTester()
+      .iterations(100)
+      .testRandomCasesPrecise(xnn_requantize_precise__ssse3);
+  }
+
+
+  /*
+   * Precise SSE4.1 implementation using static blend instruction.
+   */
+
+  TEST(PRECISE__SSE4, exact_divide_by_po2) {
     for (uint32_t s = 1; s < 32; s++) {
       RequantizationTester()
-        .zeroPoint(zeroPoint)
         .s(s)
         .testExactDivideByPO2(xnn_requantize_precise__sse4);
     }
   }
-}
 
-TEST(PRECISE__SSE4, divide_by_po2_with_rounding_up) {
-  for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
-    for (uint32_t s = 1; s < 32; s++) {
-      RequantizationTester()
-        .zeroPoint(zeroPoint)
-        .s(s)
-        .testDivideByPO2WithRoundingUp(xnn_requantize_precise__sse4);
+  TEST(PRECISE__SSE4, exact_divide_by_po2_with_zero_point) {
+    for (int32_t zeroPoint = 1; zeroPoint < 256; zeroPoint++) {
+      for (uint32_t s = 1; s < 32; s++) {
+        RequantizationTester()
+          .zeroPoint(zeroPoint)
+          .s(s)
+          .testExactDivideByPO2(xnn_requantize_precise__sse4);
+      }
     }
   }
-}
 
-TEST(PRECISE__SSE4, divide_by_po2_with_rounding_down) {
-  for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
-    for (uint32_t s = 1; s < 32; s++) {
-      RequantizationTester()
-        .zeroPoint(zeroPoint)
-        .s(s)
-        .testDivideByPO2WithRoundingDown(xnn_requantize_precise__sse4);
+  TEST(PRECISE__SSE4, divide_by_po2_with_rounding_up) {
+    for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
+      for (uint32_t s = 1; s < 32; s++) {
+        RequantizationTester()
+          .zeroPoint(zeroPoint)
+          .s(s)
+          .testDivideByPO2WithRoundingUp(xnn_requantize_precise__sse4);
+      }
     }
   }
-}
 
-TEST(PRECISE__SSE4, divide_by_po2_with_rounding_away) {
-  for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
-    for (uint32_t s = 1; s < 32; s++) {
-      RequantizationTester()
-        .zeroPoint(zeroPoint)
-        .s(s)
-        .testDivideByPO2WithRoundingAway(xnn_requantize_precise__sse4);
+  TEST(PRECISE__SSE4, divide_by_po2_with_rounding_down) {
+    for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
+      for (uint32_t s = 1; s < 32; s++) {
+        RequantizationTester()
+          .zeroPoint(zeroPoint)
+          .s(s)
+          .testDivideByPO2WithRoundingDown(xnn_requantize_precise__sse4);
+      }
     }
   }
-}
 
-TEST(PRECISE__SSE4, special_cases) {
-  RequantizationTester()
-    .testSpecialCases(xnn_requantize_precise__sse4);
-}
+  TEST(PRECISE__SSE4, divide_by_po2_with_rounding_away) {
+    for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
+      for (uint32_t s = 1; s < 32; s++) {
+        RequantizationTester()
+          .zeroPoint(zeroPoint)
+          .s(s)
+          .testDivideByPO2WithRoundingAway(xnn_requantize_precise__sse4);
+      }
+    }
+  }
 
-TEST(PRECISE__SSE4, random_cases) {
-  RequantizationTester()
-    .iterations(100)
-    .testRandomCasesPrecise(xnn_requantize_precise__sse4);
-}
-
-
-/*
- * FP32-based x86 SSE2 implementation.
- */
-
-TEST(FP32__SSE2, random_cases) {
-  RequantizationTester()
-    .iterations(1000)
-    .testRandomCasesApproximate(xnn_requantize_fp32__sse2);
-}
-
-
-/*
- * Q31-based x86 SSE2 implementation.
- */
-
-TEST(Q31__SSE2, exact_divide_by_po2) {
-  for (uint32_t s = 1; s < 32; s++) {
+  TEST(PRECISE__SSE4, special_cases) {
     RequantizationTester()
-      .s(s)
-      .testExactDivideByPO2(xnn_requantize_q31__sse2);
+      .testSpecialCases(xnn_requantize_precise__sse4);
   }
-}
 
-TEST(Q31__SSE2, exact_divide_by_po2_with_zero_point) {
-  for (int32_t zeroPoint = 1; zeroPoint < 256; zeroPoint++) {
+  TEST(PRECISE__SSE4, random_cases) {
+    RequantizationTester()
+      .iterations(100)
+      .testRandomCasesPrecise(xnn_requantize_precise__sse4);
+  }
+
+
+  /*
+   * FP32-based x86 SSE2 implementation.
+   */
+
+  TEST(FP32__SSE2, random_cases) {
+    RequantizationTester()
+      .iterations(1000)
+      .testRandomCasesApproximate(xnn_requantize_fp32__sse2);
+  }
+
+
+  /*
+   * Q31-based x86 SSE2 implementation.
+   */
+
+  TEST(Q31__SSE2, exact_divide_by_po2) {
     for (uint32_t s = 1; s < 32; s++) {
       RequantizationTester()
-        .zeroPoint(zeroPoint)
         .s(s)
         .testExactDivideByPO2(xnn_requantize_q31__sse2);
     }
   }
-}
 
-TEST(Q31__SSE2, divide_by_po2_with_rounding_up) {
-  for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
-    for (uint32_t s = 1; s < 32; s++) {
-      RequantizationTester()
-        .zeroPoint(zeroPoint)
-        .s(s)
-        .testDivideByPO2WithRoundingUp(xnn_requantize_q31__sse2);
+  TEST(Q31__SSE2, exact_divide_by_po2_with_zero_point) {
+    for (int32_t zeroPoint = 1; zeroPoint < 256; zeroPoint++) {
+      for (uint32_t s = 1; s < 32; s++) {
+        RequantizationTester()
+          .zeroPoint(zeroPoint)
+          .s(s)
+          .testExactDivideByPO2(xnn_requantize_q31__sse2);
+      }
     }
   }
-}
 
-/* No rounding down Test - it fails because of upward bias in multiplication */
-
-TEST(Q31__SSE2, divide_by_po2_with_rounding_away) {
-  for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
-    for (uint32_t s = 1; s < 32; s++) {
-      RequantizationTester()
-        .zeroPoint(zeroPoint)
-        .s(s)
-        .testDivideByPO2WithRoundingAway(xnn_requantize_q31__sse2);
+  TEST(Q31__SSE2, divide_by_po2_with_rounding_up) {
+    for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
+      for (uint32_t s = 1; s < 32; s++) {
+        RequantizationTester()
+          .zeroPoint(zeroPoint)
+          .s(s)
+          .testDivideByPO2WithRoundingUp(xnn_requantize_q31__sse2);
+      }
     }
   }
-}
 
-TEST(Q31__SSE2, special_cases) {
-  RequantizationTester()
-    .testSpecialCases(xnn_requantize_q31__sse2);
-}
+  /* No rounding-down test: it fails because of the upward bias in multiplication. */
 
-TEST(Q31__SSE2, random_cases) {
-  RequantizationTester()
-    .iterations(100)
-    .testRandomCasesApproximate(xnn_requantize_q31__sse2);
-}
+  TEST(Q31__SSE2, divide_by_po2_with_rounding_away) {
+    for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
+      for (uint32_t s = 1; s < 32; s++) {
+        RequantizationTester()
+          .zeroPoint(zeroPoint)
+          .s(s)
+          .testDivideByPO2WithRoundingAway(xnn_requantize_q31__sse2);
+      }
+    }
+  }
 
-TEST(Q31__SSE2, random_match_gemmlowp) {
-  RequantizationTester()
-    .iterations(100)
-    .testRandomCasesAgainstReference(xnn_requantize_q31__sse2, xnn_requantize_gemmlowp__sse2);
-}
-
-
-/*
- * Q31-based x86 SSSE3 implementation.
- */
-
-TEST(Q31__SSSE3, exact_divide_by_po2) {
-  for (uint32_t s = 1; s < 32; s++) {
+  TEST(Q31__SSE2, special_cases) {
     RequantizationTester()
-      .s(s)
-      .testExactDivideByPO2(xnn_requantize_q31__ssse3);
+      .testSpecialCases(xnn_requantize_q31__sse2);
   }
-}
 
-TEST(Q31__SSSE3, exact_divide_by_po2_with_zero_point) {
-  for (int32_t zeroPoint = 1; zeroPoint < 256; zeroPoint++) {
+  TEST(Q31__SSE2, random_cases) {
+    RequantizationTester()
+      .iterations(100)
+      .testRandomCasesApproximate(xnn_requantize_q31__sse2);
+  }
+
+  TEST(Q31__SSE2, random_match_gemmlowp) {
+    RequantizationTester()
+      .iterations(100)
+      .testRandomCasesAgainstReference(xnn_requantize_q31__sse2, xnn_requantize_gemmlowp__sse2);
+  }
+
+
+  /*
+   * Q31-based x86 SSSE3 implementation.
+   */
+
+  TEST(Q31__SSSE3, exact_divide_by_po2) {
     for (uint32_t s = 1; s < 32; s++) {
       RequantizationTester()
-        .zeroPoint(zeroPoint)
         .s(s)
         .testExactDivideByPO2(xnn_requantize_q31__ssse3);
     }
   }
-}
 
-TEST(Q31__SSSE3, divide_by_po2_with_rounding_up) {
-  for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
-    for (uint32_t s = 1; s < 32; s++) {
-      RequantizationTester()
-        .zeroPoint(zeroPoint)
-        .s(s)
-        .testDivideByPO2WithRoundingUp(xnn_requantize_q31__ssse3);
+  TEST(Q31__SSSE3, exact_divide_by_po2_with_zero_point) {
+    for (int32_t zeroPoint = 1; zeroPoint < 256; zeroPoint++) {
+      for (uint32_t s = 1; s < 32; s++) {
+        RequantizationTester()
+          .zeroPoint(zeroPoint)
+          .s(s)
+          .testExactDivideByPO2(xnn_requantize_q31__ssse3);
+      }
     }
   }
-}
 
-/* No rounding down Test - it fails because of upward bias in multiplication */
-
-TEST(Q31__SSSE3, divide_by_po2_with_rounding_away) {
-  for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
-    for (uint32_t s = 1; s < 32; s++) {
-      RequantizationTester()
-        .zeroPoint(zeroPoint)
-        .s(s)
-        .testDivideByPO2WithRoundingAway(xnn_requantize_q31__ssse3);
+  TEST(Q31__SSSE3, divide_by_po2_with_rounding_up) {
+    for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
+      for (uint32_t s = 1; s < 32; s++) {
+        RequantizationTester()
+          .zeroPoint(zeroPoint)
+          .s(s)
+          .testDivideByPO2WithRoundingUp(xnn_requantize_q31__ssse3);
+      }
     }
   }
-}
 
-TEST(Q31__SSSE3, special_cases) {
-  RequantizationTester()
-    .testSpecialCases(xnn_requantize_q31__ssse3);
-}
+  /* No rounding-down test: it fails because of the upward bias in multiplication. */
 
-TEST(Q31__SSSE3, random_cases) {
-  RequantizationTester()
-    .iterations(100)
-    .testRandomCasesApproximate(xnn_requantize_q31__ssse3);
-}
+  TEST(Q31__SSSE3, divide_by_po2_with_rounding_away) {
+    for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
+      for (uint32_t s = 1; s < 32; s++) {
+        RequantizationTester()
+          .zeroPoint(zeroPoint)
+          .s(s)
+          .testDivideByPO2WithRoundingAway(xnn_requantize_q31__ssse3);
+      }
+    }
+  }
 
-TEST(Q31__SSSE3, random_match_gemmlowp) {
-  RequantizationTester()
-    .iterations(100)
-    .testRandomCasesAgainstReference(xnn_requantize_q31__ssse3, xnn_requantize_gemmlowp__ssse3);
-}
-
-
-/*
- * Q31-based x86 SSE4 implementation.
- */
-
-TEST(Q31__SSE4, exact_divide_by_po2) {
-  for (uint32_t s = 1; s < 32; s++) {
+  TEST(Q31__SSSE3, special_cases) {
     RequantizationTester()
-      .s(s)
-      .testExactDivideByPO2(xnn_requantize_q31__sse4);
+      .testSpecialCases(xnn_requantize_q31__ssse3);
   }
-}
 
-TEST(Q31__SSE4, exact_divide_by_po2_with_zero_point) {
-  for (int32_t zeroPoint = 1; zeroPoint < 256; zeroPoint++) {
+  TEST(Q31__SSSE3, random_cases) {
+    RequantizationTester()
+      .iterations(100)
+      .testRandomCasesApproximate(xnn_requantize_q31__ssse3);
+  }
+
+  TEST(Q31__SSSE3, random_match_gemmlowp) {
+    RequantizationTester()
+      .iterations(100)
+      .testRandomCasesAgainstReference(xnn_requantize_q31__ssse3, xnn_requantize_gemmlowp__ssse3);
+  }
+
+
+  /*
+   * Q31-based x86 SSE4 implementation.
+   */
+
+  TEST(Q31__SSE4, exact_divide_by_po2) {
     for (uint32_t s = 1; s < 32; s++) {
       RequantizationTester()
-        .zeroPoint(zeroPoint)
         .s(s)
         .testExactDivideByPO2(xnn_requantize_q31__sse4);
     }
   }
-}
 
-TEST(Q31__SSE4, divide_by_po2_with_rounding_up) {
-  for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
-    for (uint32_t s = 1; s < 32; s++) {
-      RequantizationTester()
-        .zeroPoint(zeroPoint)
-        .s(s)
-        .testDivideByPO2WithRoundingUp(xnn_requantize_q31__sse4);
+  TEST(Q31__SSE4, exact_divide_by_po2_with_zero_point) {
+    for (int32_t zeroPoint = 1; zeroPoint < 256; zeroPoint++) {
+      for (uint32_t s = 1; s < 32; s++) {
+        RequantizationTester()
+          .zeroPoint(zeroPoint)
+          .s(s)
+          .testExactDivideByPO2(xnn_requantize_q31__sse4);
+      }
     }
   }
-}
 
-/* No rounding down Test - it fails because of upward bias in multiplication */
-
-TEST(Q31__SSE4, divide_by_po2_with_rounding_away) {
-  for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
-    for (uint32_t s = 1; s < 32; s++) {
-      RequantizationTester()
-        .zeroPoint(zeroPoint)
-        .s(s)
-        .testDivideByPO2WithRoundingAway(xnn_requantize_q31__sse4);
+  TEST(Q31__SSE4, divide_by_po2_with_rounding_up) {
+    for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
+      for (uint32_t s = 1; s < 32; s++) {
+        RequantizationTester()
+          .zeroPoint(zeroPoint)
+          .s(s)
+          .testDivideByPO2WithRoundingUp(xnn_requantize_q31__sse4);
+      }
     }
   }
-}
 
-TEST(Q31__SSE4, special_cases) {
-  RequantizationTester()
-    .testSpecialCases(xnn_requantize_q31__sse4);
-}
+  /* No rounding-down test: it fails because of the upward bias in multiplication. */
 
-TEST(Q31__SSE4, random_cases) {
-  RequantizationTester()
-    .iterations(100)
-    .testRandomCasesApproximate(xnn_requantize_q31__sse4);
-}
+  TEST(Q31__SSE4, divide_by_po2_with_rounding_away) {
+    for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
+      for (uint32_t s = 1; s < 32; s++) {
+        RequantizationTester()
+          .zeroPoint(zeroPoint)
+          .s(s)
+          .testDivideByPO2WithRoundingAway(xnn_requantize_q31__sse4);
+      }
+    }
+  }
 
-TEST(Q31__SSE4, random_match_gemmlowp) {
-  RequantizationTester()
-    .iterations(100)
-    .testRandomCasesAgainstReference(xnn_requantize_q31__sse4, xnn_requantize_gemmlowp__sse4);
-}
-
-
-/*
- * x86 SSE2 implementation from gemmlowp.
- */
-
-TEST(GEMMLOWP__SSE2, exact_divide_by_po2) {
-  for (uint32_t s = 1; s < 32; s++) {
+  TEST(Q31__SSE4, special_cases) {
     RequantizationTester()
-      .s(s)
-      .testExactDivideByPO2(xnn_requantize_gemmlowp__sse2);
+      .testSpecialCases(xnn_requantize_q31__sse4);
   }
-}
 
-TEST(GEMMLOWP__SSE2, exact_divide_by_po2_with_zero_point) {
-  for (int32_t zeroPoint = 1; zeroPoint < 256; zeroPoint++) {
+  TEST(Q31__SSE4, random_cases) {
+    RequantizationTester()
+      .iterations(100)
+      .testRandomCasesApproximate(xnn_requantize_q31__sse4);
+  }
+
+  TEST(Q31__SSE4, random_match_gemmlowp) {
+    RequantizationTester()
+      .iterations(100)
+      .testRandomCasesAgainstReference(xnn_requantize_q31__sse4, xnn_requantize_gemmlowp__sse4);
+  }
+
+
+  /*
+   * x86 SSE2 implementation from gemmlowp.
+   */
+
+  TEST(GEMMLOWP__SSE2, exact_divide_by_po2) {
     for (uint32_t s = 1; s < 32; s++) {
       RequantizationTester()
-        .zeroPoint(zeroPoint)
         .s(s)
         .testExactDivideByPO2(xnn_requantize_gemmlowp__sse2);
     }
   }
-}
 
-TEST(GEMMLOWP__SSE2, divide_by_po2_with_rounding_up) {
-  for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
-    for (uint32_t s = 1; s < 32; s++) {
-      RequantizationTester()
-        .zeroPoint(zeroPoint)
-        .s(s)
-        .testDivideByPO2WithRoundingUp(xnn_requantize_gemmlowp__sse2);
+  TEST(GEMMLOWP__SSE2, exact_divide_by_po2_with_zero_point) {
+    for (int32_t zeroPoint = 1; zeroPoint < 256; zeroPoint++) {
+      for (uint32_t s = 1; s < 32; s++) {
+        RequantizationTester()
+          .zeroPoint(zeroPoint)
+          .s(s)
+          .testExactDivideByPO2(xnn_requantize_gemmlowp__sse2);
+      }
     }
   }
-}
 
-/* No rounding down Test - it fails because of upward bias in multiplication */
-
-TEST(GEMMLOWP__SSE2, divide_by_po2_with_rounding_away) {
-  for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
-    for (uint32_t s = 1; s < 32; s++) {
-      RequantizationTester()
-        .zeroPoint(zeroPoint)
-        .s(s)
-        .testDivideByPO2WithRoundingAway(xnn_requantize_gemmlowp__sse2);
+  TEST(GEMMLOWP__SSE2, divide_by_po2_with_rounding_up) {
+    for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
+      for (uint32_t s = 1; s < 32; s++) {
+        RequantizationTester()
+          .zeroPoint(zeroPoint)
+          .s(s)
+          .testDivideByPO2WithRoundingUp(xnn_requantize_gemmlowp__sse2);
+      }
     }
   }
-}
 
-TEST(GEMMLOWP__SSE2, special_cases) {
-  RequantizationTester()
-    .testSpecialCases(xnn_requantize_gemmlowp__sse2);
-}
+  /* No rounding-down test: it fails because of the upward bias in multiplication. */
 
-TEST(GEMMLOWP__SSE2, random_cases) {
-  RequantizationTester()
-    .iterations(100)
-    .testRandomCasesApproximate(xnn_requantize_gemmlowp__sse2);
-}
+  TEST(GEMMLOWP__SSE2, divide_by_po2_with_rounding_away) {
+    for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
+      for (uint32_t s = 1; s < 32; s++) {
+        RequantizationTester()
+          .zeroPoint(zeroPoint)
+          .s(s)
+          .testDivideByPO2WithRoundingAway(xnn_requantize_gemmlowp__sse2);
+      }
+    }
+  }
 
-
-/*
- * x86 SSSE3 implementation from gemmlowp.
- */
-
-TEST(GEMMLOWP__SSSE3, exact_divide_by_po2) {
-  for (uint32_t s = 1; s < 32; s++) {
+  TEST(GEMMLOWP__SSE2, special_cases) {
     RequantizationTester()
-      .s(s)
-      .testExactDivideByPO2(xnn_requantize_gemmlowp__ssse3);
+      .testSpecialCases(xnn_requantize_gemmlowp__sse2);
   }
-}
 
-TEST(GEMMLOWP__SSSE3, exact_divide_by_po2_with_zero_point) {
-  for (int32_t zeroPoint = 1; zeroPoint < 256; zeroPoint++) {
+  TEST(GEMMLOWP__SSE2, random_cases) {
+    RequantizationTester()
+      .iterations(100)
+      .testRandomCasesApproximate(xnn_requantize_gemmlowp__sse2);
+  }
+
+
+  /*
+   * x86 SSSE3 implementation from gemmlowp.
+   */
+
+  TEST(GEMMLOWP__SSSE3, exact_divide_by_po2) {
     for (uint32_t s = 1; s < 32; s++) {
       RequantizationTester()
-        .zeroPoint(zeroPoint)
         .s(s)
         .testExactDivideByPO2(xnn_requantize_gemmlowp__ssse3);
     }
   }
-}
 
-TEST(GEMMLOWP__SSSE3, divide_by_po2_with_rounding_up) {
-  for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
-    for (uint32_t s = 1; s < 32; s++) {
-      RequantizationTester()
-        .zeroPoint(zeroPoint)
-        .s(s)
-        .testDivideByPO2WithRoundingUp(xnn_requantize_gemmlowp__ssse3);
+  TEST(GEMMLOWP__SSSE3, exact_divide_by_po2_with_zero_point) {
+    for (int32_t zeroPoint = 1; zeroPoint < 256; zeroPoint++) {
+      for (uint32_t s = 1; s < 32; s++) {
+        RequantizationTester()
+          .zeroPoint(zeroPoint)
+          .s(s)
+          .testExactDivideByPO2(xnn_requantize_gemmlowp__ssse3);
+      }
     }
   }
-}
 
-/* No rounding down Test - it fails because of upward bias in multiplication */
-
-TEST(GEMMLOWP__SSSE3, divide_by_po2_with_rounding_away) {
-  for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
-    for (uint32_t s = 1; s < 32; s++) {
-      RequantizationTester()
-        .zeroPoint(zeroPoint)
-        .s(s)
-        .testDivideByPO2WithRoundingAway(xnn_requantize_gemmlowp__ssse3);
+  TEST(GEMMLOWP__SSSE3, divide_by_po2_with_rounding_up) {
+    for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
+      for (uint32_t s = 1; s < 32; s++) {
+        RequantizationTester()
+          .zeroPoint(zeroPoint)
+          .s(s)
+          .testDivideByPO2WithRoundingUp(xnn_requantize_gemmlowp__ssse3);
+      }
     }
   }
-}
 
-TEST(GEMMLOWP__SSSE3, special_cases) {
-  RequantizationTester()
-    .testSpecialCases(xnn_requantize_gemmlowp__ssse3);
-}
+  /* No rounding-down test: it fails because of the upward bias in multiplication. */
 
-TEST(GEMMLOWP__SSSE3, random_cases) {
-  RequantizationTester()
-    .iterations(100)
-    .testRandomCasesApproximate(xnn_requantize_gemmlowp__ssse3);
-}
+  TEST(GEMMLOWP__SSSE3, divide_by_po2_with_rounding_away) {
+    for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
+      for (uint32_t s = 1; s < 32; s++) {
+        RequantizationTester()
+          .zeroPoint(zeroPoint)
+          .s(s)
+          .testDivideByPO2WithRoundingAway(xnn_requantize_gemmlowp__ssse3);
+      }
+    }
+  }
 
-
-/*
- * x86 SSE4 implementation from gemmlowp.
- */
-
-TEST(GEMMLOWP__SSE4, exact_divide_by_po2) {
-  for (uint32_t s = 1; s < 32; s++) {
+  TEST(GEMMLOWP__SSSE3, special_cases) {
     RequantizationTester()
-      .s(s)
-      .testExactDivideByPO2(xnn_requantize_gemmlowp__sse4);
+      .testSpecialCases(xnn_requantize_gemmlowp__ssse3);
   }
-}
 
-TEST(GEMMLOWP__SSE4, exact_divide_by_po2_with_zero_point) {
-  for (int32_t zeroPoint = 1; zeroPoint < 256; zeroPoint++) {
+  TEST(GEMMLOWP__SSSE3, random_cases) {
+    RequantizationTester()
+      .iterations(100)
+      .testRandomCasesApproximate(xnn_requantize_gemmlowp__ssse3);
+  }
+
+
+  /*
+   * x86 SSE4 implementation from gemmlowp.
+   */
+
+  TEST(GEMMLOWP__SSE4, exact_divide_by_po2) {
     for (uint32_t s = 1; s < 32; s++) {
       RequantizationTester()
-        .zeroPoint(zeroPoint)
         .s(s)
         .testExactDivideByPO2(xnn_requantize_gemmlowp__sse4);
     }
   }
-}
 
-TEST(GEMMLOWP__SSE4, divide_by_po2_with_rounding_up) {
-  for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
-    for (uint32_t s = 1; s < 32; s++) {
-      RequantizationTester()
-        .zeroPoint(zeroPoint)
-        .s(s)
-        .testDivideByPO2WithRoundingUp(xnn_requantize_gemmlowp__sse4);
+  TEST(GEMMLOWP__SSE4, exact_divide_by_po2_with_zero_point) {
+    for (int32_t zeroPoint = 1; zeroPoint < 256; zeroPoint++) {
+      for (uint32_t s = 1; s < 32; s++) {
+        RequantizationTester()
+          .zeroPoint(zeroPoint)
+          .s(s)
+          .testExactDivideByPO2(xnn_requantize_gemmlowp__sse4);
+      }
     }
   }
-}
 
-/* No rounding down Test - it fails because of upward bias in multiplication */
-
-TEST(GEMMLOWP__SSE4, divide_by_po2_with_rounding_away) {
-  for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
-    for (uint32_t s = 1; s < 32; s++) {
-      RequantizationTester()
-        .zeroPoint(zeroPoint)
-        .s(s)
-        .testDivideByPO2WithRoundingAway(xnn_requantize_gemmlowp__sse4);
+  TEST(GEMMLOWP__SSE4, divide_by_po2_with_rounding_up) {
+    for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
+      for (uint32_t s = 1; s < 32; s++) {
+        RequantizationTester()
+          .zeroPoint(zeroPoint)
+          .s(s)
+          .testDivideByPO2WithRoundingUp(xnn_requantize_gemmlowp__sse4);
+      }
     }
   }
-}
 
-TEST(GEMMLOWP__SSE4, special_cases) {
-  RequantizationTester()
-    .testSpecialCases(xnn_requantize_gemmlowp__sse4);
-}
+  /* No rounding-down test: it fails because of the upward bias in multiplication. */
 
-TEST(GEMMLOWP__SSE4, random_cases) {
-  RequantizationTester()
-    .iterations(100)
-    .testRandomCasesApproximate(xnn_requantize_gemmlowp__sse4);
-}
+  TEST(GEMMLOWP__SSE4, divide_by_po2_with_rounding_away) {
+    for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
+      for (uint32_t s = 1; s < 32; s++) {
+        RequantizationTester()
+          .zeroPoint(zeroPoint)
+          .s(s)
+          .testDivideByPO2WithRoundingAway(xnn_requantize_gemmlowp__sse4);
+      }
+    }
+  }
 
-#endif  // CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
-
-#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
-
-/*
- * Precise ARM NEON implementation.
- */
-
-TEST(PRECISE__NEON, exact_divide_by_po2) {
-  for (uint32_t s = 1; s < 32; s++) {
+  TEST(GEMMLOWP__SSE4, special_cases) {
     RequantizationTester()
-      .s(s)
-      .testExactDivideByPO2(xnn_requantize_precise__neon);
+      .testSpecialCases(xnn_requantize_gemmlowp__sse4);
   }
-}
 
-TEST(PRECISE__NEON, exact_divide_by_po2_with_zero_point) {
-  for (int32_t zeroPoint = 1; zeroPoint < 256; zeroPoint++) {
+  TEST(GEMMLOWP__SSE4, random_cases) {
+    RequantizationTester()
+      .iterations(100)
+      .testRandomCasesApproximate(xnn_requantize_gemmlowp__sse4);
+  }
+#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
+
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
+  /*
+   * Precise ARM NEON implementation.
+   */
+
+  TEST(PRECISE__NEON, exact_divide_by_po2) {
     for (uint32_t s = 1; s < 32; s++) {
       RequantizationTester()
-        .zeroPoint(zeroPoint)
         .s(s)
         .testExactDivideByPO2(xnn_requantize_precise__neon);
     }
   }
-}
 
-TEST(PRECISE__NEON, divide_by_po2_with_rounding_up) {
-  for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
-    for (uint32_t s = 1; s < 32; s++) {
-      RequantizationTester()
-        .zeroPoint(zeroPoint)
-        .s(s)
-        .testDivideByPO2WithRoundingUp(xnn_requantize_precise__neon);
+  TEST(PRECISE__NEON, exact_divide_by_po2_with_zero_point) {
+    for (int32_t zeroPoint = 1; zeroPoint < 256; zeroPoint++) {
+      for (uint32_t s = 1; s < 32; s++) {
+        RequantizationTester()
+          .zeroPoint(zeroPoint)
+          .s(s)
+          .testExactDivideByPO2(xnn_requantize_precise__neon);
+      }
     }
   }
-}
 
-TEST(PRECISE__NEON, divide_by_po2_with_rounding_down) {
-  for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
-    for (uint32_t s = 1; s < 32; s++) {
-      RequantizationTester()
-        .zeroPoint(zeroPoint)
-        .s(s)
-        .testDivideByPO2WithRoundingDown(xnn_requantize_precise__neon);
+  TEST(PRECISE__NEON, divide_by_po2_with_rounding_up) {
+    for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
+      for (uint32_t s = 1; s < 32; s++) {
+        RequantizationTester()
+          .zeroPoint(zeroPoint)
+          .s(s)
+          .testDivideByPO2WithRoundingUp(xnn_requantize_precise__neon);
+      }
     }
   }
-}
 
-TEST(PRECISE__NEON, divide_by_po2_with_rounding_away) {
-  for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
-    for (uint32_t s = 1; s < 32; s++) {
-      RequantizationTester()
-        .zeroPoint(zeroPoint)
-        .s(s)
-        .testDivideByPO2WithRoundingAway(xnn_requantize_precise__neon);
+  TEST(PRECISE__NEON, divide_by_po2_with_rounding_down) {
+    for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
+      for (uint32_t s = 1; s < 32; s++) {
+        RequantizationTester()
+          .zeroPoint(zeroPoint)
+          .s(s)
+          .testDivideByPO2WithRoundingDown(xnn_requantize_precise__neon);
+      }
     }
   }
-}
 
-TEST(PRECISE__NEON, special_cases) {
-  RequantizationTester()
-    .testSpecialCases(xnn_requantize_precise__neon);
-}
+  TEST(PRECISE__NEON, divide_by_po2_with_rounding_away) {
+    for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
+      for (uint32_t s = 1; s < 32; s++) {
+        RequantizationTester()
+          .zeroPoint(zeroPoint)
+          .s(s)
+          .testDivideByPO2WithRoundingAway(xnn_requantize_precise__neon);
+      }
+    }
+  }
 
-TEST(PRECISE__NEON, random_cases) {
-  RequantizationTester()
-    .iterations(100)
-    .testRandomCasesPrecise(xnn_requantize_precise__neon);
-}
-
-
-/*
- * FP32-based ARM NEON implementation.
- */
-
-TEST(FP32__NEON, random_cases) {
-  RequantizationTester()
-    .iterations(1000)
-    .testRandomCasesApproximate(xnn_requantize_fp32__neon);
-}
-
-
-/*
- * Q31-based ARM NEON implementation.
- */
-
-TEST(Q31__NEON, exact_divide_by_po2) {
-  for (uint32_t s = 1; s < 32; s++) {
+  TEST(PRECISE__NEON, special_cases) {
     RequantizationTester()
-      .s(s)
-      .testExactDivideByPO2(xnn_requantize_q31__neon);
+      .testSpecialCases(xnn_requantize_precise__neon);
   }
-}
 
-TEST(Q31__NEON, exact_divide_by_po2_with_zero_point) {
-  for (int32_t zeroPoint = 1; zeroPoint < 256; zeroPoint++) {
+  TEST(PRECISE__NEON, random_cases) {
+    RequantizationTester()
+      .iterations(100)
+      .testRandomCasesPrecise(xnn_requantize_precise__neon);
+  }
+
+
+  /*
+   * FP32-based ARM NEON implementation.
+   */
+
+  TEST(FP32__NEON, random_cases) {
+    RequantizationTester()
+      .iterations(1000)
+      .testRandomCasesApproximate(xnn_requantize_fp32__neon);
+  }
+
+
+  /*
+   * Q31-based ARM NEON implementation.
+   */
+
+  TEST(Q31__NEON, exact_divide_by_po2) {
     for (uint32_t s = 1; s < 32; s++) {
       RequantizationTester()
-        .zeroPoint(zeroPoint)
         .s(s)
         .testExactDivideByPO2(xnn_requantize_q31__neon);
     }
   }
-}
 
-TEST(Q31__NEON, divide_by_po2_with_rounding_up) {
-  for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
-    for (uint32_t s = 1; s < 32; s++) {
-      RequantizationTester()
-        .zeroPoint(zeroPoint)
-        .s(s)
-        .testDivideByPO2WithRoundingUp(xnn_requantize_q31__neon);
+  TEST(Q31__NEON, exact_divide_by_po2_with_zero_point) {
+    for (int32_t zeroPoint = 1; zeroPoint < 256; zeroPoint++) {
+      for (uint32_t s = 1; s < 32; s++) {
+        RequantizationTester()
+          .zeroPoint(zeroPoint)
+          .s(s)
+          .testExactDivideByPO2(xnn_requantize_q31__neon);
+      }
     }
   }
-}
 
-/* No rounding down Test - it fails because of upward bias in multiplication */
-
-TEST(Q31__NEON, divide_by_po2_with_rounding_away) {
-  for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
-    for (uint32_t s = 1; s < 32; s++) {
-      RequantizationTester()
-        .zeroPoint(zeroPoint)
-        .s(s)
-        .testDivideByPO2WithRoundingAway(xnn_requantize_q31__neon);
+  TEST(Q31__NEON, divide_by_po2_with_rounding_up) {
+    for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
+      for (uint32_t s = 1; s < 32; s++) {
+        RequantizationTester()
+          .zeroPoint(zeroPoint)
+          .s(s)
+          .testDivideByPO2WithRoundingUp(xnn_requantize_q31__neon);
+      }
     }
   }
-}
 
-TEST(Q31__NEON, special_cases) {
-  RequantizationTester()
-    .testSpecialCases(xnn_requantize_q31__neon);
-}
+  /* No divide_by_po2_with_rounding_down test for Q31: it fails because of upward bias in multiplication */
 
-TEST(Q31__NEON, random_cases) {
-  RequantizationTester()
-    .iterations(100)
-    .testRandomCasesApproximate(xnn_requantize_q31__neon);
-}
+  TEST(Q31__NEON, divide_by_po2_with_rounding_away) {
+    for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
+      for (uint32_t s = 1; s < 32; s++) {
+        RequantizationTester()
+          .zeroPoint(zeroPoint)
+          .s(s)
+          .testDivideByPO2WithRoundingAway(xnn_requantize_q31__neon);
+      }
+    }
+  }
 
-TEST(Q31__NEON, random_match_gemmlowp) {
-  RequantizationTester()
-    .iterations(100)
-    .testRandomCasesAgainstReference(xnn_requantize_q31__neon, xnn_requantize_gemmlowp__neon);
-}
+  TEST(Q31__NEON, special_cases) {
+    RequantizationTester()
+      .testSpecialCases(xnn_requantize_q31__neon);
+  }
+
+  TEST(Q31__NEON, random_cases) {
+    RequantizationTester()
+      .iterations(100)
+      .testRandomCasesApproximate(xnn_requantize_q31__neon);
+  }
+
+  TEST(Q31__NEON, random_match_gemmlowp) {
+    RequantizationTester()
+      .iterations(100)
+      .testRandomCasesAgainstReference(xnn_requantize_q31__neon, xnn_requantize_gemmlowp__neon);
+  }
 
 
-/*
- * ARM NEON implementation from gemmlowp.
- */
+  /*
+   * ARM NEON implementation from gemmlowp.
+   */
 
-TEST(GEMMLOWP__NEON, random_cases) {
-  RequantizationTester()
-    .iterations(100)
-    .testRandomCasesApproximate(xnn_requantize_gemmlowp__neon);
-}
-
-#endif  // CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+  TEST(GEMMLOWP__NEON, random_cases) {
+    RequantizationTester()
+      .iterations(100)
+      .testRandomCasesApproximate(xnn_requantize_gemmlowp__neon);
+  }
+#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
diff --git a/test/u8-clamp.cc b/test/u8-clamp.cc
index b20a76f..4fd6563 100644
--- a/test/u8-clamp.cc
+++ b/test/u8-clamp.cc
@@ -6,16 +6,16 @@
 // This source code is licensed under the BSD-style license found in the
 // LICENSE file in the root directory of this source tree.
 
-#include <cpuinfo.h>
 #include <gtest/gtest.h>
 
+#include <xnnpack/common.h>
 #include <xnnpack/isa-checks.h>
-#include <xnnpack/clamp.h>
 
+#include <xnnpack/clamp.h>
 #include "clamp-microkernel-tester.h"
 
 
-#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
   TEST(U8_CLAMP__NEON, n_eq_8) {
     TEST_REQUIRES_ARM_NEON;
     ClampMicrokernelTester()
@@ -88,9 +88,9 @@
       }
     }
   }
-#endif  // CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
-#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
   TEST(U8_CLAMP__SSE2, n_eq_8) {
     TEST_REQUIRES_X86_SSE2;
     ClampMicrokernelTester()
@@ -163,7 +163,7 @@
       }
     }
   }
-#endif  // CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 TEST(U8_CLAMP__SCALAR, n_eq_1) {
   ClampMicrokernelTester()
diff --git a/test/u8-lut32norm.cc b/test/u8-lut32norm.cc
index f3b1f80..959839d 100644
--- a/test/u8-lut32norm.cc
+++ b/test/u8-lut32norm.cc
@@ -9,7 +9,6 @@
 #include <gtest/gtest.h>
 
 #include <xnnpack/lut.h>
-
 #include "lut-norm-microkernel-tester.h"
 
 
diff --git a/test/u8-maxpool.cc b/test/u8-maxpool.cc
index e2bee52..b5832dc 100644
--- a/test/u8-maxpool.cc
+++ b/test/u8-maxpool.cc
@@ -6,16 +6,16 @@
 // This source code is licensed under the BSD-style license found in the
 // LICENSE file in the root directory of this source tree.
 
-#include <cpuinfo.h>
 #include <gtest/gtest.h>
 
+#include <xnnpack/common.h>
 #include <xnnpack/isa-checks.h>
-#include <xnnpack/maxpool.h>
 
+#include <xnnpack/maxpool.h>
 #include "maxpool-microkernel-tester.h"
 
 
-#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
   TEST(U8_MAXPOOL_9P8Q__NEON, kc_eq_16_unipass_fulltile) {
     TEST_REQUIRES_ARM_NEON;
     auto tester = MaxPoolMicrokernelTester()
@@ -1217,9 +1217,9 @@
       }
     }
   }
-#endif  // CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
-#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
   TEST(U8_MAXPOOL_9P8Q__SSE2, kc_eq_16_unipass_fulltile) {
     TEST_REQUIRES_X86_SSE2;
     auto tester = MaxPoolMicrokernelTester()
@@ -2421,7 +2421,7 @@
       }
     }
   }
-#endif  // CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 TEST(U8_MAXPOOL_9P8Q__SCALAR, kc_eq_1_unipass_fulltile) {
   auto tester = MaxPoolMicrokernelTester()
diff --git a/test/u8-rmax.cc b/test/u8-rmax.cc
index ad915ad..8f6b03f 100644
--- a/test/u8-rmax.cc
+++ b/test/u8-rmax.cc
@@ -6,16 +6,16 @@
 // This source code is licensed under the BSD-style license found in the
 // LICENSE file in the root directory of this source tree.
 
-#include <cpuinfo.h>
 #include <gtest/gtest.h>
 
+#include <xnnpack/common.h>
 #include <xnnpack/isa-checks.h>
-#include <xnnpack/rmax.h>
 
+#include <xnnpack/rmax.h>
 #include "rmax-microkernel-tester.h"
 
 
-#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
   TEST(U8RMAX__NEON, n_lt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t n = 1; n < 16; n++) {
@@ -49,9 +49,9 @@
         .Test(xnn_u8_rmax_ukernel__neon);
     }
   }
-#endif  // CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
-#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
   TEST(U8RMAX__SSE2, n_lt_16) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t n = 1; n < 16; n++) {
@@ -85,7 +85,7 @@
         .Test(xnn_u8_rmax_ukernel__sse2);
     }
   }
-#endif  // CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 TEST(U8RMAX__SCALAR, n_lt_2) {
   for (size_t n = 1; n < 2; n++) {
diff --git a/test/x32-packx.cc b/test/x32-packx.cc
index ce4e428..d2a5f65 100644
--- a/test/x32-packx.cc
+++ b/test/x32-packx.cc
@@ -8,16 +8,16 @@
 //   Generator: tools/generate-pack-test.py
 
 
-#include <cpuinfo.h>
 #include <gtest/gtest.h>
 
-#include <xnnpack/packx.h>
+#include <xnnpack/common.h>
 #include <xnnpack/isa-checks.h>
 
+#include <xnnpack/packx.h>
 #include "pack-microkernel-tester.h"
 
 
-#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
   TEST(X32_PACKX_4X__NEON_ST4, k_eq_4) {
     TEST_REQUIRES_ARM_NEON;
     PackMicrokernelTester()
@@ -121,10 +121,10 @@
         .Test(xnn_x32_packx_ukernel_4x__neon_st4);
     }
   }
-#endif  // CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
-#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
   TEST(X32_PACKX_4X__SSE, k_eq_4) {
     TEST_REQUIRES_X86_SSE;
     PackMicrokernelTester()
@@ -228,10 +228,10 @@
         .Test(xnn_x32_packx_ukernel_4x__sse);
     }
   }
-#endif  // CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
-#if !CPUINFO_ARCH_ASMJS && !CPUINFO_ARCH_WASM
+#if !XNN_ARCH_ASMJS && !XNN_ARCH_WASM
   TEST(X32_PACKX_4X__PSIMD, k_eq_4) {
     TEST_REQUIRES_PSIMD;
     PackMicrokernelTester()
@@ -335,7 +335,7 @@
         .Test(xnn_x32_packx_ukernel_4x__psimd);
     }
   }
-#endif  // !CPUINFO_ARCH_ASMJS && !CPUINFO_ARCH_WASM
+#endif  // !XNN_ARCH_ASMJS && !XNN_ARCH_WASM
 
 
 TEST(X32_PACKX_2X__SCALAR, k_eq_1) {
diff --git a/test/x32-pad.cc b/test/x32-pad.cc
index 615deaf..793a7f8 100644
--- a/test/x32-pad.cc
+++ b/test/x32-pad.cc
@@ -5,13 +5,14 @@
 
 #include <gtest/gtest.h>
 
+#include <xnnpack/common.h>
 #include <xnnpack/isa-checks.h>
-#include <xnnpack/pad.h>
 
+#include <xnnpack/pad.h>
 #include "pad-microkernel-tester.h"
 
 
-#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
   TEST(X32_PAD_X2__NEON, fulltile_copy_n_eq_4) {
     TEST_REQUIRES_ARM_NEON;
     PadMicrokernelTester()
@@ -201,10 +202,10 @@
       }
     }
   }
-#endif  // CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
-#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
   TEST(X32_PAD_X2__SSE2, fulltile_copy_n_eq_4) {
     TEST_REQUIRES_X86_SSE2;
     PadMicrokernelTester()
@@ -394,10 +395,10 @@
       }
     }
   }
-#endif  // CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
-#if !CPUINFO_ARCH_WASM && !CPUINFO_ARCH_ASMJS
+#if !XNN_ARCH_WASM && !XNN_ARCH_ASMJS
   TEST(X32_PAD_X2__PSIMD, fulltile_copy_n_eq_4) {
     TEST_REQUIRES_PSIMD;
     PadMicrokernelTester()
@@ -587,7 +588,7 @@
       }
     }
   }
-#endif  // !CPUINFO_ARCH_WASM && !CPUINFO_ARCH_ASMJS
+#endif  // !XNN_ARCH_WASM && !XNN_ARCH_ASMJS
 
 
 TEST(X32_PAD_X2__SCALAR, fulltile_copy_n_eq_1) {
diff --git a/test/x32-unpool.cc b/test/x32-unpool.cc
index 04dd20f..dd9c17e 100644
--- a/test/x32-unpool.cc
+++ b/test/x32-unpool.cc
@@ -5,13 +5,14 @@
 
 #include <gtest/gtest.h>
 
+#include <xnnpack/common.h>
 #include <xnnpack/isa-checks.h>
-#include <xnnpack/unpool.h>
 
+#include <xnnpack/unpool.h>
 #include "unpool-microkernel-tester.h"
 
 
-#if !CPUINFO_ARCH_WASM && !CPUINFO_ARCH_ASMJS
+#if !XNN_ARCH_WASM && !XNN_ARCH_ASMJS
   TEST(X32_UNPOOL__PSIMD, c_eq_4) {
     TEST_REQUIRES_PSIMD;
     UnpoolMicrokernelTester()
@@ -83,7 +84,7 @@
         .Test(xnn_x32_unpool_ukernel__psimd);
     }
   }
-#endif  // !CPUINFO_ARCH_WASM && !CPUINFO_ARCH_ASMJS
+#endif  // !XNN_ARCH_WASM && !XNN_ARCH_ASMJS
 
 
 TEST(X32_UNPOOL__SCALAR, c_eq_1) {
diff --git a/test/x32-zip.cc b/test/x32-zip.cc
index 7fe9dc7..729bcba 100644
--- a/test/x32-zip.cc
+++ b/test/x32-zip.cc
@@ -3,16 +3,16 @@
 // This source code is licensed under the BSD-style license found in the
 // LICENSE file in the root directory of this source tree.
 
-#include <cpuinfo.h>
 #include <gtest/gtest.h>
 
+#include <xnnpack/common.h>
 #include <xnnpack/isa-checks.h>
-#include <xnnpack/zip.h>
 
+#include <xnnpack/zip.h>
 #include "zip-microkernel-tester.h"
 
 
-#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
   TEST(X32_ZIP_X2__NEON, n_eq_4) {
     TEST_REQUIRES_ARM_NEON;
     ZipMicrokernelTester()
@@ -256,9 +256,9 @@
       }
     }
   }
-#endif  // CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
-#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
   TEST(X32_ZIP_X2__SSE2, n_eq_4) {
     TEST_REQUIRES_X86_SSE2;
     ZipMicrokernelTester()
@@ -502,9 +502,9 @@
       }
     }
   }
-#endif  // CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
-#if !CPUINFO_ARCH_WASM && !CPUINFO_ARCH_ASMJS
+#if !XNN_ARCH_WASM && !XNN_ARCH_ASMJS
   TEST(X32_ZIP_X2__PSIMD, n_eq_4) {
     TEST_REQUIRES_PSIMD;
     ZipMicrokernelTester()
@@ -748,7 +748,7 @@
       }
     }
   }
-#endif  // !CPUINFO_ARCH_WASM && !CPUINFO_ARCH_ASMJS
+#endif  // !XNN_ARCH_WASM && !XNN_ARCH_ASMJS
 
 TEST(X32_ZIP_X2__SCALAR, n_eq_1) {
   ZipMicrokernelTester()
diff --git a/test/x8-lut.cc b/test/x8-lut.cc
index aacbad2..4acfec8 100644
--- a/test/x8-lut.cc
+++ b/test/x8-lut.cc
@@ -9,7 +9,6 @@
 #include <gtest/gtest.h>
 
 #include <xnnpack/lut.h>
-
 #include "lut-microkernel-tester.h"
 
 
diff --git a/test/x8-zip.cc b/test/x8-zip.cc
index 8fe18b4..c92ee35 100644
--- a/test/x8-zip.cc
+++ b/test/x8-zip.cc
@@ -6,16 +6,16 @@
 // This source code is licensed under the BSD-style license found in the
 // LICENSE file in the root directory of this source tree.
 
-#include <cpuinfo.h>
 #include <gtest/gtest.h>
 
+#include <xnnpack/common.h>
 #include <xnnpack/isa-checks.h>
-#include <xnnpack/zip.h>
 
+#include <xnnpack/zip.h>
 #include "zip-microkernel-tester.h"
 
 
-#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
   TEST(X8_ZIP_X2__NEON, n_eq_8) {
     TEST_REQUIRES_ARM_NEON;
     ZipMicrokernelTester()
@@ -237,9 +237,9 @@
       }
     }
   }
-#endif  // CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
-#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
   TEST(X8_ZIP_X2__SSE2, n_eq_16) {
     TEST_REQUIRES_X86_SSE2;
     ZipMicrokernelTester()
@@ -489,7 +489,7 @@
       }
     }
   }
-#endif  // CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 TEST(X8_ZIP_X2__SCALAR, n_eq_1) {
   ZipMicrokernelTester()