Limit direct dependencies on cpuinfo

PiperOrigin-RevId: 272245408
diff --git a/src/init.c b/src/init.c
index b0ad23d..a5ca59d 100644
--- a/src/init.c
+++ b/src/init.c
@@ -18,6 +18,7 @@
 #include <xnnpack/argmaxpool.h>
 #include <xnnpack/avgpool.h>
 #include <xnnpack/clamp.h>
+#include <xnnpack/common.h>
 #include <xnnpack/conv.h>
 #include <xnnpack/dwconv.h>
 #include <xnnpack/gavgpool.h>
@@ -48,15 +49,15 @@
   .initialized = false
 };
 
-#if CPUINFO_ARCH_PNACL || CPUINFO_ARCH_ASMJS || CPUINFO_ARCH_WASM || CPUINFO_ARCH_WASMSIMD
+#if XNN_ARCH_PNACL || XNN_ARCH_ASMJS || XNN_ARCH_WASM || XNN_ARCH_WASMSIMD
   extern uint32_t xnn_stub_wasm_f32_sub(uint32_t a, uint32_t b);
 #endif
-#if CPUINFO_ARCH_PNACL || CPUINFO_ARCH_WASM || CPUINFO_ARCH_WASMSIMD
+#if XNN_ARCH_PNACL || XNN_ARCH_WASM || XNN_ARCH_WASMSIMD
   extern uint32_t xnn_stub_wasm_f32_min(uint32_t a, uint32_t b);
 #endif
 
 static void init(void) {
-#if CPUINFO_ARCH_ARM
+#if XNN_ARCH_ARM
   if (!cpuinfo_has_arm_neon()) {
     xnn_log_error("XNNPACK initialization failed: NEON is not supported");
     return;
@@ -206,7 +207,7 @@
     .xm = (xnn_zipv_ukernel_function) xnn_x32_zip_xm_ukernel__neon,
   };
 
-#elif CPUINFO_ARCH_ARM64
+#elif XNN_ARCH_ARM64
 
   /**************************** Q8 micro-kernels ****************************/
   xnn_params.q8.gemm = (struct gemm_parameters) {
@@ -485,7 +486,7 @@
     .xm = (xnn_zipv_ukernel_function) xnn_x32_zip_xm_ukernel__neon,
   };
 
-#elif CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+#elif XNN_ARCH_X86 || XNN_ARCH_X86_64
   if (!cpuinfo_has_x86_sse2()) {
     xnn_log_error("XNNPACK initialization failed: SSE2 is not supported");
     return;
@@ -649,7 +650,7 @@
     .xm = (xnn_zipv_ukernel_function) xnn_x32_zip_xm_ukernel__sse2,
   };
 
-#elif CPUINFO_ARCH_PNACL || CPUINFO_ARCH_WASMSIMD
+#elif XNN_ARCH_PNACL || XNN_ARCH_WASMSIMD
   /**************************** Q8 micro-kernels ****************************/
   xnn_params.q8.gemm = (struct gemm_parameters) {
     .gemm = (xnn_gemm_ukernel_function) xnn_q8_gemm_ukernel_2x2__scalar,
@@ -786,7 +787,7 @@
     .xm = (xnn_zipv_ukernel_function) xnn_x32_zip_xm_ukernel__psimd,
   };
 
-#elif CPUINFO_ARCH_WASM || CPUINFO_ARCH_ASMJS
+#elif XNN_ARCH_WASM || XNN_ARCH_ASMJS
   // Unlike most other architectures, on x86/x86-64 when floating-point instructions
   // have no NaN arguments, but produce NaN output, the output NaN has sign bit set.
   // We use it to distinguish x86/x86-64 from other architectures, by doing subtraction
diff --git a/src/wasm-stubs.c b/src/wasm-stubs.c
index 29826d7..b9273c5 100644
--- a/src/wasm-stubs.c
+++ b/src/wasm-stubs.c
@@ -12,8 +12,8 @@
   return fp32_to_bits(fp32_from_bits(a) - fp32_from_bits(b));
 }
 
-#if CPUINFO_ARCH_WASM || CPUINFO_ARCH_WASMSIMD
+#if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD
 uint32_t xnn_stub_wasm_f32_min(uint32_t a, uint32_t b) {
   return fp32_to_bits(__builtin_wasm_min_f32(fp32_from_bits(a), fp32_from_bits(b)));
 }
-#endif  // CPUINFO_ARCH_WASM || CPUINFO_ARCH_WASMSIMD
+#endif  // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD
diff --git a/src/xnnpack/allocator.h b/src/xnnpack/allocator.h
index 303aa37..c946656 100644
--- a/src/xnnpack/allocator.h
+++ b/src/xnnpack/allocator.h
@@ -12,7 +12,7 @@
   #include <malloc.h>
 #endif
 
-#include <cpuinfo.h>
+#include <xnnpack/common.h>
 
 extern int posix_memalign(void **memptr, size_t alignment, size_t size);
 
@@ -22,7 +22,7 @@
 
 inline static void* xnn_allocate_memory(size_t memory_size) {
   void* memory_ptr = NULL;
-#if CPUINFO_ARCH_ASMJS || CPUINFO_ARCH_WASM
+#if XNN_ARCH_ASMJS || XNN_ARCH_WASM
   memory_ptr = malloc(memory_size);
 #elif defined(__ANDROID__)
   memory_ptr = memalign(XNN_ALLOCATION_ALIGNMENT, memory_size);
diff --git a/src/xnnpack/common.h b/src/xnnpack/common.h
index 2dbc451..673b92a 100644
--- a/src/xnnpack/common.h
+++ b/src/xnnpack/common.h
@@ -9,6 +9,83 @@
 #pragma once
 
 
+// Define architecture identification macros
+
+#if defined(__i386__) || defined(__i486__) || defined(__i586__) || defined(__i686__) || defined(_M_IX86)
+  #define XNN_ARCH_X86 1
+#endif
+
+#if defined(__x86_64__) || defined(__x86_64) || defined(_M_X64) || defined(_M_AMD64)
+  #define XNN_ARCH_X86_64 1
+#endif
+
+#if defined(__arm__) || defined(_M_ARM)
+  #define XNN_ARCH_ARM 1
+#endif
+
+#if defined(__aarch64__) || defined(_M_ARM64)
+  #define XNN_ARCH_ARM64 1
+#endif
+
+#if defined(__PPC64__) || defined(__ppc64__) || defined(__powerpc64__) || defined(_ARCH_PPC64)
+  #define XNN_ARCH_PPC64 1
+#endif
+
+#if defined(__pnacl__)
+  #define XNN_ARCH_PNACL 1
+#endif
+
+#if defined(__asmjs__)
+  #define XNN_ARCH_ASMJS 1
+#endif
+
+#if defined(__wasm__)
+  #if defined(__wasm_simd128__)
+    #define XNN_ARCH_WASMSIMD 1
+  #else
+    #define XNN_ARCH_WASM 1
+  #endif
+#endif
+
+// Ensure each architecture identification macro is always defined, as either 0 or 1
+
+#ifndef XNN_ARCH_X86
+  #define XNN_ARCH_X86 0
+#endif
+
+#ifndef XNN_ARCH_X86_64
+  #define XNN_ARCH_X86_64 0
+#endif
+
+#ifndef XNN_ARCH_ARM
+  #define XNN_ARCH_ARM 0
+#endif
+
+#ifndef XNN_ARCH_ARM64
+  #define XNN_ARCH_ARM64 0
+#endif
+
+#ifndef XNN_ARCH_PPC64
+  #define XNN_ARCH_PPC64 0
+#endif
+
+#ifndef XNN_ARCH_PNACL
+  #define XNN_ARCH_PNACL 0
+#endif
+
+#ifndef XNN_ARCH_ASMJS
+  #define XNN_ARCH_ASMJS 0
+#endif
+
+#ifndef XNN_ARCH_WASM
+  #define XNN_ARCH_WASM 0
+#endif
+
+#ifndef XNN_ARCH_WASMSIMD
+  #define XNN_ARCH_WASMSIMD 0
+#endif
+
+
 #if defined(__GNUC__)
   #if defined(__clang__) || (__GNUC__ > 4 || __GNUC__ == 4 && __GNUC_MINOR__ >= 5)
     #define XNN_UNREACHABLE do { __builtin_unreachable(); } while (0)
diff --git a/src/xnnpack/isa-checks.h b/src/xnnpack/isa-checks.h
index 0bdf97c..e4aebd6 100644
--- a/src/xnnpack/isa-checks.h
+++ b/src/xnnpack/isa-checks.h
@@ -10,8 +10,10 @@
 
 #include <cpuinfo.h>
 
+#include <xnnpack/common.h>
 
-#if CPUINFO_ARCH_PNACL || CPUINFO_ARCH_WASMSIMD
+
+#if XNN_ARCH_PNACL || XNN_ARCH_WASMSIMD
   #define TEST_REQUIRES_PSIMD
 #else
   #define TEST_REQUIRES_PSIMD \
diff --git a/src/xnnpack/params.h b/src/xnnpack/params.h
index 11065c4..7313032 100644
--- a/src/xnnpack/params.h
+++ b/src/xnnpack/params.h
@@ -12,8 +12,6 @@
 #include <stddef.h>
 #include <stdint.h>
 
-#include <cpuinfo.h>
-
 #include <xnnpack/common.h>
 
 #define XNN_INTERNAL_EXTRA_BYTES 32
@@ -29,12 +27,12 @@
     float max;
     float min;
   } scalar;
-#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
   struct {
     XNN_ALIGN(16) float max[4];
     XNN_ALIGN(16) float min[4];
   } sse;
-#endif  // CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 };
 
 union xnn_f32_spchw_params {
@@ -42,7 +40,7 @@
     float max;
     float min;
   } scalar;
-#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
   struct {
     XNN_ALIGN(16) uint32_t mask_even[4]; // used by stride 2 kernels
     XNN_ALIGN(16) uint32_t mask_odd[4];  // used by stride 2 kernels
@@ -50,8 +48,8 @@
     float min;
     float max;
   } neon;
-#endif  // CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
-#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
   struct {
     XNN_ALIGN(16) uint32_t mask_even[4]; // used by stride 2 kernels
     XNN_ALIGN(16) uint32_t mask_odd[4];  // used by stride 2 kernels
@@ -59,7 +57,7 @@
     XNN_ALIGN(16) float max[4];
     XNN_ALIGN(16) float min[4];
   } sse;
-#endif  // CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 };
 
 union xnn_u8_output_params {
@@ -67,18 +65,18 @@
     int32_t max;
     int32_t min;
   } scalar;
-#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
   struct {
     uint8_t max;
     uint8_t min;
   } neon;
-#endif  // CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
-#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
   struct {
     XNN_ALIGN(16) uint8_t max[16];
     XNN_ALIGN(16) uint8_t min[16];
   } sse2;
-#endif  // CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 };
 
 union xnn_f32_avgpool_params {
@@ -87,20 +85,20 @@
     float output_min;
     float output_max;
   } scalar;
-#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
   struct {
     XNN_ALIGN(16) float multiplier[4];
     XNN_ALIGN(16) float output_max[4];
     XNN_ALIGN(16) float output_min[4];
   } sse2;
-#endif  // CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
-#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
   struct {
     XNN_ALIGN(16) float multiplier;
     XNN_ALIGN(16) float output_max;
     XNN_ALIGN(16) float output_min;
   } neon;
-#endif  // CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 };
 
 union xnn_f32_gavgpool_params {
@@ -109,22 +107,22 @@
     float output_min;
     float output_max;
   } scalar;
-#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
   struct {
     XNN_ALIGN(16) float multiplier[4];
     XNN_ALIGN(16) float output_max[4];
     XNN_ALIGN(16) float output_min[4];
     XNN_ALIGN(16) uint32_t mask[4];
   } sse;
-#endif  // CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
-#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
   struct {
     XNN_ALIGN(16) float multiplier;
     XNN_ALIGN(16) float output_max;
     XNN_ALIGN(16) float output_min;
     XNN_ALIGN(16) uint32_t mask[4];
   } neon;
-#endif  // CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64 */
+#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 };
 
 union xnn_f32_hswish_params {
@@ -133,13 +131,13 @@
     float half;
     float one;
   } scalar;
-#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
   struct {
     XNN_ALIGN(16) float sixth[4];
     XNN_ALIGN(16) float half[4];
     XNN_ALIGN(16) float one[4];
   } sse;
-#endif  // CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 };
 
 union xnn_q8_gemm_params {
@@ -154,7 +152,7 @@
     int32_t output_max_less_zero_point;
     int32_t output_zero_point;
   } scalar;
-#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
   struct {
     int16_t kernel_zero_point;
     int16_t input_zero_point;
@@ -164,8 +162,8 @@
     uint8_t output_max;
     uint8_t output_min;
   } neon;
-#endif  // CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
-#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
   struct {
     XNN_ALIGN(16) int16_t kernel_zero_point[8];
     XNN_ALIGN(16) int16_t input_zero_point[8];
@@ -178,7 +176,7 @@
     XNN_ALIGN(16) uint8_t output_max[16];
     XNN_ALIGN(16) uint8_t output_min[16];
   } sse2;
-#endif  // CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 };
 
 union xnn_q8_add_params {
@@ -193,7 +191,7 @@
     int32_t y_max;
     int32_t y_min;
   } scalar;
-#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
   struct {
     uint8_t a_zero_point;
     uint8_t b_zero_point;
@@ -204,8 +202,8 @@
     uint8_t y_max;
     uint8_t y_min;
   } neon;
-#endif  // CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
-#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
   struct {
     XNN_ALIGN(16) int32_t zero_point_product[4];
     XNN_ALIGN(16) uint16_t a_multiplier_lo[8];
@@ -221,7 +219,7 @@
     uint32_t a_multiplier;
     uint32_t b_multiplier;
   } sse2;
-#endif  // CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 };
 
 union xnn_q8_avgpool_params {
@@ -234,7 +232,7 @@
     int32_t output_max_less_zero_point;
     int32_t output_zero_point;
   } scalar;
-#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
   struct {
     int32_t bias;
     int32_t multiplier;
@@ -243,8 +241,8 @@
     uint8_t output_max;
     uint8_t output_min;
   } neon;
-#endif  // CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
-#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
   struct {
     XNN_ALIGN(16) int32_t bias[4];
     XNN_ALIGN(16) uint32_t multiplier[4];
@@ -254,7 +252,7 @@
     XNN_ALIGN(16) uint8_t output_max[16];
     XNN_ALIGN(16) uint8_t output_min[16];
   } sse2;
-#endif  // CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 };
 
 union xnn_fp32_requantization_params {
@@ -330,7 +328,7 @@
     int32_t max_less_zero_point;
     int32_t zero_point;
   } scalar;
-#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
   struct {
     int32_t multiplier;
     int32_t right_shift;
@@ -338,8 +336,8 @@
     uint8_t max;
     uint8_t min;
   } neon;
-#endif  // CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
-#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
   struct {
     XNN_ALIGN(16) uint32_t multiplier[4];
     XNN_ALIGN(16) uint64_t rounding[2];
@@ -350,7 +348,7 @@
     XNN_ALIGN(16) uint8_t max[16];
     XNN_ALIGN(16) uint8_t min[16];
   } sse2;
-#endif  // CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 };
 
 union xnn_requantization_params {
diff --git a/src/xnnpack/requantization.h b/src/xnnpack/requantization.h
index 51cff74..77f771a 100644
--- a/src/xnnpack/requantization.h
+++ b/src/xnnpack/requantization.h
@@ -22,6 +22,7 @@
 
 #include <fp16.h>
 
+#include <xnnpack/common.h>
 #include <xnnpack/params.h>
 #include <xnnpack/scalar-utils.h>
 
@@ -87,7 +88,7 @@
   assert(shift < 32);
 
   union xnn_q8_gemm_params params;
-  #if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+  #if XNN_ARCH_X86 || XNN_ARCH_X86_64
     const uint32_t remainder_mask = (UINT32_C(1) << shift) - UINT32_C(1);
     const uint32_t remainder_threshold = remainder_mask >> 1;
     for (uint32_t i = 0; i < 8; i++) {
@@ -117,7 +118,7 @@
       params.sse2.output_max[i] = output_max;
       params.sse2.output_min[i] = output_min;
     }
-  #elif CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+  #elif XNN_ARCH_ARM || XNN_ARCH_ARM64
     params.neon.input_zero_point = (int16_t) (uint16_t) input_zero_point;
     params.neon.kernel_zero_point = (int16_t) (uint16_t) kernel_zero_point;
     params.neon.multiplier = multiplier;
@@ -166,7 +167,7 @@
   assert(shift < 64);
 
   union xnn_q8_avgpool_params params;
-  #if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+  #if XNN_ARCH_X86 || XNN_ARCH_X86_64
     const uint32_t right_shift = (uint32_t) shift;
     const uint64_t rounding = UINT64_C(1) << (right_shift - 1);
     params.sse2.bias[0] = bias;
@@ -188,7 +189,7 @@
       params.sse2.output_max[i] = output_max;
       params.sse2.output_min[i] = output_min;
     }
-  #elif CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+  #elif XNN_ARCH_ARM || XNN_ARCH_ARM64
     params.neon.bias = bias;
     params.neon.multiplier = multiplier;
     params.neon.left_shift = (int64_t) -shift;
@@ -252,7 +253,7 @@
   union xnn_f32_avgpool_params* params,
   float multiplier)
 {
-  #if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+  #if XNN_ARCH_X86 || XNN_ARCH_X86_64
     for (uint32_t i = 0; i < 4; i++) {
       params->sse2.multiplier[i] = multiplier;
     }
@@ -267,7 +268,7 @@
   float output_max)
 {
   union xnn_f32_avgpool_params params;
-#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
   for (uint32_t i = 0; i < 4; i++) {
     params.sse2.multiplier[i] = multiplier;
     params.sse2.output_min[i] = output_min;
@@ -288,7 +289,7 @@
   uint32_t width)
 {
   union xnn_f32_gavgpool_params params;
-  #if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+  #if XNN_ARCH_X86 || XNN_ARCH_X86_64
     for (uint32_t i = 0; i < 4; i++) {
       params.sse.multiplier[i] = multiplier;
       params.sse.output_min[i] = output_min;
@@ -320,7 +321,7 @@
       params.sse.mask[3] = 0;
       break;
   }
-#elif CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+#elif XNN_ARCH_ARM || XNN_ARCH_ARM64
     switch (width % 4) {
       case 0:
         params.neon.mask[0] = UINT32_C(0xFFFFFFFF);
@@ -363,7 +364,7 @@
   float multiplier,
   uint32_t width)
 {
-  #if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+  #if XNN_ARCH_X86 || XNN_ARCH_X86_64
     for (uint32_t i = 0; i < 4; i++) {
       params->sse.multiplier[i] = multiplier;
     }
@@ -393,7 +394,7 @@
         params->sse.mask[3] = 0;
         break;
     }
-  #elif CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+  #elif XNN_ARCH_ARM || XNN_ARCH_ARM64
     params->neon.multiplier = multiplier;
     switch (width % 4) {
       case 0:
@@ -454,7 +455,7 @@
   float output_max)
 {
   union xnn_f32_output_params params;
-#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
   for (uint32_t i = 0; i < 4; i++) {
     params.sse.min[i] = output_min;
     params.sse.max[i] = output_max;
@@ -479,7 +480,7 @@
 static inline union xnn_f32_hswish_params xnn_compute_f32_hswish_params(void)
 {
   union xnn_f32_hswish_params params;
-#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
   for (uint32_t i = 0; i < 4; i++) {
     params.sse.sixth[i] = 0x1.555556p-3f;
     params.sse.half[i] = 0.5f;
@@ -508,7 +509,7 @@
   float output_max)
 {
   union xnn_f32_spchw_params params;
-#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
   switch (width % 4) {
     case 0:
       params.sse.mask[0] = UINT32_C(0xFFFFFFFF);
@@ -621,7 +622,7 @@
     params.sse.max[i] = output_max;
     params.sse.min[i] = output_min;
   }
-#elif CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+#elif XNN_ARCH_ARM || XNN_ARCH_ARM64
   switch (width % 4) {
     case 0:
       params.neon.mask[0] = UINT32_C(0xFFFFFFFF);
@@ -743,7 +744,7 @@
   union xnn_f32_spchw_params* params,
   uint32_t width)
 {
-  #if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+  #if XNN_ARCH_X86 || XNN_ARCH_X86_64
     switch (width % 4) {
       case 0:
         params->sse.mask[0] = UINT32_C(0xFFFFFFFF);
@@ -852,7 +853,7 @@
         params->sse.mask_odd[3] = 0;
         break;
     }
-  #elif CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+  #elif XNN_ARCH_ARM || XNN_ARCH_ARM64
     switch (width % 4) {
       case 0:
         params->neon.mask[0] = UINT32_C(0xFFFFFFFF);
@@ -982,12 +983,12 @@
   assert(output_min < output_max);
 
   union xnn_u8_output_params params;
-  #if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+  #if XNN_ARCH_X86 || XNN_ARCH_X86_64
     for (uint32_t i = 0; i < 16; i++) {
       params.sse2.max[i] = output_max;
       params.sse2.min[i] = output_min;
     }
-  #elif CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+  #elif XNN_ARCH_ARM || XNN_ARCH_ARM64
     params.neon.max = output_max;
     params.neon.min = output_min;
   #else
@@ -1044,7 +1045,7 @@
   assert(b_multiplier < UINT32_C(0x00400000));
 
   union xnn_q8_add_params params;
-  #if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+  #if XNN_ARCH_X86 || XNN_ARCH_X86_64
     const uint32_t remainder_mask = (UINT32_C(1) << shift) - UINT32_C(1);
     const uint32_t remainder_threshold = remainder_mask >> 1;
     const int32_t zero_point_product =
@@ -1072,7 +1073,7 @@
       params.sse2.y_max[i] = output_max;
       params.sse2.y_min[i] = output_min;
     }
-  #elif CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+  #elif XNN_ARCH_ARM || XNN_ARCH_ARM64
     params.neon.a_zero_point = a_zero_point;
     params.neon.b_zero_point = b_zero_point;
     params.neon.y_zero_point = (int16_t) (uint16_t) output_zero_point;
@@ -1200,7 +1201,7 @@
   assert(shift < 32);
 
   union xnn_q31_requantization_params params;
-  #if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+  #if XNN_ARCH_X86 || XNN_ARCH_X86_64
     const uint32_t remainder_mask = (UINT32_C(1) << shift) - UINT32_C(1);
     const uint32_t remainder_threshold = remainder_mask >> 1;
     params.sse2.multiplier[0] = multiplier;
@@ -1226,7 +1227,7 @@
       params.sse2.max[i] = max;
       params.sse2.min[i] = min;
     }
-  #elif CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+  #elif XNN_ARCH_ARM || XNN_ARCH_ARM64
     params.neon.multiplier = multiplier;
     params.neon.right_shift = -shift;
     params.neon.zero_point = (int16_t) (uint16_t) zero_point;