Limit direct dependencies on cpuinfo
PiperOrigin-RevId: 272245408
diff --git a/src/init.c b/src/init.c
index b0ad23d..a5ca59d 100644
--- a/src/init.c
+++ b/src/init.c
@@ -18,6 +18,7 @@
#include <xnnpack/argmaxpool.h>
#include <xnnpack/avgpool.h>
#include <xnnpack/clamp.h>
+#include <xnnpack/common.h>
#include <xnnpack/conv.h>
#include <xnnpack/dwconv.h>
#include <xnnpack/gavgpool.h>
@@ -48,15 +49,15 @@
.initialized = false
};
-#if CPUINFO_ARCH_PNACL || CPUINFO_ARCH_ASMJS || CPUINFO_ARCH_WASM || CPUINFO_ARCH_WASMSIMD
+#if XNN_ARCH_PNACL || XNN_ARCH_ASMJS || XNN_ARCH_WASM || XNN_ARCH_WASMSIMD
extern uint32_t xnn_stub_wasm_f32_sub(uint32_t a, uint32_t b);
#endif
-#if CPUINFO_ARCH_PNACL || CPUINFO_ARCH_WASM || CPUINFO_ARCH_WASMSIMD
+#if XNN_ARCH_PNACL || XNN_ARCH_WASM || XNN_ARCH_WASMSIMD
extern uint32_t xnn_stub_wasm_f32_min(uint32_t a, uint32_t b);
#endif
static void init(void) {
-#if CPUINFO_ARCH_ARM
+#if XNN_ARCH_ARM
if (!cpuinfo_has_arm_neon()) {
xnn_log_error("XNNPACK initialization failed: NEON is not supported");
return;
@@ -206,7 +207,7 @@
.xm = (xnn_zipv_ukernel_function) xnn_x32_zip_xm_ukernel__neon,
};
-#elif CPUINFO_ARCH_ARM64
+#elif XNN_ARCH_ARM64
/**************************** Q8 micro-kernels ****************************/
xnn_params.q8.gemm = (struct gemm_parameters) {
@@ -485,7 +486,7 @@
.xm = (xnn_zipv_ukernel_function) xnn_x32_zip_xm_ukernel__neon,
};
-#elif CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+#elif XNN_ARCH_X86 || XNN_ARCH_X86_64
if (!cpuinfo_has_x86_sse2()) {
xnn_log_error("XNNPACK initialization failed: SSE2 is not supported");
return;
@@ -649,7 +650,7 @@
.xm = (xnn_zipv_ukernel_function) xnn_x32_zip_xm_ukernel__sse2,
};
-#elif CPUINFO_ARCH_PNACL || CPUINFO_ARCH_WASMSIMD
+#elif XNN_ARCH_PNACL || XNN_ARCH_WASMSIMD
/**************************** Q8 micro-kernels ****************************/
xnn_params.q8.gemm = (struct gemm_parameters) {
.gemm = (xnn_gemm_ukernel_function) xnn_q8_gemm_ukernel_2x2__scalar,
@@ -786,7 +787,7 @@
.xm = (xnn_zipv_ukernel_function) xnn_x32_zip_xm_ukernel__psimd,
};
-#elif CPUINFO_ARCH_WASM || CPUINFO_ARCH_ASMJS
+#elif XNN_ARCH_WASM || XNN_ARCH_ASMJS
// Unlike most other architectures, on x86/x86-64 when floating-point instructions
// have no NaN arguments, but produce NaN output, the output NaN has sign bit set.
// We use it to distinguish x86/x86-64 from other architectures, by doing subtraction
diff --git a/src/wasm-stubs.c b/src/wasm-stubs.c
index 29826d7..b9273c5 100644
--- a/src/wasm-stubs.c
+++ b/src/wasm-stubs.c
@@ -12,8 +12,8 @@
return fp32_to_bits(fp32_from_bits(a) - fp32_from_bits(b));
}
-#if CPUINFO_ARCH_WASM || CPUINFO_ARCH_WASMSIMD
+#if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD
uint32_t xnn_stub_wasm_f32_min(uint32_t a, uint32_t b) {
return fp32_to_bits(__builtin_wasm_min_f32(fp32_from_bits(a), fp32_from_bits(b)));
}
-#endif // CPUINFO_ARCH_WASM || CPUINFO_ARCH_WASMSIMD
+#endif // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD
diff --git a/src/xnnpack/allocator.h b/src/xnnpack/allocator.h
index 303aa37..c946656 100644
--- a/src/xnnpack/allocator.h
+++ b/src/xnnpack/allocator.h
@@ -12,7 +12,7 @@
#include <malloc.h>
#endif
-#include <cpuinfo.h>
+#include <xnnpack/common.h>
extern int posix_memalign(void **memptr, size_t alignment, size_t size);
@@ -22,7 +22,7 @@
inline static void* xnn_allocate_memory(size_t memory_size) {
void* memory_ptr = NULL;
-#if CPUINFO_ARCH_ASMJS || CPUINFO_ARCH_WASM
+#if XNN_ARCH_ASMJS || XNN_ARCH_WASM
memory_ptr = malloc(memory_size);
#elif defined(__ANDROID__)
memory_ptr = memalign(XNN_ALLOCATION_ALIGNMENT, memory_size);
diff --git a/src/xnnpack/common.h b/src/xnnpack/common.h
index 2dbc451..673b92a 100644
--- a/src/xnnpack/common.h
+++ b/src/xnnpack/common.h
@@ -9,6 +9,83 @@
#pragma once
+// Define architecture identification macros (XNN_ARCH_*) from compiler-predefined macros
+
+#if defined(__i386__) || defined(__i486__) || defined(__i586__) || defined(__i686__) || defined(_M_IX86)
+ #define XNN_ARCH_X86 1
+#endif
+
+#if defined(__x86_64__) || defined(__x86_64) || defined(_M_X64) || defined(_M_AMD64)
+ #define XNN_ARCH_X86_64 1
+#endif
+
+#if defined(__arm__) || defined(_M_ARM)
+ #define XNN_ARCH_ARM 1
+#endif
+
+#if defined(__aarch64__) || defined(_M_ARM64)
+ #define XNN_ARCH_ARM64 1
+#endif
+
+#if defined(__PPC64__) || defined(__ppc64__) || defined(__powerpc64__) || defined(_ARCH_PPC64)
+ #define XNN_ARCH_PPC64 1
+#endif
+
+#if defined(__pnacl__)
+ #define XNN_ARCH_PNACL 1
+#endif
+
+#if defined(__asmjs__)
+ #define XNN_ARCH_ASMJS 1
+#endif
+
+#if defined(__wasm__)
+ #if defined(__wasm_simd128__) // WASMSIMD and WASM are mutually exclusive: only one of them is set to 1
+ #define XNN_ARCH_WASMSIMD 1
+ #else
+ #define XNN_ARCH_WASM 1
+ #endif
+#endif
+
+// Ensure each architecture identification macro is always defined, as either 0 or 1, so it can be tested with plain #if
+
+#ifndef XNN_ARCH_X86
+ #define XNN_ARCH_X86 0
+#endif
+
+#ifndef XNN_ARCH_X86_64
+ #define XNN_ARCH_X86_64 0
+#endif
+
+#ifndef XNN_ARCH_ARM
+ #define XNN_ARCH_ARM 0
+#endif
+
+#ifndef XNN_ARCH_ARM64
+ #define XNN_ARCH_ARM64 0
+#endif
+
+#ifndef XNN_ARCH_PPC64
+ #define XNN_ARCH_PPC64 0
+#endif
+
+#ifndef XNN_ARCH_PNACL
+ #define XNN_ARCH_PNACL 0
+#endif
+
+#ifndef XNN_ARCH_ASMJS
+ #define XNN_ARCH_ASMJS 0
+#endif
+
+#ifndef XNN_ARCH_WASM
+ #define XNN_ARCH_WASM 0
+#endif
+
+#ifndef XNN_ARCH_WASMSIMD
+ #define XNN_ARCH_WASMSIMD 0
+#endif
+
+
#if defined(__GNUC__)
#if defined(__clang__) || (__GNUC__ > 4 || __GNUC__ == 4 && __GNUC_MINOR__ >= 5)
#define XNN_UNREACHABLE do { __builtin_unreachable(); } while (0)
diff --git a/src/xnnpack/isa-checks.h b/src/xnnpack/isa-checks.h
index 0bdf97c..e4aebd6 100644
--- a/src/xnnpack/isa-checks.h
+++ b/src/xnnpack/isa-checks.h
@@ -10,8 +10,10 @@
#include <cpuinfo.h>
+#include <xnnpack/common.h>
-#if CPUINFO_ARCH_PNACL || CPUINFO_ARCH_WASMSIMD
+
+#if XNN_ARCH_PNACL || XNN_ARCH_WASMSIMD
#define TEST_REQUIRES_PSIMD
#else
#define TEST_REQUIRES_PSIMD \
diff --git a/src/xnnpack/params.h b/src/xnnpack/params.h
index 11065c4..7313032 100644
--- a/src/xnnpack/params.h
+++ b/src/xnnpack/params.h
@@ -12,8 +12,6 @@
#include <stddef.h>
#include <stdint.h>
-#include <cpuinfo.h>
-
#include <xnnpack/common.h>
#define XNN_INTERNAL_EXTRA_BYTES 32
@@ -29,12 +27,12 @@
float max;
float min;
} scalar;
-#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
struct {
XNN_ALIGN(16) float max[4];
XNN_ALIGN(16) float min[4];
} sse;
-#endif // CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
};
union xnn_f32_spchw_params {
@@ -42,7 +40,7 @@
float max;
float min;
} scalar;
-#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
struct {
XNN_ALIGN(16) uint32_t mask_even[4]; // used by stride 2 kernels
XNN_ALIGN(16) uint32_t mask_odd[4]; // used by stride 2 kernels
@@ -50,8 +48,8 @@
float min;
float max;
} neon;
-#endif // CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
-#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
struct {
XNN_ALIGN(16) uint32_t mask_even[4]; // used by stride 2 kernels
XNN_ALIGN(16) uint32_t mask_odd[4]; // used by stride 2 kernels
@@ -59,7 +57,7 @@
XNN_ALIGN(16) float max[4];
XNN_ALIGN(16) float min[4];
} sse;
-#endif // CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
};
union xnn_u8_output_params {
@@ -67,18 +65,18 @@
int32_t max;
int32_t min;
} scalar;
-#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
struct {
uint8_t max;
uint8_t min;
} neon;
-#endif // CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
-#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
struct {
XNN_ALIGN(16) uint8_t max[16];
XNN_ALIGN(16) uint8_t min[16];
} sse2;
-#endif // CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
};
union xnn_f32_avgpool_params {
@@ -87,20 +85,20 @@
float output_min;
float output_max;
} scalar;
-#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
struct {
XNN_ALIGN(16) float multiplier[4];
XNN_ALIGN(16) float output_max[4];
XNN_ALIGN(16) float output_min[4];
} sse2;
-#endif // CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
-#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
struct {
XNN_ALIGN(16) float multiplier;
XNN_ALIGN(16) float output_max;
XNN_ALIGN(16) float output_min;
} neon;
-#endif // CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
};
union xnn_f32_gavgpool_params {
@@ -109,22 +107,22 @@
float output_min;
float output_max;
} scalar;
-#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
struct {
XNN_ALIGN(16) float multiplier[4];
XNN_ALIGN(16) float output_max[4];
XNN_ALIGN(16) float output_min[4];
XNN_ALIGN(16) uint32_t mask[4];
} sse;
-#endif // CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
-#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
struct {
XNN_ALIGN(16) float multiplier;
XNN_ALIGN(16) float output_max;
XNN_ALIGN(16) float output_min;
XNN_ALIGN(16) uint32_t mask[4];
} neon;
-#endif // CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64 */
+#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
};
union xnn_f32_hswish_params {
@@ -133,13 +131,13 @@
float half;
float one;
} scalar;
-#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
struct {
XNN_ALIGN(16) float sixth[4];
XNN_ALIGN(16) float half[4];
XNN_ALIGN(16) float one[4];
} sse;
-#endif // CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
};
union xnn_q8_gemm_params {
@@ -154,7 +152,7 @@
int32_t output_max_less_zero_point;
int32_t output_zero_point;
} scalar;
-#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
struct {
int16_t kernel_zero_point;
int16_t input_zero_point;
@@ -164,8 +162,8 @@
uint8_t output_max;
uint8_t output_min;
} neon;
-#endif // CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
-#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
struct {
XNN_ALIGN(16) int16_t kernel_zero_point[8];
XNN_ALIGN(16) int16_t input_zero_point[8];
@@ -178,7 +176,7 @@
XNN_ALIGN(16) uint8_t output_max[16];
XNN_ALIGN(16) uint8_t output_min[16];
} sse2;
-#endif // CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
};
union xnn_q8_add_params {
@@ -193,7 +191,7 @@
int32_t y_max;
int32_t y_min;
} scalar;
-#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
struct {
uint8_t a_zero_point;
uint8_t b_zero_point;
@@ -204,8 +202,8 @@
uint8_t y_max;
uint8_t y_min;
} neon;
-#endif // CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
-#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
struct {
XNN_ALIGN(16) int32_t zero_point_product[4];
XNN_ALIGN(16) uint16_t a_multiplier_lo[8];
@@ -221,7 +219,7 @@
uint32_t a_multiplier;
uint32_t b_multiplier;
} sse2;
-#endif // CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
};
union xnn_q8_avgpool_params {
@@ -234,7 +232,7 @@
int32_t output_max_less_zero_point;
int32_t output_zero_point;
} scalar;
-#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
struct {
int32_t bias;
int32_t multiplier;
@@ -243,8 +241,8 @@
uint8_t output_max;
uint8_t output_min;
} neon;
-#endif // CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
-#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
struct {
XNN_ALIGN(16) int32_t bias[4];
XNN_ALIGN(16) uint32_t multiplier[4];
@@ -254,7 +252,7 @@
XNN_ALIGN(16) uint8_t output_max[16];
XNN_ALIGN(16) uint8_t output_min[16];
} sse2;
-#endif // CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
};
union xnn_fp32_requantization_params {
@@ -330,7 +328,7 @@
int32_t max_less_zero_point;
int32_t zero_point;
} scalar;
-#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
struct {
int32_t multiplier;
int32_t right_shift;
@@ -338,8 +336,8 @@
uint8_t max;
uint8_t min;
} neon;
-#endif // CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
-#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
struct {
XNN_ALIGN(16) uint32_t multiplier[4];
XNN_ALIGN(16) uint64_t rounding[2];
@@ -350,7 +348,7 @@
XNN_ALIGN(16) uint8_t max[16];
XNN_ALIGN(16) uint8_t min[16];
} sse2;
-#endif // CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
};
union xnn_requantization_params {
diff --git a/src/xnnpack/requantization.h b/src/xnnpack/requantization.h
index 51cff74..77f771a 100644
--- a/src/xnnpack/requantization.h
+++ b/src/xnnpack/requantization.h
@@ -22,6 +22,7 @@
#include <fp16.h>
+#include <xnnpack/common.h>
#include <xnnpack/params.h>
#include <xnnpack/scalar-utils.h>
@@ -87,7 +88,7 @@
assert(shift < 32);
union xnn_q8_gemm_params params;
- #if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+ #if XNN_ARCH_X86 || XNN_ARCH_X86_64
const uint32_t remainder_mask = (UINT32_C(1) << shift) - UINT32_C(1);
const uint32_t remainder_threshold = remainder_mask >> 1;
for (uint32_t i = 0; i < 8; i++) {
@@ -117,7 +118,7 @@
params.sse2.output_max[i] = output_max;
params.sse2.output_min[i] = output_min;
}
- #elif CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+ #elif XNN_ARCH_ARM || XNN_ARCH_ARM64
params.neon.input_zero_point = (int16_t) (uint16_t) input_zero_point;
params.neon.kernel_zero_point = (int16_t) (uint16_t) kernel_zero_point;
params.neon.multiplier = multiplier;
@@ -166,7 +167,7 @@
assert(shift < 64);
union xnn_q8_avgpool_params params;
- #if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+ #if XNN_ARCH_X86 || XNN_ARCH_X86_64
const uint32_t right_shift = (uint32_t) shift;
const uint64_t rounding = UINT64_C(1) << (right_shift - 1);
params.sse2.bias[0] = bias;
@@ -188,7 +189,7 @@
params.sse2.output_max[i] = output_max;
params.sse2.output_min[i] = output_min;
}
- #elif CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+ #elif XNN_ARCH_ARM || XNN_ARCH_ARM64
params.neon.bias = bias;
params.neon.multiplier = multiplier;
params.neon.left_shift = (int64_t) -shift;
@@ -252,7 +253,7 @@
union xnn_f32_avgpool_params* params,
float multiplier)
{
- #if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+ #if XNN_ARCH_X86 || XNN_ARCH_X86_64
for (uint32_t i = 0; i < 4; i++) {
params->sse2.multiplier[i] = multiplier;
}
@@ -267,7 +268,7 @@
float output_max)
{
union xnn_f32_avgpool_params params;
-#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
for (uint32_t i = 0; i < 4; i++) {
params.sse2.multiplier[i] = multiplier;
params.sse2.output_min[i] = output_min;
@@ -288,7 +289,7 @@
uint32_t width)
{
union xnn_f32_gavgpool_params params;
- #if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+ #if XNN_ARCH_X86 || XNN_ARCH_X86_64
for (uint32_t i = 0; i < 4; i++) {
params.sse.multiplier[i] = multiplier;
params.sse.output_min[i] = output_min;
@@ -320,7 +321,7 @@
params.sse.mask[3] = 0;
break;
}
-#elif CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+#elif XNN_ARCH_ARM || XNN_ARCH_ARM64
switch (width % 4) {
case 0:
params.neon.mask[0] = UINT32_C(0xFFFFFFFF);
@@ -363,7 +364,7 @@
float multiplier,
uint32_t width)
{
- #if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+ #if XNN_ARCH_X86 || XNN_ARCH_X86_64
for (uint32_t i = 0; i < 4; i++) {
params->sse.multiplier[i] = multiplier;
}
@@ -393,7 +394,7 @@
params->sse.mask[3] = 0;
break;
}
- #elif CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+ #elif XNN_ARCH_ARM || XNN_ARCH_ARM64
params->neon.multiplier = multiplier;
switch (width % 4) {
case 0:
@@ -454,7 +455,7 @@
float output_max)
{
union xnn_f32_output_params params;
-#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
for (uint32_t i = 0; i < 4; i++) {
params.sse.min[i] = output_min;
params.sse.max[i] = output_max;
@@ -479,7 +480,7 @@
static inline union xnn_f32_hswish_params xnn_compute_f32_hswish_params(void)
{
union xnn_f32_hswish_params params;
-#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
for (uint32_t i = 0; i < 4; i++) {
params.sse.sixth[i] = 0x1.555556p-3f;
params.sse.half[i] = 0.5f;
@@ -508,7 +509,7 @@
float output_max)
{
union xnn_f32_spchw_params params;
-#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
switch (width % 4) {
case 0:
params.sse.mask[0] = UINT32_C(0xFFFFFFFF);
@@ -621,7 +622,7 @@
params.sse.max[i] = output_max;
params.sse.min[i] = output_min;
}
-#elif CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+#elif XNN_ARCH_ARM || XNN_ARCH_ARM64
switch (width % 4) {
case 0:
params.neon.mask[0] = UINT32_C(0xFFFFFFFF);
@@ -743,7 +744,7 @@
union xnn_f32_spchw_params* params,
uint32_t width)
{
- #if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+ #if XNN_ARCH_X86 || XNN_ARCH_X86_64
switch (width % 4) {
case 0:
params->sse.mask[0] = UINT32_C(0xFFFFFFFF);
@@ -852,7 +853,7 @@
params->sse.mask_odd[3] = 0;
break;
}
- #elif CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+ #elif XNN_ARCH_ARM || XNN_ARCH_ARM64
switch (width % 4) {
case 0:
params->neon.mask[0] = UINT32_C(0xFFFFFFFF);
@@ -982,12 +983,12 @@
assert(output_min < output_max);
union xnn_u8_output_params params;
- #if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+ #if XNN_ARCH_X86 || XNN_ARCH_X86_64
for (uint32_t i = 0; i < 16; i++) {
params.sse2.max[i] = output_max;
params.sse2.min[i] = output_min;
}
- #elif CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+ #elif XNN_ARCH_ARM || XNN_ARCH_ARM64
params.neon.max = output_max;
params.neon.min = output_min;
#else
@@ -1044,7 +1045,7 @@
assert(b_multiplier < UINT32_C(0x00400000));
union xnn_q8_add_params params;
- #if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+ #if XNN_ARCH_X86 || XNN_ARCH_X86_64
const uint32_t remainder_mask = (UINT32_C(1) << shift) - UINT32_C(1);
const uint32_t remainder_threshold = remainder_mask >> 1;
const int32_t zero_point_product =
@@ -1072,7 +1073,7 @@
params.sse2.y_max[i] = output_max;
params.sse2.y_min[i] = output_min;
}
- #elif CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+ #elif XNN_ARCH_ARM || XNN_ARCH_ARM64
params.neon.a_zero_point = a_zero_point;
params.neon.b_zero_point = b_zero_point;
params.neon.y_zero_point = (int16_t) (uint16_t) output_zero_point;
@@ -1200,7 +1201,7 @@
assert(shift < 32);
union xnn_q31_requantization_params params;
- #if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+ #if XNN_ARCH_X86 || XNN_ARCH_X86_64
const uint32_t remainder_mask = (UINT32_C(1) << shift) - UINT32_C(1);
const uint32_t remainder_threshold = remainder_mask >> 1;
params.sse2.multiplier[0] = multiplier;
@@ -1226,7 +1227,7 @@
params.sse2.max[i] = max;
params.sse2.min[i] = min;
}
- #elif CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
+ #elif XNN_ARCH_ARM || XNN_ARCH_ARM64
params.neon.multiplier = multiplier;
params.neon.right_shift = -shift;
params.neon.zero_point = (int16_t) (uint16_t) zero_point;