Add checks for target ISA in microbenchmarks

Pass an optional ISA check to each benchmark and return early when the check fails, to avoid running micro-kernels incompatible with the host processor

PiperOrigin-RevId: 282471857
diff --git a/bench/f32-igemm.cc b/bench/f32-igemm.cc
index fc82032..197f758 100644
--- a/bench/f32-igemm.cc
+++ b/bench/f32-igemm.cc
@@ -27,11 +27,15 @@
 
 static void IGEMMBenchmark(benchmark::State& state,
   xnn_f32_igemm_ukernel_function f32_igemm,
-  uint32_t mr, uint32_t nr, uint32_t kr, uint32_t sr)
+  uint32_t mr, uint32_t nr, uint32_t kr, uint32_t sr,
+  benchmark::utils::IsaCheckFunction isa_check = nullptr)
 {
   if (!cpuinfo_initialize()) {
     state.SkipWithError("cpuinfo initialization failed");
   }
+  if (isa_check && !isa_check(state)) {
+    return;
+  }
 
   const size_t input_height = state.range(0);
   const size_t input_width = state.range(1);
@@ -152,71 +156,71 @@
 
 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
   static void f32_igemm_1x8__neon_lane_ld64(benchmark::State& state, const char* net) {
-    IGEMMBenchmark(state, xnn_f32_igemm_ukernel_1x8__neon_lane_ld64, 1, 8, 1, 1);
+    IGEMMBenchmark(state, xnn_f32_igemm_ukernel_1x8__neon_lane_ld64, 1, 8, 1, 1, benchmark::utils::CheckNEON);
   }
   static void f32_igemm_4x2__neon_lane_ld64(benchmark::State& state, const char* net) {
-    IGEMMBenchmark(state, xnn_f32_igemm_ukernel_4x2__neon_lane_ld64, 4, 2, 1, 1);
+    IGEMMBenchmark(state, xnn_f32_igemm_ukernel_4x2__neon_lane_ld64, 4, 2, 1, 1, benchmark::utils::CheckNEON);
   }
   static void f32_igemm_4x4__neon_lane_ld64(benchmark::State& state, const char* net) {
-    IGEMMBenchmark(state, xnn_f32_igemm_ukernel_4x4__neon_lane_ld64, 4, 4, 1, 1);
+    IGEMMBenchmark(state, xnn_f32_igemm_ukernel_4x4__neon_lane_ld64, 4, 4, 1, 1, benchmark::utils::CheckNEON);
   }
   static void f32_igemm_4x8__neon_lane_ld128(benchmark::State& state, const char* net) {
-    IGEMMBenchmark(state, xnn_f32_igemm_ukernel_4x8__neon_lane_ld128, 4, 8, 1, 1);
+    IGEMMBenchmark(state, xnn_f32_igemm_ukernel_4x8__neon_lane_ld128, 4, 8, 1, 1, benchmark::utils::CheckNEON);
   }
   static void f32_igemm_4x8__neon_lane_ld64(benchmark::State& state, const char* net) {
-    IGEMMBenchmark(state, xnn_f32_igemm_ukernel_4x8__neon_lane_ld64, 4, 8, 1, 1);
+    IGEMMBenchmark(state, xnn_f32_igemm_ukernel_4x8__neon_lane_ld64, 4, 8, 1, 1, benchmark::utils::CheckNEON);
   }
 
   static void f32_igemm_1x8__neon_dup_ld64(benchmark::State& state, const char* net) {
-    IGEMMBenchmark(state, xnn_f32_igemm_ukernel_1x8__neon_dup_ld64, 1, 8, 1, 1);
+    IGEMMBenchmark(state, xnn_f32_igemm_ukernel_1x8__neon_dup_ld64, 1, 8, 1, 1, benchmark::utils::CheckNEON);
   }
   static void f32_igemm_4x8__neon_dup_ld128(benchmark::State& state, const char* net) {
-    IGEMMBenchmark(state, xnn_f32_igemm_ukernel_4x8__neon_dup_ld128, 4, 8, 1, 1);
+    IGEMMBenchmark(state, xnn_f32_igemm_ukernel_4x8__neon_dup_ld128, 4, 8, 1, 1, benchmark::utils::CheckNEON);
   }
   static void f32_igemm_4x8__neon_dup_ld64(benchmark::State& state, const char* net) {
-    IGEMMBenchmark(state, xnn_f32_igemm_ukernel_4x8__neon_dup_ld64, 4, 8, 1, 1);
+    IGEMMBenchmark(state, xnn_f32_igemm_ukernel_4x8__neon_dup_ld64, 4, 8, 1, 1, benchmark::utils::CheckNEON);
   }
   static void f32_igemm_6x8__neon_dup_ld64(benchmark::State& state, const char* net) {
-    IGEMMBenchmark(state, xnn_f32_igemm_ukernel_6x8__neon_dup_ld64, 6, 8, 1, 1);
+    IGEMMBenchmark(state, xnn_f32_igemm_ukernel_6x8__neon_dup_ld64, 6, 8, 1, 1, benchmark::utils::CheckNEON);
   }
   static void f32_igemm_1x8__neonfma_dup_ld64(benchmark::State& state, const char* net) {
-    IGEMMBenchmark(state, xnn_f32_igemm_ukernel_1x8__neonfma_dup_ld64, 1, 8, 1, 1);
+    IGEMMBenchmark(state, xnn_f32_igemm_ukernel_1x8__neonfma_dup_ld64, 1, 8, 1, 1, benchmark::utils::CheckNEONFMA);
   }
   static void f32_igemm_4x8__neonfma_dup_ld128(benchmark::State& state, const char* net) {
-    IGEMMBenchmark(state, xnn_f32_igemm_ukernel_4x8__neonfma_dup_ld128, 4, 8, 1, 1);
+    IGEMMBenchmark(state, xnn_f32_igemm_ukernel_4x8__neonfma_dup_ld128, 4, 8, 1, 1, benchmark::utils::CheckNEONFMA);
   }
   static void f32_igemm_4x8__neonfma_dup_ld64(benchmark::State& state, const char* net) {
-    IGEMMBenchmark(state, xnn_f32_igemm_ukernel_4x8__neonfma_dup_ld64, 4, 8, 1, 1);
+    IGEMMBenchmark(state, xnn_f32_igemm_ukernel_4x8__neonfma_dup_ld64, 4, 8, 1, 1, benchmark::utils::CheckNEONFMA);
   }
   static void f32_igemm_6x8__neonfma_dup_ld64(benchmark::State& state, const char* net) {
-    IGEMMBenchmark(state, xnn_f32_igemm_ukernel_6x8__neonfma_dup_ld64, 6, 8, 1, 1);
+    IGEMMBenchmark(state, xnn_f32_igemm_ukernel_6x8__neonfma_dup_ld64, 6, 8, 1, 1, benchmark::utils::CheckNEONFMA);
   }
   static void f32_igemm_6x8__neon_lane_ld64(benchmark::State& state, const char* net) {
-    IGEMMBenchmark(state, xnn_f32_igemm_ukernel_6x8__neon_lane_ld64, 6, 8, 1, 1);
+    IGEMMBenchmark(state, xnn_f32_igemm_ukernel_6x8__neon_lane_ld64, 6, 8, 1, 1, benchmark::utils::CheckNEON);
   }
   static void f32_igemm_1x8s4__neon(benchmark::State& state, const char* net) {
-    IGEMMBenchmark(state, xnn_f32_igemm_ukernel_1x8s4__neon, 1, 8, 1, 4);
+    IGEMMBenchmark(state, xnn_f32_igemm_ukernel_1x8s4__neon, 1, 8, 1, 4, benchmark::utils::CheckNEON);
   }
   static void f32_igemm_1x8s4__neonfma(benchmark::State& state, const char* net) {
-    IGEMMBenchmark(state, xnn_f32_igemm_ukernel_1x8s4__neonfma, 1, 8, 1, 4);
+    IGEMMBenchmark(state, xnn_f32_igemm_ukernel_1x8s4__neonfma, 1, 8, 1, 4, benchmark::utils::CheckNEONFMA);
   }
   static void f32_igemm_4x8s4__neon(benchmark::State& state, const char* net) {
-    IGEMMBenchmark(state, xnn_f32_igemm_ukernel_4x8s4__neon, 4, 8, 1, 4);
+    IGEMMBenchmark(state, xnn_f32_igemm_ukernel_4x8s4__neon, 4, 8, 1, 4, benchmark::utils::CheckNEON);
   }
   static void f32_igemm_4x8s4__neonfma(benchmark::State& state, const char* net) {
-    IGEMMBenchmark(state, xnn_f32_igemm_ukernel_4x8s4__neonfma, 4, 8, 1, 4);
+    IGEMMBenchmark(state, xnn_f32_igemm_ukernel_4x8s4__neonfma, 4, 8, 1, 4, benchmark::utils::CheckNEONFMA);
   }
   static void f32_igemm_6x8s4__neon(benchmark::State& state, const char* net) {
-    IGEMMBenchmark(state, xnn_f32_igemm_ukernel_6x8s4__neon, 6, 8, 1, 4);
+    IGEMMBenchmark(state, xnn_f32_igemm_ukernel_6x8s4__neon, 6, 8, 1, 4, benchmark::utils::CheckNEON);
   }
   static void f32_igemm_6x8s4__neonfma(benchmark::State& state, const char* net) {
-    IGEMMBenchmark(state, xnn_f32_igemm_ukernel_6x8s4__neonfma, 6, 8, 1, 4);
+    IGEMMBenchmark(state, xnn_f32_igemm_ukernel_6x8s4__neonfma, 6, 8, 1, 4, benchmark::utils::CheckNEONFMA);
   }
   static void f32_igemm_8x8s4__neon(benchmark::State& state, const char* net) {
-    IGEMMBenchmark(state, xnn_f32_igemm_ukernel_8x8s4__neon, 8, 8, 1, 4);
+    IGEMMBenchmark(state, xnn_f32_igemm_ukernel_8x8s4__neon, 8, 8, 1, 4, benchmark::utils::CheckNEON);
   }
   static void f32_igemm_8x8s4__neonfma(benchmark::State& state, const char* net) {
-    IGEMMBenchmark(state, xnn_f32_igemm_ukernel_8x8s4__neonfma, 8, 8, 1, 4);
+    IGEMMBenchmark(state, xnn_f32_igemm_ukernel_8x8s4__neonfma, 8, 8, 1, 4, benchmark::utils::CheckNEONFMA);
   }
 
   BENCHMARK_CONV(f32_igemm_1x8__neon_lane_ld64)
@@ -362,47 +366,47 @@
   }
 
   static void f32_igemm_1x8__avx_broadcast(benchmark::State& state, const char* net) {
-    IGEMMBenchmark(state, xnn_f32_igemm_ukernel_1x8__avx_broadcast, 1, 8, 1, 1);
+    IGEMMBenchmark(state, xnn_f32_igemm_ukernel_1x8__avx_broadcast, 1, 8, 1, 1, benchmark::utils::CheckAVX);
   }
 
   static void f32_igemm_4x8__avx_broadcast(benchmark::State& state, const char* net) {
-    IGEMMBenchmark(state, xnn_f32_igemm_ukernel_4x8__avx_broadcast, 4, 8, 1, 1);
+    IGEMMBenchmark(state, xnn_f32_igemm_ukernel_4x8__avx_broadcast, 4, 8, 1, 1, benchmark::utils::CheckAVX);
   }
 
   static void f32_igemm_5x8__avx_broadcast(benchmark::State& state, const char* net) {
-    IGEMMBenchmark(state, xnn_f32_igemm_ukernel_5x8__avx_broadcast, 5, 8, 1, 1);
+    IGEMMBenchmark(state, xnn_f32_igemm_ukernel_5x8__avx_broadcast, 5, 8, 1, 1, benchmark::utils::CheckAVX);
   }
 
   static void f32_igemm_6x8__avx_broadcast(benchmark::State& state, const char* net) {
-    IGEMMBenchmark(state, xnn_f32_igemm_ukernel_6x8__avx_broadcast, 6, 8, 1, 1);
+    IGEMMBenchmark(state, xnn_f32_igemm_ukernel_6x8__avx_broadcast, 6, 8, 1, 1, benchmark::utils::CheckAVX);
   }
 
   static void f32_igemm_7x8__avx_broadcast(benchmark::State& state, const char* net) {
-    IGEMMBenchmark(state, xnn_f32_igemm_ukernel_7x8__avx_broadcast, 7, 8, 1, 1);
+    IGEMMBenchmark(state, xnn_f32_igemm_ukernel_7x8__avx_broadcast, 7, 8, 1, 1, benchmark::utils::CheckAVX);
   }
 
   static void f32_igemm_1x8__fma3_broadcast(benchmark::State& state, const char* net) {
-    IGEMMBenchmark(state, xnn_f32_igemm_ukernel_1x8__fma3_broadcast, 1, 8, 1, 1);
+    IGEMMBenchmark(state, xnn_f32_igemm_ukernel_1x8__fma3_broadcast, 1, 8, 1, 1, benchmark::utils::CheckFMA3);
   }
 
   static void f32_igemm_4x8__fma3_broadcast(benchmark::State& state, const char* net) {
-    IGEMMBenchmark(state, xnn_f32_igemm_ukernel_4x8__fma3_broadcast, 4, 8, 1, 1);
+    IGEMMBenchmark(state, xnn_f32_igemm_ukernel_4x8__fma3_broadcast, 4, 8, 1, 1, benchmark::utils::CheckFMA3);
   }
 
   static void f32_igemm_5x8__fma3_broadcast(benchmark::State& state, const char* net) {
-    IGEMMBenchmark(state, xnn_f32_igemm_ukernel_5x8__fma3_broadcast, 5, 8, 1, 1);
+    IGEMMBenchmark(state, xnn_f32_igemm_ukernel_5x8__fma3_broadcast, 5, 8, 1, 1, benchmark::utils::CheckFMA3);
   }
 
   static void f32_igemm_6x8__fma3_broadcast(benchmark::State& state, const char* net) {
-    IGEMMBenchmark(state, xnn_f32_igemm_ukernel_6x8__fma3_broadcast, 6, 8, 1, 1);
+    IGEMMBenchmark(state, xnn_f32_igemm_ukernel_6x8__fma3_broadcast, 6, 8, 1, 1, benchmark::utils::CheckFMA3);
   }
 
   static void f32_igemm_7x8__fma3_broadcast(benchmark::State& state, const char* net) {
-    IGEMMBenchmark(state, xnn_f32_igemm_ukernel_7x8__fma3_broadcast, 7, 8, 1, 1);
+    IGEMMBenchmark(state, xnn_f32_igemm_ukernel_7x8__fma3_broadcast, 7, 8, 1, 1, benchmark::utils::CheckFMA3);
   }
 
   static void f32_igemm_8x8__fma3_broadcast(benchmark::State& state, const char* net) {
-    IGEMMBenchmark(state, xnn_f32_igemm_ukernel_8x8__fma3_broadcast, 8, 8, 1, 1);
+    IGEMMBenchmark(state, xnn_f32_igemm_ukernel_8x8__fma3_broadcast, 8, 8, 1, 1, benchmark::utils::CheckFMA3);
   }
 
   BENCHMARK_CONV(f32_igemm_1x8__sse_load1)