Generate QU8 GAVGPOOL microkernels from QS8 GAVGPOOL templates

PiperOrigin-RevId: 421477751
diff --git a/BUILD.bazel b/BUILD.bazel
index ae80bb3..2d3b9ce 100644
--- a/BUILD.bazel
+++ b/BUILD.bazel
@@ -241,8 +241,8 @@
     "src/qu8-dwconv/gen/up1x9-minmax-fp32-scalar-fmagic.c",
     "src/qu8-dwconv/gen/up1x25-minmax-fp32-scalar-fmagic.c",
     "src/qu8-f32-vcvt/gen/vcvt-scalar-x4.c",
-    "src/qu8-gavgpool/7p7x-minmax-scalar-c1.c",
-    "src/qu8-gavgpool/7x-minmax-scalar-c1.c",
+    "src/qu8-gavgpool/gen/7p7x-minmax-fp32-scalar-imagic-c1.c",
+    "src/qu8-gavgpool/gen/7x-minmax-fp32-scalar-imagic-c1.c",
     "src/qu8-gemm/gen/1x2-minmax-fp32-scalar-fmagic.c",
     "src/qu8-gemm/gen/2x2-minmax-fp32-scalar-fmagic.c",
     "src/qu8-igemm/gen/1x2-minmax-fp32-scalar-fmagic.c",
@@ -374,8 +374,8 @@
     "src/qu8-dwconv/gen/up1x25-minmax-fp32-scalar-imagic.c",
     "src/qu8-dwconv/gen/up2x9-minmax-fp32-scalar-imagic.c",
     "src/qu8-f32-vcvt/gen/vcvt-scalar-x1.c",
-    "src/qu8-gavgpool/7p7x-minmax-scalar-c1.c",
-    "src/qu8-gavgpool/7x-minmax-scalar-c1.c",
+    "src/qu8-gavgpool/gen/7p7x-minmax-fp32-scalar-imagic-c4.c",
+    "src/qu8-gavgpool/gen/7x-minmax-fp32-scalar-imagic-c4.c",
     "src/qu8-gemm/gen/1x2-minmax-fp32-scalar-imagic.c",
     "src/qu8-gemm/gen/2x2-minmax-fp32-scalar-imagic.c",
     "src/qu8-igemm/gen/1x2-minmax-fp32-scalar-imagic.c",
@@ -517,8 +517,8 @@
     "src/qu8-dwconv/gen/up2x9-minmax-fp32-scalar-lrintf.c",
     "src/qu8-dwconv/gen/up2x25-minmax-fp32-scalar-lrintf.c",
     "src/qu8-f32-vcvt/gen/vcvt-scalar-x4.c",
-    "src/qu8-gavgpool/7p7x-minmax-scalar-c1.c",
-    "src/qu8-gavgpool/7x-minmax-scalar-c1.c",
+    "src/qu8-gavgpool/gen/7p7x-minmax-fp32-scalar-imagic-c1.c",
+    "src/qu8-gavgpool/gen/7x-minmax-fp32-scalar-imagic-c1.c",
     "src/qu8-gemm/gen/1x4-minmax-fp32-scalar-lrintf.c",
     "src/qu8-gemm/gen/3x4-minmax-fp32-scalar-lrintf.c",
     "src/qu8-igemm/gen/1x4-minmax-fp32-scalar-lrintf.c",
@@ -1163,8 +1163,24 @@
     "src/qu8-f32-vcvt/gen/vcvt-scalar-x2.c",
     "src/qu8-f32-vcvt/gen/vcvt-scalar-x3.c",
     "src/qu8-f32-vcvt/gen/vcvt-scalar-x4.c",
-    "src/qu8-gavgpool/7p7x-minmax-scalar-c1.c",
-    "src/qu8-gavgpool/7x-minmax-scalar-c1.c",
+    "src/qu8-gavgpool/gen/7p7x-minmax-fp32-scalar-fmagic-c1.c",
+    "src/qu8-gavgpool/gen/7p7x-minmax-fp32-scalar-fmagic-c2.c",
+    "src/qu8-gavgpool/gen/7p7x-minmax-fp32-scalar-fmagic-c4.c",
+    "src/qu8-gavgpool/gen/7p7x-minmax-fp32-scalar-imagic-c1.c",
+    "src/qu8-gavgpool/gen/7p7x-minmax-fp32-scalar-imagic-c2.c",
+    "src/qu8-gavgpool/gen/7p7x-minmax-fp32-scalar-imagic-c4.c",
+    "src/qu8-gavgpool/gen/7p7x-minmax-fp32-scalar-lrintf-c1.c",
+    "src/qu8-gavgpool/gen/7p7x-minmax-fp32-scalar-lrintf-c2.c",
+    "src/qu8-gavgpool/gen/7p7x-minmax-fp32-scalar-lrintf-c4.c",
+    "src/qu8-gavgpool/gen/7x-minmax-fp32-scalar-fmagic-c1.c",
+    "src/qu8-gavgpool/gen/7x-minmax-fp32-scalar-fmagic-c2.c",
+    "src/qu8-gavgpool/gen/7x-minmax-fp32-scalar-fmagic-c4.c",
+    "src/qu8-gavgpool/gen/7x-minmax-fp32-scalar-imagic-c1.c",
+    "src/qu8-gavgpool/gen/7x-minmax-fp32-scalar-imagic-c2.c",
+    "src/qu8-gavgpool/gen/7x-minmax-fp32-scalar-imagic-c4.c",
+    "src/qu8-gavgpool/gen/7x-minmax-fp32-scalar-lrintf-c1.c",
+    "src/qu8-gavgpool/gen/7x-minmax-fp32-scalar-lrintf-c2.c",
+    "src/qu8-gavgpool/gen/7x-minmax-fp32-scalar-lrintf-c4.c",
     "src/qu8-gemm/gen/1x2-minmax-fp32-scalar-fmagic.c",
     "src/qu8-gemm/gen/1x2-minmax-fp32-scalar-imagic.c",
     "src/qu8-gemm/gen/1x2-minmax-fp32-scalar-lrintf.c",
@@ -2437,6 +2453,14 @@
     "src/qu8-f32-vcvt/gen/vcvt-wasmsimd-x16.c",
     "src/qu8-f32-vcvt/gen/vcvt-wasmsimd-x24.c",
     "src/qu8-f32-vcvt/gen/vcvt-wasmsimd-x32.c",
+    "src/qu8-gavgpool/gen/7p7x-minmax-fp32-wasmsimd-c8.c",
+    "src/qu8-gavgpool/gen/7p7x-minmax-fp32-wasmsimd-c16.c",
+    "src/qu8-gavgpool/gen/7p7x-minmax-fp32-wasmsimd-c24.c",
+    "src/qu8-gavgpool/gen/7p7x-minmax-fp32-wasmsimd-c32.c",
+    "src/qu8-gavgpool/gen/7x-minmax-fp32-wasmsimd-c8.c",
+    "src/qu8-gavgpool/gen/7x-minmax-fp32-wasmsimd-c16.c",
+    "src/qu8-gavgpool/gen/7x-minmax-fp32-wasmsimd-c24.c",
+    "src/qu8-gavgpool/gen/7x-minmax-fp32-wasmsimd-c32.c",
     "src/qu8-gemm/gen/1x4c2-minmax-fp32-wasmsimd-dot16x2-ld64.c",
     "src/qu8-gemm/gen/1x4c2-minmax-fp32-wasmsimd-dot16x2-ld128.c",
     "src/qu8-gemm/gen/1x4c8-minmax-fp32-wasmsimd-dot16x2-ld64.c",
@@ -2616,8 +2640,8 @@
     "src/qu8-dwconv/gen/up8x25-minmax-rndnu-neon-mul8.c",
     "src/qu8-dwconv/gen/up16x9-minmax-rndnu-neon-mul8.c",
     "src/qu8-f32-vcvt/gen/vcvt-neon-x32.c",
-    "src/qu8-gavgpool/7p7x-minmax-neon-c8.c",
-    "src/qu8-gavgpool/7x-minmax-neon-c8.c",
+    "src/qu8-gavgpool/gen/7p7x-minmax-fp32-neon-c8.c",
+    "src/qu8-gavgpool/gen/7x-minmax-fp32-neon-c8.c",
     "src/qu8-gemm/gen/1x8-minmax-rndnu-neon-mlal-lane.c",
     "src/qu8-gemm/gen/1x16-minmax-rndnu-neon-mlal-lane.c",
     "src/qu8-gemm/gen/3x8-minmax-rndnu-neon-mlal-lane.c",
@@ -3583,8 +3607,14 @@
     "src/qu8-f32-vcvt/gen/vcvt-neon-x16.c",
     "src/qu8-f32-vcvt/gen/vcvt-neon-x24.c",
     "src/qu8-f32-vcvt/gen/vcvt-neon-x32.c",
-    "src/qu8-gavgpool/7p7x-minmax-neon-c8.c",
-    "src/qu8-gavgpool/7x-minmax-neon-c8.c",
+    "src/qu8-gavgpool/gen/7p7x-minmax-fp32-neon-c8.c",
+    "src/qu8-gavgpool/gen/7p7x-minmax-fp32-neon-c16.c",
+    "src/qu8-gavgpool/gen/7p7x-minmax-fp32-neon-c24.c",
+    "src/qu8-gavgpool/gen/7p7x-minmax-fp32-neon-c32.c",
+    "src/qu8-gavgpool/gen/7x-minmax-fp32-neon-c8.c",
+    "src/qu8-gavgpool/gen/7x-minmax-fp32-neon-c16.c",
+    "src/qu8-gavgpool/gen/7x-minmax-fp32-neon-c24.c",
+    "src/qu8-gavgpool/gen/7x-minmax-fp32-neon-c32.c",
     "src/qu8-gemm/gen/1x8-minmax-fp32-neon-mlal-lane.c",
     "src/qu8-gemm/gen/1x8-minmax-rndnu-neon-mlal-lane.c",
     "src/qu8-gemm/gen/1x16-minmax-fp32-neon-mlal-lane.c",
@@ -4279,6 +4309,14 @@
     "src/qu8-dwconv/gen/up24x25-minmax-fp32-neonv8-mul16.c",
     "src/qu8-dwconv/gen/up32x9-minmax-fp32-neonv8-mul16.c",
     "src/qu8-dwconv/gen/up32x25-minmax-fp32-neonv8-mul16.c",
+    "src/qu8-gavgpool/gen/7p7x-minmax-fp32-neonv8-c8.c",
+    "src/qu8-gavgpool/gen/7p7x-minmax-fp32-neonv8-c16.c",
+    "src/qu8-gavgpool/gen/7p7x-minmax-fp32-neonv8-c24.c",
+    "src/qu8-gavgpool/gen/7p7x-minmax-fp32-neonv8-c32.c",
+    "src/qu8-gavgpool/gen/7x-minmax-fp32-neonv8-c8.c",
+    "src/qu8-gavgpool/gen/7x-minmax-fp32-neonv8-c16.c",
+    "src/qu8-gavgpool/gen/7x-minmax-fp32-neonv8-c24.c",
+    "src/qu8-gavgpool/gen/7x-minmax-fp32-neonv8-c32.c",
     "src/qu8-gemm/gen/1x16-minmax-fp32-neonv8-mlal-lane.c",
     "src/qu8-gemm/gen/4x16-minmax-fp32-neonv8-mlal-lane.c",
     "src/qu8-igemm/gen/1x16-minmax-fp32-neonv8-mlal-lane.c",
@@ -4779,8 +4817,8 @@
     "src/qu8-dwconv/gen/up8x9-minmax-fp32-sse2-mul16.c",
     "src/qu8-dwconv/gen/up8x25-minmax-fp32-sse2-mul16.c",
     "src/qu8-f32-vcvt/gen/vcvt-sse2-x32.c",
-    "src/qu8-gavgpool/7p7x-minmax-sse2-c8.c",
-    "src/qu8-gavgpool/7x-minmax-sse2-c8.c",
+    "src/qu8-gavgpool/gen/7p7x-minmax-fp32-sse2-c8.c",
+    "src/qu8-gavgpool/gen/7x-minmax-fp32-sse2-c8.c",
     "src/qu8-gemm/gen/1x4c8-minmax-fp32-sse2-ld64.c",
     "src/qu8-gemm/gen/3x4c8-minmax-fp32-sse2-ld64.c",
     "src/qu8-igemm/gen/1x4c8-minmax-fp32-sse2-ld64.c",
@@ -5029,8 +5067,12 @@
     "src/qu8-f32-vcvt/gen/vcvt-sse2-x16.c",
     "src/qu8-f32-vcvt/gen/vcvt-sse2-x24.c",
     "src/qu8-f32-vcvt/gen/vcvt-sse2-x32.c",
-    "src/qu8-gavgpool/7p7x-minmax-sse2-c8.c",
-    "src/qu8-gavgpool/7x-minmax-sse2-c8.c",
+    "src/qu8-gavgpool/gen/7p7x-minmax-fp32-sse2-c8.c",
+    "src/qu8-gavgpool/gen/7p7x-minmax-fp32-sse2-c16.c",
+    "src/qu8-gavgpool/gen/7p7x-minmax-fp32-sse2-c24.c",
+    "src/qu8-gavgpool/gen/7x-minmax-fp32-sse2-c8.c",
+    "src/qu8-gavgpool/gen/7x-minmax-fp32-sse2-c16.c",
+    "src/qu8-gavgpool/gen/7x-minmax-fp32-sse2-c24.c",
     "src/qu8-gemm/gen/1x4c2-minmax-fp32-sse2-ld64.c",
     "src/qu8-gemm/gen/1x4c2-minmax-fp32-sse2-ld128.c",
     "src/qu8-gemm/gen/1x4c8-minmax-fp32-sse2-ld64.c",
@@ -5164,6 +5206,8 @@
     "src/qu8-dwconv/gen/up8x9-minmax-fp32-sse41-mul16.c",
     "src/qu8-dwconv/gen/up8x25-minmax-fp32-sse41-mul16.c",
     "src/qu8-f32-vcvt/gen/vcvt-sse41-x16.c",
+    "src/qu8-gavgpool/gen/7p7x-minmax-fp32-sse41-c8.c",
+    "src/qu8-gavgpool/gen/7x-minmax-fp32-sse41-c8.c",
     "src/qu8-gemm/gen/1x4c8-minmax-fp32-sse41-ld64.c",
     "src/qu8-gemm/gen/3x4c8-minmax-fp32-sse41-ld64.c",
     "src/qu8-igemm/gen/1x4c8-minmax-fp32-sse41-ld64.c",
@@ -5380,6 +5424,12 @@
     "src/qu8-f32-vcvt/gen/vcvt-sse41-x16.c",
     "src/qu8-f32-vcvt/gen/vcvt-sse41-x24.c",
     "src/qu8-f32-vcvt/gen/vcvt-sse41-x32.c",
+    "src/qu8-gavgpool/gen/7p7x-minmax-fp32-sse41-c8.c",
+    "src/qu8-gavgpool/gen/7p7x-minmax-fp32-sse41-c16.c",
+    "src/qu8-gavgpool/gen/7p7x-minmax-fp32-sse41-c24.c",
+    "src/qu8-gavgpool/gen/7x-minmax-fp32-sse41-c8.c",
+    "src/qu8-gavgpool/gen/7x-minmax-fp32-sse41-c16.c",
+    "src/qu8-gavgpool/gen/7x-minmax-fp32-sse41-c24.c",
     "src/qu8-gemm/gen/1x4c2-minmax-fp32-sse41-ld64.c",
     "src/qu8-gemm/gen/1x4c2-minmax-fp32-sse41-ld128.c",
     "src/qu8-gemm/gen/1x4c8-minmax-fp32-sse41-ld64.c",
@@ -11696,9 +11746,9 @@
 )
 
 xnnpack_unit_test(
-    name = "qu8_gavgpool_minmax_test",
+    name = "qu8_gavgpool_minmax_fp32_test",
     srcs = [
-        "test/qu8-gavgpool-minmax.cc",
+        "test/qu8-gavgpool-minmax-fp32.cc",
         "test/gavgpool-microkernel-tester.h",
         "src/xnnpack/AlignedAllocator.h",
     ] + MICROKERNEL_TEST_HDRS,