Split scalar production microkernels into portable, AArch32, and Wasm source lists

PiperOrigin-RevId: 417893796
diff --git a/CMakeLists.txt b/CMakeLists.txt
index d583a29..d368293 100755
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -250,12 +250,18 @@
   src/tables/exp2minus-k-over-64.c
   src/tables/exp2minus-k-over-2048.c)
 
-SET (JIT_SRCS
+SET(JIT_SRCS
   src/jit/aarch32-assembler.cc
   src/jit/memory.c)
 
-SET(PROD_SCALAR_MICROKERNEL_SRCS
-  src/f16-f32-vcvt/gen/vcvt-scalar-float-x1.c
+SET(PROD_SCALAR_PORTABLE_MICROKERNEL_SRCS
+  src/params-init.c
+  src/u8-lut32norm/scalar.c
+  src/x8-lut/gen/lut-scalar-x4.c
+  src/x32-depthtospace2d-chw2hwc/scalar.c
+  src/xx-copy/memcpy.c)
+
+SET(PROD_SCALAR_AARCH32_MICROKERNEL_SRCS
   src/f16-f32-vcvt/gen/vcvt-scalar-float-x4.c
   src/f32-argmaxpool/4x-scalar-c1.c
   src/f32-argmaxpool/9p8x-scalar-c1.c
@@ -273,15 +279,10 @@
   src/f32-dwconv/gen/up1x9-scalar-acc2.c
   src/f32-dwconv/gen/up1x25-minmax-scalar-acc2.c
   src/f32-dwconv/gen/up1x25-scalar-acc2.c
-  src/f32-dwconv2d-chw/gen/3x3p1-minmax-scalar-2x1-acc2.c
   src/f32-dwconv2d-chw/gen/3x3p1-minmax-scalar-4x1.c
-  src/f32-dwconv2d-chw/gen/3x3s2p1-minmax-scalar-1x1-acc2.c
   src/f32-dwconv2d-chw/gen/3x3s2p1-minmax-scalar-2x1-acc2.c
-  src/f32-dwconv2d-chw/gen/5x5p2-minmax-scalar-1x1-acc5.c
   src/f32-dwconv2d-chw/gen/5x5p2-minmax-scalar-2x1-acc2.c
-  src/f32-dwconv2d-chw/gen/5x5s2p2-minmax-scalar-1x1-acc5.c
   src/f32-dwconv2d-chw/gen/5x5s2p2-minmax-scalar-2x1-acc2.c
-  src/f32-f16-vcvt/gen/vcvt-scalar-bitcast-x4.c
   src/f32-f16-vcvt/gen/vcvt-scalar-fabsf-x2.c
   src/f32-gavgpool-cw/scalar-x1.c
   src/f32-gavgpool/7p7x-minmax-scalar-c1.c
@@ -289,11 +290,7 @@
   src/f32-gemm/gen/1x4-minmax-scalar.c
   src/f32-gemm/gen/1x4-relu-scalar.c
   src/f32-gemm/gen/1x4-scalar.c
-  src/f32-gemm/gen/2x4-minmax-scalar.c
-  src/f32-gemm/gen/2x4-relu-scalar.c
-  src/f32-gemm/gen/2x4-scalar.c
   src/f32-gemm/gen/4x2-minmax-scalar.c
-  src/f32-gemm/gen/4x2-relu-scalar.c
   src/f32-gemm/gen/4x2-scalar.c
   src/f32-gemm/gen/4x4-minmax-scalar.c
   src/f32-gemm/gen/4x4-relu-scalar.c
@@ -303,11 +300,7 @@
   src/f32-igemm/gen/1x4-minmax-scalar.c
   src/f32-igemm/gen/1x4-relu-scalar.c
   src/f32-igemm/gen/1x4-scalar.c
-  src/f32-igemm/gen/2x4-minmax-scalar.c
-  src/f32-igemm/gen/2x4-relu-scalar.c
-  src/f32-igemm/gen/2x4-scalar.c
   src/f32-igemm/gen/4x2-minmax-scalar.c
-  src/f32-igemm/gen/4x2-relu-scalar.c
   src/f32-igemm/gen/4x2-scalar.c
   src/f32-igemm/gen/4x4-minmax-scalar.c
   src/f32-igemm/gen/4x4-relu-scalar.c
@@ -316,9 +309,7 @@
   src/f32-pavgpool/9p8x-minmax-scalar-c1.c
   src/f32-pavgpool/9x-minmax-scalar-c1.c
   src/f32-prelu/gen/scalar-2x4.c
-  src/f32-qs8-vcvt/gen/vcvt-scalar-magic-iminmax-x1.c
   src/f32-qs8-vcvt/gen/vcvt-scalar-magic-iminmax-x4.c
-  src/f32-qu8-vcvt/gen/vcvt-scalar-magic-iminmax-x1.c
   src/f32-qu8-vcvt/gen/vcvt-scalar-magic-iminmax-x4.c
   src/f32-raddstoreexpminusmax/gen/scalar-p5-x4-acc2.c
   src/f32-rmax/scalar.c
@@ -328,9 +319,7 @@
   src/f32-vbinary/gen/vadd-minmax-scalar-x8.c
   src/f32-vbinary/gen/vaddc-minmax-scalar-x8.c
   src/f32-vbinary/gen/vdiv-minmax-scalar-x2.c
-  src/f32-vbinary/gen/vdiv-minmax-scalar-x8.c
   src/f32-vbinary/gen/vdivc-minmax-scalar-x2.c
-  src/f32-vbinary/gen/vdivc-minmax-scalar-x8.c
   src/f32-vbinary/gen/vmax-scalar-x8.c
   src/f32-vbinary/gen/vmaxc-scalar-x8.c
   src/f32-vbinary/gen/vmin-scalar-x8.c
@@ -338,109 +327,71 @@
   src/f32-vbinary/gen/vmul-minmax-scalar-x8.c
   src/f32-vbinary/gen/vmulc-minmax-scalar-x8.c
   src/f32-vbinary/gen/vrdivc-minmax-scalar-x2.c
-  src/f32-vbinary/gen/vrdivc-minmax-scalar-x8.c
   src/f32-vbinary/gen/vrsubc-minmax-scalar-x8.c
   src/f32-vbinary/gen/vsqrdiff-scalar-x8.c
   src/f32-vbinary/gen/vsqrdiffc-scalar-x8.c
   src/f32-vbinary/gen/vsub-minmax-scalar-x8.c
   src/f32-vbinary/gen/vsubc-minmax-scalar-x8.c
   src/f32-vclamp/gen/vclamp-scalar-x4.c
-  src/f32-velu/gen/velu-scalar-rr2-lut16-p3-x2.c
   src/f32-velu/gen/velu-scalar-rr2-lut16-p3-x4.c
   src/f32-vhswish/gen/vhswish-scalar-x4.c
   src/f32-vlrelu/gen/vlrelu-scalar-x4.c
   src/f32-vmulcaddc/gen/c1-minmax-scalar-2x.c
   src/f32-vrelu/gen/vrelu-scalar-x8.c
   src/f32-vrnd/gen/vrndd-scalar-libm-x1.c
-  src/f32-vrnd/gen/vrndd-scalar-libm-x4.c
   src/f32-vrnd/gen/vrndne-scalar-libm-x1.c
-  src/f32-vrnd/gen/vrndne-scalar-libm-x4.c
   src/f32-vrnd/gen/vrndu-scalar-libm-x1.c
-  src/f32-vrnd/gen/vrndu-scalar-libm-x4.c
   src/f32-vrnd/gen/vrndz-scalar-libm-x1.c
-  src/f32-vrnd/gen/vrndz-scalar-libm-x4.c
   src/f32-vsigmoid/gen/vsigmoid-scalar-lut64-p2-div-x2.c
   src/f32-vsqrt/gen/scalar-sqrt-x1.c
   src/f32-vunary/gen/vabs-scalar-x4.c
   src/f32-vunary/gen/vneg-scalar-x4.c
   src/f32-vunary/gen/vsqr-scalar-x4.c
-  src/params-init.c
   src/qc8-dwconv/gen/up2x9-minmax-fp32-scalar-magic.c
   src/qc8-dwconv/gen/up2x25-minmax-fp32-scalar-magic.c
   src/qc8-gemm/gen/1x2-minmax-fp32-scalar-magic.c
-  src/qc8-gemm/gen/1x4-minmax-fp32-scalar-magic.c
   src/qc8-gemm/gen/2x2-minmax-fp32-scalar-magic.c
-  src/qc8-gemm/gen/4x4-minmax-fp32-scalar-magic.c
   src/qc8-igemm/gen/1x2-minmax-fp32-scalar-magic.c
-  src/qc8-igemm/gen/1x4-minmax-fp32-scalar-magic.c
   src/qc8-igemm/gen/2x2-minmax-fp32-scalar-magic.c
-  src/qc8-igemm/gen/4x4-minmax-fp32-scalar-magic.c
   src/qs8-dwconv/gen/up1x9-minmax-fp32-scalar-magic.c
   src/qs8-dwconv/gen/up1x25-minmax-fp32-scalar-magic.c
-  src/qs8-dwconv/gen/up2x9-minmax-fp32-scalar-magic.c
-  src/qs8-dwconv/gen/up2x25-minmax-fp32-scalar-magic.c
-  src/qs8-f32-vcvt/gen/vcvt-scalar-x1.c
   src/qs8-f32-vcvt/gen/vcvt-scalar-x4.c
   src/qs8-gavgpool/gen/7p7x-minmax-scalar-c1.c
-  src/qs8-gavgpool/gen/7p7x-minmax-scalar-c4.c
   src/qs8-gavgpool/gen/7x-minmax-scalar-c1.c
-  src/qs8-gavgpool/gen/7x-minmax-scalar-c4.c
   src/qs8-gemm/gen/1x2-minmax-fp32-scalar-magic.c
-  src/qs8-gemm/gen/1x4-minmax-fp32-scalar-magic.c
-  src/qs8-gemm/gen/1x4-minmax-rndnu-scalar.c
   src/qs8-gemm/gen/2x2-minmax-fp32-scalar-magic.c
-  src/qs8-gemm/gen/3x4-minmax-rndnu-scalar.c
-  src/qs8-gemm/gen/4x4-minmax-fp32-scalar-magic.c
   src/qs8-igemm/gen/1x2-minmax-fp32-scalar-magic.c
-  src/qs8-igemm/gen/1x4-minmax-fp32-scalar-magic.c
-  src/qs8-igemm/gen/1x4-minmax-rndnu-scalar.c
   src/qs8-igemm/gen/2x2-minmax-fp32-scalar-magic.c
-  src/qs8-igemm/gen/3x4-minmax-rndnu-scalar.c
-  src/qs8-igemm/gen/4x4-minmax-fp32-scalar-magic.c
   src/qs8-vadd/gen/minmax-scalar-x1.c
-  src/qs8-vadd/gen/minmax-scalar-x4.c
   src/qs8-vaddc/gen/minmax-scalar-x1.c
-  src/qs8-vaddc/gen/minmax-scalar-x4.c
   src/qs8-vmul/gen/minmax-fp32-scalar-x4.c
   src/qs8-vmulc/gen/minmax-fp32-scalar-x4.c
   src/qu8-avgpool/9p8x-minmax-scalar-c1.c
   src/qu8-avgpool/9x-minmax-scalar-c1.c
   src/qu8-dwconv/gen/up1x9-minmax-fp32-scalar-magic.c
   src/qu8-dwconv/gen/up1x25-minmax-fp32-scalar-magic.c
-  src/qu8-dwconv/gen/up2x9-minmax-fp32-scalar-magic.c
-  src/qu8-dwconv/gen/up2x25-minmax-fp32-scalar-magic.c
-  src/qu8-f32-vcvt/gen/vcvt-scalar-x1.c
   src/qu8-f32-vcvt/gen/vcvt-scalar-x4.c
   src/qu8-gavgpool/7p7x-minmax-scalar-c1.c
   src/qu8-gavgpool/7x-minmax-scalar-c1.c
   src/qu8-gemm/gen/1x2-minmax-fp32-scalar-magic.c
-  src/qu8-gemm/gen/1x4-minmax-fp32-scalar-magic.c
   src/qu8-gemm/gen/2x2-minmax-fp32-scalar-magic.c
-  src/qu8-gemm/gen/4x4-minmax-fp32-scalar-magic.c
   src/qu8-igemm/gen/1x2-minmax-fp32-scalar-magic.c
-  src/qu8-igemm/gen/1x4-minmax-fp32-scalar-magic.c
   src/qu8-igemm/gen/2x2-minmax-fp32-scalar-magic.c
-  src/qu8-igemm/gen/4x4-minmax-fp32-scalar-magic.c
   src/qu8-vadd/gen/minmax-scalar-x1.c
-  src/qu8-vadd/gen/minmax-scalar-x4.c
   src/qu8-vaddc/gen/minmax-scalar-x1.c
-  src/qu8-vaddc/gen/minmax-scalar-x4.c
   src/qu8-vmul/gen/minmax-fp32-scalar-x4.c
   src/qu8-vmulc/gen/minmax-fp32-scalar-x4.c
   src/s8-ibilinear/gen/scalar-c1.c
   src/s8-maxpool/9p8x-minmax-scalar-c1.c
   src/s8-vclamp/scalar-x4.c
   src/u8-ibilinear/gen/scalar-c1.c
-  src/u8-lut32norm/scalar.c
   src/u8-maxpool/9p8x-minmax-scalar-c1.c
   src/u8-rmax/scalar.c
   src/u8-vclamp/scalar-x4.c
-  src/x8-lut/gen/lut-scalar-x4.c
   src/x8-zip/x2-scalar.c
   src/x8-zip/x3-scalar.c
   src/x8-zip/x4-scalar.c
   src/x8-zip/xm-scalar.c
-  src/x32-depthtospace2d-chw2hwc/scalar.c
   src/x32-packx/x2-scalar.c
   src/x32-packx/x3-scalar.c
   src/x32-packx/x4-scalar.c
@@ -449,7 +400,6 @@
   src/x32-zip/x3-scalar.c
   src/x32-zip/x4-scalar.c
   src/x32-zip/xm-scalar.c
-  src/xx-copy/memcpy.c
   src/xx-fill/scalar-x16.c
   src/xx-pad/scalar.c)
 
@@ -5501,9 +5451,10 @@
   src/qu8-igemm/gen/4x16c4-minmax-rndnu-aarch64-neondot-cortex-a55.S
   src/qu8-igemm/gen/4x16c4-minmax-rndnu-aarch64-neondot-ld128.S)
 
-SET(PROD_MICROKERNEL_SRCS ${PROD_SCALAR_MICROKERNEL_SRCS})
+SET(PROD_MICROKERNEL_SRCS ${PROD_SCALAR_PORTABLE_MICROKERNEL_SRCS})
 SET(ALL_MICROKERNEL_SRCS ${ALL_SCALAR_MICROKERNEL_SRCS})
 IF(CMAKE_SYSTEM_PROCESSOR MATCHES "^armv[5-8]" OR IOS_ARCH MATCHES "^armv7")
+  LIST(APPEND PROD_MICROKERNEL_SRCS ${PROD_SCALAR_AARCH32_MICROKERNEL_SRCS})
   LIST(APPEND PROD_MICROKERNEL_SRCS ${PROD_NEON_MICROKERNEL_SRCS})
   LIST(APPEND PROD_MICROKERNEL_SRCS ${PROD_NEONFP16_MICROKERNEL_SRCS})
   LIST(APPEND PROD_MICROKERNEL_SRCS ${PROD_NEONFMA_MICROKERNEL_SRCS})