Vector Elementwise Binary with Scalar microkernels in WAsm SIMD

PiperOrigin-RevId: 318585022
diff --git a/BUILD.bazel b/BUILD.bazel
index e1307c3..df33b8c 100644
--- a/BUILD.bazel
+++ b/BUILD.bazel
@@ -623,28 +623,64 @@
     "src/f32-vbinary/gen/vadd-minmax-wasmsimd-arm-x8.c",
     "src/f32-vbinary/gen/vadd-minmax-wasmsimd-x86-x4.c",
     "src/f32-vbinary/gen/vadd-minmax-wasmsimd-x86-x8.c",
+    "src/f32-vbinary/gen/vaddc-minmax-wasmsimd-arm-x4.c",
+    "src/f32-vbinary/gen/vaddc-minmax-wasmsimd-arm-x8.c",
+    "src/f32-vbinary/gen/vaddc-minmax-wasmsimd-x86-x4.c",
+    "src/f32-vbinary/gen/vaddc-minmax-wasmsimd-x86-x8.c",
     "src/f32-vbinary/gen/vdiv-minmax-wasmsimd-arm-x4.c",
     "src/f32-vbinary/gen/vdiv-minmax-wasmsimd-arm-x8.c",
     "src/f32-vbinary/gen/vdiv-minmax-wasmsimd-x86-x4.c",
     "src/f32-vbinary/gen/vdiv-minmax-wasmsimd-x86-x8.c",
+    "src/f32-vbinary/gen/vdivc-minmax-wasmsimd-arm-x4.c",
+    "src/f32-vbinary/gen/vdivc-minmax-wasmsimd-arm-x8.c",
+    "src/f32-vbinary/gen/vdivc-minmax-wasmsimd-x86-x4.c",
+    "src/f32-vbinary/gen/vdivc-minmax-wasmsimd-x86-x8.c",
     "src/f32-vbinary/gen/vmax-wasmsimd-arm-x4.c",
     "src/f32-vbinary/gen/vmax-wasmsimd-arm-x8.c",
     "src/f32-vbinary/gen/vmax-wasmsimd-x86-x4.c",
     "src/f32-vbinary/gen/vmax-wasmsimd-x86-x8.c",
+    "src/f32-vbinary/gen/vmaxc-wasmsimd-arm-x4.c",
+    "src/f32-vbinary/gen/vmaxc-wasmsimd-arm-x8.c",
+    "src/f32-vbinary/gen/vmaxc-wasmsimd-x86-x4.c",
+    "src/f32-vbinary/gen/vmaxc-wasmsimd-x86-x8.c",
     "src/f32-vbinary/gen/vmin-wasmsimd-arm-x4.c",
     "src/f32-vbinary/gen/vmin-wasmsimd-arm-x8.c",
     "src/f32-vbinary/gen/vmin-wasmsimd-x86-x4.c",
     "src/f32-vbinary/gen/vmin-wasmsimd-x86-x8.c",
+    "src/f32-vbinary/gen/vminc-wasmsimd-arm-x4.c",
+    "src/f32-vbinary/gen/vminc-wasmsimd-arm-x8.c",
+    "src/f32-vbinary/gen/vminc-wasmsimd-x86-x4.c",
+    "src/f32-vbinary/gen/vminc-wasmsimd-x86-x8.c",
     "src/f32-vbinary/gen/vmul-minmax-wasmsimd-arm-x4.c",
     "src/f32-vbinary/gen/vmul-minmax-wasmsimd-arm-x8.c",
     "src/f32-vbinary/gen/vmul-minmax-wasmsimd-x86-x4.c",
     "src/f32-vbinary/gen/vmul-minmax-wasmsimd-x86-x8.c",
+    "src/f32-vbinary/gen/vmulc-minmax-wasmsimd-arm-x4.c",
+    "src/f32-vbinary/gen/vmulc-minmax-wasmsimd-arm-x8.c",
+    "src/f32-vbinary/gen/vmulc-minmax-wasmsimd-x86-x4.c",
+    "src/f32-vbinary/gen/vmulc-minmax-wasmsimd-x86-x8.c",
+    "src/f32-vbinary/gen/vrdivc-minmax-wasmsimd-arm-x4.c",
+    "src/f32-vbinary/gen/vrdivc-minmax-wasmsimd-arm-x8.c",
+    "src/f32-vbinary/gen/vrdivc-minmax-wasmsimd-x86-x4.c",
+    "src/f32-vbinary/gen/vrdivc-minmax-wasmsimd-x86-x8.c",
+    "src/f32-vbinary/gen/vrsqrdiffc-wasmsimd-x4.c",
+    "src/f32-vbinary/gen/vrsqrdiffc-wasmsimd-x8.c",
+    "src/f32-vbinary/gen/vrsubc-minmax-wasmsimd-arm-x4.c",
+    "src/f32-vbinary/gen/vrsubc-minmax-wasmsimd-arm-x8.c",
+    "src/f32-vbinary/gen/vrsubc-minmax-wasmsimd-x86-x4.c",
+    "src/f32-vbinary/gen/vrsubc-minmax-wasmsimd-x86-x8.c",
     "src/f32-vbinary/gen/vsqrdiff-wasmsimd-x4.c",
     "src/f32-vbinary/gen/vsqrdiff-wasmsimd-x8.c",
+    "src/f32-vbinary/gen/vsqrdiffc-wasmsimd-x4.c",
+    "src/f32-vbinary/gen/vsqrdiffc-wasmsimd-x8.c",
     "src/f32-vbinary/gen/vsub-minmax-wasmsimd-arm-x4.c",
     "src/f32-vbinary/gen/vsub-minmax-wasmsimd-arm-x8.c",
     "src/f32-vbinary/gen/vsub-minmax-wasmsimd-x86-x4.c",
     "src/f32-vbinary/gen/vsub-minmax-wasmsimd-x86-x8.c",
+    "src/f32-vbinary/gen/vsubc-minmax-wasmsimd-arm-x4.c",
+    "src/f32-vbinary/gen/vsubc-minmax-wasmsimd-arm-x8.c",
+    "src/f32-vbinary/gen/vsubc-minmax-wasmsimd-x86-x4.c",
+    "src/f32-vbinary/gen/vsubc-minmax-wasmsimd-x86-x8.c",
     "src/math/roundne-wasmsimd-addsub.c",
     "src/math/roundd-wasmsimd-addsub.c",
     "src/math/roundu-wasmsimd-addsub.c",
@@ -2278,7 +2314,7 @@
         "-msimd128",
         "-munimplemented-simd128",
     ],
-    wasmsimd_srcs = WASMSIMD_UKERNELS,
+    wasmsimd_srcs = WASM_UKERNELS + WASMSIMD_UKERNELS,
     deps = [
         ":tables",
         "@FP16",
diff --git a/scripts/generate-f32-vbinary.sh b/scripts/generate-f32-vbinary.sh
index 9a0b334..636e192 100755
--- a/scripts/generate-f32-vbinary.sh
+++ b/scripts/generate-f32-vbinary.sh
@@ -105,34 +105,72 @@
 tools/xngen src/f32-vbinary/vopc-scalar.c.in -D OP=RSUB -D BATCH_TILE=4 -D WASM=1 -D ACTIVATION=MINMAX -o src/f32-vbinary/gen/vrsubc-minmax-wasm-x4.c
 
 ################################## WAsm SIMD ##################################
-tools/xngen src/f32-vbinary/vop-wasmsimd.c.in -D OP=ADD     -D BATCH_TILE=4 -D ACTIVATION=MINMAX -D X86=0 -o src/f32-vbinary/gen/vadd-minmax-wasmsimd-arm-x4.c
-tools/xngen src/f32-vbinary/vop-wasmsimd.c.in -D OP=ADD     -D BATCH_TILE=8 -D ACTIVATION=MINMAX -D X86=0 -o src/f32-vbinary/gen/vadd-minmax-wasmsimd-arm-x8.c
-tools/xngen src/f32-vbinary/vop-wasmsimd.c.in -D OP=DIV     -D BATCH_TILE=4 -D ACTIVATION=MINMAX -D X86=0 -o src/f32-vbinary/gen/vdiv-minmax-wasmsimd-arm-x4.c
-tools/xngen src/f32-vbinary/vop-wasmsimd.c.in -D OP=DIV     -D BATCH_TILE=8 -D ACTIVATION=MINMAX -D X86=0 -o src/f32-vbinary/gen/vdiv-minmax-wasmsimd-arm-x8.c
-tools/xngen src/f32-vbinary/vop-wasmsimd.c.in -D OP=MAX     -D BATCH_TILE=4 -D ACTIVATION=LINEAR -D X86=0 -o src/f32-vbinary/gen/vmax-wasmsimd-arm-x4.c
-tools/xngen src/f32-vbinary/vop-wasmsimd.c.in -D OP=MAX     -D BATCH_TILE=8 -D ACTIVATION=LINEAR -D X86=0 -o src/f32-vbinary/gen/vmax-wasmsimd-arm-x8.c
-tools/xngen src/f32-vbinary/vop-wasmsimd.c.in -D OP=MIN     -D BATCH_TILE=4 -D ACTIVATION=LINEAR -D X86=0 -o src/f32-vbinary/gen/vmin-wasmsimd-arm-x4.c
-tools/xngen src/f32-vbinary/vop-wasmsimd.c.in -D OP=MIN     -D BATCH_TILE=8 -D ACTIVATION=LINEAR -D X86=0 -o src/f32-vbinary/gen/vmin-wasmsimd-arm-x8.c
-tools/xngen src/f32-vbinary/vop-wasmsimd.c.in -D OP=MUL     -D BATCH_TILE=4 -D ACTIVATION=MINMAX -D X86=0 -o src/f32-vbinary/gen/vmul-minmax-wasmsimd-arm-x4.c
-tools/xngen src/f32-vbinary/vop-wasmsimd.c.in -D OP=MUL     -D BATCH_TILE=8 -D ACTIVATION=MINMAX -D X86=0 -o src/f32-vbinary/gen/vmul-minmax-wasmsimd-arm-x8.c
-tools/xngen src/f32-vbinary/vop-wasmsimd.c.in -D OP=SUB     -D BATCH_TILE=4 -D ACTIVATION=MINMAX -D X86=0 -o src/f32-vbinary/gen/vsub-minmax-wasmsimd-arm-x4.c
-tools/xngen src/f32-vbinary/vop-wasmsimd.c.in -D OP=SUB     -D BATCH_TILE=8 -D ACTIVATION=MINMAX -D X86=0 -o src/f32-vbinary/gen/vsub-minmax-wasmsimd-arm-x8.c
+tools/xngen src/f32-vbinary/vop-wasmsimd.c.in -D OP=ADD -D BATCH_TILE=4 -D ACTIVATION=MINMAX -D X86=0 -o src/f32-vbinary/gen/vadd-minmax-wasmsimd-arm-x4.c
+tools/xngen src/f32-vbinary/vop-wasmsimd.c.in -D OP=ADD -D BATCH_TILE=8 -D ACTIVATION=MINMAX -D X86=0 -o src/f32-vbinary/gen/vadd-minmax-wasmsimd-arm-x8.c
+tools/xngen src/f32-vbinary/vop-wasmsimd.c.in -D OP=DIV -D BATCH_TILE=4 -D ACTIVATION=MINMAX -D X86=0 -o src/f32-vbinary/gen/vdiv-minmax-wasmsimd-arm-x4.c
+tools/xngen src/f32-vbinary/vop-wasmsimd.c.in -D OP=DIV -D BATCH_TILE=8 -D ACTIVATION=MINMAX -D X86=0 -o src/f32-vbinary/gen/vdiv-minmax-wasmsimd-arm-x8.c
+tools/xngen src/f32-vbinary/vop-wasmsimd.c.in -D OP=MAX -D BATCH_TILE=4 -D ACTIVATION=LINEAR -D X86=0 -o src/f32-vbinary/gen/vmax-wasmsimd-arm-x4.c
+tools/xngen src/f32-vbinary/vop-wasmsimd.c.in -D OP=MAX -D BATCH_TILE=8 -D ACTIVATION=LINEAR -D X86=0 -o src/f32-vbinary/gen/vmax-wasmsimd-arm-x8.c
+tools/xngen src/f32-vbinary/vop-wasmsimd.c.in -D OP=MIN -D BATCH_TILE=4 -D ACTIVATION=LINEAR -D X86=0 -o src/f32-vbinary/gen/vmin-wasmsimd-arm-x4.c
+tools/xngen src/f32-vbinary/vop-wasmsimd.c.in -D OP=MIN -D BATCH_TILE=8 -D ACTIVATION=LINEAR -D X86=0 -o src/f32-vbinary/gen/vmin-wasmsimd-arm-x8.c
+tools/xngen src/f32-vbinary/vop-wasmsimd.c.in -D OP=MUL -D BATCH_TILE=4 -D ACTIVATION=MINMAX -D X86=0 -o src/f32-vbinary/gen/vmul-minmax-wasmsimd-arm-x4.c
+tools/xngen src/f32-vbinary/vop-wasmsimd.c.in -D OP=MUL -D BATCH_TILE=8 -D ACTIVATION=MINMAX -D X86=0 -o src/f32-vbinary/gen/vmul-minmax-wasmsimd-arm-x8.c
+tools/xngen src/f32-vbinary/vop-wasmsimd.c.in -D OP=SUB -D BATCH_TILE=4 -D ACTIVATION=MINMAX -D X86=0 -o src/f32-vbinary/gen/vsub-minmax-wasmsimd-arm-x4.c
+tools/xngen src/f32-vbinary/vop-wasmsimd.c.in -D OP=SUB -D BATCH_TILE=8 -D ACTIVATION=MINMAX -D X86=0 -o src/f32-vbinary/gen/vsub-minmax-wasmsimd-arm-x8.c
 
-tools/xngen src/f32-vbinary/vop-wasmsimd.c.in -D OP=ADD     -D BATCH_TILE=4 -D ACTIVATION=MINMAX -D X86=1 -o src/f32-vbinary/gen/vadd-minmax-wasmsimd-x86-x4.c
-tools/xngen src/f32-vbinary/vop-wasmsimd.c.in -D OP=ADD     -D BATCH_TILE=8 -D ACTIVATION=MINMAX -D X86=1 -o src/f32-vbinary/gen/vadd-minmax-wasmsimd-x86-x8.c
-tools/xngen src/f32-vbinary/vop-wasmsimd.c.in -D OP=DIV     -D BATCH_TILE=4 -D ACTIVATION=MINMAX -D X86=1 -o src/f32-vbinary/gen/vdiv-minmax-wasmsimd-x86-x4.c
-tools/xngen src/f32-vbinary/vop-wasmsimd.c.in -D OP=DIV     -D BATCH_TILE=8 -D ACTIVATION=MINMAX -D X86=1 -o src/f32-vbinary/gen/vdiv-minmax-wasmsimd-x86-x8.c
-tools/xngen src/f32-vbinary/vop-wasmsimd.c.in -D OP=MAX     -D BATCH_TILE=4 -D ACTIVATION=LINEAR -D X86=1 -o src/f32-vbinary/gen/vmax-wasmsimd-x86-x4.c
-tools/xngen src/f32-vbinary/vop-wasmsimd.c.in -D OP=MAX     -D BATCH_TILE=8 -D ACTIVATION=LINEAR -D X86=1 -o src/f32-vbinary/gen/vmax-wasmsimd-x86-x8.c
-tools/xngen src/f32-vbinary/vop-wasmsimd.c.in -D OP=MIN     -D BATCH_TILE=4 -D ACTIVATION=LINEAR -D X86=1 -o src/f32-vbinary/gen/vmin-wasmsimd-x86-x4.c
-tools/xngen src/f32-vbinary/vop-wasmsimd.c.in -D OP=MIN     -D BATCH_TILE=8 -D ACTIVATION=LINEAR -D X86=1 -o src/f32-vbinary/gen/vmin-wasmsimd-x86-x8.c
-tools/xngen src/f32-vbinary/vop-wasmsimd.c.in -D OP=MUL     -D BATCH_TILE=4 -D ACTIVATION=MINMAX -D X86=1 -o src/f32-vbinary/gen/vmul-minmax-wasmsimd-x86-x4.c
-tools/xngen src/f32-vbinary/vop-wasmsimd.c.in -D OP=MUL     -D BATCH_TILE=8 -D ACTIVATION=MINMAX -D X86=1 -o src/f32-vbinary/gen/vmul-minmax-wasmsimd-x86-x8.c
-tools/xngen src/f32-vbinary/vop-wasmsimd.c.in -D OP=SUB     -D BATCH_TILE=4 -D ACTIVATION=MINMAX -D X86=1 -o src/f32-vbinary/gen/vsub-minmax-wasmsimd-x86-x4.c
-tools/xngen src/f32-vbinary/vop-wasmsimd.c.in -D OP=SUB     -D BATCH_TILE=8 -D ACTIVATION=MINMAX -D X86=1 -o src/f32-vbinary/gen/vsub-minmax-wasmsimd-x86-x8.c
+tools/xngen src/f32-vbinary/vop-wasmsimd.c.in -D OP=ADD -D BATCH_TILE=4 -D ACTIVATION=MINMAX -D X86=1 -o src/f32-vbinary/gen/vadd-minmax-wasmsimd-x86-x4.c
+tools/xngen src/f32-vbinary/vop-wasmsimd.c.in -D OP=ADD -D BATCH_TILE=8 -D ACTIVATION=MINMAX -D X86=1 -o src/f32-vbinary/gen/vadd-minmax-wasmsimd-x86-x8.c
+tools/xngen src/f32-vbinary/vop-wasmsimd.c.in -D OP=DIV -D BATCH_TILE=4 -D ACTIVATION=MINMAX -D X86=1 -o src/f32-vbinary/gen/vdiv-minmax-wasmsimd-x86-x4.c
+tools/xngen src/f32-vbinary/vop-wasmsimd.c.in -D OP=DIV -D BATCH_TILE=8 -D ACTIVATION=MINMAX -D X86=1 -o src/f32-vbinary/gen/vdiv-minmax-wasmsimd-x86-x8.c
+tools/xngen src/f32-vbinary/vop-wasmsimd.c.in -D OP=MAX -D BATCH_TILE=4 -D ACTIVATION=LINEAR -D X86=1 -o src/f32-vbinary/gen/vmax-wasmsimd-x86-x4.c
+tools/xngen src/f32-vbinary/vop-wasmsimd.c.in -D OP=MAX -D BATCH_TILE=8 -D ACTIVATION=LINEAR -D X86=1 -o src/f32-vbinary/gen/vmax-wasmsimd-x86-x8.c
+tools/xngen src/f32-vbinary/vop-wasmsimd.c.in -D OP=MIN -D BATCH_TILE=4 -D ACTIVATION=LINEAR -D X86=1 -o src/f32-vbinary/gen/vmin-wasmsimd-x86-x4.c
+tools/xngen src/f32-vbinary/vop-wasmsimd.c.in -D OP=MIN -D BATCH_TILE=8 -D ACTIVATION=LINEAR -D X86=1 -o src/f32-vbinary/gen/vmin-wasmsimd-x86-x8.c
+tools/xngen src/f32-vbinary/vop-wasmsimd.c.in -D OP=MUL -D BATCH_TILE=4 -D ACTIVATION=MINMAX -D X86=1 -o src/f32-vbinary/gen/vmul-minmax-wasmsimd-x86-x4.c
+tools/xngen src/f32-vbinary/vop-wasmsimd.c.in -D OP=MUL -D BATCH_TILE=8 -D ACTIVATION=MINMAX -D X86=1 -o src/f32-vbinary/gen/vmul-minmax-wasmsimd-x86-x8.c
+tools/xngen src/f32-vbinary/vop-wasmsimd.c.in -D OP=SUB -D BATCH_TILE=4 -D ACTIVATION=MINMAX -D X86=1 -o src/f32-vbinary/gen/vsub-minmax-wasmsimd-x86-x4.c
+tools/xngen src/f32-vbinary/vop-wasmsimd.c.in -D OP=SUB -D BATCH_TILE=8 -D ACTIVATION=MINMAX -D X86=1 -o src/f32-vbinary/gen/vsub-minmax-wasmsimd-x86-x8.c
 
-tools/xngen src/f32-vbinary/vop-wasmsimd.c.in -D OP=SQRDIFF -D BATCH_TILE=4 -D ACTIVATION=LINEAR -D X86=0 -o src/f32-vbinary/gen/vsqrdiff-wasmsimd-x4.c
-tools/xngen src/f32-vbinary/vop-wasmsimd.c.in -D OP=SQRDIFF -D BATCH_TILE=8 -D ACTIVATION=LINEAR -D X86=0 -o src/f32-vbinary/gen/vsqrdiff-wasmsimd-x8.c
+tools/xngen src/f32-vbinary/vopc-wasmsimd.c.in -D OP=ADD  -D BATCH_TILE=4 -D ACTIVATION=MINMAX -D X86=0 -o src/f32-vbinary/gen/vaddc-minmax-wasmsimd-arm-x4.c
+tools/xngen src/f32-vbinary/vopc-wasmsimd.c.in -D OP=ADD  -D BATCH_TILE=8 -D ACTIVATION=MINMAX -D X86=0 -o src/f32-vbinary/gen/vaddc-minmax-wasmsimd-arm-x8.c
+tools/xngen src/f32-vbinary/vopc-wasmsimd.c.in -D OP=DIV  -D BATCH_TILE=4 -D ACTIVATION=MINMAX -D X86=0 -o src/f32-vbinary/gen/vdivc-minmax-wasmsimd-arm-x4.c
+tools/xngen src/f32-vbinary/vopc-wasmsimd.c.in -D OP=DIV  -D BATCH_TILE=8 -D ACTIVATION=MINMAX -D X86=0 -o src/f32-vbinary/gen/vdivc-minmax-wasmsimd-arm-x8.c
+tools/xngen src/f32-vbinary/vopc-wasmsimd.c.in -D OP=RDIV -D BATCH_TILE=4 -D ACTIVATION=MINMAX -D X86=0 -o src/f32-vbinary/gen/vrdivc-minmax-wasmsimd-arm-x4.c
+tools/xngen src/f32-vbinary/vopc-wasmsimd.c.in -D OP=RDIV -D BATCH_TILE=8 -D ACTIVATION=MINMAX -D X86=0 -o src/f32-vbinary/gen/vrdivc-minmax-wasmsimd-arm-x8.c
+tools/xngen src/f32-vbinary/vopc-wasmsimd.c.in -D OP=MAX  -D BATCH_TILE=4 -D ACTIVATION=LINEAR -D X86=0 -o src/f32-vbinary/gen/vmaxc-wasmsimd-arm-x4.c
+tools/xngen src/f32-vbinary/vopc-wasmsimd.c.in -D OP=MAX  -D BATCH_TILE=8 -D ACTIVATION=LINEAR -D X86=0 -o src/f32-vbinary/gen/vmaxc-wasmsimd-arm-x8.c
+tools/xngen src/f32-vbinary/vopc-wasmsimd.c.in -D OP=MIN  -D BATCH_TILE=4 -D ACTIVATION=LINEAR -D X86=0 -o src/f32-vbinary/gen/vminc-wasmsimd-arm-x4.c
+tools/xngen src/f32-vbinary/vopc-wasmsimd.c.in -D OP=MIN  -D BATCH_TILE=8 -D ACTIVATION=LINEAR -D X86=0 -o src/f32-vbinary/gen/vminc-wasmsimd-arm-x8.c
+tools/xngen src/f32-vbinary/vopc-wasmsimd.c.in -D OP=MUL  -D BATCH_TILE=4 -D ACTIVATION=MINMAX -D X86=0 -o src/f32-vbinary/gen/vmulc-minmax-wasmsimd-arm-x4.c
+tools/xngen src/f32-vbinary/vopc-wasmsimd.c.in -D OP=MUL  -D BATCH_TILE=8 -D ACTIVATION=MINMAX -D X86=0 -o src/f32-vbinary/gen/vmulc-minmax-wasmsimd-arm-x8.c
+tools/xngen src/f32-vbinary/vopc-wasmsimd.c.in -D OP=SUB  -D BATCH_TILE=4 -D ACTIVATION=MINMAX -D X86=0 -o src/f32-vbinary/gen/vsubc-minmax-wasmsimd-arm-x4.c
+tools/xngen src/f32-vbinary/vopc-wasmsimd.c.in -D OP=SUB  -D BATCH_TILE=8 -D ACTIVATION=MINMAX -D X86=0 -o src/f32-vbinary/gen/vsubc-minmax-wasmsimd-arm-x8.c
+tools/xngen src/f32-vbinary/vopc-wasmsimd.c.in -D OP=RSUB -D BATCH_TILE=4 -D ACTIVATION=MINMAX -D X86=0 -o src/f32-vbinary/gen/vrsubc-minmax-wasmsimd-arm-x4.c
+tools/xngen src/f32-vbinary/vopc-wasmsimd.c.in -D OP=RSUB -D BATCH_TILE=8 -D ACTIVATION=MINMAX -D X86=0 -o src/f32-vbinary/gen/vrsubc-minmax-wasmsimd-arm-x8.c
+
+tools/xngen src/f32-vbinary/vopc-wasmsimd.c.in -D OP=ADD  -D BATCH_TILE=4 -D ACTIVATION=MINMAX -D X86=1 -o src/f32-vbinary/gen/vaddc-minmax-wasmsimd-x86-x4.c
+tools/xngen src/f32-vbinary/vopc-wasmsimd.c.in -D OP=ADD  -D BATCH_TILE=8 -D ACTIVATION=MINMAX -D X86=1 -o src/f32-vbinary/gen/vaddc-minmax-wasmsimd-x86-x8.c
+tools/xngen src/f32-vbinary/vopc-wasmsimd.c.in -D OP=DIV  -D BATCH_TILE=4 -D ACTIVATION=MINMAX -D X86=1 -o src/f32-vbinary/gen/vdivc-minmax-wasmsimd-x86-x4.c
+tools/xngen src/f32-vbinary/vopc-wasmsimd.c.in -D OP=DIV  -D BATCH_TILE=8 -D ACTIVATION=MINMAX -D X86=1 -o src/f32-vbinary/gen/vdivc-minmax-wasmsimd-x86-x8.c
+tools/xngen src/f32-vbinary/vopc-wasmsimd.c.in -D OP=RDIV -D BATCH_TILE=4 -D ACTIVATION=MINMAX -D X86=1 -o src/f32-vbinary/gen/vrdivc-minmax-wasmsimd-x86-x4.c
+tools/xngen src/f32-vbinary/vopc-wasmsimd.c.in -D OP=RDIV -D BATCH_TILE=8 -D ACTIVATION=MINMAX -D X86=1 -o src/f32-vbinary/gen/vrdivc-minmax-wasmsimd-x86-x8.c
+tools/xngen src/f32-vbinary/vopc-wasmsimd.c.in -D OP=MAX  -D BATCH_TILE=4 -D ACTIVATION=LINEAR -D X86=1 -o src/f32-vbinary/gen/vmaxc-wasmsimd-x86-x4.c
+tools/xngen src/f32-vbinary/vopc-wasmsimd.c.in -D OP=MAX  -D BATCH_TILE=8 -D ACTIVATION=LINEAR -D X86=1 -o src/f32-vbinary/gen/vmaxc-wasmsimd-x86-x8.c
+tools/xngen src/f32-vbinary/vopc-wasmsimd.c.in -D OP=MIN  -D BATCH_TILE=4 -D ACTIVATION=LINEAR -D X86=1 -o src/f32-vbinary/gen/vminc-wasmsimd-x86-x4.c
+tools/xngen src/f32-vbinary/vopc-wasmsimd.c.in -D OP=MIN  -D BATCH_TILE=8 -D ACTIVATION=LINEAR -D X86=1 -o src/f32-vbinary/gen/vminc-wasmsimd-x86-x8.c
+tools/xngen src/f32-vbinary/vopc-wasmsimd.c.in -D OP=MUL  -D BATCH_TILE=4 -D ACTIVATION=MINMAX -D X86=1 -o src/f32-vbinary/gen/vmulc-minmax-wasmsimd-x86-x4.c
+tools/xngen src/f32-vbinary/vopc-wasmsimd.c.in -D OP=MUL  -D BATCH_TILE=8 -D ACTIVATION=MINMAX -D X86=1 -o src/f32-vbinary/gen/vmulc-minmax-wasmsimd-x86-x8.c
+tools/xngen src/f32-vbinary/vopc-wasmsimd.c.in -D OP=SUB  -D BATCH_TILE=4 -D ACTIVATION=MINMAX -D X86=1 -o src/f32-vbinary/gen/vsubc-minmax-wasmsimd-x86-x4.c
+tools/xngen src/f32-vbinary/vopc-wasmsimd.c.in -D OP=SUB  -D BATCH_TILE=8 -D ACTIVATION=MINMAX -D X86=1 -o src/f32-vbinary/gen/vsubc-minmax-wasmsimd-x86-x8.c
+tools/xngen src/f32-vbinary/vopc-wasmsimd.c.in -D OP=RSUB -D BATCH_TILE=4 -D ACTIVATION=MINMAX -D X86=1 -o src/f32-vbinary/gen/vrsubc-minmax-wasmsimd-x86-x4.c
+tools/xngen src/f32-vbinary/vopc-wasmsimd.c.in -D OP=RSUB -D BATCH_TILE=8 -D ACTIVATION=MINMAX -D X86=1 -o src/f32-vbinary/gen/vrsubc-minmax-wasmsimd-x86-x8.c
+
+tools/xngen src/f32-vbinary/vop-wasmsimd.c.in  -D OP=SQRDIFF  -D BATCH_TILE=4 -D ACTIVATION=LINEAR -D X86=0 -o src/f32-vbinary/gen/vsqrdiff-wasmsimd-x4.c
+tools/xngen src/f32-vbinary/vop-wasmsimd.c.in  -D OP=SQRDIFF  -D BATCH_TILE=8 -D ACTIVATION=LINEAR -D X86=0 -o src/f32-vbinary/gen/vsqrdiff-wasmsimd-x8.c
+tools/xngen src/f32-vbinary/vopc-wasmsimd.c.in -D OP=SQRDIFF  -D BATCH_TILE=4 -D ACTIVATION=LINEAR -D X86=0 -o src/f32-vbinary/gen/vsqrdiffc-wasmsimd-x4.c
+tools/xngen src/f32-vbinary/vopc-wasmsimd.c.in -D OP=SQRDIFF  -D BATCH_TILE=8 -D ACTIVATION=LINEAR -D X86=0 -o src/f32-vbinary/gen/vsqrdiffc-wasmsimd-x8.c
+tools/xngen src/f32-vbinary/vopc-wasmsimd.c.in -D OP=RSQRDIFF -D BATCH_TILE=4 -D ACTIVATION=LINEAR -D X86=0 -o src/f32-vbinary/gen/vrsqrdiffc-wasmsimd-x4.c
+tools/xngen src/f32-vbinary/vopc-wasmsimd.c.in -D OP=RSQRDIFF -D BATCH_TILE=8 -D ACTIVATION=LINEAR -D X86=0 -o src/f32-vbinary/gen/vrsqrdiffc-wasmsimd-x8.c
 
 ################################### ARM NEON ##################################
 tools/xngen src/f32-vbinary/vop-neon.c.in -D OP=ADD     -D BATCH_TILE=4 -D ACTIVATION=MINMAX -o src/f32-vbinary/gen/vadd-minmax-neon-x4.c
diff --git a/src/f32-vbinary/gen/vaddc-minmax-wasmsimd-arm-x4.c b/src/f32-vbinary/gen/vaddc-minmax-wasmsimd-arm-x4.c
new file mode 100644
index 0000000..dee652e
--- /dev/null
+++ b/src/f32-vbinary/gen/vaddc-minmax-wasmsimd-arm-x4.c
@@ -0,0 +1,63 @@
+// Auto-generated file. Do not edit!
+//   Template: src/f32-vbinary/vopc-wasmsimd.c.in
+//   Generator: tools/xngen
+//
+// Copyright 2020 Google LLC
+//
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree.
+
+#include <assert.h>
+
+#include <wasm_simd128.h>
+
+#include <xnnpack/common.h>
+#include <xnnpack/vbinary.h>
+
+// y[i] = clamp(a[i] + *b, params->scalar.min, params->scalar.max); n is the size in bytes.
+void xnn_f32_vaddc_minmax_ukernel__wasmsimd_arm_x4(
+    size_t n,
+    const float* a,
+    const float* b,
+    float* y,
+    const union xnn_f32_minmax_params params[restrict XNN_MIN_ELEMENTS(1)]) XNN_DISABLE_TSAN
+{
+  assert(n != 0);
+  assert(n % sizeof(float) == 0);
+  // Broadcast the clamp bounds to all four lanes.
+  const v128_t vy_min = wasm_v32x4_load_splat(&params->scalar.min);
+  const v128_t vy_max = wasm_v32x4_load_splat(&params->scalar.max);
+  // Only one float is read through b; it is splatted once, outside the loop.
+  const v128_t vb = wasm_v32x4_load_splat(b);
+  for (; n >= 4 * sizeof(float); n -= 4 * sizeof(float)) {
+    const v128_t va0123 = wasm_v128_load(a);
+    a += 4;
+
+    v128_t vy0123 = wasm_f32x4_add(va0123, vb);
+
+    // Clamp from below, then from above.
+    vy0123 = wasm_f32x4_max(vy0123, vy_min);
+
+    vy0123 = wasm_f32x4_min(vy0123, vy_max);
+
+    wasm_v128_store(y, vy0123);
+    y += 4;
+  }
+  if XNN_UNLIKELY(n != 0) {
+    const v128_t va = wasm_v128_load(a);
+    // NOTE(review): the load above reads a full 16 bytes although only 1-3 floats remain; confirm callers permit this.
+    v128_t vy = wasm_f32x4_add(va, vb);
+
+    vy = wasm_f32x4_max(vy, vy_min);
+    vy = wasm_f32x4_min(vy, vy_max);
+    // Write the remaining 1-3 results: two lanes as one 64-bit store, then a single lane.
+    if (n & (2 * sizeof(float))) {
+      *((double*) y) = wasm_f64x2_extract_lane(vy, 0);
+      vy = wasm_v32x4_shuffle(vy, vy, 2, 3, 2, 3);  // bring lanes 2,3 down to lanes 0,1
+      y += 2;
+    }
+    if (n & (1 * sizeof(float))) {
+      *y = wasm_f32x4_extract_lane(vy, 0);
+    }
+  }
+}
diff --git a/src/f32-vbinary/gen/vaddc-minmax-wasmsimd-arm-x8.c b/src/f32-vbinary/gen/vaddc-minmax-wasmsimd-arm-x8.c
new file mode 100644
index 0000000..f2cc655
--- /dev/null
+++ b/src/f32-vbinary/gen/vaddc-minmax-wasmsimd-arm-x8.c
@@ -0,0 +1,80 @@
+// Auto-generated file. Do not edit!
+//   Template: src/f32-vbinary/vopc-wasmsimd.c.in
+//   Generator: tools/xngen
+//
+// Copyright 2020 Google LLC
+//
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree.
+
+#include <assert.h>
+
+#include <wasm_simd128.h>
+
+#include <xnnpack/common.h>
+#include <xnnpack/vbinary.h>
+
+// y[i] = clamp(a[i] + *b, params->scalar.min, params->scalar.max); n is the size in bytes.
+void xnn_f32_vaddc_minmax_ukernel__wasmsimd_arm_x8(
+    size_t n,
+    const float* a,
+    const float* b,
+    float* y,
+    const union xnn_f32_minmax_params params[restrict XNN_MIN_ELEMENTS(1)]) XNN_DISABLE_TSAN
+{
+  assert(n != 0);
+  assert(n % sizeof(float) == 0);
+  // Broadcast the clamp bounds to all four lanes.
+  const v128_t vy_min = wasm_v32x4_load_splat(&params->scalar.min);
+  const v128_t vy_max = wasm_v32x4_load_splat(&params->scalar.max);
+  // Only one float is read through b; it is splatted once, outside the loops.
+  const v128_t vb = wasm_v32x4_load_splat(b);
+  for (; n >= 8 * sizeof(float); n -= 8 * sizeof(float)) {
+    const v128_t va0123 = wasm_v128_load(a);
+    const v128_t va4567 = wasm_v128_load(a + 4);
+    a += 8;
+
+    v128_t vy0123 = wasm_f32x4_add(va0123, vb);
+    v128_t vy4567 = wasm_f32x4_add(va4567, vb);
+
+    // Clamp both vectors from below, then from above.
+    vy0123 = wasm_f32x4_max(vy0123, vy_min);
+    vy4567 = wasm_f32x4_max(vy4567, vy_min);
+
+    vy0123 = wasm_f32x4_min(vy0123, vy_max);
+    vy4567 = wasm_f32x4_min(vy4567, vy_max);
+
+    wasm_v128_store(y, vy0123);
+    wasm_v128_store(y + 4, vy4567);
+    y += 8;
+  }
+  for (; n >= 4 * sizeof(float); n -= 4 * sizeof(float)) {  // at most one pass: fewer than 8 floats remain
+    const v128_t va = wasm_v128_load(a);
+    a += 4;
+
+    v128_t vy = wasm_f32x4_add(va, vb);
+
+    vy = wasm_f32x4_max(vy, vy_min);
+    vy = wasm_f32x4_min(vy, vy_max);
+
+    wasm_v128_store(y, vy);
+    y += 4;
+  }
+  if XNN_UNLIKELY(n != 0) {
+    const v128_t va = wasm_v128_load(a);
+    // NOTE(review): the load above reads a full 16 bytes although only 1-3 floats remain; confirm callers permit this.
+    v128_t vy = wasm_f32x4_add(va, vb);
+
+    vy = wasm_f32x4_max(vy, vy_min);
+    vy = wasm_f32x4_min(vy, vy_max);
+    // Write the remaining 1-3 results: two lanes as one 64-bit store, then a single lane.
+    if (n & (2 * sizeof(float))) {
+      *((double*) y) = wasm_f64x2_extract_lane(vy, 0);
+      vy = wasm_v32x4_shuffle(vy, vy, 2, 3, 2, 3);  // bring lanes 2,3 down to lanes 0,1
+      y += 2;
+    }
+    if (n & (1 * sizeof(float))) {
+      *y = wasm_f32x4_extract_lane(vy, 0);
+    }
+  }
+}
diff --git a/src/f32-vbinary/gen/vaddc-minmax-wasmsimd-x86-x4.c b/src/f32-vbinary/gen/vaddc-minmax-wasmsimd-x86-x4.c
new file mode 100644
index 0000000..9062b48
--- /dev/null
+++ b/src/f32-vbinary/gen/vaddc-minmax-wasmsimd-x86-x4.c
@@ -0,0 +1,68 @@
+// Auto-generated file. Do not edit!
+//   Template: src/f32-vbinary/vopc-wasmsimd.c.in
+//   Generator: tools/xngen
+//
+// Copyright 2020 Google LLC
+//
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree.
+
+#include <assert.h>
+
+#include <wasm_simd128.h>
+
+#include <xnnpack/common.h>
+#include <xnnpack/vbinary.h>
+
+// y[i] = clamp(a[i] + *b, params->scalar.min, params->scalar.max); n is the size in bytes.
+void xnn_f32_vaddc_minmax_ukernel__wasmsimd_x86_x4(
+    size_t n,
+    const float* a,
+    const float* b,
+    float* y,
+    const union xnn_f32_minmax_params params[restrict XNN_MIN_ELEMENTS(1)]) XNN_DISABLE_TSAN
+{
+  assert(n != 0);
+  assert(n % sizeof(float) == 0);
+  // Broadcast the clamp bounds to all four lanes.
+  const v128_t vy_min = wasm_v32x4_load_splat(&params->scalar.min);
+  const v128_t vy_max = wasm_v32x4_load_splat(&params->scalar.max);
+  // Only one float is read through b; it is splatted once, outside the loop.
+  const v128_t vb = wasm_v32x4_load_splat(b);
+  for (; n >= 4 * sizeof(float); n -= 4 * sizeof(float)) {
+    const v128_t va0123 = wasm_v128_load(a);
+    a += 4;
+
+    v128_t vy0123 = wasm_f32x4_add(va0123, vb);
+
+    // Clamp with compare masks and bitselect rather than f32x4 min/max.
+    const v128_t vltmask0123 = wasm_f32x4_lt(vy0123, vy_min);
+    // The upper-bound mask is taken from the sum before the lower clamp overwrites it.
+    const v128_t vngtmask0123 = wasm_f32x4_le(vy0123, vy_max);
+    vy0123 = wasm_v128_bitselect(vy_min, vy0123, vltmask0123);
+    // Lanes with the mask set (sum <= max) keep their value; the others take vy_max.
+    vy0123 = wasm_v128_bitselect(vy0123, vy_max, vngtmask0123);
+
+    wasm_v128_store(y, vy0123);
+    y += 4;
+  }
+  if XNN_UNLIKELY(n != 0) {
+    const v128_t va = wasm_v128_load(a);
+    // NOTE(review): the load above reads a full 16 bytes although only 1-3 floats remain; confirm callers permit this.
+    v128_t vy = wasm_f32x4_add(va, vb);
+
+    const v128_t vltmask = wasm_f32x4_lt(vy, vy_min);
+    const v128_t vngtmask = wasm_f32x4_le(vy, vy_max);
+    vy = wasm_v128_bitselect(vy_min, vy, vltmask);
+    vy = wasm_v128_bitselect(vy, vy_max, vngtmask);
+    // Write the remaining 1-3 results: two lanes as one 64-bit store, then a single lane.
+    if (n & (2 * sizeof(float))) {
+      *((double*) y) = wasm_f64x2_extract_lane(vy, 0);
+      vy = wasm_v32x4_shuffle(vy, vy, 2, 3, 2, 3);  // bring lanes 2,3 down to lanes 0,1
+      y += 2;
+    }
+    if (n & (1 * sizeof(float))) {
+      *y = wasm_f32x4_extract_lane(vy, 0);
+    }
+  }
+}
diff --git a/src/f32-vbinary/gen/vaddc-minmax-wasmsimd-x86-x8.c b/src/f32-vbinary/gen/vaddc-minmax-wasmsimd-x86-x8.c
new file mode 100644
index 0000000..c030c2d
--- /dev/null
+++ b/src/f32-vbinary/gen/vaddc-minmax-wasmsimd-x86-x8.c
@@ -0,0 +1,89 @@
+// Auto-generated file. Do not edit!
+//   Template: src/f32-vbinary/vopc-wasmsimd.c.in
+//   Generator: tools/xngen
+//
+// Copyright 2020 Google LLC
+//
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree.
+
+#include <assert.h>
+
+#include <wasm_simd128.h>
+
+#include <xnnpack/common.h>
+#include <xnnpack/vbinary.h>
+
+// y[i] = clamp(a[i] + *b, params->scalar.min, params->scalar.max); n is the size in bytes.
+void xnn_f32_vaddc_minmax_ukernel__wasmsimd_x86_x8(
+    size_t n,
+    const float* a,
+    const float* b,
+    float* y,
+    const union xnn_f32_minmax_params params[restrict XNN_MIN_ELEMENTS(1)]) XNN_DISABLE_TSAN
+{
+  assert(n != 0);
+  assert(n % sizeof(float) == 0);
+  // Broadcast the clamp bounds to all four lanes.
+  const v128_t vy_min = wasm_v32x4_load_splat(&params->scalar.min);
+  const v128_t vy_max = wasm_v32x4_load_splat(&params->scalar.max);
+  // Only one float is read through b; it is splatted once, outside the loops.
+  const v128_t vb = wasm_v32x4_load_splat(b);
+  for (; n >= 8 * sizeof(float); n -= 8 * sizeof(float)) {
+    const v128_t va0123 = wasm_v128_load(a);
+    const v128_t va4567 = wasm_v128_load(a + 4);
+    a += 8;
+
+    v128_t vy0123 = wasm_f32x4_add(va0123, vb);
+    v128_t vy4567 = wasm_f32x4_add(va4567, vb);
+
+    // Clamp with compare masks and bitselect rather than f32x4 min/max.
+    const v128_t vltmask0123 = wasm_f32x4_lt(vy0123, vy_min);
+    const v128_t vltmask4567 = wasm_f32x4_lt(vy4567, vy_min);
+    // Each upper-bound mask is taken from the sum before the lower clamp overwrites it.
+    const v128_t vngtmask0123 = wasm_f32x4_le(vy0123, vy_max);
+    vy0123 = wasm_v128_bitselect(vy_min, vy0123, vltmask0123);
+    const v128_t vngtmask4567 = wasm_f32x4_le(vy4567, vy_max);
+    vy4567 = wasm_v128_bitselect(vy_min, vy4567, vltmask4567);
+    // Lanes with the mask set (sum <= max) keep their value; the others take vy_max.
+    vy0123 = wasm_v128_bitselect(vy0123, vy_max, vngtmask0123);
+    vy4567 = wasm_v128_bitselect(vy4567, vy_max, vngtmask4567);
+
+    wasm_v128_store(y, vy0123);
+    wasm_v128_store(y + 4, vy4567);
+    y += 8;
+  }
+  for (; n >= 4 * sizeof(float); n -= 4 * sizeof(float)) {  // at most one pass: fewer than 8 floats remain
+    const v128_t va = wasm_v128_load(a);
+    a += 4;
+
+    v128_t vy = wasm_f32x4_add(va, vb);
+
+    const v128_t vltmask = wasm_f32x4_lt(vy, vy_min);
+    const v128_t vngtmask = wasm_f32x4_le(vy, vy_max);
+    vy = wasm_v128_bitselect(vy_min, vy, vltmask);
+    vy = wasm_v128_bitselect(vy, vy_max, vngtmask);
+
+    wasm_v128_store(y, vy);
+    y += 4;
+  }
+  if XNN_UNLIKELY(n != 0) {
+    const v128_t va = wasm_v128_load(a);
+    // NOTE(review): the load above reads a full 16 bytes although only 1-3 floats remain; confirm callers permit this.
+    v128_t vy = wasm_f32x4_add(va, vb);
+
+    const v128_t vltmask = wasm_f32x4_lt(vy, vy_min);
+    const v128_t vngtmask = wasm_f32x4_le(vy, vy_max);
+    vy = wasm_v128_bitselect(vy_min, vy, vltmask);
+    vy = wasm_v128_bitselect(vy, vy_max, vngtmask);
+    // Write the remaining 1-3 results: two lanes as one 64-bit store, then a single lane.
+    if (n & (2 * sizeof(float))) {
+      *((double*) y) = wasm_f64x2_extract_lane(vy, 0);
+      vy = wasm_v32x4_shuffle(vy, vy, 2, 3, 2, 3);  // bring lanes 2,3 down to lanes 0,1
+      y += 2;
+    }
+    if (n & (1 * sizeof(float))) {
+      *y = wasm_f32x4_extract_lane(vy, 0);
+    }
+  }
+}
diff --git a/src/f32-vbinary/gen/vdivc-minmax-wasmsimd-arm-x4.c b/src/f32-vbinary/gen/vdivc-minmax-wasmsimd-arm-x4.c
new file mode 100644
index 0000000..37eb288
--- /dev/null
+++ b/src/f32-vbinary/gen/vdivc-minmax-wasmsimd-arm-x4.c
@@ -0,0 +1,63 @@
+// Auto-generated file. Do not edit!
+//   Template: src/f32-vbinary/vopc-wasmsimd.c.in
+//   Generator: tools/xngen
+//
+// Copyright 2020 Google LLC
+//
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree.
+
+#include <assert.h>
+
+#include <wasm_simd128.h>
+
+#include <xnnpack/common.h>
+#include <xnnpack/vbinary.h>
+
+// y[i] = clamp(a[i] / *b, params->scalar.min, params->scalar.max); n is the size in bytes.
+void xnn_f32_vdivc_minmax_ukernel__wasmsimd_arm_x4(
+    size_t n,
+    const float* a,
+    const float* b,
+    float* y,
+    const union xnn_f32_minmax_params params[restrict XNN_MIN_ELEMENTS(1)]) XNN_DISABLE_TSAN
+{
+  assert(n != 0);
+  assert(n % sizeof(float) == 0);
+  // Broadcast the clamp bounds to all four lanes.
+  const v128_t vy_min = wasm_v32x4_load_splat(&params->scalar.min);
+  const v128_t vy_max = wasm_v32x4_load_splat(&params->scalar.max);
+  // Only one float (the divisor) is read through b; it is splatted once, outside the loop.
+  const v128_t vb = wasm_v32x4_load_splat(b);
+  for (; n >= 4 * sizeof(float); n -= 4 * sizeof(float)) {
+    const v128_t va0123 = wasm_v128_load(a);
+    a += 4;
+
+    v128_t vy0123 = wasm_f32x4_div(va0123, vb);
+
+    // Clamp from below, then from above.
+    vy0123 = wasm_f32x4_max(vy0123, vy_min);
+
+    vy0123 = wasm_f32x4_min(vy0123, vy_max);
+
+    wasm_v128_store(y, vy0123);
+    y += 4;
+  }
+  if XNN_UNLIKELY(n != 0) {
+    const v128_t va = wasm_v128_load(a);
+    // NOTE(review): the load above reads a full 16 bytes although only 1-3 floats remain; confirm callers permit this.
+    v128_t vy = wasm_f32x4_div(va, vb);
+
+    vy = wasm_f32x4_max(vy, vy_min);
+    vy = wasm_f32x4_min(vy, vy_max);
+    // Write the remaining 1-3 results: two lanes as one 64-bit store, then a single lane.
+    if (n & (2 * sizeof(float))) {
+      *((double*) y) = wasm_f64x2_extract_lane(vy, 0);
+      vy = wasm_v32x4_shuffle(vy, vy, 2, 3, 2, 3);  // bring lanes 2,3 down to lanes 0,1
+      y += 2;
+    }
+    if (n & (1 * sizeof(float))) {
+      *y = wasm_f32x4_extract_lane(vy, 0);
+    }
+  }
+}
diff --git a/src/f32-vbinary/gen/vdivc-minmax-wasmsimd-arm-x8.c b/src/f32-vbinary/gen/vdivc-minmax-wasmsimd-arm-x8.c
new file mode 100644
index 0000000..ad4f093
--- /dev/null
+++ b/src/f32-vbinary/gen/vdivc-minmax-wasmsimd-arm-x8.c
@@ -0,0 +1,80 @@
+// Auto-generated file. Do not edit!
+//   Template: src/f32-vbinary/vopc-wasmsimd.c.in
+//   Generator: tools/xngen
+//
+// Copyright 2020 Google LLC
+//
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree.
+
+#include <assert.h>
+
+#include <wasm_simd128.h>
+
+#include <xnnpack/common.h>
+#include <xnnpack/vbinary.h>
+
+// y[i] = clamp(a[i] / *b, params->scalar.min, params->scalar.max); n is the size in bytes.
+void xnn_f32_vdivc_minmax_ukernel__wasmsimd_arm_x8(
+    size_t n,
+    const float* a,
+    const float* b,
+    float* y,
+    const union xnn_f32_minmax_params params[restrict XNN_MIN_ELEMENTS(1)]) XNN_DISABLE_TSAN
+{
+  assert(n != 0);
+  assert(n % sizeof(float) == 0);
+  // Broadcast the clamp bounds to all four lanes.
+  const v128_t vy_min = wasm_v32x4_load_splat(&params->scalar.min);
+  const v128_t vy_max = wasm_v32x4_load_splat(&params->scalar.max);
+  // Only one float (the divisor) is read through b; it is splatted once, outside the loops.
+  const v128_t vb = wasm_v32x4_load_splat(b);
+  for (; n >= 8 * sizeof(float); n -= 8 * sizeof(float)) {
+    const v128_t va0123 = wasm_v128_load(a);
+    const v128_t va4567 = wasm_v128_load(a + 4);
+    a += 8;
+
+    v128_t vy0123 = wasm_f32x4_div(va0123, vb);
+    v128_t vy4567 = wasm_f32x4_div(va4567, vb);
+
+    // Clamp both vectors from below, then from above.
+    vy0123 = wasm_f32x4_max(vy0123, vy_min);
+    vy4567 = wasm_f32x4_max(vy4567, vy_min);
+
+    vy0123 = wasm_f32x4_min(vy0123, vy_max);
+    vy4567 = wasm_f32x4_min(vy4567, vy_max);
+
+    wasm_v128_store(y, vy0123);
+    wasm_v128_store(y + 4, vy4567);
+    y += 8;
+  }
+  for (; n >= 4 * sizeof(float); n -= 4 * sizeof(float)) {  // at most one pass: fewer than 8 floats remain
+    const v128_t va = wasm_v128_load(a);
+    a += 4;
+
+    v128_t vy = wasm_f32x4_div(va, vb);
+
+    vy = wasm_f32x4_max(vy, vy_min);
+    vy = wasm_f32x4_min(vy, vy_max);
+
+    wasm_v128_store(y, vy);
+    y += 4;
+  }
+  if XNN_UNLIKELY(n != 0) {
+    const v128_t va = wasm_v128_load(a);
+    // NOTE(review): the load above reads a full 16 bytes although only 1-3 floats remain; confirm callers permit this.
+    v128_t vy = wasm_f32x4_div(va, vb);
+
+    vy = wasm_f32x4_max(vy, vy_min);
+    vy = wasm_f32x4_min(vy, vy_max);
+    // Write the remaining 1-3 results: two lanes as one 64-bit store, then a single lane.
+    if (n & (2 * sizeof(float))) {
+      *((double*) y) = wasm_f64x2_extract_lane(vy, 0);
+      vy = wasm_v32x4_shuffle(vy, vy, 2, 3, 2, 3);  // bring lanes 2,3 down to lanes 0,1
+      y += 2;
+    }
+    if (n & (1 * sizeof(float))) {
+      *y = wasm_f32x4_extract_lane(vy, 0);
+    }
+  }
+}
diff --git a/src/f32-vbinary/gen/vdivc-minmax-wasmsimd-x86-x4.c b/src/f32-vbinary/gen/vdivc-minmax-wasmsimd-x86-x4.c
new file mode 100644
index 0000000..d6b5f4b
--- /dev/null
+++ b/src/f32-vbinary/gen/vdivc-minmax-wasmsimd-x86-x4.c
@@ -0,0 +1,68 @@
+// Auto-generated file. Do not edit!
+//   Template: src/f32-vbinary/vopc-wasmsimd.c.in
+//   Generator: tools/xngen
+//
+// Copyright 2020 Google LLC
+//
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree.
+
+#include <assert.h>
+
+#include <wasm_simd128.h>
+
+#include <xnnpack/common.h>
+#include <xnnpack/vbinary.h>
+
+
+// Divides each float of a by the scalar *b and clamps the quotient to the
+// range [params->scalar.min, params->scalar.max], storing the results in y.
+// Clamping uses compare + bitselect rather than wasm_f32x4_min/max.
+// n is a byte count: it must be non-zero and a multiple of sizeof(float).
+void xnn_f32_vdivc_minmax_ukernel__wasmsimd_x86_x4(
+    size_t n,
+    const float* a,
+    const float* b,
+    float* y,
+    const union xnn_f32_minmax_params params[restrict XNN_MIN_ELEMENTS(1)]) XNN_DISABLE_TSAN
+{
+  assert(n != 0);
+  assert(n % sizeof(float) == 0);
+
+  // Broadcast the clamp bounds and the scalar operand to all four lanes.
+  const v128_t vlower = wasm_v32x4_load_splat(&params->scalar.min);
+  const v128_t vupper = wasm_v32x4_load_splat(&params->scalar.max);
+  const v128_t vscalar = wasm_v32x4_load_splat(b);
+
+  // Main loop: one vector (4 floats) per iteration.
+  while (n >= 4 * sizeof(float)) {
+    const v128_t vq = wasm_f32x4_div(wasm_v128_load(a), vscalar);
+    a += 4;
+
+    // Both masks are taken from the unclamped value before either select.
+    const v128_t vbelow = wasm_f32x4_lt(vq, vlower);
+    const v128_t vnot_above = wasm_f32x4_le(vq, vupper);
+    const v128_t vfloored = wasm_v128_bitselect(vlower, vq, vbelow);
+    wasm_v128_store(y, wasm_v128_bitselect(vfloored, vupper, vnot_above));
+    y += 4;
+    n -= 4 * sizeof(float);
+  }
+  if XNN_UNLIKELY(n != 0) {
+    // Tail of 1-3 floats: compute a whole vector, store only the valid lanes.
+    const v128_t vq = wasm_f32x4_div(wasm_v128_load(a), vscalar);
+    const v128_t vbelow = wasm_f32x4_lt(vq, vlower);
+    const v128_t vnot_above = wasm_f32x4_le(vq, vupper);
+    v128_t vout = wasm_v128_bitselect(vlower, vq, vbelow);
+    vout = wasm_v128_bitselect(vout, vupper, vnot_above);
+
+    if (n & (2 * sizeof(float))) {
+      // Store lanes 0-1 as one 64-bit value and shift lanes 2-3 down.
+      *((double*) y) = wasm_f64x2_extract_lane(vout, 0);
+      vout = wasm_v32x4_shuffle(vout, vout, 2, 3, 2, 3);
+      y += 2;
+    }
+    if (n & (1 * sizeof(float))) {
+      *y = wasm_f32x4_extract_lane(vout, 0);
+    }
+  }
+}
diff --git a/src/f32-vbinary/gen/vdivc-minmax-wasmsimd-x86-x8.c b/src/f32-vbinary/gen/vdivc-minmax-wasmsimd-x86-x8.c
new file mode 100644
index 0000000..00310e3
--- /dev/null
+++ b/src/f32-vbinary/gen/vdivc-minmax-wasmsimd-x86-x8.c
@@ -0,0 +1,89 @@
+// Auto-generated file. Do not edit!
+//   Template: src/f32-vbinary/vopc-wasmsimd.c.in
+//   Generator: tools/xngen
+//
+// Copyright 2020 Google LLC
+//
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree.
+
+#include <assert.h>
+
+#include <wasm_simd128.h>
+
+#include <xnnpack/common.h>
+#include <xnnpack/vbinary.h>
+
+
+// Divides each float of a by the scalar *b and clamps the quotient to the
+// range [params->scalar.min, params->scalar.max], storing the results in y.
+// Clamping uses compare + bitselect rather than wasm_f32x4_min/max.
+// n is a byte count: it must be non-zero and a multiple of sizeof(float).
+void xnn_f32_vdivc_minmax_ukernel__wasmsimd_x86_x8(
+    size_t n,
+    const float* a,
+    const float* b,
+    float* y,
+    const union xnn_f32_minmax_params params[restrict XNN_MIN_ELEMENTS(1)]) XNN_DISABLE_TSAN
+{
+  assert(n != 0);
+  assert(n % sizeof(float) == 0);
+
+  // Broadcast the clamp bounds and the scalar operand to all four lanes.
+  const v128_t vlower = wasm_v32x4_load_splat(&params->scalar.min);
+  const v128_t vupper = wasm_v32x4_load_splat(&params->scalar.max);
+  const v128_t vscalar = wasm_v32x4_load_splat(b);
+
+  // Unrolled loop: two vectors (8 floats) per iteration.
+  while (n >= 8 * sizeof(float)) {
+    const v128_t vq_lo = wasm_f32x4_div(wasm_v128_load(a), vscalar);
+    const v128_t vq_hi = wasm_f32x4_div(wasm_v128_load(a + 4), vscalar);
+    a += 8;
+
+    // Both masks are taken from the unclamped value before either select.
+    const v128_t vbelow_lo = wasm_f32x4_lt(vq_lo, vlower);
+    const v128_t vbelow_hi = wasm_f32x4_lt(vq_hi, vlower);
+    const v128_t vnot_above_lo = wasm_f32x4_le(vq_lo, vupper);
+    const v128_t vnot_above_hi = wasm_f32x4_le(vq_hi, vupper);
+    const v128_t vfloored_lo = wasm_v128_bitselect(vlower, vq_lo, vbelow_lo);
+    const v128_t vfloored_hi = wasm_v128_bitselect(vlower, vq_hi, vbelow_hi);
+
+    wasm_v128_store(y, wasm_v128_bitselect(vfloored_lo, vupper, vnot_above_lo));
+    wasm_v128_store(y + 4, wasm_v128_bitselect(vfloored_hi, vupper, vnot_above_hi));
+    y += 8;
+    n -= 8 * sizeof(float);
+  }
+  // One vector (4 floats) per iteration for what the unrolled loop left over.
+  while (n >= 4 * sizeof(float)) {
+    const v128_t vq = wasm_f32x4_div(wasm_v128_load(a), vscalar);
+    a += 4;
+
+    const v128_t vbelow = wasm_f32x4_lt(vq, vlower);
+    const v128_t vnot_above = wasm_f32x4_le(vq, vupper);
+    const v128_t vfloored = wasm_v128_bitselect(vlower, vq, vbelow);
+    wasm_v128_store(y, wasm_v128_bitselect(vfloored, vupper, vnot_above));
+    y += 4;
+    n -= 4 * sizeof(float);
+  }
+  if XNN_UNLIKELY(n != 0) {
+    // Tail of 1-3 floats: compute a whole vector, then store only the lanes
+    // selected by the bits of n.
+    const v128_t vq = wasm_f32x4_div(wasm_v128_load(a), vscalar);
+
+    const v128_t vbelow = wasm_f32x4_lt(vq, vlower);
+    const v128_t vnot_above = wasm_f32x4_le(vq, vupper);
+    v128_t vout = wasm_v128_bitselect(vlower, vq, vbelow);
+    vout = wasm_v128_bitselect(vout, vupper, vnot_above);
+
+    if (n & (2 * sizeof(float))) {
+      // Store lanes 0-1 as one 64-bit value and shift lanes 2-3 down.
+      *((double*) y) = wasm_f64x2_extract_lane(vout, 0);
+      vout = wasm_v32x4_shuffle(vout, vout, 2, 3, 2, 3);
+      y += 2;
+    }
+    if (n & (1 * sizeof(float))) {
+      // Store the one remaining float from lane 0.
+      *y = wasm_f32x4_extract_lane(vout, 0);
+    }
+  }
+}
diff --git a/src/f32-vbinary/gen/vmaxc-wasmsimd-arm-x4.c b/src/f32-vbinary/gen/vmaxc-wasmsimd-arm-x4.c
new file mode 100644
index 0000000..222ab07
--- /dev/null
+++ b/src/f32-vbinary/gen/vmaxc-wasmsimd-arm-x4.c
@@ -0,0 +1,56 @@
+// Auto-generated file. Do not edit!
+//   Template: src/f32-vbinary/vopc-wasmsimd.c.in
+//   Generator: tools/xngen
+//
+// Copyright 2020 Google LLC
+//
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree.
+
+#include <assert.h>
+
+#include <wasm_simd128.h>
+
+#include <xnnpack/common.h>
+#include <xnnpack/vbinary.h>
+
+
+// Stores wasm_f32x4_max(a[i], *b) into y[i] for every float of a.
+// n is a byte count: it must be non-zero and a multiple of sizeof(float).
+// params is not read by this kernel.
+void xnn_f32_vmaxc_ukernel__wasmsimd_arm_x4(
+    size_t n,
+    const float* a,
+    const float* b,
+    float* y,
+    const union xnn_f32_default_params params[restrict XNN_MIN_ELEMENTS(1)]) XNN_DISABLE_TSAN
+{
+  assert(n != 0);
+  assert(n % sizeof(float) == 0);
+
+  // Broadcast the scalar operand to all four lanes.
+  const v128_t vscalar = wasm_v32x4_load_splat(b);
+
+  // Main loop: one vector (4 floats) per iteration.
+  while (n >= 4 * sizeof(float)) {
+    const v128_t vout = wasm_f32x4_max(wasm_v128_load(a), vscalar);
+    a += 4;
+    wasm_v128_store(y, vout);
+    y += 4;
+    n -= 4 * sizeof(float);
+  }
+  if XNN_UNLIKELY(n != 0) {
+    // Tail of 1-3 floats: compute a whole vector, store only the valid lanes.
+    v128_t vout = wasm_f32x4_max(wasm_v128_load(a), vscalar);
+
+    if (n & (2 * sizeof(float))) {
+      // Store lanes 0-1 as one 64-bit value and shift lanes 2-3 down.
+      *((double*) y) = wasm_f64x2_extract_lane(vout, 0);
+      vout = wasm_v32x4_shuffle(vout, vout, 2, 3, 2, 3);
+      y += 2;
+    }
+    if (n & (1 * sizeof(float))) {
+      *y = wasm_f32x4_extract_lane(vout, 0);
+    }
+  }
+}
diff --git a/src/f32-vbinary/gen/vmaxc-wasmsimd-arm-x8.c b/src/f32-vbinary/gen/vmaxc-wasmsimd-arm-x8.c
new file mode 100644
index 0000000..007ad84
--- /dev/null
+++ b/src/f32-vbinary/gen/vmaxc-wasmsimd-arm-x8.c
@@ -0,0 +1,69 @@
+// Auto-generated file. Do not edit!
+//   Template: src/f32-vbinary/vopc-wasmsimd.c.in
+//   Generator: tools/xngen
+//
+// Copyright 2020 Google LLC
+//
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree.
+
+#include <assert.h>
+
+#include <wasm_simd128.h>
+
+#include <xnnpack/common.h>
+#include <xnnpack/vbinary.h>
+
+
+// Stores wasm_f32x4_max(a[i], *b) into y[i] for every float of a.
+// n is a byte count: it must be non-zero and a multiple of sizeof(float).
+// params is not read by this kernel.
+void xnn_f32_vmaxc_ukernel__wasmsimd_arm_x8(
+    size_t n,
+    const float* a,
+    const float* b,
+    float* y,
+    const union xnn_f32_default_params params[restrict XNN_MIN_ELEMENTS(1)]) XNN_DISABLE_TSAN
+{
+  assert(n != 0);
+  assert(n % sizeof(float) == 0);
+
+  // Broadcast the scalar operand to all four lanes.
+  const v128_t vscalar = wasm_v32x4_load_splat(b);
+
+  // Unrolled loop: two vectors (8 floats) per iteration.
+  while (n >= 8 * sizeof(float)) {
+    const v128_t vout_lo = wasm_f32x4_max(wasm_v128_load(a), vscalar);
+    const v128_t vout_hi = wasm_f32x4_max(wasm_v128_load(a + 4), vscalar);
+    a += 8;
+
+    wasm_v128_store(y, vout_lo);
+    wasm_v128_store(y + 4, vout_hi);
+    y += 8;
+    n -= 8 * sizeof(float);
+  }
+  // One vector (4 floats) per iteration for what the unrolled loop left over.
+  while (n >= 4 * sizeof(float)) {
+    const v128_t vout = wasm_f32x4_max(wasm_v128_load(a), vscalar);
+    a += 4;
+
+    wasm_v128_store(y, vout);
+    y += 4;
+    n -= 4 * sizeof(float);
+  }
+  if XNN_UNLIKELY(n != 0) {
+    // Tail of 1-3 floats: compute a whole vector, then store only the lanes
+    // selected by the bits of n.
+    v128_t vout = wasm_f32x4_max(wasm_v128_load(a), vscalar);
+
+    if (n & (2 * sizeof(float))) {
+      // Store lanes 0-1 as one 64-bit value and shift lanes 2-3 down.
+      *((double*) y) = wasm_f64x2_extract_lane(vout, 0);
+      vout = wasm_v32x4_shuffle(vout, vout, 2, 3, 2, 3);
+      y += 2;
+    }
+    if (n & (1 * sizeof(float))) {
+      *y = wasm_f32x4_extract_lane(vout, 0);
+    }
+  }
+}
diff --git a/src/f32-vbinary/gen/vmaxc-wasmsimd-x86-x4.c b/src/f32-vbinary/gen/vmaxc-wasmsimd-x86-x4.c
new file mode 100644
index 0000000..aedf1f8
--- /dev/null
+++ b/src/f32-vbinary/gen/vmaxc-wasmsimd-x86-x4.c
@@ -0,0 +1,57 @@
+// Auto-generated file. Do not edit!
+//   Template: src/f32-vbinary/vopc-wasmsimd.c.in
+//   Generator: tools/xngen
+//
+// Copyright 2020 Google LLC
+//
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree.
+
+#include <assert.h>
+
+#include <wasm_simd128.h>
+
+#include <xnnpack/common.h>
+#include <xnnpack/vbinary.h>
+
+
+// Computes y[i] = max(a[i], *b) with a compare + bitselect sequence instead of
+// wasm_f32x4_max. Full vectors and the tail use the same sequence, so the
+// result for an element does not depend on where it sits in the array.
+// n is a byte count (non-zero multiple of sizeof(float)); params is not read.
+void xnn_f32_vmaxc_ukernel__wasmsimd_x86_x4(
+    size_t n,
+    const float* a,
+    const float* b,
+    float* y,
+    const union xnn_f32_default_params params[restrict XNN_MIN_ELEMENTS(1)]) XNN_DISABLE_TSAN
+{
+  assert(n != 0);
+  assert(n % sizeof(float) == 0);
+
+  const v128_t vb = wasm_v32x4_load_splat(b);
+  for (; n >= 4 * sizeof(float); n -= 4 * sizeof(float)) {
+    const v128_t va0123 = wasm_v128_load(a);
+    a += 4;
+    // Lanes where a <= b take b; every other lane keeps a.
+    const v128_t vm0123 = wasm_f32x4_le(va0123, vb);
+    const v128_t vy0123 = wasm_v128_bitselect(vb, va0123, vm0123);
+    wasm_v128_store(y, vy0123);
+    y += 4;
+  }
+  if XNN_UNLIKELY(n != 0) {
+    // Tail of 1-3 floats: compute a whole vector, store only the valid lanes.
+    const v128_t va = wasm_v128_load(a);
+    const v128_t vm = wasm_f32x4_le(va, vb);
+    v128_t vy = wasm_v128_bitselect(vb, va, vm);
+
+    if (n & (2 * sizeof(float))) {
+      *((double*) y) = wasm_f64x2_extract_lane(vy, 0);
+      vy = wasm_v32x4_shuffle(vy, vy, 2, 3, 2, 3);
+      y += 2;
+    }
+    if (n & (1 * sizeof(float))) {
+      *y = wasm_f32x4_extract_lane(vy, 0);
+    }
+  }
+}
diff --git a/src/f32-vbinary/gen/vmaxc-wasmsimd-x86-x8.c b/src/f32-vbinary/gen/vmaxc-wasmsimd-x86-x8.c
new file mode 100644
index 0000000..5622cec
--- /dev/null
+++ b/src/f32-vbinary/gen/vmaxc-wasmsimd-x86-x8.c
@@ -0,0 +1,71 @@
+// Auto-generated file. Do not edit!
+//   Template: src/f32-vbinary/vopc-wasmsimd.c.in
+//   Generator: tools/xngen
+//
+// Copyright 2020 Google LLC
+//
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree.
+
+#include <assert.h>
+
+#include <wasm_simd128.h>
+
+#include <xnnpack/common.h>
+#include <xnnpack/vbinary.h>
+
+
+// Computes y[i] = max(a[i], *b) with a compare + bitselect sequence instead of
+// wasm_f32x4_max. All three stages (8-wide, 4-wide, tail) use the same
+// sequence, so the result for an element does not depend on its position.
+// n is a byte count (non-zero multiple of sizeof(float)); params is not read.
+void xnn_f32_vmaxc_ukernel__wasmsimd_x86_x8(
+    size_t n,
+    const float* a,
+    const float* b,
+    float* y,
+    const union xnn_f32_default_params params[restrict XNN_MIN_ELEMENTS(1)]) XNN_DISABLE_TSAN
+{
+  assert(n != 0);
+  assert(n % sizeof(float) == 0);
+
+  const v128_t vb = wasm_v32x4_load_splat(b);
+  for (; n >= 8 * sizeof(float); n -= 8 * sizeof(float)) {
+    const v128_t va0123 = wasm_v128_load(a);
+    const v128_t va4567 = wasm_v128_load(a + 4);
+    a += 8;
+
+    // Lanes where a <= b take b; every other lane keeps a.
+    const v128_t vm0123 = wasm_f32x4_le(va0123, vb);
+    const v128_t vm4567 = wasm_f32x4_le(va4567, vb);
+    const v128_t vy0123 = wasm_v128_bitselect(vb, va0123, vm0123);
+    const v128_t vy4567 = wasm_v128_bitselect(vb, va4567, vm4567);
+
+    wasm_v128_store(y, vy0123);
+    wasm_v128_store(y + 4, vy4567);
+    y += 8;
+  }
+  for (; n >= 4 * sizeof(float); n -= 4 * sizeof(float)) {
+    const v128_t va = wasm_v128_load(a);
+    a += 4;
+    const v128_t vm = wasm_f32x4_le(va, vb);
+    const v128_t vy = wasm_v128_bitselect(vb, va, vm);
+    wasm_v128_store(y, vy);
+    y += 4;
+  }
+  if XNN_UNLIKELY(n != 0) {
+    // Tail of 1-3 floats: compute a whole vector, store only the valid lanes.
+    const v128_t va = wasm_v128_load(a);
+    const v128_t vm = wasm_f32x4_le(va, vb);
+    v128_t vy = wasm_v128_bitselect(vb, va, vm);
+
+    if (n & (2 * sizeof(float))) {
+      *((double*) y) = wasm_f64x2_extract_lane(vy, 0);
+      vy = wasm_v32x4_shuffle(vy, vy, 2, 3, 2, 3);
+      y += 2;
+    }
+    if (n & (1 * sizeof(float))) {
+      *y = wasm_f32x4_extract_lane(vy, 0);
+    }
+  }
+}
diff --git a/src/f32-vbinary/gen/vminc-wasmsimd-arm-x4.c b/src/f32-vbinary/gen/vminc-wasmsimd-arm-x4.c
new file mode 100644
index 0000000..be3b869
--- /dev/null
+++ b/src/f32-vbinary/gen/vminc-wasmsimd-arm-x4.c
@@ -0,0 +1,56 @@
+// Auto-generated file. Do not edit!
+//   Template: src/f32-vbinary/vopc-wasmsimd.c.in
+//   Generator: tools/xngen
+//
+// Copyright 2020 Google LLC
+//
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree.
+
+#include <assert.h>
+
+#include <wasm_simd128.h>
+
+#include <xnnpack/common.h>
+#include <xnnpack/vbinary.h>
+
+
+// Stores wasm_f32x4_min(a[i], *b) into y[i] for every float of a.
+// n is a byte count: it must be non-zero and a multiple of sizeof(float).
+// params is not read by this kernel.
+void xnn_f32_vminc_ukernel__wasmsimd_arm_x4(
+    size_t n,
+    const float* a,
+    const float* b,
+    float* y,
+    const union xnn_f32_default_params params[restrict XNN_MIN_ELEMENTS(1)]) XNN_DISABLE_TSAN
+{
+  assert(n != 0);
+  assert(n % sizeof(float) == 0);
+
+  // Broadcast the scalar operand to all four lanes.
+  const v128_t vscalar = wasm_v32x4_load_splat(b);
+
+  // Main loop: one vector (4 floats) per iteration.
+  while (n >= 4 * sizeof(float)) {
+    const v128_t vout = wasm_f32x4_min(wasm_v128_load(a), vscalar);
+    a += 4;
+    wasm_v128_store(y, vout);
+    y += 4;
+    n -= 4 * sizeof(float);
+  }
+  if XNN_UNLIKELY(n != 0) {
+    // Tail of 1-3 floats: compute a whole vector, store only the valid lanes.
+    v128_t vout = wasm_f32x4_min(wasm_v128_load(a), vscalar);
+
+    if (n & (2 * sizeof(float))) {
+      // Store lanes 0-1 as one 64-bit value and shift lanes 2-3 down.
+      *((double*) y) = wasm_f64x2_extract_lane(vout, 0);
+      vout = wasm_v32x4_shuffle(vout, vout, 2, 3, 2, 3);
+      y += 2;
+    }
+    if (n & (1 * sizeof(float))) {
+      *y = wasm_f32x4_extract_lane(vout, 0);
+    }
+  }
+}
diff --git a/src/f32-vbinary/gen/vminc-wasmsimd-arm-x8.c b/src/f32-vbinary/gen/vminc-wasmsimd-arm-x8.c
new file mode 100644
index 0000000..6b1f726
--- /dev/null
+++ b/src/f32-vbinary/gen/vminc-wasmsimd-arm-x8.c
@@ -0,0 +1,69 @@
+// Auto-generated file. Do not edit!
+//   Template: src/f32-vbinary/vopc-wasmsimd.c.in
+//   Generator: tools/xngen
+//
+// Copyright 2020 Google LLC
+//
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree.
+
+#include <assert.h>
+
+#include <wasm_simd128.h>
+
+#include <xnnpack/common.h>
+#include <xnnpack/vbinary.h>
+
+
+// Stores wasm_f32x4_min(a[i], *b) into y[i] for every float of a.
+// n is a byte count: it must be non-zero and a multiple of sizeof(float).
+// params is not read by this kernel.
+void xnn_f32_vminc_ukernel__wasmsimd_arm_x8(
+    size_t n,
+    const float* a,
+    const float* b,
+    float* y,
+    const union xnn_f32_default_params params[restrict XNN_MIN_ELEMENTS(1)]) XNN_DISABLE_TSAN
+{
+  assert(n != 0);
+  assert(n % sizeof(float) == 0);
+
+  // Broadcast the scalar operand to all four lanes.
+  const v128_t vscalar = wasm_v32x4_load_splat(b);
+
+  // Unrolled loop: two vectors (8 floats) per iteration.
+  while (n >= 8 * sizeof(float)) {
+    const v128_t vout_lo = wasm_f32x4_min(wasm_v128_load(a), vscalar);
+    const v128_t vout_hi = wasm_f32x4_min(wasm_v128_load(a + 4), vscalar);
+    a += 8;
+
+    wasm_v128_store(y, vout_lo);
+    wasm_v128_store(y + 4, vout_hi);
+    y += 8;
+    n -= 8 * sizeof(float);
+  }
+  // One vector (4 floats) per iteration for what the unrolled loop left over.
+  while (n >= 4 * sizeof(float)) {
+    const v128_t vout = wasm_f32x4_min(wasm_v128_load(a), vscalar);
+    a += 4;
+
+    wasm_v128_store(y, vout);
+    y += 4;
+    n -= 4 * sizeof(float);
+  }
+  if XNN_UNLIKELY(n != 0) {
+    // Tail of 1-3 floats: compute a whole vector, then store only the lanes
+    // selected by the bits of n.
+    v128_t vout = wasm_f32x4_min(wasm_v128_load(a), vscalar);
+
+    if (n & (2 * sizeof(float))) {
+      // Store lanes 0-1 as one 64-bit value and shift lanes 2-3 down.
+      *((double*) y) = wasm_f64x2_extract_lane(vout, 0);
+      vout = wasm_v32x4_shuffle(vout, vout, 2, 3, 2, 3);
+      y += 2;
+    }
+    if (n & (1 * sizeof(float))) {
+      *y = wasm_f32x4_extract_lane(vout, 0);
+    }
+  }
+}
diff --git a/src/f32-vbinary/gen/vminc-wasmsimd-x86-x4.c b/src/f32-vbinary/gen/vminc-wasmsimd-x86-x4.c
new file mode 100644
index 0000000..966b29e
--- /dev/null
+++ b/src/f32-vbinary/gen/vminc-wasmsimd-x86-x4.c
@@ -0,0 +1,57 @@
+// Auto-generated file. Do not edit!
+//   Template: src/f32-vbinary/vopc-wasmsimd.c.in
+//   Generator: tools/xngen
+//
+// Copyright 2020 Google LLC
+//
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree.
+
+#include <assert.h>
+
+#include <wasm_simd128.h>
+
+#include <xnnpack/common.h>
+#include <xnnpack/vbinary.h>
+
+
+// Computes y[i] = min(a[i], *b) with a compare + bitselect sequence instead of
+// wasm_f32x4_min. Full vectors and the tail use the same sequence, so the
+// result for an element does not depend on where it sits in the array.
+// n is a byte count (non-zero multiple of sizeof(float)); params is not read.
+void xnn_f32_vminc_ukernel__wasmsimd_x86_x4(
+    size_t n,
+    const float* a,
+    const float* b,
+    float* y,
+    const union xnn_f32_default_params params[restrict XNN_MIN_ELEMENTS(1)]) XNN_DISABLE_TSAN
+{
+  assert(n != 0);
+  assert(n % sizeof(float) == 0);
+
+  const v128_t vb = wasm_v32x4_load_splat(b);
+  for (; n >= 4 * sizeof(float); n -= 4 * sizeof(float)) {
+    const v128_t va0123 = wasm_v128_load(a);
+    a += 4;
+    // Lanes where a < b take a; every other lane takes b.
+    const v128_t vm0123 = wasm_f32x4_lt(va0123, vb);
+    const v128_t vy0123 = wasm_v128_bitselect(va0123, vb, vm0123);
+    wasm_v128_store(y, vy0123);
+    y += 4;
+  }
+  if XNN_UNLIKELY(n != 0) {
+    // Tail of 1-3 floats: compute a whole vector, store only the valid lanes.
+    const v128_t va = wasm_v128_load(a);
+    const v128_t vm = wasm_f32x4_lt(va, vb);
+    v128_t vy = wasm_v128_bitselect(va, vb, vm);
+
+    if (n & (2 * sizeof(float))) {
+      *((double*) y) = wasm_f64x2_extract_lane(vy, 0);
+      vy = wasm_v32x4_shuffle(vy, vy, 2, 3, 2, 3);
+      y += 2;
+    }
+    if (n & (1 * sizeof(float))) {
+      *y = wasm_f32x4_extract_lane(vy, 0);
+    }
+  }
+}
diff --git a/src/f32-vbinary/gen/vminc-wasmsimd-x86-x8.c b/src/f32-vbinary/gen/vminc-wasmsimd-x86-x8.c
new file mode 100644
index 0000000..10e4690
--- /dev/null
+++ b/src/f32-vbinary/gen/vminc-wasmsimd-x86-x8.c
@@ -0,0 +1,71 @@
+// Auto-generated file. Do not edit!
+//   Template: src/f32-vbinary/vopc-wasmsimd.c.in
+//   Generator: tools/xngen
+//
+// Copyright 2020 Google LLC
+//
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree.
+
+#include <assert.h>
+
+#include <wasm_simd128.h>
+
+#include <xnnpack/common.h>
+#include <xnnpack/vbinary.h>
+
+
+// Computes y[i] = min(a[i], *b) with a compare + bitselect sequence instead of
+// wasm_f32x4_min. All three stages (8-wide, 4-wide, tail) use the same
+// sequence, so the result for an element does not depend on its position.
+// n is a byte count (non-zero multiple of sizeof(float)); params is not read.
+void xnn_f32_vminc_ukernel__wasmsimd_x86_x8(
+    size_t n,
+    const float* a,
+    const float* b,
+    float* y,
+    const union xnn_f32_default_params params[restrict XNN_MIN_ELEMENTS(1)]) XNN_DISABLE_TSAN
+{
+  assert(n != 0);
+  assert(n % sizeof(float) == 0);
+
+  const v128_t vb = wasm_v32x4_load_splat(b);
+  for (; n >= 8 * sizeof(float); n -= 8 * sizeof(float)) {
+    const v128_t va0123 = wasm_v128_load(a);
+    const v128_t va4567 = wasm_v128_load(a + 4);
+    a += 8;
+
+    // Lanes where a < b take a; every other lane takes b.
+    const v128_t vm0123 = wasm_f32x4_lt(va0123, vb);
+    const v128_t vm4567 = wasm_f32x4_lt(va4567, vb);
+    const v128_t vy0123 = wasm_v128_bitselect(va0123, vb, vm0123);
+    const v128_t vy4567 = wasm_v128_bitselect(va4567, vb, vm4567);
+
+    wasm_v128_store(y, vy0123);
+    wasm_v128_store(y + 4, vy4567);
+    y += 8;
+  }
+  for (; n >= 4 * sizeof(float); n -= 4 * sizeof(float)) {
+    const v128_t va = wasm_v128_load(a);
+    a += 4;
+    const v128_t vm = wasm_f32x4_lt(va, vb);
+    const v128_t vy = wasm_v128_bitselect(va, vb, vm);
+    wasm_v128_store(y, vy);
+    y += 4;
+  }
+  if XNN_UNLIKELY(n != 0) {
+    // Tail of 1-3 floats: compute a whole vector, store only the valid lanes.
+    const v128_t va = wasm_v128_load(a);
+    const v128_t vm = wasm_f32x4_lt(va, vb);
+    v128_t vy = wasm_v128_bitselect(va, vb, vm);
+
+    if (n & (2 * sizeof(float))) {
+      *((double*) y) = wasm_f64x2_extract_lane(vy, 0);
+      vy = wasm_v32x4_shuffle(vy, vy, 2, 3, 2, 3);
+      y += 2;
+    }
+    if (n & (1 * sizeof(float))) {
+      *y = wasm_f32x4_extract_lane(vy, 0);
+    }
+  }
+}
diff --git a/src/f32-vbinary/gen/vmulc-minmax-wasmsimd-arm-x4.c b/src/f32-vbinary/gen/vmulc-minmax-wasmsimd-arm-x4.c
new file mode 100644
index 0000000..55fc2d2
--- /dev/null
+++ b/src/f32-vbinary/gen/vmulc-minmax-wasmsimd-arm-x4.c
@@ -0,0 +1,63 @@
+// Auto-generated file. Do not edit!
+//   Template: src/f32-vbinary/vopc-wasmsimd.c.in
+//   Generator: tools/xngen
+//
+// Copyright 2020 Google LLC
+//
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree.
+
+#include <assert.h>
+
+#include <wasm_simd128.h>
+
+#include <xnnpack/common.h>
+#include <xnnpack/vbinary.h>
+
+
+// Multiplies each float of a by the scalar *b and clamps the product to the
+// range [params->scalar.min, params->scalar.max], storing the results in y.
+// n is a byte count: it must be non-zero and a multiple of sizeof(float).
+// The final partial vector is loaded in full (reads past the end of a).
+void xnn_f32_vmulc_minmax_ukernel__wasmsimd_arm_x4(
+    size_t n,
+    const float* a,
+    const float* b,
+    float* y,
+    const union xnn_f32_minmax_params params[restrict XNN_MIN_ELEMENTS(1)]) XNN_DISABLE_TSAN
+{
+  assert(n != 0);
+  assert(n % sizeof(float) == 0);
+
+  // Broadcast the clamp bounds and the scalar operand to all four lanes.
+  const v128_t vlower = wasm_v32x4_load_splat(&params->scalar.min);
+  const v128_t vupper = wasm_v32x4_load_splat(&params->scalar.max);
+  const v128_t vscalar = wasm_v32x4_load_splat(b);
+
+  // Main loop: one vector (4 floats) per iteration.
+  while (n >= 4 * sizeof(float)) {
+    const v128_t vprod = wasm_f32x4_mul(wasm_v128_load(a), vscalar);
+    a += 4;
+
+    const v128_t vout = wasm_f32x4_min(wasm_f32x4_max(vprod, vlower), vupper);
+    wasm_v128_store(y, vout);
+    y += 4;
+    n -= 4 * sizeof(float);
+  }
+  if XNN_UNLIKELY(n != 0) {
+    // Tail of 1-3 floats: compute a whole vector, then store only the lanes
+    // selected by the bits of n.
+    const v128_t vprod = wasm_f32x4_mul(wasm_v128_load(a), vscalar);
+    v128_t vout = wasm_f32x4_min(wasm_f32x4_max(vprod, vlower), vupper);
+
+    if (n & (2 * sizeof(float))) {
+      // Store lanes 0-1 as one 64-bit value and shift lanes 2-3 down.
+      *((double*) y) = wasm_f64x2_extract_lane(vout, 0);
+      vout = wasm_v32x4_shuffle(vout, vout, 2, 3, 2, 3);
+      y += 2;
+    }
+    if (n & (1 * sizeof(float))) {
+      *y = wasm_f32x4_extract_lane(vout, 0);
+    }
+  }
+}
diff --git a/src/f32-vbinary/gen/vmulc-minmax-wasmsimd-arm-x8.c b/src/f32-vbinary/gen/vmulc-minmax-wasmsimd-arm-x8.c
new file mode 100644
index 0000000..1d35091
--- /dev/null
+++ b/src/f32-vbinary/gen/vmulc-minmax-wasmsimd-arm-x8.c
@@ -0,0 +1,80 @@
+// Auto-generated file. Do not edit!
+//   Template: src/f32-vbinary/vopc-wasmsimd.c.in
+//   Generator: tools/xngen
+//
+// Copyright 2020 Google LLC
+//
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree.
+
+#include <assert.h>
+
+#include <wasm_simd128.h>
+
+#include <xnnpack/common.h>
+#include <xnnpack/vbinary.h>
+
+
+// Multiplies each float of a by the scalar *b and clamps the product to the
+// range [params->scalar.min, params->scalar.max], storing the results in y.
+// n is a byte count: it must be non-zero and a multiple of sizeof(float).
+// The final partial vector is loaded in full, so up to 12 bytes past the last
+// valid element of a are read (but never written to y).
+void xnn_f32_vmulc_minmax_ukernel__wasmsimd_arm_x8(
+    size_t n,
+    const float* a,
+    const float* b,
+    float* y,
+    const union xnn_f32_minmax_params params[restrict XNN_MIN_ELEMENTS(1)]) XNN_DISABLE_TSAN
+{
+  assert(n != 0);
+  assert(n % sizeof(float) == 0);
+
+  // Broadcast the clamp bounds and the scalar operand to all four lanes.
+  const v128_t vlower = wasm_v32x4_load_splat(&params->scalar.min);
+  const v128_t vupper = wasm_v32x4_load_splat(&params->scalar.max);
+  const v128_t vscalar = wasm_v32x4_load_splat(b);
+
+  // Unrolled loop: two vectors (8 floats) per iteration.
+  while (n >= 8 * sizeof(float)) {
+    const v128_t vprod_lo = wasm_f32x4_mul(wasm_v128_load(a), vscalar);
+    const v128_t vprod_hi = wasm_f32x4_mul(wasm_v128_load(a + 4), vscalar);
+    a += 8;
+
+    // Clamp from below first, then from above.
+    const v128_t vout_lo = wasm_f32x4_min(wasm_f32x4_max(vprod_lo, vlower), vupper);
+    const v128_t vout_hi = wasm_f32x4_min(wasm_f32x4_max(vprod_hi, vlower), vupper);
+
+    wasm_v128_store(y, vout_lo);
+    wasm_v128_store(y + 4, vout_hi);
+    y += 8;
+    n -= 8 * sizeof(float);
+  }
+  // One vector (4 floats) per iteration for what the unrolled loop left over.
+  while (n >= 4 * sizeof(float)) {
+    const v128_t vprod = wasm_f32x4_mul(wasm_v128_load(a), vscalar);
+    a += 4;
+
+    const v128_t vout = wasm_f32x4_min(wasm_f32x4_max(vprod, vlower), vupper);
+    wasm_v128_store(y, vout);
+    y += 4;
+    n -= 4 * sizeof(float);
+  }
+  if XNN_UNLIKELY(n != 0) {
+    // Tail of 1-3 floats: compute a whole vector, then store only the lanes
+    // selected by the bits of n.
+    const v128_t vprod = wasm_f32x4_mul(wasm_v128_load(a), vscalar);
+    v128_t vout = wasm_f32x4_min(wasm_f32x4_max(vprod, vlower), vupper);
+
+    if (n & (2 * sizeof(float))) {
+      // Store lanes 0-1 as one 64-bit value and shift lanes 2-3 down.
+      *((double*) y) = wasm_f64x2_extract_lane(vout, 0);
+      vout = wasm_v32x4_shuffle(vout, vout, 2, 3, 2, 3);
+      y += 2;
+    }
+    if (n & (1 * sizeof(float))) {
+      // Store the one remaining float from lane 0.
+      *y = wasm_f32x4_extract_lane(vout, 0);
+    }
+  }
+}
diff --git a/src/f32-vbinary/gen/vmulc-minmax-wasmsimd-x86-x4.c b/src/f32-vbinary/gen/vmulc-minmax-wasmsimd-x86-x4.c
new file mode 100644
index 0000000..e83c5e2
--- /dev/null
+++ b/src/f32-vbinary/gen/vmulc-minmax-wasmsimd-x86-x4.c
@@ -0,0 +1,68 @@
+// Auto-generated file. Do not edit!
+//   Template: src/f32-vbinary/vopc-wasmsimd.c.in
+//   Generator: tools/xngen
+//
+// Copyright 2020 Google LLC
+//
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree.
+
+#include <assert.h>
+
+#include <wasm_simd128.h>
+
+#include <xnnpack/common.h>
+#include <xnnpack/vbinary.h>
+
+
+// Multiplies each float of a by the scalar *b and clamps the product to the
+// range [params->scalar.min, params->scalar.max], storing the results in y.
+// Clamping uses compare + bitselect rather than wasm_f32x4_min/max.
+// n is a byte count: it must be non-zero and a multiple of sizeof(float).
+void xnn_f32_vmulc_minmax_ukernel__wasmsimd_x86_x4(
+    size_t n,
+    const float* a,
+    const float* b,
+    float* y,
+    const union xnn_f32_minmax_params params[restrict XNN_MIN_ELEMENTS(1)]) XNN_DISABLE_TSAN
+{
+  assert(n != 0);
+  assert(n % sizeof(float) == 0);
+
+  // Broadcast the clamp bounds and the scalar operand to all four lanes.
+  const v128_t vlower = wasm_v32x4_load_splat(&params->scalar.min);
+  const v128_t vupper = wasm_v32x4_load_splat(&params->scalar.max);
+  const v128_t vscalar = wasm_v32x4_load_splat(b);
+
+  // Main loop: one vector (4 floats) per iteration.
+  while (n >= 4 * sizeof(float)) {
+    const v128_t vprod = wasm_f32x4_mul(wasm_v128_load(a), vscalar);
+    a += 4;
+
+    // Both masks are taken from the unclamped value before either select.
+    const v128_t vbelow = wasm_f32x4_lt(vprod, vlower);
+    const v128_t vnot_above = wasm_f32x4_le(vprod, vupper);
+    const v128_t vfloored = wasm_v128_bitselect(vlower, vprod, vbelow);
+    wasm_v128_store(y, wasm_v128_bitselect(vfloored, vupper, vnot_above));
+    y += 4;
+    n -= 4 * sizeof(float);
+  }
+  if XNN_UNLIKELY(n != 0) {
+    // Tail of 1-3 floats: compute a whole vector, store only the valid lanes.
+    const v128_t vprod = wasm_f32x4_mul(wasm_v128_load(a), vscalar);
+    const v128_t vbelow = wasm_f32x4_lt(vprod, vlower);
+    const v128_t vnot_above = wasm_f32x4_le(vprod, vupper);
+    v128_t vout = wasm_v128_bitselect(vlower, vprod, vbelow);
+    vout = wasm_v128_bitselect(vout, vupper, vnot_above);
+
+    if (n & (2 * sizeof(float))) {
+      // Store lanes 0-1 as one 64-bit value and shift lanes 2-3 down.
+      *((double*) y) = wasm_f64x2_extract_lane(vout, 0);
+      vout = wasm_v32x4_shuffle(vout, vout, 2, 3, 2, 3);
+      y += 2;
+    }
+    if (n & (1 * sizeof(float))) {
+      *y = wasm_f32x4_extract_lane(vout, 0);
+    }
+  }
+}
diff --git a/src/f32-vbinary/gen/vmulc-minmax-wasmsimd-x86-x8.c b/src/f32-vbinary/gen/vmulc-minmax-wasmsimd-x86-x8.c
new file mode 100644
index 0000000..c5ba724
--- /dev/null
+++ b/src/f32-vbinary/gen/vmulc-minmax-wasmsimd-x86-x8.c
@@ -0,0 +1,89 @@
+// Auto-generated file. Do not edit!
+//   Template: src/f32-vbinary/vopc-wasmsimd.c.in
+//   Generator: tools/xngen
+//
+// Copyright 2020 Google LLC
+//
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree.
+
+#include <assert.h>
+
+#include <wasm_simd128.h>
+
+#include <xnnpack/common.h>
+#include <xnnpack/vbinary.h>
+
+
+// Multiplies each float of a by the scalar *b and clamps the product to the
+// range [params->scalar.min, params->scalar.max], storing the results in y.
+// Clamping uses compare + bitselect rather than wasm_f32x4_min/max.
+// n is a byte count: it must be non-zero and a multiple of sizeof(float).
+void xnn_f32_vmulc_minmax_ukernel__wasmsimd_x86_x8(
+    size_t n,
+    const float* a,
+    const float* b,
+    float* y,
+    const union xnn_f32_minmax_params params[restrict XNN_MIN_ELEMENTS(1)]) XNN_DISABLE_TSAN
+{
+  assert(n != 0);
+  assert(n % sizeof(float) == 0);
+
+  // Broadcast the clamp bounds and the scalar operand to all four lanes.
+  const v128_t vlower = wasm_v32x4_load_splat(&params->scalar.min);
+  const v128_t vupper = wasm_v32x4_load_splat(&params->scalar.max);
+  const v128_t vscalar = wasm_v32x4_load_splat(b);
+
+  // Unrolled loop: two vectors (8 floats) per iteration.
+  while (n >= 8 * sizeof(float)) {
+    const v128_t vprod_lo = wasm_f32x4_mul(wasm_v128_load(a), vscalar);
+    const v128_t vprod_hi = wasm_f32x4_mul(wasm_v128_load(a + 4), vscalar);
+    a += 8;
+
+    // Both masks are taken from the unclamped value before either select.
+    const v128_t vbelow_lo = wasm_f32x4_lt(vprod_lo, vlower);
+    const v128_t vbelow_hi = wasm_f32x4_lt(vprod_hi, vlower);
+    const v128_t vnot_above_lo = wasm_f32x4_le(vprod_lo, vupper);
+    const v128_t vnot_above_hi = wasm_f32x4_le(vprod_hi, vupper);
+    const v128_t vfloored_lo = wasm_v128_bitselect(vlower, vprod_lo, vbelow_lo);
+    const v128_t vfloored_hi = wasm_v128_bitselect(vlower, vprod_hi, vbelow_hi);
+
+    wasm_v128_store(y, wasm_v128_bitselect(vfloored_lo, vupper, vnot_above_lo));
+    wasm_v128_store(y + 4, wasm_v128_bitselect(vfloored_hi, vupper, vnot_above_hi));
+    y += 8;
+    n -= 8 * sizeof(float);
+  }
+  // One vector (4 floats) per iteration for what the unrolled loop left over.
+  while (n >= 4 * sizeof(float)) {
+    const v128_t vprod = wasm_f32x4_mul(wasm_v128_load(a), vscalar);
+    a += 4;
+
+    const v128_t vbelow = wasm_f32x4_lt(vprod, vlower);
+    const v128_t vnot_above = wasm_f32x4_le(vprod, vupper);
+    const v128_t vfloored = wasm_v128_bitselect(vlower, vprod, vbelow);
+    wasm_v128_store(y, wasm_v128_bitselect(vfloored, vupper, vnot_above));
+    y += 4;
+    n -= 4 * sizeof(float);
+  }
+  if XNN_UNLIKELY(n != 0) {
+    // Tail of 1-3 floats: compute a whole vector, then store only the lanes
+    // selected by the bits of n.
+    const v128_t vprod = wasm_f32x4_mul(wasm_v128_load(a), vscalar);
+
+    const v128_t vbelow = wasm_f32x4_lt(vprod, vlower);
+    const v128_t vnot_above = wasm_f32x4_le(vprod, vupper);
+    v128_t vout = wasm_v128_bitselect(vlower, vprod, vbelow);
+    vout = wasm_v128_bitselect(vout, vupper, vnot_above);
+
+    if (n & (2 * sizeof(float))) {
+      // Store lanes 0-1 as one 64-bit value and shift lanes 2-3 down.
+      *((double*) y) = wasm_f64x2_extract_lane(vout, 0);
+      vout = wasm_v32x4_shuffle(vout, vout, 2, 3, 2, 3);
+      y += 2;
+    }
+    if (n & (1 * sizeof(float))) {
+      // Store the one remaining float from lane 0.
+      *y = wasm_f32x4_extract_lane(vout, 0);
+    }
+  }
+}
diff --git a/src/f32-vbinary/gen/vrdivc-minmax-wasmsimd-arm-x4.c b/src/f32-vbinary/gen/vrdivc-minmax-wasmsimd-arm-x4.c
new file mode 100644
index 0000000..2b25852
--- /dev/null
+++ b/src/f32-vbinary/gen/vrdivc-minmax-wasmsimd-arm-x4.c
@@ -0,0 +1,63 @@
+// Auto-generated file. Do not edit!
+//   Template: src/f32-vbinary/vopc-wasmsimd.c.in
+//   Generator: tools/xngen
+//
+// Copyright 2020 Google LLC
+//
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree.
+
+#include <assert.h>
+
+#include <wasm_simd128.h>
+
+#include <xnnpack/common.h>
+#include <xnnpack/vbinary.h>
+
+
+// Divides the scalar *b by each float of a and clamps the quotient to the
+// range [params->scalar.min, params->scalar.max], storing the results in y.
+// n is a byte count: it must be non-zero and a multiple of sizeof(float).
+// The final partial vector is loaded in full (reads past the end of a).
+void xnn_f32_vrdivc_minmax_ukernel__wasmsimd_arm_x4(
+    size_t n,
+    const float* a,
+    const float* b,
+    float* y,
+    const union xnn_f32_minmax_params params[restrict XNN_MIN_ELEMENTS(1)]) XNN_DISABLE_TSAN
+{
+  assert(n != 0);
+  assert(n % sizeof(float) == 0);
+
+  // Broadcast the clamp bounds and the scalar dividend to all four lanes.
+  const v128_t vlower = wasm_v32x4_load_splat(&params->scalar.min);
+  const v128_t vupper = wasm_v32x4_load_splat(&params->scalar.max);
+  const v128_t vscalar = wasm_v32x4_load_splat(b);
+
+  // Main loop: one vector (4 floats) per iteration.
+  while (n >= 4 * sizeof(float)) {
+    const v128_t vq = wasm_f32x4_div(vscalar, wasm_v128_load(a));
+    a += 4;
+
+    const v128_t vout = wasm_f32x4_min(wasm_f32x4_max(vq, vlower), vupper);
+    wasm_v128_store(y, vout);
+    y += 4;
+    n -= 4 * sizeof(float);
+  }
+  if XNN_UNLIKELY(n != 0) {
+    // Tail of 1-3 floats: compute a whole vector, then store only the lanes
+    // selected by the bits of n.
+    const v128_t vq = wasm_f32x4_div(vscalar, wasm_v128_load(a));
+    v128_t vout = wasm_f32x4_min(wasm_f32x4_max(vq, vlower), vupper);
+
+    if (n & (2 * sizeof(float))) {
+      // Store lanes 0-1 as one 64-bit value and shift lanes 2-3 down.
+      *((double*) y) = wasm_f64x2_extract_lane(vout, 0);
+      vout = wasm_v32x4_shuffle(vout, vout, 2, 3, 2, 3);
+      y += 2;
+    }
+    if (n & (1 * sizeof(float))) {
+      *y = wasm_f32x4_extract_lane(vout, 0);
+    }
+  }
+}
diff --git a/src/f32-vbinary/gen/vrdivc-minmax-wasmsimd-arm-x8.c b/src/f32-vbinary/gen/vrdivc-minmax-wasmsimd-arm-x8.c
new file mode 100644
index 0000000..fc28788
--- /dev/null
+++ b/src/f32-vbinary/gen/vrdivc-minmax-wasmsimd-arm-x8.c
@@ -0,0 +1,80 @@
+// Auto-generated file. Do not edit!
+//   Template: src/f32-vbinary/vopc-wasmsimd.c.in
+//   Generator: tools/xngen
+//
+// Copyright 2020 Google LLC
+//
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree.
+
+#include <assert.h>
+
+#include <wasm_simd128.h>
+
+#include <xnnpack/common.h>
+#include <xnnpack/vbinary.h>
+
+// Reversed division by a scalar: y[i] = b[0] / a[i], clamped to [params->scalar.min, params->scalar.max] with f32x4 max/min.
+void xnn_f32_vrdivc_minmax_ukernel__wasmsimd_arm_x8(
+    size_t n,  // size of a and y in bytes; the loops consume it in units of sizeof(float)
+    const float* a,  // input vector
+    const float* b,  // scalar operand: only b[0] is read
+    float* y,  // output vector
+    const union xnn_f32_minmax_params params[restrict XNN_MIN_ELEMENTS(1)]) XNN_DISABLE_TSAN
+{
+  assert(n != 0);
+  assert(n % sizeof(float) == 0);
+  // Broadcast the clamp bounds and, below, the scalar b[0] to all four lanes.
+  const v128_t vy_min = wasm_v32x4_load_splat(&params->scalar.min);
+  const v128_t vy_max = wasm_v32x4_load_splat(&params->scalar.max);
+
+  const v128_t vb = wasm_v32x4_load_splat(b);
+  for (; n >= 8 * sizeof(float); n -= 8 * sizeof(float)) {  // main loop: 8 floats (two vectors) per iteration
+    const v128_t va0123 = wasm_v128_load(a);
+    const v128_t va4567 = wasm_v128_load(a + 4);
+    a += 8;
+
+    v128_t vy0123 = wasm_f32x4_div(vb, va0123);  // the scalar is the dividend
+    v128_t vy4567 = wasm_f32x4_div(vb, va4567);
+
+    // Clamp: lower bound first, then upper bound.
+    vy0123 = wasm_f32x4_max(vy0123, vy_min);
+    vy4567 = wasm_f32x4_max(vy4567, vy_min);
+
+    vy0123 = wasm_f32x4_min(vy0123, vy_max);
+    vy4567 = wasm_f32x4_min(vy4567, vy_max);
+
+    wasm_v128_store(y, vy0123);
+    wasm_v128_store(y + 4, vy4567);
+    y += 8;
+  }
+  for (; n >= 4 * sizeof(float); n -= 4 * sizeof(float)) {  // fewer than 8 floats left: at most one more full vector
+    const v128_t va = wasm_v128_load(a);
+    a += 4;
+
+    v128_t vy = wasm_f32x4_div(vb, va);
+
+    vy = wasm_f32x4_max(vy, vy_min);
+    vy = wasm_f32x4_min(vy, vy_max);
+
+    wasm_v128_store(y, vy);
+    y += 4;
+  }
+  if XNN_UNLIKELY(n != 0) {  // tail: fewer than 4 floats remain
+    const v128_t va = wasm_v128_load(a);  // NOTE(review): full 16-byte load although fewer than 4 floats remain; presumably the input is readable past its end - confirm
+
+    v128_t vy = wasm_f32x4_div(vb, va);
+
+    vy = wasm_f32x4_max(vy, vy_min);
+    vy = wasm_f32x4_min(vy, vy_max);
+    // Write back only the remaining elements: a pair of floats first, then a single float.
+    if (n & (2 * sizeof(float))) {
+      *((double*) y) = wasm_f64x2_extract_lane(vy, 0);  // lanes 0-1 written as one 64-bit value
+      vy = wasm_v32x4_shuffle(vy, vy, 2, 3, 2, 3);  // move lanes 2-3 down into lanes 0-1
+      y += 2;
+    }
+    if (n & (1 * sizeof(float))) {
+      *y = wasm_f32x4_extract_lane(vy, 0);
+    }
+  }
+}
diff --git a/src/f32-vbinary/gen/vrdivc-minmax-wasmsimd-x86-x4.c b/src/f32-vbinary/gen/vrdivc-minmax-wasmsimd-x86-x4.c
new file mode 100644
index 0000000..27a516c
--- /dev/null
+++ b/src/f32-vbinary/gen/vrdivc-minmax-wasmsimd-x86-x4.c
@@ -0,0 +1,68 @@
+// Auto-generated file. Do not edit!
+//   Template: src/f32-vbinary/vopc-wasmsimd.c.in
+//   Generator: tools/xngen
+//
+// Copyright 2020 Google LLC
+//
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree.
+
+#include <assert.h>
+
+#include <wasm_simd128.h>
+
+#include <xnnpack/common.h>
+#include <xnnpack/vbinary.h>
+
+// Reversed division by a scalar: y[i] = b[0] / a[i], clamped to [params->scalar.min, params->scalar.max] via compare masks and bitselect.
+void xnn_f32_vrdivc_minmax_ukernel__wasmsimd_x86_x4(
+    size_t n,  // size of a and y in bytes; the loops consume it in units of sizeof(float)
+    const float* a,  // input vector
+    const float* b,  // scalar operand: only b[0] is read
+    float* y,  // output vector
+    const union xnn_f32_minmax_params params[restrict XNN_MIN_ELEMENTS(1)]) XNN_DISABLE_TSAN
+{
+  assert(n != 0);
+  assert(n % sizeof(float) == 0);
+  // Broadcast the clamp bounds and, below, the scalar b[0] to all four lanes.
+  const v128_t vy_min = wasm_v32x4_load_splat(&params->scalar.min);
+  const v128_t vy_max = wasm_v32x4_load_splat(&params->scalar.max);
+
+  const v128_t vb = wasm_v32x4_load_splat(b);
+  for (; n >= 4 * sizeof(float); n -= 4 * sizeof(float)) {  // main loop: 4 floats per iteration
+    const v128_t va0123 = wasm_v128_load(a);
+    a += 4;
+
+    v128_t vy0123 = wasm_f32x4_div(vb, va0123);  // the scalar is the dividend
+
+    // Clamp with masks; both masks are computed from the unclamped quotient.
+    const v128_t vltmask0123 = wasm_f32x4_lt(vy0123, vy_min);  // lanes below the lower bound
+
+    const v128_t vngtmask0123 = wasm_f32x4_le(vy0123, vy_max);  // lanes not above the upper bound
+    vy0123 = wasm_v128_bitselect(vy_min, vy0123, vltmask0123);  // vy_min where the lt mask is set
+
+    vy0123 = wasm_v128_bitselect(vy0123, vy_max, vngtmask0123);  // vy_max where the le mask is clear
+
+    wasm_v128_store(y, vy0123);
+    y += 4;
+  }
+  if XNN_UNLIKELY(n != 0) {  // tail: fewer than 4 floats remain
+    const v128_t va = wasm_v128_load(a);  // NOTE(review): full 16-byte load although fewer than 4 floats remain; presumably the input is readable past its end - confirm
+
+    v128_t vy = wasm_f32x4_div(vb, va);
+    // Same mask-based clamp as in the main loop.
+    const v128_t vltmask = wasm_f32x4_lt(vy, vy_min);
+    const v128_t vngtmask = wasm_f32x4_le(vy, vy_max);
+    vy = wasm_v128_bitselect(vy_min, vy, vltmask);
+    vy = wasm_v128_bitselect(vy, vy_max, vngtmask);
+    // Write back only the remaining elements: a pair of floats first, then a single float.
+    if (n & (2 * sizeof(float))) {
+      *((double*) y) = wasm_f64x2_extract_lane(vy, 0);  // lanes 0-1 written as one 64-bit value
+      vy = wasm_v32x4_shuffle(vy, vy, 2, 3, 2, 3);  // move lanes 2-3 down into lanes 0-1
+      y += 2;
+    }
+    if (n & (1 * sizeof(float))) {
+      *y = wasm_f32x4_extract_lane(vy, 0);
+    }
+  }
+}
diff --git a/src/f32-vbinary/gen/vrdivc-minmax-wasmsimd-x86-x8.c b/src/f32-vbinary/gen/vrdivc-minmax-wasmsimd-x86-x8.c
new file mode 100644
index 0000000..4e41a7d
--- /dev/null
+++ b/src/f32-vbinary/gen/vrdivc-minmax-wasmsimd-x86-x8.c
@@ -0,0 +1,89 @@
+// Auto-generated file. Do not edit!
+//   Template: src/f32-vbinary/vopc-wasmsimd.c.in
+//   Generator: tools/xngen
+//
+// Copyright 2020 Google LLC
+//
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree.
+
+#include <assert.h>
+
+#include <wasm_simd128.h>
+
+#include <xnnpack/common.h>
+#include <xnnpack/vbinary.h>
+
+// Reversed division by a scalar: y[i] = b[0] / a[i], clamped to [params->scalar.min, params->scalar.max] via compare masks and bitselect.
+void xnn_f32_vrdivc_minmax_ukernel__wasmsimd_x86_x8(
+    size_t n,  // size of a and y in bytes; the loops consume it in units of sizeof(float)
+    const float* a,  // input vector
+    const float* b,  // scalar operand: only b[0] is read
+    float* y,  // output vector
+    const union xnn_f32_minmax_params params[restrict XNN_MIN_ELEMENTS(1)]) XNN_DISABLE_TSAN
+{
+  assert(n != 0);
+  assert(n % sizeof(float) == 0);
+  // Broadcast the clamp bounds and, below, the scalar b[0] to all four lanes.
+  const v128_t vy_min = wasm_v32x4_load_splat(&params->scalar.min);
+  const v128_t vy_max = wasm_v32x4_load_splat(&params->scalar.max);
+
+  const v128_t vb = wasm_v32x4_load_splat(b);
+  for (; n >= 8 * sizeof(float); n -= 8 * sizeof(float)) {  // main loop: 8 floats (two vectors) per iteration
+    const v128_t va0123 = wasm_v128_load(a);
+    const v128_t va4567 = wasm_v128_load(a + 4);
+    a += 8;
+
+    v128_t vy0123 = wasm_f32x4_div(vb, va0123);  // the scalar is the dividend
+    v128_t vy4567 = wasm_f32x4_div(vb, va4567);
+
+    // Clamp with masks; each vector's masks are computed from its unclamped quotient.
+    const v128_t vltmask0123 = wasm_f32x4_lt(vy0123, vy_min);  // lanes below the lower bound
+    const v128_t vltmask4567 = wasm_f32x4_lt(vy4567, vy_min);
+
+    const v128_t vngtmask0123 = wasm_f32x4_le(vy0123, vy_max);  // lanes not above the upper bound
+    vy0123 = wasm_v128_bitselect(vy_min, vy0123, vltmask0123);  // vy_min where the lt mask is set
+    const v128_t vngtmask4567 = wasm_f32x4_le(vy4567, vy_max);
+    vy4567 = wasm_v128_bitselect(vy_min, vy4567, vltmask4567);
+
+    vy0123 = wasm_v128_bitselect(vy0123, vy_max, vngtmask0123);  // vy_max where the le mask is clear
+    vy4567 = wasm_v128_bitselect(vy4567, vy_max, vngtmask4567);
+
+    wasm_v128_store(y, vy0123);
+    wasm_v128_store(y + 4, vy4567);
+    y += 8;
+  }
+  for (; n >= 4 * sizeof(float); n -= 4 * sizeof(float)) {  // fewer than 8 floats left: at most one more full vector
+    const v128_t va = wasm_v128_load(a);
+    a += 4;
+
+    v128_t vy = wasm_f32x4_div(vb, va);
+
+    const v128_t vltmask = wasm_f32x4_lt(vy, vy_min);
+    const v128_t vngtmask = wasm_f32x4_le(vy, vy_max);
+    vy = wasm_v128_bitselect(vy_min, vy, vltmask);
+    vy = wasm_v128_bitselect(vy, vy_max, vngtmask);
+
+    wasm_v128_store(y, vy);
+    y += 4;
+  }
+  if XNN_UNLIKELY(n != 0) {  // tail: fewer than 4 floats remain
+    const v128_t va = wasm_v128_load(a);  // NOTE(review): full 16-byte load although fewer than 4 floats remain; presumably the input is readable past its end - confirm
+
+    v128_t vy = wasm_f32x4_div(vb, va);
+
+    const v128_t vltmask = wasm_f32x4_lt(vy, vy_min);
+    const v128_t vngtmask = wasm_f32x4_le(vy, vy_max);
+    vy = wasm_v128_bitselect(vy_min, vy, vltmask);
+    vy = wasm_v128_bitselect(vy, vy_max, vngtmask);
+    // Write back only the remaining elements: a pair of floats first, then a single float.
+    if (n & (2 * sizeof(float))) {
+      *((double*) y) = wasm_f64x2_extract_lane(vy, 0);  // lanes 0-1 written as one 64-bit value
+      vy = wasm_v32x4_shuffle(vy, vy, 2, 3, 2, 3);  // move lanes 2-3 down into lanes 0-1
+      y += 2;
+    }
+    if (n & (1 * sizeof(float))) {
+      *y = wasm_f32x4_extract_lane(vy, 0);
+    }
+  }
+}
diff --git a/src/f32-vbinary/gen/vrsqrdiffc-wasmsimd-x4.c b/src/f32-vbinary/gen/vrsqrdiffc-wasmsimd-x4.c
new file mode 100644
index 0000000..4138107
--- /dev/null
+++ b/src/f32-vbinary/gen/vrsqrdiffc-wasmsimd-x4.c
@@ -0,0 +1,58 @@
+// Auto-generated file. Do not edit!
+//   Template: src/f32-vbinary/vopc-wasmsimd.c.in
+//   Generator: tools/xngen
+//
+// Copyright 2020 Google LLC
+//
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree.
+
+#include <assert.h>
+
+#include <wasm_simd128.h>
+
+#include <xnnpack/common.h>
+#include <xnnpack/vbinary.h>
+
+// Squared difference against a scalar, scalar as minuend: y[i] = (b[0] - a[i]) * (b[0] - a[i]).
+void xnn_f32_vrsqrdiffc_ukernel__wasmsimd_x4(
+    size_t n,  // size of a and y in bytes; the loops consume it in units of sizeof(float)
+    const float* a,  // input vector
+    const float* b,  // scalar operand: only b[0] is read
+    float* y,  // output vector
+    const union xnn_f32_default_params params[restrict XNN_MIN_ELEMENTS(1)]) XNN_DISABLE_TSAN
+{
+  assert(n != 0);
+  assert(n % sizeof(float) == 0);
+  // params is not read in this kernel; the result is stored without clamping.
+  // Broadcast the scalar b[0] to all four lanes.
+  const v128_t vb = wasm_v32x4_load_splat(b);
+  for (; n >= 4 * sizeof(float); n -= 4 * sizeof(float)) {  // main loop: 4 floats per iteration
+    const v128_t va0123 = wasm_v128_load(a);
+    a += 4;
+
+    v128_t vy0123 = wasm_f32x4_sub(vb, va0123);  // difference b - a
+
+    vy0123 = wasm_f32x4_mul(vy0123, vy0123);  // square it
+
+
+    wasm_v128_store(y, vy0123);
+    y += 4;
+  }
+  if XNN_UNLIKELY(n != 0) {  // tail: fewer than 4 floats remain
+    const v128_t va = wasm_v128_load(a);  // NOTE(review): full 16-byte load although fewer than 4 floats remain; presumably the input is readable past its end - confirm
+
+    v128_t vy = wasm_f32x4_sub(vb, va);
+    vy = wasm_f32x4_mul(vy, vy);
+
+    // Write back only the remaining elements: a pair of floats first, then a single float.
+    if (n & (2 * sizeof(float))) {
+      *((double*) y) = wasm_f64x2_extract_lane(vy, 0);  // lanes 0-1 written as one 64-bit value
+      vy = wasm_v32x4_shuffle(vy, vy, 2, 3, 2, 3);  // move lanes 2-3 down into lanes 0-1
+      y += 2;
+    }
+    if (n & (1 * sizeof(float))) {
+      *y = wasm_f32x4_extract_lane(vy, 0);
+    }
+  }
+}
diff --git a/src/f32-vbinary/gen/vrsqrdiffc-wasmsimd-x8.c b/src/f32-vbinary/gen/vrsqrdiffc-wasmsimd-x8.c
new file mode 100644
index 0000000..4495124
--- /dev/null
+++ b/src/f32-vbinary/gen/vrsqrdiffc-wasmsimd-x8.c
@@ -0,0 +1,73 @@
+// Auto-generated file. Do not edit!
+//   Template: src/f32-vbinary/vopc-wasmsimd.c.in
+//   Generator: tools/xngen
+//
+// Copyright 2020 Google LLC
+//
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree.
+
+#include <assert.h>
+
+#include <wasm_simd128.h>
+
+#include <xnnpack/common.h>
+#include <xnnpack/vbinary.h>
+
+// Squared difference against a scalar, scalar as minuend: y[i] = (b[0] - a[i]) * (b[0] - a[i]).
+void xnn_f32_vrsqrdiffc_ukernel__wasmsimd_x8(
+    size_t n,  // size of a and y in bytes; the loops consume it in units of sizeof(float)
+    const float* a,  // input vector
+    const float* b,  // scalar operand: only b[0] is read
+    float* y,  // output vector
+    const union xnn_f32_default_params params[restrict XNN_MIN_ELEMENTS(1)]) XNN_DISABLE_TSAN
+{
+  assert(n != 0);
+  assert(n % sizeof(float) == 0);
+  // params is not read in this kernel; the result is stored without clamping.
+  // Broadcast the scalar b[0] to all four lanes.
+  const v128_t vb = wasm_v32x4_load_splat(b);
+  for (; n >= 8 * sizeof(float); n -= 8 * sizeof(float)) {  // main loop: 8 floats (two vectors) per iteration
+    const v128_t va0123 = wasm_v128_load(a);
+    const v128_t va4567 = wasm_v128_load(a + 4);
+    a += 8;
+
+    v128_t vy0123 = wasm_f32x4_sub(vb, va0123);  // difference b - a
+    v128_t vy4567 = wasm_f32x4_sub(vb, va4567);
+
+    vy0123 = wasm_f32x4_mul(vy0123, vy0123);  // square it
+    vy4567 = wasm_f32x4_mul(vy4567, vy4567);
+
+
+    wasm_v128_store(y, vy0123);
+    wasm_v128_store(y + 4, vy4567);
+    y += 8;
+  }
+  for (; n >= 4 * sizeof(float); n -= 4 * sizeof(float)) {  // fewer than 8 floats left: at most one more full vector
+    const v128_t va = wasm_v128_load(a);
+    a += 4;
+
+    v128_t vy = wasm_f32x4_sub(vb, va);
+    vy = wasm_f32x4_mul(vy, vy);
+
+
+    wasm_v128_store(y, vy);
+    y += 4;
+  }
+  if XNN_UNLIKELY(n != 0) {  // tail: fewer than 4 floats remain
+    const v128_t va = wasm_v128_load(a);  // NOTE(review): full 16-byte load although fewer than 4 floats remain; presumably the input is readable past its end - confirm
+
+    v128_t vy = wasm_f32x4_sub(vb, va);
+    vy = wasm_f32x4_mul(vy, vy);
+
+    // Write back only the remaining elements: a pair of floats first, then a single float.
+    if (n & (2 * sizeof(float))) {
+      *((double*) y) = wasm_f64x2_extract_lane(vy, 0);  // lanes 0-1 written as one 64-bit value
+      vy = wasm_v32x4_shuffle(vy, vy, 2, 3, 2, 3);  // move lanes 2-3 down into lanes 0-1
+      y += 2;
+    }
+    if (n & (1 * sizeof(float))) {
+      *y = wasm_f32x4_extract_lane(vy, 0);
+    }
+  }
+}
diff --git a/src/f32-vbinary/gen/vrsubc-minmax-wasmsimd-arm-x4.c b/src/f32-vbinary/gen/vrsubc-minmax-wasmsimd-arm-x4.c
new file mode 100644
index 0000000..5a2d27d
--- /dev/null
+++ b/src/f32-vbinary/gen/vrsubc-minmax-wasmsimd-arm-x4.c
@@ -0,0 +1,63 @@
+// Auto-generated file. Do not edit!
+//   Template: src/f32-vbinary/vopc-wasmsimd.c.in
+//   Generator: tools/xngen
+//
+// Copyright 2020 Google LLC
+//
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree.
+
+#include <assert.h>
+
+#include <wasm_simd128.h>
+
+#include <xnnpack/common.h>
+#include <xnnpack/vbinary.h>
+
+// Reversed subtraction of a scalar: y[i] = b[0] - a[i], clamped to [params->scalar.min, params->scalar.max] with f32x4 max/min.
+void xnn_f32_vrsubc_minmax_ukernel__wasmsimd_arm_x4(
+    size_t n,  // size of a and y in bytes; the loops consume it in units of sizeof(float)
+    const float* a,  // input vector
+    const float* b,  // scalar operand: only b[0] is read
+    float* y,  // output vector
+    const union xnn_f32_minmax_params params[restrict XNN_MIN_ELEMENTS(1)]) XNN_DISABLE_TSAN
+{
+  assert(n != 0);
+  assert(n % sizeof(float) == 0);
+  // Broadcast the clamp bounds and, below, the scalar b[0] to all four lanes.
+  const v128_t vy_min = wasm_v32x4_load_splat(&params->scalar.min);
+  const v128_t vy_max = wasm_v32x4_load_splat(&params->scalar.max);
+
+  const v128_t vb = wasm_v32x4_load_splat(b);
+  for (; n >= 4 * sizeof(float); n -= 4 * sizeof(float)) {  // main loop: 4 floats per iteration
+    const v128_t va0123 = wasm_v128_load(a);
+    a += 4;
+
+    v128_t vy0123 = wasm_f32x4_sub(vb, va0123);  // the scalar is the minuend
+
+    // Clamp: lower bound first, then upper bound.
+    vy0123 = wasm_f32x4_max(vy0123, vy_min);
+
+    vy0123 = wasm_f32x4_min(vy0123, vy_max);
+
+    wasm_v128_store(y, vy0123);
+    y += 4;
+  }
+  if XNN_UNLIKELY(n != 0) {  // tail: fewer than 4 floats remain
+    const v128_t va = wasm_v128_load(a);  // NOTE(review): full 16-byte load although fewer than 4 floats remain; presumably the input is readable past its end - confirm
+
+    v128_t vy = wasm_f32x4_sub(vb, va);
+
+    vy = wasm_f32x4_max(vy, vy_min);
+    vy = wasm_f32x4_min(vy, vy_max);
+    // Write back only the remaining elements: a pair of floats first, then a single float.
+    if (n & (2 * sizeof(float))) {
+      *((double*) y) = wasm_f64x2_extract_lane(vy, 0);  // lanes 0-1 written as one 64-bit value
+      vy = wasm_v32x4_shuffle(vy, vy, 2, 3, 2, 3);  // move lanes 2-3 down into lanes 0-1
+      y += 2;
+    }
+    if (n & (1 * sizeof(float))) {
+      *y = wasm_f32x4_extract_lane(vy, 0);
+    }
+  }
+}
diff --git a/src/f32-vbinary/gen/vrsubc-minmax-wasmsimd-arm-x8.c b/src/f32-vbinary/gen/vrsubc-minmax-wasmsimd-arm-x8.c
new file mode 100644
index 0000000..3944715
--- /dev/null
+++ b/src/f32-vbinary/gen/vrsubc-minmax-wasmsimd-arm-x8.c
@@ -0,0 +1,80 @@
+// Auto-generated file. Do not edit!
+//   Template: src/f32-vbinary/vopc-wasmsimd.c.in
+//   Generator: tools/xngen
+//
+// Copyright 2020 Google LLC
+//
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree.
+
+#include <assert.h>
+
+#include <wasm_simd128.h>
+
+#include <xnnpack/common.h>
+#include <xnnpack/vbinary.h>
+
+// Reversed subtraction of a scalar: y[i] = b[0] - a[i], clamped to [params->scalar.min, params->scalar.max] with f32x4 max/min.
+void xnn_f32_vrsubc_minmax_ukernel__wasmsimd_arm_x8(
+    size_t n,  // size of a and y in bytes; the loops consume it in units of sizeof(float)
+    const float* a,  // input vector
+    const float* b,  // scalar operand: only b[0] is read
+    float* y,  // output vector
+    const union xnn_f32_minmax_params params[restrict XNN_MIN_ELEMENTS(1)]) XNN_DISABLE_TSAN
+{
+  assert(n != 0);
+  assert(n % sizeof(float) == 0);
+  // Broadcast the clamp bounds and, below, the scalar b[0] to all four lanes.
+  const v128_t vy_min = wasm_v32x4_load_splat(&params->scalar.min);
+  const v128_t vy_max = wasm_v32x4_load_splat(&params->scalar.max);
+
+  const v128_t vb = wasm_v32x4_load_splat(b);
+  for (; n >= 8 * sizeof(float); n -= 8 * sizeof(float)) {  // main loop: 8 floats (two vectors) per iteration
+    const v128_t va0123 = wasm_v128_load(a);
+    const v128_t va4567 = wasm_v128_load(a + 4);
+    a += 8;
+
+    v128_t vy0123 = wasm_f32x4_sub(vb, va0123);  // the scalar is the minuend
+    v128_t vy4567 = wasm_f32x4_sub(vb, va4567);
+
+    // Clamp: lower bound first, then upper bound.
+    vy0123 = wasm_f32x4_max(vy0123, vy_min);
+    vy4567 = wasm_f32x4_max(vy4567, vy_min);
+
+    vy0123 = wasm_f32x4_min(vy0123, vy_max);
+    vy4567 = wasm_f32x4_min(vy4567, vy_max);
+
+    wasm_v128_store(y, vy0123);
+    wasm_v128_store(y + 4, vy4567);
+    y += 8;
+  }
+  for (; n >= 4 * sizeof(float); n -= 4 * sizeof(float)) {  // fewer than 8 floats left: at most one more full vector
+    const v128_t va = wasm_v128_load(a);
+    a += 4;
+
+    v128_t vy = wasm_f32x4_sub(vb, va);
+
+    vy = wasm_f32x4_max(vy, vy_min);
+    vy = wasm_f32x4_min(vy, vy_max);
+
+    wasm_v128_store(y, vy);
+    y += 4;
+  }
+  if XNN_UNLIKELY(n != 0) {  // tail: fewer than 4 floats remain
+    const v128_t va = wasm_v128_load(a);  // NOTE(review): full 16-byte load although fewer than 4 floats remain; presumably the input is readable past its end - confirm
+
+    v128_t vy = wasm_f32x4_sub(vb, va);
+
+    vy = wasm_f32x4_max(vy, vy_min);
+    vy = wasm_f32x4_min(vy, vy_max);
+    // Write back only the remaining elements: a pair of floats first, then a single float.
+    if (n & (2 * sizeof(float))) {
+      *((double*) y) = wasm_f64x2_extract_lane(vy, 0);  // lanes 0-1 written as one 64-bit value
+      vy = wasm_v32x4_shuffle(vy, vy, 2, 3, 2, 3);  // move lanes 2-3 down into lanes 0-1
+      y += 2;
+    }
+    if (n & (1 * sizeof(float))) {
+      *y = wasm_f32x4_extract_lane(vy, 0);
+    }
+  }
+}
diff --git a/src/f32-vbinary/gen/vrsubc-minmax-wasmsimd-x86-x4.c b/src/f32-vbinary/gen/vrsubc-minmax-wasmsimd-x86-x4.c
new file mode 100644
index 0000000..3652662
--- /dev/null
+++ b/src/f32-vbinary/gen/vrsubc-minmax-wasmsimd-x86-x4.c
@@ -0,0 +1,68 @@
+// Auto-generated file. Do not edit!
+//   Template: src/f32-vbinary/vopc-wasmsimd.c.in
+//   Generator: tools/xngen
+//
+// Copyright 2020 Google LLC
+//
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree.
+
+#include <assert.h>
+
+#include <wasm_simd128.h>
+
+#include <xnnpack/common.h>
+#include <xnnpack/vbinary.h>
+
+// Reversed subtraction of a scalar: y[i] = b[0] - a[i], clamped to [params->scalar.min, params->scalar.max] via compare masks and bitselect.
+void xnn_f32_vrsubc_minmax_ukernel__wasmsimd_x86_x4(
+    size_t n,  // size of a and y in bytes; the loops consume it in units of sizeof(float)
+    const float* a,  // input vector
+    const float* b,  // scalar operand: only b[0] is read
+    float* y,  // output vector
+    const union xnn_f32_minmax_params params[restrict XNN_MIN_ELEMENTS(1)]) XNN_DISABLE_TSAN
+{
+  assert(n != 0);
+  assert(n % sizeof(float) == 0);
+  // Broadcast the clamp bounds and, below, the scalar b[0] to all four lanes.
+  const v128_t vy_min = wasm_v32x4_load_splat(&params->scalar.min);
+  const v128_t vy_max = wasm_v32x4_load_splat(&params->scalar.max);
+
+  const v128_t vb = wasm_v32x4_load_splat(b);
+  for (; n >= 4 * sizeof(float); n -= 4 * sizeof(float)) {  // main loop: 4 floats per iteration
+    const v128_t va0123 = wasm_v128_load(a);
+    a += 4;
+
+    v128_t vy0123 = wasm_f32x4_sub(vb, va0123);  // the scalar is the minuend
+
+    // Clamp with masks; both masks are computed from the unclamped difference.
+    const v128_t vltmask0123 = wasm_f32x4_lt(vy0123, vy_min);  // lanes below the lower bound
+
+    const v128_t vngtmask0123 = wasm_f32x4_le(vy0123, vy_max);  // lanes not above the upper bound
+    vy0123 = wasm_v128_bitselect(vy_min, vy0123, vltmask0123);  // vy_min where the lt mask is set
+
+    vy0123 = wasm_v128_bitselect(vy0123, vy_max, vngtmask0123);  // vy_max where the le mask is clear
+
+    wasm_v128_store(y, vy0123);
+    y += 4;
+  }
+  if XNN_UNLIKELY(n != 0) {  // tail: fewer than 4 floats remain
+    const v128_t va = wasm_v128_load(a);  // NOTE(review): full 16-byte load although fewer than 4 floats remain; presumably the input is readable past its end - confirm
+
+    v128_t vy = wasm_f32x4_sub(vb, va);
+    // Same mask-based clamp as in the main loop.
+    const v128_t vltmask = wasm_f32x4_lt(vy, vy_min);
+    const v128_t vngtmask = wasm_f32x4_le(vy, vy_max);
+    vy = wasm_v128_bitselect(vy_min, vy, vltmask);
+    vy = wasm_v128_bitselect(vy, vy_max, vngtmask);
+    // Write back only the remaining elements: a pair of floats first, then a single float.
+    if (n & (2 * sizeof(float))) {
+      *((double*) y) = wasm_f64x2_extract_lane(vy, 0);  // lanes 0-1 written as one 64-bit value
+      vy = wasm_v32x4_shuffle(vy, vy, 2, 3, 2, 3);  // move lanes 2-3 down into lanes 0-1
+      y += 2;
+    }
+    if (n & (1 * sizeof(float))) {
+      *y = wasm_f32x4_extract_lane(vy, 0);
+    }
+  }
+}
diff --git a/src/f32-vbinary/gen/vrsubc-minmax-wasmsimd-x86-x8.c b/src/f32-vbinary/gen/vrsubc-minmax-wasmsimd-x86-x8.c
new file mode 100644
index 0000000..cff00af
--- /dev/null
+++ b/src/f32-vbinary/gen/vrsubc-minmax-wasmsimd-x86-x8.c
@@ -0,0 +1,89 @@
+// Auto-generated file. Do not edit!
+//   Template: src/f32-vbinary/vopc-wasmsimd.c.in
+//   Generator: tools/xngen
+//
+// Copyright 2020 Google LLC
+//
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree.
+
+#include <assert.h>
+
+#include <wasm_simd128.h>
+
+#include <xnnpack/common.h>
+#include <xnnpack/vbinary.h>
+
+// Reversed subtraction of a scalar: y[i] = b[0] - a[i], clamped to [params->scalar.min, params->scalar.max] via compare masks and bitselect.
+void xnn_f32_vrsubc_minmax_ukernel__wasmsimd_x86_x8(
+    size_t n,  // size of a and y in bytes; the loops consume it in units of sizeof(float)
+    const float* a,  // input vector
+    const float* b,  // scalar operand: only b[0] is read
+    float* y,  // output vector
+    const union xnn_f32_minmax_params params[restrict XNN_MIN_ELEMENTS(1)]) XNN_DISABLE_TSAN
+{
+  assert(n != 0);
+  assert(n % sizeof(float) == 0);
+  // Broadcast the clamp bounds and, below, the scalar b[0] to all four lanes.
+  const v128_t vy_min = wasm_v32x4_load_splat(&params->scalar.min);
+  const v128_t vy_max = wasm_v32x4_load_splat(&params->scalar.max);
+
+  const v128_t vb = wasm_v32x4_load_splat(b);
+  for (; n >= 8 * sizeof(float); n -= 8 * sizeof(float)) {  // main loop: 8 floats (two vectors) per iteration
+    const v128_t va0123 = wasm_v128_load(a);
+    const v128_t va4567 = wasm_v128_load(a + 4);
+    a += 8;
+
+    v128_t vy0123 = wasm_f32x4_sub(vb, va0123);  // the scalar is the minuend
+    v128_t vy4567 = wasm_f32x4_sub(vb, va4567);
+
+    // Clamp with masks; each vector's masks are computed from its unclamped difference.
+    const v128_t vltmask0123 = wasm_f32x4_lt(vy0123, vy_min);  // lanes below the lower bound
+    const v128_t vltmask4567 = wasm_f32x4_lt(vy4567, vy_min);
+
+    const v128_t vngtmask0123 = wasm_f32x4_le(vy0123, vy_max);  // lanes not above the upper bound
+    vy0123 = wasm_v128_bitselect(vy_min, vy0123, vltmask0123);  // vy_min where the lt mask is set
+    const v128_t vngtmask4567 = wasm_f32x4_le(vy4567, vy_max);
+    vy4567 = wasm_v128_bitselect(vy_min, vy4567, vltmask4567);
+
+    vy0123 = wasm_v128_bitselect(vy0123, vy_max, vngtmask0123);  // vy_max where the le mask is clear
+    vy4567 = wasm_v128_bitselect(vy4567, vy_max, vngtmask4567);
+
+    wasm_v128_store(y, vy0123);
+    wasm_v128_store(y + 4, vy4567);
+    y += 8;
+  }
+  for (; n >= 4 * sizeof(float); n -= 4 * sizeof(float)) {  // fewer than 8 floats left: at most one more full vector
+    const v128_t va = wasm_v128_load(a);
+    a += 4;
+
+    v128_t vy = wasm_f32x4_sub(vb, va);
+
+    const v128_t vltmask = wasm_f32x4_lt(vy, vy_min);
+    const v128_t vngtmask = wasm_f32x4_le(vy, vy_max);
+    vy = wasm_v128_bitselect(vy_min, vy, vltmask);
+    vy = wasm_v128_bitselect(vy, vy_max, vngtmask);
+
+    wasm_v128_store(y, vy);
+    y += 4;
+  }
+  if XNN_UNLIKELY(n != 0) {  // tail: fewer than 4 floats remain
+    const v128_t va = wasm_v128_load(a);  // NOTE(review): full 16-byte load although fewer than 4 floats remain; presumably the input is readable past its end - confirm
+
+    v128_t vy = wasm_f32x4_sub(vb, va);
+
+    const v128_t vltmask = wasm_f32x4_lt(vy, vy_min);
+    const v128_t vngtmask = wasm_f32x4_le(vy, vy_max);
+    vy = wasm_v128_bitselect(vy_min, vy, vltmask);
+    vy = wasm_v128_bitselect(vy, vy_max, vngtmask);
+    // Write back only the remaining elements: a pair of floats first, then a single float.
+    if (n & (2 * sizeof(float))) {
+      *((double*) y) = wasm_f64x2_extract_lane(vy, 0);  // lanes 0-1 written as one 64-bit value
+      vy = wasm_v32x4_shuffle(vy, vy, 2, 3, 2, 3);  // move lanes 2-3 down into lanes 0-1
+      y += 2;
+    }
+    if (n & (1 * sizeof(float))) {
+      *y = wasm_f32x4_extract_lane(vy, 0);
+    }
+  }
+}
diff --git a/src/f32-vbinary/gen/vsqrdiffc-wasmsimd-x4.c b/src/f32-vbinary/gen/vsqrdiffc-wasmsimd-x4.c
new file mode 100644
index 0000000..fbfcb40
--- /dev/null
+++ b/src/f32-vbinary/gen/vsqrdiffc-wasmsimd-x4.c
@@ -0,0 +1,58 @@
+// Auto-generated file. Do not edit!
+//   Template: src/f32-vbinary/vopc-wasmsimd.c.in
+//   Generator: tools/xngen
+//
+// Copyright 2020 Google LLC
+//
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree.
+
+#include <assert.h>
+
+#include <wasm_simd128.h>
+
+#include <xnnpack/common.h>
+#include <xnnpack/vbinary.h>
+
+// Squared difference against a scalar: y[i] = (a[i] - b[0]) * (a[i] - b[0]).
+void xnn_f32_vsqrdiffc_ukernel__wasmsimd_x4(
+    size_t n,  // size of a and y in bytes; the loops consume it in units of sizeof(float)
+    const float* a,  // input vector
+    const float* b,  // scalar operand: only b[0] is read
+    float* y,  // output vector
+    const union xnn_f32_default_params params[restrict XNN_MIN_ELEMENTS(1)]) XNN_DISABLE_TSAN
+{
+  assert(n != 0);
+  assert(n % sizeof(float) == 0);
+  // params is not read in this kernel; the result is stored without clamping.
+  // Broadcast the scalar b[0] to all four lanes.
+  const v128_t vb = wasm_v32x4_load_splat(b);
+  for (; n >= 4 * sizeof(float); n -= 4 * sizeof(float)) {  // main loop: 4 floats per iteration
+    const v128_t va0123 = wasm_v128_load(a);
+    a += 4;
+
+    v128_t vy0123 = wasm_f32x4_sub(va0123, vb);  // difference a - b
+
+    vy0123 = wasm_f32x4_mul(vy0123, vy0123);  // square it
+
+
+    wasm_v128_store(y, vy0123);
+    y += 4;
+  }
+  if XNN_UNLIKELY(n != 0) {  // tail: fewer than 4 floats remain
+    const v128_t va = wasm_v128_load(a);  // NOTE(review): full 16-byte load although fewer than 4 floats remain; presumably the input is readable past its end - confirm
+
+    v128_t vy = wasm_f32x4_sub(va, vb);
+    vy = wasm_f32x4_mul(vy, vy);
+
+    // Write back only the remaining elements: a pair of floats first, then a single float.
+    if (n & (2 * sizeof(float))) {
+      *((double*) y) = wasm_f64x2_extract_lane(vy, 0);  // lanes 0-1 written as one 64-bit value
+      vy = wasm_v32x4_shuffle(vy, vy, 2, 3, 2, 3);  // move lanes 2-3 down into lanes 0-1
+      y += 2;
+    }
+    if (n & (1 * sizeof(float))) {
+      *y = wasm_f32x4_extract_lane(vy, 0);
+    }
+  }
+}
diff --git a/src/f32-vbinary/gen/vsqrdiffc-wasmsimd-x8.c b/src/f32-vbinary/gen/vsqrdiffc-wasmsimd-x8.c
new file mode 100644
index 0000000..e22ef37
--- /dev/null
+++ b/src/f32-vbinary/gen/vsqrdiffc-wasmsimd-x8.c
@@ -0,0 +1,73 @@
+// Auto-generated file. Do not edit!
+//   Template: src/f32-vbinary/vopc-wasmsimd.c.in
+//   Generator: tools/xngen
+//
+// Copyright 2020 Google LLC
+//
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree.
+
+#include <assert.h>
+
+#include <wasm_simd128.h>
+
+#include <xnnpack/common.h>
+#include <xnnpack/vbinary.h>
+
+// Squared difference against a scalar: y[i] = (a[i] - b[0]) * (a[i] - b[0]).
+void xnn_f32_vsqrdiffc_ukernel__wasmsimd_x8(
+    size_t n,  // size of a and y in bytes; the loops consume it in units of sizeof(float)
+    const float* a,  // input vector
+    const float* b,  // scalar operand: only b[0] is read
+    float* y,  // output vector
+    const union xnn_f32_default_params params[restrict XNN_MIN_ELEMENTS(1)]) XNN_DISABLE_TSAN
+{
+  assert(n != 0);
+  assert(n % sizeof(float) == 0);
+  // params is not read in this kernel; the result is stored without clamping.
+  // Broadcast the scalar b[0] to all four lanes.
+  const v128_t vb = wasm_v32x4_load_splat(b);
+  for (; n >= 8 * sizeof(float); n -= 8 * sizeof(float)) {  // main loop: 8 floats (two vectors) per iteration
+    const v128_t va0123 = wasm_v128_load(a);
+    const v128_t va4567 = wasm_v128_load(a + 4);
+    a += 8;
+
+    v128_t vy0123 = wasm_f32x4_sub(va0123, vb);  // difference a - b
+    v128_t vy4567 = wasm_f32x4_sub(va4567, vb);
+
+    vy0123 = wasm_f32x4_mul(vy0123, vy0123);  // square it
+    vy4567 = wasm_f32x4_mul(vy4567, vy4567);
+
+
+    wasm_v128_store(y, vy0123);
+    wasm_v128_store(y + 4, vy4567);
+    y += 8;
+  }
+  for (; n >= 4 * sizeof(float); n -= 4 * sizeof(float)) {  // fewer than 8 floats left: at most one more full vector
+    const v128_t va = wasm_v128_load(a);
+    a += 4;
+
+    v128_t vy = wasm_f32x4_sub(va, vb);
+    vy = wasm_f32x4_mul(vy, vy);
+
+
+    wasm_v128_store(y, vy);
+    y += 4;
+  }
+  if XNN_UNLIKELY(n != 0) {  // tail: fewer than 4 floats remain
+    const v128_t va = wasm_v128_load(a);  // NOTE(review): full 16-byte load although fewer than 4 floats remain; presumably the input is readable past its end - confirm
+
+    v128_t vy = wasm_f32x4_sub(va, vb);
+    vy = wasm_f32x4_mul(vy, vy);
+
+    // Write back only the remaining elements: a pair of floats first, then a single float.
+    if (n & (2 * sizeof(float))) {
+      *((double*) y) = wasm_f64x2_extract_lane(vy, 0);  // lanes 0-1 written as one 64-bit value
+      vy = wasm_v32x4_shuffle(vy, vy, 2, 3, 2, 3);  // move lanes 2-3 down into lanes 0-1
+      y += 2;
+    }
+    if (n & (1 * sizeof(float))) {
+      *y = wasm_f32x4_extract_lane(vy, 0);
+    }
+  }
+}
diff --git a/src/f32-vbinary/gen/vsubc-minmax-wasmsimd-arm-x4.c b/src/f32-vbinary/gen/vsubc-minmax-wasmsimd-arm-x4.c
new file mode 100644
index 0000000..0ee3672
--- /dev/null
+++ b/src/f32-vbinary/gen/vsubc-minmax-wasmsimd-arm-x4.c
@@ -0,0 +1,63 @@
+// Auto-generated file. Do not edit!
+//   Template: src/f32-vbinary/vopc-wasmsimd.c.in
+//   Generator: tools/xngen
+//
+// Copyright 2020 Google LLC
+//
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree.
+
+#include <assert.h>
+
+#include <wasm_simd128.h>
+
+#include <xnnpack/common.h>
+#include <xnnpack/vbinary.h>
+
+// Subtraction of a scalar: y[i] = a[i] - b[0], clamped to [params->scalar.min, params->scalar.max] with f32x4 max/min.
+void xnn_f32_vsubc_minmax_ukernel__wasmsimd_arm_x4(
+    size_t n,  // size of a and y in bytes; the loops consume it in units of sizeof(float)
+    const float* a,  // input vector
+    const float* b,  // scalar operand: only b[0] is read
+    float* y,  // output vector
+    const union xnn_f32_minmax_params params[restrict XNN_MIN_ELEMENTS(1)]) XNN_DISABLE_TSAN
+{
+  assert(n != 0);
+  assert(n % sizeof(float) == 0);
+  // Broadcast the clamp bounds and, below, the scalar b[0] to all four lanes.
+  const v128_t vy_min = wasm_v32x4_load_splat(&params->scalar.min);
+  const v128_t vy_max = wasm_v32x4_load_splat(&params->scalar.max);
+
+  const v128_t vb = wasm_v32x4_load_splat(b);
+  for (; n >= 4 * sizeof(float); n -= 4 * sizeof(float)) {  // main loop: 4 floats per iteration
+    const v128_t va0123 = wasm_v128_load(a);
+    a += 4;
+
+    v128_t vy0123 = wasm_f32x4_sub(va0123, vb);  // the scalar is the subtrahend
+
+    // Clamp: lower bound first, then upper bound.
+    vy0123 = wasm_f32x4_max(vy0123, vy_min);
+
+    vy0123 = wasm_f32x4_min(vy0123, vy_max);
+
+    wasm_v128_store(y, vy0123);
+    y += 4;
+  }
+  if XNN_UNLIKELY(n != 0) {  // tail: fewer than 4 floats remain
+    const v128_t va = wasm_v128_load(a);  // NOTE(review): full 16-byte load although fewer than 4 floats remain; presumably the input is readable past its end - confirm
+
+    v128_t vy = wasm_f32x4_sub(va, vb);
+
+    vy = wasm_f32x4_max(vy, vy_min);
+    vy = wasm_f32x4_min(vy, vy_max);
+    // Write back only the remaining elements: a pair of floats first, then a single float.
+    if (n & (2 * sizeof(float))) {
+      *((double*) y) = wasm_f64x2_extract_lane(vy, 0);  // lanes 0-1 written as one 64-bit value
+      vy = wasm_v32x4_shuffle(vy, vy, 2, 3, 2, 3);  // move lanes 2-3 down into lanes 0-1
+      y += 2;
+    }
+    if (n & (1 * sizeof(float))) {
+      *y = wasm_f32x4_extract_lane(vy, 0);
+    }
+  }
+}
diff --git a/src/f32-vbinary/gen/vsubc-minmax-wasmsimd-arm-x8.c b/src/f32-vbinary/gen/vsubc-minmax-wasmsimd-arm-x8.c
new file mode 100644
index 0000000..be93ed0
--- /dev/null
+++ b/src/f32-vbinary/gen/vsubc-minmax-wasmsimd-arm-x8.c
@@ -0,0 +1,80 @@
+// Auto-generated file. Do not edit!
+//   Template: src/f32-vbinary/vopc-wasmsimd.c.in
+//   Generator: tools/xngen
+//
+// Copyright 2020 Google LLC
+//
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree.
+
+#include <assert.h>
+
+#include <wasm_simd128.h>
+
+#include <xnnpack/common.h>
+#include <xnnpack/vbinary.h>
+
+// Subtraction of a scalar: y[i] = a[i] - b[0], clamped to [params->scalar.min, params->scalar.max] with f32x4 max/min.
+void xnn_f32_vsubc_minmax_ukernel__wasmsimd_arm_x8(
+    size_t n,  // size of a and y in bytes; the loops consume it in units of sizeof(float)
+    const float* a,  // input vector
+    const float* b,  // scalar operand: only b[0] is read
+    float* y,  // output vector
+    const union xnn_f32_minmax_params params[restrict XNN_MIN_ELEMENTS(1)]) XNN_DISABLE_TSAN
+{
+  assert(n != 0);
+  assert(n % sizeof(float) == 0);
+  // Broadcast the clamp bounds and, below, the scalar b[0] to all four lanes.
+  const v128_t vy_min = wasm_v32x4_load_splat(&params->scalar.min);
+  const v128_t vy_max = wasm_v32x4_load_splat(&params->scalar.max);
+
+  const v128_t vb = wasm_v32x4_load_splat(b);
+  for (; n >= 8 * sizeof(float); n -= 8 * sizeof(float)) {  // main loop: 8 floats (two vectors) per iteration
+    const v128_t va0123 = wasm_v128_load(a);
+    const v128_t va4567 = wasm_v128_load(a + 4);
+    a += 8;
+
+    v128_t vy0123 = wasm_f32x4_sub(va0123, vb);  // the scalar is the subtrahend
+    v128_t vy4567 = wasm_f32x4_sub(va4567, vb);
+
+    // Clamp: lower bound first, then upper bound.
+    vy0123 = wasm_f32x4_max(vy0123, vy_min);
+    vy4567 = wasm_f32x4_max(vy4567, vy_min);
+
+    vy0123 = wasm_f32x4_min(vy0123, vy_max);
+    vy4567 = wasm_f32x4_min(vy4567, vy_max);
+
+    wasm_v128_store(y, vy0123);
+    wasm_v128_store(y + 4, vy4567);
+    y += 8;
+  }
+  for (; n >= 4 * sizeof(float); n -= 4 * sizeof(float)) {  // fewer than 8 floats left: at most one more full vector
+    const v128_t va = wasm_v128_load(a);
+    a += 4;
+
+    v128_t vy = wasm_f32x4_sub(va, vb);
+
+    vy = wasm_f32x4_max(vy, vy_min);
+    vy = wasm_f32x4_min(vy, vy_max);
+
+    wasm_v128_store(y, vy);
+    y += 4;
+  }
+  if XNN_UNLIKELY(n != 0) {  // tail: fewer than 4 floats remain
+    const v128_t va = wasm_v128_load(a);  // NOTE(review): full 16-byte load although fewer than 4 floats remain; presumably the input is readable past its end - confirm
+
+    v128_t vy = wasm_f32x4_sub(va, vb);
+
+    vy = wasm_f32x4_max(vy, vy_min);
+    vy = wasm_f32x4_min(vy, vy_max);
+    // Write back only the remaining elements: a pair of floats first, then a single float.
+    if (n & (2 * sizeof(float))) {
+      *((double*) y) = wasm_f64x2_extract_lane(vy, 0);  // lanes 0-1 written as one 64-bit value
+      vy = wasm_v32x4_shuffle(vy, vy, 2, 3, 2, 3);  // move lanes 2-3 down into lanes 0-1
+      y += 2;
+    }
+    if (n & (1 * sizeof(float))) {
+      *y = wasm_f32x4_extract_lane(vy, 0);
+    }
+  }
+}
diff --git a/src/f32-vbinary/gen/vsubc-minmax-wasmsimd-x86-x4.c b/src/f32-vbinary/gen/vsubc-minmax-wasmsimd-x86-x4.c
new file mode 100644
index 0000000..4aebc52
--- /dev/null
+++ b/src/f32-vbinary/gen/vsubc-minmax-wasmsimd-x86-x4.c
@@ -0,0 +1,68 @@
+// Auto-generated file. Do not edit!
+//   Template: src/f32-vbinary/vopc-wasmsimd.c.in
+//   Generator: tools/xngen
+//
+// Copyright 2020 Google LLC
+//
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree.
+
+#include <assert.h>
+
+#include <wasm_simd128.h>
+
+#include <xnnpack/common.h>
+#include <xnnpack/vbinary.h>
+
+// Subtraction of a scalar: y[i] = a[i] - b[0], clamped to [params->scalar.min, params->scalar.max] via compare masks and bitselect.
+void xnn_f32_vsubc_minmax_ukernel__wasmsimd_x86_x4(
+    size_t n,  // size of a and y in bytes; the loops consume it in units of sizeof(float)
+    const float* a,  // input vector
+    const float* b,  // scalar operand: only b[0] is read
+    float* y,  // output vector
+    const union xnn_f32_minmax_params params[restrict XNN_MIN_ELEMENTS(1)]) XNN_DISABLE_TSAN
+{
+  assert(n != 0);
+  assert(n % sizeof(float) == 0);
+  // Broadcast the clamp bounds and, below, the scalar b[0] to all four lanes.
+  const v128_t vy_min = wasm_v32x4_load_splat(&params->scalar.min);
+  const v128_t vy_max = wasm_v32x4_load_splat(&params->scalar.max);
+
+  const v128_t vb = wasm_v32x4_load_splat(b);
+  for (; n >= 4 * sizeof(float); n -= 4 * sizeof(float)) {  // main loop: 4 floats per iteration
+    const v128_t va0123 = wasm_v128_load(a);
+    a += 4;
+
+    v128_t vy0123 = wasm_f32x4_sub(va0123, vb);  // the scalar is the subtrahend
+
+    // Clamp with masks; both masks are computed from the unclamped difference.
+    const v128_t vltmask0123 = wasm_f32x4_lt(vy0123, vy_min);  // lanes below the lower bound
+
+    const v128_t vngtmask0123 = wasm_f32x4_le(vy0123, vy_max);  // lanes not above the upper bound
+    vy0123 = wasm_v128_bitselect(vy_min, vy0123, vltmask0123);  // vy_min where the lt mask is set
+
+    vy0123 = wasm_v128_bitselect(vy0123, vy_max, vngtmask0123);  // vy_max where the le mask is clear
+
+    wasm_v128_store(y, vy0123);
+    y += 4;
+  }
+  if XNN_UNLIKELY(n != 0) {  // tail: fewer than 4 floats remain
+    const v128_t va = wasm_v128_load(a);  // NOTE(review): full 16-byte load although fewer than 4 floats remain; presumably the input is readable past its end - confirm
+
+    v128_t vy = wasm_f32x4_sub(va, vb);
+    // Same mask-based clamp as in the main loop.
+    const v128_t vltmask = wasm_f32x4_lt(vy, vy_min);
+    const v128_t vngtmask = wasm_f32x4_le(vy, vy_max);
+    vy = wasm_v128_bitselect(vy_min, vy, vltmask);
+    vy = wasm_v128_bitselect(vy, vy_max, vngtmask);
+    // Write back only the remaining elements: a pair of floats first, then a single float.
+    if (n & (2 * sizeof(float))) {
+      *((double*) y) = wasm_f64x2_extract_lane(vy, 0);  // lanes 0-1 written as one 64-bit value
+      vy = wasm_v32x4_shuffle(vy, vy, 2, 3, 2, 3);  // move lanes 2-3 down into lanes 0-1
+      y += 2;
+    }
+    if (n & (1 * sizeof(float))) {
+      *y = wasm_f32x4_extract_lane(vy, 0);
+    }
+  }
+}
diff --git a/src/f32-vbinary/gen/vsubc-minmax-wasmsimd-x86-x8.c b/src/f32-vbinary/gen/vsubc-minmax-wasmsimd-x86-x8.c
new file mode 100644
index 0000000..14d10eb
--- /dev/null
+++ b/src/f32-vbinary/gen/vsubc-minmax-wasmsimd-x86-x8.c
@@ -0,0 +1,89 @@
+// Auto-generated file. Do not edit!
+//   Template: src/f32-vbinary/vopc-wasmsimd.c.in
+//   Generator: tools/xngen
+//
+// Copyright 2020 Google LLC
+//
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree.
+
+#include <assert.h>
+
+#include <wasm_simd128.h>
+
+#include <xnnpack/common.h>
+#include <xnnpack/vbinary.h>
+
+
+void xnn_f32_vsubc_minmax_ukernel__wasmsimd_x86_x8(  // y[i] = clamp(a[i] - b[0], min, max), 8 floats per main-loop step
+    size_t n,  // input size in BYTES; asserted below to be a non-zero multiple of sizeof(float)
+    const float* a,  // input vector (the minuend)
+    const float* b,  // only b[0] is read; it is broadcast and subtracted from every element of a
+    float* y,  // output vector
+    const union xnn_f32_minmax_params params[restrict XNN_MIN_ELEMENTS(1)]) XNN_DISABLE_TSAN  // provides scalar.min and scalar.max
+{
+  assert(n != 0);
+  assert(n % sizeof(float) == 0);
+
+  const v128_t vy_min = wasm_v32x4_load_splat(&params->scalar.min);  // lower clamp bound replicated into all 4 lanes
+  const v128_t vy_max = wasm_v32x4_load_splat(&params->scalar.max);  // upper clamp bound replicated into all 4 lanes
+
+  const v128_t vb = wasm_v32x4_load_splat(b);  // scalar operand, loaded once before the loops
+  for (; n >= 8 * sizeof(float); n -= 8 * sizeof(float)) {  // main loop: two 4-float vectors per iteration
+    const v128_t va0123 = wasm_v128_load(a);
+    const v128_t va4567 = wasm_v128_load(a + 4);
+    a += 8;
+
+    v128_t vy0123 = wasm_f32x4_sub(va0123, vb);  // a - b
+    v128_t vy4567 = wasm_f32x4_sub(va4567, vb);
+
+
+    const v128_t vltmask0123 = wasm_f32x4_lt(vy0123, vy_min);  // lanes below the lower bound
+    const v128_t vltmask4567 = wasm_f32x4_lt(vy4567, vy_min);
+
+    const v128_t vngtmask0123 = wasm_f32x4_le(vy0123, vy_max);  // lanes NOT above the upper bound (computed before the min clamp is applied)
+    vy0123 = wasm_v128_bitselect(vy_min, vy0123, vltmask0123);  // too-small lanes are replaced by vy_min
+    const v128_t vngtmask4567 = wasm_f32x4_le(vy4567, vy_max);
+    vy4567 = wasm_v128_bitselect(vy_min, vy4567, vltmask4567);
+
+    vy0123 = wasm_v128_bitselect(vy0123, vy_max, vngtmask0123);  // lanes within the bound are kept, every other lane becomes vy_max
+    vy4567 = wasm_v128_bitselect(vy4567, vy_max, vngtmask4567);
+
+    wasm_v128_store(y, vy0123);
+    wasm_v128_store(y + 4, vy4567);
+    y += 8;
+  }
+  for (; n >= 4 * sizeof(float); n -= 4 * sizeof(float)) {  // runs at most once: fewer than 8 floats remain after the main loop
+    const v128_t va = wasm_v128_load(a);
+    a += 4;
+
+    v128_t vy = wasm_f32x4_sub(va, vb);
+
+    const v128_t vltmask = wasm_f32x4_lt(vy, vy_min);  // same clamp sequence as in the main loop
+    const v128_t vngtmask = wasm_f32x4_le(vy, vy_max);
+    vy = wasm_v128_bitselect(vy_min, vy, vltmask);
+    vy = wasm_v128_bitselect(vy, vy_max, vngtmask);
+
+    wasm_v128_store(y, vy);
+    y += 4;
+  }
+  if XNN_UNLIKELY(n != 0) {  // tail: 1 to 3 floats remain
+    const v128_t va = wasm_v128_load(a);  // NOTE(review): full 16-byte load although fewer than 4 floats remain, so it reads past the last n bytes of a; presumably callers guarantee this is safe - confirm
+
+    v128_t vy = wasm_f32x4_sub(va, vb);
+
+    const v128_t vltmask = wasm_f32x4_lt(vy, vy_min);
+    const v128_t vngtmask = wasm_f32x4_le(vy, vy_max);
+    vy = wasm_v128_bitselect(vy_min, vy, vltmask);
+    vy = wasm_v128_bitselect(vy, vy_max, vngtmask);
+
+    if (n & (2 * sizeof(float))) {  // 2 or 3 floats remain
+      *((double*) y) = wasm_f64x2_extract_lane(vy, 0);  // writes lanes 0-1 with one 64-bit store; NOTE(review): goes through a double* cast of a float* - check alignment/aliasing assumptions
+      vy = wasm_v32x4_shuffle(vy, vy, 2, 3, 2, 3);  // move lanes 2-3 down so lane 0 holds the next unwritten element
+      y += 2;
+    }
+    if (n & (1 * sizeof(float))) {  // an odd element is left
+      *y = wasm_f32x4_extract_lane(vy, 0);
+    }
+  }
+}
diff --git a/src/f32-vbinary/vopc-wasmsimd.c.in b/src/f32-vbinary/vopc-wasmsimd.c.in
new file mode 100644
index 0000000..61727d0
--- /dev/null
+++ b/src/f32-vbinary/vopc-wasmsimd.c.in
@@ -0,0 +1,147 @@
+// Copyright 2020 Google LLC
+//
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree.
+
+$assert BATCH_TILE % 4 == 0
+$assert BATCH_TILE >= 4
+$ABC = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+$assert OP in ["ADD", "DIV", "RDIV", "MAX", "MIN", "MUL", "SUB", "RSUB", "SQRDIFF", "RSQRDIFF"]
+$assert ACTIVATION in ["LINEAR", "MINMAX"]
+#include <assert.h>
+
+#include <wasm_simd128.h>
+
+#include <xnnpack/common.h>
+#include <xnnpack/vbinary.h>
+
+
+$WASM_F32X4_OP = {
+$  "ADD": lambda x: "wasm_f32x4_add(%s, vb)" % x,
+$  "DIV": lambda x: "wasm_f32x4_div(%s, vb)" % x,
+$  "RDIV": lambda x: "wasm_f32x4_div(vb, %s)" % x,
+$  "MAX": lambda x: "wasm_f32x4_max(%s, vb)" % x,
+$  "MIN": lambda x: "wasm_f32x4_min(%s, vb)" % x,
+$  "MUL": lambda x: "wasm_f32x4_mul(%s, vb)" % x,
+$  "SUB": lambda x: "wasm_f32x4_sub(%s, vb)" % x,
+$  "RSUB": lambda x: "wasm_f32x4_sub(vb, %s)" % x,
+$  "SQRDIFF": lambda x: "wasm_f32x4_sub(%s, vb)" % x,
+$  "RSQRDIFF": lambda x: "wasm_f32x4_sub(vb, %s)" % x,
+$}[OP]
+$ARCH_SUFFIX = "" if ACTIVATION == "LINEAR" and OP not in ["MIN", "MAX"] else "_x86" if X86 else "_arm"
+$ACTIVATION_SUFFIX = {"LINEAR": "", "MINMAX": "_minmax"}[ACTIVATION]
+$PARAMS = {"LINEAR": "xnn_f32_default_params", "MINMAX": "xnn_f32_minmax_params"}[ACTIVATION]
+void xnn_f32_v${OP.lower()}c${ACTIVATION_SUFFIX}_ukernel__wasmsimd${ARCH_SUFFIX}_x${BATCH_TILE}(  // elementwise OP between vector a and the single scalar b[0], with optional min/max clamp
+    size_t n,  // input size in BYTES; asserted below to be a non-zero multiple of sizeof(float)
+    const float* a,  // input vector
+    const float* b,  // only b[0] is read; it is broadcast into vb
+    float* y,  // output vector
+    const union ${PARAMS} params[restrict XNN_MIN_ELEMENTS(1)]) XNN_DISABLE_TSAN  // only read when ACTIVATION is MINMAX
+{
+  assert(n != 0);
+  assert(n % sizeof(float) == 0);
+
+  $if ACTIVATION == "MINMAX":
+    const v128_t vy_min = wasm_v32x4_load_splat(&params->scalar.min);  // lower clamp bound replicated into all 4 lanes
+    const v128_t vy_max = wasm_v32x4_load_splat(&params->scalar.max);  // upper clamp bound replicated into all 4 lanes
+
+  const v128_t vb = wasm_v32x4_load_splat(b);  // scalar operand, loaded once before the loops
+  for (; n >= ${BATCH_TILE} * sizeof(float); n -= ${BATCH_TILE} * sizeof(float)) {  // main loop: BATCH_TILE floats per iteration
+    const v128_t va${ABC[0:4]} = wasm_v128_load(a);
+    $for N in range(4, BATCH_TILE, 4):
+      const v128_t va${ABC[N:N+4]} = wasm_v128_load(a + ${N});
+    a += ${BATCH_TILE};
+
+    $for N in range(0, BATCH_TILE, 4):
+      v128_t vy${ABC[N:N+4]} = ${WASM_F32X4_OP("va" + ABC[N:N+4])};  // NOTE(review): always uses the WASM_F32X4_OP intrinsic, while both tail paths below switch to compare+bitselect for MIN/MAX when X86 is set - confirm the difference is intended
+
+    $if OP in ["SQRDIFF", "RSQRDIFF"]:
+      $for N in range(0, BATCH_TILE, 4):
+        vy${ABC[N:N+4]} = wasm_f32x4_mul(vy${ABC[N:N+4]}, vy${ABC[N:N+4]});  // square the difference
+
+    $if ACTIVATION == "MINMAX":
+      $if X86:
+        $for N in range(0, BATCH_TILE, 4):
+          const v128_t vltmask${ABC[N:N+4]} = wasm_f32x4_lt(vy${ABC[N:N+4]}, vy_min);  // lanes below the lower bound
+
+        $for N in range(0, BATCH_TILE, 4):
+          const v128_t vngtmask${ABC[N:N+4]} = wasm_f32x4_le(vy${ABC[N:N+4]}, vy_max);  // lanes NOT above the upper bound (computed before the min clamp is applied)
+          vy${ABC[N:N+4]} = wasm_v128_bitselect(vy_min, vy${ABC[N:N+4]}, vltmask${ABC[N:N+4]});  // too-small lanes are replaced by vy_min
+
+        $for N in range(0, BATCH_TILE, 4):
+          vy${ABC[N:N+4]} = wasm_v128_bitselect(vy${ABC[N:N+4]}, vy_max, vngtmask${ABC[N:N+4]});  // lanes within the bound are kept, every other lane becomes vy_max
+      $else:
+        $for N in range(0, BATCH_TILE, 4):
+          vy${ABC[N:N+4]} = wasm_f32x4_max(vy${ABC[N:N+4]}, vy_min);  // non-X86 variant clamps with the max/min intrinsics directly
+
+        $for N in range(0, BATCH_TILE, 4):
+          vy${ABC[N:N+4]} = wasm_f32x4_min(vy${ABC[N:N+4]}, vy_max);
+
+    wasm_v128_store(y, vy${ABC[0:4]});
+    $for N in range(4, BATCH_TILE, 4):
+      wasm_v128_store(y + ${N}, vy${ABC[N:N+4]});
+    y += ${BATCH_TILE};
+  }
+  $if BATCH_TILE > 4:
+    for (; n >= 4 * sizeof(float); n -= 4 * sizeof(float)) {  // drains the remaining full 4-float vectors; only emitted when BATCH_TILE > 4
+      const v128_t va = wasm_v128_load(a);
+      a += 4;
+
+      $if OP == "MIN" and X86:
+        const v128_t vm = wasm_f32x4_lt(va, vb);  // lanes where a < b
+        v128_t vy = wasm_v128_bitselect(va, vb, vm);  // take a where a < b, otherwise b
+      $elif OP == "MAX" and X86:
+        const v128_t vm = wasm_f32x4_le(va, vb);  // lanes where a <= b
+        v128_t vy = wasm_v128_bitselect(vb, va, vm);  // take b where a <= b, otherwise a
+      $else:
+        v128_t vy = ${WASM_F32X4_OP("va")};
+        $if OP in ["SQRDIFF", "RSQRDIFF"]:
+          vy = wasm_f32x4_mul(vy, vy);  // square the difference
+
+      $if ACTIVATION == "MINMAX":
+        $if X86:
+          const v128_t vltmask = wasm_f32x4_lt(vy, vy_min);  // same clamp sequence as in the main loop
+          const v128_t vngtmask = wasm_f32x4_le(vy, vy_max);
+          vy = wasm_v128_bitselect(vy_min, vy, vltmask);
+          vy = wasm_v128_bitselect(vy, vy_max, vngtmask);
+        $else:
+          vy = wasm_f32x4_max(vy, vy_min);
+          vy = wasm_f32x4_min(vy, vy_max);
+
+      wasm_v128_store(y, vy);
+      y += 4;
+    }
+  if XNN_UNLIKELY(n != 0) {  // tail: 1 to 3 floats remain
+    const v128_t va = wasm_v128_load(a);  // NOTE(review): full 16-byte load although fewer than 4 floats remain, so it reads past the last n bytes of a; presumably callers guarantee this is safe - confirm
+
+    $if OP == "MIN" and X86:
+      const v128_t vm = wasm_f32x4_lt(va, vb);  // lanes where a < b
+      v128_t vy = wasm_v128_bitselect(va, vb, vm);  // take a where a < b, otherwise b
+    $elif OP == "MAX" and X86:
+      const v128_t vm = wasm_f32x4_le(va, vb);  // lanes where a <= b
+      v128_t vy = wasm_v128_bitselect(vb, va, vm);  // take b where a <= b, otherwise a
+    $else:
+      v128_t vy = ${WASM_F32X4_OP("va")};
+      $if OP in ["SQRDIFF", "RSQRDIFF"]:
+        vy = wasm_f32x4_mul(vy, vy);  // square the difference
+
+    $if ACTIVATION == "MINMAX":
+      $if X86:
+        const v128_t vltmask = wasm_f32x4_lt(vy, vy_min);  // same clamp sequence as in the main loop
+        const v128_t vngtmask = wasm_f32x4_le(vy, vy_max);
+        vy = wasm_v128_bitselect(vy_min, vy, vltmask);
+        vy = wasm_v128_bitselect(vy, vy_max, vngtmask);
+      $else:
+        vy = wasm_f32x4_max(vy, vy_min);
+        vy = wasm_f32x4_min(vy, vy_max);
+
+    if (n & (2 * sizeof(float))) {  // 2 or 3 floats remain
+      *((double*) y) = wasm_f64x2_extract_lane(vy, 0);  // writes lanes 0-1 with one 64-bit store; NOTE(review): goes through a double* cast of a float* - check alignment/aliasing assumptions
+      vy = wasm_v32x4_shuffle(vy, vy, 2, 3, 2, 3);  // move lanes 2-3 down so lane 0 holds the next unwritten element
+      y += 2;
+    }
+    if (n & (1 * sizeof(float))) {  // an odd element is left
+      *y = wasm_f32x4_extract_lane(vy, 0);
+    }
+  }
+}
diff --git a/src/xnnpack/vbinary.h b/src/xnnpack/vbinary.h
index 7338bce..460c7ca 100644
--- a/src/xnnpack/vbinary.h
+++ b/src/xnnpack/vbinary.h
@@ -240,6 +240,10 @@
 DECLARE_F32_VBINOP_UKERNEL_FUNCTION(xnn_f32_vmaxc_ukernel__avx_x16)
 DECLARE_F32_VBINOP_UKERNEL_FUNCTION(xnn_f32_vmaxc_ukernel__avx512f_x16)
 DECLARE_F32_VBINOP_UKERNEL_FUNCTION(xnn_f32_vmaxc_ukernel__avx512f_x32)
+DECLARE_F32_VBINOP_UKERNEL_FUNCTION(xnn_f32_vmaxc_ukernel__wasmsimd_arm_x4)
+DECLARE_F32_VBINOP_UKERNEL_FUNCTION(xnn_f32_vmaxc_ukernel__wasmsimd_arm_x8)
+DECLARE_F32_VBINOP_UKERNEL_FUNCTION(xnn_f32_vmaxc_ukernel__wasmsimd_x86_x4)
+DECLARE_F32_VBINOP_UKERNEL_FUNCTION(xnn_f32_vmaxc_ukernel__wasmsimd_x86_x8)
 DECLARE_F32_VBINOP_UKERNEL_FUNCTION(xnn_f32_vmaxc_ukernel__wasm_x1)
 DECLARE_F32_VBINOP_UKERNEL_FUNCTION(xnn_f32_vmaxc_ukernel__wasm_x2)
 DECLARE_F32_VBINOP_UKERNEL_FUNCTION(xnn_f32_vmaxc_ukernel__wasm_x4)
@@ -257,6 +261,10 @@
 DECLARE_F32_VBINOP_UKERNEL_FUNCTION(xnn_f32_vminc_ukernel__avx_x16)
 DECLARE_F32_VBINOP_UKERNEL_FUNCTION(xnn_f32_vminc_ukernel__avx512f_x16)
 DECLARE_F32_VBINOP_UKERNEL_FUNCTION(xnn_f32_vminc_ukernel__avx512f_x32)
+DECLARE_F32_VBINOP_UKERNEL_FUNCTION(xnn_f32_vminc_ukernel__wasmsimd_arm_x4)
+DECLARE_F32_VBINOP_UKERNEL_FUNCTION(xnn_f32_vminc_ukernel__wasmsimd_arm_x8)
+DECLARE_F32_VBINOP_UKERNEL_FUNCTION(xnn_f32_vminc_ukernel__wasmsimd_x86_x4)
+DECLARE_F32_VBINOP_UKERNEL_FUNCTION(xnn_f32_vminc_ukernel__wasmsimd_x86_x8)
 DECLARE_F32_VBINOP_UKERNEL_FUNCTION(xnn_f32_vminc_ukernel__wasm_x1)
 DECLARE_F32_VBINOP_UKERNEL_FUNCTION(xnn_f32_vminc_ukernel__wasm_x2)
 DECLARE_F32_VBINOP_UKERNEL_FUNCTION(xnn_f32_vminc_ukernel__wasm_x4)
@@ -274,6 +282,8 @@
 DECLARE_F32_VBINOP_UKERNEL_FUNCTION(xnn_f32_vsqrdiffc_ukernel__avx_x16)
 DECLARE_F32_VBINOP_UKERNEL_FUNCTION(xnn_f32_vsqrdiffc_ukernel__avx512f_x16)
 DECLARE_F32_VBINOP_UKERNEL_FUNCTION(xnn_f32_vsqrdiffc_ukernel__avx512f_x32)
+DECLARE_F32_VBINOP_UKERNEL_FUNCTION(xnn_f32_vsqrdiffc_ukernel__wasmsimd_x4)
+DECLARE_F32_VBINOP_UKERNEL_FUNCTION(xnn_f32_vsqrdiffc_ukernel__wasmsimd_x8)
 DECLARE_F32_VBINOP_UKERNEL_FUNCTION(xnn_f32_vsqrdiffc_ukernel__scalar_x1)
 DECLARE_F32_VBINOP_UKERNEL_FUNCTION(xnn_f32_vsqrdiffc_ukernel__scalar_x2)
 DECLARE_F32_VBINOP_UKERNEL_FUNCTION(xnn_f32_vsqrdiffc_ukernel__scalar_x4)
@@ -288,6 +298,8 @@
 DECLARE_F32_VBINOP_UKERNEL_FUNCTION(xnn_f32_vrsqrdiffc_ukernel__avx_x16)
 DECLARE_F32_VBINOP_UKERNEL_FUNCTION(xnn_f32_vrsqrdiffc_ukernel__avx512f_x16)
 DECLARE_F32_VBINOP_UKERNEL_FUNCTION(xnn_f32_vrsqrdiffc_ukernel__avx512f_x32)
+DECLARE_F32_VBINOP_UKERNEL_FUNCTION(xnn_f32_vrsqrdiffc_ukernel__wasmsimd_x4)
+DECLARE_F32_VBINOP_UKERNEL_FUNCTION(xnn_f32_vrsqrdiffc_ukernel__wasmsimd_x8)
 DECLARE_F32_VBINOP_UKERNEL_FUNCTION(xnn_f32_vrsqrdiffc_ukernel__scalar_x1)
 DECLARE_F32_VBINOP_UKERNEL_FUNCTION(xnn_f32_vrsqrdiffc_ukernel__scalar_x2)
 DECLARE_F32_VBINOP_UKERNEL_FUNCTION(xnn_f32_vrsqrdiffc_ukernel__scalar_x4)
@@ -302,6 +314,10 @@
 DECLARE_F32_VBINOP_MINMAX_UKERNEL_FUNCTION(xnn_f32_vaddc_minmax_ukernel__avx512f_x32)
 DECLARE_F32_VBINOP_MINMAX_UKERNEL_FUNCTION(xnn_f32_vaddc_minmax_ukernel__psimd_x4)
 DECLARE_F32_VBINOP_MINMAX_UKERNEL_FUNCTION(xnn_f32_vaddc_minmax_ukernel__psimd_x8)
+DECLARE_F32_VBINOP_MINMAX_UKERNEL_FUNCTION(xnn_f32_vaddc_minmax_ukernel__wasmsimd_arm_x4)
+DECLARE_F32_VBINOP_MINMAX_UKERNEL_FUNCTION(xnn_f32_vaddc_minmax_ukernel__wasmsimd_arm_x8)
+DECLARE_F32_VBINOP_MINMAX_UKERNEL_FUNCTION(xnn_f32_vaddc_minmax_ukernel__wasmsimd_x86_x4)
+DECLARE_F32_VBINOP_MINMAX_UKERNEL_FUNCTION(xnn_f32_vaddc_minmax_ukernel__wasmsimd_x86_x8)
 DECLARE_F32_VBINOP_MINMAX_UKERNEL_FUNCTION(xnn_f32_vaddc_minmax_ukernel__wasm_x1)
 DECLARE_F32_VBINOP_MINMAX_UKERNEL_FUNCTION(xnn_f32_vaddc_minmax_ukernel__wasm_x2)
 DECLARE_F32_VBINOP_MINMAX_UKERNEL_FUNCTION(xnn_f32_vaddc_minmax_ukernel__wasm_x4)
@@ -319,6 +335,10 @@
 DECLARE_F32_VBINOP_MINMAX_UKERNEL_FUNCTION(xnn_f32_vdivc_minmax_ukernel__avx512f_x32)
 DECLARE_F32_VBINOP_MINMAX_UKERNEL_FUNCTION(xnn_f32_vdivc_minmax_ukernel__psimd_x4)
 DECLARE_F32_VBINOP_MINMAX_UKERNEL_FUNCTION(xnn_f32_vdivc_minmax_ukernel__psimd_x8)
+DECLARE_F32_VBINOP_MINMAX_UKERNEL_FUNCTION(xnn_f32_vdivc_minmax_ukernel__wasmsimd_arm_x4)
+DECLARE_F32_VBINOP_MINMAX_UKERNEL_FUNCTION(xnn_f32_vdivc_minmax_ukernel__wasmsimd_arm_x8)
+DECLARE_F32_VBINOP_MINMAX_UKERNEL_FUNCTION(xnn_f32_vdivc_minmax_ukernel__wasmsimd_x86_x4)
+DECLARE_F32_VBINOP_MINMAX_UKERNEL_FUNCTION(xnn_f32_vdivc_minmax_ukernel__wasmsimd_x86_x8)
 DECLARE_F32_VBINOP_MINMAX_UKERNEL_FUNCTION(xnn_f32_vdivc_minmax_ukernel__wasm_x1)
 DECLARE_F32_VBINOP_MINMAX_UKERNEL_FUNCTION(xnn_f32_vdivc_minmax_ukernel__wasm_x2)
 DECLARE_F32_VBINOP_MINMAX_UKERNEL_FUNCTION(xnn_f32_vdivc_minmax_ukernel__wasm_x4)
@@ -336,6 +356,10 @@
 DECLARE_F32_VBINOP_MINMAX_UKERNEL_FUNCTION(xnn_f32_vrdivc_minmax_ukernel__avx512f_x32)
 DECLARE_F32_VBINOP_MINMAX_UKERNEL_FUNCTION(xnn_f32_vrdivc_minmax_ukernel__psimd_x4)
 DECLARE_F32_VBINOP_MINMAX_UKERNEL_FUNCTION(xnn_f32_vrdivc_minmax_ukernel__psimd_x8)
+DECLARE_F32_VBINOP_MINMAX_UKERNEL_FUNCTION(xnn_f32_vrdivc_minmax_ukernel__wasmsimd_arm_x4)
+DECLARE_F32_VBINOP_MINMAX_UKERNEL_FUNCTION(xnn_f32_vrdivc_minmax_ukernel__wasmsimd_arm_x8)
+DECLARE_F32_VBINOP_MINMAX_UKERNEL_FUNCTION(xnn_f32_vrdivc_minmax_ukernel__wasmsimd_x86_x4)
+DECLARE_F32_VBINOP_MINMAX_UKERNEL_FUNCTION(xnn_f32_vrdivc_minmax_ukernel__wasmsimd_x86_x8)
 DECLARE_F32_VBINOP_MINMAX_UKERNEL_FUNCTION(xnn_f32_vrdivc_minmax_ukernel__wasm_x1)
 DECLARE_F32_VBINOP_MINMAX_UKERNEL_FUNCTION(xnn_f32_vrdivc_minmax_ukernel__wasm_x2)
 DECLARE_F32_VBINOP_MINMAX_UKERNEL_FUNCTION(xnn_f32_vrdivc_minmax_ukernel__wasm_x4)
@@ -353,6 +377,10 @@
 DECLARE_F32_VBINOP_MINMAX_UKERNEL_FUNCTION(xnn_f32_vmulc_minmax_ukernel__avx_x16)
 DECLARE_F32_VBINOP_MINMAX_UKERNEL_FUNCTION(xnn_f32_vmulc_minmax_ukernel__avx512f_x16)
 DECLARE_F32_VBINOP_MINMAX_UKERNEL_FUNCTION(xnn_f32_vmulc_minmax_ukernel__avx512f_x32)
+DECLARE_F32_VBINOP_MINMAX_UKERNEL_FUNCTION(xnn_f32_vmulc_minmax_ukernel__wasmsimd_arm_x4)
+DECLARE_F32_VBINOP_MINMAX_UKERNEL_FUNCTION(xnn_f32_vmulc_minmax_ukernel__wasmsimd_arm_x8)
+DECLARE_F32_VBINOP_MINMAX_UKERNEL_FUNCTION(xnn_f32_vmulc_minmax_ukernel__wasmsimd_x86_x4)
+DECLARE_F32_VBINOP_MINMAX_UKERNEL_FUNCTION(xnn_f32_vmulc_minmax_ukernel__wasmsimd_x86_x8)
 DECLARE_F32_VBINOP_MINMAX_UKERNEL_FUNCTION(xnn_f32_vmulc_minmax_ukernel__wasm_x1)
 DECLARE_F32_VBINOP_MINMAX_UKERNEL_FUNCTION(xnn_f32_vmulc_minmax_ukernel__wasm_x2)
 DECLARE_F32_VBINOP_MINMAX_UKERNEL_FUNCTION(xnn_f32_vmulc_minmax_ukernel__wasm_x4)
@@ -370,6 +398,10 @@
 DECLARE_F32_VBINOP_MINMAX_UKERNEL_FUNCTION(xnn_f32_vsubc_minmax_ukernel__avx512f_x32)
 DECLARE_F32_VBINOP_MINMAX_UKERNEL_FUNCTION(xnn_f32_vsubc_minmax_ukernel__psimd_x4)
 DECLARE_F32_VBINOP_MINMAX_UKERNEL_FUNCTION(xnn_f32_vsubc_minmax_ukernel__psimd_x8)
+DECLARE_F32_VBINOP_MINMAX_UKERNEL_FUNCTION(xnn_f32_vsubc_minmax_ukernel__wasmsimd_arm_x4)
+DECLARE_F32_VBINOP_MINMAX_UKERNEL_FUNCTION(xnn_f32_vsubc_minmax_ukernel__wasmsimd_arm_x8)
+DECLARE_F32_VBINOP_MINMAX_UKERNEL_FUNCTION(xnn_f32_vsubc_minmax_ukernel__wasmsimd_x86_x4)
+DECLARE_F32_VBINOP_MINMAX_UKERNEL_FUNCTION(xnn_f32_vsubc_minmax_ukernel__wasmsimd_x86_x8)
 DECLARE_F32_VBINOP_MINMAX_UKERNEL_FUNCTION(xnn_f32_vsubc_minmax_ukernel__wasm_x1)
 DECLARE_F32_VBINOP_MINMAX_UKERNEL_FUNCTION(xnn_f32_vsubc_minmax_ukernel__wasm_x2)
 DECLARE_F32_VBINOP_MINMAX_UKERNEL_FUNCTION(xnn_f32_vsubc_minmax_ukernel__wasm_x4)
@@ -387,6 +419,10 @@
 DECLARE_F32_VBINOP_MINMAX_UKERNEL_FUNCTION(xnn_f32_vrsubc_minmax_ukernel__avx512f_x32)
 DECLARE_F32_VBINOP_MINMAX_UKERNEL_FUNCTION(xnn_f32_vrsubc_minmax_ukernel__psimd_x4)
 DECLARE_F32_VBINOP_MINMAX_UKERNEL_FUNCTION(xnn_f32_vrsubc_minmax_ukernel__psimd_x8)
+DECLARE_F32_VBINOP_MINMAX_UKERNEL_FUNCTION(xnn_f32_vrsubc_minmax_ukernel__wasmsimd_arm_x4)
+DECLARE_F32_VBINOP_MINMAX_UKERNEL_FUNCTION(xnn_f32_vrsubc_minmax_ukernel__wasmsimd_arm_x8)
+DECLARE_F32_VBINOP_MINMAX_UKERNEL_FUNCTION(xnn_f32_vrsubc_minmax_ukernel__wasmsimd_x86_x4)
+DECLARE_F32_VBINOP_MINMAX_UKERNEL_FUNCTION(xnn_f32_vrsubc_minmax_ukernel__wasmsimd_x86_x8)
 DECLARE_F32_VBINOP_MINMAX_UKERNEL_FUNCTION(xnn_f32_vrsubc_minmax_ukernel__wasm_x1)
 DECLARE_F32_VBINOP_MINMAX_UKERNEL_FUNCTION(xnn_f32_vrsubc_minmax_ukernel__wasm_x2)
 DECLARE_F32_VBINOP_MINMAX_UKERNEL_FUNCTION(xnn_f32_vrsubc_minmax_ukernel__wasm_x4)
diff --git a/test/f32-vaddc-minmax.cc b/test/f32-vaddc-minmax.cc
index 20e7e3b..e66e974 100644
--- a/test/f32-vaddc-minmax.cc
+++ b/test/f32-vaddc-minmax.cc
@@ -687,6 +687,246 @@
 #endif  // !XNN_ARCH_ASMJS && !XNN_ARCH_WASM && !XNN_COMPILER_MSVC && !XNN_COMPILER_ICC
 
 
+#if XNN_ARCH_WASMSIMD
+  TEST(F32_VADDC_MINMAX__WASMSIMD_ARM_X4, batch_eq_4) {  // single run with exactly 4 elements
+    VBinOpCMicrokernelTester()
+      .batch_size(4)
+      .Test(xnn_f32_vaddc_minmax_ukernel__wasmsimd_arm_x4, VBinOpCMicrokernelTester::OpType::AddC, VBinOpCMicrokernelTester::Variant::Scalar);
+  }
+
+  TEST(F32_VADDC_MINMAX__WASMSIMD_ARM_X4, batch_div_4) {  // multiples of 4: 8, 12, ..., 36
+    for (size_t batch_size = 8; batch_size < 40; batch_size += 4) {
+      VBinOpCMicrokernelTester()
+        .batch_size(batch_size)
+        .Test(xnn_f32_vaddc_minmax_ukernel__wasmsimd_arm_x4, VBinOpCMicrokernelTester::OpType::AddC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  TEST(F32_VADDC_MINMAX__WASMSIMD_ARM_X4, batch_lt_4) {  // batch sizes 1, 2, 3
+    for (size_t batch_size = 1; batch_size < 4; batch_size++) {
+      VBinOpCMicrokernelTester()
+        .batch_size(batch_size)
+        .Test(xnn_f32_vaddc_minmax_ukernel__wasmsimd_arm_x4, VBinOpCMicrokernelTester::OpType::AddC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  TEST(F32_VADDC_MINMAX__WASMSIMD_ARM_X4, batch_gt_4) {  // batch sizes 5, 6, 7
+    for (size_t batch_size = 5; batch_size < 8; batch_size++) {
+      VBinOpCMicrokernelTester()
+        .batch_size(batch_size)
+        .Test(xnn_f32_vaddc_minmax_ukernel__wasmsimd_arm_x4, VBinOpCMicrokernelTester::OpType::AddC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  TEST(F32_VADDC_MINMAX__WASMSIMD_ARM_X4, inplace) {  // batch sizes 1, 4, 7, ..., 19 with inplace(true)
+    for (size_t batch_size = 1; batch_size <= 20; batch_size += 3) {
+      VBinOpCMicrokernelTester()
+        .batch_size(batch_size)
+        .inplace(true)
+        .Test(xnn_f32_vaddc_minmax_ukernel__wasmsimd_arm_x4, VBinOpCMicrokernelTester::OpType::AddC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  TEST(F32_VADDC_MINMAX__WASMSIMD_ARM_X4, qmin) {  // batch sizes 1, 4, 7, ..., 19 with qmin(128); presumably tightens the lower output bound - confirm in VBinOpCMicrokernelTester
+    for (size_t batch_size = 1; batch_size <= 20; batch_size += 3) {
+      VBinOpCMicrokernelTester()
+        .batch_size(batch_size)
+        .qmin(128)
+        .Test(xnn_f32_vaddc_minmax_ukernel__wasmsimd_arm_x4, VBinOpCMicrokernelTester::OpType::AddC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  TEST(F32_VADDC_MINMAX__WASMSIMD_ARM_X4, qmax) {  // batch sizes 1, 4, 7, ..., 19 with qmax(128); presumably tightens the upper output bound - confirm in VBinOpCMicrokernelTester
+    for (size_t batch_size = 1; batch_size <= 20; batch_size += 3) {
+      VBinOpCMicrokernelTester()
+        .batch_size(batch_size)
+        .qmax(128)
+        .Test(xnn_f32_vaddc_minmax_ukernel__wasmsimd_arm_x4, VBinOpCMicrokernelTester::OpType::AddC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+#endif  // XNN_ARCH_WASMSIMD
+
+
+#if XNN_ARCH_WASMSIMD
+  TEST(F32_VADDC_MINMAX__WASMSIMD_ARM_X8, batch_eq_8) {  // single run with exactly 8 elements
+    VBinOpCMicrokernelTester()
+      .batch_size(8)
+      .Test(xnn_f32_vaddc_minmax_ukernel__wasmsimd_arm_x8, VBinOpCMicrokernelTester::OpType::AddC, VBinOpCMicrokernelTester::Variant::Scalar);
+  }
+
+  TEST(F32_VADDC_MINMAX__WASMSIMD_ARM_X8, batch_div_8) {  // multiples of 8: 16, 24, ..., 72
+    for (size_t batch_size = 16; batch_size < 80; batch_size += 8) {
+      VBinOpCMicrokernelTester()
+        .batch_size(batch_size)
+        .Test(xnn_f32_vaddc_minmax_ukernel__wasmsimd_arm_x8, VBinOpCMicrokernelTester::OpType::AddC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  TEST(F32_VADDC_MINMAX__WASMSIMD_ARM_X8, batch_lt_8) {  // batch sizes 1 through 7
+    for (size_t batch_size = 1; batch_size < 8; batch_size++) {
+      VBinOpCMicrokernelTester()
+        .batch_size(batch_size)
+        .Test(xnn_f32_vaddc_minmax_ukernel__wasmsimd_arm_x8, VBinOpCMicrokernelTester::OpType::AddC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  TEST(F32_VADDC_MINMAX__WASMSIMD_ARM_X8, batch_gt_8) {  // batch sizes 9 through 15
+    for (size_t batch_size = 9; batch_size < 16; batch_size++) {
+      VBinOpCMicrokernelTester()
+        .batch_size(batch_size)
+        .Test(xnn_f32_vaddc_minmax_ukernel__wasmsimd_arm_x8, VBinOpCMicrokernelTester::OpType::AddC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  TEST(F32_VADDC_MINMAX__WASMSIMD_ARM_X8, inplace) {  // batch sizes 1, 8, 15, ..., 36 with inplace(true)
+    for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
+      VBinOpCMicrokernelTester()
+        .batch_size(batch_size)
+        .inplace(true)
+        .Test(xnn_f32_vaddc_minmax_ukernel__wasmsimd_arm_x8, VBinOpCMicrokernelTester::OpType::AddC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  TEST(F32_VADDC_MINMAX__WASMSIMD_ARM_X8, qmin) {  // batch sizes 1, 8, 15, ..., 36 with qmin(128); presumably tightens the lower output bound - confirm in VBinOpCMicrokernelTester
+    for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
+      VBinOpCMicrokernelTester()
+        .batch_size(batch_size)
+        .qmin(128)
+        .Test(xnn_f32_vaddc_minmax_ukernel__wasmsimd_arm_x8, VBinOpCMicrokernelTester::OpType::AddC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  TEST(F32_VADDC_MINMAX__WASMSIMD_ARM_X8, qmax) {  // batch sizes 1, 8, 15, ..., 36 with qmax(128); presumably tightens the upper output bound - confirm in VBinOpCMicrokernelTester
+    for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
+      VBinOpCMicrokernelTester()
+        .batch_size(batch_size)
+        .qmax(128)
+        .Test(xnn_f32_vaddc_minmax_ukernel__wasmsimd_arm_x8, VBinOpCMicrokernelTester::OpType::AddC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+#endif  // XNN_ARCH_WASMSIMD
+
+
+#if XNN_ARCH_WASMSIMD
+  TEST(F32_VADDC_MINMAX__WASMSIMD_X86_X4, batch_eq_4) {  // single run with exactly 4 elements
+    VBinOpCMicrokernelTester()
+      .batch_size(4)
+      .Test(xnn_f32_vaddc_minmax_ukernel__wasmsimd_x86_x4, VBinOpCMicrokernelTester::OpType::AddC, VBinOpCMicrokernelTester::Variant::Scalar);
+  }
+
+  TEST(F32_VADDC_MINMAX__WASMSIMD_X86_X4, batch_div_4) {  // multiples of 4: 8, 12, ..., 36
+    for (size_t batch_size = 8; batch_size < 40; batch_size += 4) {
+      VBinOpCMicrokernelTester()
+        .batch_size(batch_size)
+        .Test(xnn_f32_vaddc_minmax_ukernel__wasmsimd_x86_x4, VBinOpCMicrokernelTester::OpType::AddC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  TEST(F32_VADDC_MINMAX__WASMSIMD_X86_X4, batch_lt_4) {  // batch sizes 1, 2, 3
+    for (size_t batch_size = 1; batch_size < 4; batch_size++) {
+      VBinOpCMicrokernelTester()
+        .batch_size(batch_size)
+        .Test(xnn_f32_vaddc_minmax_ukernel__wasmsimd_x86_x4, VBinOpCMicrokernelTester::OpType::AddC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  TEST(F32_VADDC_MINMAX__WASMSIMD_X86_X4, batch_gt_4) {  // batch sizes 5, 6, 7
+    for (size_t batch_size = 5; batch_size < 8; batch_size++) {
+      VBinOpCMicrokernelTester()
+        .batch_size(batch_size)
+        .Test(xnn_f32_vaddc_minmax_ukernel__wasmsimd_x86_x4, VBinOpCMicrokernelTester::OpType::AddC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  TEST(F32_VADDC_MINMAX__WASMSIMD_X86_X4, inplace) {  // batch sizes 1, 4, 7, ..., 19 with inplace(true)
+    for (size_t batch_size = 1; batch_size <= 20; batch_size += 3) {
+      VBinOpCMicrokernelTester()
+        .batch_size(batch_size)
+        .inplace(true)
+        .Test(xnn_f32_vaddc_minmax_ukernel__wasmsimd_x86_x4, VBinOpCMicrokernelTester::OpType::AddC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  TEST(F32_VADDC_MINMAX__WASMSIMD_X86_X4, qmin) {  // batch sizes 1, 4, 7, ..., 19 with qmin(128); presumably tightens the lower output bound - confirm in VBinOpCMicrokernelTester
+    for (size_t batch_size = 1; batch_size <= 20; batch_size += 3) {
+      VBinOpCMicrokernelTester()
+        .batch_size(batch_size)
+        .qmin(128)
+        .Test(xnn_f32_vaddc_minmax_ukernel__wasmsimd_x86_x4, VBinOpCMicrokernelTester::OpType::AddC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  TEST(F32_VADDC_MINMAX__WASMSIMD_X86_X4, qmax) {  // batch sizes 1, 4, 7, ..., 19 with qmax(128); presumably tightens the upper output bound - confirm in VBinOpCMicrokernelTester
+    for (size_t batch_size = 1; batch_size <= 20; batch_size += 3) {
+      VBinOpCMicrokernelTester()
+        .batch_size(batch_size)
+        .qmax(128)
+        .Test(xnn_f32_vaddc_minmax_ukernel__wasmsimd_x86_x4, VBinOpCMicrokernelTester::OpType::AddC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+#endif  // XNN_ARCH_WASMSIMD
+
+
+#if XNN_ARCH_WASMSIMD
+  TEST(F32_VADDC_MINMAX__WASMSIMD_X86_X8, batch_eq_8) {  // single run with exactly 8 elements
+    VBinOpCMicrokernelTester()
+      .batch_size(8)
+      .Test(xnn_f32_vaddc_minmax_ukernel__wasmsimd_x86_x8, VBinOpCMicrokernelTester::OpType::AddC, VBinOpCMicrokernelTester::Variant::Scalar);
+  }
+
+  TEST(F32_VADDC_MINMAX__WASMSIMD_X86_X8, batch_div_8) {  // multiples of 8: 16, 24, ..., 72
+    for (size_t batch_size = 16; batch_size < 80; batch_size += 8) {
+      VBinOpCMicrokernelTester()
+        .batch_size(batch_size)
+        .Test(xnn_f32_vaddc_minmax_ukernel__wasmsimd_x86_x8, VBinOpCMicrokernelTester::OpType::AddC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  TEST(F32_VADDC_MINMAX__WASMSIMD_X86_X8, batch_lt_8) {  // batch sizes 1 through 7
+    for (size_t batch_size = 1; batch_size < 8; batch_size++) {
+      VBinOpCMicrokernelTester()
+        .batch_size(batch_size)
+        .Test(xnn_f32_vaddc_minmax_ukernel__wasmsimd_x86_x8, VBinOpCMicrokernelTester::OpType::AddC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  TEST(F32_VADDC_MINMAX__WASMSIMD_X86_X8, batch_gt_8) {  // batch sizes 9 through 15
+    for (size_t batch_size = 9; batch_size < 16; batch_size++) {
+      VBinOpCMicrokernelTester()
+        .batch_size(batch_size)
+        .Test(xnn_f32_vaddc_minmax_ukernel__wasmsimd_x86_x8, VBinOpCMicrokernelTester::OpType::AddC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  TEST(F32_VADDC_MINMAX__WASMSIMD_X86_X8, inplace) {  // batch sizes 1, 8, 15, ..., 36 with inplace(true)
+    for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
+      VBinOpCMicrokernelTester()
+        .batch_size(batch_size)
+        .inplace(true)
+        .Test(xnn_f32_vaddc_minmax_ukernel__wasmsimd_x86_x8, VBinOpCMicrokernelTester::OpType::AddC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  TEST(F32_VADDC_MINMAX__WASMSIMD_X86_X8, qmin) {  // batch sizes 1, 8, 15, ..., 36 with qmin(128); presumably tightens the lower output bound - confirm in VBinOpCMicrokernelTester
+    for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
+      VBinOpCMicrokernelTester()
+        .batch_size(batch_size)
+        .qmin(128)
+        .Test(xnn_f32_vaddc_minmax_ukernel__wasmsimd_x86_x8, VBinOpCMicrokernelTester::OpType::AddC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  TEST(F32_VADDC_MINMAX__WASMSIMD_X86_X8, qmax) {  // batch sizes 1, 8, 15, ..., 36 with qmax(128); presumably tightens the upper output bound - confirm in VBinOpCMicrokernelTester
+    for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
+      VBinOpCMicrokernelTester()
+        .batch_size(batch_size)
+        .qmax(128)
+        .Test(xnn_f32_vaddc_minmax_ukernel__wasmsimd_x86_x8, VBinOpCMicrokernelTester::OpType::AddC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+#endif  // XNN_ARCH_WASMSIMD
+
+
 #if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD
   TEST(F32_VADDC_MINMAX__WASM_X1, batch_eq_1) {
     VBinOpCMicrokernelTester()
diff --git a/test/f32-vaddc-minmax.yaml b/test/f32-vaddc-minmax.yaml
index d68af5e..5923aaa 100644
--- a/test/f32-vaddc-minmax.yaml
+++ b/test/f32-vaddc-minmax.yaml
@@ -12,6 +12,10 @@
 - name: xnn_f32_vaddc_minmax_ukernel__avx512f_x32
 - name: xnn_f32_vaddc_minmax_ukernel__psimd_x4
 - name: xnn_f32_vaddc_minmax_ukernel__psimd_x8
+- name: xnn_f32_vaddc_minmax_ukernel__wasmsimd_arm_x4
+- name: xnn_f32_vaddc_minmax_ukernel__wasmsimd_arm_x8
+- name: xnn_f32_vaddc_minmax_ukernel__wasmsimd_x86_x4
+- name: xnn_f32_vaddc_minmax_ukernel__wasmsimd_x86_x8
 - name: xnn_f32_vaddc_minmax_ukernel__wasm_x1
 - name: xnn_f32_vaddc_minmax_ukernel__wasm_x2
 - name: xnn_f32_vaddc_minmax_ukernel__wasm_x4
diff --git a/test/f32-vdivc-minmax.cc b/test/f32-vdivc-minmax.cc
index c66a977..fc24f56 100644
--- a/test/f32-vdivc-minmax.cc
+++ b/test/f32-vdivc-minmax.cc
@@ -687,6 +687,246 @@
 #endif  // !XNN_ARCH_ASMJS && !XNN_ARCH_WASM && !XNN_COMPILER_MSVC && !XNN_COMPILER_ICC
 
 
+#if XNN_ARCH_WASMSIMD  // Tests for xnn_f32_vdivc_minmax_ukernel__wasmsimd_arm_x4; compiled only when XNN_ARCH_WASMSIMD is non-zero.
+  TEST(F32_VDIVC_MINMAX__WASMSIMD_ARM_X4, batch_eq_4) {  // Single run with batch_size == 4.
+    VBinOpCMicrokernelTester()
+      .batch_size(4)
+      .Test(xnn_f32_vdivc_minmax_ukernel__wasmsimd_arm_x4, VBinOpCMicrokernelTester::OpType::DivC, VBinOpCMicrokernelTester::Variant::Scalar);
+  }
+
+  TEST(F32_VDIVC_MINMAX__WASMSIMD_ARM_X4, batch_div_4) {  // batch_size = 8, 12, ..., 36 (multiples of 4).
+    for (size_t batch_size = 8; batch_size < 40; batch_size += 4) {
+      VBinOpCMicrokernelTester()
+        .batch_size(batch_size)
+        .Test(xnn_f32_vdivc_minmax_ukernel__wasmsimd_arm_x4, VBinOpCMicrokernelTester::OpType::DivC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  TEST(F32_VDIVC_MINMAX__WASMSIMD_ARM_X4, batch_lt_4) {  // batch_size = 1..3.
+    for (size_t batch_size = 1; batch_size < 4; batch_size++) {
+      VBinOpCMicrokernelTester()
+        .batch_size(batch_size)
+        .Test(xnn_f32_vdivc_minmax_ukernel__wasmsimd_arm_x4, VBinOpCMicrokernelTester::OpType::DivC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  TEST(F32_VDIVC_MINMAX__WASMSIMD_ARM_X4, batch_gt_4) {  // batch_size = 5..7.
+    for (size_t batch_size = 5; batch_size < 8; batch_size++) {
+      VBinOpCMicrokernelTester()
+        .batch_size(batch_size)
+        .Test(xnn_f32_vdivc_minmax_ukernel__wasmsimd_arm_x4, VBinOpCMicrokernelTester::OpType::DivC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  TEST(F32_VDIVC_MINMAX__WASMSIMD_ARM_X4, inplace) {  // batch_size = 1, 4, ..., 19 with inplace(true).
+    for (size_t batch_size = 1; batch_size <= 20; batch_size += 3) {
+      VBinOpCMicrokernelTester()
+        .batch_size(batch_size)
+        .inplace(true)  // NOTE(review): presumably output aliases input; confirm in tester.
+        .Test(xnn_f32_vdivc_minmax_ukernel__wasmsimd_arm_x4, VBinOpCMicrokernelTester::OpType::DivC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  TEST(F32_VDIVC_MINMAX__WASMSIMD_ARM_X4, qmin) {  // batch_size = 1, 4, ..., 19 with qmin(128).
+    for (size_t batch_size = 1; batch_size <= 20; batch_size += 3) {
+      VBinOpCMicrokernelTester()
+        .batch_size(batch_size)
+        .qmin(128)  // NOTE(review): presumably tightens the lower output clamp; confirm in tester.
+        .Test(xnn_f32_vdivc_minmax_ukernel__wasmsimd_arm_x4, VBinOpCMicrokernelTester::OpType::DivC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  TEST(F32_VDIVC_MINMAX__WASMSIMD_ARM_X4, qmax) {  // batch_size = 1, 4, ..., 19 with qmax(128).
+    for (size_t batch_size = 1; batch_size <= 20; batch_size += 3) {
+      VBinOpCMicrokernelTester()
+        .batch_size(batch_size)
+        .qmax(128)  // NOTE(review): presumably tightens the upper output clamp; confirm in tester.
+        .Test(xnn_f32_vdivc_minmax_ukernel__wasmsimd_arm_x4, VBinOpCMicrokernelTester::OpType::DivC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+#endif  // XNN_ARCH_WASMSIMD
+
+
+#if XNN_ARCH_WASMSIMD  // Tests for xnn_f32_vdivc_minmax_ukernel__wasmsimd_arm_x8; compiled only when XNN_ARCH_WASMSIMD is non-zero.
+  TEST(F32_VDIVC_MINMAX__WASMSIMD_ARM_X8, batch_eq_8) {  // Single run with batch_size == 8.
+    VBinOpCMicrokernelTester()
+      .batch_size(8)
+      .Test(xnn_f32_vdivc_minmax_ukernel__wasmsimd_arm_x8, VBinOpCMicrokernelTester::OpType::DivC, VBinOpCMicrokernelTester::Variant::Scalar);
+  }
+
+  TEST(F32_VDIVC_MINMAX__WASMSIMD_ARM_X8, batch_div_8) {  // batch_size = 16, 24, ..., 72 (multiples of 8).
+    for (size_t batch_size = 16; batch_size < 80; batch_size += 8) {
+      VBinOpCMicrokernelTester()
+        .batch_size(batch_size)
+        .Test(xnn_f32_vdivc_minmax_ukernel__wasmsimd_arm_x8, VBinOpCMicrokernelTester::OpType::DivC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  TEST(F32_VDIVC_MINMAX__WASMSIMD_ARM_X8, batch_lt_8) {  // batch_size = 1..7.
+    for (size_t batch_size = 1; batch_size < 8; batch_size++) {
+      VBinOpCMicrokernelTester()
+        .batch_size(batch_size)
+        .Test(xnn_f32_vdivc_minmax_ukernel__wasmsimd_arm_x8, VBinOpCMicrokernelTester::OpType::DivC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  TEST(F32_VDIVC_MINMAX__WASMSIMD_ARM_X8, batch_gt_8) {  // batch_size = 9..15.
+    for (size_t batch_size = 9; batch_size < 16; batch_size++) {
+      VBinOpCMicrokernelTester()
+        .batch_size(batch_size)
+        .Test(xnn_f32_vdivc_minmax_ukernel__wasmsimd_arm_x8, VBinOpCMicrokernelTester::OpType::DivC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  TEST(F32_VDIVC_MINMAX__WASMSIMD_ARM_X8, inplace) {  // batch_size = 1, 8, ..., 36 with inplace(true).
+    for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
+      VBinOpCMicrokernelTester()
+        .batch_size(batch_size)
+        .inplace(true)  // NOTE(review): presumably output aliases input; confirm in tester.
+        .Test(xnn_f32_vdivc_minmax_ukernel__wasmsimd_arm_x8, VBinOpCMicrokernelTester::OpType::DivC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  TEST(F32_VDIVC_MINMAX__WASMSIMD_ARM_X8, qmin) {  // batch_size = 1, 8, ..., 36 with qmin(128).
+    for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
+      VBinOpCMicrokernelTester()
+        .batch_size(batch_size)
+        .qmin(128)  // NOTE(review): presumably tightens the lower output clamp; confirm in tester.
+        .Test(xnn_f32_vdivc_minmax_ukernel__wasmsimd_arm_x8, VBinOpCMicrokernelTester::OpType::DivC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  TEST(F32_VDIVC_MINMAX__WASMSIMD_ARM_X8, qmax) {  // batch_size = 1, 8, ..., 36 with qmax(128).
+    for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
+      VBinOpCMicrokernelTester()
+        .batch_size(batch_size)
+        .qmax(128)  // NOTE(review): presumably tightens the upper output clamp; confirm in tester.
+        .Test(xnn_f32_vdivc_minmax_ukernel__wasmsimd_arm_x8, VBinOpCMicrokernelTester::OpType::DivC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+#endif  // XNN_ARCH_WASMSIMD
+
+
+#if XNN_ARCH_WASMSIMD  // Tests for xnn_f32_vdivc_minmax_ukernel__wasmsimd_x86_x4; compiled only when XNN_ARCH_WASMSIMD is non-zero.
+  TEST(F32_VDIVC_MINMAX__WASMSIMD_X86_X4, batch_eq_4) {  // Single run with batch_size == 4.
+    VBinOpCMicrokernelTester()
+      .batch_size(4)
+      .Test(xnn_f32_vdivc_minmax_ukernel__wasmsimd_x86_x4, VBinOpCMicrokernelTester::OpType::DivC, VBinOpCMicrokernelTester::Variant::Scalar);
+  }
+
+  TEST(F32_VDIVC_MINMAX__WASMSIMD_X86_X4, batch_div_4) {  // batch_size = 8, 12, ..., 36 (multiples of 4).
+    for (size_t batch_size = 8; batch_size < 40; batch_size += 4) {
+      VBinOpCMicrokernelTester()
+        .batch_size(batch_size)
+        .Test(xnn_f32_vdivc_minmax_ukernel__wasmsimd_x86_x4, VBinOpCMicrokernelTester::OpType::DivC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  TEST(F32_VDIVC_MINMAX__WASMSIMD_X86_X4, batch_lt_4) {  // batch_size = 1..3.
+    for (size_t batch_size = 1; batch_size < 4; batch_size++) {
+      VBinOpCMicrokernelTester()
+        .batch_size(batch_size)
+        .Test(xnn_f32_vdivc_minmax_ukernel__wasmsimd_x86_x4, VBinOpCMicrokernelTester::OpType::DivC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  TEST(F32_VDIVC_MINMAX__WASMSIMD_X86_X4, batch_gt_4) {  // batch_size = 5..7.
+    for (size_t batch_size = 5; batch_size < 8; batch_size++) {
+      VBinOpCMicrokernelTester()
+        .batch_size(batch_size)
+        .Test(xnn_f32_vdivc_minmax_ukernel__wasmsimd_x86_x4, VBinOpCMicrokernelTester::OpType::DivC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  TEST(F32_VDIVC_MINMAX__WASMSIMD_X86_X4, inplace) {  // batch_size = 1, 4, ..., 19 with inplace(true).
+    for (size_t batch_size = 1; batch_size <= 20; batch_size += 3) {
+      VBinOpCMicrokernelTester()
+        .batch_size(batch_size)
+        .inplace(true)  // NOTE(review): presumably output aliases input; confirm in tester.
+        .Test(xnn_f32_vdivc_minmax_ukernel__wasmsimd_x86_x4, VBinOpCMicrokernelTester::OpType::DivC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  TEST(F32_VDIVC_MINMAX__WASMSIMD_X86_X4, qmin) {  // batch_size = 1, 4, ..., 19 with qmin(128).
+    for (size_t batch_size = 1; batch_size <= 20; batch_size += 3) {
+      VBinOpCMicrokernelTester()
+        .batch_size(batch_size)
+        .qmin(128)  // NOTE(review): presumably tightens the lower output clamp; confirm in tester.
+        .Test(xnn_f32_vdivc_minmax_ukernel__wasmsimd_x86_x4, VBinOpCMicrokernelTester::OpType::DivC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  TEST(F32_VDIVC_MINMAX__WASMSIMD_X86_X4, qmax) {  // batch_size = 1, 4, ..., 19 with qmax(128).
+    for (size_t batch_size = 1; batch_size <= 20; batch_size += 3) {
+      VBinOpCMicrokernelTester()
+        .batch_size(batch_size)
+        .qmax(128)  // NOTE(review): presumably tightens the upper output clamp; confirm in tester.
+        .Test(xnn_f32_vdivc_minmax_ukernel__wasmsimd_x86_x4, VBinOpCMicrokernelTester::OpType::DivC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+#endif  // XNN_ARCH_WASMSIMD
+
+
+#if XNN_ARCH_WASMSIMD  // Tests for xnn_f32_vdivc_minmax_ukernel__wasmsimd_x86_x8; compiled only when XNN_ARCH_WASMSIMD is non-zero.
+  TEST(F32_VDIVC_MINMAX__WASMSIMD_X86_X8, batch_eq_8) {  // Single run with batch_size == 8.
+    VBinOpCMicrokernelTester()
+      .batch_size(8)
+      .Test(xnn_f32_vdivc_minmax_ukernel__wasmsimd_x86_x8, VBinOpCMicrokernelTester::OpType::DivC, VBinOpCMicrokernelTester::Variant::Scalar);
+  }
+
+  TEST(F32_VDIVC_MINMAX__WASMSIMD_X86_X8, batch_div_8) {  // batch_size = 16, 24, ..., 72 (multiples of 8).
+    for (size_t batch_size = 16; batch_size < 80; batch_size += 8) {
+      VBinOpCMicrokernelTester()
+        .batch_size(batch_size)
+        .Test(xnn_f32_vdivc_minmax_ukernel__wasmsimd_x86_x8, VBinOpCMicrokernelTester::OpType::DivC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  TEST(F32_VDIVC_MINMAX__WASMSIMD_X86_X8, batch_lt_8) {  // batch_size = 1..7.
+    for (size_t batch_size = 1; batch_size < 8; batch_size++) {
+      VBinOpCMicrokernelTester()
+        .batch_size(batch_size)
+        .Test(xnn_f32_vdivc_minmax_ukernel__wasmsimd_x86_x8, VBinOpCMicrokernelTester::OpType::DivC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  TEST(F32_VDIVC_MINMAX__WASMSIMD_X86_X8, batch_gt_8) {  // batch_size = 9..15.
+    for (size_t batch_size = 9; batch_size < 16; batch_size++) {
+      VBinOpCMicrokernelTester()
+        .batch_size(batch_size)
+        .Test(xnn_f32_vdivc_minmax_ukernel__wasmsimd_x86_x8, VBinOpCMicrokernelTester::OpType::DivC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  TEST(F32_VDIVC_MINMAX__WASMSIMD_X86_X8, inplace) {  // batch_size = 1, 8, ..., 36 with inplace(true).
+    for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
+      VBinOpCMicrokernelTester()
+        .batch_size(batch_size)
+        .inplace(true)  // NOTE(review): presumably output aliases input; confirm in tester.
+        .Test(xnn_f32_vdivc_minmax_ukernel__wasmsimd_x86_x8, VBinOpCMicrokernelTester::OpType::DivC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  TEST(F32_VDIVC_MINMAX__WASMSIMD_X86_X8, qmin) {  // batch_size = 1, 8, ..., 36 with qmin(128).
+    for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
+      VBinOpCMicrokernelTester()
+        .batch_size(batch_size)
+        .qmin(128)  // NOTE(review): presumably tightens the lower output clamp; confirm in tester.
+        .Test(xnn_f32_vdivc_minmax_ukernel__wasmsimd_x86_x8, VBinOpCMicrokernelTester::OpType::DivC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  TEST(F32_VDIVC_MINMAX__WASMSIMD_X86_X8, qmax) {  // batch_size = 1, 8, ..., 36 with qmax(128).
+    for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
+      VBinOpCMicrokernelTester()
+        .batch_size(batch_size)
+        .qmax(128)  // NOTE(review): presumably tightens the upper output clamp; confirm in tester.
+        .Test(xnn_f32_vdivc_minmax_ukernel__wasmsimd_x86_x8, VBinOpCMicrokernelTester::OpType::DivC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+#endif  // XNN_ARCH_WASMSIMD
+
+
 #if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD
   TEST(F32_VDIVC_MINMAX__WASM_X1, batch_eq_1) {
     VBinOpCMicrokernelTester()
diff --git a/test/f32-vdivc-minmax.yaml b/test/f32-vdivc-minmax.yaml
index e6e39b0..1b6b0dc 100644
--- a/test/f32-vdivc-minmax.yaml
+++ b/test/f32-vdivc-minmax.yaml
@@ -16,6 +16,10 @@
 - name: xnn_f32_vdivc_minmax_ukernel__avx512f_x32
 - name: xnn_f32_vdivc_minmax_ukernel__psimd_x4
 - name: xnn_f32_vdivc_minmax_ukernel__psimd_x8
+- name: xnn_f32_vdivc_minmax_ukernel__wasmsimd_arm_x4
+- name: xnn_f32_vdivc_minmax_ukernel__wasmsimd_arm_x8
+- name: xnn_f32_vdivc_minmax_ukernel__wasmsimd_x86_x4
+- name: xnn_f32_vdivc_minmax_ukernel__wasmsimd_x86_x8
 - name: xnn_f32_vdivc_minmax_ukernel__wasm_x1
 - name: xnn_f32_vdivc_minmax_ukernel__wasm_x2
 - name: xnn_f32_vdivc_minmax_ukernel__wasm_x4
diff --git a/test/f32-vmaxc.cc b/test/f32-vmaxc.cc
index 5f2c231..625815f 100644
--- a/test/f32-vmaxc.cc
+++ b/test/f32-vmaxc.cc
@@ -487,6 +487,174 @@
 #endif  // !XNN_ARCH_ASMJS && !XNN_ARCH_WASM && !XNN_COMPILER_MSVC && !XNN_COMPILER_ICC
 
 
+#if XNN_ARCH_WASMSIMD  // Tests for xnn_f32_vmaxc_ukernel__wasmsimd_arm_x4; compiled only when XNN_ARCH_WASMSIMD is non-zero.
+  TEST(F32_VMAXC__WASMSIMD_ARM_X4, batch_eq_4) {  // Single run with batch_size == 4.
+    VBinOpCMicrokernelTester()
+      .batch_size(4)
+      .Test(xnn_f32_vmaxc_ukernel__wasmsimd_arm_x4, VBinOpCMicrokernelTester::OpType::MaxC, VBinOpCMicrokernelTester::Variant::Scalar);
+  }
+
+  TEST(F32_VMAXC__WASMSIMD_ARM_X4, batch_div_4) {  // batch_size = 8, 12, ..., 36 (multiples of 4).
+    for (size_t batch_size = 8; batch_size < 40; batch_size += 4) {
+      VBinOpCMicrokernelTester()
+        .batch_size(batch_size)
+        .Test(xnn_f32_vmaxc_ukernel__wasmsimd_arm_x4, VBinOpCMicrokernelTester::OpType::MaxC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  TEST(F32_VMAXC__WASMSIMD_ARM_X4, batch_lt_4) {  // batch_size = 1..3.
+    for (size_t batch_size = 1; batch_size < 4; batch_size++) {
+      VBinOpCMicrokernelTester()
+        .batch_size(batch_size)
+        .Test(xnn_f32_vmaxc_ukernel__wasmsimd_arm_x4, VBinOpCMicrokernelTester::OpType::MaxC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  TEST(F32_VMAXC__WASMSIMD_ARM_X4, batch_gt_4) {  // batch_size = 5..7.
+    for (size_t batch_size = 5; batch_size < 8; batch_size++) {
+      VBinOpCMicrokernelTester()
+        .batch_size(batch_size)
+        .Test(xnn_f32_vmaxc_ukernel__wasmsimd_arm_x4, VBinOpCMicrokernelTester::OpType::MaxC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  TEST(F32_VMAXC__WASMSIMD_ARM_X4, inplace) {  // batch_size = 1, 4, ..., 19 with inplace(true).
+    for (size_t batch_size = 1; batch_size <= 20; batch_size += 3) {
+      VBinOpCMicrokernelTester()
+        .batch_size(batch_size)
+        .inplace(true)  // NOTE(review): presumably output aliases input; confirm in tester.
+        .Test(xnn_f32_vmaxc_ukernel__wasmsimd_arm_x4, VBinOpCMicrokernelTester::OpType::MaxC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+#endif  // XNN_ARCH_WASMSIMD
+
+
+#if XNN_ARCH_WASMSIMD  // Tests for xnn_f32_vmaxc_ukernel__wasmsimd_arm_x8; compiled only when XNN_ARCH_WASMSIMD is non-zero.
+  TEST(F32_VMAXC__WASMSIMD_ARM_X8, batch_eq_8) {  // Single run with batch_size == 8.
+    VBinOpCMicrokernelTester()
+      .batch_size(8)
+      .Test(xnn_f32_vmaxc_ukernel__wasmsimd_arm_x8, VBinOpCMicrokernelTester::OpType::MaxC, VBinOpCMicrokernelTester::Variant::Scalar);
+  }
+
+  TEST(F32_VMAXC__WASMSIMD_ARM_X8, batch_div_8) {  // batch_size = 16, 24, ..., 72 (multiples of 8).
+    for (size_t batch_size = 16; batch_size < 80; batch_size += 8) {
+      VBinOpCMicrokernelTester()
+        .batch_size(batch_size)
+        .Test(xnn_f32_vmaxc_ukernel__wasmsimd_arm_x8, VBinOpCMicrokernelTester::OpType::MaxC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  TEST(F32_VMAXC__WASMSIMD_ARM_X8, batch_lt_8) {  // batch_size = 1..7.
+    for (size_t batch_size = 1; batch_size < 8; batch_size++) {
+      VBinOpCMicrokernelTester()
+        .batch_size(batch_size)
+        .Test(xnn_f32_vmaxc_ukernel__wasmsimd_arm_x8, VBinOpCMicrokernelTester::OpType::MaxC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  TEST(F32_VMAXC__WASMSIMD_ARM_X8, batch_gt_8) {  // batch_size = 9..15.
+    for (size_t batch_size = 9; batch_size < 16; batch_size++) {
+      VBinOpCMicrokernelTester()
+        .batch_size(batch_size)
+        .Test(xnn_f32_vmaxc_ukernel__wasmsimd_arm_x8, VBinOpCMicrokernelTester::OpType::MaxC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  TEST(F32_VMAXC__WASMSIMD_ARM_X8, inplace) {  // batch_size = 1, 8, ..., 36 with inplace(true).
+    for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
+      VBinOpCMicrokernelTester()
+        .batch_size(batch_size)
+        .inplace(true)  // NOTE(review): presumably output aliases input; confirm in tester.
+        .Test(xnn_f32_vmaxc_ukernel__wasmsimd_arm_x8, VBinOpCMicrokernelTester::OpType::MaxC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+#endif  // XNN_ARCH_WASMSIMD
+
+
+#if XNN_ARCH_WASMSIMD  // Tests for xnn_f32_vmaxc_ukernel__wasmsimd_x86_x4; compiled only when XNN_ARCH_WASMSIMD is non-zero.
+  TEST(F32_VMAXC__WASMSIMD_X86_X4, batch_eq_4) {  // Single run with batch_size == 4.
+    VBinOpCMicrokernelTester()
+      .batch_size(4)
+      .Test(xnn_f32_vmaxc_ukernel__wasmsimd_x86_x4, VBinOpCMicrokernelTester::OpType::MaxC, VBinOpCMicrokernelTester::Variant::Scalar);
+  }
+
+  TEST(F32_VMAXC__WASMSIMD_X86_X4, batch_div_4) {  // batch_size = 8, 12, ..., 36 (multiples of 4).
+    for (size_t batch_size = 8; batch_size < 40; batch_size += 4) {
+      VBinOpCMicrokernelTester()
+        .batch_size(batch_size)
+        .Test(xnn_f32_vmaxc_ukernel__wasmsimd_x86_x4, VBinOpCMicrokernelTester::OpType::MaxC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  TEST(F32_VMAXC__WASMSIMD_X86_X4, batch_lt_4) {  // batch_size = 1..3.
+    for (size_t batch_size = 1; batch_size < 4; batch_size++) {
+      VBinOpCMicrokernelTester()
+        .batch_size(batch_size)
+        .Test(xnn_f32_vmaxc_ukernel__wasmsimd_x86_x4, VBinOpCMicrokernelTester::OpType::MaxC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  TEST(F32_VMAXC__WASMSIMD_X86_X4, batch_gt_4) {  // batch_size = 5..7.
+    for (size_t batch_size = 5; batch_size < 8; batch_size++) {
+      VBinOpCMicrokernelTester()
+        .batch_size(batch_size)
+        .Test(xnn_f32_vmaxc_ukernel__wasmsimd_x86_x4, VBinOpCMicrokernelTester::OpType::MaxC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  TEST(F32_VMAXC__WASMSIMD_X86_X4, inplace) {  // batch_size = 1, 4, ..., 19 with inplace(true).
+    for (size_t batch_size = 1; batch_size <= 20; batch_size += 3) {
+      VBinOpCMicrokernelTester()
+        .batch_size(batch_size)
+        .inplace(true)  // NOTE(review): presumably output aliases input; confirm in tester.
+        .Test(xnn_f32_vmaxc_ukernel__wasmsimd_x86_x4, VBinOpCMicrokernelTester::OpType::MaxC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+#endif  // XNN_ARCH_WASMSIMD
+
+
+#if XNN_ARCH_WASMSIMD  // Tests for xnn_f32_vmaxc_ukernel__wasmsimd_x86_x8; compiled only when XNN_ARCH_WASMSIMD is non-zero.
+  TEST(F32_VMAXC__WASMSIMD_X86_X8, batch_eq_8) {  // Single run with batch_size == 8.
+    VBinOpCMicrokernelTester()
+      .batch_size(8)
+      .Test(xnn_f32_vmaxc_ukernel__wasmsimd_x86_x8, VBinOpCMicrokernelTester::OpType::MaxC, VBinOpCMicrokernelTester::Variant::Scalar);
+  }
+
+  TEST(F32_VMAXC__WASMSIMD_X86_X8, batch_div_8) {  // batch_size = 16, 24, ..., 72 (multiples of 8).
+    for (size_t batch_size = 16; batch_size < 80; batch_size += 8) {
+      VBinOpCMicrokernelTester()
+        .batch_size(batch_size)
+        .Test(xnn_f32_vmaxc_ukernel__wasmsimd_x86_x8, VBinOpCMicrokernelTester::OpType::MaxC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  TEST(F32_VMAXC__WASMSIMD_X86_X8, batch_lt_8) {  // batch_size = 1..7.
+    for (size_t batch_size = 1; batch_size < 8; batch_size++) {
+      VBinOpCMicrokernelTester()
+        .batch_size(batch_size)
+        .Test(xnn_f32_vmaxc_ukernel__wasmsimd_x86_x8, VBinOpCMicrokernelTester::OpType::MaxC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  TEST(F32_VMAXC__WASMSIMD_X86_X8, batch_gt_8) {  // batch_size = 9..15.
+    for (size_t batch_size = 9; batch_size < 16; batch_size++) {
+      VBinOpCMicrokernelTester()
+        .batch_size(batch_size)
+        .Test(xnn_f32_vmaxc_ukernel__wasmsimd_x86_x8, VBinOpCMicrokernelTester::OpType::MaxC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  TEST(F32_VMAXC__WASMSIMD_X86_X8, inplace) {  // batch_size = 1, 8, ..., 36 with inplace(true).
+    for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
+      VBinOpCMicrokernelTester()
+        .batch_size(batch_size)
+        .inplace(true)  // NOTE(review): presumably output aliases input; confirm in tester.
+        .Test(xnn_f32_vmaxc_ukernel__wasmsimd_x86_x8, VBinOpCMicrokernelTester::OpType::MaxC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+#endif  // XNN_ARCH_WASMSIMD
+
+
 #if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD
   TEST(F32_VMAXC__WASM_X1, batch_eq_1) {
     VBinOpCMicrokernelTester()
diff --git a/test/f32-vmaxc.yaml b/test/f32-vmaxc.yaml
index d566996..9dcc507 100644
--- a/test/f32-vmaxc.yaml
+++ b/test/f32-vmaxc.yaml
@@ -12,6 +12,10 @@
 - name: xnn_f32_vmaxc_ukernel__avx512f_x32
 - name: xnn_f32_vmaxc_ukernel__psimd_x4
 - name: xnn_f32_vmaxc_ukernel__psimd_x8
+- name: xnn_f32_vmaxc_ukernel__wasmsimd_arm_x4
+- name: xnn_f32_vmaxc_ukernel__wasmsimd_arm_x8
+- name: xnn_f32_vmaxc_ukernel__wasmsimd_x86_x4
+- name: xnn_f32_vmaxc_ukernel__wasmsimd_x86_x8
 - name: xnn_f32_vmaxc_ukernel__wasm_x1
 - name: xnn_f32_vmaxc_ukernel__wasm_x2
 - name: xnn_f32_vmaxc_ukernel__wasm_x4
diff --git a/test/f32-vminc.cc b/test/f32-vminc.cc
index f043b3a..9e752ed 100644
--- a/test/f32-vminc.cc
+++ b/test/f32-vminc.cc
@@ -487,6 +487,174 @@
 #endif  // !XNN_ARCH_ASMJS && !XNN_ARCH_WASM && !XNN_COMPILER_MSVC && !XNN_COMPILER_ICC
 
 
+#if XNN_ARCH_WASMSIMD  // Tests for xnn_f32_vminc_ukernel__wasmsimd_arm_x4; compiled only when XNN_ARCH_WASMSIMD is non-zero.
+  TEST(F32_VMINC__WASMSIMD_ARM_X4, batch_eq_4) {  // Single run with batch_size == 4.
+    VBinOpCMicrokernelTester()
+      .batch_size(4)
+      .Test(xnn_f32_vminc_ukernel__wasmsimd_arm_x4, VBinOpCMicrokernelTester::OpType::MinC, VBinOpCMicrokernelTester::Variant::Scalar);
+  }
+
+  TEST(F32_VMINC__WASMSIMD_ARM_X4, batch_div_4) {  // batch_size = 8, 12, ..., 36 (multiples of 4).
+    for (size_t batch_size = 8; batch_size < 40; batch_size += 4) {
+      VBinOpCMicrokernelTester()
+        .batch_size(batch_size)
+        .Test(xnn_f32_vminc_ukernel__wasmsimd_arm_x4, VBinOpCMicrokernelTester::OpType::MinC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  TEST(F32_VMINC__WASMSIMD_ARM_X4, batch_lt_4) {  // batch_size = 1..3.
+    for (size_t batch_size = 1; batch_size < 4; batch_size++) {
+      VBinOpCMicrokernelTester()
+        .batch_size(batch_size)
+        .Test(xnn_f32_vminc_ukernel__wasmsimd_arm_x4, VBinOpCMicrokernelTester::OpType::MinC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  TEST(F32_VMINC__WASMSIMD_ARM_X4, batch_gt_4) {  // batch_size = 5..7.
+    for (size_t batch_size = 5; batch_size < 8; batch_size++) {
+      VBinOpCMicrokernelTester()
+        .batch_size(batch_size)
+        .Test(xnn_f32_vminc_ukernel__wasmsimd_arm_x4, VBinOpCMicrokernelTester::OpType::MinC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  TEST(F32_VMINC__WASMSIMD_ARM_X4, inplace) {  // batch_size = 1, 4, ..., 19 with inplace(true).
+    for (size_t batch_size = 1; batch_size <= 20; batch_size += 3) {
+      VBinOpCMicrokernelTester()
+        .batch_size(batch_size)
+        .inplace(true)  // NOTE(review): presumably output aliases input; confirm in tester.
+        .Test(xnn_f32_vminc_ukernel__wasmsimd_arm_x4, VBinOpCMicrokernelTester::OpType::MinC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+#endif  // XNN_ARCH_WASMSIMD
+
+
+#if XNN_ARCH_WASMSIMD  // Tests for xnn_f32_vminc_ukernel__wasmsimd_arm_x8; compiled only when XNN_ARCH_WASMSIMD is non-zero.
+  TEST(F32_VMINC__WASMSIMD_ARM_X8, batch_eq_8) {  // Single run with batch_size == 8.
+    VBinOpCMicrokernelTester()
+      .batch_size(8)
+      .Test(xnn_f32_vminc_ukernel__wasmsimd_arm_x8, VBinOpCMicrokernelTester::OpType::MinC, VBinOpCMicrokernelTester::Variant::Scalar);
+  }
+
+  TEST(F32_VMINC__WASMSIMD_ARM_X8, batch_div_8) {  // batch_size = 16, 24, ..., 72 (multiples of 8).
+    for (size_t batch_size = 16; batch_size < 80; batch_size += 8) {
+      VBinOpCMicrokernelTester()
+        .batch_size(batch_size)
+        .Test(xnn_f32_vminc_ukernel__wasmsimd_arm_x8, VBinOpCMicrokernelTester::OpType::MinC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  TEST(F32_VMINC__WASMSIMD_ARM_X8, batch_lt_8) {  // batch_size = 1..7.
+    for (size_t batch_size = 1; batch_size < 8; batch_size++) {
+      VBinOpCMicrokernelTester()
+        .batch_size(batch_size)
+        .Test(xnn_f32_vminc_ukernel__wasmsimd_arm_x8, VBinOpCMicrokernelTester::OpType::MinC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  TEST(F32_VMINC__WASMSIMD_ARM_X8, batch_gt_8) {  // batch_size = 9..15.
+    for (size_t batch_size = 9; batch_size < 16; batch_size++) {
+      VBinOpCMicrokernelTester()
+        .batch_size(batch_size)
+        .Test(xnn_f32_vminc_ukernel__wasmsimd_arm_x8, VBinOpCMicrokernelTester::OpType::MinC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  TEST(F32_VMINC__WASMSIMD_ARM_X8, inplace) {  // batch_size = 1, 8, ..., 36 with inplace(true).
+    for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
+      VBinOpCMicrokernelTester()
+        .batch_size(batch_size)
+        .inplace(true)  // NOTE(review): presumably output aliases input; confirm in tester.
+        .Test(xnn_f32_vminc_ukernel__wasmsimd_arm_x8, VBinOpCMicrokernelTester::OpType::MinC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+#endif  // XNN_ARCH_WASMSIMD
+
+
+#if XNN_ARCH_WASMSIMD  // Tests for xnn_f32_vminc_ukernel__wasmsimd_x86_x4; compiled only when XNN_ARCH_WASMSIMD is non-zero.
+  TEST(F32_VMINC__WASMSIMD_X86_X4, batch_eq_4) {  // Single run with batch_size == 4.
+    VBinOpCMicrokernelTester()
+      .batch_size(4)
+      .Test(xnn_f32_vminc_ukernel__wasmsimd_x86_x4, VBinOpCMicrokernelTester::OpType::MinC, VBinOpCMicrokernelTester::Variant::Scalar);
+  }
+
+  TEST(F32_VMINC__WASMSIMD_X86_X4, batch_div_4) {  // batch_size = 8, 12, ..., 36 (multiples of 4).
+    for (size_t batch_size = 8; batch_size < 40; batch_size += 4) {
+      VBinOpCMicrokernelTester()
+        .batch_size(batch_size)
+        .Test(xnn_f32_vminc_ukernel__wasmsimd_x86_x4, VBinOpCMicrokernelTester::OpType::MinC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  TEST(F32_VMINC__WASMSIMD_X86_X4, batch_lt_4) {  // batch_size = 1..3.
+    for (size_t batch_size = 1; batch_size < 4; batch_size++) {
+      VBinOpCMicrokernelTester()
+        .batch_size(batch_size)
+        .Test(xnn_f32_vminc_ukernel__wasmsimd_x86_x4, VBinOpCMicrokernelTester::OpType::MinC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  TEST(F32_VMINC__WASMSIMD_X86_X4, batch_gt_4) {  // batch_size = 5..7.
+    for (size_t batch_size = 5; batch_size < 8; batch_size++) {
+      VBinOpCMicrokernelTester()
+        .batch_size(batch_size)
+        .Test(xnn_f32_vminc_ukernel__wasmsimd_x86_x4, VBinOpCMicrokernelTester::OpType::MinC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  TEST(F32_VMINC__WASMSIMD_X86_X4, inplace) {  // batch_size = 1, 4, ..., 19 with inplace(true).
+    for (size_t batch_size = 1; batch_size <= 20; batch_size += 3) {
+      VBinOpCMicrokernelTester()
+        .batch_size(batch_size)
+        .inplace(true)  // NOTE(review): presumably output aliases input; confirm in tester.
+        .Test(xnn_f32_vminc_ukernel__wasmsimd_x86_x4, VBinOpCMicrokernelTester::OpType::MinC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+#endif  // XNN_ARCH_WASMSIMD
+
+
+#if XNN_ARCH_WASMSIMD  // Tests for xnn_f32_vminc_ukernel__wasmsimd_x86_x8; compiled only when XNN_ARCH_WASMSIMD is non-zero.
+  TEST(F32_VMINC__WASMSIMD_X86_X8, batch_eq_8) {  // Single run with batch_size == 8.
+    VBinOpCMicrokernelTester()
+      .batch_size(8)
+      .Test(xnn_f32_vminc_ukernel__wasmsimd_x86_x8, VBinOpCMicrokernelTester::OpType::MinC, VBinOpCMicrokernelTester::Variant::Scalar);
+  }
+
+  TEST(F32_VMINC__WASMSIMD_X86_X8, batch_div_8) {  // batch_size = 16, 24, ..., 72 (multiples of 8).
+    for (size_t batch_size = 16; batch_size < 80; batch_size += 8) {
+      VBinOpCMicrokernelTester()
+        .batch_size(batch_size)
+        .Test(xnn_f32_vminc_ukernel__wasmsimd_x86_x8, VBinOpCMicrokernelTester::OpType::MinC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  TEST(F32_VMINC__WASMSIMD_X86_X8, batch_lt_8) {  // batch_size = 1..7.
+    for (size_t batch_size = 1; batch_size < 8; batch_size++) {
+      VBinOpCMicrokernelTester()
+        .batch_size(batch_size)
+        .Test(xnn_f32_vminc_ukernel__wasmsimd_x86_x8, VBinOpCMicrokernelTester::OpType::MinC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  TEST(F32_VMINC__WASMSIMD_X86_X8, batch_gt_8) {  // batch_size = 9..15.
+    for (size_t batch_size = 9; batch_size < 16; batch_size++) {
+      VBinOpCMicrokernelTester()
+        .batch_size(batch_size)
+        .Test(xnn_f32_vminc_ukernel__wasmsimd_x86_x8, VBinOpCMicrokernelTester::OpType::MinC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  TEST(F32_VMINC__WASMSIMD_X86_X8, inplace) {  // batch_size = 1, 8, ..., 36 with inplace(true).
+    for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
+      VBinOpCMicrokernelTester()
+        .batch_size(batch_size)
+        .inplace(true)  // NOTE(review): presumably output aliases input; confirm in tester.
+        .Test(xnn_f32_vminc_ukernel__wasmsimd_x86_x8, VBinOpCMicrokernelTester::OpType::MinC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+#endif  // XNN_ARCH_WASMSIMD
+
+
 #if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD
   TEST(F32_VMINC__WASM_X1, batch_eq_1) {
     VBinOpCMicrokernelTester()
diff --git a/test/f32-vminc.yaml b/test/f32-vminc.yaml
index 922b2f5..1914cc6 100644
--- a/test/f32-vminc.yaml
+++ b/test/f32-vminc.yaml
@@ -12,6 +12,10 @@
 - name: xnn_f32_vminc_ukernel__avx512f_x32
 - name: xnn_f32_vminc_ukernel__psimd_x4
 - name: xnn_f32_vminc_ukernel__psimd_x8
+- name: xnn_f32_vminc_ukernel__wasmsimd_arm_x4
+- name: xnn_f32_vminc_ukernel__wasmsimd_arm_x8
+- name: xnn_f32_vminc_ukernel__wasmsimd_x86_x4
+- name: xnn_f32_vminc_ukernel__wasmsimd_x86_x8
 - name: xnn_f32_vminc_ukernel__wasm_x1
 - name: xnn_f32_vminc_ukernel__wasm_x2
 - name: xnn_f32_vminc_ukernel__wasm_x4
diff --git a/test/f32-vmulc-minmax.cc b/test/f32-vmulc-minmax.cc
index 0c7b55e..5e0f080 100644
--- a/test/f32-vmulc-minmax.cc
+++ b/test/f32-vmulc-minmax.cc
@@ -687,6 +687,246 @@
 #endif  // !XNN_ARCH_ASMJS && !XNN_ARCH_WASM && !XNN_COMPILER_MSVC && !XNN_COMPILER_ICC
 
 
+#if XNN_ARCH_WASMSIMD  // Tests for xnn_f32_vmulc_minmax_ukernel__wasmsimd_arm_x4; compiled only when XNN_ARCH_WASMSIMD is non-zero.
+  TEST(F32_VMULC_MINMAX__WASMSIMD_ARM_X4, batch_eq_4) {  // Single run with batch_size == 4.
+    VBinOpCMicrokernelTester()
+      .batch_size(4)
+      .Test(xnn_f32_vmulc_minmax_ukernel__wasmsimd_arm_x4, VBinOpCMicrokernelTester::OpType::MulC, VBinOpCMicrokernelTester::Variant::Scalar);
+  }
+
+  TEST(F32_VMULC_MINMAX__WASMSIMD_ARM_X4, batch_div_4) {  // batch_size = 8, 12, ..., 36 (multiples of 4).
+    for (size_t batch_size = 8; batch_size < 40; batch_size += 4) {
+      VBinOpCMicrokernelTester()
+        .batch_size(batch_size)
+        .Test(xnn_f32_vmulc_minmax_ukernel__wasmsimd_arm_x4, VBinOpCMicrokernelTester::OpType::MulC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  TEST(F32_VMULC_MINMAX__WASMSIMD_ARM_X4, batch_lt_4) {  // batch_size = 1..3.
+    for (size_t batch_size = 1; batch_size < 4; batch_size++) {
+      VBinOpCMicrokernelTester()
+        .batch_size(batch_size)
+        .Test(xnn_f32_vmulc_minmax_ukernel__wasmsimd_arm_x4, VBinOpCMicrokernelTester::OpType::MulC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  TEST(F32_VMULC_MINMAX__WASMSIMD_ARM_X4, batch_gt_4) {  // batch_size = 5..7.
+    for (size_t batch_size = 5; batch_size < 8; batch_size++) {
+      VBinOpCMicrokernelTester()
+        .batch_size(batch_size)
+        .Test(xnn_f32_vmulc_minmax_ukernel__wasmsimd_arm_x4, VBinOpCMicrokernelTester::OpType::MulC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  TEST(F32_VMULC_MINMAX__WASMSIMD_ARM_X4, inplace) {  // batch_size = 1, 4, ..., 19 with inplace(true).
+    for (size_t batch_size = 1; batch_size <= 20; batch_size += 3) {
+      VBinOpCMicrokernelTester()
+        .batch_size(batch_size)
+        .inplace(true)  // NOTE(review): presumably output aliases input; confirm in tester.
+        .Test(xnn_f32_vmulc_minmax_ukernel__wasmsimd_arm_x4, VBinOpCMicrokernelTester::OpType::MulC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  TEST(F32_VMULC_MINMAX__WASMSIMD_ARM_X4, qmin) {  // batch_size = 1, 4, ..., 19 with qmin(128).
+    for (size_t batch_size = 1; batch_size <= 20; batch_size += 3) {
+      VBinOpCMicrokernelTester()
+        .batch_size(batch_size)
+        .qmin(128)  // NOTE(review): presumably tightens the lower output clamp; confirm in tester.
+        .Test(xnn_f32_vmulc_minmax_ukernel__wasmsimd_arm_x4, VBinOpCMicrokernelTester::OpType::MulC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  TEST(F32_VMULC_MINMAX__WASMSIMD_ARM_X4, qmax) {  // batch_size = 1, 4, ..., 19 with qmax(128).
+    for (size_t batch_size = 1; batch_size <= 20; batch_size += 3) {
+      VBinOpCMicrokernelTester()
+        .batch_size(batch_size)
+        .qmax(128)  // NOTE(review): presumably tightens the upper output clamp; confirm in tester.
+        .Test(xnn_f32_vmulc_minmax_ukernel__wasmsimd_arm_x4, VBinOpCMicrokernelTester::OpType::MulC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+#endif  // XNN_ARCH_WASMSIMD
+
+
+#if XNN_ARCH_WASMSIMD  // Tests for xnn_f32_vmulc_minmax_ukernel__wasmsimd_arm_x8; compiled only when XNN_ARCH_WASMSIMD is non-zero.
+  TEST(F32_VMULC_MINMAX__WASMSIMD_ARM_X8, batch_eq_8) {  // Single run with batch_size == 8.
+    VBinOpCMicrokernelTester()
+      .batch_size(8)
+      .Test(xnn_f32_vmulc_minmax_ukernel__wasmsimd_arm_x8, VBinOpCMicrokernelTester::OpType::MulC, VBinOpCMicrokernelTester::Variant::Scalar);
+  }
+
+  TEST(F32_VMULC_MINMAX__WASMSIMD_ARM_X8, batch_div_8) {  // batch_size = 16, 24, ..., 72 (multiples of 8).
+    for (size_t batch_size = 16; batch_size < 80; batch_size += 8) {
+      VBinOpCMicrokernelTester()
+        .batch_size(batch_size)
+        .Test(xnn_f32_vmulc_minmax_ukernel__wasmsimd_arm_x8, VBinOpCMicrokernelTester::OpType::MulC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  TEST(F32_VMULC_MINMAX__WASMSIMD_ARM_X8, batch_lt_8) {  // batch_size = 1..7.
+    for (size_t batch_size = 1; batch_size < 8; batch_size++) {
+      VBinOpCMicrokernelTester()
+        .batch_size(batch_size)
+        .Test(xnn_f32_vmulc_minmax_ukernel__wasmsimd_arm_x8, VBinOpCMicrokernelTester::OpType::MulC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  TEST(F32_VMULC_MINMAX__WASMSIMD_ARM_X8, batch_gt_8) {  // batch_size = 9..15.
+    for (size_t batch_size = 9; batch_size < 16; batch_size++) {
+      VBinOpCMicrokernelTester()
+        .batch_size(batch_size)
+        .Test(xnn_f32_vmulc_minmax_ukernel__wasmsimd_arm_x8, VBinOpCMicrokernelTester::OpType::MulC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  TEST(F32_VMULC_MINMAX__WASMSIMD_ARM_X8, inplace) {  // batch_size = 1, 8, ..., 36 with inplace(true).
+    for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
+      VBinOpCMicrokernelTester()
+        .batch_size(batch_size)
+        .inplace(true)  // NOTE(review): presumably output aliases input; confirm in tester.
+        .Test(xnn_f32_vmulc_minmax_ukernel__wasmsimd_arm_x8, VBinOpCMicrokernelTester::OpType::MulC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  TEST(F32_VMULC_MINMAX__WASMSIMD_ARM_X8, qmin) {  // batch_size = 1, 8, ..., 36 with qmin(128).
+    for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
+      VBinOpCMicrokernelTester()
+        .batch_size(batch_size)
+        .qmin(128)  // NOTE(review): presumably tightens the lower output clamp; confirm in tester.
+        .Test(xnn_f32_vmulc_minmax_ukernel__wasmsimd_arm_x8, VBinOpCMicrokernelTester::OpType::MulC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  TEST(F32_VMULC_MINMAX__WASMSIMD_ARM_X8, qmax) {  // batch_size = 1, 8, ..., 36 with qmax(128).
+    for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
+      VBinOpCMicrokernelTester()
+        .batch_size(batch_size)
+        .qmax(128)  // NOTE(review): presumably tightens the upper output clamp; confirm in tester.
+        .Test(xnn_f32_vmulc_minmax_ukernel__wasmsimd_arm_x8, VBinOpCMicrokernelTester::OpType::MulC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+#endif  // XNN_ARCH_WASMSIMD
+
+
+#if XNN_ARCH_WASMSIMD  // Tests for xnn_f32_vmulc_minmax_ukernel__wasmsimd_x86_x4; compiled only when XNN_ARCH_WASMSIMD is non-zero.
+  TEST(F32_VMULC_MINMAX__WASMSIMD_X86_X4, batch_eq_4) {  // Single run with batch_size == 4.
+    VBinOpCMicrokernelTester()
+      .batch_size(4)
+      .Test(xnn_f32_vmulc_minmax_ukernel__wasmsimd_x86_x4, VBinOpCMicrokernelTester::OpType::MulC, VBinOpCMicrokernelTester::Variant::Scalar);
+  }
+
+  TEST(F32_VMULC_MINMAX__WASMSIMD_X86_X4, batch_div_4) {  // batch_size = 8, 12, ..., 36 (multiples of 4).
+    for (size_t batch_size = 8; batch_size < 40; batch_size += 4) {
+      VBinOpCMicrokernelTester()
+        .batch_size(batch_size)
+        .Test(xnn_f32_vmulc_minmax_ukernel__wasmsimd_x86_x4, VBinOpCMicrokernelTester::OpType::MulC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  TEST(F32_VMULC_MINMAX__WASMSIMD_X86_X4, batch_lt_4) {  // batch_size = 1..3.
+    for (size_t batch_size = 1; batch_size < 4; batch_size++) {
+      VBinOpCMicrokernelTester()
+        .batch_size(batch_size)
+        .Test(xnn_f32_vmulc_minmax_ukernel__wasmsimd_x86_x4, VBinOpCMicrokernelTester::OpType::MulC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  TEST(F32_VMULC_MINMAX__WASMSIMD_X86_X4, batch_gt_4) {  // batch_size = 5..7.
+    for (size_t batch_size = 5; batch_size < 8; batch_size++) {
+      VBinOpCMicrokernelTester()
+        .batch_size(batch_size)
+        .Test(xnn_f32_vmulc_minmax_ukernel__wasmsimd_x86_x4, VBinOpCMicrokernelTester::OpType::MulC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  TEST(F32_VMULC_MINMAX__WASMSIMD_X86_X4, inplace) {  // batch_size = 1, 4, ..., 19 with inplace(true).
+    for (size_t batch_size = 1; batch_size <= 20; batch_size += 3) {
+      VBinOpCMicrokernelTester()
+        .batch_size(batch_size)
+        .inplace(true)  // NOTE(review): presumably output aliases input; confirm in tester.
+        .Test(xnn_f32_vmulc_minmax_ukernel__wasmsimd_x86_x4, VBinOpCMicrokernelTester::OpType::MulC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  TEST(F32_VMULC_MINMAX__WASMSIMD_X86_X4, qmin) {  // batch_size = 1, 4, ..., 19 with qmin(128).
+    for (size_t batch_size = 1; batch_size <= 20; batch_size += 3) {
+      VBinOpCMicrokernelTester()
+        .batch_size(batch_size)
+        .qmin(128)  // NOTE(review): presumably tightens the lower output clamp; confirm in tester.
+        .Test(xnn_f32_vmulc_minmax_ukernel__wasmsimd_x86_x4, VBinOpCMicrokernelTester::OpType::MulC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  TEST(F32_VMULC_MINMAX__WASMSIMD_X86_X4, qmax) {  // batch_size = 1, 4, ..., 19 with qmax(128).
+    for (size_t batch_size = 1; batch_size <= 20; batch_size += 3) {
+      VBinOpCMicrokernelTester()
+        .batch_size(batch_size)
+        .qmax(128)  // NOTE(review): presumably tightens the upper output clamp; confirm in tester.
+        .Test(xnn_f32_vmulc_minmax_ukernel__wasmsimd_x86_x4, VBinOpCMicrokernelTester::OpType::MulC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+#endif  // XNN_ARCH_WASMSIMD
+
+
+#if XNN_ARCH_WASMSIMD
+  // Batch size of exactly 8.
+  TEST(F32_VMULC_MINMAX__WASMSIMD_X86_X8, batch_eq_8) {
+    VBinOpCMicrokernelTester().batch_size(8).Test(
+      xnn_f32_vmulc_minmax_ukernel__wasmsimd_x86_x8, VBinOpCMicrokernelTester::OpType::MulC, VBinOpCMicrokernelTester::Variant::Scalar);
+  }
+
+  // Batch sizes 16, 24, ..., 72: multiples of 8 larger than 8.
+  TEST(F32_VMULC_MINMAX__WASMSIMD_X86_X8, batch_div_8) {
+    for (size_t n = 16; n < 80; n += 8) {
+      VBinOpCMicrokernelTester().batch_size(n).Test(
+        xnn_f32_vmulc_minmax_ukernel__wasmsimd_x86_x8, VBinOpCMicrokernelTester::OpType::MulC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  // Batch sizes 1 through 7: everything below 8.
+  TEST(F32_VMULC_MINMAX__WASMSIMD_X86_X8, batch_lt_8) {
+    for (size_t n = 1; n < 8; ++n) {
+      VBinOpCMicrokernelTester().batch_size(n).Test(
+        xnn_f32_vmulc_minmax_ukernel__wasmsimd_x86_x8, VBinOpCMicrokernelTester::OpType::MulC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  // Batch sizes 9 through 15: strictly between 8 and 16.
+  TEST(F32_VMULC_MINMAX__WASMSIMD_X86_X8, batch_gt_8) {
+    for (size_t n = 9; n < 16; ++n) {
+      VBinOpCMicrokernelTester().batch_size(n).Test(
+        xnn_f32_vmulc_minmax_ukernel__wasmsimd_x86_x8, VBinOpCMicrokernelTester::OpType::MulC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  // inplace(true) over batch sizes 1, 8, 15, ..., 36.
+  TEST(F32_VMULC_MINMAX__WASMSIMD_X86_X8, inplace) {
+    for (size_t n = 1; n <= 40; n += 7) {
+      VBinOpCMicrokernelTester().batch_size(n).inplace(true).Test(
+        xnn_f32_vmulc_minmax_ukernel__wasmsimd_x86_x8,
+        VBinOpCMicrokernelTester::OpType::MulC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  // qmin(128) over batch sizes 1, 8, 15, ..., 36.
+  TEST(F32_VMULC_MINMAX__WASMSIMD_X86_X8, qmin) {
+    for (size_t n = 1; n <= 40; n += 7) {
+      VBinOpCMicrokernelTester().batch_size(n).qmin(128).Test(
+        xnn_f32_vmulc_minmax_ukernel__wasmsimd_x86_x8,
+        VBinOpCMicrokernelTester::OpType::MulC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  // qmax(128) over batch sizes 1, 8, 15, ..., 36.
+  TEST(F32_VMULC_MINMAX__WASMSIMD_X86_X8, qmax) {
+    for (size_t n = 1; n <= 40; n += 7) {
+      VBinOpCMicrokernelTester().batch_size(n).qmax(128).Test(
+        xnn_f32_vmulc_minmax_ukernel__wasmsimd_x86_x8,
+        VBinOpCMicrokernelTester::OpType::MulC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+#endif  // XNN_ARCH_WASMSIMD
+
+
 #if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD
   TEST(F32_VMULC_MINMAX__WASM_X1, batch_eq_1) {
     VBinOpCMicrokernelTester()
diff --git a/test/f32-vmulc-minmax.yaml b/test/f32-vmulc-minmax.yaml
index 3bf09e8..35ddd80 100644
--- a/test/f32-vmulc-minmax.yaml
+++ b/test/f32-vmulc-minmax.yaml
@@ -12,6 +12,10 @@
 - name: xnn_f32_vmulc_minmax_ukernel__avx512f_x32
 - name: xnn_f32_vmulc_minmax_ukernel__psimd_x4
 - name: xnn_f32_vmulc_minmax_ukernel__psimd_x8
+- name: xnn_f32_vmulc_minmax_ukernel__wasmsimd_arm_x4
+- name: xnn_f32_vmulc_minmax_ukernel__wasmsimd_arm_x8
+- name: xnn_f32_vmulc_minmax_ukernel__wasmsimd_x86_x4
+- name: xnn_f32_vmulc_minmax_ukernel__wasmsimd_x86_x8
 - name: xnn_f32_vmulc_minmax_ukernel__wasm_x1
 - name: xnn_f32_vmulc_minmax_ukernel__wasm_x2
 - name: xnn_f32_vmulc_minmax_ukernel__wasm_x4
diff --git a/test/f32-vrdivc-minmax.cc b/test/f32-vrdivc-minmax.cc
index 4f2810e..477056b 100644
--- a/test/f32-vrdivc-minmax.cc
+++ b/test/f32-vrdivc-minmax.cc
@@ -687,6 +687,246 @@
 #endif  // !XNN_ARCH_ASMJS && !XNN_ARCH_WASM && !XNN_COMPILER_MSVC && !XNN_COMPILER_ICC
 
 
+#if XNN_ARCH_WASMSIMD
+  // Batch size of exactly 4.
+  TEST(F32_VRDIVC_MINMAX__WASMSIMD_ARM_X4, batch_eq_4) {
+    VBinOpCMicrokernelTester().batch_size(4).Test(
+      xnn_f32_vrdivc_minmax_ukernel__wasmsimd_arm_x4, VBinOpCMicrokernelTester::OpType::RDivC, VBinOpCMicrokernelTester::Variant::Scalar);
+  }
+
+  // Batch sizes 8, 12, ..., 36: multiples of 4 larger than 4.
+  TEST(F32_VRDIVC_MINMAX__WASMSIMD_ARM_X4, batch_div_4) {
+    for (size_t n = 8; n < 40; n += 4) {
+      VBinOpCMicrokernelTester().batch_size(n).Test(
+        xnn_f32_vrdivc_minmax_ukernel__wasmsimd_arm_x4, VBinOpCMicrokernelTester::OpType::RDivC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  // Batch sizes 1 through 3: everything below 4.
+  TEST(F32_VRDIVC_MINMAX__WASMSIMD_ARM_X4, batch_lt_4) {
+    for (size_t n = 1; n < 4; ++n) {
+      VBinOpCMicrokernelTester().batch_size(n).Test(
+        xnn_f32_vrdivc_minmax_ukernel__wasmsimd_arm_x4, VBinOpCMicrokernelTester::OpType::RDivC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  // Batch sizes 5 through 7: strictly between 4 and 8.
+  TEST(F32_VRDIVC_MINMAX__WASMSIMD_ARM_X4, batch_gt_4) {
+    for (size_t n = 5; n < 8; ++n) {
+      VBinOpCMicrokernelTester().batch_size(n).Test(
+        xnn_f32_vrdivc_minmax_ukernel__wasmsimd_arm_x4, VBinOpCMicrokernelTester::OpType::RDivC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  // inplace(true) over batch sizes 1, 4, 7, ..., 19.
+  TEST(F32_VRDIVC_MINMAX__WASMSIMD_ARM_X4, inplace) {
+    for (size_t n = 1; n <= 20; n += 3) {
+      VBinOpCMicrokernelTester().batch_size(n).inplace(true).Test(
+        xnn_f32_vrdivc_minmax_ukernel__wasmsimd_arm_x4,
+        VBinOpCMicrokernelTester::OpType::RDivC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  // qmin(128) over batch sizes 1, 4, 7, ..., 19.
+  TEST(F32_VRDIVC_MINMAX__WASMSIMD_ARM_X4, qmin) {
+    for (size_t n = 1; n <= 20; n += 3) {
+      VBinOpCMicrokernelTester().batch_size(n).qmin(128).Test(
+        xnn_f32_vrdivc_minmax_ukernel__wasmsimd_arm_x4,
+        VBinOpCMicrokernelTester::OpType::RDivC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  // qmax(128) over batch sizes 1, 4, 7, ..., 19.
+  TEST(F32_VRDIVC_MINMAX__WASMSIMD_ARM_X4, qmax) {
+    for (size_t n = 1; n <= 20; n += 3) {
+      VBinOpCMicrokernelTester().batch_size(n).qmax(128).Test(
+        xnn_f32_vrdivc_minmax_ukernel__wasmsimd_arm_x4,
+        VBinOpCMicrokernelTester::OpType::RDivC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+#endif  // XNN_ARCH_WASMSIMD
+
+
+#if XNN_ARCH_WASMSIMD
+  // Batch size of exactly 8.
+  TEST(F32_VRDIVC_MINMAX__WASMSIMD_ARM_X8, batch_eq_8) {
+    VBinOpCMicrokernelTester().batch_size(8).Test(
+      xnn_f32_vrdivc_minmax_ukernel__wasmsimd_arm_x8, VBinOpCMicrokernelTester::OpType::RDivC, VBinOpCMicrokernelTester::Variant::Scalar);
+  }
+
+  // Batch sizes 16, 24, ..., 72: multiples of 8 larger than 8.
+  TEST(F32_VRDIVC_MINMAX__WASMSIMD_ARM_X8, batch_div_8) {
+    for (size_t n = 16; n < 80; n += 8) {
+      VBinOpCMicrokernelTester().batch_size(n).Test(
+        xnn_f32_vrdivc_minmax_ukernel__wasmsimd_arm_x8, VBinOpCMicrokernelTester::OpType::RDivC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  // Batch sizes 1 through 7: everything below 8.
+  TEST(F32_VRDIVC_MINMAX__WASMSIMD_ARM_X8, batch_lt_8) {
+    for (size_t n = 1; n < 8; ++n) {
+      VBinOpCMicrokernelTester().batch_size(n).Test(
+        xnn_f32_vrdivc_minmax_ukernel__wasmsimd_arm_x8, VBinOpCMicrokernelTester::OpType::RDivC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  // Batch sizes 9 through 15: strictly between 8 and 16.
+  TEST(F32_VRDIVC_MINMAX__WASMSIMD_ARM_X8, batch_gt_8) {
+    for (size_t n = 9; n < 16; ++n) {
+      VBinOpCMicrokernelTester().batch_size(n).Test(
+        xnn_f32_vrdivc_minmax_ukernel__wasmsimd_arm_x8, VBinOpCMicrokernelTester::OpType::RDivC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  // inplace(true) over batch sizes 1, 8, 15, ..., 36.
+  TEST(F32_VRDIVC_MINMAX__WASMSIMD_ARM_X8, inplace) {
+    for (size_t n = 1; n <= 40; n += 7) {
+      VBinOpCMicrokernelTester().batch_size(n).inplace(true).Test(
+        xnn_f32_vrdivc_minmax_ukernel__wasmsimd_arm_x8,
+        VBinOpCMicrokernelTester::OpType::RDivC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  // qmin(128) over batch sizes 1, 8, 15, ..., 36.
+  TEST(F32_VRDIVC_MINMAX__WASMSIMD_ARM_X8, qmin) {
+    for (size_t n = 1; n <= 40; n += 7) {
+      VBinOpCMicrokernelTester().batch_size(n).qmin(128).Test(
+        xnn_f32_vrdivc_minmax_ukernel__wasmsimd_arm_x8,
+        VBinOpCMicrokernelTester::OpType::RDivC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  // qmax(128) over batch sizes 1, 8, 15, ..., 36.
+  TEST(F32_VRDIVC_MINMAX__WASMSIMD_ARM_X8, qmax) {
+    for (size_t n = 1; n <= 40; n += 7) {
+      VBinOpCMicrokernelTester().batch_size(n).qmax(128).Test(
+        xnn_f32_vrdivc_minmax_ukernel__wasmsimd_arm_x8,
+        VBinOpCMicrokernelTester::OpType::RDivC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+#endif  // XNN_ARCH_WASMSIMD
+
+
+#if XNN_ARCH_WASMSIMD
+  // Batch size of exactly 4.
+  TEST(F32_VRDIVC_MINMAX__WASMSIMD_X86_X4, batch_eq_4) {
+    VBinOpCMicrokernelTester().batch_size(4).Test(
+      xnn_f32_vrdivc_minmax_ukernel__wasmsimd_x86_x4, VBinOpCMicrokernelTester::OpType::RDivC, VBinOpCMicrokernelTester::Variant::Scalar);
+  }
+
+  // Batch sizes 8, 12, ..., 36: multiples of 4 larger than 4.
+  TEST(F32_VRDIVC_MINMAX__WASMSIMD_X86_X4, batch_div_4) {
+    for (size_t n = 8; n < 40; n += 4) {
+      VBinOpCMicrokernelTester().batch_size(n).Test(
+        xnn_f32_vrdivc_minmax_ukernel__wasmsimd_x86_x4, VBinOpCMicrokernelTester::OpType::RDivC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  // Batch sizes 1 through 3: everything below 4.
+  TEST(F32_VRDIVC_MINMAX__WASMSIMD_X86_X4, batch_lt_4) {
+    for (size_t n = 1; n < 4; ++n) {
+      VBinOpCMicrokernelTester().batch_size(n).Test(
+        xnn_f32_vrdivc_minmax_ukernel__wasmsimd_x86_x4, VBinOpCMicrokernelTester::OpType::RDivC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  // Batch sizes 5 through 7: strictly between 4 and 8.
+  TEST(F32_VRDIVC_MINMAX__WASMSIMD_X86_X4, batch_gt_4) {
+    for (size_t n = 5; n < 8; ++n) {
+      VBinOpCMicrokernelTester().batch_size(n).Test(
+        xnn_f32_vrdivc_minmax_ukernel__wasmsimd_x86_x4, VBinOpCMicrokernelTester::OpType::RDivC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  // inplace(true) over batch sizes 1, 4, 7, ..., 19.
+  TEST(F32_VRDIVC_MINMAX__WASMSIMD_X86_X4, inplace) {
+    for (size_t n = 1; n <= 20; n += 3) {
+      VBinOpCMicrokernelTester().batch_size(n).inplace(true).Test(
+        xnn_f32_vrdivc_minmax_ukernel__wasmsimd_x86_x4,
+        VBinOpCMicrokernelTester::OpType::RDivC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  // qmin(128) over batch sizes 1, 4, 7, ..., 19.
+  TEST(F32_VRDIVC_MINMAX__WASMSIMD_X86_X4, qmin) {
+    for (size_t n = 1; n <= 20; n += 3) {
+      VBinOpCMicrokernelTester().batch_size(n).qmin(128).Test(
+        xnn_f32_vrdivc_minmax_ukernel__wasmsimd_x86_x4,
+        VBinOpCMicrokernelTester::OpType::RDivC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  // qmax(128) over batch sizes 1, 4, 7, ..., 19.
+  TEST(F32_VRDIVC_MINMAX__WASMSIMD_X86_X4, qmax) {
+    for (size_t n = 1; n <= 20; n += 3) {
+      VBinOpCMicrokernelTester().batch_size(n).qmax(128).Test(
+        xnn_f32_vrdivc_minmax_ukernel__wasmsimd_x86_x4,
+        VBinOpCMicrokernelTester::OpType::RDivC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+#endif  // XNN_ARCH_WASMSIMD
+
+
+#if XNN_ARCH_WASMSIMD
+  // Batch size of exactly 8.
+  TEST(F32_VRDIVC_MINMAX__WASMSIMD_X86_X8, batch_eq_8) {
+    VBinOpCMicrokernelTester().batch_size(8).Test(
+      xnn_f32_vrdivc_minmax_ukernel__wasmsimd_x86_x8, VBinOpCMicrokernelTester::OpType::RDivC, VBinOpCMicrokernelTester::Variant::Scalar);
+  }
+
+  // Batch sizes 16, 24, ..., 72: multiples of 8 larger than 8.
+  TEST(F32_VRDIVC_MINMAX__WASMSIMD_X86_X8, batch_div_8) {
+    for (size_t n = 16; n < 80; n += 8) {
+      VBinOpCMicrokernelTester().batch_size(n).Test(
+        xnn_f32_vrdivc_minmax_ukernel__wasmsimd_x86_x8, VBinOpCMicrokernelTester::OpType::RDivC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  // Batch sizes 1 through 7: everything below 8.
+  TEST(F32_VRDIVC_MINMAX__WASMSIMD_X86_X8, batch_lt_8) {
+    for (size_t n = 1; n < 8; ++n) {
+      VBinOpCMicrokernelTester().batch_size(n).Test(
+        xnn_f32_vrdivc_minmax_ukernel__wasmsimd_x86_x8, VBinOpCMicrokernelTester::OpType::RDivC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  // Batch sizes 9 through 15: strictly between 8 and 16.
+  TEST(F32_VRDIVC_MINMAX__WASMSIMD_X86_X8, batch_gt_8) {
+    for (size_t n = 9; n < 16; ++n) {
+      VBinOpCMicrokernelTester().batch_size(n).Test(
+        xnn_f32_vrdivc_minmax_ukernel__wasmsimd_x86_x8, VBinOpCMicrokernelTester::OpType::RDivC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  // inplace(true) over batch sizes 1, 8, 15, ..., 36.
+  TEST(F32_VRDIVC_MINMAX__WASMSIMD_X86_X8, inplace) {
+    for (size_t n = 1; n <= 40; n += 7) {
+      VBinOpCMicrokernelTester().batch_size(n).inplace(true).Test(
+        xnn_f32_vrdivc_minmax_ukernel__wasmsimd_x86_x8,
+        VBinOpCMicrokernelTester::OpType::RDivC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  // qmin(128) over batch sizes 1, 8, 15, ..., 36.
+  TEST(F32_VRDIVC_MINMAX__WASMSIMD_X86_X8, qmin) {
+    for (size_t n = 1; n <= 40; n += 7) {
+      VBinOpCMicrokernelTester().batch_size(n).qmin(128).Test(
+        xnn_f32_vrdivc_minmax_ukernel__wasmsimd_x86_x8,
+        VBinOpCMicrokernelTester::OpType::RDivC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  // qmax(128) over batch sizes 1, 8, 15, ..., 36.
+  TEST(F32_VRDIVC_MINMAX__WASMSIMD_X86_X8, qmax) {
+    for (size_t n = 1; n <= 40; n += 7) {
+      VBinOpCMicrokernelTester().batch_size(n).qmax(128).Test(
+        xnn_f32_vrdivc_minmax_ukernel__wasmsimd_x86_x8,
+        VBinOpCMicrokernelTester::OpType::RDivC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+#endif  // XNN_ARCH_WASMSIMD
+
+
 #if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD
   TEST(F32_VRDIVC_MINMAX__WASM_X1, batch_eq_1) {
     VBinOpCMicrokernelTester()
diff --git a/test/f32-vrdivc-minmax.yaml b/test/f32-vrdivc-minmax.yaml
index 32493ed..2530bdd 100644
--- a/test/f32-vrdivc-minmax.yaml
+++ b/test/f32-vrdivc-minmax.yaml
@@ -16,6 +16,10 @@
 - name: xnn_f32_vrdivc_minmax_ukernel__avx512f_x32
 - name: xnn_f32_vrdivc_minmax_ukernel__psimd_x4
 - name: xnn_f32_vrdivc_minmax_ukernel__psimd_x8
+- name: xnn_f32_vrdivc_minmax_ukernel__wasmsimd_arm_x4
+- name: xnn_f32_vrdivc_minmax_ukernel__wasmsimd_arm_x8
+- name: xnn_f32_vrdivc_minmax_ukernel__wasmsimd_x86_x4
+- name: xnn_f32_vrdivc_minmax_ukernel__wasmsimd_x86_x8
 - name: xnn_f32_vrdivc_minmax_ukernel__wasm_x1
 - name: xnn_f32_vrdivc_minmax_ukernel__wasm_x2
 - name: xnn_f32_vrdivc_minmax_ukernel__wasm_x4
diff --git a/test/f32-vrsubc-minmax.cc b/test/f32-vrsubc-minmax.cc
index d4c9837..6fb0547 100644
--- a/test/f32-vrsubc-minmax.cc
+++ b/test/f32-vrsubc-minmax.cc
@@ -687,6 +687,246 @@
 #endif  // !XNN_ARCH_ASMJS && !XNN_ARCH_WASM && !XNN_COMPILER_MSVC && !XNN_COMPILER_ICC
 
 
+#if XNN_ARCH_WASMSIMD
+  // Batch size of exactly 4.
+  TEST(F32_VRSUBC_MINMAX__WASMSIMD_ARM_X4, batch_eq_4) {
+    VBinOpCMicrokernelTester().batch_size(4).Test(
+      xnn_f32_vrsubc_minmax_ukernel__wasmsimd_arm_x4, VBinOpCMicrokernelTester::OpType::RSubC, VBinOpCMicrokernelTester::Variant::Scalar);
+  }
+
+  // Batch sizes 8, 12, ..., 36: multiples of 4 larger than 4.
+  TEST(F32_VRSUBC_MINMAX__WASMSIMD_ARM_X4, batch_div_4) {
+    for (size_t n = 8; n < 40; n += 4) {
+      VBinOpCMicrokernelTester().batch_size(n).Test(
+        xnn_f32_vrsubc_minmax_ukernel__wasmsimd_arm_x4, VBinOpCMicrokernelTester::OpType::RSubC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  // Batch sizes 1 through 3: everything below 4.
+  TEST(F32_VRSUBC_MINMAX__WASMSIMD_ARM_X4, batch_lt_4) {
+    for (size_t n = 1; n < 4; ++n) {
+      VBinOpCMicrokernelTester().batch_size(n).Test(
+        xnn_f32_vrsubc_minmax_ukernel__wasmsimd_arm_x4, VBinOpCMicrokernelTester::OpType::RSubC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  // Batch sizes 5 through 7: strictly between 4 and 8.
+  TEST(F32_VRSUBC_MINMAX__WASMSIMD_ARM_X4, batch_gt_4) {
+    for (size_t n = 5; n < 8; ++n) {
+      VBinOpCMicrokernelTester().batch_size(n).Test(
+        xnn_f32_vrsubc_minmax_ukernel__wasmsimd_arm_x4, VBinOpCMicrokernelTester::OpType::RSubC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  // inplace(true) over batch sizes 1, 4, 7, ..., 19.
+  TEST(F32_VRSUBC_MINMAX__WASMSIMD_ARM_X4, inplace) {
+    for (size_t n = 1; n <= 20; n += 3) {
+      VBinOpCMicrokernelTester().batch_size(n).inplace(true).Test(
+        xnn_f32_vrsubc_minmax_ukernel__wasmsimd_arm_x4,
+        VBinOpCMicrokernelTester::OpType::RSubC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  // qmin(128) over batch sizes 1, 4, 7, ..., 19.
+  TEST(F32_VRSUBC_MINMAX__WASMSIMD_ARM_X4, qmin) {
+    for (size_t n = 1; n <= 20; n += 3) {
+      VBinOpCMicrokernelTester().batch_size(n).qmin(128).Test(
+        xnn_f32_vrsubc_minmax_ukernel__wasmsimd_arm_x4,
+        VBinOpCMicrokernelTester::OpType::RSubC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  // qmax(128) over batch sizes 1, 4, 7, ..., 19.
+  TEST(F32_VRSUBC_MINMAX__WASMSIMD_ARM_X4, qmax) {
+    for (size_t n = 1; n <= 20; n += 3) {
+      VBinOpCMicrokernelTester().batch_size(n).qmax(128).Test(
+        xnn_f32_vrsubc_minmax_ukernel__wasmsimd_arm_x4,
+        VBinOpCMicrokernelTester::OpType::RSubC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+#endif  // XNN_ARCH_WASMSIMD
+
+
+#if XNN_ARCH_WASMSIMD
+  // Batch size of exactly 8.
+  TEST(F32_VRSUBC_MINMAX__WASMSIMD_ARM_X8, batch_eq_8) {
+    VBinOpCMicrokernelTester().batch_size(8).Test(
+      xnn_f32_vrsubc_minmax_ukernel__wasmsimd_arm_x8, VBinOpCMicrokernelTester::OpType::RSubC, VBinOpCMicrokernelTester::Variant::Scalar);
+  }
+
+  // Batch sizes 16, 24, ..., 72: multiples of 8 larger than 8.
+  TEST(F32_VRSUBC_MINMAX__WASMSIMD_ARM_X8, batch_div_8) {
+    for (size_t n = 16; n < 80; n += 8) {
+      VBinOpCMicrokernelTester().batch_size(n).Test(
+        xnn_f32_vrsubc_minmax_ukernel__wasmsimd_arm_x8, VBinOpCMicrokernelTester::OpType::RSubC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  // Batch sizes 1 through 7: everything below 8.
+  TEST(F32_VRSUBC_MINMAX__WASMSIMD_ARM_X8, batch_lt_8) {
+    for (size_t n = 1; n < 8; ++n) {
+      VBinOpCMicrokernelTester().batch_size(n).Test(
+        xnn_f32_vrsubc_minmax_ukernel__wasmsimd_arm_x8, VBinOpCMicrokernelTester::OpType::RSubC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  // Batch sizes 9 through 15: strictly between 8 and 16.
+  TEST(F32_VRSUBC_MINMAX__WASMSIMD_ARM_X8, batch_gt_8) {
+    for (size_t n = 9; n < 16; ++n) {
+      VBinOpCMicrokernelTester().batch_size(n).Test(
+        xnn_f32_vrsubc_minmax_ukernel__wasmsimd_arm_x8, VBinOpCMicrokernelTester::OpType::RSubC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  // inplace(true) over batch sizes 1, 8, 15, ..., 36.
+  TEST(F32_VRSUBC_MINMAX__WASMSIMD_ARM_X8, inplace) {
+    for (size_t n = 1; n <= 40; n += 7) {
+      VBinOpCMicrokernelTester().batch_size(n).inplace(true).Test(
+        xnn_f32_vrsubc_minmax_ukernel__wasmsimd_arm_x8,
+        VBinOpCMicrokernelTester::OpType::RSubC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  // qmin(128) over batch sizes 1, 8, 15, ..., 36.
+  TEST(F32_VRSUBC_MINMAX__WASMSIMD_ARM_X8, qmin) {
+    for (size_t n = 1; n <= 40; n += 7) {
+      VBinOpCMicrokernelTester().batch_size(n).qmin(128).Test(
+        xnn_f32_vrsubc_minmax_ukernel__wasmsimd_arm_x8,
+        VBinOpCMicrokernelTester::OpType::RSubC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  // qmax(128) over batch sizes 1, 8, 15, ..., 36.
+  TEST(F32_VRSUBC_MINMAX__WASMSIMD_ARM_X8, qmax) {
+    for (size_t n = 1; n <= 40; n += 7) {
+      VBinOpCMicrokernelTester().batch_size(n).qmax(128).Test(
+        xnn_f32_vrsubc_minmax_ukernel__wasmsimd_arm_x8,
+        VBinOpCMicrokernelTester::OpType::RSubC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+#endif  // XNN_ARCH_WASMSIMD
+
+
+#if XNN_ARCH_WASMSIMD
+  // Batch size of exactly 4.
+  TEST(F32_VRSUBC_MINMAX__WASMSIMD_X86_X4, batch_eq_4) {
+    VBinOpCMicrokernelTester().batch_size(4).Test(
+      xnn_f32_vrsubc_minmax_ukernel__wasmsimd_x86_x4, VBinOpCMicrokernelTester::OpType::RSubC, VBinOpCMicrokernelTester::Variant::Scalar);
+  }
+
+  // Batch sizes 8, 12, ..., 36: multiples of 4 larger than 4.
+  TEST(F32_VRSUBC_MINMAX__WASMSIMD_X86_X4, batch_div_4) {
+    for (size_t n = 8; n < 40; n += 4) {
+      VBinOpCMicrokernelTester().batch_size(n).Test(
+        xnn_f32_vrsubc_minmax_ukernel__wasmsimd_x86_x4, VBinOpCMicrokernelTester::OpType::RSubC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  // Batch sizes 1 through 3: everything below 4.
+  TEST(F32_VRSUBC_MINMAX__WASMSIMD_X86_X4, batch_lt_4) {
+    for (size_t n = 1; n < 4; ++n) {
+      VBinOpCMicrokernelTester().batch_size(n).Test(
+        xnn_f32_vrsubc_minmax_ukernel__wasmsimd_x86_x4, VBinOpCMicrokernelTester::OpType::RSubC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  // Batch sizes 5 through 7: strictly between 4 and 8.
+  TEST(F32_VRSUBC_MINMAX__WASMSIMD_X86_X4, batch_gt_4) {
+    for (size_t n = 5; n < 8; ++n) {
+      VBinOpCMicrokernelTester().batch_size(n).Test(
+        xnn_f32_vrsubc_minmax_ukernel__wasmsimd_x86_x4, VBinOpCMicrokernelTester::OpType::RSubC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  // inplace(true) over batch sizes 1, 4, 7, ..., 19.
+  TEST(F32_VRSUBC_MINMAX__WASMSIMD_X86_X4, inplace) {
+    for (size_t n = 1; n <= 20; n += 3) {
+      VBinOpCMicrokernelTester().batch_size(n).inplace(true).Test(
+        xnn_f32_vrsubc_minmax_ukernel__wasmsimd_x86_x4,
+        VBinOpCMicrokernelTester::OpType::RSubC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  // qmin(128) over batch sizes 1, 4, 7, ..., 19.
+  TEST(F32_VRSUBC_MINMAX__WASMSIMD_X86_X4, qmin) {
+    for (size_t n = 1; n <= 20; n += 3) {
+      VBinOpCMicrokernelTester().batch_size(n).qmin(128).Test(
+        xnn_f32_vrsubc_minmax_ukernel__wasmsimd_x86_x4,
+        VBinOpCMicrokernelTester::OpType::RSubC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  // qmax(128) over batch sizes 1, 4, 7, ..., 19.
+  TEST(F32_VRSUBC_MINMAX__WASMSIMD_X86_X4, qmax) {
+    for (size_t n = 1; n <= 20; n += 3) {
+      VBinOpCMicrokernelTester().batch_size(n).qmax(128).Test(
+        xnn_f32_vrsubc_minmax_ukernel__wasmsimd_x86_x4,
+        VBinOpCMicrokernelTester::OpType::RSubC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+#endif  // XNN_ARCH_WASMSIMD
+
+
+#if XNN_ARCH_WASMSIMD
+  // Batch size of exactly 8.
+  TEST(F32_VRSUBC_MINMAX__WASMSIMD_X86_X8, batch_eq_8) {
+    VBinOpCMicrokernelTester().batch_size(8).Test(
+      xnn_f32_vrsubc_minmax_ukernel__wasmsimd_x86_x8, VBinOpCMicrokernelTester::OpType::RSubC, VBinOpCMicrokernelTester::Variant::Scalar);
+  }
+
+  // Batch sizes 16, 24, ..., 72: multiples of 8 larger than 8.
+  TEST(F32_VRSUBC_MINMAX__WASMSIMD_X86_X8, batch_div_8) {
+    for (size_t n = 16; n < 80; n += 8) {
+      VBinOpCMicrokernelTester().batch_size(n).Test(
+        xnn_f32_vrsubc_minmax_ukernel__wasmsimd_x86_x8, VBinOpCMicrokernelTester::OpType::RSubC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  // Batch sizes 1 through 7: everything below 8.
+  TEST(F32_VRSUBC_MINMAX__WASMSIMD_X86_X8, batch_lt_8) {
+    for (size_t n = 1; n < 8; ++n) {
+      VBinOpCMicrokernelTester().batch_size(n).Test(
+        xnn_f32_vrsubc_minmax_ukernel__wasmsimd_x86_x8, VBinOpCMicrokernelTester::OpType::RSubC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  // Batch sizes 9 through 15: strictly between 8 and 16.
+  TEST(F32_VRSUBC_MINMAX__WASMSIMD_X86_X8, batch_gt_8) {
+    for (size_t n = 9; n < 16; ++n) {
+      VBinOpCMicrokernelTester().batch_size(n).Test(
+        xnn_f32_vrsubc_minmax_ukernel__wasmsimd_x86_x8, VBinOpCMicrokernelTester::OpType::RSubC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  // inplace(true) over batch sizes 1, 8, 15, ..., 36.
+  TEST(F32_VRSUBC_MINMAX__WASMSIMD_X86_X8, inplace) {
+    for (size_t n = 1; n <= 40; n += 7) {
+      VBinOpCMicrokernelTester().batch_size(n).inplace(true).Test(
+        xnn_f32_vrsubc_minmax_ukernel__wasmsimd_x86_x8,
+        VBinOpCMicrokernelTester::OpType::RSubC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  // qmin(128) over batch sizes 1, 8, 15, ..., 36.
+  TEST(F32_VRSUBC_MINMAX__WASMSIMD_X86_X8, qmin) {
+    for (size_t n = 1; n <= 40; n += 7) {
+      VBinOpCMicrokernelTester().batch_size(n).qmin(128).Test(
+        xnn_f32_vrsubc_minmax_ukernel__wasmsimd_x86_x8,
+        VBinOpCMicrokernelTester::OpType::RSubC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  // qmax(128) over batch sizes 1, 8, 15, ..., 36.
+  TEST(F32_VRSUBC_MINMAX__WASMSIMD_X86_X8, qmax) {
+    for (size_t n = 1; n <= 40; n += 7) {
+      VBinOpCMicrokernelTester().batch_size(n).qmax(128).Test(
+        xnn_f32_vrsubc_minmax_ukernel__wasmsimd_x86_x8,
+        VBinOpCMicrokernelTester::OpType::RSubC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+#endif  // XNN_ARCH_WASMSIMD
+
+
 #if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD
   TEST(F32_VRSUBC_MINMAX__WASM_X1, batch_eq_1) {
     VBinOpCMicrokernelTester()
diff --git a/test/f32-vrsubc-minmax.yaml b/test/f32-vrsubc-minmax.yaml
index 94acd01..ccd7abb 100644
--- a/test/f32-vrsubc-minmax.yaml
+++ b/test/f32-vrsubc-minmax.yaml
@@ -12,6 +12,10 @@
 - name: xnn_f32_vrsubc_minmax_ukernel__avx512f_x32
 - name: xnn_f32_vrsubc_minmax_ukernel__psimd_x4
 - name: xnn_f32_vrsubc_minmax_ukernel__psimd_x8
+- name: xnn_f32_vrsubc_minmax_ukernel__wasmsimd_arm_x4
+- name: xnn_f32_vrsubc_minmax_ukernel__wasmsimd_arm_x8
+- name: xnn_f32_vrsubc_minmax_ukernel__wasmsimd_x86_x4
+- name: xnn_f32_vrsubc_minmax_ukernel__wasmsimd_x86_x8
 - name: xnn_f32_vrsubc_minmax_ukernel__wasm_x1
 - name: xnn_f32_vrsubc_minmax_ukernel__wasm_x2
 - name: xnn_f32_vrsubc_minmax_ukernel__wasm_x4
diff --git a/test/f32-vsqrdiffc.cc b/test/f32-vsqrdiffc.cc
index dc76517..52fb45d 100644
--- a/test/f32-vsqrdiffc.cc
+++ b/test/f32-vsqrdiffc.cc
@@ -487,6 +487,90 @@
 #endif  // !XNN_ARCH_ASMJS && !XNN_ARCH_WASM && !XNN_COMPILER_MSVC && !XNN_COMPILER_ICC
 
 
+#if XNN_ARCH_WASMSIMD
+  // Batch size of exactly 4.
+  TEST(F32_VSQRDIFFC__WASMSIMD_X4, batch_eq_4) {
+    VBinOpCMicrokernelTester().batch_size(4).Test(
+      xnn_f32_vsqrdiffc_ukernel__wasmsimd_x4, VBinOpCMicrokernelTester::OpType::SqrDiffC, VBinOpCMicrokernelTester::Variant::Scalar);
+  }
+
+  // Batch sizes 8, 12, ..., 36: multiples of 4 larger than 4.
+  TEST(F32_VSQRDIFFC__WASMSIMD_X4, batch_div_4) {
+    for (size_t n = 8; n < 40; n += 4) {
+      VBinOpCMicrokernelTester().batch_size(n).Test(
+        xnn_f32_vsqrdiffc_ukernel__wasmsimd_x4, VBinOpCMicrokernelTester::OpType::SqrDiffC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  // Batch sizes 1 through 3: everything below 4.
+  TEST(F32_VSQRDIFFC__WASMSIMD_X4, batch_lt_4) {
+    for (size_t n = 1; n < 4; ++n) {
+      VBinOpCMicrokernelTester().batch_size(n).Test(
+        xnn_f32_vsqrdiffc_ukernel__wasmsimd_x4, VBinOpCMicrokernelTester::OpType::SqrDiffC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  // Batch sizes 5 through 7: strictly between 4 and 8.
+  TEST(F32_VSQRDIFFC__WASMSIMD_X4, batch_gt_4) {
+    for (size_t n = 5; n < 8; ++n) {
+      VBinOpCMicrokernelTester().batch_size(n).Test(
+        xnn_f32_vsqrdiffc_ukernel__wasmsimd_x4, VBinOpCMicrokernelTester::OpType::SqrDiffC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  // inplace(true) over batch sizes 1, 4, 7, ..., 19.
+  TEST(F32_VSQRDIFFC__WASMSIMD_X4, inplace) {
+    for (size_t n = 1; n <= 20; n += 3) {
+      VBinOpCMicrokernelTester().batch_size(n).inplace(true).Test(
+        xnn_f32_vsqrdiffc_ukernel__wasmsimd_x4,
+        VBinOpCMicrokernelTester::OpType::SqrDiffC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+#endif  // XNN_ARCH_WASMSIMD
+
+
+#if XNN_ARCH_WASMSIMD
+  // Batch size of exactly 8.
+  TEST(F32_VSQRDIFFC__WASMSIMD_X8, batch_eq_8) {
+    VBinOpCMicrokernelTester().batch_size(8).Test(
+      xnn_f32_vsqrdiffc_ukernel__wasmsimd_x8, VBinOpCMicrokernelTester::OpType::SqrDiffC, VBinOpCMicrokernelTester::Variant::Scalar);
+  }
+
+  // Batch sizes 16, 24, ..., 72: multiples of 8 larger than 8.
+  TEST(F32_VSQRDIFFC__WASMSIMD_X8, batch_div_8) {
+    for (size_t n = 16; n < 80; n += 8) {
+      VBinOpCMicrokernelTester().batch_size(n).Test(
+        xnn_f32_vsqrdiffc_ukernel__wasmsimd_x8, VBinOpCMicrokernelTester::OpType::SqrDiffC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  // Batch sizes 1 through 7: everything below 8.
+  TEST(F32_VSQRDIFFC__WASMSIMD_X8, batch_lt_8) {
+    for (size_t n = 1; n < 8; ++n) {
+      VBinOpCMicrokernelTester().batch_size(n).Test(
+        xnn_f32_vsqrdiffc_ukernel__wasmsimd_x8, VBinOpCMicrokernelTester::OpType::SqrDiffC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  // Batch sizes 9 through 15: strictly between 8 and 16.
+  TEST(F32_VSQRDIFFC__WASMSIMD_X8, batch_gt_8) {
+    for (size_t n = 9; n < 16; ++n) {
+      VBinOpCMicrokernelTester().batch_size(n).Test(
+        xnn_f32_vsqrdiffc_ukernel__wasmsimd_x8, VBinOpCMicrokernelTester::OpType::SqrDiffC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  // inplace(true) over batch sizes 1, 8, 15, ..., 36.
+  TEST(F32_VSQRDIFFC__WASMSIMD_X8, inplace) {
+    for (size_t n = 1; n <= 40; n += 7) {
+      VBinOpCMicrokernelTester().batch_size(n).inplace(true).Test(
+        xnn_f32_vsqrdiffc_ukernel__wasmsimd_x8,
+        VBinOpCMicrokernelTester::OpType::SqrDiffC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+#endif  // XNN_ARCH_WASMSIMD
+
+
 TEST(F32_VSQRDIFFC__SCALAR_X1, batch_eq_1) {
   VBinOpCMicrokernelTester()
     .batch_size(1)
diff --git a/test/f32-vsqrdiffc.yaml b/test/f32-vsqrdiffc.yaml
index 11b95f6..e9f1836 100644
--- a/test/f32-vsqrdiffc.yaml
+++ b/test/f32-vsqrdiffc.yaml
@@ -12,6 +12,8 @@
 - name: xnn_f32_vsqrdiffc_ukernel__avx512f_x32
 - name: xnn_f32_vsqrdiffc_ukernel__psimd_x4
 - name: xnn_f32_vsqrdiffc_ukernel__psimd_x8
+- name: xnn_f32_vsqrdiffc_ukernel__wasmsimd_x4
+- name: xnn_f32_vsqrdiffc_ukernel__wasmsimd_x8
 - name: xnn_f32_vsqrdiffc_ukernel__scalar_x1
 - name: xnn_f32_vsqrdiffc_ukernel__scalar_x2
 - name: xnn_f32_vsqrdiffc_ukernel__scalar_x4
diff --git a/test/f32-vsubc-minmax.cc b/test/f32-vsubc-minmax.cc
index d373bfe..96ed14a 100644
--- a/test/f32-vsubc-minmax.cc
+++ b/test/f32-vsubc-minmax.cc
@@ -687,6 +687,246 @@
 #endif  // !XNN_ARCH_ASMJS && !XNN_ARCH_WASM && !XNN_COMPILER_MSVC && !XNN_COMPILER_ICC
 
 
+#if XNN_ARCH_WASMSIMD
+  TEST(F32_VSUBC_MINMAX__WASMSIMD_ARM_X4, batch_eq_4) {
+    VBinOpCMicrokernelTester()
+      .batch_size(4)
+      .Test(xnn_f32_vsubc_minmax_ukernel__wasmsimd_arm_x4, VBinOpCMicrokernelTester::OpType::SubC, VBinOpCMicrokernelTester::Variant::Scalar);
+  }
+
+  TEST(F32_VSUBC_MINMAX__WASMSIMD_ARM_X4, batch_div_4) {
+    for (size_t batch_size = 8; batch_size < 40; batch_size += 4) {
+      VBinOpCMicrokernelTester()
+        .batch_size(batch_size)
+        .Test(xnn_f32_vsubc_minmax_ukernel__wasmsimd_arm_x4, VBinOpCMicrokernelTester::OpType::SubC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  TEST(F32_VSUBC_MINMAX__WASMSIMD_ARM_X4, batch_lt_4) {
+    for (size_t batch_size = 1; batch_size < 4; batch_size++) {
+      VBinOpCMicrokernelTester()
+        .batch_size(batch_size)
+        .Test(xnn_f32_vsubc_minmax_ukernel__wasmsimd_arm_x4, VBinOpCMicrokernelTester::OpType::SubC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  TEST(F32_VSUBC_MINMAX__WASMSIMD_ARM_X4, batch_gt_4) {
+    for (size_t batch_size = 5; batch_size < 8; batch_size++) {
+      VBinOpCMicrokernelTester()
+        .batch_size(batch_size)
+        .Test(xnn_f32_vsubc_minmax_ukernel__wasmsimd_arm_x4, VBinOpCMicrokernelTester::OpType::SubC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  TEST(F32_VSUBC_MINMAX__WASMSIMD_ARM_X4, inplace) {
+    for (size_t batch_size = 1; batch_size <= 20; batch_size += 3) {
+      VBinOpCMicrokernelTester()
+        .batch_size(batch_size)
+        .inplace(true)
+        .Test(xnn_f32_vsubc_minmax_ukernel__wasmsimd_arm_x4, VBinOpCMicrokernelTester::OpType::SubC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  TEST(F32_VSUBC_MINMAX__WASMSIMD_ARM_X4, qmin) {
+    for (size_t batch_size = 1; batch_size <= 20; batch_size += 3) {
+      VBinOpCMicrokernelTester()
+        .batch_size(batch_size)
+        .qmin(128)
+        .Test(xnn_f32_vsubc_minmax_ukernel__wasmsimd_arm_x4, VBinOpCMicrokernelTester::OpType::SubC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  TEST(F32_VSUBC_MINMAX__WASMSIMD_ARM_X4, qmax) {
+    for (size_t batch_size = 1; batch_size <= 20; batch_size += 3) {
+      VBinOpCMicrokernelTester()
+        .batch_size(batch_size)
+        .qmax(128)
+        .Test(xnn_f32_vsubc_minmax_ukernel__wasmsimd_arm_x4, VBinOpCMicrokernelTester::OpType::SubC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+#endif  // XNN_ARCH_WASMSIMD
+
+
+#if XNN_ARCH_WASMSIMD
+  TEST(F32_VSUBC_MINMAX__WASMSIMD_ARM_X8, batch_eq_8) {
+    VBinOpCMicrokernelTester()
+      .batch_size(8)
+      .Test(xnn_f32_vsubc_minmax_ukernel__wasmsimd_arm_x8, VBinOpCMicrokernelTester::OpType::SubC, VBinOpCMicrokernelTester::Variant::Scalar);
+  }
+
+  TEST(F32_VSUBC_MINMAX__WASMSIMD_ARM_X8, batch_div_8) {
+    for (size_t batch_size = 16; batch_size < 80; batch_size += 8) {
+      VBinOpCMicrokernelTester()
+        .batch_size(batch_size)
+        .Test(xnn_f32_vsubc_minmax_ukernel__wasmsimd_arm_x8, VBinOpCMicrokernelTester::OpType::SubC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  TEST(F32_VSUBC_MINMAX__WASMSIMD_ARM_X8, batch_lt_8) {
+    for (size_t batch_size = 1; batch_size < 8; batch_size++) {
+      VBinOpCMicrokernelTester()
+        .batch_size(batch_size)
+        .Test(xnn_f32_vsubc_minmax_ukernel__wasmsimd_arm_x8, VBinOpCMicrokernelTester::OpType::SubC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  TEST(F32_VSUBC_MINMAX__WASMSIMD_ARM_X8, batch_gt_8) {
+    for (size_t batch_size = 9; batch_size < 16; batch_size++) {
+      VBinOpCMicrokernelTester()
+        .batch_size(batch_size)
+        .Test(xnn_f32_vsubc_minmax_ukernel__wasmsimd_arm_x8, VBinOpCMicrokernelTester::OpType::SubC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  TEST(F32_VSUBC_MINMAX__WASMSIMD_ARM_X8, inplace) {
+    for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
+      VBinOpCMicrokernelTester()
+        .batch_size(batch_size)
+        .inplace(true)
+        .Test(xnn_f32_vsubc_minmax_ukernel__wasmsimd_arm_x8, VBinOpCMicrokernelTester::OpType::SubC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  TEST(F32_VSUBC_MINMAX__WASMSIMD_ARM_X8, qmin) {
+    for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
+      VBinOpCMicrokernelTester()
+        .batch_size(batch_size)
+        .qmin(128)
+        .Test(xnn_f32_vsubc_minmax_ukernel__wasmsimd_arm_x8, VBinOpCMicrokernelTester::OpType::SubC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  TEST(F32_VSUBC_MINMAX__WASMSIMD_ARM_X8, qmax) {
+    for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
+      VBinOpCMicrokernelTester()
+        .batch_size(batch_size)
+        .qmax(128)
+        .Test(xnn_f32_vsubc_minmax_ukernel__wasmsimd_arm_x8, VBinOpCMicrokernelTester::OpType::SubC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+#endif  // XNN_ARCH_WASMSIMD
+
+
+#if XNN_ARCH_WASMSIMD
+  TEST(F32_VSUBC_MINMAX__WASMSIMD_X86_X4, batch_eq_4) {
+    VBinOpCMicrokernelTester()
+      .batch_size(4)
+      .Test(xnn_f32_vsubc_minmax_ukernel__wasmsimd_x86_x4, VBinOpCMicrokernelTester::OpType::SubC, VBinOpCMicrokernelTester::Variant::Scalar);
+  }
+
+  TEST(F32_VSUBC_MINMAX__WASMSIMD_X86_X4, batch_div_4) {
+    for (size_t batch_size = 8; batch_size < 40; batch_size += 4) {
+      VBinOpCMicrokernelTester()
+        .batch_size(batch_size)
+        .Test(xnn_f32_vsubc_minmax_ukernel__wasmsimd_x86_x4, VBinOpCMicrokernelTester::OpType::SubC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  TEST(F32_VSUBC_MINMAX__WASMSIMD_X86_X4, batch_lt_4) {
+    for (size_t batch_size = 1; batch_size < 4; batch_size++) {
+      VBinOpCMicrokernelTester()
+        .batch_size(batch_size)
+        .Test(xnn_f32_vsubc_minmax_ukernel__wasmsimd_x86_x4, VBinOpCMicrokernelTester::OpType::SubC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  TEST(F32_VSUBC_MINMAX__WASMSIMD_X86_X4, batch_gt_4) {
+    for (size_t batch_size = 5; batch_size < 8; batch_size++) {
+      VBinOpCMicrokernelTester()
+        .batch_size(batch_size)
+        .Test(xnn_f32_vsubc_minmax_ukernel__wasmsimd_x86_x4, VBinOpCMicrokernelTester::OpType::SubC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  TEST(F32_VSUBC_MINMAX__WASMSIMD_X86_X4, inplace) {
+    for (size_t batch_size = 1; batch_size <= 20; batch_size += 3) {
+      VBinOpCMicrokernelTester()
+        .batch_size(batch_size)
+        .inplace(true)
+        .Test(xnn_f32_vsubc_minmax_ukernel__wasmsimd_x86_x4, VBinOpCMicrokernelTester::OpType::SubC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  TEST(F32_VSUBC_MINMAX__WASMSIMD_X86_X4, qmin) {
+    for (size_t batch_size = 1; batch_size <= 20; batch_size += 3) {
+      VBinOpCMicrokernelTester()
+        .batch_size(batch_size)
+        .qmin(128)
+        .Test(xnn_f32_vsubc_minmax_ukernel__wasmsimd_x86_x4, VBinOpCMicrokernelTester::OpType::SubC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  TEST(F32_VSUBC_MINMAX__WASMSIMD_X86_X4, qmax) {
+    for (size_t batch_size = 1; batch_size <= 20; batch_size += 3) {
+      VBinOpCMicrokernelTester()
+        .batch_size(batch_size)
+        .qmax(128)
+        .Test(xnn_f32_vsubc_minmax_ukernel__wasmsimd_x86_x4, VBinOpCMicrokernelTester::OpType::SubC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+#endif  // XNN_ARCH_WASMSIMD
+
+
+#if XNN_ARCH_WASMSIMD
+  TEST(F32_VSUBC_MINMAX__WASMSIMD_X86_X8, batch_eq_8) {
+    VBinOpCMicrokernelTester()
+      .batch_size(8)
+      .Test(xnn_f32_vsubc_minmax_ukernel__wasmsimd_x86_x8, VBinOpCMicrokernelTester::OpType::SubC, VBinOpCMicrokernelTester::Variant::Scalar);
+  }
+
+  TEST(F32_VSUBC_MINMAX__WASMSIMD_X86_X8, batch_div_8) {
+    for (size_t batch_size = 16; batch_size < 80; batch_size += 8) {
+      VBinOpCMicrokernelTester()
+        .batch_size(batch_size)
+        .Test(xnn_f32_vsubc_minmax_ukernel__wasmsimd_x86_x8, VBinOpCMicrokernelTester::OpType::SubC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  TEST(F32_VSUBC_MINMAX__WASMSIMD_X86_X8, batch_lt_8) {
+    for (size_t batch_size = 1; batch_size < 8; batch_size++) {
+      VBinOpCMicrokernelTester()
+        .batch_size(batch_size)
+        .Test(xnn_f32_vsubc_minmax_ukernel__wasmsimd_x86_x8, VBinOpCMicrokernelTester::OpType::SubC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  TEST(F32_VSUBC_MINMAX__WASMSIMD_X86_X8, batch_gt_8) {
+    for (size_t batch_size = 9; batch_size < 16; batch_size++) {
+      VBinOpCMicrokernelTester()
+        .batch_size(batch_size)
+        .Test(xnn_f32_vsubc_minmax_ukernel__wasmsimd_x86_x8, VBinOpCMicrokernelTester::OpType::SubC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  TEST(F32_VSUBC_MINMAX__WASMSIMD_X86_X8, inplace) {
+    for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
+      VBinOpCMicrokernelTester()
+        .batch_size(batch_size)
+        .inplace(true)
+        .Test(xnn_f32_vsubc_minmax_ukernel__wasmsimd_x86_x8, VBinOpCMicrokernelTester::OpType::SubC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  TEST(F32_VSUBC_MINMAX__WASMSIMD_X86_X8, qmin) {
+    for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
+      VBinOpCMicrokernelTester()
+        .batch_size(batch_size)
+        .qmin(128)
+        .Test(xnn_f32_vsubc_minmax_ukernel__wasmsimd_x86_x8, VBinOpCMicrokernelTester::OpType::SubC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+
+  TEST(F32_VSUBC_MINMAX__WASMSIMD_X86_X8, qmax) {
+    for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
+      VBinOpCMicrokernelTester()
+        .batch_size(batch_size)
+        .qmax(128)
+        .Test(xnn_f32_vsubc_minmax_ukernel__wasmsimd_x86_x8, VBinOpCMicrokernelTester::OpType::SubC, VBinOpCMicrokernelTester::Variant::Scalar);
+    }
+  }
+#endif  // XNN_ARCH_WASMSIMD
+
+
 #if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD
   TEST(F32_VSUBC_MINMAX__WASM_X1, batch_eq_1) {
     VBinOpCMicrokernelTester()
diff --git a/test/f32-vsubc-minmax.yaml b/test/f32-vsubc-minmax.yaml
index 7d9ae36..f10688c 100644
--- a/test/f32-vsubc-minmax.yaml
+++ b/test/f32-vsubc-minmax.yaml
@@ -12,6 +12,10 @@
 - name: xnn_f32_vsubc_minmax_ukernel__avx512f_x32
 - name: xnn_f32_vsubc_minmax_ukernel__psimd_x4
 - name: xnn_f32_vsubc_minmax_ukernel__psimd_x8
+- name: xnn_f32_vsubc_minmax_ukernel__wasmsimd_arm_x4
+- name: xnn_f32_vsubc_minmax_ukernel__wasmsimd_arm_x8
+- name: xnn_f32_vsubc_minmax_ukernel__wasmsimd_x86_x4
+- name: xnn_f32_vsubc_minmax_ukernel__wasmsimd_x86_x8
 - name: xnn_f32_vsubc_minmax_ukernel__wasm_x1
 - name: xnn_f32_vsubc_minmax_ukernel__wasm_x2
 - name: xnn_f32_vsubc_minmax_ukernel__wasm_x4