ND Divide operator with broadcasting support

PiperOrigin-RevId: 284269401
diff --git a/src/init.c b/src/init.c
index 36bbbed..c0f17bf 100644
--- a/src/init.c
+++ b/src/init.c
@@ -224,6 +224,12 @@
       .ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vaddc_ukernel__neon_x8,
       .element_tile = 8,
     };
+    xnn_params.f32.vdiv = (struct vbinary_parameters) {
+      .op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vdiv_ukernel__scalar_x2,
+      .opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vdivc_ukernel__scalar_x2,
+      .ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vrdivc_ukernel__scalar_x2,
+      .element_tile = 2,
+    };
     xnn_params.f32.vmax = (struct vbinary_parameters) {
       .op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmax_ukernel__neon_x8,
       .opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmaxc_ukernel__neon_x8,
@@ -522,6 +528,12 @@
       .ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vaddc_ukernel__neon_x8,
       .element_tile = 8,
     };
+    xnn_params.f32.vdiv = (struct vbinary_parameters) {
+      .op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vdiv_ukernel__neon_x8,
+      .opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vdivc_ukernel__neon_x8,
+      .ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vrdivc_ukernel__neon_x8,
+      .element_tile = 8,
+    };
     xnn_params.f32.vmax = (struct vbinary_parameters) {
       .op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmax_ukernel__neon_x8,
       .opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmaxc_ukernel__neon_x8,
@@ -848,6 +860,12 @@
       .ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vaddc_ukernel__sse_x8,
       .element_tile = 8,
     };
+    xnn_params.f32.vdiv = (struct vbinary_parameters) {
+      .op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vdiv_ukernel__sse_x8,
+      .opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vdivc_ukernel__sse_x8,
+      .ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vrdivc_ukernel__sse_x8,
+      .element_tile = 8,
+    };
     xnn_params.f32.vmax = (struct vbinary_parameters) {
       .op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmax_ukernel__sse_x8,
       .opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmaxc_ukernel__sse_x8,
@@ -1072,6 +1090,12 @@
       .ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vaddc_ukernel__psimd_x8,
       .element_tile = 8,
     };
+    xnn_params.f32.vdiv = (struct vbinary_parameters) {
+      .op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vdiv_ukernel__psimd_x4,
+      .opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vdivc_ukernel__psimd_x4,
+      .ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vdivc_ukernel__psimd_x4,
+      .element_tile = 4,
+    };
     xnn_params.f32.vmax = (struct vbinary_parameters) {
       .op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmax_ukernel__psimd_x8,
       .opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmaxc_ukernel__psimd_x8,
@@ -1271,6 +1295,12 @@
       .ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vaddc_ukernel__wasm_x4,
       .element_tile = 8,
     };
+    xnn_params.f32.vdiv = (struct vbinary_parameters) {
+      .op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vdiv_ukernel__wasm_x2,
+      .opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vdivc_ukernel__wasm_x2,
+      .ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vrdivc_ukernel__wasm_x2,
+      .element_tile = 2,
+    };
     xnn_params.f32.vmax = (struct vbinary_parameters) {
       .op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmax_ukernel__wasm_x4,
       .opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmaxc_ukernel__wasm_x4,