Subtract ND operator
Add a binary N-dimensional (ND) subtraction operator with broadcasting support.
PiperOrigin-RevId: 283632285
diff --git a/src/init.c b/src/init.c
index a82716a..5c32056 100644
--- a/src/init.c
+++ b/src/init.c
@@ -230,6 +230,12 @@
.ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmulc_ukernel__neon_x8,
.element_tile = 8,
};
+ xnn_params.f32.vsub = (struct vbinary_parameters) {
+ .op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vsub_ukernel__neon_x8,
+ .opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vsubc_ukernel__neon_x8,
+ .ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vrsubc_ukernel__neon_x8,
+ .element_tile = 8,
+ };
xnn_params.f32.vmulcaddc = (struct vmulcaddc_parameters) {
.ukernel = (xnn_vmulcaddc_ukernel_function) xnn_f32_vmulcaddc_ukernel_c4__neon_2x,
.channel_tile = 4,
@@ -510,6 +516,12 @@
.ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmulc_ukernel__neon_x8,
.element_tile = 8,
};
+ xnn_params.f32.vsub = (struct vbinary_parameters) {
+ .op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vsub_ukernel__neon_x8,
+ .opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vsubc_ukernel__neon_x8,
+ .ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vrsubc_ukernel__neon_x8,
+ .element_tile = 8,
+ };
xnn_params.f32.vmulcaddc = (struct vmulcaddc_parameters) {
.ukernel = (xnn_vmulcaddc_ukernel_function) xnn_f32_vmulcaddc_ukernel_c4__neonfma_2x,
.channel_tile = 4,
@@ -818,6 +830,12 @@
.ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmulc_ukernel__sse_x8,
.element_tile = 8,
};
+ xnn_params.f32.vsub = (struct vbinary_parameters) {
+ .op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vsub_ukernel__sse_x8,
+ .opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vsubc_ukernel__sse_x8,
+ .ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vrsubc_ukernel__sse_x8,
+ .element_tile = 8,
+ };
xnn_params.f32.vmulcaddc = (struct vmulcaddc_parameters) {
.ukernel = (xnn_vmulcaddc_ukernel_function) xnn_f32_vmulcaddc_ukernel_c4__sse_2x,
.channel_tile = 4,
@@ -1024,6 +1042,12 @@
.ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmulc_ukernel__psimd_x8,
.element_tile = 8,
};
+ xnn_params.f32.vsub = (struct vbinary_parameters) {
+ .op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vsub_ukernel__psimd_x8,
+ .opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vsubc_ukernel__psimd_x8,
+ .ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vrsubc_ukernel__psimd_x8,
+ .element_tile = 8,
+ };
xnn_params.f32.vmulcaddc = (struct vmulcaddc_parameters) {
.ukernel = (xnn_vmulcaddc_ukernel_function) xnn_f32_vmulcaddc_ukernel_c4__psimd_2x,
.channel_tile = 4,
@@ -1205,6 +1229,12 @@
.ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmulc_ukernel__scalar_x4,
.element_tile = 8,
};
+ xnn_params.f32.vsub = (struct vbinary_parameters) {
+ .op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vsub_ukernel__scalar_x4,
+ .opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vsubc_ukernel__scalar_x4,
+ .ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vrsubc_ukernel__scalar_x4,
+ .element_tile = 8,
+ };
xnn_params.f32.vmulcaddc = (struct vmulcaddc_parameters) {
.ukernel = (xnn_vmulcaddc_ukernel_function) xnn_f32_vmulcaddc_ukernel_c1__scalar_2x,
.channel_tile = 1,