Introduce Add ND operator with broadcasting

- Generalize Multiply implementation to arbitrary binary elementwise operators.
- The legacy Add NC operator will be maintained until Add ND gets support for
  strides.

PiperOrigin-RevId: 283466005
diff --git a/include/xnnpack.h b/include/xnnpack.h
index c32d3d8..8fde48a 100644
--- a/include/xnnpack.h
+++ b/include/xnnpack.h
@@ -153,6 +153,23 @@
     float* sum,
     pthreadpool_t threadpool);
 
+enum xnn_status xnn_create_add_nd_f32(
+    float output_min,
+    float output_max,
+    uint32_t flags,
+    xnn_operator_t* add_op_out);
+
+enum xnn_status xnn_setup_add_nd_f32(
+    xnn_operator_t add_op,
+    size_t num_input1_dims,
+    const size_t* input1_shape,
+    size_t num_input2_dims,
+    const size_t* input2_shape,
+    const float* input1,
+    const float* input2,
+    float* output,
+    pthreadpool_t threadpool);
+
 enum xnn_status xnn_create_argmax_pooling2d_nhwc_f32(
     uint32_t input_padding_top,
     uint32_t input_padding_right,