ND Divide operator with broadcasting support

PiperOrigin-RevId: 284269401
diff --git a/BUILD.bazel b/BUILD.bazel
index c88c357..676c071 100644
--- a/BUILD.bazel
+++ b/BUILD.bazel
@@ -2333,6 +2333,15 @@
 )
 
 xnnpack_unit_test(
+    name = "divide_nd_test",
+    srcs = [
+        "test/binary-elementwise-operator-tester.h",
+        "test/divide-nd.cc",
+    ],
+    deps = OPERATOR_TEST_DEPS,
+)
+
+xnnpack_unit_test(
     name = "fully_connected_nc_test",
     srcs = [
         "test/fully-connected-nc.cc",
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 38e5fec..ccad7f4 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1167,6 +1167,15 @@
   TARGET_LINK_LIBRARIES(deconvolution-nhwc-test PRIVATE XNNPACK gtest gtest_main)
   ADD_TEST(deconvolution-nhwc-test deconvolution-nhwc-test)
 
+  ADD_EXECUTABLE(divide-nd-test test/divide-nd.cc)
+  SET_TARGET_PROPERTIES(divide-nd-test PROPERTIES
+    CXX_STANDARD 11
+    CXX_STANDARD_REQUIRED YES
+    CXX_EXTENSIONS NO)
+  TARGET_INCLUDE_DIRECTORIES(divide-nd-test PRIVATE src test)
+  TARGET_LINK_LIBRARIES(divide-nd-test PRIVATE XNNPACK gtest gtest_main)
+  ADD_TEST(divide-nd-test divide-nd-test)
+
   ADD_EXECUTABLE(fully-connected-nc-test test/fully-connected-nc.cc)
   SET_TARGET_PROPERTIES(fully-connected-nc-test PROPERTIES
     CXX_STANDARD 11
diff --git a/README.md b/README.md
index 23bb14b..5ac5a70 100644
--- a/README.md
+++ b/README.md
@@ -23,6 +23,7 @@
 - 2D Bilinear Resize
 - Add (including broadcasting, two inputs only)
 - Subtract (including broadcasting)
+- Divide (including broadcasting)
 - Maximum (including broadcasting)
 - Minimum (including broadcasting)
 - Multiply (including broadcasting)
diff --git a/include/xnnpack.h b/include/xnnpack.h
index 01546ad..4fdc300 100644
--- a/include/xnnpack.h
+++ b/include/xnnpack.h
@@ -306,6 +306,23 @@
     float* output,
     pthreadpool_t threadpool);
 
+enum xnn_status xnn_create_divide_nd_f32(
+    float output_min,
+    float output_max,
+    uint32_t flags,
+    xnn_operator_t* divide_op_out);
+
+enum xnn_status xnn_setup_divide_nd_f32(
+    xnn_operator_t divide_op,
+    size_t num_input1_dims,
+    const size_t* input1_shape,
+    size_t num_input2_dims,
+    const size_t* input2_shape,
+    const float* input1,
+    const float* input2,
+    float* output,
+    pthreadpool_t threadpool);
+
 enum xnn_status xnn_create_fully_connected_nc_f32(
     size_t input_channels,
     size_t output_channels,
diff --git a/src/binary-elementwise-nd.c b/src/binary-elementwise-nd.c
index 959bce7..c3161b6 100644
--- a/src/binary-elementwise-nd.c
+++ b/src/binary-elementwise-nd.c
@@ -28,7 +28,7 @@
   enum xnn_status status = xnn_status_uninitialized;
 
   if (!xnn_params.initialized) {
-    xnn_log_error("failed to create Add/Subtract/Multiply operator: XNNPACK is not initialized");
+    xnn_log_error("failed to create Add/Subtract/Multiply/Divide/Minimum/Maximum operator: XNNPACK is not initialized");
     goto error;
   }
 
@@ -36,19 +36,19 @@
 
   if (isnan(output_min)) {
     xnn_log_error(
-      "failed to create Add/Subtract/Multiply operator with NaN output lower bound: lower bound must be non-NaN");
+      "failed to create Add/Subtract/Multiply/Divide/Minimum/Maximum operator with NaN output lower bound: lower bound must be non-NaN");
     goto error;
   }
 
   if (isnan(output_max)) {
     xnn_log_error(
-      "failed to create Add/Subtract/Multiply operator with NaN output upper bound: upper bound must be non-NaN");
+      "failed to create Add/Subtract/Multiply/Divide/Minimum/Maximum operator with NaN output upper bound: upper bound must be non-NaN");
     goto error;
   }
 
   if (output_min >= output_max) {
     xnn_log_error(
-      "failed to create Add/Subtract/Multiply operator with [%.7g, %.7g] output range: lower bound must be below upper bound",
+      "failed to create Add/Subtract/Multiply/Divide/Minimum/Maximum operator with [%.7g, %.7g] output range: lower bound must be below upper bound",
       output_min, output_max);
     goto error;
   }
@@ -57,7 +57,7 @@
 
   binary_elementwise_op = xnn_allocate_zero_simd_memory(sizeof(struct xnn_operator));
   if (binary_elementwise_op == NULL) {
-    xnn_log_error("failed to allocate %zu bytes for Add/Subtract/Multiply operator descriptor", sizeof(struct xnn_operator));
+    xnn_log_error("failed to allocate %zu bytes for Add/Subtract/Multiply/Divide/Minimum/Maximum operator descriptor", sizeof(struct xnn_operator));
     goto error;
   }
 
@@ -86,6 +86,16 @@
     output_min, output_max, flags, xnn_operator_type_add_nd_f32, add_op_out);
 }
 
+enum xnn_status xnn_create_divide_nd_f32(
+    float output_min,
+    float output_max,
+    uint32_t flags,
+    xnn_operator_t* divide_op_out)
+{
+  return create_binary_elementwise_nd_f32(
+    output_min, output_max, flags, xnn_operator_type_divide_nd_f32, divide_op_out);
+}
+
 enum xnn_status xnn_create_maximum_nd_f32(
     uint32_t flags,
     xnn_operator_t* maximum_op_out)
@@ -138,19 +148,19 @@
     size_t num_threads)
 {
   if (binary_elementwise_op->type != expected_operator_type) {
-    xnn_log_error("failed to setup Add/Subtract/Multiply (ND, F32) operator: operator type mismatch");
+    xnn_log_error("failed to setup Add/Subtract/Multiply/Divide/Minimum/Maximum (ND, F32) operator: operator type mismatch");
     return xnn_status_invalid_parameter;
   }
   binary_elementwise_op->state = xnn_run_state_invalid;
 
   if (!xnn_params.initialized) {
-    xnn_log_error("failed to setup Add/Subtract/Multiply operator: XNNPACK is not initialized");
+    xnn_log_error("failed to setup Add/Subtract/Multiply/Divide/Minimum/Maximum operator: XNNPACK is not initialized");
     return xnn_status_uninitialized;
   }
 
   if (max(num_input1_dims, num_input2_dims) > XNN_MAX_TENSOR_DIMS) {
     xnn_log_error(
-      "failed to setup Add/Subtract/Multiply operator with %zu and %zu dimensions in input shapes: "
+      "failed to setup Add/Subtract/Multiply/Divide/Minimum/Maximum operator with %zu and %zu dimensions in input shapes: "
       "the number of input dimensions must not exceed %d",
       num_input1_dims, num_input2_dims, XNN_MAX_TENSOR_DIMS);
     return xnn_status_unsupported_parameter;
@@ -158,14 +168,14 @@
 
   for (size_t i = 0; i < num_input1_dims; i++) {
     if (input1_shape[i] == 0) {
-      xnn_log_error("failed to setup Add/Subtract/Multiply operator: shape dimension #%zu of input #1 is zero", i);
+      xnn_log_error("failed to setup Add/Subtract/Multiply/Divide/Minimum/Maximum operator: shape dimension #%zu of input #1 is zero", i);
       return xnn_status_invalid_parameter;
     }
   }
 
   for (size_t i = 0; i < num_input2_dims; i++) {
     if (input2_shape[i] == 0) {
-      xnn_log_error("failed to setup Add/Subtract/Multiply operator: shape dimension #%zu of input #2 is zero", i);
+      xnn_log_error("failed to setup Add/Subtract/Multiply/Divide/Minimum/Maximum operator: shape dimension #%zu of input #2 is zero", i);
       return xnn_status_invalid_parameter;
     }
   }
@@ -217,7 +227,7 @@
       compressed_input2_shape[num_compressed_dims - 1] *= input1_dim;
       compressed_output_shape[num_compressed_dims - 1] *= input1_dim;
     } else {
-      xnn_log_error("failed to setup Add/Subtract/Multiply operator: "
+      xnn_log_error("failed to setup Add/Subtract/Multiply/Divide/Minimum/Maximum operator: "
         "shape dimension #%zu of input1 (%zu) does not match shape dimension #%zu of input2 (%zu)",
         num_input1_dims - i, input1_dim, num_input2_dims - i, input2_dim);
       return xnn_status_invalid_parameter;
@@ -313,6 +323,26 @@
     pthreadpool_get_threads_count(threadpool));
 }
 
+enum xnn_status xnn_setup_divide_nd_f32(
+    xnn_operator_t divide_op,
+    size_t num_input1_dims,
+    const size_t* input1_shape,
+    size_t num_input2_dims,
+    const size_t* input2_shape,
+    const float* input1,
+    const float* input2,
+    float* output,
+    pthreadpool_t threadpool)
+{
+  return setup_binary_elementwise_nd_f32(
+    divide_op, xnn_operator_type_divide_nd_f32,
+    num_input1_dims, input1_shape,
+    num_input2_dims, input2_shape,
+    input1, input2, output,
+    &xnn_params.f32.vdiv,
+    pthreadpool_get_threads_count(threadpool));
+}
+
 enum xnn_status xnn_setup_maximum_nd_f32(
     xnn_operator_t maximum_op,
     size_t num_input1_dims,
diff --git a/src/init.c b/src/init.c
index 36bbbed..c0f17bf 100644
--- a/src/init.c
+++ b/src/init.c
@@ -224,6 +224,12 @@
       .ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vaddc_ukernel__neon_x8,
       .element_tile = 8,
     };
+    xnn_params.f32.vdiv = (struct vbinary_parameters) {
+      .op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vdiv_ukernel__scalar_x2,
+      .opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vdivc_ukernel__scalar_x2,
+      .ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vrdivc_ukernel__scalar_x2,
+      .element_tile = 2,
+    };
     xnn_params.f32.vmax = (struct vbinary_parameters) {
       .op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmax_ukernel__neon_x8,
       .opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmaxc_ukernel__neon_x8,
@@ -522,6 +528,12 @@
       .ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vaddc_ukernel__neon_x8,
       .element_tile = 8,
     };
+    xnn_params.f32.vdiv = (struct vbinary_parameters) {
+      .op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vdiv_ukernel__neon_x8,
+      .opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vdivc_ukernel__neon_x8,
+      .ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vrdivc_ukernel__neon_x8,
+      .element_tile = 8,
+    };
     xnn_params.f32.vmax = (struct vbinary_parameters) {
       .op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmax_ukernel__neon_x8,
       .opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmaxc_ukernel__neon_x8,
@@ -848,6 +860,12 @@
       .ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vaddc_ukernel__sse_x8,
       .element_tile = 8,
     };
+    xnn_params.f32.vdiv = (struct vbinary_parameters) {
+      .op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vdiv_ukernel__sse_x8,
+      .opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vdivc_ukernel__sse_x8,
+      .ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vrdivc_ukernel__sse_x8,
+      .element_tile = 8,
+    };
     xnn_params.f32.vmax = (struct vbinary_parameters) {
       .op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmax_ukernel__sse_x8,
       .opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmaxc_ukernel__sse_x8,
@@ -1072,6 +1090,12 @@
       .ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vaddc_ukernel__psimd_x8,
       .element_tile = 8,
     };
+    xnn_params.f32.vdiv = (struct vbinary_parameters) {
+      .op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vdiv_ukernel__psimd_x4,
+      .opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vdivc_ukernel__psimd_x4,
+      .ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vrdivc_ukernel__psimd_x4,  // division is not commutative: use the reversed-operand kernel, as on the other targets
+      .element_tile = 4,
+    };
     xnn_params.f32.vmax = (struct vbinary_parameters) {
       .op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmax_ukernel__psimd_x8,
       .opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmaxc_ukernel__psimd_x8,
@@ -1271,6 +1295,12 @@
       .ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vaddc_ukernel__wasm_x4,
       .element_tile = 8,
     };
+    xnn_params.f32.vdiv = (struct vbinary_parameters) {
+      .op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vdiv_ukernel__wasm_x2,
+      .opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vdivc_ukernel__wasm_x2,
+      .ropc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vrdivc_ukernel__wasm_x2,
+      .element_tile = 2,
+    };
     xnn_params.f32.vmax = (struct vbinary_parameters) {
       .op_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmax_ukernel__wasm_x4,
       .opc_ukernel = (xnn_vbinary_ukernel_function) xnn_f32_vmaxc_ukernel__wasm_x4,
diff --git a/src/xnnpack/operator.h b/src/xnnpack/operator.h
index fed1165..9ff2564 100644
--- a/src/xnnpack/operator.h
+++ b/src/xnnpack/operator.h
@@ -62,6 +62,7 @@
   xnn_operator_type_convolution_nchw_f32,
   xnn_operator_type_deconvolution_nhwc_f32,
   xnn_operator_type_deconvolution_nhwc_q8,
+  xnn_operator_type_divide_nd_f32,
   xnn_operator_type_fully_connected_nc_f32,
   xnn_operator_type_fully_connected_nc_q8,
   xnn_operator_type_global_average_pooling_nwc_f32,
diff --git a/src/xnnpack/params.h b/src/xnnpack/params.h
index a047d62..8de5adc 100644
--- a/src/xnnpack/params.h
+++ b/src/xnnpack/params.h
@@ -1363,6 +1363,7 @@
     xnn_univector_ukernel_function sigmoid;
     struct prelu_parameters prelu;
     struct vbinary_parameters vadd;
+    struct vbinary_parameters vdiv;
     struct vbinary_parameters vmax;
     struct vbinary_parameters vmin;
     struct vbinary_parameters vmul;
diff --git a/test/binary-elementwise-operator-tester.h b/test/binary-elementwise-operator-tester.h
index 9a45854..16b8a52 100644
--- a/test/binary-elementwise-operator-tester.h
+++ b/test/binary-elementwise-operator-tester.h
@@ -26,6 +26,7 @@
   enum class OperationType {
     Unknown,
     Add,
+    Divide,
     Maximum,
     Minimum,
     Multiply,
@@ -118,6 +119,8 @@
     switch (operation_type()) {
       case OperationType::Add:
         return a + b;
+      case OperationType::Divide:
+        return a / b;
       case OperationType::Maximum:
         return std::max<float>(a, b);
       case OperationType::Minimum:
@@ -136,7 +139,7 @@
 
     std::random_device random_device;
     auto rng = std::mt19937(random_device());
-    auto f32rng = std::bind(std::uniform_real_distribution<float>(0.0f, 1.0f), rng);
+    auto f32rng = std::bind(std::uniform_real_distribution<float>(0.01f, 1.0f), rng);
 
     // Compute generalized shapes.
     std::array<size_t, XNN_MAX_TENSOR_DIMS> input1_dims;
@@ -217,6 +220,12 @@
               output_min, output_max,
               0, &binary_elementwise_op));
           break;
+        case OperationType::Divide:
+          ASSERT_EQ(xnn_status_success,
+            xnn_create_divide_nd_f32(
+              output_min, output_max,
+              0, &binary_elementwise_op));
+          break;
         case OperationType::Maximum:
           ASSERT_EQ(xnn_status_success,
             xnn_create_maximum_nd_f32(
@@ -259,6 +268,17 @@
               input1.data(), input2.data(), output.data(),
               nullptr /* thread pool */));
           break;
+        case OperationType::Divide:
+          ASSERT_EQ(xnn_status_success,
+            xnn_setup_divide_nd_f32(
+              binary_elementwise_op,
+              num_input1_dims(),
+              input1_shape().data(),
+              num_input2_dims(),
+              input2_shape().data(),
+              input1.data(), input2.data(), output.data(),
+              nullptr /* thread pool */));
+          break;
         case OperationType::Maximum:
           ASSERT_EQ(xnn_status_success,
             xnn_setup_maximum_nd_f32(
diff --git a/test/divide-nd.cc b/test/divide-nd.cc
new file mode 100644
index 0000000..a2e793f
--- /dev/null
+++ b/test/divide-nd.cc
@@ -0,0 +1,1154 @@
+// Copyright 2019 Google LLC
+//
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree.
+
+#include <gtest/gtest.h>
+
+#include "binary-elementwise-operator-tester.h"
+
+constexpr size_t kDim1 = 2;
+constexpr size_t kDim2 = 3;
+constexpr size_t kDim3 = 4;
+constexpr size_t kDim4 = 5;
+constexpr size_t kDim5 = 6;
+constexpr size_t kDim6 = 7;
+
+
+TEST(DIVIDE_ND_F32, 0d_x_0d) {
+  BinaryElementwiseOperatorTester()
+    .operation_type(BinaryElementwiseOperatorTester::OperationType::Divide)
+    .TestF32();
+}
+
+TEST(DIVIDE_ND_F32, 1d_x_0d) {
+  for (uint32_t bm1 = 0; bm1 < (uint32_t(1) << 1); bm1++) {
+    const bool input1_broadcast_dim1 = bm1 & (uint32_t(1) << 0);
+    const size_t input1_dim1 = input1_broadcast_dim1 ? 1 : kDim1;
+    BinaryElementwiseOperatorTester()
+      .operation_type(BinaryElementwiseOperatorTester::OperationType::Divide)
+      .input1_shape({input1_dim1})
+      .TestF32();
+  }
+}
+
+TEST(DIVIDE_ND_F32, 0d_x_1d) {
+  for (uint32_t bm2 = 0; bm2 < (uint32_t(1) << 1); bm2++) {
+    const bool input2_broadcast_dim1 = bm2 & (uint32_t(1) << 0);
+    const size_t input2_dim1 = input2_broadcast_dim1 ? 1 : kDim1;
+    BinaryElementwiseOperatorTester()
+      .operation_type(BinaryElementwiseOperatorTester::OperationType::Divide)
+      .input2_shape({input2_dim1})
+      .TestF32();
+  }
+}
+
+TEST(DIVIDE_ND_F32, 1d_x_1d) {
+  for (uint32_t bm1 = 0; bm1 < (uint32_t(1) << 1); bm1++) {
+    for (uint32_t bm2 = 0; bm2 < (uint32_t(1) << 1); bm2++) {
+      const bool input1_broadcast_dim1 = bm1 & (uint32_t(1) << 0);
+      const bool input2_broadcast_dim1 = bm2 & (uint32_t(1) << 0);
+      const size_t input1_dim1 = input1_broadcast_dim1 ? 1 : kDim1;
+      const size_t input2_dim1 = input2_broadcast_dim1 ? 1 : kDim1;
+      BinaryElementwiseOperatorTester()
+        .operation_type(BinaryElementwiseOperatorTester::OperationType::Divide)
+        .input1_shape({input1_dim1})
+        .input2_shape({input2_dim1})
+        .TestF32();
+    }
+  }
+}
+
+TEST(DIVIDE_ND_F32, 0d_x_2d) {
+  for (uint32_t bm2 = 0; bm2 < (uint32_t(1) << 2); bm2++) {
+    const bool input2_broadcast_dim1 = bm2 & (uint32_t(1) << 0);
+    const bool input2_broadcast_dim2 = bm2 & (uint32_t(1) << 1);
+    const size_t input2_dim1 = input2_broadcast_dim1 ? 1 : kDim1;
+    const size_t input2_dim2 = input2_broadcast_dim2 ? 1 : kDim2;
+    BinaryElementwiseOperatorTester()
+      .operation_type(BinaryElementwiseOperatorTester::OperationType::Divide)
+      .input2_shape({input2_dim2, input2_dim1})
+      .TestF32();
+  }
+}
+
+TEST(DIVIDE_ND_F32, 1d_x_2d) {
+  for (uint32_t bm1 = 0; bm1 < (uint32_t(1) << 1); bm1++) {
+    for (uint32_t bm2 = 0; bm2 < (uint32_t(1) << 2); bm2++) {
+      const bool input1_broadcast_dim1 = bm1 & (uint32_t(1) << 0);
+      const bool input2_broadcast_dim1 = bm2 & (uint32_t(1) << 0);
+      const bool input2_broadcast_dim2 = bm2 & (uint32_t(1) << 1);
+      const size_t input1_dim1 = input1_broadcast_dim1 ? 1 : kDim1;
+      const size_t input2_dim1 = input2_broadcast_dim1 ? 1 : kDim1;
+      const size_t input2_dim2 = input2_broadcast_dim2 ? 1 : kDim2;
+      BinaryElementwiseOperatorTester()
+        .operation_type(BinaryElementwiseOperatorTester::OperationType::Divide)
+        .input1_shape({input1_dim1})
+        .input2_shape({input2_dim2, input2_dim1})
+        .TestF32();
+    }
+  }
+}
+
+TEST(DIVIDE_ND_F32, 2d_x_0d) {
+  for (uint32_t bm1 = 0; bm1 < (uint32_t(1) << 2); bm1++) {
+    const bool input1_broadcast_dim1 = bm1 & (uint32_t(1) << 0);
+    const bool input1_broadcast_dim2 = bm1 & (uint32_t(1) << 1);
+    const size_t input1_dim1 = input1_broadcast_dim1 ? 1 : kDim1;
+    const size_t input1_dim2 = input1_broadcast_dim2 ? 1 : kDim2;
+    BinaryElementwiseOperatorTester()
+      .operation_type(BinaryElementwiseOperatorTester::OperationType::Divide)
+      .input1_shape({input1_dim2, input1_dim1})
+      .TestF32();
+  }
+}
+
+TEST(DIVIDE_ND_F32, 2d_x_1d) {
+  for (uint32_t bm1 = 0; bm1 < (uint32_t(1) << 2); bm1++) {
+    for (uint32_t bm2 = 0; bm2 < (uint32_t(1) << 1); bm2++) {
+      const bool input1_broadcast_dim1 = bm1 & (uint32_t(1) << 0);
+      const bool input1_broadcast_dim2 = bm1 & (uint32_t(1) << 1);
+      const bool input2_broadcast_dim1 = bm2 & (uint32_t(1) << 0);
+      const size_t input1_dim1 = input1_broadcast_dim1 ? 1 : kDim1;
+      const size_t input1_dim2 = input1_broadcast_dim2 ? 1 : kDim2;
+      const size_t input2_dim1 = input2_broadcast_dim1 ? 1 : kDim1;
+      BinaryElementwiseOperatorTester()
+        .operation_type(BinaryElementwiseOperatorTester::OperationType::Divide)
+        .input1_shape({input1_dim2, input1_dim1})
+        .input2_shape({input2_dim1})
+        .TestF32();
+    }
+  }
+}
+
+TEST(DIVIDE_ND_F32, 2d_x_2d) {
+  for (uint32_t bm1 = 0; bm1 < (uint32_t(1) << 2); bm1++) {
+    for (uint32_t bm2 = 0; bm2 < (uint32_t(1) << 2); bm2++) {
+      const bool input1_broadcast_dim1 = bm1 & (uint32_t(1) << 0);
+      const bool input1_broadcast_dim2 = bm1 & (uint32_t(1) << 1);
+      const bool input2_broadcast_dim1 = bm2 & (uint32_t(1) << 0);
+      const bool input2_broadcast_dim2 = bm2 & (uint32_t(1) << 1);
+      const size_t input1_dim1 = input1_broadcast_dim1 ? 1 : kDim1;
+      const size_t input1_dim2 = input1_broadcast_dim2 ? 1 : kDim2;
+      const size_t input2_dim1 = input2_broadcast_dim1 ? 1 : kDim1;
+      const size_t input2_dim2 = input2_broadcast_dim2 ? 1 : kDim2;
+      BinaryElementwiseOperatorTester()
+        .operation_type(BinaryElementwiseOperatorTester::OperationType::Divide)
+        .input1_shape({input1_dim2, input1_dim1})
+        .input2_shape({input2_dim2, input2_dim1})
+        .TestF32();
+    }
+  }
+}
+
+TEST(DIVIDE_ND_F32, 0d_x_3d) {
+  for (uint32_t bm2 = 0; bm2 < (uint32_t(1) << 3); bm2++) {
+    const bool input2_broadcast_dim1 = bm2 & (uint32_t(1) << 0);
+    const bool input2_broadcast_dim2 = bm2 & (uint32_t(1) << 1);
+    const bool input2_broadcast_dim3 = bm2 & (uint32_t(1) << 2);
+    const size_t input2_dim1 = input2_broadcast_dim1 ? 1 : kDim1;
+    const size_t input2_dim2 = input2_broadcast_dim2 ? 1 : kDim2;
+    const size_t input2_dim3 = input2_broadcast_dim3 ? 1 : kDim3;
+    BinaryElementwiseOperatorTester()
+      .operation_type(BinaryElementwiseOperatorTester::OperationType::Divide)
+      .input2_shape({input2_dim3, input2_dim2, input2_dim1})
+      .TestF32();
+  }
+}
+
+TEST(DIVIDE_ND_F32, 1d_x_3d) {
+  for (uint32_t bm1 = 0; bm1 < (uint32_t(1) << 1); bm1++) {
+    for (uint32_t bm2 = 0; bm2 < (uint32_t(1) << 3); bm2++) {
+      const bool input1_broadcast_dim1 = bm1 & (uint32_t(1) << 0);
+      const bool input2_broadcast_dim1 = bm2 & (uint32_t(1) << 0);
+      const bool input2_broadcast_dim2 = bm2 & (uint32_t(1) << 1);
+      const bool input2_broadcast_dim3 = bm2 & (uint32_t(1) << 2);
+      const size_t input1_dim1 = input1_broadcast_dim1 ? 1 : kDim1;
+      const size_t input2_dim1 = input2_broadcast_dim1 ? 1 : kDim1;
+      const size_t input2_dim2 = input2_broadcast_dim2 ? 1 : kDim2;
+      const size_t input2_dim3 = input2_broadcast_dim3 ? 1 : kDim3;
+      BinaryElementwiseOperatorTester()
+        .operation_type(BinaryElementwiseOperatorTester::OperationType::Divide)
+        .input1_shape({input1_dim1})
+        .input2_shape({input2_dim3, input2_dim2, input2_dim1})
+        .TestF32();
+    }
+  }
+}
+
+TEST(DIVIDE_ND_F32, 2d_x_3d) {
+  for (uint32_t bm1 = 0; bm1 < (uint32_t(1) << 2); bm1++) {
+    for (uint32_t bm2 = 0; bm2 < (uint32_t(1) << 3); bm2++) {
+      const bool input1_broadcast_dim1 = bm1 & (uint32_t(1) << 0);
+      const bool input1_broadcast_dim2 = bm1 & (uint32_t(1) << 1);
+      const bool input2_broadcast_dim1 = bm2 & (uint32_t(1) << 0);
+      const bool input2_broadcast_dim2 = bm2 & (uint32_t(1) << 1);
+      const bool input2_broadcast_dim3 = bm2 & (uint32_t(1) << 2);
+      const size_t input1_dim1 = input1_broadcast_dim1 ? 1 : kDim1;
+      const size_t input1_dim2 = input1_broadcast_dim2 ? 1 : kDim2;
+      const size_t input2_dim1 = input2_broadcast_dim1 ? 1 : kDim1;
+      const size_t input2_dim2 = input2_broadcast_dim2 ? 1 : kDim2;
+      const size_t input2_dim3 = input2_broadcast_dim3 ? 1 : kDim3;
+      BinaryElementwiseOperatorTester()
+        .operation_type(BinaryElementwiseOperatorTester::OperationType::Divide)
+        .input1_shape({input1_dim2, input1_dim1})
+        .input2_shape({input2_dim3, input2_dim2, input2_dim1})
+        .TestF32();
+    }
+  }
+}
+
+TEST(DIVIDE_ND_F32, 3d_x_0d) {
+  for (uint32_t bm1 = 0; bm1 < (uint32_t(1) << 3); bm1++) {
+    const bool input1_broadcast_dim1 = bm1 & (uint32_t(1) << 0);
+    const bool input1_broadcast_dim2 = bm1 & (uint32_t(1) << 1);
+    const bool input1_broadcast_dim3 = bm1 & (uint32_t(1) << 2);
+    const size_t input1_dim1 = input1_broadcast_dim1 ? 1 : kDim1;
+    const size_t input1_dim2 = input1_broadcast_dim2 ? 1 : kDim2;
+    const size_t input1_dim3 = input1_broadcast_dim3 ? 1 : kDim3;
+    BinaryElementwiseOperatorTester()
+      .operation_type(BinaryElementwiseOperatorTester::OperationType::Divide)
+      .input1_shape({input1_dim3, input1_dim2, input1_dim1})
+      .TestF32();
+  }
+}
+
+TEST(DIVIDE_ND_F32, 3d_x_1d) {
+  for (uint32_t bm1 = 0; bm1 < (uint32_t(1) << 3); bm1++) {
+    for (uint32_t bm2 = 0; bm2 < (uint32_t(1) << 1); bm2++) {
+      const bool input1_broadcast_dim1 = bm1 & (uint32_t(1) << 0);
+      const bool input1_broadcast_dim2 = bm1 & (uint32_t(1) << 1);
+      const bool input1_broadcast_dim3 = bm1 & (uint32_t(1) << 2);
+      const bool input2_broadcast_dim1 = bm2 & (uint32_t(1) << 0);
+      const size_t input1_dim1 = input1_broadcast_dim1 ? 1 : kDim1;
+      const size_t input1_dim2 = input1_broadcast_dim2 ? 1 : kDim2;
+      const size_t input1_dim3 = input1_broadcast_dim3 ? 1 : kDim3;
+      const size_t input2_dim1 = input2_broadcast_dim1 ? 1 : kDim1;
+      BinaryElementwiseOperatorTester()
+        .operation_type(BinaryElementwiseOperatorTester::OperationType::Divide)
+        .input1_shape({input1_dim3, input1_dim2, input1_dim1})
+        .input2_shape({input2_dim1})
+        .TestF32();
+    }
+  }
+}
+
+TEST(DIVIDE_ND_F32, 3d_x_2d) {
+  for (uint32_t bm1 = 0; bm1 < (uint32_t(1) << 3); bm1++) {
+    for (uint32_t bm2 = 0; bm2 < (uint32_t(1) << 2); bm2++) {
+      const bool input1_broadcast_dim1 = bm1 & (uint32_t(1) << 0);
+      const bool input1_broadcast_dim2 = bm1 & (uint32_t(1) << 1);
+      const bool input1_broadcast_dim3 = bm1 & (uint32_t(1) << 2);
+      const bool input2_broadcast_dim1 = bm2 & (uint32_t(1) << 0);
+      const bool input2_broadcast_dim2 = bm2 & (uint32_t(1) << 1);
+      const size_t input1_dim1 = input1_broadcast_dim1 ? 1 : kDim1;
+      const size_t input1_dim2 = input1_broadcast_dim2 ? 1 : kDim2;
+      const size_t input1_dim3 = input1_broadcast_dim3 ? 1 : kDim3;
+      const size_t input2_dim1 = input2_broadcast_dim1 ? 1 : kDim1;
+      const size_t input2_dim2 = input2_broadcast_dim2 ? 1 : kDim2;
+      BinaryElementwiseOperatorTester()
+        .operation_type(BinaryElementwiseOperatorTester::OperationType::Divide)
+        .input1_shape({input1_dim3, input1_dim2, input1_dim1})
+        .input2_shape({input2_dim2, input2_dim1})
+        .TestF32();
+    }
+  }
+}
+
+TEST(DIVIDE_ND_F32, 3d_x_3d) {
+  for (uint32_t bm1 = 0; bm1 < (uint32_t(1) << 3); bm1++) {
+    for (uint32_t bm2 = 0; bm2 < (uint32_t(1) << 3); bm2++) {
+      const bool input1_broadcast_dim1 = bm1 & (uint32_t(1) << 0);
+      const bool input1_broadcast_dim2 = bm1 & (uint32_t(1) << 1);
+      const bool input1_broadcast_dim3 = bm1 & (uint32_t(1) << 2);
+      const bool input2_broadcast_dim1 = bm2 & (uint32_t(1) << 0);
+      const bool input2_broadcast_dim2 = bm2 & (uint32_t(1) << 1);
+      const bool input2_broadcast_dim3 = bm2 & (uint32_t(1) << 2);
+      const size_t input1_dim1 = input1_broadcast_dim1 ? 1 : kDim1;
+      const size_t input1_dim2 = input1_broadcast_dim2 ? 1 : kDim2;
+      const size_t input1_dim3 = input1_broadcast_dim3 ? 1 : kDim3;
+      const size_t input2_dim1 = input2_broadcast_dim1 ? 1 : kDim1;
+      const size_t input2_dim2 = input2_broadcast_dim2 ? 1 : kDim2;
+      const size_t input2_dim3 = input2_broadcast_dim3 ? 1 : kDim3;
+      BinaryElementwiseOperatorTester()
+        .operation_type(BinaryElementwiseOperatorTester::OperationType::Divide)
+        .input1_shape({input1_dim3, input1_dim2, input1_dim1})
+        .input2_shape({input2_dim3, input2_dim2, input2_dim1})
+        .TestF32();
+    }
+  }
+}
+
+TEST(DIVIDE_ND_F32, 0d_x_4d) {
+  for (uint32_t bm2 = 0; bm2 < (uint32_t(1) << 4); bm2++) {
+    const bool input2_broadcast_dim1 = bm2 & (uint32_t(1) << 0);
+    const bool input2_broadcast_dim2 = bm2 & (uint32_t(1) << 1);
+    const bool input2_broadcast_dim3 = bm2 & (uint32_t(1) << 2);
+    const bool input2_broadcast_dim4 = bm2 & (uint32_t(1) << 3);
+    const size_t input2_dim1 = input2_broadcast_dim1 ? 1 : kDim1;
+    const size_t input2_dim2 = input2_broadcast_dim2 ? 1 : kDim2;
+    const size_t input2_dim3 = input2_broadcast_dim3 ? 1 : kDim3;
+    const size_t input2_dim4 = input2_broadcast_dim4 ? 1 : kDim4;
+    BinaryElementwiseOperatorTester()
+      .operation_type(BinaryElementwiseOperatorTester::OperationType::Divide)
+      .input2_shape({input2_dim4, input2_dim3, input2_dim2, input2_dim1})
+      .TestF32();
+  }
+}
+
+TEST(DIVIDE_ND_F32, 1d_x_4d) {
+  for (uint32_t bm1 = 0; bm1 < (uint32_t(1) << 1); bm1++) {
+    for (uint32_t bm2 = 0; bm2 < (uint32_t(1) << 4); bm2++) {
+      const bool input1_broadcast_dim1 = bm1 & (uint32_t(1) << 0);
+      const bool input2_broadcast_dim1 = bm2 & (uint32_t(1) << 0);
+      const bool input2_broadcast_dim2 = bm2 & (uint32_t(1) << 1);
+      const bool input2_broadcast_dim3 = bm2 & (uint32_t(1) << 2);
+      const bool input2_broadcast_dim4 = bm2 & (uint32_t(1) << 3);
+      const size_t input1_dim1 = input1_broadcast_dim1 ? 1 : kDim1;
+      const size_t input2_dim1 = input2_broadcast_dim1 ? 1 : kDim1;
+      const size_t input2_dim2 = input2_broadcast_dim2 ? 1 : kDim2;
+      const size_t input2_dim3 = input2_broadcast_dim3 ? 1 : kDim3;
+      const size_t input2_dim4 = input2_broadcast_dim4 ? 1 : kDim4;
+      BinaryElementwiseOperatorTester()
+        .operation_type(BinaryElementwiseOperatorTester::OperationType::Divide)
+        .input1_shape({input1_dim1})
+        .input2_shape({input2_dim4, input2_dim3, input2_dim2, input2_dim1})
+        .TestF32();
+    }
+  }
+}
+
+TEST(DIVIDE_ND_F32, 2d_x_4d) {
+  for (uint32_t bm1 = 0; bm1 < (uint32_t(1) << 2); bm1++) {
+    for (uint32_t bm2 = 0; bm2 < (uint32_t(1) << 4); bm2++) {
+      const bool input1_broadcast_dim1 = bm1 & (uint32_t(1) << 0);
+      const bool input1_broadcast_dim2 = bm1 & (uint32_t(1) << 1);
+      const bool input2_broadcast_dim1 = bm2 & (uint32_t(1) << 0);
+      const bool input2_broadcast_dim2 = bm2 & (uint32_t(1) << 1);
+      const bool input2_broadcast_dim3 = bm2 & (uint32_t(1) << 2);
+      const bool input2_broadcast_dim4 = bm2 & (uint32_t(1) << 3);
+      const size_t input1_dim1 = input1_broadcast_dim1 ? 1 : kDim1;
+      const size_t input1_dim2 = input1_broadcast_dim2 ? 1 : kDim2;
+      const size_t input2_dim1 = input2_broadcast_dim1 ? 1 : kDim1;
+      const size_t input2_dim2 = input2_broadcast_dim2 ? 1 : kDim2;
+      const size_t input2_dim3 = input2_broadcast_dim3 ? 1 : kDim3;
+      const size_t input2_dim4 = input2_broadcast_dim4 ? 1 : kDim4;
+      BinaryElementwiseOperatorTester()
+        .operation_type(BinaryElementwiseOperatorTester::OperationType::Divide)
+        .input1_shape({input1_dim2, input1_dim1})
+        .input2_shape({input2_dim4, input2_dim3, input2_dim2, input2_dim1})
+        .TestF32();
+    }
+  }
+}
+
+TEST(DIVIDE_ND_F32, 3d_x_4d) {
+  // Sweeps all broadcast patterns: a set bit k in mask1/mask2 shrinks dimension k+1 of input 1/2 to extent 1.
+  for (uint32_t mask1 = 0; mask1 < (uint32_t(1) << 3); mask1++) {
+    for (uint32_t mask2 = 0; mask2 < (uint32_t(1) << 4); mask2++) {
+      size_t in1_d1 = kDim1;
+      if (mask1 & (uint32_t(1) << 0)) { in1_d1 = 1; }
+      size_t in1_d2 = kDim2;
+      if (mask1 & (uint32_t(1) << 1)) { in1_d2 = 1; }
+      size_t in1_d3 = kDim3;
+      if (mask1 & (uint32_t(1) << 2)) { in1_d3 = 1; }
+      size_t in2_d1 = kDim1;
+      if (mask2 & (uint32_t(1) << 0)) { in2_d1 = 1; }
+      size_t in2_d2 = kDim2;
+      if (mask2 & (uint32_t(1) << 1)) { in2_d2 = 1; }
+      size_t in2_d3 = kDim3;
+      if (mask2 & (uint32_t(1) << 2)) { in2_d3 = 1; }
+      size_t in2_d4 = kDim4;
+      if (mask2 & (uint32_t(1) << 3)) { in2_d4 = 1; }
+      BinaryElementwiseOperatorTester().operation_type(BinaryElementwiseOperatorTester::OperationType::Divide)
+        .input1_shape({in1_d3, in1_d2, in1_d1})
+        .input2_shape({in2_d4, in2_d3, in2_d2, in2_d1})
+        .TestF32();
+    }
+  }
+}
+
+TEST(DIVIDE_ND_F32, 4d_x_0d) {
+  // Sweeps all broadcast patterns of input 1: a set bit k in mask1 shrinks dimension k+1 to extent 1; input2_shape is left unset.
+  for (uint32_t mask1 = 0; mask1 < (uint32_t(1) << 4); mask1++) {
+    size_t in1_d1 = kDim1;
+    if (mask1 & (uint32_t(1) << 0)) { in1_d1 = 1; }
+    size_t in1_d2 = kDim2;
+    if (mask1 & (uint32_t(1) << 1)) { in1_d2 = 1; }
+    size_t in1_d3 = kDim3;
+    if (mask1 & (uint32_t(1) << 2)) { in1_d3 = 1; }
+    size_t in1_d4 = kDim4;
+    if (mask1 & (uint32_t(1) << 3)) { in1_d4 = 1; }
+    BinaryElementwiseOperatorTester().operation_type(BinaryElementwiseOperatorTester::OperationType::Divide)
+      .input1_shape({in1_d4, in1_d3, in1_d2, in1_d1})
+      .TestF32();
+  }
+}
+
+TEST(DIVIDE_ND_F32, 4d_x_1d) {
+  // Sweeps all broadcast patterns: a set bit k in mask1/mask2 shrinks dimension k+1 of input 1/2 to extent 1.
+  for (uint32_t mask1 = 0; mask1 < (uint32_t(1) << 4); mask1++) {
+    for (uint32_t mask2 = 0; mask2 < (uint32_t(1) << 1); mask2++) {
+      size_t in1_d1 = kDim1;
+      if (mask1 & (uint32_t(1) << 0)) { in1_d1 = 1; }
+      size_t in1_d2 = kDim2;
+      if (mask1 & (uint32_t(1) << 1)) { in1_d2 = 1; }
+      size_t in1_d3 = kDim3;
+      if (mask1 & (uint32_t(1) << 2)) { in1_d3 = 1; }
+      size_t in1_d4 = kDim4;
+      if (mask1 & (uint32_t(1) << 3)) { in1_d4 = 1; }
+      size_t in2_d1 = kDim1;
+      if (mask2 & (uint32_t(1) << 0)) { in2_d1 = 1; }
+      BinaryElementwiseOperatorTester().operation_type(BinaryElementwiseOperatorTester::OperationType::Divide)
+        .input1_shape({in1_d4, in1_d3, in1_d2, in1_d1})
+        .input2_shape({in2_d1})
+        .TestF32();
+    }
+  }
+}
+
+TEST(DIVIDE_ND_F32, 4d_x_2d) {
+  // Sweeps all broadcast patterns: a set bit k in mask1/mask2 shrinks dimension k+1 of input 1/2 to extent 1.
+  for (uint32_t mask1 = 0; mask1 < (uint32_t(1) << 4); mask1++) {
+    for (uint32_t mask2 = 0; mask2 < (uint32_t(1) << 2); mask2++) {
+      size_t in1_d1 = kDim1;
+      if (mask1 & (uint32_t(1) << 0)) { in1_d1 = 1; }
+      size_t in1_d2 = kDim2;
+      if (mask1 & (uint32_t(1) << 1)) { in1_d2 = 1; }
+      size_t in1_d3 = kDim3;
+      if (mask1 & (uint32_t(1) << 2)) { in1_d3 = 1; }
+      size_t in1_d4 = kDim4;
+      if (mask1 & (uint32_t(1) << 3)) { in1_d4 = 1; }
+      size_t in2_d1 = kDim1;
+      if (mask2 & (uint32_t(1) << 0)) { in2_d1 = 1; }
+      size_t in2_d2 = kDim2;
+      if (mask2 & (uint32_t(1) << 1)) { in2_d2 = 1; }
+      BinaryElementwiseOperatorTester().operation_type(BinaryElementwiseOperatorTester::OperationType::Divide)
+        .input1_shape({in1_d4, in1_d3, in1_d2, in1_d1})
+        .input2_shape({in2_d2, in2_d1})
+        .TestF32();
+    }
+  }
+}
+
+TEST(DIVIDE_ND_F32, 4d_x_3d) {
+  // Sweeps all broadcast patterns: a set bit k in mask1/mask2 shrinks dimension k+1 of input 1/2 to extent 1.
+  for (uint32_t mask1 = 0; mask1 < (uint32_t(1) << 4); mask1++) {
+    for (uint32_t mask2 = 0; mask2 < (uint32_t(1) << 3); mask2++) {
+      size_t in1_d1 = kDim1;
+      if (mask1 & (uint32_t(1) << 0)) { in1_d1 = 1; }
+      size_t in1_d2 = kDim2;
+      if (mask1 & (uint32_t(1) << 1)) { in1_d2 = 1; }
+      size_t in1_d3 = kDim3;
+      if (mask1 & (uint32_t(1) << 2)) { in1_d3 = 1; }
+      size_t in1_d4 = kDim4;
+      if (mask1 & (uint32_t(1) << 3)) { in1_d4 = 1; }
+      size_t in2_d1 = kDim1;
+      if (mask2 & (uint32_t(1) << 0)) { in2_d1 = 1; }
+      size_t in2_d2 = kDim2;
+      if (mask2 & (uint32_t(1) << 1)) { in2_d2 = 1; }
+      size_t in2_d3 = kDim3;
+      if (mask2 & (uint32_t(1) << 2)) { in2_d3 = 1; }
+      BinaryElementwiseOperatorTester().operation_type(BinaryElementwiseOperatorTester::OperationType::Divide)
+        .input1_shape({in1_d4, in1_d3, in1_d2, in1_d1})
+        .input2_shape({in2_d3, in2_d2, in2_d1})
+        .TestF32();
+    }
+  }
+}
+
+TEST(DIVIDE_ND_F32, 4d_x_4d) {
+  // Sweeps all broadcast patterns: a set bit k in mask1/mask2 shrinks dimension k+1 of input 1/2 to extent 1.
+  for (uint32_t mask1 = 0; mask1 < (uint32_t(1) << 4); mask1++) {
+    for (uint32_t mask2 = 0; mask2 < (uint32_t(1) << 4); mask2++) {
+      size_t in1_d1 = kDim1;
+      if (mask1 & (uint32_t(1) << 0)) { in1_d1 = 1; }
+      size_t in1_d2 = kDim2;
+      if (mask1 & (uint32_t(1) << 1)) { in1_d2 = 1; }
+      size_t in1_d3 = kDim3;
+      if (mask1 & (uint32_t(1) << 2)) { in1_d3 = 1; }
+      size_t in1_d4 = kDim4;
+      if (mask1 & (uint32_t(1) << 3)) { in1_d4 = 1; }
+      size_t in2_d1 = kDim1;
+      if (mask2 & (uint32_t(1) << 0)) { in2_d1 = 1; }
+      size_t in2_d2 = kDim2;
+      if (mask2 & (uint32_t(1) << 1)) { in2_d2 = 1; }
+      size_t in2_d3 = kDim3;
+      if (mask2 & (uint32_t(1) << 2)) { in2_d3 = 1; }
+      size_t in2_d4 = kDim4;
+      if (mask2 & (uint32_t(1) << 3)) { in2_d4 = 1; }
+      BinaryElementwiseOperatorTester().operation_type(BinaryElementwiseOperatorTester::OperationType::Divide)
+        .input1_shape({in1_d4, in1_d3, in1_d2, in1_d1})
+        .input2_shape({in2_d4, in2_d3, in2_d2, in2_d1})
+        .TestF32();
+    }
+  }
+}
+
+TEST(DIVIDE_ND_F32, 0d_x_5d) {
+  // Sweeps all broadcast patterns of input 2: a set bit k in mask2 shrinks dimension k+1 to extent 1; input1_shape is left unset.
+  for (uint32_t mask2 = 0; mask2 < (uint32_t(1) << 5); mask2++) {
+    size_t in2_d1 = kDim1;
+    if (mask2 & (uint32_t(1) << 0)) { in2_d1 = 1; }
+    size_t in2_d2 = kDim2;
+    if (mask2 & (uint32_t(1) << 1)) { in2_d2 = 1; }
+    size_t in2_d3 = kDim3;
+    if (mask2 & (uint32_t(1) << 2)) { in2_d3 = 1; }
+    size_t in2_d4 = kDim4;
+    if (mask2 & (uint32_t(1) << 3)) { in2_d4 = 1; }
+    size_t in2_d5 = kDim5;
+    if (mask2 & (uint32_t(1) << 4)) { in2_d5 = 1; }
+    BinaryElementwiseOperatorTester().operation_type(BinaryElementwiseOperatorTester::OperationType::Divide)
+      .input2_shape({in2_d5, in2_d4, in2_d3, in2_d2, in2_d1})
+      .TestF32();
+  }
+}
+
+TEST(DIVIDE_ND_F32, 1d_x_5d) {
+  // Sweeps all broadcast patterns: a set bit k in mask1/mask2 shrinks dimension k+1 of input 1/2 to extent 1.
+  for (uint32_t mask1 = 0; mask1 < (uint32_t(1) << 1); mask1++) {
+    for (uint32_t mask2 = 0; mask2 < (uint32_t(1) << 5); mask2++) {
+      size_t in1_d1 = kDim1;
+      if (mask1 & (uint32_t(1) << 0)) { in1_d1 = 1; }
+      size_t in2_d1 = kDim1;
+      if (mask2 & (uint32_t(1) << 0)) { in2_d1 = 1; }
+      size_t in2_d2 = kDim2;
+      if (mask2 & (uint32_t(1) << 1)) { in2_d2 = 1; }
+      size_t in2_d3 = kDim3;
+      if (mask2 & (uint32_t(1) << 2)) { in2_d3 = 1; }
+      size_t in2_d4 = kDim4;
+      if (mask2 & (uint32_t(1) << 3)) { in2_d4 = 1; }
+      size_t in2_d5 = kDim5;
+      if (mask2 & (uint32_t(1) << 4)) { in2_d5 = 1; }
+      BinaryElementwiseOperatorTester().operation_type(BinaryElementwiseOperatorTester::OperationType::Divide)
+        .input1_shape({in1_d1})
+        .input2_shape({in2_d5, in2_d4, in2_d3, in2_d2, in2_d1})
+        .TestF32();
+    }
+  }
+}
+
+TEST(DIVIDE_ND_F32, 2d_x_5d) {
+  // Sweeps all broadcast patterns: a set bit k in mask1/mask2 shrinks dimension k+1 of input 1/2 to extent 1.
+  for (uint32_t mask1 = 0; mask1 < (uint32_t(1) << 2); mask1++) {
+    for (uint32_t mask2 = 0; mask2 < (uint32_t(1) << 5); mask2++) {
+      size_t in1_d1 = kDim1;
+      if (mask1 & (uint32_t(1) << 0)) { in1_d1 = 1; }
+      size_t in1_d2 = kDim2;
+      if (mask1 & (uint32_t(1) << 1)) { in1_d2 = 1; }
+      size_t in2_d1 = kDim1;
+      if (mask2 & (uint32_t(1) << 0)) { in2_d1 = 1; }
+      size_t in2_d2 = kDim2;
+      if (mask2 & (uint32_t(1) << 1)) { in2_d2 = 1; }
+      size_t in2_d3 = kDim3;
+      if (mask2 & (uint32_t(1) << 2)) { in2_d3 = 1; }
+      size_t in2_d4 = kDim4;
+      if (mask2 & (uint32_t(1) << 3)) { in2_d4 = 1; }
+      size_t in2_d5 = kDim5;
+      if (mask2 & (uint32_t(1) << 4)) { in2_d5 = 1; }
+      BinaryElementwiseOperatorTester().operation_type(BinaryElementwiseOperatorTester::OperationType::Divide)
+        .input1_shape({in1_d2, in1_d1})
+        .input2_shape({in2_d5, in2_d4, in2_d3, in2_d2, in2_d1})
+        .TestF32();
+    }
+  }
+}
+
+TEST(DIVIDE_ND_F32, 3d_x_5d) {
+  // Sweeps all broadcast patterns: a set bit k in mask1/mask2 shrinks dimension k+1 of input 1/2 to extent 1.
+  for (uint32_t mask1 = 0; mask1 < (uint32_t(1) << 3); mask1++) {
+    for (uint32_t mask2 = 0; mask2 < (uint32_t(1) << 5); mask2++) {
+      size_t in1_d1 = kDim1;
+      if (mask1 & (uint32_t(1) << 0)) { in1_d1 = 1; }
+      size_t in1_d2 = kDim2;
+      if (mask1 & (uint32_t(1) << 1)) { in1_d2 = 1; }
+      size_t in1_d3 = kDim3;
+      if (mask1 & (uint32_t(1) << 2)) { in1_d3 = 1; }
+      size_t in2_d1 = kDim1;
+      if (mask2 & (uint32_t(1) << 0)) { in2_d1 = 1; }
+      size_t in2_d2 = kDim2;
+      if (mask2 & (uint32_t(1) << 1)) { in2_d2 = 1; }
+      size_t in2_d3 = kDim3;
+      if (mask2 & (uint32_t(1) << 2)) { in2_d3 = 1; }
+      size_t in2_d4 = kDim4;
+      if (mask2 & (uint32_t(1) << 3)) { in2_d4 = 1; }
+      size_t in2_d5 = kDim5;
+      if (mask2 & (uint32_t(1) << 4)) { in2_d5 = 1; }
+      BinaryElementwiseOperatorTester().operation_type(BinaryElementwiseOperatorTester::OperationType::Divide)
+        .input1_shape({in1_d3, in1_d2, in1_d1})
+        .input2_shape({in2_d5, in2_d4, in2_d3, in2_d2, in2_d1})
+        .TestF32();
+    }
+  }
+}
+
+TEST(DIVIDE_ND_F32, 4d_x_5d) {
+  // Sweeps all broadcast patterns: a set bit k in mask1/mask2 shrinks dimension k+1 of input 1/2 to extent 1.
+  for (uint32_t mask1 = 0; mask1 < (uint32_t(1) << 4); mask1++) {
+    for (uint32_t mask2 = 0; mask2 < (uint32_t(1) << 5); mask2++) {
+      size_t in1_d1 = kDim1;
+      if (mask1 & (uint32_t(1) << 0)) { in1_d1 = 1; }
+      size_t in1_d2 = kDim2;
+      if (mask1 & (uint32_t(1) << 1)) { in1_d2 = 1; }
+      size_t in1_d3 = kDim3;
+      if (mask1 & (uint32_t(1) << 2)) { in1_d3 = 1; }
+      size_t in1_d4 = kDim4;
+      if (mask1 & (uint32_t(1) << 3)) { in1_d4 = 1; }
+      size_t in2_d1 = kDim1;
+      if (mask2 & (uint32_t(1) << 0)) { in2_d1 = 1; }
+      size_t in2_d2 = kDim2;
+      if (mask2 & (uint32_t(1) << 1)) { in2_d2 = 1; }
+      size_t in2_d3 = kDim3;
+      if (mask2 & (uint32_t(1) << 2)) { in2_d3 = 1; }
+      size_t in2_d4 = kDim4;
+      if (mask2 & (uint32_t(1) << 3)) { in2_d4 = 1; }
+      size_t in2_d5 = kDim5;
+      if (mask2 & (uint32_t(1) << 4)) { in2_d5 = 1; }
+      BinaryElementwiseOperatorTester().operation_type(BinaryElementwiseOperatorTester::OperationType::Divide)
+        .input1_shape({in1_d4, in1_d3, in1_d2, in1_d1})
+        .input2_shape({in2_d5, in2_d4, in2_d3, in2_d2, in2_d1})
+        .TestF32();
+    }
+  }
+}
+
+TEST(DIVIDE_ND_F32, 5d_x_0d) {
+  // Sweeps all broadcast patterns of input 1: a set bit k in mask1 shrinks dimension k+1 to extent 1; input2_shape is left unset.
+  for (uint32_t mask1 = 0; mask1 < (uint32_t(1) << 5); mask1++) {
+    size_t in1_d1 = kDim1;
+    if (mask1 & (uint32_t(1) << 0)) { in1_d1 = 1; }
+    size_t in1_d2 = kDim2;
+    if (mask1 & (uint32_t(1) << 1)) { in1_d2 = 1; }
+    size_t in1_d3 = kDim3;
+    if (mask1 & (uint32_t(1) << 2)) { in1_d3 = 1; }
+    size_t in1_d4 = kDim4;
+    if (mask1 & (uint32_t(1) << 3)) { in1_d4 = 1; }
+    size_t in1_d5 = kDim5;
+    if (mask1 & (uint32_t(1) << 4)) { in1_d5 = 1; }
+    BinaryElementwiseOperatorTester().operation_type(BinaryElementwiseOperatorTester::OperationType::Divide)
+      .input1_shape({in1_d5, in1_d4, in1_d3, in1_d2, in1_d1})
+      .TestF32();
+  }
+}
+
+TEST(DIVIDE_ND_F32, 5d_x_1d) {
+  // Sweeps all broadcast patterns: a set bit k in mask1/mask2 shrinks dimension k+1 of input 1/2 to extent 1.
+  for (uint32_t mask1 = 0; mask1 < (uint32_t(1) << 5); mask1++) {
+    for (uint32_t mask2 = 0; mask2 < (uint32_t(1) << 1); mask2++) {
+      size_t in1_d1 = kDim1;
+      if (mask1 & (uint32_t(1) << 0)) { in1_d1 = 1; }
+      size_t in1_d2 = kDim2;
+      if (mask1 & (uint32_t(1) << 1)) { in1_d2 = 1; }
+      size_t in1_d3 = kDim3;
+      if (mask1 & (uint32_t(1) << 2)) { in1_d3 = 1; }
+      size_t in1_d4 = kDim4;
+      if (mask1 & (uint32_t(1) << 3)) { in1_d4 = 1; }
+      size_t in1_d5 = kDim5;
+      if (mask1 & (uint32_t(1) << 4)) { in1_d5 = 1; }
+      size_t in2_d1 = kDim1;
+      if (mask2 & (uint32_t(1) << 0)) { in2_d1 = 1; }
+      BinaryElementwiseOperatorTester().operation_type(BinaryElementwiseOperatorTester::OperationType::Divide)
+        .input1_shape({in1_d5, in1_d4, in1_d3, in1_d2, in1_d1})
+        .input2_shape({in2_d1})
+        .TestF32();
+    }
+  }
+}
+
+TEST(DIVIDE_ND_F32, 5d_x_2d) {
+  // Sweeps all broadcast patterns: a set bit k in mask1/mask2 shrinks dimension k+1 of input 1/2 to extent 1.
+  for (uint32_t mask1 = 0; mask1 < (uint32_t(1) << 5); mask1++) {
+    for (uint32_t mask2 = 0; mask2 < (uint32_t(1) << 2); mask2++) {
+      size_t in1_d1 = kDim1;
+      if (mask1 & (uint32_t(1) << 0)) { in1_d1 = 1; }
+      size_t in1_d2 = kDim2;
+      if (mask1 & (uint32_t(1) << 1)) { in1_d2 = 1; }
+      size_t in1_d3 = kDim3;
+      if (mask1 & (uint32_t(1) << 2)) { in1_d3 = 1; }
+      size_t in1_d4 = kDim4;
+      if (mask1 & (uint32_t(1) << 3)) { in1_d4 = 1; }
+      size_t in1_d5 = kDim5;
+      if (mask1 & (uint32_t(1) << 4)) { in1_d5 = 1; }
+      size_t in2_d1 = kDim1;
+      if (mask2 & (uint32_t(1) << 0)) { in2_d1 = 1; }
+      size_t in2_d2 = kDim2;
+      if (mask2 & (uint32_t(1) << 1)) { in2_d2 = 1; }
+      BinaryElementwiseOperatorTester().operation_type(BinaryElementwiseOperatorTester::OperationType::Divide)
+        .input1_shape({in1_d5, in1_d4, in1_d3, in1_d2, in1_d1})
+        .input2_shape({in2_d2, in2_d1})
+        .TestF32();
+    }
+  }
+}
+
+TEST(DIVIDE_ND_F32, 5d_x_3d) {
+  // Sweeps all broadcast patterns: a set bit k in mask1/mask2 shrinks dimension k+1 of input 1/2 to extent 1.
+  for (uint32_t mask1 = 0; mask1 < (uint32_t(1) << 5); mask1++) {
+    for (uint32_t mask2 = 0; mask2 < (uint32_t(1) << 3); mask2++) {
+      size_t in1_d1 = kDim1;
+      if (mask1 & (uint32_t(1) << 0)) { in1_d1 = 1; }
+      size_t in1_d2 = kDim2;
+      if (mask1 & (uint32_t(1) << 1)) { in1_d2 = 1; }
+      size_t in1_d3 = kDim3;
+      if (mask1 & (uint32_t(1) << 2)) { in1_d3 = 1; }
+      size_t in1_d4 = kDim4;
+      if (mask1 & (uint32_t(1) << 3)) { in1_d4 = 1; }
+      size_t in1_d5 = kDim5;
+      if (mask1 & (uint32_t(1) << 4)) { in1_d5 = 1; }
+      size_t in2_d1 = kDim1;
+      if (mask2 & (uint32_t(1) << 0)) { in2_d1 = 1; }
+      size_t in2_d2 = kDim2;
+      if (mask2 & (uint32_t(1) << 1)) { in2_d2 = 1; }
+      size_t in2_d3 = kDim3;
+      if (mask2 & (uint32_t(1) << 2)) { in2_d3 = 1; }
+      BinaryElementwiseOperatorTester().operation_type(BinaryElementwiseOperatorTester::OperationType::Divide)
+        .input1_shape({in1_d5, in1_d4, in1_d3, in1_d2, in1_d1})
+        .input2_shape({in2_d3, in2_d2, in2_d1})
+        .TestF32();
+    }
+  }
+}
+
+TEST(DIVIDE_ND_F32, 5d_x_4d) {
+  // Sweeps all broadcast patterns: a set bit k in mask1/mask2 shrinks dimension k+1 of input 1/2 to extent 1.
+  for (uint32_t mask1 = 0; mask1 < (uint32_t(1) << 5); mask1++) {
+    for (uint32_t mask2 = 0; mask2 < (uint32_t(1) << 4); mask2++) {
+      size_t in1_d1 = kDim1;
+      if (mask1 & (uint32_t(1) << 0)) { in1_d1 = 1; }
+      size_t in1_d2 = kDim2;
+      if (mask1 & (uint32_t(1) << 1)) { in1_d2 = 1; }
+      size_t in1_d3 = kDim3;
+      if (mask1 & (uint32_t(1) << 2)) { in1_d3 = 1; }
+      size_t in1_d4 = kDim4;
+      if (mask1 & (uint32_t(1) << 3)) { in1_d4 = 1; }
+      size_t in1_d5 = kDim5;
+      if (mask1 & (uint32_t(1) << 4)) { in1_d5 = 1; }
+      size_t in2_d1 = kDim1;
+      if (mask2 & (uint32_t(1) << 0)) { in2_d1 = 1; }
+      size_t in2_d2 = kDim2;
+      if (mask2 & (uint32_t(1) << 1)) { in2_d2 = 1; }
+      size_t in2_d3 = kDim3;
+      if (mask2 & (uint32_t(1) << 2)) { in2_d3 = 1; }
+      size_t in2_d4 = kDim4;
+      if (mask2 & (uint32_t(1) << 3)) { in2_d4 = 1; }
+      BinaryElementwiseOperatorTester().operation_type(BinaryElementwiseOperatorTester::OperationType::Divide)
+        .input1_shape({in1_d5, in1_d4, in1_d3, in1_d2, in1_d1})
+        .input2_shape({in2_d4, in2_d3, in2_d2, in2_d1})
+        .TestF32();
+    }
+  }
+}
+
+TEST(DIVIDE_ND_F32, 5d_x_5d) {
+  // Sweeps all broadcast patterns: a set bit k in mask1/mask2 shrinks dimension k+1 of input 1/2 to extent 1.
+  for (uint32_t mask1 = 0; mask1 < (uint32_t(1) << 5); mask1++) {
+    for (uint32_t mask2 = 0; mask2 < (uint32_t(1) << 5); mask2++) {
+      size_t in1_d1 = kDim1;
+      if (mask1 & (uint32_t(1) << 0)) { in1_d1 = 1; }
+      size_t in1_d2 = kDim2;
+      if (mask1 & (uint32_t(1) << 1)) { in1_d2 = 1; }
+      size_t in1_d3 = kDim3;
+      if (mask1 & (uint32_t(1) << 2)) { in1_d3 = 1; }
+      size_t in1_d4 = kDim4;
+      if (mask1 & (uint32_t(1) << 3)) { in1_d4 = 1; }
+      size_t in1_d5 = kDim5;
+      if (mask1 & (uint32_t(1) << 4)) { in1_d5 = 1; }
+      size_t in2_d1 = kDim1;
+      if (mask2 & (uint32_t(1) << 0)) { in2_d1 = 1; }
+      size_t in2_d2 = kDim2;
+      if (mask2 & (uint32_t(1) << 1)) { in2_d2 = 1; }
+      size_t in2_d3 = kDim3;
+      if (mask2 & (uint32_t(1) << 2)) { in2_d3 = 1; }
+      size_t in2_d4 = kDim4;
+      if (mask2 & (uint32_t(1) << 3)) { in2_d4 = 1; }
+      size_t in2_d5 = kDim5;
+      if (mask2 & (uint32_t(1) << 4)) { in2_d5 = 1; }
+      BinaryElementwiseOperatorTester().operation_type(BinaryElementwiseOperatorTester::OperationType::Divide)
+        .input1_shape({in1_d5, in1_d4, in1_d3, in1_d2, in1_d1})
+        .input2_shape({in2_d5, in2_d4, in2_d3, in2_d2, in2_d1})
+        .iterations(1)
+        .TestF32();
+    }
+  }
+}
+
+TEST(DIVIDE_ND_F32, 0d_x_6d) {
+  // Sweeps all broadcast patterns of input 2: a set bit k in mask2 shrinks dimension k+1 to extent 1; input1_shape is left unset.
+  for (uint32_t mask2 = 0; mask2 < (uint32_t(1) << 6); mask2++) {
+    size_t in2_d1 = kDim1;
+    if (mask2 & (uint32_t(1) << 0)) { in2_d1 = 1; }
+    size_t in2_d2 = kDim2;
+    if (mask2 & (uint32_t(1) << 1)) { in2_d2 = 1; }
+    size_t in2_d3 = kDim3;
+    if (mask2 & (uint32_t(1) << 2)) { in2_d3 = 1; }
+    size_t in2_d4 = kDim4;
+    if (mask2 & (uint32_t(1) << 3)) { in2_d4 = 1; }
+    size_t in2_d5 = kDim5;
+    if (mask2 & (uint32_t(1) << 4)) { in2_d5 = 1; }
+    size_t in2_d6 = kDim6;
+    if (mask2 & (uint32_t(1) << 5)) { in2_d6 = 1; }
+    BinaryElementwiseOperatorTester().operation_type(BinaryElementwiseOperatorTester::OperationType::Divide)
+      .input2_shape({in2_d6, in2_d5, in2_d4, in2_d3, in2_d2, in2_d1})
+      .TestF32();
+  }
+}
+
+TEST(DIVIDE_ND_F32, 1d_x_6d) {
+  // Sweeps all broadcast patterns: a set bit k in mask1/mask2 shrinks dimension k+1 of input 1/2 to extent 1.
+  for (uint32_t mask1 = 0; mask1 < (uint32_t(1) << 1); mask1++) {
+    for (uint32_t mask2 = 0; mask2 < (uint32_t(1) << 6); mask2++) {
+      size_t in1_d1 = kDim1;
+      if (mask1 & (uint32_t(1) << 0)) { in1_d1 = 1; }
+      size_t in2_d1 = kDim1;
+      if (mask2 & (uint32_t(1) << 0)) { in2_d1 = 1; }
+      size_t in2_d2 = kDim2;
+      if (mask2 & (uint32_t(1) << 1)) { in2_d2 = 1; }
+      size_t in2_d3 = kDim3;
+      if (mask2 & (uint32_t(1) << 2)) { in2_d3 = 1; }
+      size_t in2_d4 = kDim4;
+      if (mask2 & (uint32_t(1) << 3)) { in2_d4 = 1; }
+      size_t in2_d5 = kDim5;
+      if (mask2 & (uint32_t(1) << 4)) { in2_d5 = 1; }
+      size_t in2_d6 = kDim6;
+      if (mask2 & (uint32_t(1) << 5)) { in2_d6 = 1; }
+      BinaryElementwiseOperatorTester().operation_type(BinaryElementwiseOperatorTester::OperationType::Divide)
+        .input1_shape({in1_d1})
+        .input2_shape({in2_d6, in2_d5, in2_d4, in2_d3, in2_d2, in2_d1})
+        .TestF32();
+    }
+  }
+}
+
+TEST(DIVIDE_ND_F32, 2d_x_6d) {
+  // Sweeps all broadcast patterns: a set bit k in mask1/mask2 shrinks dimension k+1 of input 1/2 to extent 1.
+  for (uint32_t mask1 = 0; mask1 < (uint32_t(1) << 2); mask1++) {
+    for (uint32_t mask2 = 0; mask2 < (uint32_t(1) << 6); mask2++) {
+      size_t in1_d1 = kDim1;
+      if (mask1 & (uint32_t(1) << 0)) { in1_d1 = 1; }
+      size_t in1_d2 = kDim2;
+      if (mask1 & (uint32_t(1) << 1)) { in1_d2 = 1; }
+      size_t in2_d1 = kDim1;
+      if (mask2 & (uint32_t(1) << 0)) { in2_d1 = 1; }
+      size_t in2_d2 = kDim2;
+      if (mask2 & (uint32_t(1) << 1)) { in2_d2 = 1; }
+      size_t in2_d3 = kDim3;
+      if (mask2 & (uint32_t(1) << 2)) { in2_d3 = 1; }
+      size_t in2_d4 = kDim4;
+      if (mask2 & (uint32_t(1) << 3)) { in2_d4 = 1; }
+      size_t in2_d5 = kDim5;
+      if (mask2 & (uint32_t(1) << 4)) { in2_d5 = 1; }
+      size_t in2_d6 = kDim6;
+      if (mask2 & (uint32_t(1) << 5)) { in2_d6 = 1; }
+      BinaryElementwiseOperatorTester().operation_type(BinaryElementwiseOperatorTester::OperationType::Divide)
+        .input1_shape({in1_d2, in1_d1})
+        .input2_shape({in2_d6, in2_d5, in2_d4, in2_d3, in2_d2, in2_d1})
+        .TestF32();
+    }
+  }
+}
+
+TEST(DIVIDE_ND_F32, 3d_x_6d) {
+  // Sweeps all broadcast patterns: a set bit k in mask1/mask2 shrinks dimension k+1 of input 1/2 to extent 1.
+  for (uint32_t mask1 = 0; mask1 < (uint32_t(1) << 3); mask1++) {
+    for (uint32_t mask2 = 0; mask2 < (uint32_t(1) << 6); mask2++) {
+      size_t in1_d1 = kDim1;
+      if (mask1 & (uint32_t(1) << 0)) { in1_d1 = 1; }
+      size_t in1_d2 = kDim2;
+      if (mask1 & (uint32_t(1) << 1)) { in1_d2 = 1; }
+      size_t in1_d3 = kDim3;
+      if (mask1 & (uint32_t(1) << 2)) { in1_d3 = 1; }
+      size_t in2_d1 = kDim1;
+      if (mask2 & (uint32_t(1) << 0)) { in2_d1 = 1; }
+      size_t in2_d2 = kDim2;
+      if (mask2 & (uint32_t(1) << 1)) { in2_d2 = 1; }
+      size_t in2_d3 = kDim3;
+      if (mask2 & (uint32_t(1) << 2)) { in2_d3 = 1; }
+      size_t in2_d4 = kDim4;
+      if (mask2 & (uint32_t(1) << 3)) { in2_d4 = 1; }
+      size_t in2_d5 = kDim5;
+      if (mask2 & (uint32_t(1) << 4)) { in2_d5 = 1; }
+      size_t in2_d6 = kDim6;
+      if (mask2 & (uint32_t(1) << 5)) { in2_d6 = 1; }
+      BinaryElementwiseOperatorTester().operation_type(BinaryElementwiseOperatorTester::OperationType::Divide)
+        .input1_shape({in1_d3, in1_d2, in1_d1})
+        .input2_shape({in2_d6, in2_d5, in2_d4, in2_d3, in2_d2, in2_d1})
+        .TestF32();
+    }
+  }
+}
+
+TEST(DIVIDE_ND_F32, 4d_x_6d) {
+  // Sweeps all broadcast patterns: a set bit k in mask1/mask2 shrinks dimension k+1 of input 1/2 to extent 1.
+  for (uint32_t mask1 = 0; mask1 < (uint32_t(1) << 4); mask1++) {
+    for (uint32_t mask2 = 0; mask2 < (uint32_t(1) << 6); mask2++) {
+      size_t in1_d1 = kDim1;
+      if (mask1 & (uint32_t(1) << 0)) { in1_d1 = 1; }
+      size_t in1_d2 = kDim2;
+      if (mask1 & (uint32_t(1) << 1)) { in1_d2 = 1; }
+      size_t in1_d3 = kDim3;
+      if (mask1 & (uint32_t(1) << 2)) { in1_d3 = 1; }
+      size_t in1_d4 = kDim4;
+      if (mask1 & (uint32_t(1) << 3)) { in1_d4 = 1; }
+      size_t in2_d1 = kDim1;
+      if (mask2 & (uint32_t(1) << 0)) { in2_d1 = 1; }
+      size_t in2_d2 = kDim2;
+      if (mask2 & (uint32_t(1) << 1)) { in2_d2 = 1; }
+      size_t in2_d3 = kDim3;
+      if (mask2 & (uint32_t(1) << 2)) { in2_d3 = 1; }
+      size_t in2_d4 = kDim4;
+      if (mask2 & (uint32_t(1) << 3)) { in2_d4 = 1; }
+      size_t in2_d5 = kDim5;
+      if (mask2 & (uint32_t(1) << 4)) { in2_d5 = 1; }
+      size_t in2_d6 = kDim6;
+      if (mask2 & (uint32_t(1) << 5)) { in2_d6 = 1; }
+      BinaryElementwiseOperatorTester().operation_type(BinaryElementwiseOperatorTester::OperationType::Divide)
+        .input1_shape({in1_d4, in1_d3, in1_d2, in1_d1})
+        .input2_shape({in2_d6, in2_d5, in2_d4, in2_d3, in2_d2, in2_d1})
+        .TestF32();
+    }
+  }
+}
+
+TEST(DIVIDE_ND_F32, 5d_x_6d) {
+  // Sweeps all broadcast patterns: a set bit k in mask1/mask2 shrinks dimension k+1 of input 1/2 to extent 1.
+  for (uint32_t mask1 = 0; mask1 < (uint32_t(1) << 5); mask1++) {
+    for (uint32_t mask2 = 0; mask2 < (uint32_t(1) << 6); mask2++) {
+      size_t in1_d1 = kDim1;
+      if (mask1 & (uint32_t(1) << 0)) { in1_d1 = 1; }
+      size_t in1_d2 = kDim2;
+      if (mask1 & (uint32_t(1) << 1)) { in1_d2 = 1; }
+      size_t in1_d3 = kDim3;
+      if (mask1 & (uint32_t(1) << 2)) { in1_d3 = 1; }
+      size_t in1_d4 = kDim4;
+      if (mask1 & (uint32_t(1) << 3)) { in1_d4 = 1; }
+      size_t in1_d5 = kDim5;
+      if (mask1 & (uint32_t(1) << 4)) { in1_d5 = 1; }
+      size_t in2_d1 = kDim1;
+      if (mask2 & (uint32_t(1) << 0)) { in2_d1 = 1; }
+      size_t in2_d2 = kDim2;
+      if (mask2 & (uint32_t(1) << 1)) { in2_d2 = 1; }
+      size_t in2_d3 = kDim3;
+      if (mask2 & (uint32_t(1) << 2)) { in2_d3 = 1; }
+      size_t in2_d4 = kDim4;
+      if (mask2 & (uint32_t(1) << 3)) { in2_d4 = 1; }
+      size_t in2_d5 = kDim5;
+      if (mask2 & (uint32_t(1) << 4)) { in2_d5 = 1; }
+      size_t in2_d6 = kDim6;
+      if (mask2 & (uint32_t(1) << 5)) { in2_d6 = 1; }
+      BinaryElementwiseOperatorTester().operation_type(BinaryElementwiseOperatorTester::OperationType::Divide)
+        .input1_shape({in1_d5, in1_d4, in1_d3, in1_d2, in1_d1})
+        .input2_shape({in2_d6, in2_d5, in2_d4, in2_d3, in2_d2, in2_d1})
+        .iterations(1)
+        .TestF32();
+    }
+  }
+}
+
+// Runs Divide on a 6D first input for every extent-1 mask; input2_shape is never set, so the tester default applies.
+TEST(DIVIDE_ND_F32, 6d_x_0d) {
+  for (uint32_t in1_mask = 0; in1_mask < (uint32_t(1) << 6); in1_mask++) {  // bit (k-1) set => dim k has extent 1
+    const bool in1_unit_d1 = ((in1_mask >> 0) & 1u) != 0;
+    const size_t in1_len_d1 = in1_unit_d1 ? 1 : kDim1;
+    const bool in1_unit_d2 = ((in1_mask >> 1) & 1u) != 0;
+    const size_t in1_len_d2 = in1_unit_d2 ? 1 : kDim2;
+    const bool in1_unit_d3 = ((in1_mask >> 2) & 1u) != 0;
+    const size_t in1_len_d3 = in1_unit_d3 ? 1 : kDim3;
+    const bool in1_unit_d4 = ((in1_mask >> 3) & 1u) != 0;
+    const size_t in1_len_d4 = in1_unit_d4 ? 1 : kDim4;
+    const bool in1_unit_d5 = ((in1_mask >> 4) & 1u) != 0;
+    const size_t in1_len_d5 = in1_unit_d5 ? 1 : kDim5;
+    const bool in1_unit_d6 = ((in1_mask >> 5) & 1u) != 0;
+    const size_t in1_len_d6 = in1_unit_d6 ? 1 : kDim6;
+    BinaryElementwiseOperatorTester().operation_type(BinaryElementwiseOperatorTester::OperationType::Divide)
+      .input1_shape({in1_len_d6, in1_len_d5, in1_len_d4, in1_len_d3, in1_len_d2, in1_len_d1})
+      .TestF32();
+  }
+}
+
+// Runs Divide on a 6D first input and a 1D second input for every pairing of per-dimension extent-1 masks.
+TEST(DIVIDE_ND_F32, 6d_x_1d) {
+  for (uint32_t in1_mask = 0; in1_mask < (uint32_t(1) << 6); in1_mask++) {  // bit (k-1) set => dim k has extent 1
+    const bool in1_unit_d1 = ((in1_mask >> 0) & 1u) != 0;
+    const size_t in1_len_d1 = in1_unit_d1 ? 1 : kDim1;
+    const bool in1_unit_d2 = ((in1_mask >> 1) & 1u) != 0;
+    const size_t in1_len_d2 = in1_unit_d2 ? 1 : kDim2;
+    const bool in1_unit_d3 = ((in1_mask >> 2) & 1u) != 0;
+    const size_t in1_len_d3 = in1_unit_d3 ? 1 : kDim3;
+    const bool in1_unit_d4 = ((in1_mask >> 3) & 1u) != 0;
+    const size_t in1_len_d4 = in1_unit_d4 ? 1 : kDim4;
+    const bool in1_unit_d5 = ((in1_mask >> 4) & 1u) != 0;
+    const size_t in1_len_d5 = in1_unit_d5 ? 1 : kDim5;
+    const bool in1_unit_d6 = ((in1_mask >> 5) & 1u) != 0;
+    const size_t in1_len_d6 = in1_unit_d6 ? 1 : kDim6;
+    for (uint32_t in2_mask = 0; in2_mask < (uint32_t(1) << 1); in2_mask++) {  // same bit convention for input 2
+      const bool in2_unit_d1 = ((in2_mask >> 0) & 1u) != 0;
+      const size_t in2_len_d1 = in2_unit_d1 ? 1 : kDim1;
+      BinaryElementwiseOperatorTester().operation_type(BinaryElementwiseOperatorTester::OperationType::Divide)
+        .input1_shape({in1_len_d6, in1_len_d5, in1_len_d4, in1_len_d3, in1_len_d2, in1_len_d1})
+        .input2_shape({in2_len_d1})
+        .TestF32();
+    }
+  }
+}
+
+// Runs Divide on a 6D first input and a 2D second input for every pairing of per-dimension extent-1 masks.
+TEST(DIVIDE_ND_F32, 6d_x_2d) {
+  for (uint32_t in1_mask = 0; in1_mask < (uint32_t(1) << 6); in1_mask++) {  // bit (k-1) set => dim k has extent 1
+    const bool in1_unit_d1 = ((in1_mask >> 0) & 1u) != 0;
+    const size_t in1_len_d1 = in1_unit_d1 ? 1 : kDim1;
+    const bool in1_unit_d2 = ((in1_mask >> 1) & 1u) != 0;
+    const size_t in1_len_d2 = in1_unit_d2 ? 1 : kDim2;
+    const bool in1_unit_d3 = ((in1_mask >> 2) & 1u) != 0;
+    const size_t in1_len_d3 = in1_unit_d3 ? 1 : kDim3;
+    const bool in1_unit_d4 = ((in1_mask >> 3) & 1u) != 0;
+    const size_t in1_len_d4 = in1_unit_d4 ? 1 : kDim4;
+    const bool in1_unit_d5 = ((in1_mask >> 4) & 1u) != 0;
+    const size_t in1_len_d5 = in1_unit_d5 ? 1 : kDim5;
+    const bool in1_unit_d6 = ((in1_mask >> 5) & 1u) != 0;
+    const size_t in1_len_d6 = in1_unit_d6 ? 1 : kDim6;
+    for (uint32_t in2_mask = 0; in2_mask < (uint32_t(1) << 2); in2_mask++) {  // same bit convention for input 2
+      const bool in2_unit_d1 = ((in2_mask >> 0) & 1u) != 0;
+      const size_t in2_len_d1 = in2_unit_d1 ? 1 : kDim1;
+      const bool in2_unit_d2 = ((in2_mask >> 1) & 1u) != 0;
+      const size_t in2_len_d2 = in2_unit_d2 ? 1 : kDim2;
+      BinaryElementwiseOperatorTester().operation_type(BinaryElementwiseOperatorTester::OperationType::Divide)
+        .input1_shape({in1_len_d6, in1_len_d5, in1_len_d4, in1_len_d3, in1_len_d2, in1_len_d1})
+        .input2_shape({in2_len_d2, in2_len_d1})
+        .TestF32();
+    }
+  }
+}
+
+// Runs Divide on a 6D first input and a 3D second input for every pairing of per-dimension extent-1 masks.
+TEST(DIVIDE_ND_F32, 6d_x_3d) {
+  for (uint32_t in1_mask = 0; in1_mask < (uint32_t(1) << 6); in1_mask++) {  // bit (k-1) set => dim k has extent 1
+    const bool in1_unit_d1 = ((in1_mask >> 0) & 1u) != 0;
+    const size_t in1_len_d1 = in1_unit_d1 ? 1 : kDim1;
+    const bool in1_unit_d2 = ((in1_mask >> 1) & 1u) != 0;
+    const size_t in1_len_d2 = in1_unit_d2 ? 1 : kDim2;
+    const bool in1_unit_d3 = ((in1_mask >> 2) & 1u) != 0;
+    const size_t in1_len_d3 = in1_unit_d3 ? 1 : kDim3;
+    const bool in1_unit_d4 = ((in1_mask >> 3) & 1u) != 0;
+    const size_t in1_len_d4 = in1_unit_d4 ? 1 : kDim4;
+    const bool in1_unit_d5 = ((in1_mask >> 4) & 1u) != 0;
+    const size_t in1_len_d5 = in1_unit_d5 ? 1 : kDim5;
+    const bool in1_unit_d6 = ((in1_mask >> 5) & 1u) != 0;
+    const size_t in1_len_d6 = in1_unit_d6 ? 1 : kDim6;
+    for (uint32_t in2_mask = 0; in2_mask < (uint32_t(1) << 3); in2_mask++) {  // same bit convention for input 2
+      const bool in2_unit_d1 = ((in2_mask >> 0) & 1u) != 0;
+      const size_t in2_len_d1 = in2_unit_d1 ? 1 : kDim1;
+      const bool in2_unit_d2 = ((in2_mask >> 1) & 1u) != 0;
+      const size_t in2_len_d2 = in2_unit_d2 ? 1 : kDim2;
+      const bool in2_unit_d3 = ((in2_mask >> 2) & 1u) != 0;
+      const size_t in2_len_d3 = in2_unit_d3 ? 1 : kDim3;
+      BinaryElementwiseOperatorTester().operation_type(BinaryElementwiseOperatorTester::OperationType::Divide)
+        .input1_shape({in1_len_d6, in1_len_d5, in1_len_d4, in1_len_d3, in1_len_d2, in1_len_d1})
+        .input2_shape({in2_len_d3, in2_len_d2, in2_len_d1})
+        .TestF32();
+    }
+  }
+}
+
+// Runs Divide on a 6D first input and a 4D second input for every pairing of per-dimension extent-1 masks.
+TEST(DIVIDE_ND_F32, 6d_x_4d) {
+  for (uint32_t in1_mask = 0; in1_mask < (uint32_t(1) << 6); in1_mask++) {  // bit (k-1) set => dim k has extent 1
+    const bool in1_unit_d1 = ((in1_mask >> 0) & 1u) != 0;
+    const size_t in1_len_d1 = in1_unit_d1 ? 1 : kDim1;
+    const bool in1_unit_d2 = ((in1_mask >> 1) & 1u) != 0;
+    const size_t in1_len_d2 = in1_unit_d2 ? 1 : kDim2;
+    const bool in1_unit_d3 = ((in1_mask >> 2) & 1u) != 0;
+    const size_t in1_len_d3 = in1_unit_d3 ? 1 : kDim3;
+    const bool in1_unit_d4 = ((in1_mask >> 3) & 1u) != 0;
+    const size_t in1_len_d4 = in1_unit_d4 ? 1 : kDim4;
+    const bool in1_unit_d5 = ((in1_mask >> 4) & 1u) != 0;
+    const size_t in1_len_d5 = in1_unit_d5 ? 1 : kDim5;
+    const bool in1_unit_d6 = ((in1_mask >> 5) & 1u) != 0;
+    const size_t in1_len_d6 = in1_unit_d6 ? 1 : kDim6;
+    for (uint32_t in2_mask = 0; in2_mask < (uint32_t(1) << 4); in2_mask++) {  // same bit convention for input 2
+      const bool in2_unit_d1 = ((in2_mask >> 0) & 1u) != 0;
+      const size_t in2_len_d1 = in2_unit_d1 ? 1 : kDim1;
+      const bool in2_unit_d2 = ((in2_mask >> 1) & 1u) != 0;
+      const size_t in2_len_d2 = in2_unit_d2 ? 1 : kDim2;
+      const bool in2_unit_d3 = ((in2_mask >> 2) & 1u) != 0;
+      const size_t in2_len_d3 = in2_unit_d3 ? 1 : kDim3;
+      const bool in2_unit_d4 = ((in2_mask >> 3) & 1u) != 0;
+      const size_t in2_len_d4 = in2_unit_d4 ? 1 : kDim4;
+      BinaryElementwiseOperatorTester().operation_type(BinaryElementwiseOperatorTester::OperationType::Divide)
+        .input1_shape({in1_len_d6, in1_len_d5, in1_len_d4, in1_len_d3, in1_len_d2, in1_len_d1})
+        .input2_shape({in2_len_d4, in2_len_d3, in2_len_d2, in2_len_d1})
+        .TestF32();
+    }
+  }
+}
+
+// Runs Divide on a 6D first input and a 5D second input for every pairing of per-dimension extent-1 masks.
+TEST(DIVIDE_ND_F32, 6d_x_5d) {
+  for (uint32_t in1_mask = 0; in1_mask < (uint32_t(1) << 6); in1_mask++) {  // bit (k-1) set => dim k has extent 1
+    const bool in1_unit_d1 = ((in1_mask >> 0) & 1u) != 0;
+    const size_t in1_len_d1 = in1_unit_d1 ? 1 : kDim1;
+    const bool in1_unit_d2 = ((in1_mask >> 1) & 1u) != 0;
+    const size_t in1_len_d2 = in1_unit_d2 ? 1 : kDim2;
+    const bool in1_unit_d3 = ((in1_mask >> 2) & 1u) != 0;
+    const size_t in1_len_d3 = in1_unit_d3 ? 1 : kDim3;
+    const bool in1_unit_d4 = ((in1_mask >> 3) & 1u) != 0;
+    const size_t in1_len_d4 = in1_unit_d4 ? 1 : kDim4;
+    const bool in1_unit_d5 = ((in1_mask >> 4) & 1u) != 0;
+    const size_t in1_len_d5 = in1_unit_d5 ? 1 : kDim5;
+    const bool in1_unit_d6 = ((in1_mask >> 5) & 1u) != 0;
+    const size_t in1_len_d6 = in1_unit_d6 ? 1 : kDim6;
+    for (uint32_t in2_mask = 0; in2_mask < (uint32_t(1) << 5); in2_mask++) {  // same bit convention for input 2
+      const bool in2_unit_d1 = ((in2_mask >> 0) & 1u) != 0;
+      const size_t in2_len_d1 = in2_unit_d1 ? 1 : kDim1;
+      const bool in2_unit_d2 = ((in2_mask >> 1) & 1u) != 0;
+      const size_t in2_len_d2 = in2_unit_d2 ? 1 : kDim2;
+      const bool in2_unit_d3 = ((in2_mask >> 2) & 1u) != 0;
+      const size_t in2_len_d3 = in2_unit_d3 ? 1 : kDim3;
+      const bool in2_unit_d4 = ((in2_mask >> 3) & 1u) != 0;
+      const size_t in2_len_d4 = in2_unit_d4 ? 1 : kDim4;
+      const bool in2_unit_d5 = ((in2_mask >> 4) & 1u) != 0;
+      const size_t in2_len_d5 = in2_unit_d5 ? 1 : kDim5;
+      BinaryElementwiseOperatorTester().operation_type(BinaryElementwiseOperatorTester::OperationType::Divide)
+        .input1_shape({in1_len_d6, in1_len_d5, in1_len_d4, in1_len_d3, in1_len_d2, in1_len_d1})
+        .input2_shape({in2_len_d5, in2_len_d4, in2_len_d3, in2_len_d2, in2_len_d1})
+        .iterations(1)  // NOTE(review): presumably bounds runtime over the 64 * 32 shape pairs -- confirm
+        .TestF32();
+    }
+  }
+}
+
+// Runs Divide on a 6D first input and a 6D second input for every pairing of per-dimension extent-1 masks.
+TEST(DIVIDE_ND_F32, 6d_x_6d) {
+  for (uint32_t in1_mask = 0; in1_mask < (uint32_t(1) << 6); in1_mask++) {  // bit (k-1) set => dim k has extent 1
+    const bool in1_unit_d1 = ((in1_mask >> 0) & 1u) != 0;
+    const size_t in1_len_d1 = in1_unit_d1 ? 1 : kDim1;
+    const bool in1_unit_d2 = ((in1_mask >> 1) & 1u) != 0;
+    const size_t in1_len_d2 = in1_unit_d2 ? 1 : kDim2;
+    const bool in1_unit_d3 = ((in1_mask >> 2) & 1u) != 0;
+    const size_t in1_len_d3 = in1_unit_d3 ? 1 : kDim3;
+    const bool in1_unit_d4 = ((in1_mask >> 3) & 1u) != 0;
+    const size_t in1_len_d4 = in1_unit_d4 ? 1 : kDim4;
+    const bool in1_unit_d5 = ((in1_mask >> 4) & 1u) != 0;
+    const size_t in1_len_d5 = in1_unit_d5 ? 1 : kDim5;
+    const bool in1_unit_d6 = ((in1_mask >> 5) & 1u) != 0;
+    const size_t in1_len_d6 = in1_unit_d6 ? 1 : kDim6;
+    for (uint32_t in2_mask = 0; in2_mask < (uint32_t(1) << 6); in2_mask++) {  // same bit convention for input 2
+      const bool in2_unit_d1 = ((in2_mask >> 0) & 1u) != 0;
+      const size_t in2_len_d1 = in2_unit_d1 ? 1 : kDim1;
+      const bool in2_unit_d2 = ((in2_mask >> 1) & 1u) != 0;
+      const size_t in2_len_d2 = in2_unit_d2 ? 1 : kDim2;
+      const bool in2_unit_d3 = ((in2_mask >> 2) & 1u) != 0;
+      const size_t in2_len_d3 = in2_unit_d3 ? 1 : kDim3;
+      const bool in2_unit_d4 = ((in2_mask >> 3) & 1u) != 0;
+      const size_t in2_len_d4 = in2_unit_d4 ? 1 : kDim4;
+      const bool in2_unit_d5 = ((in2_mask >> 4) & 1u) != 0;
+      const size_t in2_len_d5 = in2_unit_d5 ? 1 : kDim5;
+      const bool in2_unit_d6 = ((in2_mask >> 5) & 1u) != 0;
+      const size_t in2_len_d6 = in2_unit_d6 ? 1 : kDim6;
+      BinaryElementwiseOperatorTester().operation_type(BinaryElementwiseOperatorTester::OperationType::Divide)
+        .input1_shape({in1_len_d6, in1_len_d5, in1_len_d4, in1_len_d3, in1_len_d2, in1_len_d1})
+        .input2_shape({in2_len_d6, in2_len_d5, in2_len_d4, in2_len_d3, in2_len_d2, in2_len_d1})
+        .iterations(1)  // NOTE(review): presumably bounds runtime over the 64 * 64 shape pairs -- confirm
+        .TestF32();
+    }
+  }
+}