Add binary op microkernels with RELU activation

PiperOrigin-RevId: 325607697
diff --git a/src/f32-vbinary/gen/vmulc-relu-scalar-x4.c b/src/f32-vbinary/gen/vmulc-relu-scalar-x4.c
new file mode 100644
index 0000000..4e52ae1
--- /dev/null
+++ b/src/f32-vbinary/gen/vmulc-relu-scalar-x4.c
@@ -0,0 +1,65 @@
+// Auto-generated file. Do not edit!
+//   Template: src/f32-vbinary/vopc-scalar.c.in
+//   Generator: tools/xngen
+//
+// Copyright 2019 Google LLC
+//
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree.
+
+#include <assert.h>
+
+#include <xnnpack/common.h>
+#include <xnnpack/math.h>
+#include <xnnpack/vbinary.h>
+
+
+void xnn_f32_vmulc_relu_ukernel__scalar_x4(
+    size_t n,
+    const float* a,
+    const float* b,
+    float* y,
+    const union xnn_f32_relu_params params[restrict XNN_MIN_ELEMENTS(1)])
+{
+  assert(n != 0);
+  assert(n % sizeof(float) == 0);
+  assert(a != NULL);
+  assert(b != NULL);
+  assert(y != NULL);
+
+
+  const float vb = *b;
+  for (; n >= 4 * sizeof(float); n -= 4 * sizeof(float)) {
+    const float va0 = a[0];
+    const float va1 = a[1];
+    const float va2 = a[2];
+    const float va3 = a[3];
+    a += 4;
+
+    float vy0 = va0 * vb;
+    float vy1 = va1 * vb;
+    float vy2 = va2 * vb;
+    float vy3 = va3 * vb;
+
+
+    vy0 = math_max_f32(vy0, 0.0f);
+    vy1 = math_max_f32(vy1, 0.0f);
+    vy2 = math_max_f32(vy2, 0.0f);
+    vy3 = math_max_f32(vy3, 0.0f);
+
+    y[0] = vy0;
+    y[1] = vy1;
+    y[2] = vy2;
+    y[3] = vy3;
+    y += 4;
+  }
+  if XNN_UNLIKELY(n != 0) {
+    do {
+      const float va = *a++;
+      float vy = va * vb;
+      vy = math_max_f32(vy, 0.0f);
+      *y++ = vy;
+      n -= sizeof(float);
+    } while (n != 0);
+  }
+}