Update Android.bp following XNNPACK rebase

Test: mm
Change-Id: Ibe244ede896c7df73fd35c90e38ef9b34e246b43
diff --git a/Android.bp b/Android.bp
index ccdd1d6..91417c8 100644
--- a/Android.bp
+++ b/Android.bp
@@ -13,27 +13,27 @@
 // limitations under the License.
 
 OPERATOR_SRCS = [
-    "src/add-nc.c",
-    "src/argmax-pooling-nhwc.c",
-    "src/average-pooling-nhwc.c",
-    "src/binary-elementwise-nd.c",
-    "src/channel-pad-nc.c",
-    "src/channel-shuffle-nc.c",
-    "src/clamp-nc.c",
-    "src/convolution-nchw.c",
-    "src/convolution-nhwc.c",
-    "src/deconvolution-nhwc.c",
-    "src/fully-connected-nc.c",
-    "src/global-average-pooling-ncw.c",
-    "src/global-average-pooling-nwc.c",
-    "src/hardswish-nc.c",
-    "src/leaky-relu-nc.c",
-    "src/max-pooling-nhwc.c",
-    "src/prelu-nc.c",
-    "src/resize-bilinear-nhwc.c",
-    "src/sigmoid-nc.c",
-    "src/softmax-nc.c",
-    "src/unpooling-nhwc.c",
+    "src/operators/add-nc.c", // every operator source in this list is referenced under src/operators/
+    "src/operators/argmax-pooling-nhwc.c",
+    "src/operators/average-pooling-nhwc.c",
+    "src/operators/binary-elementwise-nd.c",
+    "src/operators/channel-pad-nc.c",
+    "src/operators/channel-shuffle-nc.c",
+    "src/operators/clamp-nc.c",
+    "src/operators/convolution-nchw.c",
+    "src/operators/convolution-nhwc.c",
+    "src/operators/deconvolution-nhwc.c",
+    "src/operators/fully-connected-nc.c",
+    "src/operators/global-average-pooling-ncw.c",
+    "src/operators/global-average-pooling-nwc.c",
+    "src/operators/hardswish-nc.c",
+    "src/operators/leaky-relu-nc.c",
+    "src/operators/max-pooling-nhwc.c",
+    "src/operators/prelu-nc.c",
+    "src/operators/resize-bilinear-nhwc.c",
+    "src/operators/sigmoid-nc.c",
+    "src/operators/softmax-nc.c",
+    "src/operators/unpooling-nhwc.c",
 ]
 
 TABLE_SRCS = [
@@ -198,6 +198,13 @@
     "src/math/expminus-scalar-lut2048-p1.c",
     "src/math/expminus-scalar-lut64-p2.c",
     "src/math/expminus-scalar-p5.c",
+    "src/math/roundne-scalar-addsub.c",
+    "src/math/roundd-scalar-addsub.c",
+    "src/math/roundd-scalar-cvt.c",
+    "src/math/roundu-scalar-addsub.c",
+    "src/math/roundu-scalar-cvt.c",
+    "src/math/roundz-scalar-addsub.c",
+    "src/math/roundz-scalar-cvt.c",
     "src/math/sigmoid-scalar-lut2048-p1-div.c",
     "src/math/sigmoid-scalar-lut64-p2-div.c",
     "src/math/sigmoid-scalar-p5-div.c",
@@ -355,6 +362,10 @@
     "src/f32-sigmoid/gen/psimd-p5-div-x16.c",
     "src/f32-sigmoid/gen/psimd-p5-div-x20.c",
     "src/f32-sigmoid/gen/psimd-p5-div-x24.c",
+    "src/math/roundne-psimd-addsub.c",
+    "src/math/roundd-psimd-addsub.c",
+    "src/math/roundu-psimd-addsub.c",
+    "src/math/roundz-psimd-addsub.c",
     "src/math/sigmoid-psimd-p5-div.c",
 ]
 
@@ -364,10 +375,18 @@
     "src/f32-avgpool/9x-minmax-neon-c4.c",
     "src/f32-clamp/gen/neon-x4.c",
     "src/f32-clamp/gen/neon-x8.c",
+    "src/f32-dwconv/gen/up4x4-minmax-neon.c",
+    "src/f32-dwconv/gen/up4x4-minmax-neon-acc2.c",
+    "src/f32-dwconv/gen/up8x4-minmax-neon.c",
+    "src/f32-dwconv/gen/up8x4-minmax-neon-acc2.c",
     "src/f32-dwconv/gen/up4x9-minmax-neon.c",
     "src/f32-dwconv/gen/up4x9-minmax-neon-acc2.c",
     "src/f32-dwconv/gen/up8x9-minmax-neon.c",
     "src/f32-dwconv/gen/up8x9-minmax-neon-acc2.c",
+    "src/f32-dwconv/gen/up4x25-minmax-neon.c",
+    "src/f32-dwconv/gen/up4x25-minmax-neon-acc2.c",
+    "src/f32-dwconv/gen/up8x25-minmax-neon.c",
+    "src/f32-dwconv/gen/up8x25-minmax-neon-acc2.c",
     "src/f32-gavgpool-spchw/neon-x4.c",
     "src/f32-gavgpool/7p7x-minmax-neon-c4.c",
     "src/f32-gavgpool/7x-minmax-neon-c4.c",
@@ -512,6 +531,7 @@
     "src/u8-rmax/neon.c",
     "src/x32-packx/x4-neon-st4.c",
     "src/x32-pad/x2-neon.c",
+    "src/x32-unpool/neon.c",
     "src/x32-zip/x2-neon.c",
     "src/x32-zip/x3-neon.c",
     "src/x32-zip/x4-neon.c",
@@ -520,6 +540,13 @@
     "src/x8-zip/x3-neon.c",
     "src/x8-zip/x4-neon.c",
     "src/x8-zip/xm-neon.c",
+    "src/math/roundne-neon-addsub.c",
+    "src/math/roundd-neon-addsub.c",
+    "src/math/roundd-neon-cvt.c",
+    "src/math/roundu-neon-addsub.c",
+    "src/math/roundu-neon-cvt.c",
+    "src/math/roundz-neon-addsub.c",
+    "src/math/roundz-neon-cvt.c",
     "src/math/sigmoid-neon-frac-p9-p10-nr1recps.c",
     "src/math/sigmoid-neon-rr1-lut2048-p1-nr2recps.c",
     "src/math/sigmoid-neon-rr1-lut64-p2-nr2recps.c",
@@ -545,10 +572,18 @@
     "src/f32-igemm/gen/4x8s4-minmax-neonfma.c",
     "src/f32-igemm/gen/6x8s4-minmax-neonfma.c",
     "src/f32-igemm/gen/8x8s4-minmax-neonfma.c",
+    "src/f32-dwconv/gen/up4x4-minmax-neonfma.c",
+    "src/f32-dwconv/gen/up4x4-minmax-neonfma-acc2.c",
+    "src/f32-dwconv/gen/up8x4-minmax-neonfma.c",
+    "src/f32-dwconv/gen/up8x4-minmax-neonfma-acc2.c",
     "src/f32-dwconv/gen/up4x9-minmax-neonfma.c",
     "src/f32-dwconv/gen/up4x9-minmax-neonfma-acc2.c",
     "src/f32-dwconv/gen/up8x9-minmax-neonfma.c",
     "src/f32-dwconv/gen/up8x9-minmax-neonfma-acc2.c",
+    "src/f32-dwconv/gen/up4x25-minmax-neonfma.c",
+    "src/f32-dwconv/gen/up4x25-minmax-neonfma-acc2.c",
+    "src/f32-dwconv/gen/up8x25-minmax-neonfma.c",
+    "src/f32-dwconv/gen/up8x25-minmax-neonfma-acc2.c",
     "src/f32-gemm/gen/1x8-minmax-neonfma-dup-ld64.c",
     "src/f32-gemm/gen/4x8-minmax-neonfma-dup-ld128.c",
     "src/f32-gemm/gen/4x8-minmax-neonfma-dup-ld64.c",
@@ -754,11 +789,72 @@
     "src/math/sigmoid-neonfma-rr2-p5-div.c",
 ]
 
+NEONV8_UKERNELS = [ // source list compiled by the xnnpack_neonv8_ukernels module (arm and arm64)
+    "src/math/roundne-neonv8.c", // the four src/math round* kernels in their neonv8 variant
+    "src/math/roundd-neonv8.c",
+    "src/math/roundu-neonv8.c",
+    "src/math/roundz-neonv8.c",
+]
+
 AARCH64_NEONFP16ARITH_UKERNELS = [
-    "src/f16-gemm/gen/1x8-neonfp16arith-ld64.c",
-    "src/f16-gemm/gen/4x8-neonfp16arith-ld64.c",
-    "src/f16-gemm/gen/6x8-neonfp16arith-ld64.c",
-    "src/f16-gemm/gen/8x8-neonfp16arith-ld64.c",
+    "src/f16-clamp/gen/neonfp16arith-x16.c",
+    "src/f16-clamp/gen/neonfp16arith-x8.c",
+    "src/f16-hswish/gen/neonfp16arith-x16.c",
+    "src/f16-hswish/gen/neonfp16arith-x8.c",
+    "src/f16-prelu/gen/neonfp16arith-2x16.c",
+    "src/f16-prelu/gen/neonfp16arith-2x8.c",
+    "src/f16-vbinary/gen/vaddc-minmax-neonfp16arith-x16.c",
+    "src/f16-vbinary/gen/vaddc-minmax-neonfp16arith-x8.c",
+    "src/f16-vbinary/gen/vadd-minmax-neonfp16arith-x16.c",
+    "src/f16-vbinary/gen/vadd-minmax-neonfp16arith-x8.c",
+    "src/f16-vbinary/gen/vdivc-minmax-neonfp16arith-x16.c",
+    "src/f16-vbinary/gen/vdivc-minmax-neonfp16arith-x8.c",
+    "src/f16-vbinary/gen/vdiv-minmax-neonfp16arith-x16.c",
+    "src/f16-vbinary/gen/vdiv-minmax-neonfp16arith-x8.c",
+    "src/f16-vbinary/gen/vmaxc-neonfp16arith-x16.c",
+    "src/f16-vbinary/gen/vmaxc-neonfp16arith-x8.c",
+    "src/f16-vbinary/gen/vmax-neonfp16arith-x16.c",
+    "src/f16-vbinary/gen/vmax-neonfp16arith-x8.c",
+    "src/f16-vbinary/gen/vminc-neonfp16arith-x16.c",
+    "src/f16-vbinary/gen/vminc-neonfp16arith-x8.c",
+    "src/f16-vbinary/gen/vmin-neonfp16arith-x16.c",
+    "src/f16-vbinary/gen/vmin-neonfp16arith-x8.c",
+    "src/f16-vbinary/gen/vmulc-minmax-neonfp16arith-x16.c",
+    "src/f16-vbinary/gen/vmulc-minmax-neonfp16arith-x8.c",
+    "src/f16-vbinary/gen/vmul-minmax-neonfp16arith-x16.c",
+    "src/f16-vbinary/gen/vmul-minmax-neonfp16arith-x8.c",
+    "src/f16-vbinary/gen/vrdivc-minmax-neonfp16arith-x16.c",
+    "src/f16-vbinary/gen/vrdivc-minmax-neonfp16arith-x8.c",
+    "src/f16-vbinary/gen/vrsubc-minmax-neonfp16arith-x16.c",
+    "src/f16-vbinary/gen/vrsubc-minmax-neonfp16arith-x8.c",
+    "src/f16-vbinary/gen/vsubc-minmax-neonfp16arith-x16.c",
+    "src/f16-vbinary/gen/vsubc-minmax-neonfp16arith-x8.c",
+    "src/f16-vbinary/gen/vsub-minmax-neonfp16arith-x16.c",
+    "src/f16-vbinary/gen/vsub-minmax-neonfp16arith-x8.c",
+    "src/f16-gemm/gen/1x8-minmax-neonfp16arith-ld64.c",
+    "src/f16-gemm/gen/4x8-minmax-neonfp16arith-ld64.c",
+    "src/f16-gemm/gen/6x8-minmax-neonfp16arith-ld64.c",
+    "src/f16-gemm/gen/8x8-minmax-neonfp16arith-ld64.c",
+    "src/f16-gemm/gen-inc/1x8inc-minmax-neonfp16arith-ld64.c",
+    "src/f16-gemm/gen-inc/4x8inc-minmax-neonfp16arith-ld64.c",
+    "src/f16-gemm/gen-inc/6x8inc-minmax-neonfp16arith-ld64.c",
+    "src/f16-gemm/gen-inc/8x8inc-minmax-neonfp16arith-ld64.c",
+    "src/f16-igemm/gen/1x8-minmax-neonfp16arith-ld64.c",
+    "src/f16-igemm/gen/4x8-minmax-neonfp16arith-ld64.c",
+    "src/f16-igemm/gen/6x8-minmax-neonfp16arith-ld64.c",
+    "src/f16-igemm/gen/8x8-minmax-neonfp16arith-ld64.c",
+    "src/f16-gemm/gen/1x16-minmax-neonfp16arith-ld64.c",
+    "src/f16-gemm/gen/4x16-minmax-neonfp16arith-ld64.c",
+    "src/f16-gemm/gen/6x16-minmax-neonfp16arith-ld64.c",
+    "src/f16-gemm/gen/8x16-minmax-neonfp16arith-ld64.c",
+    "src/f16-gemm/gen-inc/1x16inc-minmax-neonfp16arith-ld64.c",
+    "src/f16-gemm/gen-inc/4x16inc-minmax-neonfp16arith-ld64.c",
+    "src/f16-gemm/gen-inc/6x16inc-minmax-neonfp16arith-ld64.c",
+    "src/f16-gemm/gen-inc/8x16inc-minmax-neonfp16arith-ld64.c",
+    "src/f16-igemm/gen/1x16-minmax-neonfp16arith-ld64.c",
+    "src/f16-igemm/gen/4x16-minmax-neonfp16arith-ld64.c",
+    "src/f16-igemm/gen/6x16-minmax-neonfp16arith-ld64.c",
+    "src/f16-igemm/gen/8x16-minmax-neonfp16arith-ld64.c",
     "src/f16-spmm/gen/8x1-minmax-neonfp16arith.c",
     "src/f16-spmm/gen/8x1-minmax-neonfp16arith-unroll2.c",
     "src/f16-spmm/gen/16x1-minmax-neonfp16arith.c",
@@ -853,6 +949,10 @@
     "src/f32-vmulcaddc/gen/c4-minmax-sse-2x.c",
     "src/f32-vmulcaddc/gen/c8-minmax-sse-2x.c",
     "src/x32-packx/x4-sse.c",
+    "src/math/roundne-sse-addsub.c",
+    "src/math/roundd-sse-addsub.c",
+    "src/math/roundu-sse-addsub.c",
+    "src/math/roundz-sse-addsub.c",
 ]
 
 SSE2_UKERNELS = [
@@ -892,6 +992,7 @@
     "src/u8-maxpool/9p8x-minmax-sse2-c16.c",
     "src/u8-rmax/sse2.c",
     "src/x32-pad/x2-sse2.c",
+    "src/x32-unpool/sse2.c",
     "src/x32-zip/x2-sse2.c",
     "src/x32-zip/x3-sse2.c",
     "src/x32-zip/x4-sse2.c",
@@ -902,6 +1003,10 @@
     "src/x8-zip/xm-sse2.c",
     "src/math/exp-sse2-p5.c",
     "src/math/expminus-sse2-p5.c",
+    "src/math/roundne-sse2-cvt.c",
+    "src/math/roundd-sse2-cvt.c",
+    "src/math/roundu-sse2-cvt.c",
+    "src/math/roundz-sse2-cvt.c",
     "src/math/sigmoid-sse2-p5-div.c",
     "src/requantization/precise-sse2.c",
     "src/requantization/fp32-sse2.c",
@@ -924,6 +1029,10 @@
     "src/f32-sigmoid/gen/sse41-p5-div-x16.c",
     "src/f32-sigmoid/gen/sse41-p5-div-x20.c",
     "src/f32-sigmoid/gen/sse41-p5-div-x24.c",
+    "src/math/roundne-sse41.c",
+    "src/math/roundd-sse41.c",
+    "src/math/roundu-sse41.c",
+    "src/math/roundz-sse41.c",
     "src/requantization/precise-sse4.c",
     "src/requantization/q31-sse4.c",
     "src/requantization/gemmlowp-sse4.c",
@@ -1305,7 +1414,6 @@
 ]
 
 AARCH32_ASM_UKERNELS = [
-    "src/q8-dwconv/up8x9-minmax-aarch32-neon.S",
     "src/f32-gemm/4x8-minmax-aarch32-neon-cortex-a53.S",
     "src/f32-gemm/4x8-minmax-aarch32-neon-cortex-a55.S",
     "src/f32-gemm/gen/4x8-minmax-aarch32-neon-cortex-a75.S",
@@ -1325,6 +1433,14 @@
     "src/f16-gemm/gen-inc/1x16inc-minmax-aarch64-neonfp16arith-ld32.S",
     "src/f16-gemm/gen-inc/4x16inc-minmax-aarch64-neonfp16arith-ld32.S",
     "src/f16-gemm/gen-inc/6x16inc-minmax-aarch64-neonfp16arith-ld32.S",
+    "src/f16-gemm/gen/1x8-minmax-aarch64-neonfp16arith-ld64.S",
+    "src/f16-gemm/gen/4x8-minmax-aarch64-neonfp16arith-ld64.S",
+    "src/f16-gemm/gen/6x8-minmax-aarch64-neonfp16arith-ld64.S",
+    "src/f16-gemm/gen/8x8-minmax-aarch64-neonfp16arith-ld64.S",
+    "src/f16-gemm/gen-inc/1x8inc-minmax-aarch64-neonfp16arith-ld64.S",
+    "src/f16-gemm/gen-inc/4x8inc-minmax-aarch64-neonfp16arith-ld64.S",
+    "src/f16-gemm/gen-inc/6x8inc-minmax-aarch64-neonfp16arith-ld64.S",
+    "src/f16-gemm/gen-inc/8x8inc-minmax-aarch64-neonfp16arith-ld64.S",
     "src/f32-dwconv/up4x9-minmax-aarch64-neonfma-cortex-a55.S",
     "src/f32-dwconv/up4x9-minmax-aarch64-neonfma.S",
     "src/f32-gemm/gen/1x8-minmax-aarch64-neonfma-ld64.S",
@@ -1575,6 +1691,7 @@
             srcs: NEONFMA_UKERNELS,
             cflags: [
                 "-marm",
+                "-march=armv7-a",
                 "-mfpu=neon-vfpv4",
             ],
         },
@@ -1594,6 +1711,33 @@
 }
 
 cc_library_static {
+    name: "xnnpack_neonv8_ukernels", // static library built from the NEONV8_UKERNELS source list
+    defaults: ["xnnpack_internal_default"],
+    arch: {
+        arm: {
+            srcs: NEONV8_UKERNELS,
+            cflags: [ // extra compiler flags applied only to the 32-bit arm variant
+                "-marm",
+                "-march=armv8-a",
+                "-mfpu=neon-fp-armv8",
+            ],
+        },
+        arm64: {
+            srcs: NEONV8_UKERNELS, // same sources as arm; no additional cflags are set here
+        },
+        x86: { enabled: false, }, // module is disabled for x86 and x86_64
+        x86_64: { enabled: false, },
+    },
+    header_libs: [
+        "fp16_headers",
+    ],
+    static_libs: [
+        "libpthreadpool",
+        "xnnpack_tables",
+    ],
+}
+
+cc_library_static {
     name: "xnnpack_neonfp16arith_ukernels",
     defaults: ["xnnpack_internal_default"],
     arch: {
@@ -1842,6 +1986,7 @@
                 "xnnpack_psimd_accmath_ukernels",
                 "xnnpack_neon_ukernels",
                 "xnnpack_neonfma_ukernels",
+                "xnnpack_neonv8_ukernels",
                 "xnnpack_asm_ukernels",
             ],
         },
@@ -1851,6 +1996,7 @@
                 "xnnpack_psimd_accmath_ukernels",
                 "xnnpack_neon_ukernels",
                 "xnnpack_neonfma_ukernels",
+                "xnnpack_neonv8_ukernels",
                 "xnnpack_neonfp16arith_ukernels",
                 "xnnpack_asm_ukernels",
             ],