AArch64: initial NEON support

Patch by Ana Pazos

- Completed implementation of instruction formats:
AdvSIMD three same
AdvSIMD modified immediate
AdvSIMD scalar pairwise

- Completed implementation of instruction classes
(some of the instructions in these classes
belong to yet unfinished instruction formats):
Vector Arithmetic
Vector Immediate
Vector Pairwise Arithmetic

- Initial implementation of instruction formats:
AdvSIMD scalar two-reg misc
AdvSIMD scalar three same

- Intial implementation of instruction class:
Scalar Arithmetic

- Initial clang changes to support arm v8 intrinsics.
Note: no clang changes for scalar intrinsics function name mangling yet.

- Comprehensive test cases for added instructions
To verify auto codegen, encoding, decoding, diagnosis, intrinsics.

llvm-svn: 187568
diff --git a/clang/test/CodeGenCXX/aarch64-mangle-neon-vectors.cpp b/clang/test/CodeGenCXX/aarch64-mangle-neon-vectors.cpp
new file mode 100644
index 0000000..86509a0
--- /dev/null
+++ b/clang/test/CodeGenCXX/aarch64-mangle-neon-vectors.cpp
@@ -0,0 +1,85 @@
+// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu  %s -emit-llvm -o - | FileCheck %s
+
+typedef unsigned char uint8_t;
+typedef unsigned short uint16_t;
+typedef signed char int8_t;
+typedef signed short int16_t;
+typedef signed long long int64_t;
+typedef unsigned long long uint64_t;
+typedef unsigned char poly8_t;
+typedef unsigned short poly16_t;
+typedef __fp16 float16_t;
+typedef float float32_t;
+typedef double float64_t;
+
+typedef __attribute__((neon_vector_type(8))) int8_t int8x8_t;
+typedef __attribute__((neon_vector_type(16))) int8_t int8x16_t;
+typedef __attribute__((neon_vector_type(4))) int16_t int16x4_t;
+typedef __attribute__((neon_vector_type(8))) int16_t int16x8_t;
+typedef __attribute__((neon_vector_type(2))) int int32x2_t;
+typedef __attribute__((neon_vector_type(4))) int int32x4_t;
+typedef __attribute__((neon_vector_type(2))) int64_t int64x2_t;
+typedef __attribute__((neon_vector_type(8))) uint8_t uint8x8_t;
+typedef __attribute__((neon_vector_type(16))) uint8_t uint8x16_t;
+typedef __attribute__((neon_vector_type(4))) uint16_t uint16x4_t;
+typedef __attribute__((neon_vector_type(8))) uint16_t uint16x8_t;
+typedef __attribute__((neon_vector_type(2))) unsigned int uint32x2_t;
+typedef __attribute__((neon_vector_type(4))) unsigned int uint32x4_t;
+typedef __attribute__((neon_vector_type(2))) uint64_t uint64x2_t;
+typedef __attribute__((neon_vector_type(4))) float16_t float16x4_t;
+typedef __attribute__((neon_vector_type(8))) float16_t float16x8_t;
+typedef __attribute__((neon_vector_type(2))) float32_t float32x2_t;
+typedef __attribute__((neon_vector_type(4))) float32_t float32x4_t;
+typedef __attribute__((neon_vector_type(2))) float64_t float64x2_t;
+typedef __attribute__((neon_polyvector_type(8))) poly8_t poly8x8_t;
+typedef __attribute__((neon_polyvector_type(16))) poly8_t poly8x16_t;
+typedef __attribute__((neon_polyvector_type(4))) poly16_t poly16x4_t;
+typedef __attribute__((neon_polyvector_type(8))) poly16_t poly16x8_t;
+
+// CHECK: 10__Int8x8_t
+void f1(int8x8_t) {}
+// CHECK: 11__Int16x4_t
+void f2(int16x4_t) {}
+// CHECK: 11__Int32x2_t
+void f3(int32x2_t) {}
+// CHECK: 11__Uint8x8_t
+void f4(uint8x8_t) {}
+// CHECK: 12__Uint16x4_t
+void f5(uint16x4_t) {}
+// CHECK: 13__Float16x4_t
+void f6(float16x4_t) {}
+// CHECK: 13__Float16x8_t
+void f7(float16x8_t) {}
+// CHECK: 12__Uint32x2_t
+void f8(uint32x2_t) {}
+// CHECK: 13__Float32x2_t
+void f9(float32x2_t) {}
+// CHECK: 13__Float32x4_t
+void f10(float32x4_t) {}
+// CHECK: 11__Poly8x8_t
+void f11(poly8x8_t v) {}
+// CHECK: 12__Poly16x4_t
+void f12(poly16x4_t v) {}
+// CHECK:12__Poly8x16_t
+void f13(poly8x16_t v) {}
+// CHECK:12__Poly16x8_t
+void f14(poly16x8_t v) {}
+// CHECK: 11__Int8x16_t
+void f15(int8x16_t) {}
+// CHECK: 11__Int16x8_t
+void f16(int16x8_t) {}
+// CHECK:11__Int32x4_t
+void f17(int32x4_t) {}
+// CHECK: 12__Uint8x16_t
+void f18(uint8x16_t) {}
+// CHECK: 12__Uint16x8_t
+void f19(uint16x8_t) {}
+// CHECK: 12__Uint32x4_t
+void f20(uint32x4_t) {}
+// CHECK: 11__Int64x2_t
+void f21(int64x2_t) {}
+// CHECK: 12__Uint64x2_t
+void f22(uint64x2_t) {}
+// CHECK: 13__Float64x2_t
+void f23(float64x2_t) {}
diff --git a/clang/test/CodeGenCXX/mangle-neon-vectors.cpp b/clang/test/CodeGenCXX/mangle-neon-vectors.cpp
index 3723deb..793c898 100644
--- a/clang/test/CodeGenCXX/mangle-neon-vectors.cpp
+++ b/clang/test/CodeGenCXX/mangle-neon-vectors.cpp
@@ -1,6 +1,7 @@
-// RUN: %clang_cc1 %s -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 -triple arm-none-linux-gnueabi %s -emit-llvm -o - | FileCheck %s
 
 typedef float float32_t;
+typedef __fp16 float16_t;
 typedef signed char poly8_t;
 typedef short poly16_t;
 typedef unsigned long long uint64_t;
@@ -11,8 +12,10 @@
 typedef __attribute__((neon_vector_type(2))) uint64_t uint64x2_t;
 typedef __attribute__((neon_vector_type(2))) float32_t float32x2_t;
 typedef __attribute__((neon_vector_type(4))) float32_t float32x4_t;
-typedef __attribute__((neon_polyvector_type(16))) poly8_t  poly8x16_t;
-typedef __attribute__((neon_polyvector_type(8)))  poly16_t poly16x8_t;
+typedef __attribute__((neon_vector_type(4))) float16_t float16x4_t;
+typedef __attribute__((neon_vector_type(8))) float16_t float16x8_t;
+typedef __attribute__((neon_polyvector_type(16))) poly8_t poly8x16_t;
+typedef __attribute__((neon_polyvector_type(8))) poly16_t poly16x8_t;
 
 // CHECK: 16__simd64_int32_t
 void f1(int32x2_t v) { }
@@ -26,7 +29,11 @@
 void f5(float32x2_t v) { }
 // CHECK: 19__simd128_float32_t
 void f6(float32x4_t v) { }
+// CHECK: 18__simd64_float16_t
+void f7(float16x4_t v) {}
+// CHECK: 19__simd128_float16_t
+void f8(float16x8_t v) {}
 // CHECK: 17__simd128_poly8_t
-void f7(poly8x16_t v) { }
+void f9(poly8x16_t v) {}
 // CHECK: 18__simd128_poly16_t
-void f8(poly16x8_t v) { }
+void f10(poly16x8_t v) {}