Update aosp/master clang for rebase to r256229

http://b/26987366

Change-Id: I5d349c9843ea5c24d6e455956f8a446393b6873d
diff --git a/test/CodeGen/2003-12-14-ExternInlineSupport.c b/test/CodeGen/2003-12-14-ExternInlineSupport.c
index eb3859c..cf01fd1 100644
--- a/test/CodeGen/2003-12-14-ExternInlineSupport.c
+++ b/test/CodeGen/2003-12-14-ExternInlineSupport.c
@@ -1,3 +1,4 @@
-// RUN: %clang_cc1 -std=gnu89 %s -emit-llvm -o - | not grep dead_function
+// RUN: %clang_cc1 -triple %itanium_abi_triple -std=gnu89 %s -emit-llvm -o - | FileCheck %s
 
 extern __inline__ void dead_function() {}
+// CHECK-NOT: dead_function
diff --git a/test/CodeGen/2004-11-27-StaticFunctionRedeclare.c b/test/CodeGen/2004-11-27-StaticFunctionRedeclare.c
index 9ceee4c..f6ce552 100644
--- a/test/CodeGen/2004-11-27-StaticFunctionRedeclare.c
+++ b/test/CodeGen/2004-11-27-StaticFunctionRedeclare.c
@@ -6,7 +6,7 @@
 
 // This is PR244
 
-// CHECK-LABEL: define void @bar(
+// CHECK-LABEL: define {{.*}}void @bar(
 // CHECK: call {{.*}} @func
 // CHECK: define internal {{.*}}i32 @func(
 static int func();
diff --git a/test/CodeGen/2006-01-13-Includes.c b/test/CodeGen/2006-01-13-Includes.c
index 9cc45ce..4b50526 100644
--- a/test/CodeGen/2006-01-13-Includes.c
+++ b/test/CodeGen/2006-01-13-Includes.c
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 %s -g -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 %s -debug-info-kind=limited -emit-llvm -o - | FileCheck %s
 // PR676
 
 int printf(const char * restrict format, ...);
diff --git a/test/CodeGen/2007-04-14-FNoBuiltin.c b/test/CodeGen/2007-04-14-FNoBuiltin.c
index 25ae01c..4d194b1 100644
--- a/test/CodeGen/2007-04-14-FNoBuiltin.c
+++ b/test/CodeGen/2007-04-14-FNoBuiltin.c
@@ -3,7 +3,7 @@
 
 extern int printf(const char*, ...);
 
-// CHECK: define void {{.*}}foo(
+// CHECK: define {{.*}}void {{.*}}foo(
 void foo(const char *msg) {
   // CHECK: call {{.*}}printf
   printf("%s\n",msg);
diff --git a/test/CodeGen/2007-05-11-str-const.c b/test/CodeGen/2007-05-11-str-const.c
index 731496d..5c3039c 100644
--- a/test/CodeGen/2007-05-11-str-const.c
+++ b/test/CodeGen/2007-05-11-str-const.c
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -emit-llvm -g %s  -o /dev/null
+// RUN: %clang_cc1 -emit-llvm -debug-info-kind=limited %s  -o /dev/null
 
 static unsigned char out[]={0,1};
 static const unsigned char str1[]="1";
diff --git a/test/CodeGen/2009-01-21-InvalidIterator.c b/test/CodeGen/2009-01-21-InvalidIterator.c
index f857b4d..83353da 100644
--- a/test/CodeGen/2009-01-21-InvalidIterator.c
+++ b/test/CodeGen/2009-01-21-InvalidIterator.c
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 %s -emit-llvm -g -o /dev/null
+// RUN: %clang_cc1 %s -emit-llvm -debug-info-kind=limited -o /dev/null
 
 typedef long unsigned int size_t;
 typedef unsigned short int uint16_t;
diff --git a/test/CodeGen/2009-03-13-dbg.c b/test/CodeGen/2009-03-13-dbg.c
index 8f48830..5a1f294 100644
--- a/test/CodeGen/2009-03-13-dbg.c
+++ b/test/CodeGen/2009-03-13-dbg.c
@@ -1,2 +1,2 @@
-// RUN: %clang_cc1 %s -emit-llvm -g -o /dev/null
+// RUN: %clang_cc1 %s -emit-llvm -debug-info-kind=limited -o /dev/null
 void foo() {}
diff --git a/test/CodeGen/2009-04-23-dbg.c b/test/CodeGen/2009-04-23-dbg.c
index 704aba2..69c38b2 100644
--- a/test/CodeGen/2009-04-23-dbg.c
+++ b/test/CodeGen/2009-04-23-dbg.c
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -S -g -o %t %s
+// RUN: %clang_cc1 -S -debug-info-kind=limited -o %t %s
 # 1 "a.c"
 # 1 "a.c" 1
 # 1 "<built-in>" 1
diff --git a/test/CodeGen/2009-07-31-DbgDeclare.c b/test/CodeGen/2009-07-31-DbgDeclare.c
index 3ccb263..b1d8220 100644
--- a/test/CodeGen/2009-07-31-DbgDeclare.c
+++ b/test/CodeGen/2009-07-31-DbgDeclare.c
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -S -g -o %t.s %s
+// RUN: %clang_cc1 -S -debug-info-kind=limited -o %t.s %s
 void foo() {
      int i = 0;
      i = 42;
diff --git a/test/CodeGen/2010-01-14-FnType-DebugInfo.c b/test/CodeGen/2010-01-14-FnType-DebugInfo.c
index 964c031..5cb0015 100644
--- a/test/CodeGen/2010-01-14-FnType-DebugInfo.c
+++ b/test/CodeGen/2010-01-14-FnType-DebugInfo.c
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 %s -emit-llvm -g -o /dev/null
+// RUN: %clang_cc1 %s -emit-llvm -debug-info-kind=limited -o /dev/null
 typedef void (*sigcatch_t)( struct sigcontext *);
 sigcatch_t sigcatch[50] = {(sigcatch_t) 0};
 
diff --git a/test/CodeGen/2010-01-18-Inlined-Debug.c b/test/CodeGen/2010-01-18-Inlined-Debug.c
index bdc6fc5..d763744 100644
--- a/test/CodeGen/2010-01-18-Inlined-Debug.c
+++ b/test/CodeGen/2010-01-18-Inlined-Debug.c
@@ -1,5 +1,5 @@
 // PR: 6058
-// RUN: %clang_cc1 -g -emit-llvm %s -o /dev/null
+// RUN: %clang_cc1 -debug-info-kind=limited -emit-llvm %s -o /dev/null
 
 static inline int foo(double) __attribute__ ((always_inline));
 static inline int foo(double __x) { return __x; }
diff --git a/test/CodeGen/2010-02-10-PointerName.c b/test/CodeGen/2010-02-10-PointerName.c
index 2321c01..e5f6684 100644
--- a/test/CodeGen/2010-02-10-PointerName.c
+++ b/test/CodeGen/2010-02-10-PointerName.c
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 %s -emit-llvm -g -o - | FileCheck %s
+// RUN: %clang_cc1 %s -emit-llvm -debug-info-kind=limited -o - | FileCheck %s
 // CHECK: DW_TAG_pointer_type
 // CHECK-NOT: {"char"}
 
diff --git a/test/CodeGen/2010-02-15-DbgStaticVar.c b/test/CodeGen/2010-02-15-DbgStaticVar.c
index 273385a..a1bfa62 100644
--- a/test/CodeGen/2010-02-15-DbgStaticVar.c
+++ b/test/CodeGen/2010-02-15-DbgStaticVar.c
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -g -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -debug-info-kind=limited -emit-llvm %s -o - | FileCheck %s
 // Test to check intentionally empty linkage name for a static variable.
 // Radar 7651244.
 static int foo(int a)
diff --git a/test/CodeGen/2010-02-16-DbgScopes.c b/test/CodeGen/2010-02-16-DbgScopes.c
index 3c33bae..4188f74 100644
--- a/test/CodeGen/2010-02-16-DbgScopes.c
+++ b/test/CodeGen/2010-02-16-DbgScopes.c
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -emit-llvm -g < %s | FileCheck %s
+// RUN: %clang_cc1 -emit-llvm -debug-info-kind=limited < %s | FileCheck %s
 // Test to check number of lexical scope identified in debug info.
 // CHECK: !DILexicalBlock(
 // CHECK: !DILexicalBlock(
diff --git a/test/CodeGen/2010-03-5-LexicalScope.c b/test/CodeGen/2010-03-5-LexicalScope.c
index 007be76..c0da9f0 100644
--- a/test/CodeGen/2010-03-5-LexicalScope.c
+++ b/test/CodeGen/2010-03-5-LexicalScope.c
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -emit-llvm -g %s -o - | FileCheck %s
+// RUN: %clang_cc1 -emit-llvm -debug-info-kind=limited %s -o - | FileCheck %s
 // CHECK: !DILexicalBlock(
 // CHECK: !DILexicalBlock(
 int foo(int i) {
diff --git a/test/CodeGen/2010-07-08-DeclDebugLineNo.c b/test/CodeGen/2010-07-08-DeclDebugLineNo.c
index 386c2c3..94c5e65 100644
--- a/test/CodeGen/2010-07-08-DeclDebugLineNo.c
+++ b/test/CodeGen/2010-07-08-DeclDebugLineNo.c
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -emit-llvm -g %s -o - | FileCheck %s
+// RUN: %clang_cc1 -emit-llvm -debug-info-kind=limited %s -o - | FileCheck %s
 // Insure that dbg.declare lines for locals refer to correct line number records.
 // Radar 8152866.
 void foo() {
diff --git a/test/CodeGen/2010-08-10-DbgConstant.c b/test/CodeGen/2010-08-10-DbgConstant.c
index 04956ae..cbc1841 100644
--- a/test/CodeGen/2010-08-10-DbgConstant.c
+++ b/test/CodeGen/2010-08-10-DbgConstant.c
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -S -emit-llvm -g  %s -o - | FileCheck %s
+// RUN: %clang_cc1 -S -emit-llvm -debug-info-kind=limited  %s -o - | FileCheck %s
 // CHECK: !DIGlobalVariable(
 
 static const unsigned int ro = 201;
diff --git a/test/CodeGen/3dnow-builtins.c b/test/CodeGen/3dnow-builtins.c
index f53b85c..d534349 100644
--- a/test/CodeGen/3dnow-builtins.c
+++ b/test/CodeGen/3dnow-builtins.c
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 %s -O3 -triple=x86_64-unknown-unknown -target-feature +3dnow -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 %s -triple=x86_64-unknown-unknown -target-feature +3dnowa -emit-llvm -o - -Werror | FileCheck %s
 
 // Don't include mm_malloc.h, it's system specific.
 #define __MM_MALLOC_H
diff --git a/test/CodeGen/Inputs/stdio.h b/test/CodeGen/Inputs/stdio.h
index cfe3595..fc49fd8 100644
--- a/test/CodeGen/Inputs/stdio.h
+++ b/test/CodeGen/Inputs/stdio.h
@@ -5,5 +5,5 @@
 extern __inline __attribute__((gnu_inline,always_inline)) int
 vprintf(const char *x, __builtin_va_list y)
 {
-  return vfprintf (0, 0, 0);
+  return vfprintf (0, 0, y);
 }
diff --git a/test/CodeGen/Nontemporal.cpp b/test/CodeGen/Nontemporal.cpp
new file mode 100644
index 0000000..4ddb9a1
--- /dev/null
+++ b/test/CodeGen/Nontemporal.cpp
@@ -0,0 +1,48 @@
+// Test frontend handling of nontemporal builtins.
+// RUN: %clang_cc1 -triple x86_64-apple-darwin -emit-llvm %s -o - | FileCheck %s
+
+signed char sc;
+unsigned char uc;
+signed short ss;
+unsigned short us;
+signed int si;
+unsigned int ui;
+signed long long sll;
+unsigned long long ull;
+float f1, f2;
+double d1, d2;
+float __attribute__((vector_size(16))) vf1, vf2;
+char __attribute__((vector_size(8))) vc1, vc2;
+bool b1, b2;
+
+void test_all_sizes(void)                 // CHECK-LABEL: test_all_sizes
+{
+  __builtin_nontemporal_store(true, &b1); // CHECK: store i8 1, i8* @b1, align 1, !nontemporal
+  __builtin_nontemporal_store(b1, &b2);   // CHECK: store i8{{.*}}, align 1, !nontemporal
+  __builtin_nontemporal_store(1, &uc);    // CHECK: store i8{{.*}}align 1, !nontemporal
+  __builtin_nontemporal_store(1, &sc);    // CHECK: store i8{{.*}}align 1, !nontemporal
+  __builtin_nontemporal_store(1, &us);    // CHECK: store i16{{.*}}align 2, !nontemporal
+  __builtin_nontemporal_store(1, &ss);    // CHECK: store i16{{.*}}align 2, !nontemporal
+  __builtin_nontemporal_store(1, &ui);    // CHECK: store i32{{.*}}align 4, !nontemporal
+  __builtin_nontemporal_store(1, &si);    // CHECK: store i32{{.*}}align 4, !nontemporal
+  __builtin_nontemporal_store(1, &ull);   // CHECK: store i64{{.*}}align 8, !nontemporal
+  __builtin_nontemporal_store(1, &sll);   // CHECK: store i64{{.*}}align 8, !nontemporal
+  __builtin_nontemporal_store(1.0, &f1);  // CHECK: store float{{.*}}align 4, !nontemporal
+  __builtin_nontemporal_store(1.0, &d1);  // CHECK: store double{{.*}}align 8, !nontemporal
+  __builtin_nontemporal_store(vf1, &vf2); // CHECK: store <4 x float>{{.*}}align 16, !nontemporal
+  __builtin_nontemporal_store(vc1, &vc2); // CHECK: store <8 x i8>{{.*}}align 8, !nontemporal
+
+  b1 = __builtin_nontemporal_load(&b2);   // CHECK: load i8{{.*}}align 1, !nontemporal
+  uc = __builtin_nontemporal_load(&sc);   // CHECK: load i8{{.*}}align 1, !nontemporal
+  sc = __builtin_nontemporal_load(&uc);   // CHECK: load i8{{.*}}align 1, !nontemporal
+  us = __builtin_nontemporal_load(&ss);   // CHECK: load i16{{.*}}align 2, !nontemporal
+  ss = __builtin_nontemporal_load(&us);   // CHECK: load i16{{.*}}align 2, !nontemporal
+  ui = __builtin_nontemporal_load(&si);   // CHECK: load i32{{.*}}align 4, !nontemporal
+  si = __builtin_nontemporal_load(&ui);   // CHECK: load i32{{.*}}align 4, !nontemporal
+  ull = __builtin_nontemporal_load(&sll); // CHECK: load i64{{.*}}align 8, !nontemporal
+  sll = __builtin_nontemporal_load(&ull); // CHECK: load i64{{.*}}align 8, !nontemporal
+  f1 = __builtin_nontemporal_load(&f2);   // CHECK: load float{{.*}}align 4, !nontemporal
+  d1 = __builtin_nontemporal_load(&d2);   // CHECK: load double{{.*}}align 8, !nontemporal
+  vf2 = __builtin_nontemporal_load(&vf1); // CHECK: load <4 x float>{{.*}}align 16, !nontemporal
+  vc2 = __builtin_nontemporal_load(&vc1); // CHECK: load <8 x i8>{{.*}}align 8, !nontemporal
+}
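
The new Nontemporal.cpp test above pins down the IR these builtins should produce: each __builtin_nontemporal_store / __builtin_nontemporal_load turns into an ordinary store or load of the operand's type, at its usual alignment, tagged with !nontemporal metadata, which is exactly what the CHECK lines verify. As a rough standalone sketch (illustrative only, not part of the patch; it just assumes a Clang that provides these builtins), a streaming copy could look like:

// Sketch: copy a buffer using the nontemporal builtins so every access
// carries the !nontemporal cache-bypass hint checked for above.
void stream_copy(float *dst, float *src, int n) {
  for (int i = 0; i < n; ++i) {
    float v = __builtin_nontemporal_load(&src[i]);   // load float, ..., !nontemporal
    __builtin_nontemporal_store(v, &dst[i]);         // store float, ..., !nontemporal
  }
}
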
diff --git a/test/CodeGen/PR8880.c b/test/CodeGen/PR8880.c
index e03d2a4..ff8491e 100644
--- a/test/CodeGen/PR8880.c
+++ b/test/CodeGen/PR8880.c
@@ -1,7 +1,7 @@
 // RUN: %clang_cc1 -Wno-gcc-compat -emit-llvm -o - %s | FileCheck %s
 
 void pr8880_cg_1(int *iptr) {
-// CHECK-LABEL: define void @pr8880_cg_1(
+// CHECK-LABEL: define {{.*}}void @pr8880_cg_1(
   int i, j;
 // CHECK: br label %[[OUTER_COND:[0-9A-Za-z$._]+]]
   for (i = 2; i != 10 ; i++ )
@@ -31,7 +31,7 @@
 }
 
 void pr8880_cg_2(int *iptr) {
-// CHECK-LABEL: define void @pr8880_cg_2(
+// CHECK-LABEL: define {{.*}}void @pr8880_cg_2(
   int i, j;
 // CHECK: br label %[[OUTER_COND:[0-9A-Za-z$._]+]]
   for (i = 2; i != 10 ; i++ )
@@ -61,7 +61,7 @@
 }
 
 void pr8880_cg_3(int *iptr) {
-// CHECK-LABEL: define void @pr8880_cg_3(
+// CHECK-LABEL: define {{.*}}void @pr8880_cg_3(
   int i, j;
 // CHECK: br label %[[OUTER_COND:[0-9A-Za-z$._]+]]
   for (i = 2 ; i != 10 ; i++ )
@@ -92,7 +92,7 @@
 }
 
 void pr8880_cg_4(int *iptr) {
-// CHECK-LABEL: define void @pr8880_cg_4(
+// CHECK-LABEL: define {{.*}}void @pr8880_cg_4(
   int i, j;
 // CHECK: br label %[[OUTER_COND:[0-9A-Za-z$._]+]]
   for (i = 2 ; i != 10 ; i++ )
@@ -123,7 +123,7 @@
 }
 
 void pr8880_cg_5(int x, int *iptr) {
-// CHECK-LABEL: define void @pr8880_cg_5(
+// CHECK-LABEL: define {{.*}}void @pr8880_cg_5(
   int y = 5;
 // CHECK: br label %[[OUTER_COND:[0-9A-Za-z$._]+]]
 // CHECK: [[OUTER_COND]]
@@ -148,7 +148,7 @@
 }
 
 void pr8880_cg_6(int x, int *iptr) {
-// CHECK-LABEL: define void @pr8880_cg_6(
+// CHECK-LABEL: define {{.*}}void @pr8880_cg_6(
   int y = 5;
 // CHECK: br label %[[OUTER_COND:[0-9A-Za-z$._]+]]
 // CHECK: [[OUTER_COND]]
diff --git a/test/CodeGen/aarch64-neon-vget.c b/test/CodeGen/aarch64-neon-vget.c
new file mode 100644
index 0000000..83c6494
--- /dev/null
+++ b/test/CodeGen/aarch64-neon-vget.c
@@ -0,0 +1,348 @@
+// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -triple arm64-apple-darwin -target-feature +neon \
+// RUN:   -ffp-contract=fast -S -O3 -o - %s | FileCheck %s
+
+#include <arm_neon.h>
+
+uint8_t test_vget_lane_u8(uint8x8_t a) {
+  // CHECK-LABEL: test_vget_lane_u8:
+  // CHECK-NEXT:  umov.b w0, v0[7]
+  // CHECK-NEXT:  ret
+  return vget_lane_u8(a, 7);
+}
+
+uint16_t test_vget_lane_u16(uint16x4_t a) {
+  // CHECK-LABEL: test_vget_lane_u16:
+  // CHECK-NEXT:  umov.h w0, v0[3]
+  // CHECK-NEXT:  ret
+  return vget_lane_u16(a, 3);
+}
+
+uint32_t test_vget_lane_u32(uint32x2_t a) {
+  // CHECK-LABEL: test_vget_lane_u32:
+  // CHECK-NEXT:  mov.s  w0, v0[1]
+  // CHECK-NEXT:  ret
+  return vget_lane_u32(a, 1);
+}
+
+int8_t test_vget_lane_s8(int8x8_t a) {
+  // CHECK-LABEL: test_vget_lane_s8:
+  // CHECK-NEXT:  umov.b w0, v0[7]
+  // CHECK-NEXT:  ret
+  return vget_lane_s8(a, 7);
+}
+
+int16_t test_vget_lane_s16(int16x4_t a) {
+  // CHECK-LABEL: test_vget_lane_s16:
+  // CHECK-NEXT:  umov.h w0, v0[3]
+  // CHECK-NEXT:  ret
+  return vget_lane_s16(a, 3);
+}
+
+int32_t test_vget_lane_s32(int32x2_t a) {
+  // CHECK-LABEL: test_vget_lane_s32:
+  // CHECK-NEXT:  mov.s  w0, v0[1]
+  // CHECK-NEXT:  ret
+  return vget_lane_s32(a, 1);
+}
+
+poly8_t test_vget_lane_p8(poly8x8_t a) {
+  // CHECK-LABEL: test_vget_lane_p8:
+  // CHECK-NEXT:  umov.b w0, v0[7]
+  // CHECK-NEXT:  ret
+  return vget_lane_p8(a, 7);
+}
+
+poly16_t test_vget_lane_p16(poly16x4_t a) {
+  // CHECK-LABEL: test_vget_lane_p16:
+  // CHECK-NEXT:  umov.h w0, v0[3]
+  // CHECK-NEXT:  ret
+  return vget_lane_p16(a, 3);
+}
+
+float32_t test_vget_lane_f32(float32x2_t a) {
+  // CHECK-LABEL: test_vget_lane_f32:
+  // CHECK-NEXT:  mov s0, v0[1]
+  // CHECK-NEXT:  ret
+  return vget_lane_f32(a, 1);
+}
+
+float32_t test_vget_lane_f16(float16x4_t a) {
+  // CHECK-LABEL: test_vget_lane_f16:
+  // CHECK-NEXT:  umov.h w8, v0[1]
+  // CHECK-NEXT:  fmov s0, w8
+  // CHECK-NEXT:  fcvt s0, h0
+  // CHECK-NEXT:  ret
+  return vget_lane_f16(a, 1);
+}
+
+uint8_t test_vgetq_lane_u8(uint8x16_t a) {
+  // CHECK-LABEL: test_vgetq_lane_u8:
+  // CHECK-NEXT:  umov.b w0, v0[15]
+  // CHECK-NEXT:  ret
+  return vgetq_lane_u8(a, 15);
+}
+
+uint16_t test_vgetq_lane_u16(uint16x8_t a) {
+  // CHECK-LABEL: test_vgetq_lane_u16:
+  // CHECK-NEXT:  umov.h w0, v0[7]
+  // CHECK-NEXT:  ret
+  return vgetq_lane_u16(a, 7);
+}
+
+uint32_t test_vgetq_lane_u32(uint32x4_t a) {
+  // CHECK-LABEL: test_vgetq_lane_u32:
+  // CHECK-NEXT:  mov.s  w0, v0[3]
+  // CHECK-NEXT:  ret
+  return vgetq_lane_u32(a, 3);
+}
+
+int8_t test_vgetq_lane_s8(int8x16_t a) {
+  // CHECK-LABEL: test_vgetq_lane_s8:
+  // CHECK-NEXT:  umov.b w0, v0[15]
+  // CHECK-NEXT:  ret
+  return vgetq_lane_s8(a, 15);
+}
+
+int16_t test_vgetq_lane_s16(int16x8_t a) {
+  // CHECK-LABEL: test_vgetq_lane_s16:
+  // CHECK-NEXT:  umov.h w0, v0[7]
+  // CHECK-NEXT:  ret
+  return vgetq_lane_s16(a, 7);
+}
+
+int32_t test_vgetq_lane_s32(int32x4_t a) {
+  // CHECK-LABEL: test_vgetq_lane_s32:
+  // CHECK-NEXT:  mov.s  w0, v0[3]
+  // CHECK-NEXT:  ret
+  return vgetq_lane_s32(a, 3);
+}
+
+poly8_t test_vgetq_lane_p8(poly8x16_t a) {
+  // CHECK-LABEL: test_vgetq_lane_p8:
+  // CHECK-NEXT:  umov.b w0, v0[15]
+  // CHECK-NEXT:  ret
+  return vgetq_lane_p8(a, 15);
+}
+
+poly16_t test_vgetq_lane_p16(poly16x8_t a) {
+  // CHECK-LABEL: test_vgetq_lane_p16:
+  // CHECK-NEXT:  umov.h w0, v0[7]
+  // CHECK-NEXT:  ret
+  return vgetq_lane_p16(a, 7);
+}
+
+float32_t test_vgetq_lane_f32(float32x4_t a) {
+  // CHECK-LABEL: test_vgetq_lane_f32:
+  // CHECK-NEXT:  mov s0, v0[3]
+  // CHECK-NEXT:  ret
+  return vgetq_lane_f32(a, 3);
+}
+
+float32_t test_vgetq_lane_f16(float16x8_t a) {
+  // CHECK-LABEL: test_vgetq_lane_f16:
+  // CHECK-NEXT:  umov.h w8, v0[3]
+  // CHECK-NEXT:  fmov s0, w8
+  // CHECK-NEXT:  fcvt s0, h0
+  // CHECK-NEXT:  ret
+  return vgetq_lane_f16(a, 3);
+}
+
+int64_t test_vget_lane_s64(int64x1_t a) {
+  // CHECK-LABEL: test_vget_lane_s64:
+  // CHECK-NEXT:  fmov x0, d0
+  // CHECK-NEXT:  ret
+  return vget_lane_s64(a, 0);
+}
+
+uint64_t test_vget_lane_u64(uint64x1_t a) {
+  // CHECK-LABEL: test_vget_lane_u64:
+  // CHECK-NEXT:  fmov x0, d0
+  // CHECK-NEXT:  ret
+  return vget_lane_u64(a, 0);
+}
+
+int64_t test_vgetq_lane_s64(int64x2_t a) {
+  // CHECK-LABEL: test_vgetq_lane_s64:
+  // CHECK-NEXT:  mov.d  x0, v0[1]
+  // CHECK-NEXT:  ret
+  return vgetq_lane_s64(a, 1);
+}
+
+uint64_t test_vgetq_lane_u64(uint64x2_t a) {
+  // CHECK-LABEL: test_vgetq_lane_u64:
+  // CHECK-NEXT:  mov.d  x0, v0[1]
+  // CHECK-NEXT:  ret
+  return vgetq_lane_u64(a, 1);
+}
+
+
+uint8x8_t test_vset_lane_u8(uint8_t a, uint8x8_t b) {
+  // CHECK-LABEL: test_vset_lane_u8:
+  // CHECK-NEXT:  ins.b v0[7], w0
+  // CHECK-NEXT:  ret
+  return vset_lane_u8(a, b, 7);
+}
+
+uint16x4_t test_vset_lane_u16(uint16_t a, uint16x4_t b) {
+  // CHECK-LABEL: test_vset_lane_u16:
+  // CHECK-NEXT:  ins.h v0[3], w0
+  // CHECK-NEXT:  ret
+  return vset_lane_u16(a, b, 3);
+}
+
+uint32x2_t test_vset_lane_u32(uint32_t a, uint32x2_t b) {
+  // CHECK-LABEL: test_vset_lane_u32:
+  // CHECK-NEXT:  ins.s v0[1], w0
+  // CHECK-NEXT:  ret
+  return vset_lane_u32(a, b, 1);
+}
+
+int8x8_t test_vset_lane_s8(int8_t a, int8x8_t b) {
+  // CHECK-LABEL: test_vset_lane_s8:
+  // CHECK-NEXT:  ins.b v0[7], w0
+  // CHECK-NEXT:  ret
+  return vset_lane_s8(a, b, 7);
+}
+
+int16x4_t test_vset_lane_s16(int16_t a, int16x4_t b) {
+  // CHECK-LABEL: test_vset_lane_s16:
+  // CHECK-NEXT:  ins.h v0[3], w0
+  // CHECK-NEXT:  ret
+  return vset_lane_s16(a, b, 3);
+}
+
+int32x2_t test_vset_lane_s32(int32_t a, int32x2_t b) {
+  // CHECK-LABEL: test_vset_lane_s32:
+  // CHECK-NEXT:  ins.s v0[1], w0
+  // CHECK-NEXT:  ret
+  return vset_lane_s32(a, b, 1);
+}
+
+poly8x8_t test_vset_lane_p8(poly8_t a, poly8x8_t b) {
+  // CHECK-LABEL: test_vset_lane_p8:
+  // CHECK-NEXT:  ins.b v0[7], w0
+  // CHECK-NEXT:  ret
+  return vset_lane_p8(a, b, 7);
+}
+
+poly16x4_t test_vset_lane_p16(poly16_t a, poly16x4_t b) {
+  // CHECK-LABEL: test_vset_lane_p16:
+  // CHECK-NEXT:  ins.h v0[3], w0
+  // CHECK-NEXT:  ret
+  return vset_lane_p16(a, b, 3);
+}
+
+float32x2_t test_vset_lane_f32(float32_t a, float32x2_t b) {
+  // CHECK-LABEL: test_vset_lane_f32:
+  // CHECK-NEXT:  ins.s v1[1], v0[0]
+  // CHECK-NEXT:  mov.16b  v0, v1
+  // CHECK-NEXT:  ret
+  return vset_lane_f32(a, b, 1);
+}
+
+float16x4_t test_vset_lane_f16(float16_t *a, float16x4_t b) {
+  // CHECK-LABEL: test_vset_lane_f16:
+  // CHECK-NEXT:  ld1.h { v0 }[3], [x0]
+  // CHECK-NEXT:  ret
+  return vset_lane_f16(*a, b, 3);
+}
+
+uint8x16_t test_vsetq_lane_u8(uint8_t a, uint8x16_t b) {
+  // CHECK-LABEL: test_vsetq_lane_u8:
+  // CHECK-NEXT:  ins.b v0[15], w0
+  // CHECK-NEXT:  ret
+  return vsetq_lane_u8(a, b, 15);
+}
+
+uint16x8_t test_vsetq_lane_u16(uint16_t a, uint16x8_t b) {
+  // CHECK-LABEL: test_vsetq_lane_u16:
+  // CHECK-NEXT:  ins.h v0[7], w0
+  // CHECK-NEXT:  ret
+  return vsetq_lane_u16(a, b, 7);
+}
+
+uint32x4_t test_vsetq_lane_u32(uint32_t a, uint32x4_t b) {
+  // CHECK-LABEL: test_vsetq_lane_u32:
+  // CHECK-NEXT:  ins.s v0[3], w0
+  // CHECK-NEXT:  ret
+  return vsetq_lane_u32(a, b, 3);
+}
+
+int8x16_t test_vsetq_lane_s8(int8_t a, int8x16_t b) {
+  // CHECK-LABEL: test_vsetq_lane_s8:
+  // CHECK-NEXT:  ins.b v0[15], w0
+  // CHECK-NEXT:  ret
+  return vsetq_lane_s8(a, b, 15);
+}
+
+int16x8_t test_vsetq_lane_s16(int16_t a, int16x8_t b) {
+  // CHECK-LABEL: test_vsetq_lane_s16:
+  // CHECK-NEXT:  ins.h v0[7], w0
+  // CHECK-NEXT:  ret
+  return vsetq_lane_s16(a, b, 7);
+}
+
+int32x4_t test_vsetq_lane_s32(int32_t a, int32x4_t b) {
+  // CHECK-LABEL: test_vsetq_lane_s32:
+  // CHECK-NEXT:  ins.s v0[3], w0
+  // CHECK-NEXT:  ret
+  return vsetq_lane_s32(a, b, 3);
+}
+
+poly8x16_t test_vsetq_lane_p8(poly8_t a, poly8x16_t b) {
+  // CHECK-LABEL: test_vsetq_lane_p8:
+  // CHECK-NEXT:  ins.b v0[15], w0
+  // CHECK-NEXT:  ret
+  return vsetq_lane_p8(a, b, 15);
+}
+
+poly16x8_t test_vsetq_lane_p16(poly16_t a, poly16x8_t b) {
+  // CHECK-LABEL: test_vsetq_lane_p16:
+  // CHECK-NEXT:  ins.h v0[7], w0
+  // CHECK-NEXT:  ret
+  return vsetq_lane_p16(a, b, 7);
+}
+
+float32x4_t test_vsetq_lane_f32(float32_t a, float32x4_t b) {
+  // CHECK-LABEL: test_vsetq_lane_f32:
+  // CHECK-NEXT:  ins.s v1[3], v0[0]
+  // CHECK-NEXT:  mov.16b  v0, v1
+  // CHECK-NEXT:  ret
+  return vsetq_lane_f32(a, b, 3);
+}
+
+float16x8_t test_vsetq_lane_f16(float16_t *a, float16x8_t b) {
+  // CHECK-LABEL: test_vsetq_lane_f16:
+  // CHECK-NEXT:  ld1.h { v0 }[7], [x0]
+  // CHECK-NEXT:  ret
+  return vsetq_lane_f16(*a, b, 7);
+}
+
+int64x1_t test_vset_lane_s64(int64_t a, int64x1_t b) {
+  // CHECK-LABEL: test_vset_lane_s64:
+  // CHECK-NEXT:  fmov d0, x0
+  // CHECK-NEXT:  ret
+  return vset_lane_s64(a, b, 0);
+}
+
+uint64x1_t test_vset_lane_u64(uint64_t a, uint64x1_t b) {
+  // CHECK-LABEL: test_vset_lane_u64:
+  // CHECK-NEXT:  fmov d0, x0
+  // CHECK-NEXT:  ret
+  return vset_lane_u64(a, b, 0);
+}
+
+int64x2_t test_vsetq_lane_s64(int64_t a, int64x2_t b) {
+  // CHECK-LABEL: test_vsetq_lane_s64:
+  // CHECK-NEXT:  ins.d v0[1], x0
+  // CHECK-NEXT:  ret
+  return vsetq_lane_s64(a, b, 1);
+}
+
+uint64x2_t test_vsetq_lane_u64(uint64_t a, uint64x2_t b) {
+  // CHECK-LABEL: test_vsetq_lane_u64:
+  // CHECK-NEXT:  ins.d v0[1], x0
+  // CHECK-NEXT:  ret
+  return vsetq_lane_u64(a, b, 1);
+}
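
The vget/vset lane intrinsics exercised above read or replace one element of a NEON vector, and the CHECK-NEXT lines pin the single umov/ins (or fmov for the 64-bit d-register cases) each one should compile to at -O3. A small composed example (an illustrative sketch, not taken from the test, assuming <arm_neon.h> on an AArch64 target):

#include <arm_neon.h>

// Sketch: move lane 0 of one vector into lane 3 of another using the
// intrinsics covered by the test above.
int32x4_t copy_lane(int32x4_t src, int32x4_t dst) {
  int32_t x = vgetq_lane_s32(src, 0); // expected to become a single mov.s/umov
  return vsetq_lane_s32(x, dst, 3);   // expected to become a single ins.s
}
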
diff --git a/test/CodeGen/aarch64-v8.1a-neon-intrinsics.c b/test/CodeGen/aarch64-v8.1a-neon-intrinsics.c
new file mode 100644
index 0000000..078b454
--- /dev/null
+++ b/test/CodeGen/aarch64-v8.1a-neon-intrinsics.c
@@ -0,0 +1,128 @@
+// REQUIRES: aarch64-registered-target
+
+// RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +neon \
+// RUN:  -target-feature +v8.1a -O3 -S -o - %s \
+// RUN:  | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-AARCH64
+
+ #include <arm_neon.h>
+
+// CHECK-AARCH64-LABEL: test_vqrdmlah_laneq_s16
+int16x4_t test_vqrdmlah_laneq_s16(int16x4_t a, int16x4_t b, int16x8_t v) {
+// CHECK-AARCH64: sqrdmlah {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7]
+  return vqrdmlah_laneq_s16(a, b, v, 7);
+}
+
+// CHECK-AARCH64-LABEL: test_vqrdmlah_laneq_s32
+int32x2_t test_vqrdmlah_laneq_s32(int32x2_t a, int32x2_t b, int32x4_t v) {
+// CHECK-AARCH64: sqrdmlah {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
+  return vqrdmlah_laneq_s32(a, b, v, 3);
+}
+
+// CHECK-AARCH64-LABEL: test_vqrdmlahq_laneq_s16
+int16x8_t test_vqrdmlahq_laneq_s16(int16x8_t a, int16x8_t b, int16x8_t v) {
+// CHECK-AARCH64: sqrdmlah {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7]
+  return vqrdmlahq_laneq_s16(a, b, v, 7);
+}
+
+// CHECK-AARCH64-LABEL: test_vqrdmlahq_laneq_s32
+int32x4_t test_vqrdmlahq_laneq_s32(int32x4_t a, int32x4_t b, int32x4_t v) {
+// CHECK-AARCH64: sqrdmlah {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
+  return vqrdmlahq_laneq_s32(a, b, v, 3);
+}
+
+// CHECK-AARCH64-LABEL: test_vqrdmlahh_s16
+int16_t test_vqrdmlahh_s16(int16_t a, int16_t b, int16_t c) {
+// CHECK-AARCH64: sqrdmlah {{h[0-9]+|v[0-9]+.4h}}, {{h[0-9]+|v[0-9]+.4h}}, {{h[0-9]+|v[0-9]+.4h}}
+  return vqrdmlahh_s16(a, b, c);
+}
+
+// CHECK-AARCH64-LABEL: test_vqrdmlahs_s32
+int32_t test_vqrdmlahs_s32(int32_t a, int32_t b, int32_t c) {
+// CHECK-AARCH64: sqrdmlah {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+  return vqrdmlahs_s32(a, b, c);
+}
+
+// CHECK-AARCH64-LABEL: test_vqrdmlahh_lane_s16
+int16_t test_vqrdmlahh_lane_s16(int16_t a, int16_t b, int16x4_t c) {
+// CHECK-AARCH64: sqrdmlah {{h[0-9]+|v[0-9]+.4h}}, {{h[0-9]+|v[0-9]+.4h}}, {{v[0-9]+}}.h[3]
+  return vqrdmlahh_lane_s16(a, b, c, 3);
+}
+
+// CHECK-AARCH64-LABEL: test_vqrdmlahs_lane_s32
+int32_t test_vqrdmlahs_lane_s32(int32_t a, int32_t b, int32x2_t c) {
+// CHECK-AARCH64: sqrdmlah {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[1]
+  return vqrdmlahs_lane_s32(a, b, c, 1);
+}
+
+// CHECK-AARCH64-LABEL: test_vqrdmlahh_laneq_s16
+int16_t test_vqrdmlahh_laneq_s16(int16_t a, int16_t b, int16x8_t c) {
+// CHECK-AARCH64: sqrdmlah {{h[0-9]+|v[0-9]+.4h}}, {{h[0-9]+|v[0-9]+.4h}}, {{v[0-9]+}}.h[7]
+  return vqrdmlahh_laneq_s16(a, b, c, 7);
+}
+
+// CHECK-AARCH64-LABEL: test_vqrdmlahs_laneq_s32
+int32_t test_vqrdmlahs_laneq_s32(int32_t a, int32_t b, int32x4_t c) {
+// CHECK-AARCH64: sqrdmlah {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3]
+  return vqrdmlahs_laneq_s32(a, b, c, 3);
+}
+
+// CHECK-AARCH64-LABEL: test_vqrdmlsh_laneq_s16
+int16x4_t test_vqrdmlsh_laneq_s16(int16x4_t a, int16x4_t b, int16x8_t v) {
+// CHECK-AARCH64: sqrdmlsh {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7]
+  return vqrdmlsh_laneq_s16(a, b, v, 7);
+}
+
+// CHECK-AARCH64-LABEL: test_vqrdmlsh_laneq_s32
+int32x2_t test_vqrdmlsh_laneq_s32(int32x2_t a, int32x2_t b, int32x4_t v) {
+// CHECK-AARCH64: sqrdmlsh {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
+  return vqrdmlsh_laneq_s32(a, b, v, 3);
+}
+
+// CHECK-AARCH64-LABEL: test_vqrdmlshq_laneq_s16
+int16x8_t test_vqrdmlshq_laneq_s16(int16x8_t a, int16x8_t b, int16x8_t v) {
+// CHECK-AARCH64: sqrdmlsh {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7]
+  return vqrdmlshq_laneq_s16(a, b, v, 7);
+}
+
+// CHECK-AARCH64-LABEL: test_vqrdmlshq_laneq_s32
+int32x4_t test_vqrdmlshq_laneq_s32(int32x4_t a, int32x4_t b, int32x4_t v) {
+// CHECK-AARCH64: sqrdmlsh {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
+  return vqrdmlshq_laneq_s32(a, b, v, 3);
+}
+
+// CHECK-AARCH64-LABEL: test_vqrdmlshh_s16
+int16_t test_vqrdmlshh_s16(int16_t a, int16_t b, int16_t c) {
+// CHECK-AARCH64: sqrdmlsh {{h[0-9]+|v[0-9]+.4h}}, {{h[0-9]+|v[0-9]+.4h}}, {{h[0-9]+|v[0-9]+.4h}}
+  return vqrdmlshh_s16(a, b, c);
+}
+
+// CHECK-AARCH64-LABEL: test_vqrdmlshs_s32
+int32_t test_vqrdmlshs_s32(int32_t a, int32_t b, int32_t c) {
+// CHECK-AARCH64: sqrdmlsh {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+  return vqrdmlshs_s32(a, b, c);
+}
+
+// CHECK-AARCH64-LABEL: test_vqrdmlshh_lane_s16
+int16_t test_vqrdmlshh_lane_s16(int16_t a, int16_t b, int16x4_t c) {
+// CHECK-AARCH64: sqrdmlsh {{h[0-9]+|v[0-9]+.4h}}, {{h[0-9]+|v[0-9]+.4h}}, {{v[0-9]+}}.h[3]
+  return vqrdmlshh_lane_s16(a, b, c, 3);
+}
+
+// CHECK-AARCH64-LABEL: test_vqrdmlshs_lane_s32
+int32_t test_vqrdmlshs_lane_s32(int32_t a, int32_t b, int32x2_t c) {
+// CHECK-AARCH64: sqrdmlsh {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[1]
+  return vqrdmlshs_lane_s32(a, b, c, 1);
+}
+
+// CHECK-AARCH64-LABEL: test_vqrdmlshh_laneq_s16
+int16_t test_vqrdmlshh_laneq_s16(int16_t a, int16_t b, int16x8_t c) {
+// CHECK-AARCH64: sqrdmlsh {{h[0-9]+|v[0-9]+.4h}}, {{h[0-9]+|v[0-9]+.4h}}, {{v[0-9]+}}.h[7]
+  return vqrdmlshh_laneq_s16(a, b, c, 7);
+}
+
+// CHECK-AARCH64-LABEL: test_vqrdmlshs_laneq_s32
+int32_t test_vqrdmlshs_laneq_s32(int32_t a, int32_t b, int32x4_t c) {
+// CHECK-AARCH64: sqrdmlsh {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3]
+  return vqrdmlshs_laneq_s32(a, b, c, 3);
+}
+
diff --git a/test/CodeGen/aarch64-varargs.c b/test/CodeGen/aarch64-varargs.c
index 4343371..08f3960 100644
--- a/test/CodeGen/aarch64-varargs.c
+++ b/test/CodeGen/aarch64-varargs.c
@@ -23,21 +23,19 @@
 
 // CHECK: [[VAARG_IN_REG]]
 // CHECK: [[REG_TOP:%[a-z_0-9]+]] = load i8*, i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 1)
-// CHECK: [[REG_ADDR:%[a-z_0-9]+]] = getelementptr i8, i8* [[REG_TOP]], i32 [[GR_OFFS]]
-// CHECK-BE: [[REG_ADDR_VAL:%[0-9]+]] = ptrtoint i8* [[REG_ADDR]] to i64
-// CHECK-BE: [[REG_ADDR_VAL_ALIGNED:%[a-z_0-9]*]] = add i64 [[REG_ADDR_VAL]], 4
-// CHECK-BE: [[REG_ADDR:%[0-9]+]] = inttoptr i64 [[REG_ADDR_VAL_ALIGNED]] to i8*
-// CHECK: [[FROMREG_ADDR:%[a-z_0-9]+]] = bitcast i8* [[REG_ADDR]] to i32*
+// CHECK: [[REG_ADDR:%[a-z_0-9]+]] = getelementptr inbounds i8, i8* [[REG_TOP]], i32 [[GR_OFFS]]
+// CHECK-BE: [[REG_ADDR_ALIGNED:%[0-9]+]] = getelementptr inbounds i8, i8* [[REG_ADDR]], i64 4
+// CHECK-BE: [[FROMREG_ADDR:%[a-z_0-9]+]] = bitcast i8* [[REG_ADDR_ALIGNED]] to i32*
+// CHECK-LE: [[FROMREG_ADDR:%[a-z_0-9]+]] = bitcast i8* [[REG_ADDR]] to i32*
 // CHECK: br label %[[VAARG_END:[a-z._0-9]+]]
 
 // CHECK: [[VAARG_ON_STACK]]
 // CHECK: [[STACK:%[a-z_0-9]+]] = load i8*, i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 0)
-// CHECK: [[NEW_STACK:%[a-z_0-9]+]] = getelementptr i8, i8* [[STACK]], i32 8
+// CHECK: [[NEW_STACK:%[a-z_0-9]+]] = getelementptr inbounds i8, i8* [[STACK]], i64 8
 // CHECK: store i8* [[NEW_STACK]], i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 0)
-// CHECK-BE: [[STACK_VAL:%[0-9]+]] = ptrtoint i8* [[STACK]] to i64
-// CHECK-BE: [[STACK_VAL_ALIGNED:%[a-z_0-9]*]] = add i64 [[STACK_VAL]], 4
-// CHECK-BE: [[STACK:%[0-9]+]] = inttoptr i64 [[STACK_VAL_ALIGNED]] to i8*
-// CHECK: [[FROMSTACK_ADDR:%[a-z_0-9]+]] = bitcast i8* [[STACK]] to i32*
+// CHECK-BE: [[STACK_ALIGNED:%[a-z_0-9]*]] = getelementptr inbounds i8, i8* [[STACK]], i64 4
+// CHECK-BE: [[FROMSTACK_ADDR:%[a-z_0-9]+]] = bitcast i8* [[STACK_ALIGNED]] to i32*
+// CHECK-LE: [[FROMSTACK_ADDR:%[a-z_0-9]+]] = bitcast i8* [[STACK]] to i32*
 // CHECK: br label %[[VAARG_END]]
 
 // CHECK: [[VAARG_END]]
@@ -63,7 +61,7 @@
 
 // CHECK: [[VAARG_IN_REG]]
 // CHECK: [[REG_TOP:%[a-z_0-9]+]] = load i8*, i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 1)
-// CHECK: [[REG_ADDR:%[a-z_0-9]+]] = getelementptr i8, i8* [[REG_TOP]], i32 [[ALIGNED_REGOFFS]]
+// CHECK: [[REG_ADDR:%[a-z_0-9]+]] = getelementptr inbounds i8, i8* [[REG_TOP]], i32 [[ALIGNED_REGOFFS]]
 // CHECK: [[FROMREG_ADDR:%[a-z_0-9]+]] = bitcast i8* [[REG_ADDR]] to i128*
 // CHECK: br label %[[VAARG_END:[a-z._0-9]+]]
 
@@ -73,7 +71,7 @@
 // CHECK: [[ALIGN_STACK:%[a-z_0-9]+]] = add i64 [[STACKINT]], 15
 // CHECK: [[ALIGNED_STACK_INT:%[a-z_0-9]+]] = and i64 [[ALIGN_STACK]], -16
 // CHECK: [[ALIGNED_STACK_PTR:%[a-z_0-9]+]] = inttoptr i64 [[ALIGNED_STACK_INT]] to i8*
-// CHECK: [[NEW_STACK:%[a-z_0-9]+]] = getelementptr i8, i8* [[ALIGNED_STACK_PTR]], i32 16
+// CHECK: [[NEW_STACK:%[a-z_0-9]+]] = getelementptr inbounds i8, i8* [[ALIGNED_STACK_PTR]], i64 16
 // CHECK: store i8* [[NEW_STACK]], i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 0)
 // CHECK: [[FROMSTACK_ADDR:%[a-z_0-9]+]] = bitcast i8* [[ALIGNED_STACK_PTR]] to i128*
 // CHECK: br label %[[VAARG_END]]
@@ -104,14 +102,14 @@
 
 // CHECK: [[VAARG_IN_REG]]
 // CHECK: [[REG_TOP:%[a-z_0-9]+]] = load i8*, i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 1)
-// CHECK: [[REG_ADDR:%[a-z_0-9]+]] = getelementptr i8, i8* [[REG_TOP]], i32 [[GR_OFFS]]
+// CHECK: [[REG_ADDR:%[a-z_0-9]+]] = getelementptr inbounds i8, i8* [[REG_TOP]], i32 [[GR_OFFS]]
 // CHECK: [[FROMREG_ADDR:%[a-z_0-9]+]] = bitcast i8* [[REG_ADDR]] to %struct.bigstruct**
 // CHECK: br label %[[VAARG_END:[a-z._0-9]+]]
 
 // CHECK: [[VAARG_ON_STACK]]
 // CHECK: [[STACK:%[a-z_0-9]+]] = load i8*, i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 0)
 // CHECK-NOT: and i64
-// CHECK: [[NEW_STACK:%[a-z_0-9]+]] = getelementptr i8, i8* [[STACK]], i32 8
+// CHECK: [[NEW_STACK:%[a-z_0-9]+]] = getelementptr inbounds i8, i8* [[STACK]], i64 8
 // CHECK: store i8* [[NEW_STACK]], i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 0)
 // CHECK: [[FROMSTACK_ADDR:%[a-z_0-9]+]] = bitcast i8* [[STACK]] to %struct.bigstruct**
 // CHECK: br label %[[VAARG_END]]
@@ -141,13 +139,13 @@
 
 // CHECK: [[VAARG_IN_REG]]
 // CHECK: [[REG_TOP:%[a-z_0-9]+]] = load i8*, i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 1)
-// CHECK: [[REG_ADDR:%[a-z_0-9]+]] = getelementptr i8, i8* [[REG_TOP]], i32 [[GR_OFFS]]
+// CHECK: [[REG_ADDR:%[a-z_0-9]+]] = getelementptr inbounds i8, i8* [[REG_TOP]], i32 [[GR_OFFS]]
 // CHECK: [[FROMREG_ADDR:%[a-z_0-9]+]] = bitcast i8* [[REG_ADDR]] to %struct.aligned_bigstruct**
 // CHECK: br label %[[VAARG_END:[a-z._0-9]+]]
 
 // CHECK: [[VAARG_ON_STACK]]
 // CHECK: [[STACK:%[a-z_0-9]+]] = load i8*, i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 0)
-// CHECK: [[NEW_STACK:%[a-z_0-9]+]] = getelementptr i8, i8* [[STACK]], i32 8
+// CHECK: [[NEW_STACK:%[a-z_0-9]+]] = getelementptr inbounds i8, i8* [[STACK]], i64 8
 // CHECK: store i8* [[NEW_STACK]], i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 0)
 // CHECK: [[FROMSTACK_ADDR:%[a-z_0-9]+]] = bitcast i8* [[STACK]] to %struct.aligned_bigstruct**
 // CHECK: br label %[[VAARG_END]]
@@ -172,16 +170,15 @@
 
 // CHECK: [[VAARG_IN_REG]]
 // CHECK: [[REG_TOP:%[a-z_0-9]+]] = load i8*, i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 2)
-// CHECK: [[REG_ADDR:%[a-z_0-9]+]] = getelementptr i8, i8* [[REG_TOP]], i32 [[VR_OFFS]]
-// CHECK-BE: [[REG_ADDR_VAL:%[0-9]+]] = ptrtoint i8* [[REG_ADDR]] to i64
-// CHECK-BE: [[REG_ADDR_VAL_ALIGNED:%[a-z_0-9]*]] = add i64 [[REG_ADDR_VAL]], 8
-// CHECK-BE: [[REG_ADDR:%[0-9]+]] = inttoptr i64 [[REG_ADDR_VAL_ALIGNED]] to i8*
-// CHECK: [[FROMREG_ADDR:%[a-z_0-9]+]] = bitcast i8* [[REG_ADDR]] to double*
+// CHECK: [[REG_ADDR:%[a-z_0-9]+]] = getelementptr inbounds i8, i8* [[REG_TOP]], i32 [[VR_OFFS]]
+// CHECK-BE: [[REG_ADDR_ALIGNED:%[a-z_0-9]*]] = getelementptr inbounds i8, i8* [[REG_ADDR]], i64 8
+// CHECK-BE: [[FROMREG_ADDR:%[a-z_0-9]+]] = bitcast i8* [[REG_ADDR_ALIGNED]] to double*
+// CHECK-LE: [[FROMREG_ADDR:%[a-z_0-9]+]] = bitcast i8* [[REG_ADDR]] to double*
 // CHECK: br label %[[VAARG_END:[a-z._0-9]+]]
 
 // CHECK: [[VAARG_ON_STACK]]
 // CHECK: [[STACK:%[a-z_0-9]+]] = load i8*, i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 0)
-// CHECK: [[NEW_STACK:%[a-z_0-9]+]] = getelementptr i8, i8* [[STACK]], i32 8
+// CHECK: [[NEW_STACK:%[a-z_0-9]+]] = getelementptr inbounds i8, i8* [[STACK]], i64 8
 // CHECK: store i8* [[NEW_STACK]], i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 0)
 // CHECK: [[FROMSTACK_ADDR:%[a-z_0-9]+]] = bitcast i8* [[STACK]] to double*
 // CHECK: br label %[[VAARG_END]]
@@ -211,17 +208,17 @@
 
 // CHECK: [[VAARG_IN_REG]]
 // CHECK: [[REG_TOP:%[a-z_0-9]+]] = load i8*, i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 2)
-// CHECK: [[FIRST_REG:%[a-z_0-9]+]] = getelementptr i8, i8* [[REG_TOP]], i32 [[VR_OFFS]]
-// CHECK-LE: [[EL_ADDR:%[a-z_0-9]+]] = getelementptr i8, i8* [[FIRST_REG]], i32 0
-// CHECK-BE: [[EL_ADDR:%[a-z_0-9]+]] = getelementptr i8, i8* [[FIRST_REG]], i32 12
+// CHECK: [[FIRST_REG:%[a-z_0-9]+]] = getelementptr inbounds i8, i8* [[REG_TOP]], i32 [[VR_OFFS]]
+// CHECK-LE: [[EL_ADDR:%[a-z_0-9]+]] = getelementptr inbounds i8, i8* [[FIRST_REG]], i64 0
+// CHECK-BE: [[EL_ADDR:%[a-z_0-9]+]] = getelementptr inbounds i8, i8* [[FIRST_REG]], i64 12
 // CHECK: [[EL_TYPED:%[a-z_0-9]+]] = bitcast i8* [[EL_ADDR]] to float*
-// CHECK: [[EL_TMPADDR:%[a-z_0-9]+]] = getelementptr inbounds [2 x float], [2 x float]* %[[TMP_HFA:[a-z_.0-9]+]], i32 0, i32 0
+// CHECK: [[EL_TMPADDR:%[a-z_0-9]+]] = getelementptr inbounds [2 x float], [2 x float]* %[[TMP_HFA:[a-z_.0-9]+]], i64 0, i64 0
 // CHECK: [[EL:%[a-z_0-9]+]] = load float, float* [[EL_TYPED]]
 // CHECK: store float [[EL]], float* [[EL_TMPADDR]]
-// CHECK-LE: [[EL_ADDR:%[a-z_0-9]+]] = getelementptr i8, i8* [[FIRST_REG]], i32 16
-// CHECK-BE: [[EL_ADDR:%[a-z_0-9]+]] = getelementptr i8, i8* [[FIRST_REG]], i32 28
+// CHECK-LE: [[EL_ADDR:%[a-z_0-9]+]] = getelementptr inbounds i8, i8* [[FIRST_REG]], i64 16
+// CHECK-BE: [[EL_ADDR:%[a-z_0-9]+]] = getelementptr inbounds i8, i8* [[FIRST_REG]], i64 28
 // CHECK: [[EL_TYPED:%[a-z_0-9]+]] = bitcast i8* [[EL_ADDR]] to float*
-// CHECK: [[EL_TMPADDR:%[a-z_0-9]+]] = getelementptr inbounds [2 x float], [2 x float]* %[[TMP_HFA]], i32 0, i32 1
+// CHECK: [[EL_TMPADDR:%[a-z_0-9]+]] = getelementptr inbounds [2 x float], [2 x float]* %[[TMP_HFA]], i64 0, i64 1
 // CHECK: [[EL:%[a-z_0-9]+]] = load float, float* [[EL_TYPED]]
 // CHECK: store float [[EL]], float* [[EL_TMPADDR]]
 // CHECK: [[FROMREG_ADDR:%[a-z_0-9]+]] = bitcast [2 x float]* %[[TMP_HFA]] to %struct.hfa*
@@ -229,7 +226,7 @@
 
 // CHECK: [[VAARG_ON_STACK]]
 // CHECK: [[STACK:%[a-z_0-9]+]] = load i8*, i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 0)
-// CHECK: [[NEW_STACK:%[a-z_0-9]+]] = getelementptr i8, i8* [[STACK]], i32 8
+// CHECK: [[NEW_STACK:%[a-z_0-9]+]] = getelementptr inbounds i8, i8* [[STACK]], i64 8
 // CHECK: store i8* [[NEW_STACK]], i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 0)
 // CHECK: [[FROMSTACK_ADDR:%[a-z_0-9]+]] = bitcast i8* [[STACK]] to %struct.hfa*
 // CHECK: br label %[[VAARG_END]]
diff --git a/test/CodeGen/adc-builtins.c b/test/CodeGen/adc-builtins.c
index 5577d22..5e58905 100644
--- a/test/CodeGen/adc-builtins.c
+++ b/test/CodeGen/adc-builtins.c
@@ -1,4 +1,6 @@
-// RUN: %clang_cc1 -triple x86_64-unknown-unknown -ffreestanding -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple x86_64-unknown-unknown -target-feature +adx -emit-llvm -o - %s | FileCheck %s
+
+#define __MM_MALLOC_H
 
 #include <x86intrin.h>
 
diff --git a/test/CodeGen/address-safety-attr-kasan.cpp b/test/CodeGen/address-safety-attr-kasan.cpp
new file mode 100644
index 0000000..c84ba88
--- /dev/null
+++ b/test/CodeGen/address-safety-attr-kasan.cpp
@@ -0,0 +1,38 @@
+// Make sure the sanitize_address attribute is emitted when using both ASan and KASan.
+// Also document that __attribute__((no_sanitize_address)) doesn't disable KASan instrumentation.
+
+/// RUN: %clang_cc1 -triple i386-unknown-linux -emit-llvm -o - %s | FileCheck -check-prefix=CHECK-NOASAN %s
+/// RUN: %clang_cc1 -triple i386-unknown-linux -fsanitize=address -emit-llvm -o - %s | FileCheck -check-prefix=CHECK-ASAN %s
+/// RUN: %clang_cc1 -triple i386-unknown-linux -fsanitize=kernel-address -emit-llvm -o - %s | FileCheck -check-prefix=CHECK-KASAN %s
+
+int HasSanitizeAddress() {
+  return 1;
+}
+// CHECK-NOASAN: {{Function Attrs: nounwind$}}
+// CHECK-ASAN: Function Attrs: nounwind sanitize_address
+// CHECK-KASAN: Function Attrs: nounwind sanitize_address
+
+__attribute__((no_sanitize("address")))
+int NoSanitizeQuoteAddress() {
+  return 0;
+}
+// CHECK-NOASAN: {{Function Attrs: nounwind$}}
+// CHECK-ASAN: {{Function Attrs: nounwind$}}
+// CHECK-KASAN: {{Function Attrs: nounwind sanitize_address$}}
+
+__attribute__((no_sanitize_address))
+int NoSanitizeAddress() {
+  return 0;
+}
+// CHECK-NOASAN: {{Function Attrs: nounwind$}}
+// CHECK-ASAN: {{Function Attrs: nounwind$}}
+// CHECK-KASAN: {{Function Attrs: nounwind sanitize_address$}}
+
+__attribute__((no_sanitize("kernel-address")))
+int NoSanitizeKernelAddress() {
+  return 0;
+}
+
+// CHECK-NOASAN: {{Function Attrs: nounwind$}}
+// CHECK-ASAN: {{Function Attrs: nounwind sanitize_address$}}
+// CHECK-KASAN: {{Function Attrs: nounwind$}}
diff --git a/test/CodeGen/alias.c b/test/CodeGen/alias.c
index b773cc8..a14bc0e 100644
--- a/test/CodeGen/alias.c
+++ b/test/CodeGen/alias.c
@@ -1,27 +1,51 @@
+// REQUIRES: arm-registered-target
 // RUN: %clang_cc1 -triple i386-pc-linux-gnu -emit-llvm -o - %s | FileCheck -check-prefix=CHECKBASIC %s
 // RUN: %clang_cc1 -triple armv7a-eabi -mfloat-abi hard -emit-llvm -o - %s | FileCheck -check-prefix=CHECKCC %s
+// RUN: %clang_cc1 -triple armv7a-eabi -mfloat-abi hard -S -o - %s | FileCheck -check-prefix=CHECKASM %s
 
 int g0;
-// CHECKBASIC: @g0 = common global i32 0
+// CHECKBASIC-DAG: @g0 = common global i32 0
+// CHECKASM-DAG: .comm g0,4,4
 __thread int TL_WITH_ALIAS;
 // CHECKBASIC-DAG: @TL_WITH_ALIAS = thread_local global i32 0, align 4
+// CHECKASM-DAG: .globl TL_WITH_ALIAS
+// CHECKASM-DAG: .size TL_WITH_ALIAS, 4
 static int bar1 = 42;
-// CHECKBASIC: @bar1 = internal global i32 42
+// CHECKBASIC-DAG: @bar1 = internal global i32 42
+// CHECKASM-DAG: bar1:
+// CHECKASM-DAG: .size bar1, 4
+
+// PR24379: alias variable expected to have same size as aliasee even when types differ
+const int wacom_usb_ids[] = {1, 1, 2, 3, 5, 8, 13, 0};
+// CHECKBASIC-DAG: @wacom_usb_ids = constant [8 x i32] [i32 1, i32 1, i32 2, i32 3, i32 5, i32 8, i32 13, i32 0], align 4
+// CHECKASM-DAG: .globl wacom_usb_ids
+// CHECKASM-DAG: .size wacom_usb_ids, 32
+extern const int __mod_usb_device_table __attribute__ ((alias("wacom_usb_ids")));
+// CHECKBASIC-DAG: @__mod_usb_device_table = alias i32, getelementptr inbounds ([8 x i32], [8 x i32]* @wacom_usb_ids, i32 0, i32 0)
+// CHECKASM-DAG: .globl __mod_usb_device_table
+// CHECKASM-DAG: __mod_usb_device_table = wacom_usb_ids
+// CHECKASM-DAG-NOT: .size __mod_usb_device_table
 
 extern int g1;
 extern int g1 __attribute((alias("g0")));
-// CHECKBASIC-DAG: @g1 = alias i32* @g0
+// CHECKBASIC-DAG: @g1 = alias i32, i32* @g0
+// CHECKASM-DAG: .globl g1
+// CHECKASM-DAG: g1 = g0
+// CHECKASM-DAG-NOT: .size g1
 
 extern __thread int __libc_errno __attribute__ ((alias ("TL_WITH_ALIAS")));
-// CHECKBASIC-DAG: @__libc_errno = thread_local alias i32* @TL_WITH_ALIAS
+// CHECKBASIC-DAG: @__libc_errno = thread_local alias i32, i32* @TL_WITH_ALIAS
+// CHECKASM-DAG: .globl __libc_errno
+// CHECKASM-DAG: __libc_errno = TL_WITH_ALIAS
+// CHECKASM-DAG-NOT: .size __libc_errno
 
 void f0(void) { }
 extern void f1(void);
 extern void f1(void) __attribute((alias("f0")));
-// CHECKBASIC-DAG: @f1 = alias void ()* @f0
-// CHECKBASIC-DAG: @test8_foo = weak alias bitcast (void ()* @test8_bar to void (...)*)
-// CHECKBASIC-DAG: @test8_zed = alias bitcast (void ()* @test8_bar to void (...)*)
-// CHECKBASIC-DAG: @test9_zed = alias void ()* @test9_bar
+// CHECKBASIC-DAG: @f1 = alias void (), void ()* @f0
+// CHECKBASIC-DAG: @test8_foo = weak alias void (...), bitcast (void ()* @test8_bar to void (...)*)
+// CHECKBASIC-DAG: @test8_zed = alias void (...), bitcast (void ()* @test8_bar to void (...)*)
+// CHECKBASIC-DAG: @test9_zed = alias void (), void ()* @test9_bar
 // CHECKBASIC: define void @f0() [[NUW:#[0-9]+]] {
 
 // Make sure that aliases cause referenced values to be emitted.
@@ -41,7 +65,7 @@
 static int inner_weak(int a) { return 0; }
 extern __typeof(inner) inner_a __attribute__((alias("inner")));
 static __typeof(inner_weak) inner_weak_a __attribute__((weakref, alias("inner_weak")));
-// CHECKCC: @inner_a = alias i32 (i32)* @inner
+// CHECKCC: @inner_a = alias i32 (i32), i32 (i32)* @inner
 // CHECKCC: define internal arm_aapcs_vfpcc i32 @inner(i32 %a) [[NUW:#[0-9]+]] {
 
 int outer(int a) { return inner(a); }
diff --git a/test/CodeGen/align-global-large.c b/test/CodeGen/align-global-large.c
index fcbe758..14f5d8d 100644
--- a/test/CodeGen/align-global-large.c
+++ b/test/CodeGen/align-global-large.c
@@ -1,5 +1,5 @@
 // PR13606 - Clang crashes with large alignment attribute
-// RUN: %clang -S -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang -cc1 -S -emit-llvm %s -o - -triple i686-pc-gnu | FileCheck %s
 
 // CHECK: x
 // CHECK: align
diff --git a/test/CodeGen/align-systemz.c b/test/CodeGen/align-systemz.c
index 68a21e3..eaa1de6 100644
--- a/test/CodeGen/align-systemz.c
+++ b/test/CodeGen/align-systemz.c
@@ -25,3 +25,19 @@
   s = es;
 }
 
+
+// Alignment should be respected for coerced argument loads
+
+struct arg { long y __attribute__((packed, aligned(4))); };
+
+extern struct arg x;
+void f(struct arg);
+
+void test (void)
+{
+  f(x);
+}
+
+// CHECK-LABEL: @test
+// CHECK: load i64, i64* getelementptr inbounds (%struct.arg, %struct.arg* @x, i32 0, i32 0), align 4
+
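
The new align-systemz.c case above checks that the coerced i64 argument load uses the struct's real alignment rather than the natural 8-byte alignment of i64. A short sketch of why that alignment is 4 (illustrative; it only restates the struct from the test under a hypothetical name):

// Sketch: packed + aligned(4) caps the member, and therefore the struct,
// at 4-byte alignment, which is why the load above must say "align 4".
struct arg_sketch { long y __attribute__((packed, aligned(4))); };
_Static_assert(__alignof__(struct arg_sketch) == 4,
               "alignment follows the attribute, not the natural alignment of long");
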
diff --git a/test/CodeGen/align-wasm.c b/test/CodeGen/align-wasm.c
new file mode 100644
index 0000000..ff0c213
--- /dev/null
+++ b/test/CodeGen/align-wasm.c
@@ -0,0 +1,13 @@
+// RUN: %clang_cc1 -triple wasm32-unknown-unknown -emit-llvm %s -o - \
+// RUN:   | FileCheck %s
+// RUN: %clang_cc1 -triple wasm64-unknown-unknown -emit-llvm %s -o - \
+// RUN:   | FileCheck %s
+
+void test1_f(void *);
+
+void test1_g(void) {
+  float x[4];
+  test1_f(x);
+}
+// CHECK: @test1_g
+// CHECK: alloca [4 x float], align 16
diff --git a/test/CodeGen/alignment.c b/test/CodeGen/alignment.c
index 0a59801..c9f5813 100644
--- a/test/CodeGen/alignment.c
+++ b/test/CodeGen/alignment.c
@@ -59,3 +59,11 @@
 // CHECK: @test4(
 // CHECK: store <4 x float> {{.*}}, <4 x float>* {{.*}}, align 64
 
+// PR24944 - Typedef alignment not honored on no-op cast.
+typedef float __attribute__((vector_size(16), aligned(16))) float4align16;
+typedef float __attribute__((vector_size(16), aligned(2))) float4align2;
+void test6(float4align64 *p) {
+    float4align64 vec = *(float4align2*) p;
+}
+// CHECK-LABEL: @test6
+// CHECK:       load <4 x float>, <4 x float>* {{.*}}, align 2
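
The PR24944 case above is about where the access alignment comes from: when a pointer is cast to a typedef carrying a lower alignment, the load must use that typedef's alignment (align 2 here) even though the vector type itself is 16 bytes wide. A standalone sketch of the same idea (illustrative; the typedef names are hypothetical, not the ones from the full test file):

// Sketch: the alignment of the access follows the typedef named in the cast.
typedef float __attribute__((vector_size(16), aligned(16))) v4f_a16;
typedef float __attribute__((vector_size(16), aligned(2)))  v4f_a2;

v4f_a16 load_underaligned(v4f_a16 *p) {
  return *(v4f_a2 *)p; // expected to be emitted as a load with align 2
}
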
diff --git a/test/CodeGen/arm-abi-vector.c b/test/CodeGen/arm-abi-vector.c
index 88bf593..8d113d6 100644
--- a/test/CodeGen/arm-abi-vector.c
+++ b/test/CodeGen/arm-abi-vector.c
@@ -1,5 +1,6 @@
 // RUN: %clang_cc1 -triple armv7-apple-darwin -target-abi aapcs -emit-llvm -o - %s | FileCheck %s
 // RUN: %clang_cc1 -triple armv7-apple-darwin -target-abi apcs-gnu -emit-llvm -o - %s | FileCheck -check-prefix=APCS-GNU %s
+// RUN: %clang_cc1 -triple arm-linux-androideabi -emit-llvm -o - %s | FileCheck -check-prefix=ANDROID %s
 
 #include <stdarg.h>
 
@@ -14,18 +15,28 @@
 // Passing legal vector types as varargs.
 double varargs_vec_2i(int fixed, ...) {
 // CHECK: varargs_vec_2i
-// CHECK: alloca <2 x i32>, align 8
-// CHECK: [[ALIGN:%.*]] = and i32 [[VAR:%.*]], -8
+// CHECK: [[VAR:%.*]] = alloca <2 x i32>, align 8
+// CHECK: [[ALIGN:%.*]] = and i32 {{%.*}}, -8
 // CHECK: [[AP_ALIGN:%.*]] = inttoptr i32 [[ALIGN]] to i8*
-// CHECK: [[AP_NEXT:%.*]] = getelementptr i8, i8* [[AP_ALIGN]], i32 8
-// CHECK: bitcast i8* [[AP_ALIGN]] to <2 x i32>*
+// CHECK: [[AP_NEXT:%.*]] = getelementptr inbounds i8, i8* [[AP_ALIGN]], i32 8
+// CHECK: [[AP_CAST:%.*]] = bitcast i8* [[AP_ALIGN]] to <2 x i32>*
+// CHECK: [[VEC:%.*]] = load <2 x i32>, <2 x i32>* [[AP_CAST]], align 8
+// CHECK: store <2 x i32> [[VEC]], <2 x i32>* [[VAR]], align 8
 // APCS-GNU: varargs_vec_2i
-// APCS-GNU: alloca <2 x i32>, align 8
-// APCS-GNU: [[VAR_ALIGN:%.*]] = alloca <2 x i32>
-// APCS-GNU: [[AP_NEXT:%.*]] = getelementptr i8, i8* {{%.*}}, i32 8
-// APCS-GNU: bitcast <2 x i32>* [[VAR_ALIGN]] to i8*
-// APCS-GNU: call void @llvm.memcpy
-// APCS-GNU: load <2 x i32>, <2 x i32>* [[VAR_ALIGN]]
+// APCS-GNU: [[VAR:%.*]] = alloca <2 x i32>, align 8
+// APCS-GNU: [[AP:%.*]] = load i8*,
+// APCS-GNU: [[AP_NEXT:%.*]] = getelementptr inbounds i8, i8* [[AP]], i32 8
+// APCS-GNU: [[AP_CAST:%.*]] = bitcast i8* [[AP]] to <2 x i32>*
+// APCS-GNU: [[VEC:%.*]] = load <2 x i32>, <2 x i32>* [[AP_CAST]], align 4
+// APCS-GNU: store <2 x i32> [[VEC]], <2 x i32>* [[VAR]], align 8
+// ANDROID: varargs_vec_2i
+// ANDROID: [[VAR:%.*]] = alloca <2 x i32>, align 8
+// ANDROID: [[ALIGN:%.*]] = and i32 {{%.*}}, -8
+// ANDROID: [[AP_ALIGN:%.*]] = inttoptr i32 [[ALIGN]] to i8*
+// ANDROID: [[AP_NEXT:%.*]] = getelementptr inbounds i8, i8* [[AP_ALIGN]], i32 8
+// ANDROID: [[AP_CAST:%.*]] = bitcast i8* [[AP_ALIGN]] to <2 x i32>*
+// ANDROID: [[VEC:%.*]] = load <2 x i32>, <2 x i32>* [[AP_CAST]], align 8
+// ANDROID: store <2 x i32> [[VEC]], <2 x i32>* [[VAR]], align 8
   va_list ap;
   double sum = fixed;
   va_start(ap, fixed);
@@ -40,18 +51,24 @@
 // CHECK: call arm_aapcscc double (i32, ...) @varargs_vec_2i(i32 3, <2 x i32> {{%.*}})
 // APCS-GNU: test_2i
 // APCS-GNU: call double (i32, ...) @varargs_vec_2i(i32 3, <2 x i32> {{%.*}})
+// ANDROID: test_2i
+// ANDROID: call double (i32, ...) @varargs_vec_2i(i32 3, <2 x i32> {{%.*}})
   return varargs_vec_2i(3, *in);
 }
 
 double varargs_vec_3c(int fixed, ...) {
 // CHECK: varargs_vec_3c
 // CHECK: alloca <3 x i8>, align 4
-// CHECK: [[AP_NEXT:%.*]] = getelementptr i8, i8* [[AP:%.*]], i32 4
+// CHECK: [[AP_NEXT:%.*]] = getelementptr inbounds i8, i8* [[AP:%.*]], i32 4
 // CHECK: bitcast i8* [[AP]] to <3 x i8>*
 // APCS-GNU: varargs_vec_3c
 // APCS-GNU: alloca <3 x i8>, align 4
-// APCS-GNU: [[AP_NEXT:%.*]] = getelementptr i8, i8* [[AP:%.*]], i32 4
+// APCS-GNU: [[AP_NEXT:%.*]] = getelementptr inbounds i8, i8* [[AP:%.*]], i32 4
 // APCS-GNU: bitcast i8* [[AP]] to <3 x i8>*
+// ANDROID: varargs_vec_3c
+// ANDROID: alloca <3 x i8>, align 4
+// ANDROID: [[AP_NEXT:%.*]] = getelementptr inbounds i8, i8* [[AP:%.*]], i32 4
+// ANDROID: bitcast i8* [[AP]] to <3 x i8>*
   va_list ap;
   double sum = fixed;
   va_start(ap, fixed);
@@ -66,23 +83,35 @@
 // CHECK: call arm_aapcscc double (i32, ...) @varargs_vec_3c(i32 3, i32 {{%.*}})
 // APCS-GNU: test_3c
 // APCS-GNU: call double (i32, ...) @varargs_vec_3c(i32 3, i32 {{%.*}})
+// ANDROID: test_3c
+// ANDROID: call double (i32, ...) @varargs_vec_3c(i32 3, <3 x i8> {{%.*}})
   return varargs_vec_3c(3, *in);
 }
 
 double varargs_vec_5c(int fixed, ...) {
 // CHECK: varargs_vec_5c
-// CHECK: alloca <5 x i8>, align 8
+// CHECK: [[VAR:%.*]] = alloca <5 x i8>, align 8
 // CHECK: [[ALIGN:%.*]] = and i32 {{%.*}}, -8
 // CHECK: [[AP_ALIGN:%.*]] = inttoptr i32 [[ALIGN]] to i8*
-// CHECK: [[AP_NEXT:%.*]] = getelementptr i8, i8* [[AP_ALIGN]], i32 8
-// CHECK: bitcast i8* [[AP_ALIGN]] to <5 x i8>*
+// CHECK: [[AP_NEXT:%.*]] = getelementptr inbounds i8, i8* [[AP_ALIGN]], i32 8
+// CHECK: [[AP_CAST:%.*]] = bitcast i8* [[AP_ALIGN]] to <5 x i8>*
+// CHECK: [[VEC:%.*]] = load <5 x i8>, <5 x i8>* [[AP_CAST]], align 8
+// CHECK: store <5 x i8> [[VEC]], <5 x i8>* [[VAR]], align 8
 // APCS-GNU: varargs_vec_5c
-// APCS-GNU: alloca <5 x i8>, align 8
-// APCS-GNU: [[VAR_ALIGN:%.*]] = alloca <5 x i8>
-// APCS-GNU: [[AP_NEXT:%.*]] = getelementptr i8, i8* {{%.*}}, i32 8
-// APCS-GNU: bitcast <5 x i8>* [[VAR_ALIGN]] to i8*
-// APCS-GNU: call void @llvm.memcpy
-// APCS-GNU: load <5 x i8>, <5 x i8>* [[VAR_ALIGN]]
+// APCS-GNU: [[VAR:%.*]] = alloca <5 x i8>, align 8
+// APCS-GNU: [[AP:%.*]] = load i8*,
+// APCS-GNU: [[AP_NEXT:%.*]] = getelementptr inbounds i8, i8* [[AP]], i32 8
+// APCS-GNU: [[AP_CAST:%.*]] = bitcast i8* [[AP]] to <5 x i8>*
+// APCS-GNU: [[VEC:%.*]] = load <5 x i8>, <5 x i8>* [[AP_CAST]], align 4
+// APCS-GNU: store <5 x i8> [[VEC]], <5 x i8>* [[VAR]], align 8
+// ANDROID: varargs_vec_5c
+// ANDROID: [[VAR:%.*]] = alloca <5 x i8>, align 8
+// ANDROID: [[ALIGN:%.*]] = and i32 {{%.*}}, -8
+// ANDROID: [[AP_ALIGN:%.*]] = inttoptr i32 [[ALIGN]] to i8*
+// ANDROID: [[AP_NEXT:%.*]] = getelementptr inbounds i8, i8* [[AP_ALIGN]], i32 8
+// ANDROID: [[AP_CAST:%.*]] = bitcast i8* [[AP_ALIGN]] to <5 x i8>*
+// ANDROID: [[VEC:%.*]] = load <5 x i8>, <5 x i8>* [[AP_CAST]], align 8
+// ANDROID: store <5 x i8> [[VEC]], <5 x i8>* [[VAR]], align 8
   va_list ap;
   double sum = fixed;
   va_start(ap, fixed);
@@ -97,26 +126,35 @@
 // CHECK: call arm_aapcscc double (i32, ...) @varargs_vec_5c(i32 5, <2 x i32> {{%.*}})
 // APCS-GNU: test_5c
 // APCS-GNU: call double (i32, ...) @varargs_vec_5c(i32 5, <2 x i32> {{%.*}})
+// ANDROID: test_5c
+// ANDROID: call double (i32, ...) @varargs_vec_5c(i32 5, <2 x i32> {{%.*}})
   return varargs_vec_5c(5, *in);
 }
 
 double varargs_vec_9c(int fixed, ...) {
 // CHECK: varargs_vec_9c
-// CHECK: alloca <9 x i8>, align 16
-// CHECK: [[VAR_ALIGN:%.*]] = alloca <9 x i8>
+// CHECK: [[VAR:%.*]] = alloca <9 x i8>, align 16
 // CHECK: [[ALIGN:%.*]] = and i32 {{%.*}}, -8
 // CHECK: [[AP_ALIGN:%.*]] = inttoptr i32 [[ALIGN]] to i8*
-// CHECK: [[AP_NEXT:%.*]] = getelementptr i8, i8* [[AP_ALIGN]], i32 16
-// CHECK: bitcast <9 x i8>* [[VAR_ALIGN]] to i8*
-// CHECK: call void @llvm.memcpy
-// CHECK: load <9 x i8>, <9 x i8>* [[VAR_ALIGN]]
+// CHECK: [[AP_NEXT:%.*]] = getelementptr inbounds i8, i8* [[AP_ALIGN]], i32 16
+// CHECK: [[AP_CAST:%.*]] = bitcast i8* [[AP_ALIGN]] to <9 x i8>*
+// CHECK: [[T0:%.*]] = load <9 x i8>, <9 x i8>* [[AP_CAST]], align 8
+// CHECK: store <9 x i8> [[T0]], <9 x i8>* [[VAR]], align 16
 // APCS-GNU: varargs_vec_9c
-// APCS-GNU: alloca <9 x i8>, align 16
-// APCS-GNU: [[VAR_ALIGN:%.*]] = alloca <9 x i8>
-// APCS-GNU: [[AP_NEXT:%.*]] = getelementptr i8, i8* {{%.*}}, i32 16
-// APCS-GNU: bitcast <9 x i8>* [[VAR_ALIGN]] to i8*
-// APCS-GNU: call void @llvm.memcpy
-// APCS-GNU: load <9 x i8>, <9 x i8>* [[VAR_ALIGN]]
+// APCS-GNU: [[VAR:%.*]] = alloca <9 x i8>, align 16
+// APCS-GNU: [[AP:%.*]] = load i8*,
+// APCS-GNU: [[AP_NEXT:%.*]] = getelementptr inbounds i8, i8* [[AP]], i32 16
+// APCS-GNU: [[AP_CAST:%.*]] = bitcast i8* [[AP]] to <9 x i8>*
+// APCS-GNU: [[VEC:%.*]] = load <9 x i8>, <9 x i8>* [[AP_CAST]], align 4
+// APCS-GNU: store <9 x i8> [[VEC]], <9 x i8>* [[VAR]], align 16
+// ANDROID: varargs_vec_9c
+// ANDROID: [[VAR:%.*]] = alloca <9 x i8>, align 16
+// ANDROID: [[ALIGN:%.*]] = and i32 {{%.*}}, -8
+// ANDROID: [[AP_ALIGN:%.*]] = inttoptr i32 [[ALIGN]] to i8*
+// ANDROID: [[AP_NEXT:%.*]] = getelementptr inbounds i8, i8* [[AP_ALIGN]], i32 16
+// ANDROID: [[AP_CAST:%.*]] = bitcast i8* [[AP_ALIGN]] to <9 x i8>*
+// ANDROID: [[T0:%.*]] = load <9 x i8>, <9 x i8>* [[AP_CAST]], align 8
+// ANDROID: store <9 x i8> [[T0]], <9 x i8>* [[VAR]], align 16
   va_list ap;
   double sum = fixed;
   va_start(ap, fixed);
@@ -131,20 +169,24 @@
 // CHECK: call arm_aapcscc double (i32, ...) @varargs_vec_9c(i32 9, <4 x i32> {{%.*}})
 // APCS-GNU: test_9c
 // APCS-GNU: call double (i32, ...) @varargs_vec_9c(i32 9, <4 x i32> {{%.*}})
+// ANDROID: test_9c
+// ANDROID: call double (i32, ...) @varargs_vec_9c(i32 9, <4 x i32> {{%.*}})
   return varargs_vec_9c(9, *in);
 }
 
 double varargs_vec_19c(int fixed, ...) {
 // CHECK: varargs_vec_19c
-// CHECK: [[AP_NEXT:%.*]] = getelementptr i8, i8* [[AP:%.*]], i32 4
-// CHECK: [[VAR:%.*]] = bitcast i8* [[AP]] to i8**
-// CHECK: [[VAR2:%.*]] = load i8*, i8** [[VAR]]
-// CHECK: bitcast i8* [[VAR2]] to <19 x i8>*
+// CHECK: [[AP_NEXT:%.*]] = getelementptr inbounds i8, i8* [[AP:%.*]], i32 4
+// CHECK: [[VAR:%.*]] = bitcast i8* [[AP]] to <19 x i8>**
+// CHECK: [[VAR2:%.*]] = load <19 x i8>*, <19 x i8>** [[VAR]]
 // APCS-GNU: varargs_vec_19c
-// APCS-GNU: [[AP_NEXT:%.*]] = getelementptr i8, i8* [[AP:%.*]], i32 4
-// APCS-GNU: [[VAR:%.*]] = bitcast i8* [[AP]] to i8**
-// APCS-GNU: [[VAR2:%.*]] = load i8*, i8** [[VAR]]
-// APCS-GNU: bitcast i8* [[VAR2]] to <19 x i8>*
+// APCS-GNU: [[AP_NEXT:%.*]] = getelementptr inbounds i8, i8* [[AP:%.*]], i32 4
+// APCS-GNU: [[VAR:%.*]] = bitcast i8* [[AP]] to <19 x i8>**
+// APCS-GNU: [[VAR2:%.*]] = load <19 x i8>*, <19 x i8>** [[VAR]]
+// ANDROID: varargs_vec_19c
+// ANDROID: [[AP_NEXT:%.*]] = getelementptr inbounds i8, i8* [[AP:%.*]], i32 4
+// ANDROID: [[VAR:%.*]] = bitcast i8* [[AP]] to <19 x i8>**
+// ANDROID: [[VAR2:%.*]] = load <19 x i8>*, <19 x i8>** [[VAR]]
   va_list ap;
   double sum = fixed;
   va_start(ap, fixed);
@@ -159,6 +201,8 @@
 // CHECK: call arm_aapcscc double (i32, ...) @varargs_vec_19c(i32 19, <19 x i8>* {{%.*}})
 // APCS-GNU: test_19c
 // APCS-GNU: call double (i32, ...) @varargs_vec_19c(i32 19, <19 x i8>* {{%.*}})
+// ANDROID: test_19c
+// ANDROID: call double (i32, ...) @varargs_vec_19c(i32 19, <19 x i8>* {{%.*}})
   return varargs_vec_19c(19, *in);
 }
 
@@ -167,15 +211,20 @@
 // CHECK: alloca <3 x i16>, align 8
 // CHECK: [[ALIGN:%.*]] = and i32 {{%.*}}, -8
 // CHECK: [[AP_ALIGN:%.*]] = inttoptr i32 [[ALIGN]] to i8*
-// CHECK: [[AP_NEXT:%.*]] = getelementptr i8, i8* [[AP_ALIGN]], i32 8
+// CHECK: [[AP_NEXT:%.*]] = getelementptr inbounds i8, i8* [[AP_ALIGN]], i32 8
 // CHECK: bitcast i8* [[AP_ALIGN]] to <3 x i16>*
 // APCS-GNU: varargs_vec_3s
-// APCS-GNU: alloca <3 x i16>, align 8
-// APCS-GNU: [[VAR_ALIGN:%.*]] = alloca <3 x i16>
-// APCS-GNU: [[AP_NEXT:%.*]] = getelementptr i8, i8* {{%.*}}, i32 8
-// APCS-GNU: bitcast <3 x i16>* [[VAR_ALIGN]] to i8*
-// APCS-GNU: call void @llvm.memcpy
-// APCS-GNU: load <3 x i16>, <3 x i16>* [[VAR_ALIGN]]
+// APCS-GNU: [[VAR:%.*]] = alloca <3 x i16>, align 8
+// APCS-GNU: [[AP:%.*]] = load i8*,
+// APCS-GNU: [[AP_NEXT:%.*]] = getelementptr inbounds i8, i8* [[AP]], i32 8
+// APCS-GNU: [[AP_CAST:%.*]] = bitcast i8* [[AP]] to <3 x i16>*
+// APCS-GNU: [[VEC:%.*]] = load <3 x i16>, <3 x i16>* [[AP_CAST]], align 4
+// ANDROID: varargs_vec_3s
+// ANDROID: alloca <3 x i16>, align 8
+// ANDROID: [[ALIGN:%.*]] = and i32 {{%.*}}, -8
+// ANDROID: [[AP_ALIGN:%.*]] = inttoptr i32 [[ALIGN]] to i8*
+// ANDROID: [[AP_NEXT:%.*]] = getelementptr inbounds i8, i8* [[AP_ALIGN]], i32 8
+// ANDROID: bitcast i8* [[AP_ALIGN]] to <3 x i16>*
   va_list ap;
   double sum = fixed;
   va_start(ap, fixed);
@@ -190,26 +239,34 @@
 // CHECK: call arm_aapcscc double (i32, ...) @varargs_vec_3s(i32 3, <2 x i32> {{%.*}})
 // APCS-GNU: test_3s
 // APCS-GNU: call double (i32, ...) @varargs_vec_3s(i32 3, <2 x i32> {{%.*}})
+// ANDROID: test_3s
+// ANDROID: call double (i32, ...) @varargs_vec_3s(i32 3, <3 x i16> {{%.*}})
   return varargs_vec_3s(3, *in);
 }
 
 double varargs_vec_5s(int fixed, ...) {
 // CHECK: varargs_vec_5s
-// CHECK: alloca <5 x i16>, align 16
-// CHECK: [[VAR_ALIGN:%.*]] = alloca <5 x i16>
+// CHECK: [[VAR_ALIGN:%.*]] = alloca <5 x i16>, align 16
 // CHECK: [[ALIGN:%.*]] = and i32 {{%.*}}, -8
 // CHECK: [[AP_ALIGN:%.*]] = inttoptr i32 [[ALIGN]] to i8*
-// CHECK: [[AP_NEXT:%.*]] = getelementptr i8, i8* [[AP_ALIGN]], i32 16
-// CHECK: bitcast <5 x i16>* [[VAR_ALIGN]] to i8*
-// CHECK: call void @llvm.memcpy
-// CHECK: load <5 x i16>, <5 x i16>* [[VAR_ALIGN]]
+// CHECK: [[AP_NEXT:%.*]] = getelementptr inbounds i8, i8* [[AP_ALIGN]], i32 16
+// CHECK: [[AP_CAST:%.*]] = bitcast i8* [[AP_ALIGN]] to <5 x i16>*
+// CHECK: [[VEC:%.*]] = load <5 x i16>, <5 x i16>* [[AP_CAST]], align 8
+// CHECK: store <5 x i16> [[VEC]], <5 x i16>* [[VAR_ALIGN]], align 16
 // APCS-GNU: varargs_vec_5s
-// APCS-GNU: alloca <5 x i16>, align 16
-// APCS-GNU: [[VAR_ALIGN:%.*]] = alloca <5 x i16>
-// APCS-GNU: [[AP_NEXT:%.*]] = getelementptr i8, i8* {{%.*}}, i32 16
-// APCS-GNU: bitcast <5 x i16>* [[VAR_ALIGN]] to i8*
-// APCS-GNU: call void @llvm.memcpy
-// APCS-GNU: load <5 x i16>, <5 x i16>* [[VAR_ALIGN]]
+// APCS-GNU: [[VAR:%.*]] = alloca <5 x i16>, align 16
+// APCS-GNU: [[AP:%.*]] = load i8*,
+// APCS-GNU: [[AP_NEXT:%.*]] = getelementptr inbounds i8, i8* [[AP]], i32 16
+// APCS-GNU: [[AP_CAST:%.*]] = bitcast i8* [[AP]] to <5 x i16>*
+// APCS-GNU: [[VEC:%.*]] = load <5 x i16>, <5 x i16>* [[AP_CAST]], align 4
+// ANDROID: varargs_vec_5s
+// ANDROID: [[VAR_ALIGN:%.*]] = alloca <5 x i16>, align 16
+// ANDROID: [[ALIGN:%.*]] = and i32 {{%.*}}, -8
+// ANDROID: [[AP_ALIGN:%.*]] = inttoptr i32 [[ALIGN]] to i8*
+// ANDROID: [[AP_NEXT:%.*]] = getelementptr inbounds i8, i8* [[AP_ALIGN]], i32 16
+// ANDROID: [[AP_CAST:%.*]] = bitcast i8* [[AP_ALIGN]] to <5 x i16>*
+// ANDROID: [[VEC:%.*]] = load <5 x i16>, <5 x i16>* [[AP_CAST]], align 8
+// ANDROID: store <5 x i16> [[VEC]], <5 x i16>* [[VAR_ALIGN]], align 16
   va_list ap;
   double sum = fixed;
   va_start(ap, fixed);
@@ -224,6 +281,8 @@
 // CHECK: call arm_aapcscc double (i32, ...) @varargs_vec_5s(i32 5, <4 x i32> {{%.*}})
 // APCS-GNU: test_5s
 // APCS-GNU: call double (i32, ...) @varargs_vec_5s(i32 5, <4 x i32> {{%.*}})
+// ANDROID: test_5s
+// ANDROID: call double (i32, ...) @varargs_vec_5s(i32 5, <4 x i32> {{%.*}})
   return varargs_vec_5s(5, *in);
 }
 
@@ -238,13 +297,18 @@
 // CHECK: varargs_struct
 // CHECK: [[ALIGN:%.*]] = and i32 {{%.*}}, -8
 // CHECK: [[AP_ALIGN:%.*]] = inttoptr i32 [[ALIGN]] to i8*
-// CHECK: [[AP_NEXT:%.*]] = getelementptr i8, i8* [[AP_ALIGN]], i32 16
+// CHECK: [[AP_NEXT:%.*]] = getelementptr inbounds i8, i8* [[AP_ALIGN]], i32 16
 // CHECK: bitcast i8* [[AP_ALIGN]] to %struct.StructWithVec*
 // APCS-GNU: varargs_struct
 // APCS-GNU: [[VAR_ALIGN:%.*]] = alloca %struct.StructWithVec
-// APCS-GNU: [[AP_NEXT:%.*]] = getelementptr i8, i8* {{%.*}}, i32 16
+// APCS-GNU: [[AP_NEXT:%.*]] = getelementptr inbounds i8, i8* {{%.*}}, i32 16
 // APCS-GNU: bitcast %struct.StructWithVec* [[VAR_ALIGN]] to i8*
 // APCS-GNU: call void @llvm.memcpy
+// ANDROID: varargs_struct
+// ANDROID: [[ALIGN:%.*]] = and i32 {{%.*}}, -8
+// ANDROID: [[AP_ALIGN:%.*]] = inttoptr i32 [[ALIGN]] to i8*
+// ANDROID: [[AP_NEXT:%.*]] = getelementptr inbounds i8, i8* [[AP_ALIGN]], i32 16
+// ANDROID: bitcast i8* [[AP_ALIGN]] to %struct.StructWithVec*
   va_list ap;
   double sum = fixed;
   va_start(ap, fixed);
@@ -259,5 +323,7 @@
 // CHECK: call arm_aapcscc double (i32, ...) @varargs_struct(i32 3, [2 x i64] {{%.*}})
 // APCS-GNU: test_struct
 // APCS-GNU: call double (i32, ...) @varargs_struct(i32 3, [2 x i64] {{%.*}})
+// ANDROID: test_struct
+// ANDROID: call double (i32, ...) @varargs_struct(i32 3, [2 x i64] {{%.*}})
   return varargs_struct(3, *d);
 }
diff --git a/test/CodeGen/arm-arguments.c b/test/CodeGen/arm-arguments.c
index b671626..ec3e173 100644
--- a/test/CodeGen/arm-arguments.c
+++ b/test/CodeGen/arm-arguments.c
@@ -159,13 +159,13 @@
 struct s31 { char x; };
 void f31(struct s31 s) { }
 // AAPCS: @f31([1 x i32] %s.coerce)
-// AAPCS: %s = alloca %struct.s31, align 4
-// AAPCS: alloca [1 x i32]
-// AAPCS: store [1 x i32] %s.coerce, [1 x i32]*
+// AAPCS: %s = alloca %struct.s31, align 1
+// AAPCS: [[TEMP:%.*]] = alloca [1 x i32], align 4
+// AAPCS: store [1 x i32] %s.coerce, [1 x i32]* [[TEMP]], align 4
 // APCS-GNU: @f31([1 x i32] %s.coerce)
-// APCS-GNU: %s = alloca %struct.s31, align 4
-// APCS-GNU: alloca [1 x i32]
-// APCS-GNU: store [1 x i32] %s.coerce, [1 x i32]*
+// APCS-GNU: %s = alloca %struct.s31, align 1
+// APCS-GNU: [[TEMP:%.*]] = alloca [1 x i32], align 4
+// APCS-GNU: store [1 x i32] %s.coerce, [1 x i32]* [[TEMP]], align 4
 
 // PR13562
 struct s32 { double x; };
diff --git a/test/CodeGen/arm-eabi.c b/test/CodeGen/arm-eabi.c
new file mode 100644
index 0000000..0dc04f5
--- /dev/null
+++ b/test/CodeGen/arm-eabi.c
@@ -0,0 +1,20 @@
+// REQUIRES: arm-registered-target
+// RUN: %clang -target arm-none-eabi -S -o - %s | FileCheck -check-prefix=CHECK-EABI %s
+// RUN: %clang -target arm-none-eabi -S -meabi gnu -o - %s | FileCheck -check-prefix=CHECK-GNUEABI %s
+// RUN: %clang -target arm-none-eabihf -S -o - %s | FileCheck -check-prefix=CHECK-EABI %s
+// RUN: %clang -target arm-none-eabihf -S -meabi gnu -o - %s | FileCheck -check-prefix=CHECK-GNUEABI %s
+// RUN: %clang -target arm-none-gnueabi -S -o - %s | FileCheck -check-prefix=CHECK-GNUEABI %s
+// RUN: %clang -target arm-none-gnueabi -S -meabi 5 -o - %s | FileCheck -check-prefix=CHECK-EABI %s
+// RUN: %clang -target arm-none-gnueabihf -S -o - %s | FileCheck -check-prefix=CHECK-GNUEABI %s
+// RUN: %clang -target arm-none-gnueabihf -S -meabi 5 -o - %s | FileCheck -check-prefix=CHECK-EABI %s
+
+struct my_s {
+  unsigned long a[18];
+};
+
+// CHECK-LABEL: foo
+// CHECK-EABI: bl __aeabi_memcpy4
+// CHECK-GNUEABI: bl memcpy
+void foo(unsigned long *t) {
+  *(struct my_s *)t = *((struct my_s *)(1UL));
+}
diff --git a/test/CodeGen/arm-fp16-arguments.c b/test/CodeGen/arm-fp16-arguments.c
new file mode 100644
index 0000000..15a9ceb
--- /dev/null
+++ b/test/CodeGen/arm-fp16-arguments.c
@@ -0,0 +1,21 @@
+// RUN: %clang_cc1 -triple armv7a--none-eabi -target-abi aapcs -mfloat-abi soft -fallow-half-arguments-and-returns -emit-llvm -o - -O1 %s | FileCheck %s --check-prefix=CHECK --check-prefix=SOFT
+// RUN: %clang_cc1 -triple armv7a--none-eabi -target-abi aapcs -mfloat-abi hard -fallow-half-arguments-and-returns -emit-llvm -o - -O1 %s | FileCheck %s --check-prefix=CHECK --check-prefix=HARD
+
+__fp16 g;
+
+void t1(__fp16 a) { g = a; }
+// SOFT: define void @t1(i32 [[PARAM:%.*]])
+// SOFT: [[TRUNC:%.*]] = trunc i32 [[PARAM]] to i16
+// HARD: define arm_aapcs_vfpcc void @t1(float [[PARAM:%.*]])
+// HARD: [[BITCAST:%.*]] = bitcast float [[PARAM]] to i32
+// HARD: [[TRUNC:%.*]] = trunc i32 [[BITCAST]] to i16
+// CHECK: store i16 [[TRUNC]], i16* bitcast (half* @g to i16*)
+
+__fp16 t2() { return g; }
+// SOFT: define i32 @t2()
+// HARD: define arm_aapcs_vfpcc float @t2()
+// CHECK: [[LOAD:%.*]] = load i16, i16* bitcast (half* @g to i16*)
+// CHECK: [[ZEXT:%.*]] = zext i16 [[LOAD]] to i32
+// SOFT: ret i32 [[ZEXT]]
+// HARD: [[BITCAST:%.*]] = bitcast i32 [[ZEXT]] to float
+// HARD: ret float [[BITCAST]]
diff --git a/test/CodeGen/arm-long-calls.c b/test/CodeGen/arm-long-calls.c
new file mode 100644
index 0000000..fdd7bab
--- /dev/null
+++ b/test/CodeGen/arm-long-calls.c
@@ -0,0 +1,7 @@
+// RUN: %clang_cc1 -triple thumbv7-apple-ios5  -target-feature +long-calls -emit-llvm -o - %s | FileCheck -check-prefix=LONGCALL %s
+// RUN: %clang_cc1 -triple thumbv7-apple-ios5 -emit-llvm -o - %s | FileCheck -check-prefix=NOLONGCALL %s
+
+// LONGCALL: attributes #0 = { {{.*}} "target-features"="+long-calls"
+// NOLONGCALL-NOT: attributes #0 = { {{.*}} "target-features"="+long-calls"
+
+int foo1(int a) { return a; }
diff --git a/test/CodeGen/arm-microsoft-intrinsics.c b/test/CodeGen/arm-microsoft-intrinsics.c
index 5f19e5e..e073cc2 100644
--- a/test/CodeGen/arm-microsoft-intrinsics.c
+++ b/test/CodeGen/arm-microsoft-intrinsics.c
@@ -47,17 +47,17 @@
 // CHECK-MSVC: @llvm.arm.mrc2(i32 0, i32 0, i32 0, i32 0, i32 0)
 // CHECK-EABI: error: implicit declaration of function '_MoveFromCoprocessor2'
 
-void check_MoveToCoprocessor(void) {
-  _MoveToCoprocessor(0, 0, 0, 0, 0, 0);
+void check_MoveToCoprocessor(unsigned int value) {
+  _MoveToCoprocessor(value, 10, 7, 1, 0, 0);
 }
 
-// CHECK-MSVC: @llvm.arm.mcr(i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
+// CHECK-MSVC: @llvm.arm.mcr(i32 10, i32 7, i32 %{{[^,]*}}, i32 1, i32 0, i32 0)
 // CHECK-EABI: error: implicit declaration of function '_MoveToCoprocessor'
 
-void check_MoveToCoprocessor2(void) {
-  _MoveToCoprocessor2(0, 0, 0, 0, 0, 0);
+void check_MoveToCoprocessor2(unsigned int value) {
+  _MoveToCoprocessor2(value, 10, 7, 1, 0, 0);
 }
 
-// CHECK-MSVC: @llvm.arm.mcr2(i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
+// CHECK-MSVC: @llvm.arm.mcr2(i32 10, i32 7, i32 %{{[^,]*}}, i32 1, i32 0, i32 0)
 // CHECK-EABI: error: implicit declaration of function '_MoveToCoprocessor2'
 
diff --git a/test/CodeGen/arm-neon-misc.c b/test/CodeGen/arm-neon-misc.c
index 56ce316..e7ba580 100644
--- a/test/CodeGen/arm-neon-misc.c
+++ b/test/CodeGen/arm-neon-misc.c
@@ -14,20 +14,20 @@
 void t1(uint64_t *src, uint8_t *dst) {
 // CHECK: @t1
   uint64x2_t q = vld1q_u64(src);
-// CHECK: call <2 x i64> @llvm.arm.neon.vld1.v2i64
+// CHECK: call <2 x i64> @llvm.arm.neon.vld1.v2i64.p0i8
   vst1q_lane_u64(dst, q, 1);
 // CHECK: bitcast <16 x i8> %{{.*}} to <2 x i64>
 // CHECK: shufflevector <2 x i64>
-// CHECK: call void @llvm.arm.neon.vst1.v1i64
+// CHECK: call void @llvm.arm.neon.vst1.p0i8.v1i64
 }
 
 void t2(uint64_t *src1, uint8_t *src2, uint64x2_t *dst) {
 // CHECK: @t2
     uint64x2_t q = vld1q_u64(src1);
-// CHECK: call <2 x i64> @llvm.arm.neon.vld1.v2i64
+// CHECK: call <2 x i64> @llvm.arm.neon.vld1.v2i64.p0i8
     q = vld1q_lane_u64(src2, q, 0);
 // CHECK: shufflevector <2 x i64>
-// CHECK: call <1 x i64> @llvm.arm.neon.vld1.v1i64
+// CHECK: call <1 x i64> @llvm.arm.neon.vld1.v1i64.p0i8
 // CHECK: shufflevector <1 x i64>
     *dst = q;
 // CHECK: store <2 x i64>
diff --git a/test/CodeGen/arm-no-movt.c b/test/CodeGen/arm-no-movt.c
new file mode 100644
index 0000000..0773941
--- /dev/null
+++ b/test/CodeGen/arm-no-movt.c
@@ -0,0 +1,7 @@
+// RUN: %clang_cc1 -triple thumbv7-apple-ios5  -target-feature +no-movt -emit-llvm -o - %s | FileCheck -check-prefix=NO-MOVT %s
+// RUN: %clang_cc1 -triple thumbv7-apple-ios5 -emit-llvm -o - %s | FileCheck -check-prefix=MOVT %s
+
+// NO-MOVT: attributes #0 = { {{.*}} "target-features"="+no-movt"
+// MOVT-NOT: attributes #0 = { {{.*}} "target-features"="+no-movt"
+
+int foo1(int a) { return a; }
diff --git a/test/CodeGen/arm-target-features.c b/test/CodeGen/arm-target-features.c
index ece8bdf..35c0e04 100644
--- a/test/CodeGen/arm-target-features.c
+++ b/test/CodeGen/arm-target-features.c
@@ -1,12 +1,15 @@
 // REQUIRES: arm-registered-target
 
 // RUN: %clang_cc1 -triple thumbv7-linux-gnueabihf -target-cpu cortex-a8 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-VFP3
-// RUN: %clang_cc1 -triple thumbv7-linux-gnueabi -target-cpu cortex-a9 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-VFP3
-// CHECK-VFP3: "target-features"="+neon,+vfp3"
+// CHECK-VFP3: "target-features"="+dsp,+neon,+vfp3"
+
+
+// RUN: %clang_cc1 -triple thumbv7-linux-gnueabi -target-cpu cortex-a9 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-VFP3-FP16
+// CHECK-VFP3-FP16: "target-features"="+dsp,+fp16,+neon,+vfp3"
 
 
 // RUN: %clang_cc1 -triple thumbv7-linux-gnueabihf -target-cpu cortex-a5 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-VFP4
-// CHECK-VFP4: "target-features"="+vfp4,+neon"
+// CHECK-VFP4: "target-features"="+dsp,+neon,+vfp4"
 
 
 // RUN: %clang_cc1 -triple thumbv7-linux-gnueabihf -target-cpu cortex-a7 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-VFP4-DIV
@@ -15,24 +18,42 @@
 // RUN: %clang_cc1 -triple armv7-linux-gnueabihf -target-cpu cortex-a17 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-VFP4-DIV
 // RUN: %clang_cc1 -triple thumbv7s-linux-gnueabi -target-cpu swift -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-VFP4-DIV
 // RUN: %clang_cc1 -triple thumbv7-linux-gnueabihf -target-cpu krait -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-VFP4-DIV
-// CHECK-VFP4-DIV: "target-features"="+vfp4,+neon,+hwdiv,+hwdiv-arm"
+// CHECK-VFP4-DIV: "target-features"="+dsp,+hwdiv,+hwdiv-arm,+neon,+vfp4"
 
 
 // RUN: %clang_cc1 -triple thumbv7s-apple-ios7.0 -target-cpu cyclone -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-BASIC-V8
+// RUN: %clang_cc1 -triple thumbv8-linux-gnueabihf -target-cpu cortex-a35 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-BASIC-V8
 // RUN: %clang_cc1 -triple armv8-linux-gnueabi -target-cpu cortex-a53 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-BASIC-V8
 // RUN: %clang_cc1 -triple thumbv8-linux-gnueabihf -target-cpu cortex-a57 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-BASIC-V8
 // RUN: %clang_cc1 -triple thumbv8-linux-gnueabihf -target-cpu cortex-a72 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-BASIC-V8
-// CHECK-BASIC-V8: "target-features"="+neon,+fp-armv8,+hwdiv,+crypto,+crc,+hwdiv-arm"
+// CHECK-BASIC-V8: "target-features"="+crc,+crypto,+dsp,+fp-armv8,+hwdiv,+hwdiv-arm,+neon"
 
 
-// RUN: %clang_cc1 -triple thumbv7-linux-gnueabi -target-cpu cortex-r5 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-DIV
-// RUN: %clang_cc1 -triple thumbv7-linux-gnueabi -target-cpu cortex-r7 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-DIV
-// CHECK-DIV: "target-features"="+hwdiv,+hwdiv-arm"
+// RUN: %clang_cc1 -triple thumbv7-linux-gnueabi -target-cpu cortex-r5 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-VFP3-D16-DIV
+// CHECK-VFP3-D16-DIV: "target-features"="+d16,+dsp,+hwdiv,+hwdiv-arm,+vfp3"
+
+
+// RUN: %clang_cc1 -triple armv7-linux-gnueabi -target-cpu cortex-r4f -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-VFP3-D16-THUMB-DIV
+// CHECK-VFP3-D16-THUMB-DIV: "target-features"="+d16,+dsp,+hwdiv,+vfp3"
+
+
+// RUN: %clang_cc1 -triple thumbv7-linux-gnueabi -target-cpu cortex-r7 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-VFP3-D16-FP16-DIV
+// CHECK-VFP3-D16-FP16-DIV: "target-features"="+d16,+dsp,+fp16,+hwdiv,+hwdiv-arm,+vfp3"
+
+
+// RUN: %clang_cc1 -triple thumbv7-linux-gnueabi -target-cpu cortex-m4 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-VFP4-D16-SP-THUMB-DIV
+// CHECK-VFP4-D16-SP-THUMB-DIV: "target-features"="+d16,+dsp,+fp-only-sp,+hwdiv,+vfp4"
+
+
+// RUN: %clang_cc1 -triple thumbv7-linux-gnueabi -target-cpu cortex-m7 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-VFP5-D16-THUMB-DIV
+// CHECK-VFP5-D16-THUMB-DIV: "target-features"="+d16,+dsp,+fp-armv8,+hwdiv"
+
 
 // RUN: %clang_cc1 -triple armv7-linux-gnueabi -target-cpu cortex-r4 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-THUMB-DIV
-// RUN: %clang_cc1 -triple thumbv7-linux-gnueabi -target-cpu cortex-m3 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-THUMB-DIV
-// RUN: %clang_cc1 -triple thumbv7-linux-gnueabi -target-cpu cortex-m4 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-THUMB-DIV
-// CHECK-THUMB-DIV: "target-features"="+hwdiv"
+// CHECK-THUMB-DIV: "target-features"="+dsp,+hwdiv"
+
+// RUN: %clang_cc1 -triple thumbv7-linux-gnueabi -target-cpu cortex-m3 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-THUMB-DIV-M3
+// CHECK-THUMB-DIV-M3: "target-features"="+hwdiv"
 
 
 void foo() {}
diff --git a/test/CodeGen/arm-v8.1a-neon-intrinsics.c b/test/CodeGen/arm-v8.1a-neon-intrinsics.c
new file mode 100644
index 0000000..5fe299a
--- /dev/null
+++ b/test/CodeGen/arm-v8.1a-neon-intrinsics.c
@@ -0,0 +1,122 @@
+// RUN: %clang_cc1 -triple armv8.1a-linux-gnu -target-feature +neon \
+// RUN:  -O3 -S -o - %s \
+// RUN:  | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-ARM
+// RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +neon \
+// RUN:  -target-feature +v8.1a -O3 -S -o - %s \
+// RUN:  | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-AARCH64
+// REQUIRES: arm-registered-target,aarch64-registered-target
+
+#include <arm_neon.h>
+
+// CHECK-LABEL: test_vqrdmlah_s16
+int16x4_t test_vqrdmlah_s16(int16x4_t a, int16x4_t b, int16x4_t c) {
+// CHECK-ARM: vqrdmlah.s16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-AARCH64: sqrdmlah {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+  return vqrdmlah_s16(a, b, c);
+}
+
+// CHECK-LABEL: test_vqrdmlah_s32
+int32x2_t test_vqrdmlah_s32(int32x2_t a, int32x2_t b, int32x2_t c) {
+// CHECK-ARM: vqrdmlah.s32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-AARCH64: sqrdmlah {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+  return vqrdmlah_s32(a, b, c);
+}
+
+// CHECK-LABEL: test_vqrdmlahq_s16
+int16x8_t test_vqrdmlahq_s16(int16x8_t a, int16x8_t b, int16x8_t c) {
+// CHECK-ARM: vqrdmlah.s16 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-AARCH64: sqrdmlah {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+  return vqrdmlahq_s16(a, b, c);
+}
+
+// CHECK-LABEL: test_vqrdmlahq_s32
+int32x4_t test_vqrdmlahq_s32(int32x4_t a, int32x4_t b, int32x4_t c) {
+// CHECK-ARM: vqrdmlah.s32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-AARCH64: sqrdmlah {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+  return vqrdmlahq_s32(a, b, c);
+}
+
+// CHECK-LABEL: test_vqrdmlah_lane_s16
+int16x4_t test_vqrdmlah_lane_s16(int16x4_t a, int16x4_t b, int16x4_t c) {
+// CHECK-ARM: vqrdmlah.s16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}[3]
+// CHECK-AARCH64: sqrdmlah {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
+  return vqrdmlah_lane_s16(a, b, c, 3);
+}
+
+// CHECK-LABEL: test_vqrdmlah_lane_s32
+int32x2_t test_vqrdmlah_lane_s32(int32x2_t a, int32x2_t b, int32x2_t c) {
+// CHECK-ARM: vqrdmlah.s32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}[1]
+// CHECK-AARCH64: sqrdmlah {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
+  return vqrdmlah_lane_s32(a, b, c, 1);
+}
+
+// CHECK-LABEL: test_vqrdmlahq_lane_s16
+int16x8_t test_vqrdmlahq_lane_s16(int16x8_t a, int16x8_t b, int16x4_t c) {
+// CHECK-ARM: vqrdmlah.s16 q{{[0-9]+}}, q{{[0-9]+}}, d{{[0-9]+}}[3]
+// CHECK-AARCH64: sqrdmlah {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
+  return vqrdmlahq_lane_s16(a, b, c, 3);
+}
+
+// CHECK-LABEL: test_vqrdmlahq_lane_s32
+int32x4_t test_vqrdmlahq_lane_s32(int32x4_t a, int32x4_t b, int32x2_t c) {
+// CHECK-ARM: vqrdmlah.s32 q{{[0-9]+}}, q{{[0-9]+}}, d{{[0-9]+}}[1]
+// CHECK-AARCH64: sqrdmlah {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
+  return vqrdmlahq_lane_s32(a, b, c, 1);
+}
+
+// CHECK-LABEL: test_vqrdmlsh_s16
+int16x4_t test_vqrdmlsh_s16(int16x4_t a, int16x4_t b, int16x4_t c) {
+// CHECK-ARM: vqrdmlsh.s16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-AARCH64: sqrdmlsh {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+  return vqrdmlsh_s16(a, b, c);
+}
+
+// CHECK-LABEL: test_vqrdmlsh_s32
+int32x2_t test_vqrdmlsh_s32(int32x2_t a, int32x2_t b, int32x2_t c) {
+// CHECK-ARM: vqrdmlsh.s32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-AARCH64: sqrdmlsh {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+  return vqrdmlsh_s32(a, b, c);
+}
+
+// CHECK-LABEL: test_vqrdmlshq_s16
+int16x8_t test_vqrdmlshq_s16(int16x8_t a, int16x8_t b, int16x8_t c) {
+// CHECK-ARM: vqrdmlsh.s16 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-AARCH64: sqrdmlsh {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+  return vqrdmlshq_s16(a, b, c);
+}
+
+// CHECK-LABEL: test_vqrdmlshq_s32
+int32x4_t test_vqrdmlshq_s32(int32x4_t a, int32x4_t b, int32x4_t c) {
+// CHECK-ARM: vqrdmlsh.s32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-AARCH64: sqrdmlsh {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+  return vqrdmlshq_s32(a, b, c);
+}
+
+// CHECK-LABEL: test_vqrdmlsh_lane_s16
+int16x4_t test_vqrdmlsh_lane_s16(int16x4_t a, int16x4_t b, int16x4_t c) {
+// CHECK-ARM: vqrdmlsh.s16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}[3]
+// CHECK-AARCH64: sqrdmlsh {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
+  return vqrdmlsh_lane_s16(a, b, c, 3);
+}
+
+// CHECK-LABEL: test_vqrdmlsh_lane_s32
+int32x2_t test_vqrdmlsh_lane_s32(int32x2_t a, int32x2_t b, int32x2_t c) {
+// CHECK-ARM: vqrdmlsh.s32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}[1]
+// CHECK-AARCH64: sqrdmlsh {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
+  return vqrdmlsh_lane_s32(a, b, c, 1);
+}
+
+// CHECK-LABEL: test_vqrdmlshq_lane_s16
+int16x8_t test_vqrdmlshq_lane_s16(int16x8_t a, int16x8_t b, int16x4_t c) {
+// CHECK-ARM: vqrdmlsh.s16 q{{[0-9]+}}, q{{[0-9]+}}, d{{[0-9]+}}[3]
+// CHECK-AARCH64: sqrdmlsh {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
+  return vqrdmlshq_lane_s16(a, b, c, 3);
+}
+
+// CHECK-LABEL: test_vqrdmlshq_lane_s32
+int32x4_t test_vqrdmlshq_lane_s32(int32x4_t a, int32x4_t b, int32x2_t c) {
+// CHECK-ARM: vqrdmlsh.s32 q{{[0-9]+}}, q{{[0-9]+}}, d{{[0-9]+}}[1]
+// CHECK-AARCH64: sqrdmlsh {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
+  return vqrdmlshq_lane_s32(a, b, c, 1);
+}
+
diff --git a/test/CodeGen/arm-vector-align.c b/test/CodeGen/arm-vector-align.c
index 15dd13e..87e8391 100644
--- a/test/CodeGen/arm-vector-align.c
+++ b/test/CodeGen/arm-vector-align.c
@@ -14,9 +14,9 @@
 typedef float AlignedAddr __attribute__ ((aligned (16)));
 void t1(AlignedAddr *addr1, AlignedAddr *addr2) {
 // CHECK: @t1
-// CHECK: call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* %{{.*}}, i32 16)
+// CHECK: call <4 x float> @llvm.arm.neon.vld1.v4f32.p0i8(i8* %{{.*}}, i32 16)
   float32x4_t a = vld1q_f32(addr1);
-// CHECK: call void @llvm.arm.neon.vst1.v4f32(i8* %{{.*}}, <4 x float> %{{.*}}, i32 16)
+// CHECK: call void @llvm.arm.neon.vst1.p0i8.v4f32(i8* %{{.*}}, <4 x float> %{{.*}}, i32 16)
   vst1q_f32(addr2, a);
 }
 
diff --git a/test/CodeGen/arm64-abi-vector.c b/test/CodeGen/arm64-abi-vector.c
index 4566c41..29aeadb 100644
--- a/test/CodeGen/arm64-abi-vector.c
+++ b/test/CodeGen/arm64-abi-vector.c
@@ -16,7 +16,7 @@
 double varargs_vec_3c(int fixed, ...) {
 // CHECK: varargs_vec_3c
 // CHECK: alloca <3 x i8>, align 4
-// CHECK: [[AP_NEXT:%.*]] = getelementptr i8, i8* [[AP_CUR:%.*]], i32 8
+// CHECK: [[AP_NEXT:%.*]] = getelementptr inbounds i8, i8* [[AP_CUR:%.*]], i64 8
 // CHECK: bitcast i8* [[AP_CUR]] to <3 x i8>*
   va_list ap;
   double sum = fixed;
@@ -36,7 +36,7 @@
 double varargs_vec_4c(int fixed, ...) {
 // CHECK: varargs_vec_4c
 // CHECK: alloca <4 x i8>, align 4
-// CHECK: [[AP_NEXT:%.*]] = getelementptr i8, i8* [[AP_CUR:%.*]], i32 8
+// CHECK: [[AP_NEXT:%.*]] = getelementptr inbounds i8, i8* [[AP_CUR:%.*]], i64 8
 // CHECK: bitcast i8* [[AP_CUR]] to <4 x i8>*
   va_list ap;
   double sum = fixed;
@@ -56,7 +56,7 @@
 double varargs_vec_5c(int fixed, ...) {
 // CHECK: varargs_vec_5c
 // CHECK: alloca <5 x i8>, align 8
-// CHECK: [[AP_NEXT:%.*]] = getelementptr i8, i8* [[AP_CUR:%.*]], i32 8
+// CHECK: [[AP_NEXT:%.*]] = getelementptr inbounds i8, i8* [[AP_CUR:%.*]], i64 8
 // CHECK: bitcast i8* [[AP_CUR]] to <5 x i8>*
   va_list ap;
   double sum = fixed;
@@ -78,7 +78,7 @@
 // CHECK: alloca <9 x i8>, align 16
 // CHECK: [[ALIGN:%.*]] = and i64 {{%.*}}, -16
 // CHECK: [[AP_ALIGN:%.*]] = inttoptr i64 [[ALIGN]] to i8*
-// CHECK: [[AP_NEXT:%.*]] = getelementptr i8, i8* [[AP_ALIGN]], i32 16
+// CHECK: [[AP_NEXT:%.*]] = getelementptr inbounds i8, i8* [[AP_ALIGN]], i64 16
 // CHECK: bitcast i8* [[AP_ALIGN]] to <9 x i8>*
   va_list ap;
   double sum = fixed;
@@ -97,10 +97,9 @@
 
 double varargs_vec_19c(int fixed, ...) {
 // CHECK: varargs_vec_19c
-// CHECK: [[AP_NEXT:%.*]] = getelementptr i8, i8* [[AP_CUR:%.*]], i32 8
-// CHECK: [[VAR:%.*]] = bitcast i8* [[AP_CUR]] to i8**
-// CHECK: [[VAR2:%.*]] = load i8*, i8** [[VAR]]
-// CHECK: bitcast i8* [[VAR2]] to <19 x i8>*
+// CHECK: [[AP_NEXT:%.*]] = getelementptr inbounds i8, i8* [[AP_CUR:%.*]], i64 8
+// CHECK: [[VAR:%.*]] = bitcast i8* [[AP_CUR]] to <19 x i8>**
+// CHECK: [[VAR2:%.*]] = load <19 x i8>*, <19 x i8>** [[VAR]]
   va_list ap;
   double sum = fixed;
   va_start(ap, fixed);
@@ -119,7 +118,7 @@
 double varargs_vec_3s(int fixed, ...) {
 // CHECK: varargs_vec_3s
 // CHECK: alloca <3 x i16>, align 8
-// CHECK: [[AP_NEXT:%.*]] = getelementptr i8, i8* [[AP_CUR:%.*]], i32 8
+// CHECK: [[AP_NEXT:%.*]] = getelementptr inbounds i8, i8* [[AP_CUR:%.*]], i64 8
 // CHECK: bitcast i8* [[AP_CUR]] to <3 x i16>*
   va_list ap;
   double sum = fixed;
@@ -141,7 +140,7 @@
 // CHECK: alloca <5 x i16>, align 16
 // CHECK: [[ALIGN:%.*]] = and i64 {{%.*}}, -16
 // CHECK: [[AP_ALIGN:%.*]] = inttoptr i64 [[ALIGN]] to i8*
-// CHECK: [[AP_NEXT:%.*]] = getelementptr i8, i8* [[AP_ALIGN]], i32 16
+// CHECK: [[AP_NEXT:%.*]] = getelementptr inbounds i8, i8* [[AP_ALIGN]], i64 16
 // CHECK: bitcast i8* [[AP_ALIGN]] to <5 x i16>*
   va_list ap;
   double sum = fixed;
@@ -163,7 +162,7 @@
 // CHECK: alloca <3 x i32>, align 16
 // CHECK: [[ALIGN:%.*]] = and i64 {{%.*}}, -16
 // CHECK: [[AP_ALIGN:%.*]] = inttoptr i64 [[ALIGN]] to i8*
-// CHECK: [[AP_NEXT:%.*]] = getelementptr i8, i8* [[AP_ALIGN]], i32 16
+// CHECK: [[AP_NEXT:%.*]] = getelementptr inbounds i8, i8* [[AP_ALIGN]], i64 16
 // CHECK: bitcast i8* [[AP_ALIGN]] to <3 x i32>*
   va_list ap;
   double sum = fixed;
@@ -183,10 +182,9 @@
 double varargs_vec_5i(int fixed, ...) {
 // CHECK: varargs_vec_5i
 // CHECK: alloca <5 x i32>, align 16
-// CHECK: [[AP_NEXT:%.*]] = getelementptr i8, i8* [[AP_CUR:%.*]], i32 8
-// CHECK: [[VAR:%.*]] = bitcast i8* [[AP_CUR]] to i8**
-// CHECK: [[VAR2:%.*]] = load i8*, i8** [[VAR]]
-// CHECK: bitcast i8* [[VAR2]] to <5 x i32>*
+// CHECK: [[AP_NEXT:%.*]] = getelementptr inbounds i8, i8* [[AP_CUR:%.*]], i64 8
+// CHECK: [[VAR:%.*]] = bitcast i8* [[AP_CUR]] to <5 x i32>**
+// CHECK: [[VAR2:%.*]] = load <5 x i32>*, <5 x i32>** [[VAR]]
   va_list ap;
   double sum = fixed;
   va_start(ap, fixed);
@@ -205,10 +203,9 @@
 double varargs_vec_3d(int fixed, ...) {
 // CHECK: varargs_vec_3d
 // CHECK: alloca <3 x double>, align 16
-// CHECK: [[AP_NEXT:%.*]] = getelementptr i8, i8* [[AP_CUR:%.*]], i32 8
-// CHECK: [[VAR:%.*]] = bitcast i8* [[AP_CUR]] to i8**
-// CHECK: [[VAR2:%.*]] = load i8*, i8** [[VAR]]
-// CHECK: bitcast i8* [[VAR2]] to <3 x double>*
+// CHECK: [[AP_NEXT:%.*]] = getelementptr inbounds i8, i8* [[AP_CUR:%.*]], i64 8
+// CHECK: [[VAR:%.*]] = bitcast i8* [[AP_CUR]] to <3 x double>**
+// CHECK: [[VAR2:%.*]] = load <3 x double>*, <3 x double>** [[VAR]]
   va_list ap;
   double sum = fixed;
   va_start(ap, fixed);
@@ -230,52 +227,49 @@
   double sum = fixed;
   va_start(ap, fixed);
   __char3 c3 = va_arg(ap, __char3);
-// CHECK: [[AP_NEXT:%.*]] = getelementptr i8, i8* [[AP_CUR:%.*]], i32 8
+// CHECK: [[AP_NEXT:%.*]] = getelementptr inbounds i8, i8* [[AP_CUR:%.*]], i64 8
 // CHECK: bitcast i8* [[AP_CUR]] to <3 x i8>*
   sum = sum + c3.x + c3.y;
   __char5 c5 = va_arg(ap, __char5);
-// CHECK: [[AP_NEXT:%.*]] = getelementptr i8, i8* [[AP_CUR:%.*]], i32 8
+// CHECK: [[AP_NEXT:%.*]] = getelementptr inbounds i8, i8* [[AP_CUR:%.*]], i64 8
 // CHECK: bitcast i8* [[AP_CUR]] to <5 x i8>*
   sum = sum + c5.x + c5.y;
   __char9 c9 = va_arg(ap, __char9);
 // CHECK: [[ALIGN:%.*]] = and i64 {{%.*}}, -16
 // CHECK: [[AP_ALIGN:%.*]] = inttoptr i64 [[ALIGN]] to i8*
-// CHECK: [[AP_NEXT:%.*]] = getelementptr i8, i8* [[AP_ALIGN]], i32 16
+// CHECK: [[AP_NEXT:%.*]] = getelementptr inbounds i8, i8* [[AP_ALIGN]], i64 16
 // CHECK: bitcast i8* [[AP_ALIGN]] to <9 x i8>*
   sum = sum + c9.x + c9.y;
   __char19 c19 = va_arg(ap, __char19);
-// CHECK: [[AP_NEXT:%.*]] = getelementptr i8, i8* [[AP_CUR:%.*]], i32 8
-// CHECK: [[VAR:%.*]] = bitcast i8* [[AP_CUR]] to i8**
-// CHECK: [[VAR2:%.*]] = load i8*, i8** [[VAR]]
-// CHECK: bitcast i8* [[VAR2]] to <19 x i8>*
+// CHECK: [[AP_NEXT:%.*]] = getelementptr inbounds i8, i8* [[AP_CUR:%.*]], i64 8
+// CHECK: [[VAR:%.*]] = bitcast i8* [[AP_CUR]] to <19 x i8>**
+// CHECK: [[VAR2:%.*]] = load <19 x i8>*, <19 x i8>** [[VAR]]
   sum = sum + c19.x + c19.y;
   __short3 s3 = va_arg(ap, __short3);
-// CHECK: [[AP_NEXT:%.*]] = getelementptr i8, i8* [[AP_CUR:%.*]], i32 8
+// CHECK: [[AP_NEXT:%.*]] = getelementptr inbounds i8, i8* [[AP_CUR:%.*]], i64 8
 // CHECK: bitcast i8* [[AP_CUR]] to <3 x i16>*
   sum = sum + s3.x + s3.y;
   __short5 s5 = va_arg(ap, __short5);
 // CHECK: [[ALIGN:%.*]] = and i64 {{%.*}}, -16
 // CHECK: [[AP_ALIGN:%.*]] = inttoptr i64 [[ALIGN]] to i8*
-// CHECK: [[AP_NEXT:%.*]] = getelementptr i8, i8* [[AP_ALIGN]], i32 16
+// CHECK: [[AP_NEXT:%.*]] = getelementptr inbounds i8, i8* [[AP_ALIGN]], i64 16
 // CHECK: bitcast i8* [[AP_ALIGN]] to <5 x i16>*
   sum = sum + s5.x + s5.y;
   __int3 i3 = va_arg(ap, __int3);
 // CHECK: [[ALIGN:%.*]] = and i64 {{%.*}}, -16
 // CHECK: [[AP_ALIGN:%.*]] = inttoptr i64 [[ALIGN]] to i8*
-// CHECK: [[AP_NEXT:%.*]] = getelementptr i8, i8* [[AP_ALIGN]], i32 16
+// CHECK: [[AP_NEXT:%.*]] = getelementptr inbounds i8, i8* [[AP_ALIGN]], i64 16
 // CHECK: bitcast i8* [[AP_ALIGN]] to <3 x i32>*
   sum = sum + i3.x + i3.y;
   __int5 i5 = va_arg(ap, __int5);
-// CHECK: [[AP_NEXT:%.*]] = getelementptr i8, i8* [[AP_CUR:%.*]], i32 8
-// CHECK: [[VAR:%.*]] = bitcast i8* [[AP_CUR]] to i8**
-// CHECK: [[VAR2:%.*]] = load i8*, i8** [[VAR]]
-// CHECK: bitcast i8* [[VAR2]] to <5 x i32>*
+// CHECK: [[AP_NEXT:%.*]] = getelementptr inbounds i8, i8* [[AP_CUR:%.*]], i64 8
+// CHECK: [[VAR:%.*]] = bitcast i8* [[AP_CUR]] to <5 x i32>**
+// CHECK: [[VAR2:%.*]] = load <5 x i32>*, <5 x i32>** [[VAR]]
   sum = sum + i5.x + i5.y;
   __double3 d3 = va_arg(ap, __double3);
-// CHECK: [[AP_NEXT:%.*]] = getelementptr i8, i8* [[AP_CUR:%.*]], i32 8
-// CHECK: [[VAR:%.*]] = bitcast i8* [[AP_CUR]] to i8**
-// CHECK: [[VAR2:%.*]] = load i8*, i8** [[VAR]]
-// CHECK: bitcast i8* [[VAR2]] to <3 x double>*
+// CHECK: [[AP_NEXT:%.*]] = getelementptr inbounds i8, i8* [[AP_CUR:%.*]], i64 8
+// CHECK: [[VAR:%.*]] = bitcast i8* [[AP_CUR]] to <3 x double>**
+// CHECK: [[VAR2:%.*]] = load <3 x double>*, <3 x double>** [[VAR]]
   sum = sum + d3.x + d3.y;
   va_end(ap);
   return sum;
@@ -309,7 +303,7 @@
 // CHECK: args_vec_5c
 // CHECK: [[C5:%.*]] = alloca <5 x i8>, align 8
 // CHECK: [[TMP:%.*]] = bitcast <5 x i8>* [[C5]] to <2 x i32>*
-// CHECK: store <2 x i32> {{%.*}}, <2 x i32>* [[TMP]], align 1
+// CHECK: store <2 x i32> {{%.*}}, <2 x i32>* [[TMP]], align 8
   double sum = fixed;
   sum = sum + c5.x + c5.y;
   return sum;
@@ -325,7 +319,7 @@
 // CHECK: args_vec_9c
 // CHECK: [[C9:%.*]] = alloca <9 x i8>, align 16
 // CHECK: [[TMP:%.*]] = bitcast <9 x i8>* [[C9]] to <4 x i32>*
-// CHECK: store <4 x i32> {{%.*}}, <4 x i32>* [[TMP]], align 1
+// CHECK: store <4 x i32> {{%.*}}, <4 x i32>* [[TMP]], align 16
   double sum = fixed;
   sum = sum + c9.x + c9.y;
   return sum;
@@ -355,7 +349,7 @@
 // CHECK: args_vec_3s
 // CHECK: [[C3:%.*]] = alloca <3 x i16>, align 8
 // CHECK: [[TMP:%.*]] = bitcast <3 x i16>* [[C3]] to <2 x i32>*
-// CHECK: store <2 x i32> {{%.*}}, <2 x i32>* [[TMP]], align 1
+// CHECK: store <2 x i32> {{%.*}}, <2 x i32>* [[TMP]], align 8
   double sum = fixed;
   sum = sum + c3.x + c3.y;
   return sum;
@@ -371,7 +365,7 @@
 // CHECK: args_vec_5s
 // CHECK: [[C5:%.*]] = alloca <5 x i16>, align 16
 // CHECK: [[TMP:%.*]] = bitcast <5 x i16>* [[C5]] to <4 x i32>*
-// CHECK: store <4 x i32> {{%.*}}, <4 x i32>* [[TMP]], align 1
+// CHECK: store <4 x i32> {{%.*}}, <4 x i32>* [[TMP]], align 16
   double sum = fixed;
   sum = sum + c5.x + c5.y;
   return sum;
@@ -387,7 +381,7 @@
 // CHECK: args_vec_3i
 // CHECK: [[C3:%.*]] = alloca <3 x i32>, align 16
 // CHECK: [[TMP:%.*]] = bitcast <3 x i32>* [[C3]] to <4 x i32>*
-// CHECK: store <4 x i32> {{%.*}}, <4 x i32>* [[TMP]], align 1
+// CHECK: store <4 x i32> {{%.*}}, <4 x i32>* [[TMP]], align 16
   double sum = fixed;
   sum = sum + c3.x + c3.y;
   return sum;
diff --git a/test/CodeGen/arm64-arguments.c b/test/CodeGen/arm64-arguments.c
index 4486bb4..93a1a19 100644
--- a/test/CodeGen/arm64-arguments.c
+++ b/test/CodeGen/arm64-arguments.c
@@ -117,7 +117,7 @@
 struct s31 { char x; };
 void f31(struct s31 s) { }
 // CHECK: define void @f31(i64 %s.coerce)
-// CHECK: %s = alloca %struct.s31, align 8
+// CHECK: %s = alloca %struct.s31, align 1
 // CHECK: trunc i64 %s.coerce to i8
 // CHECK: store i8 %{{.*}},
 
@@ -219,8 +219,8 @@
 // CHECK: define <4 x i32> @f36(i32 %i, i128 %s1.coerce, i128 %s2.coerce)
 // CHECK: %s1 = alloca %struct.s36, align 16
 // CHECK: %s2 = alloca %struct.s36, align 16
-// CHECK: store i128 %s1.coerce, i128* %{{.*}}, align 1
-// CHECK: store i128 %s2.coerce, i128* %{{.*}}, align 1
+// CHECK: store i128 %s1.coerce, i128* %{{.*}}, align 16
+// CHECK: store i128 %s2.coerce, i128* %{{.*}}, align 16
 // CHECK: %[[a:.*]] = bitcast %struct.s36* %s1 to <4 x i32>*
 // CHECK: load <4 x i32>, <4 x i32>* %[[a]], align 16
 // CHECK: %[[b:.*]] = bitcast %struct.s36* %s2 to <4 x i32>*
@@ -273,10 +273,10 @@
 __attribute__ ((noinline))
 int f38(int i, s38_no_align s1, s38_no_align s2) {
 // CHECK: define i32 @f38(i32 %i, i64 %s1.coerce, i64 %s2.coerce)
-// CHECK: %s1 = alloca %struct.s38, align 8
-// CHECK: %s2 = alloca %struct.s38, align 8
-// CHECK: store i64 %s1.coerce, i64* %{{.*}}, align 1
-// CHECK: store i64 %s2.coerce, i64* %{{.*}}, align 1
+// CHECK: %s1 = alloca %struct.s38, align 4
+// CHECK: %s2 = alloca %struct.s38, align 4
+// CHECK: store i64 %s1.coerce, i64* %{{.*}}, align 4
+// CHECK: store i64 %s2.coerce, i64* %{{.*}}, align 4
 // CHECK: getelementptr inbounds %struct.s38, %struct.s38* %s1, i32 0, i32 0
 // CHECK: getelementptr inbounds %struct.s38, %struct.s38* %s2, i32 0, i32 0
 // CHECK: getelementptr inbounds %struct.s38, %struct.s38* %s1, i32 0, i32 1
@@ -287,8 +287,8 @@
 s38_no_align g38_2;
 int caller38() {
 // CHECK: define i32 @caller38()
-// CHECK: %[[a:.*]] = load i64, i64* bitcast (%struct.s38* @g38 to i64*), align 1
-// CHECK: %[[b:.*]] = load i64, i64* bitcast (%struct.s38* @g38_2 to i64*), align 1
+// CHECK: %[[a:.*]] = load i64, i64* bitcast (%struct.s38* @g38 to i64*), align 4
+// CHECK: %[[b:.*]] = load i64, i64* bitcast (%struct.s38* @g38_2 to i64*), align 4
 // CHECK: call i32 @f38(i32 3, i64 %[[a]], i64 %[[b]])
   return f38(3, g38, g38_2);
 }
@@ -297,10 +297,10 @@
 int f38_stack(int i, int i2, int i3, int i4, int i5, int i6, int i7, int i8,
               int i9, s38_no_align s1, s38_no_align s2) {
 // CHECK: define i32 @f38_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, i64 %s1.coerce, i64 %s2.coerce)
-// CHECK: %s1 = alloca %struct.s38, align 8
-// CHECK: %s2 = alloca %struct.s38, align 8
-// CHECK: store i64 %s1.coerce, i64* %{{.*}}, align 1
-// CHECK: store i64 %s2.coerce, i64* %{{.*}}, align 1
+// CHECK: %s1 = alloca %struct.s38, align 4
+// CHECK: %s2 = alloca %struct.s38, align 4
+// CHECK: store i64 %s1.coerce, i64* %{{.*}}, align 4
+// CHECK: store i64 %s2.coerce, i64* %{{.*}}, align 4
 // CHECK: getelementptr inbounds %struct.s38, %struct.s38* %s1, i32 0, i32 0
 // CHECK: getelementptr inbounds %struct.s38, %struct.s38* %s2, i32 0, i32 0
 // CHECK: getelementptr inbounds %struct.s38, %struct.s38* %s1, i32 0, i32 1
@@ -309,8 +309,8 @@
 }
 int caller38_stack() {
 // CHECK: define i32 @caller38_stack()
-// CHECK: %[[a:.*]] = load i64, i64* bitcast (%struct.s38* @g38 to i64*), align 1
-// CHECK: %[[b:.*]] = load i64, i64* bitcast (%struct.s38* @g38_2 to i64*), align 1
+// CHECK: %[[a:.*]] = load i64, i64* bitcast (%struct.s38* @g38 to i64*), align 4
+// CHECK: %[[b:.*]] = load i64, i64* bitcast (%struct.s38* @g38_2 to i64*), align 4
 // CHECK: call i32 @f38_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i64 %[[a]], i64 %[[b]])
   return f38_stack(1, 2, 3, 4, 5, 6, 7, 8, 9, g38, g38_2);
 }
@@ -328,8 +328,8 @@
 // CHECK: define i32 @f39(i32 %i, i128 %s1.coerce, i128 %s2.coerce)
 // CHECK: %s1 = alloca %struct.s39, align 16
 // CHECK: %s2 = alloca %struct.s39, align 16
-// CHECK: store i128 %s1.coerce, i128* %{{.*}}, align 1
-// CHECK: store i128 %s2.coerce, i128* %{{.*}}, align 1
+// CHECK: store i128 %s1.coerce, i128* %{{.*}}, align 16
+// CHECK: store i128 %s2.coerce, i128* %{{.*}}, align 16
 // CHECK: getelementptr inbounds %struct.s39, %struct.s39* %s1, i32 0, i32 0
 // CHECK: getelementptr inbounds %struct.s39, %struct.s39* %s2, i32 0, i32 0
 // CHECK: getelementptr inbounds %struct.s39, %struct.s39* %s1, i32 0, i32 1
@@ -340,8 +340,8 @@
 s39_with_align g39_2;
 int caller39() {
 // CHECK: define i32 @caller39()
-// CHECK: %[[a:.*]] = load i128, i128* bitcast (%struct.s39* @g39 to i128*), align 1
-// CHECK: %[[b:.*]] = load i128, i128* bitcast (%struct.s39* @g39_2 to i128*), align 1
+// CHECK: %[[a:.*]] = load i128, i128* bitcast (%struct.s39* @g39 to i128*), align 16
+// CHECK: %[[b:.*]] = load i128, i128* bitcast (%struct.s39* @g39_2 to i128*), align 16
 // CHECK: call i32 @f39(i32 3, i128 %[[a]], i128 %[[b]])
   return f39(3, g39, g39_2);
 }
@@ -352,8 +352,8 @@
 // CHECK: define i32 @f39_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, i128 %s1.coerce, i128 %s2.coerce)
 // CHECK: %s1 = alloca %struct.s39, align 16
 // CHECK: %s2 = alloca %struct.s39, align 16
-// CHECK: store i128 %s1.coerce, i128* %{{.*}}, align 1
-// CHECK: store i128 %s2.coerce, i128* %{{.*}}, align 1
+// CHECK: store i128 %s1.coerce, i128* %{{.*}}, align 16
+// CHECK: store i128 %s2.coerce, i128* %{{.*}}, align 16
 // CHECK: getelementptr inbounds %struct.s39, %struct.s39* %s1, i32 0, i32 0
 // CHECK: getelementptr inbounds %struct.s39, %struct.s39* %s2, i32 0, i32 0
 // CHECK: getelementptr inbounds %struct.s39, %struct.s39* %s1, i32 0, i32 1
@@ -362,8 +362,8 @@
 }
 int caller39_stack() {
 // CHECK: define i32 @caller39_stack()
-// CHECK: %[[a:.*]] = load i128, i128* bitcast (%struct.s39* @g39 to i128*), align 1
-// CHECK: %[[b:.*]] = load i128, i128* bitcast (%struct.s39* @g39_2 to i128*), align 1
+// CHECK: %[[a:.*]] = load i128, i128* bitcast (%struct.s39* @g39 to i128*), align 16
+// CHECK: %[[b:.*]] = load i128, i128* bitcast (%struct.s39* @g39_2 to i128*), align 16
 // CHECK: call i32 @f39_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i128 %[[a]], i128 %[[b]])
   return f39_stack(1, 2, 3, 4, 5, 6, 7, 8, 9, g39, g39_2);
 }
@@ -381,10 +381,10 @@
 __attribute__ ((noinline))
 int f40(int i, s40_no_align s1, s40_no_align s2) {
 // CHECK: define i32 @f40(i32 %i, [2 x i64] %s1.coerce, [2 x i64] %s2.coerce)
-// CHECK: %s1 = alloca %struct.s40, align 8
-// CHECK: %s2 = alloca %struct.s40, align 8
-// CHECK: store [2 x i64] %s1.coerce, [2 x i64]* %{{.*}}, align 1
-// CHECK: store [2 x i64] %s2.coerce, [2 x i64]* %{{.*}}, align 1
+// CHECK: %s1 = alloca %struct.s40, align 4
+// CHECK: %s2 = alloca %struct.s40, align 4
+// CHECK: store [2 x i64] %s1.coerce, [2 x i64]* %{{.*}}, align 4
+// CHECK: store [2 x i64] %s2.coerce, [2 x i64]* %{{.*}}, align 4
 // CHECK: getelementptr inbounds %struct.s40, %struct.s40* %s1, i32 0, i32 0
 // CHECK: getelementptr inbounds %struct.s40, %struct.s40* %s2, i32 0, i32 0
 // CHECK: getelementptr inbounds %struct.s40, %struct.s40* %s1, i32 0, i32 1
@@ -395,8 +395,8 @@
 s40_no_align g40_2;
 int caller40() {
 // CHECK: define i32 @caller40()
-// CHECK: %[[a:.*]] = load [2 x i64], [2 x i64]* bitcast (%struct.s40* @g40 to [2 x i64]*), align 1
-// CHECK: %[[b:.*]] = load [2 x i64], [2 x i64]* bitcast (%struct.s40* @g40_2 to [2 x i64]*), align 1
+// CHECK: %[[a:.*]] = load [2 x i64], [2 x i64]* bitcast (%struct.s40* @g40 to [2 x i64]*), align 4
+// CHECK: %[[b:.*]] = load [2 x i64], [2 x i64]* bitcast (%struct.s40* @g40_2 to [2 x i64]*), align 4
 // CHECK: call i32 @f40(i32 3, [2 x i64] %[[a]], [2 x i64] %[[b]])
   return f40(3, g40, g40_2);
 }
@@ -405,10 +405,10 @@
 int f40_stack(int i, int i2, int i3, int i4, int i5, int i6, int i7, int i8,
               int i9, s40_no_align s1, s40_no_align s2) {
 // CHECK: define i32 @f40_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, [2 x i64] %s1.coerce, [2 x i64] %s2.coerce)
-// CHECK: %s1 = alloca %struct.s40, align 8
-// CHECK: %s2 = alloca %struct.s40, align 8
-// CHECK: store [2 x i64] %s1.coerce, [2 x i64]* %{{.*}}, align 1
-// CHECK: store [2 x i64] %s2.coerce, [2 x i64]* %{{.*}}, align 1
+// CHECK: %s1 = alloca %struct.s40, align 4
+// CHECK: %s2 = alloca %struct.s40, align 4
+// CHECK: store [2 x i64] %s1.coerce, [2 x i64]* %{{.*}}, align 4
+// CHECK: store [2 x i64] %s2.coerce, [2 x i64]* %{{.*}}, align 4
 // CHECK: getelementptr inbounds %struct.s40, %struct.s40* %s1, i32 0, i32 0
 // CHECK: getelementptr inbounds %struct.s40, %struct.s40* %s2, i32 0, i32 0
 // CHECK: getelementptr inbounds %struct.s40, %struct.s40* %s1, i32 0, i32 1
@@ -417,8 +417,8 @@
 }
 int caller40_stack() {
 // CHECK: define i32 @caller40_stack()
-// CHECK: %[[a:.*]] = load [2 x i64], [2 x i64]* bitcast (%struct.s40* @g40 to [2 x i64]*), align 1
-// CHECK: %[[b:.*]] = load [2 x i64], [2 x i64]* bitcast (%struct.s40* @g40_2 to [2 x i64]*), align 1
+// CHECK: %[[a:.*]] = load [2 x i64], [2 x i64]* bitcast (%struct.s40* @g40 to [2 x i64]*), align 4
+// CHECK: %[[b:.*]] = load [2 x i64], [2 x i64]* bitcast (%struct.s40* @g40_2 to [2 x i64]*), align 4
 // CHECK: call i32 @f40_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, [2 x i64] %[[a]], [2 x i64] %[[b]])
   return f40_stack(1, 2, 3, 4, 5, 6, 7, 8, 9, g40, g40_2);
 }
@@ -438,8 +438,8 @@
 // CHECK: define i32 @f41(i32 %i, i128 %s1.coerce, i128 %s2.coerce)
 // CHECK: %s1 = alloca %struct.s41, align 16
 // CHECK: %s2 = alloca %struct.s41, align 16
-// CHECK: store i128 %s1.coerce, i128* %{{.*}}, align 1
-// CHECK: store i128 %s2.coerce, i128* %{{.*}}, align 1
+// CHECK: store i128 %s1.coerce, i128* %{{.*}}, align 16
+// CHECK: store i128 %s2.coerce, i128* %{{.*}}, align 16
 // CHECK: getelementptr inbounds %struct.s41, %struct.s41* %s1, i32 0, i32 0
 // CHECK: getelementptr inbounds %struct.s41, %struct.s41* %s2, i32 0, i32 0
 // CHECK: getelementptr inbounds %struct.s41, %struct.s41* %s1, i32 0, i32 1
@@ -450,8 +450,8 @@
 s41_with_align g41_2;
 int caller41() {
 // CHECK: define i32 @caller41()
-// CHECK: %[[a:.*]] = load i128, i128* bitcast (%struct.s41* @g41 to i128*), align 1
-// CHECK: %[[b:.*]] = load i128, i128* bitcast (%struct.s41* @g41_2 to i128*), align 1
+// CHECK: %[[a:.*]] = load i128, i128* bitcast (%struct.s41* @g41 to i128*), align 16
+// CHECK: %[[b:.*]] = load i128, i128* bitcast (%struct.s41* @g41_2 to i128*), align 16
 // CHECK: call i32 @f41(i32 3, i128 %[[a]], i128 %[[b]])
   return f41(3, g41, g41_2);
 }
@@ -462,8 +462,8 @@
 // CHECK: define i32 @f41_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, i128 %s1.coerce, i128 %s2.coerce)
 // CHECK: %s1 = alloca %struct.s41, align 16
 // CHECK: %s2 = alloca %struct.s41, align 16
-// CHECK: store i128 %s1.coerce, i128* %{{.*}}, align 1
-// CHECK: store i128 %s2.coerce, i128* %{{.*}}, align 1
+// CHECK: store i128 %s1.coerce, i128* %{{.*}}, align 16
+// CHECK: store i128 %s2.coerce, i128* %{{.*}}, align 16
 // CHECK: getelementptr inbounds %struct.s41, %struct.s41* %s1, i32 0, i32 0
 // CHECK: getelementptr inbounds %struct.s41, %struct.s41* %s2, i32 0, i32 0
 // CHECK: getelementptr inbounds %struct.s41, %struct.s41* %s1, i32 0, i32 1
@@ -472,8 +472,8 @@
 }
 int caller41_stack() {
 // CHECK: define i32 @caller41_stack()
-// CHECK: %[[a:.*]] = load i128, i128* bitcast (%struct.s41* @g41 to i128*), align 1
-// CHECK: %[[b:.*]] = load i128, i128* bitcast (%struct.s41* @g41_2 to i128*), align 1
+// CHECK: %[[a:.*]] = load i128, i128* bitcast (%struct.s41* @g41 to i128*), align 16
+// CHECK: %[[b:.*]] = load i128, i128* bitcast (%struct.s41* @g41_2 to i128*), align 16
 // CHECK: call i32 @f41_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i128 %[[a]], i128 %[[b]])
   return f41_stack(1, 2, 3, 4, 5, 6, 7, 8, 9, g41, g41_2);
 }
@@ -629,7 +629,7 @@
 // CHECK: [[CURLIST:%.*]] = load i8*, i8** [[THELIST]]
 
   // HFA is not indirect, so occupies its full 16 bytes on the stack.
-// CHECK: [[NEXTLIST:%.*]] = getelementptr i8, i8* [[CURLIST]], i32 16
+// CHECK: [[NEXTLIST:%.*]] = getelementptr inbounds i8, i8* [[CURLIST]], i64 16
 // CHECK: store i8* [[NEXTLIST]], i8** [[THELIST]]
 
 // CHECK: bitcast i8* [[CURLIST]] to %struct.HFA*
@@ -656,12 +656,11 @@
 
   // TooBigHFA is not actually an HFA, so gets passed indirectly. Only 8 bytes
   // of stack consumed.
-// CHECK: [[NEXTLIST:%.*]] = getelementptr i8, i8* [[CURLIST]], i32 8
+// CHECK: [[NEXTLIST:%.*]] = getelementptr inbounds i8, i8* [[CURLIST]], i64 8
 // CHECK: store i8* [[NEXTLIST]], i8** [[THELIST]]
 
-// CHECK: [[HFAPTRPTR:%.*]] = bitcast i8* [[CURLIST]] to i8**
-// CHECK: [[HFAPTR:%.*]] = load i8*, i8** [[HFAPTRPTR]]
-// CHECK: bitcast i8* [[HFAPTR]] to %struct.TooBigHFA*
+// CHECK: [[HFAPTRPTR:%.*]] = bitcast i8* [[CURLIST]] to %struct.TooBigHFA**
+// CHECK: [[HFAPTR:%.*]] = load %struct.TooBigHFA*, %struct.TooBigHFA** [[HFAPTRPTR]]
   __builtin_va_list thelist;
   __builtin_va_start(thelist, n);
   struct TooBigHFA h = __builtin_va_arg(thelist, struct TooBigHFA);
@@ -679,12 +678,12 @@
 
   // HVA is not indirect, so occupies its full 16 bytes on the stack. but it
   // must be properly aligned.
-// CHECK: [[ALIGN0:%.*]] = getelementptr i8, i8* [[CURLIST]], i32 15
-// CHECK: [[ALIGN1:%.*]] = ptrtoint i8* [[ALIGN0]] to i64
+// CHECK: [[ALIGN0:%.*]] = ptrtoint i8* [[CURLIST]] to i64
+// CHECK: [[ALIGN1:%.*]] = add i64 [[ALIGN0]], 15
 // CHECK: [[ALIGN2:%.*]] = and i64 [[ALIGN1]], -16
 // CHECK: [[ALIGNED_LIST:%.*]] = inttoptr i64 [[ALIGN2]] to i8*
 
-// CHECK: [[NEXTLIST:%.*]] = getelementptr i8, i8* [[ALIGNED_LIST]], i32 32
+// CHECK: [[NEXTLIST:%.*]] = getelementptr inbounds i8, i8* [[ALIGNED_LIST]], i64 32
 // CHECK: store i8* [[NEXTLIST]], i8** [[THELIST]]
 
 // CHECK: bitcast i8* [[ALIGNED_LIST]] to %struct.HVA*
@@ -705,12 +704,11 @@
 
   // TooBigHVA is not actually an HVA, so gets passed indirectly. Only 8 bytes
   // of stack consumed.
-// CHECK: [[NEXTLIST:%.*]] = getelementptr i8, i8* [[CURLIST]], i32 8
+// CHECK: [[NEXTLIST:%.*]] = getelementptr inbounds i8, i8* [[CURLIST]], i64 8
 // CHECK: store i8* [[NEXTLIST]], i8** [[THELIST]]
 
-// CHECK: [[HVAPTRPTR:%.*]] = bitcast i8* [[CURLIST]] to i8**
-// CHECK: [[HVAPTR:%.*]] = load i8*, i8** [[HVAPTRPTR]]
-// CHECK: bitcast i8* [[HVAPTR]] to %struct.TooBigHVA*
+// CHECK: [[HVAPTRPTR:%.*]] = bitcast i8* [[CURLIST]] to %struct.TooBigHVA**
+// CHECK: [[HVAPTR:%.*]] = load %struct.TooBigHVA*, %struct.TooBigHVA** [[HVAPTRPTR]]
   __builtin_va_list thelist;
   __builtin_va_start(thelist, n);
   struct TooBigHVA h = __builtin_va_arg(thelist, struct TooBigHVA);
diff --git a/test/CodeGen/arm64-be-bitfield.c b/test/CodeGen/arm64-be-bitfield.c
index b8d497c..132239a 100644
--- a/test/CodeGen/arm64-be-bitfield.c
+++ b/test/CodeGen/arm64-be-bitfield.c
@@ -7,7 +7,7 @@
 // Get the high 32-bits and then shift appropriately for big-endian.
 signed callee_b0f(struct bt3 bp11) {
 // IR: callee_b0f(i64 [[ARG:%.*]])
-// IR: store i64 [[ARG]], i64* [[PTR:%.*]]
+// IR: store i64 [[ARG]], i64* [[PTR:%.*]], align 8
 // IR: [[BITCAST:%.*]] = bitcast i64* [[PTR]] to i8*
 // IR: call void @llvm.memcpy.p0i8.p0i8.i64(i8* {{.*}}, i8* [[BITCAST]], i64 4
 // ARM: asr x0, x0, #54
diff --git a/test/CodeGen/arm64-be-hfa-vararg.c b/test/CodeGen/arm64-be-hfa-vararg.c
index 537aab5..c225724 100644
--- a/test/CodeGen/arm64-be-hfa-vararg.c
+++ b/test/CodeGen/arm64-be-hfa-vararg.c
@@ -4,7 +4,15 @@
 
 // A single member HFA must be aligned just like a non-HFA register argument.
 double callee(int a, ...) {
-// CHECK: = add i64 %{{.*}}, 8
+// CHECK: [[REGPP:%.*]] = getelementptr inbounds %struct.__va_list, %struct.__va_list* [[VA:%.*]], i32 0, i32 2
+// CHECK: [[REGP:%.*]] = load i8*, i8** [[REGPP]], align 8
+// CHECK: [[OFFSET0:%.*]] = getelementptr inbounds i8, i8* [[REGP]], i32 {{.*}}
+// CHECK: [[OFFSET1:%.*]] = getelementptr inbounds i8, i8* [[OFFSET0]], i64 8
+
+// CHECK: [[MEMPP:%.*]] = getelementptr inbounds %struct.__va_list, %struct.__va_list* [[VA:%.*]], i32 0, i32 0
+// CHECK: [[MEMP:%.*]] = load i8*, i8** [[MEMPP]], align 8
+// CHECK: [[NEXTP:%.*]] = getelementptr inbounds i8, i8* [[MEMP]], i64 8
+// CHECK: store i8* [[NEXTP]], i8** [[MEMPP]], align 8
   va_list vl;
   va_start(vl, a);
   double result = va_arg(vl, struct { double a; }).a;
diff --git a/test/CodeGen/arm64_vget.c b/test/CodeGen/arm64_vget.c
deleted file mode 100644
index 62b68ef..0000000
--- a/test/CodeGen/arm64_vget.c
+++ /dev/null
@@ -1,13 +0,0 @@
-// RUN: %clang_cc1 -O1 -triple arm64-apple-ios7 -target-feature +neon -ffreestanding -S -o - -emit-llvm %s | FileCheck %s
-// Test ARM64 SIMD vget intrinsics
-
-#include <arm_neon.h>
-
-float64_t test_vget_lane_f64(float64x1_t a1) {
-  // CHECK: test_vget_lane_f64
-  // why isn't 1 allowed as second argument?
-  return vget_lane_f64(a1, 0);
-  // CHECK: extractelement {{.*}} i32 0
-  // CHECK-NEXT: ret
-}
-
diff --git a/test/CodeGen/arm64_vset_lane.c b/test/CodeGen/arm64_vset_lane.c
deleted file mode 100644
index 0508123..0000000
--- a/test/CodeGen/arm64_vset_lane.c
+++ /dev/null
@@ -1,33 +0,0 @@
-// RUN: %clang_cc1 -O1 -triple arm64-apple-ios7 -target-feature +neon -ffreestanding -S -o - -emit-llvm %s | FileCheck %s
-// Test ARM64 SIMD set lane intrinsics INCOMPLETE
-
-#include <arm_neon.h>
-
-float16x4_t test_vset_lane_f16(float16_t *a1, float16x4_t a2) {
-  // CHECK-LABEL: test_vset_lane_f16
-  return vset_lane_f16(*a1, a2, 1);
-  // CHECK: [[A1:%[0-9]+]] = load i16, i16* %a1
-  // CHECK: insertelement <4 x i16> %a2, i16 [[A1]], i32 1
-}
-
-float16x8_t test_vsetq_lane_f16(float16_t *a1, float16x8_t a2) {
-  // CHECK-LABEL: test_vsetq_lane_f16
-  return vsetq_lane_f16(*a1, a2, 4);
-  // CHECK: [[A1:%[0-9]+]] = load i16, i16* %a1
-  // CHECK: insertelement <8 x i16> %a2, i16 [[A1]], i32 4
-}
-
-// problem with scalar_to_vector in backend.  Punt for now
-#if 0
-float64x1_t test_vset_lane_f64(float64_t a1, float64x1_t a2) {
-  // CHECK-LABEL@ test_vset_lane_f64
-  return vset_lane_f64(a1, a2, 0);
-  // CHECK@ @llvm.aarch64.neon.smaxv.i32.v8i8
-}
-#endif
-
-float64x2_t test_vsetq_lane_f64(float64_t a1, float64x2_t a2) {
-  // CHECK-LABEL: test_vsetq_lane_f64
-  return vsetq_lane_f64(a1, a2, 0);
-  // CHECK: insertelement <2 x double> %a2, double %a1, i32 0
-}
diff --git a/test/CodeGen/arm_acle.c b/test/CodeGen/arm_acle.c
index d9d788b..0884394 100644
--- a/test/CodeGen/arm_acle.c
+++ b/test/CodeGen/arm_acle.c
@@ -186,27 +186,53 @@
 
 // ARM-LABEL: test_rev16
 // ARM: llvm.bswap
-// ARM: lshr
-// ARM: shl
+// ARM: lshr {{.*}}, 16
+// ARM: shl {{.*}}, 16
 // ARM: or
 uint32_t test_rev16(uint32_t t) {
   return __rev16(t);
 }
 
 // ARM-LABEL: test_rev16l
-// ARM: llvm.bswap
-// ARM: lshr
-// ARM: shl
-// ARM: or
+// AArch32: llvm.bswap
+// AArch32: lshr {{.*}}, 16
+// AArch32: shl {{.*}}, 16
+// AArch32: or
+// AArch64: [[T1:%.*]] = lshr i64 [[IN:%.*]], 32
+// AArch64: [[T2:%.*]] = trunc i64 [[T1]] to i32
+// AArch64: [[T3:%.*]] = tail call i32 @llvm.bswap.i32(i32 [[T2]])
+// AArch64: [[T4:%.*]] = lshr i32 [[T3]], 16
+// AArch64: [[T5:%.*]] = shl i32 [[T3]], 16
+// AArch64: [[T6:%.*]] = or i32 [[T5]], [[T4]]
+// AArch64: [[T7:%.*]] = zext i32 [[T6]] to i64
+// AArch64: [[T8:%.*]] = shl nuw i64 [[T7]], 32
+// AArch64: [[T9:%.*]] = trunc i64 [[IN]] to i32
+// AArch64: [[T10:%.*]] = tail call i32 @llvm.bswap.i32(i32 [[T9]])
+// AArch64: [[T11:%.*]] = lshr i32 [[T10]], 16
+// AArch64: [[T12:%.*]] = shl i32 [[T10]], 16
+// AArch64: [[T13:%.*]] = or i32 [[T12]], [[T11]]
+// AArch64: [[T14:%.*]] = zext i32 [[T13]] to i64
+// AArch64: [[T15:%.*]] = or i64 [[T8]], [[T14]]
 long test_rev16l(long t) {
   return __rev16l(t);
 }
 
 // ARM-LABEL: test_rev16ll
-// ARM: llvm.bswap
-// ARM: lshr
-// ARM: shl
-// ARM: or
+// ARM: [[T1:%.*]] = lshr i64 [[IN:%.*]], 32
+// ARM: [[T2:%.*]] = trunc i64 [[T1]] to i32
+// ARM: [[T3:%.*]] = tail call i32 @llvm.bswap.i32(i32 [[T2]])
+// ARM: [[T4:%.*]] = lshr i32 [[T3]], 16
+// ARM: [[T5:%.*]] = shl i32 [[T3]], 16
+// ARM: [[T6:%.*]] = or i32 [[T5]], [[T4]]
+// ARM: [[T7:%.*]] = zext i32 [[T6]] to i64
+// ARM: [[T8:%.*]] = shl nuw i64 [[T7]], 32
+// ARM: [[T9:%.*]] = trunc i64 [[IN]] to i32
+// ARM: [[T10:%.*]] = tail call i32 @llvm.bswap.i32(i32 [[T9]])
+// ARM: [[T11:%.*]] = lshr i32 [[T10]], 16
+// ARM: [[T12:%.*]] = shl i32 [[T10]], 16
+// ARM: [[T13:%.*]] = or i32 [[T12]], [[T11]]
+// ARM: [[T14:%.*]] = zext i32 [[T13]] to i64
+// ARM: [[T15:%.*]] = or i64 [[T8]], [[T14]]
 uint64_t test_rev16ll(uint64_t t) {
   return __rev16ll(t);
 }
@@ -339,8 +365,8 @@
 
 /* 10.1 Special register intrinsics */
 // ARM-LABEL: test_rsr
-// AArch64: call i64 @llvm.read_register.i64(metadata !1)
-// AArch32: call i32 @llvm.read_register.i32(metadata !3)
+// AArch64: call i64 @llvm.read_register.i64(metadata ![[M0:[0-9]]])
+// AArch32: call i32 @llvm.read_register.i32(metadata ![[M2:[0-9]]])
 uint32_t test_rsr() {
 #ifdef __ARM_32BIT_STATE
   return __arm_rsr("cp1:2:c3:c4:5");
@@ -350,8 +376,8 @@
 }
 
 // ARM-LABEL: test_rsr64
-// AArch64: call i64 @llvm.read_register.i64(metadata !1)
-// AArch32: call i64 @llvm.read_register.i64(metadata !4)
+// AArch64: call i64 @llvm.read_register.i64(metadata ![[M0:[0-9]]])
+// AArch32: call i64 @llvm.read_register.i64(metadata ![[M3:[0-9]]])
 uint64_t test_rsr64() {
 #ifdef __ARM_32BIT_STATE
   return __arm_rsr64("cp1:2:c3");
@@ -361,15 +387,15 @@
 }
 
 // ARM-LABEL: test_rsrp
-// AArch64: call i64 @llvm.read_register.i64(metadata !2)
-// AArch32: call i32 @llvm.read_register.i32(metadata !5)
+// AArch64: call i64 @llvm.read_register.i64(metadata ![[M1:[0-9]]])
+// AArch32: call i32 @llvm.read_register.i32(metadata ![[M4:[0-9]]])
 void *test_rsrp() {
   return __arm_rsrp("sysreg");
 }
 
 // ARM-LABEL: test_wsr
-// AArch64: call void @llvm.write_register.i64(metadata !1, i64 %{{.*}})
-// AArch32: call void @llvm.write_register.i32(metadata !3, i32 %{{.*}})
+// AArch64: call void @llvm.write_register.i64(metadata ![[M0:[0-9]]], i64 %{{.*}})
+// AArch32: call void @llvm.write_register.i32(metadata ![[M2:[0-9]]], i32 %{{.*}})
 void test_wsr(uint32_t v) {
 #ifdef __ARM_32BIT_STATE
   __arm_wsr("cp1:2:c3:c4:5", v);
@@ -379,8 +405,8 @@
 }
 
 // ARM-LABEL: test_wsr64
-// AArch64: call void @llvm.write_register.i64(metadata !1, i64 %{{.*}})
-// AArch32: call void @llvm.write_register.i64(metadata !4, i64 %{{.*}})
+// AArch64: call void @llvm.write_register.i64(metadata ![[M0:[0-9]]], i64 %{{.*}})
+// AArch32: call void @llvm.write_register.i64(metadata ![[M3:[0-9]]], i64 %{{.*}})
 void test_wsr64(uint64_t v) {
 #ifdef __ARM_32BIT_STATE
   __arm_wsr64("cp1:2:c3", v);
@@ -390,15 +416,15 @@
 }
 
 // ARM-LABEL: test_wsrp
-// AArch64: call void @llvm.write_register.i64(metadata !2, i64 %{{.*}})
-// AArch32: call void @llvm.write_register.i32(metadata !5, i32 %{{.*}})
+// AArch64: call void @llvm.write_register.i64(metadata ![[M1:[0-9]]], i64 %{{.*}})
+// AArch32: call void @llvm.write_register.i32(metadata ![[M4:[0-9]]], i32 %{{.*}})
 void test_wsrp(void *v) {
   __arm_wsrp("sysreg", v);
 }
 
-// AArch32: !3 = !{!"cp1:2:c3:c4:5"}
-// AArch32: !4 = !{!"cp1:2:c3"}
-// AArch32: !5 = !{!"sysreg"}
+// AArch32: ![[M2]] = !{!"cp1:2:c3:c4:5"}
+// AArch32: ![[M3]] = !{!"cp1:2:c3"}
+// AArch32: ![[M4]] = !{!"sysreg"}
 
-// AArch64: !1 = !{!"1:2:3:4:5"}
-// AArch64: !2 = !{!"sysreg"}
+// AArch64: ![[M0]] = !{!"1:2:3:4:5"}
+// AArch64: ![[M1]] = !{!"sysreg"}
diff --git a/test/CodeGen/arm_function_epilog.cpp b/test/CodeGen/arm_function_epilog.cpp
new file mode 100644
index 0000000..0089507
--- /dev/null
+++ b/test/CodeGen/arm_function_epilog.cpp
@@ -0,0 +1,17 @@
+// REQUIRES: arm-registered-target
+// RUN: %clang_cc1 -triple armv7-none-linux-androideabi -target-abi aapcs-linux -mfloat-abi hard -x c++ -emit-llvm %s -o - | FileCheck %s
+
+struct Vec2 {
+    union { struct { float x, y; };
+            float data[2];
+    };
+};
+
+// CHECK: define arm_aapcs_vfpcc %struct.Vec2 @_Z7getVec2v()
+// CHECK: ret %struct.Vec2
+Vec2 getVec2() {
+    Vec2 out;
+    union { Vec2* v; unsigned char* u; } x;
+    x.v = &out;
+    return out;
+}
diff --git a/test/CodeGen/armv7k-abi.c b/test/CodeGen/armv7k-abi.c
new file mode 100644
index 0000000..9b57de8
--- /dev/null
+++ b/test/CodeGen/armv7k-abi.c
@@ -0,0 +1,93 @@
+// RUN: %clang_cc1 -triple thumbv7k-apple-watchos2.0 -target-abi aapcs16 -target-cpu cortex-a7 %s -o - -emit-llvm | FileCheck %s
+
+#include <arm_neon.h>
+
+// Make sure 64 and 128 bit types are naturally aligned by the v7k ABI:
+
+// CHECK: target datalayout = "e-m:o-p:32:32-i64:64-a:0:32-n32-S128"
+
+typedef struct {
+  float arr[4];
+} HFA;
+
+// CHECK: define void @simple_hfa([4 x float] %h.coerce)
+void simple_hfa(HFA h) {}
+
+// CHECK: define %struct.HFA @return_simple_hfa
+HFA return_simple_hfa() {}
+
+typedef struct {
+  double arr[4];
+} BigHFA;
+
+// We don't want any padding type to be included by Clang when using the
+// APCS-VFP ABI; that needs to be handled by LLVM if needed.
+
+// CHECK: void @no_padding(i32 %r0, i32 %r1, i32 %r2, [4 x double] %d0_d3.coerce, [4 x double] %d4_d7.coerce, [4 x double] %sp.coerce, i64 %split)
+void no_padding(int r0, int r1, int r2, BigHFA d0_d3, BigHFA d4_d7, BigHFA sp,
+                long long split) {}
+
+// Structs larger than 16 bytes should be passed indirectly in space allocated
+// by the caller (a pointer to this storage should be what occurs in the arg
+// list).
+
+typedef struct {
+  float x;
+  long long y;
+  double z;
+} BigStruct;
+
+// CHECK: define void @big_struct_indirect(%struct.BigStruct* %b)
+void big_struct_indirect(BigStruct b) {}
+
+// CHECK: define void @return_big_struct_indirect(%struct.BigStruct* noalias sret
+BigStruct return_big_struct_indirect() {}
+
+// Structs smaller than 16 bytes should be passed directly, and coerced to
+// either [N x i32] or [N x i64] depending on alignment requirements.
+
+typedef struct {
+  float x;
+  int y;
+  double z;
+} SmallStruct;
+
+// CHECK: define void @small_struct_direct([2 x i64] %s.coerce)
+void small_struct_direct(SmallStruct s) {}
+
+// CHECK: define [4 x i32] @return_small_struct_direct()
+SmallStruct return_small_struct_direct() {}
+
+typedef struct {
+  float x;
+  int y;
+  int z;
+} SmallStructSmallAlign;
+
+// CHECK: define void @small_struct_align_direct([3 x i32] %s.coerce)
+void small_struct_align_direct(SmallStructSmallAlign s) {}
+
+typedef struct {
+  char x;
+  short y;
+} PaddedSmallStruct;
+
+// CHECK: define i32 @return_padded_small_struct()
+PaddedSmallStruct return_padded_small_struct() {}
+
+typedef struct {
+  char arr[7];
+} OddlySizedStruct;
+
+// CHECK: define [2 x i32] @return_oddly_sized_struct()
+OddlySizedStruct return_oddly_sized_struct() {}
+
+// CHECK: define <4 x float> @test_va_arg_vec(i8* %l)
+// CHECK:   [[ALIGN_TMP:%.*]] = add i32 {{%.*}}, 15
+// CHECK:   [[ALIGNED:%.*]] = and i32 [[ALIGN_TMP]], -16
+// CHECK:   [[ALIGNED_I8:%.*]] = inttoptr i32 [[ALIGNED]] to i8*
+// CHECK:   [[ALIGNED_VEC:%.*]] = bitcast i8* [[ALIGNED_I8]] to <4 x float>
+// CHECK:   load <4 x float>, <4 x float>* [[ALIGNED_VEC]], align 16
+float32x4_t test_va_arg_vec(__builtin_va_list l) {
+  return __builtin_va_arg(l, float32x4_t);
+}
diff --git a/test/CodeGen/asm-attrs.c b/test/CodeGen/asm-attrs.c
new file mode 100644
index 0000000..ae72879
--- /dev/null
+++ b/test/CodeGen/asm-attrs.c
@@ -0,0 +1,33 @@
+// RUN: %clang_cc1 -triple armv7-apple-darwin -emit-llvm %s -o - | FileCheck %s
+
+// CHECK: call i32 asm "foo0", {{.*}} [[READNONE:#[0-9]+]]
+// CHECK: call i32 asm "foo1", {{.*}} [[READNONE]]
+// CHECK: call i32 asm "foo2", {{.*}} [[NOATTRS:#[0-9]+]]
+// CHECK: call i32 asm sideeffect "foo3", {{.*}} [[NOATTRS]]
+// CHECK: call i32 asm "foo4", {{.*}} [[READONLY:#[0-9]+]]
+// CHECK: call i32 asm "foo5", {{.*}} [[READONLY]]
+// CHECK: call i32 asm "foo6", {{.*}} [[NOATTRS]]
+// CHECK: call void asm sideeffect "foo7", {{.*}} [[NOATTRS]]
+// CHECK: call void asm "foo8", {{.*}} [[NOATTRS]]
+
+// CHECK: attributes [[READNONE]] = { nounwind readnone }
+// CHECK: attributes [[NOATTRS]] = { nounwind }
+// CHECK: attributes [[READONLY]] = { nounwind readonly }
+
+int g0, g1;
+
+struct S {
+  int i;
+} g2;
+
+void test_attrs(int a) {
+  __asm__ ("foo0" : "=r"(g1) : "r"(a));
+  __asm__ ("foo1" : "=r"(g1) : "r"(a) : "cc");
+  __asm__ ("foo2" : "=r"(g1) : "r"(a) : "memory");
+  __asm__ volatile("foo3" : "=r"(g1) : "r"(a));
+  __asm__ ("foo4" : "=r"(g1) : "r"(a), "m"(g0));
+  __asm__ ("foo5" : "=r"(g1) : "r"(a), "Q"(g0));
+  __asm__ ("foo6" : "=r"(g1), "=m"(g0) : "r"(a));
+  __asm__ ("foo7" : : "r"(a));
+  __asm__ ("foo8" : "=r"(g2) : "r"(a));
+}
diff --git a/test/CodeGen/asm-unicode.S b/test/CodeGen/asm-unicode.S
new file mode 100644
index 0000000..f4edbe9
--- /dev/null
+++ b/test/CodeGen/asm-unicode.S
@@ -0,0 +1,12 @@
+// RUN: %clang -S %s -o - | FileCheck %s
+.macro  my_macro, trace=1, uaccess=1
+.if \uaccess
+// CHECK: .if \uaccess
+// CHECK-NOT: .if 곎ss
+// CHECK: my_macro trace=1
+        my_macro trace=1
+.endif
+.endm
+
+foo:
+        my_macro trace=0
diff --git a/test/CodeGen/asm_64.c b/test/CodeGen/asm_64.c
new file mode 100644
index 0000000..72610e1
--- /dev/null
+++ b/test/CodeGen/asm_64.c
@@ -0,0 +1,53 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-unknown -emit-llvm %s -o - | FileCheck %s
+
+// CHECK-LABEL: @t1
+void t1() {
+  __asm__ ("mov r8w, 100;");
+  // CHECK: call void asm sideeffect "mov r8w, 100;"
+  __asm__ ("mov r8d, 100;");
+  // CHECK: call void asm sideeffect "mov r8d, 100;"
+  __asm__ ("mov r8b, 100;");
+  // CHECK: call void asm sideeffect "mov r8b, 100;"
+  __asm__ ("mov r9w, 100;");
+  // CHECK: call void asm sideeffect "mov r9w, 100;"
+  __asm__ ("mov r9d, 100;");
+  // CHECK: call void asm sideeffect "mov r9d, 100;"
+  __asm__ ("mov r9b, 100;");
+  // CHECK: call void asm sideeffect "mov r9b, 100;"
+  __asm__ ("mov r10w, 100;");
+  // CHECK: call void asm sideeffect "mov r10w, 100;"
+  __asm__ ("mov r10d, 100;");
+  // CHECK: call void asm sideeffect "mov r10d, 100;"
+  __asm__ ("mov r10b, 100;");
+  // CHECK: call void asm sideeffect "mov r10b, 100;"
+  __asm__ ("mov r11w, 100;");
+  // CHECK: call void asm sideeffect "mov r11w, 100;"
+  __asm__ ("mov r11d, 100;");
+  // CHECK: call void asm sideeffect "mov r11d, 100;"
+  __asm__ ("mov r11b, 100;");
+  // CHECK: call void asm sideeffect "mov r11b, 100;"
+  __asm__ ("mov r12w, 100;");
+  // CHECK: call void asm sideeffect "mov r12w, 100;"
+  __asm__ ("mov r12d, 100;");
+  // CHECK: call void asm sideeffect "mov r12d, 100;"
+  __asm__ ("mov r12b, 100;");
+  // CHECK: call void asm sideeffect "mov r12b, 100;"
+  __asm__ ("mov r13w, 100;");
+  // CHECK: call void asm sideeffect "mov r13w, 100;"
+  __asm__ ("mov r13d, 100;");
+  // CHECK: call void asm sideeffect "mov r13d, 100;"
+  __asm__ ("mov r13b, 100;");
+  // CHECK: call void asm sideeffect "mov r13b, 100;"
+  __asm__ ("mov r14w, 100;");
+  // CHECK: call void asm sideeffect "mov r14w, 100;"
+  __asm__ ("mov r14d, 100;");
+  // CHECK: call void asm sideeffect "mov r14d, 100;"
+  __asm__ ("mov r14b, 100;");
+  // CHECK: call void asm sideeffect "mov r14b, 100;"
+  __asm__ ("mov r15w, 100;");
+  // CHECK: call void asm sideeffect "mov r15w, 100;"
+  __asm__ ("mov r15d, 100;");
+  // CHECK: call void asm sideeffect "mov r15d, 100;"
+  __asm__ ("mov r15b, 100;");
+  // CHECK: call void asm sideeffect "mov r15b, 100;"
+}
diff --git a/test/CodeGen/atomic-arm64.c b/test/CodeGen/atomic-arm64.c
index 98f27ab..5cae3d1 100644
--- a/test/CodeGen/atomic-arm64.c
+++ b/test/CodeGen/atomic-arm64.c
@@ -21,7 +21,7 @@
 extern _Atomic(pointer_pair_t) a_pointer_pair;
 extern _Atomic(pointer_quad_t) a_pointer_quad;
 
-// CHECK:    define void @test0()
+// CHECK-LABEL:define void @test0()
 // CHECK:      [[TEMP:%.*]] = alloca i8, align 1
 // CHECK-NEXT: store i8 1, i8* [[TEMP]]
 // CHECK-NEXT: [[T0:%.*]] = load i8, i8* [[TEMP]], align 1
@@ -30,7 +30,7 @@
   __c11_atomic_store(&a_bool, 1, memory_order_seq_cst);
 }
 
-// CHECK:    define void @test1()
+// CHECK-LABEL:define void @test1()
 // CHECK:      [[TEMP:%.*]] = alloca float, align 4
 // CHECK-NEXT: store float 3.000000e+00, float* [[TEMP]]
 // CHECK-NEXT: [[T0:%.*]] = bitcast float* [[TEMP]] to i32*
@@ -40,7 +40,7 @@
   __c11_atomic_store(&a_float, 3, memory_order_seq_cst);
 }
 
-// CHECK:    define void @test2()
+// CHECK-LABEL:define void @test2()
 // CHECK:      [[TEMP:%.*]] = alloca i8*, align 8
 // CHECK-NEXT: store i8* @a_bool, i8** [[TEMP]]
 // CHECK-NEXT: [[T0:%.*]] = bitcast i8** [[TEMP]] to i64*
@@ -50,24 +50,25 @@
   __c11_atomic_store(&a_pointer, &a_bool, memory_order_seq_cst);
 }
 
-// CHECK:    define void @test3(
+// CHECK-LABEL:define void @test3(
 // CHECK:      [[PAIR:%.*]] = alloca [[PAIR_T:%.*]], align 8
 // CHECK-NEXT: [[TEMP:%.*]] = alloca [[PAIR_T]], align 8
 // CHECK:      llvm.memcpy
 // CHECK-NEXT: [[T0:%.*]] = bitcast [[PAIR_T]]* [[TEMP]] to i128*
-// CHECK-NEXT: [[T1:%.*]] = load i128, i128* [[T0]], align 16
+// CHECK-NEXT: [[T1:%.*]] = load i128, i128* [[T0]], align 8
 // CHECK-NEXT: store atomic i128 [[T1]], i128* bitcast ([[PAIR_T]]* @a_pointer_pair to i128*) seq_cst, align 16
 void test3(pointer_pair_t pair) {
   __c11_atomic_store(&a_pointer_pair, pair, memory_order_seq_cst);
 }
 
-// CHECK:    define void @test4([[QUAD_T:%.*]]*
+// CHECK-LABEL:define void @test4(
 // CHECK:      [[TEMP:%.*]] = alloca [[QUAD_T:%.*]], align 8
 // CHECK-NEXT: [[T0:%.*]] = bitcast [[QUAD_T]]* [[TEMP]] to i8*
 // CHECK-NEXT: [[T1:%.*]] = bitcast [[QUAD_T]]* {{%.*}} to i8*
 // CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[T0]], i8* [[T1]], i64 32, i32 8, i1 false)
-// CHECK-NEXT: [[T0:%.*]] = bitcast [[QUAD_T]]* [[TEMP]] to i8*
-// CHECK-NEXT: call void @__atomic_store(i64 32, i8* bitcast ([[QUAD_T]]* @a_pointer_quad to i8*), i8* [[T0]], i32 5)
+// CHECK-NEXT: [[T0:%.*]] = bitcast [[QUAD_T]]* [[TEMP]] to i256*
+// CHECK-NEXT: [[T1:%.*]] = bitcast i256* [[T0]] to i8*
+// CHECK-NEXT: call void @__atomic_store(i64 32, i8* bitcast ([[QUAD_T]]* @a_pointer_quad to i8*), i8* [[T1]], i32 5)
 void test4(pointer_quad_t quad) {
   __c11_atomic_store(&a_pointer_quad, quad, memory_order_seq_cst);
 }
diff --git a/test/CodeGen/atomic-ops-libcall.c b/test/CodeGen/atomic-ops-libcall.c
index e55a1bd..0093a8c 100644
--- a/test/CodeGen/atomic-ops-libcall.c
+++ b/test/CodeGen/atomic-ops-libcall.c
@@ -35,3 +35,82 @@
   // Note, the GNU builtins do not multiply by sizeof(T)!
   return __atomic_fetch_sub(p, 4, memory_order_relaxed);
 }
+
+int test_atomic_fetch_add(int *p) {
+  // CHECK: test_atomic_fetch_add
+  // CHECK: {{%[^ ]*}} = tail call i32 @__atomic_fetch_add_4(i8* {{%[0-9]+}}, i32 55, i32 5)
+  return __atomic_fetch_add(p, 55, memory_order_seq_cst);
+}
+
+int test_atomic_fetch_sub(int *p) {
+  // CHECK: test_atomic_fetch_sub
+  // CHECK: {{%[^ ]*}} = tail call i32 @__atomic_fetch_sub_4(i8* {{%[0-9]+}}, i32 55, i32 5)
+  return __atomic_fetch_sub(p, 55, memory_order_seq_cst);
+}
+
+int test_atomic_fetch_and(int *p) {
+  // CHECK: test_atomic_fetch_and
+  // CHECK: {{%[^ ]*}} = tail call i32 @__atomic_fetch_and_4(i8* {{%[0-9]+}}, i32 55, i32 5)
+  return __atomic_fetch_and(p, 55, memory_order_seq_cst);
+}
+
+int test_atomic_fetch_or(int *p) {
+  // CHECK: test_atomic_fetch_or
+  // CHECK: {{%[^ ]*}} = tail call i32 @__atomic_fetch_or_4(i8* {{%[0-9]+}}, i32 55, i32 5)
+  return __atomic_fetch_or(p, 55, memory_order_seq_cst);
+}
+
+int test_atomic_fetch_xor(int *p) {
+  // CHECK: test_atomic_fetch_xor
+  // CHECK: {{%[^ ]*}} = tail call i32 @__atomic_fetch_xor_4(i8* {{%[0-9]+}}, i32 55, i32 5)
+  return __atomic_fetch_xor(p, 55, memory_order_seq_cst);
+}
+
+int test_atomic_fetch_nand(int *p) {
+  // CHECK: test_atomic_fetch_nand
+  // CHECK: {{%[^ ]*}} = tail call i32 @__atomic_fetch_nand_4(i8* {{%[0-9]+}}, i32 55, i32 5)
+  return __atomic_fetch_nand(p, 55, memory_order_seq_cst);
+}
+
+int test_atomic_add_fetch(int *p) {
+  // CHECK: test_atomic_add_fetch
+  // CHECK: [[CALL:%[^ ]*]] = tail call i32 @__atomic_fetch_add_4(i8* {{%[0-9]+}}, i32 55, i32 5)
+  // CHECK: {{%[^ ]*}} = add i32 [[CALL]], 55
+  return __atomic_add_fetch(p, 55, memory_order_seq_cst);
+}
+
+int test_atomic_sub_fetch(int *p) {
+  // CHECK: test_atomic_sub_fetch
+  // CHECK: [[CALL:%[^ ]*]] = tail call i32 @__atomic_fetch_sub_4(i8* {{%[0-9]+}}, i32 55, i32 5)
+  // CHECK: {{%[^ ]*}} = add i32 [[CALL]], -55
+  return __atomic_sub_fetch(p, 55, memory_order_seq_cst);
+}
+
+int test_atomic_and_fetch(int *p) {
+  // CHECK: test_atomic_and_fetch
+  // CHECK: [[CALL:%[^ ]*]] = tail call i32 @__atomic_fetch_and_4(i8* {{%[0-9]+}}, i32 55, i32 5)
+  // CHECK: {{%[^ ]*}} = and i32 [[CALL]], 55
+  return __atomic_and_fetch(p, 55, memory_order_seq_cst);
+}
+
+int test_atomic_or_fetch(int *p) {
+  // CHECK: test_atomic_or_fetch
+  // CHECK: [[CALL:%[^ ]*]] = tail call i32 @__atomic_fetch_or_4(i8* {{%[0-9]+}}, i32 55, i32 5)
+  // CHECK: {{%[^ ]*}} = or i32 [[CALL]], 55
+  return __atomic_or_fetch(p, 55, memory_order_seq_cst);
+}
+
+int test_atomic_xor_fetch(int *p) {
+  // CHECK: test_atomic_xor_fetch
+  // CHECK: [[CALL:%[^ ]*]] = tail call i32 @__atomic_fetch_xor_4(i8* {{%[0-9]+}}, i32 55, i32 5)
+  // CHECK: {{%[^ ]*}} = xor i32 [[CALL]], 55
+  return __atomic_xor_fetch(p, 55, memory_order_seq_cst);
+}
+
+int test_atomic_nand_fetch(int *p) {
+  // CHECK: test_atomic_nand_fetch
+  // CHECK: [[CALL:%[^ ]*]] = tail call i32 @__atomic_fetch_nand_4(i8* {{%[0-9]+}}, i32 55, i32 5)
+  // CHECK: [[OR:%[^ ]*]] = or i32 [[CALL]], -56
+  // CHECK: {{%[^ ]*}} = xor i32 [[OR]], 55
+  return __atomic_nand_fetch(p, 55, memory_order_seq_cst);
+}
diff --git a/test/CodeGen/atomic-ops.c b/test/CodeGen/atomic-ops.c
index 13ab5f1..353f77c 100644
--- a/test/CodeGen/atomic-ops.c
+++ b/test/CodeGen/atomic-ops.c
@@ -1,4 +1,5 @@
 // RUN: %clang_cc1 %s -emit-llvm -o - -ffreestanding -triple=i686-apple-darwin9 | FileCheck %s
+// REQUIRES: x86-registered-target
 
 // Also test serialization of atomic operations here, to avoid duplicating the
 // test.
@@ -178,8 +179,8 @@
   // CHECK-LABEL: @fd1
   // CHECK: [[RETVAL:%.*]] = alloca %struct.S, align 4
   // CHECK: [[RET:%.*]]    = alloca %struct.S, align 4
-  // CHECK: [[CALL:%.*]]   = call i64 @__atomic_load_8(
   // CHECK: [[CAST:%.*]]   = bitcast %struct.S* [[RET]] to i64*
+  // CHECK: [[CALL:%.*]]   = call i64 @__atomic_load_8(
   // CHECK: store i64 [[CALL]], i64* [[CAST]], align 4
   struct S ret;
   __atomic_load(a, &ret, memory_order_seq_cst);
@@ -194,8 +195,9 @@
   // CHECK-NEXT: store %struct.S* %b, %struct.S** [[B_ADDR]], align 4
   // CHECK-NEXT: [[LOAD_A_PTR:%.*]] = load %struct.S*, %struct.S** [[A_ADDR]], align 4
   // CHECK-NEXT: [[LOAD_B_PTR:%.*]] = load %struct.S*, %struct.S** [[B_ADDR]], align 4
-  // CHECK-NEXT: [[COERCED_A:%.*]] = bitcast %struct.S* [[LOAD_A_PTR]] to i8*
+  // CHECK-NEXT: [[COERCED_A_TMP:%.*]] = bitcast %struct.S* [[LOAD_A_PTR]] to i64*
   // CHECK-NEXT: [[COERCED_B:%.*]] = bitcast %struct.S* [[LOAD_B_PTR]] to i64*
+  // CHECK-NEXT: [[COERCED_A:%.*]] = bitcast i64* [[COERCED_A_TMP]] to i8*
   // CHECK-NEXT: [[LOAD_B:%.*]] = load i64, i64* [[COERCED_B]], align 4
   // CHECK-NEXT: call void @__atomic_store_8(i8* [[COERCED_A]], i64 [[LOAD_B]],
   // CHECK-NEXT: ret void
@@ -213,11 +215,12 @@
   // CHECK-NEXT: [[LOAD_A_PTR:%.*]] = load %struct.S*, %struct.S** [[A_ADDR]], align 4
   // CHECK-NEXT: [[LOAD_B_PTR:%.*]] = load %struct.S*, %struct.S** [[B_ADDR]], align 4
   // CHECK-NEXT: [[LOAD_C_PTR:%.*]] = load %struct.S*, %struct.S** [[C_ADDR]], align 4
-  // CHECK-NEXT: [[COERCED_A:%.*]] = bitcast %struct.S* [[LOAD_A_PTR]] to i8*
+  // CHECK-NEXT: [[COERCED_A_TMP:%.*]] = bitcast %struct.S* [[LOAD_A_PTR]] to i64*
   // CHECK-NEXT: [[COERCED_B:%.*]] = bitcast %struct.S* [[LOAD_B_PTR]] to i64*
+  // CHECK-NEXT: [[COERCED_C:%.*]] = bitcast %struct.S* [[LOAD_C_PTR]] to i64*
+  // CHECK-NEXT: [[COERCED_A:%.*]] = bitcast i64* [[COERCED_A_TMP]] to i8*
   // CHECK-NEXT: [[LOAD_B:%.*]] = load i64, i64* [[COERCED_B]], align 4
   // CHECK-NEXT: [[CALL:%.*]] = call i64 @__atomic_exchange_8(i8* [[COERCED_A]], i64 [[LOAD_B]],
-  // CHECK-NEXT: [[COERCED_C:%.*]] = bitcast %struct.S* [[LOAD_C_PTR]] to i64*
   // CHECK-NEXT: store i64 [[CALL]], i64* [[COERCED_C]], align 4
 
   __atomic_exchange(a, b, c, memory_order_seq_cst);
@@ -234,9 +237,11 @@
   // CHECK-NEXT: [[LOAD_A_PTR:%.*]] = load %struct.S*, %struct.S** [[A_ADDR]], align 4
   // CHECK-NEXT: [[LOAD_B_PTR:%.*]] = load %struct.S*, %struct.S** [[B_ADDR]], align 4
   // CHECK-NEXT: [[LOAD_C_PTR:%.*]] = load %struct.S*, %struct.S** [[C_ADDR]], align 4
-  // CHECK-NEXT: [[COERCED_A:%.*]] = bitcast %struct.S* [[LOAD_A_PTR]] to i8*
-  // CHECK-NEXT: [[COERCED_B:%.*]] = bitcast %struct.S* [[LOAD_B_PTR]] to i8*
+  // CHECK-NEXT: [[COERCED_A_TMP:%.*]] = bitcast %struct.S* [[LOAD_A_PTR]] to i64*
+  // CHECK-NEXT: [[COERCED_B_TMP:%.*]] = bitcast %struct.S* [[LOAD_B_PTR]] to i64*
   // CHECK-NEXT: [[COERCED_C:%.*]] = bitcast %struct.S* [[LOAD_C_PTR]] to i64*
+  // CHECK-NEXT: [[COERCED_A:%.*]] = bitcast i64* [[COERCED_A_TMP]] to i8*
+  // CHECK-NEXT: [[COERCED_B:%.*]] = bitcast i64* [[COERCED_B_TMP]] to i8*
   // CHECK-NEXT: [[LOAD_C:%.*]] = load i64, i64* [[COERCED_C]], align 4
   // CHECK-NEXT: [[CALL:%.*]] = call zeroext i1 @__atomic_compare_exchange_8(i8* [[COERCED_A]], i8* [[COERCED_B]], i64 [[LOAD_C]]
   // CHECK-NEXT: ret i1 [[CALL]]
@@ -311,6 +316,8 @@
   char c[17];
 } seventeen;
 
+struct Incomplete;
+
 int lock_free(struct Incomplete *incomplete) {
   // CHECK-LABEL: @lock_free
 
diff --git a/test/CodeGen/atomic_ops.c b/test/CodeGen/atomic_ops.c
index 980ecd2..0af1d38 100644
--- a/test/CodeGen/atomic_ops.c
+++ b/test/CodeGen/atomic_ops.c
@@ -1,4 +1,5 @@
-// XFAIL: hexagon
+// XFAIL: hexagon,sparc
+//        (due to not having native load atomic support)
 // RUN: %clang_cc1 -emit-llvm %s -o - | FileCheck %s
 // RUN: %clang_cc1 -triple mips-linux-gnu -emit-llvm %s -o - | FileCheck %s
 
diff --git a/test/CodeGen/attr-disable-tail-calls.c b/test/CodeGen/attr-disable-tail-calls.c
index 8141349..d47b14f 100644
--- a/test/CodeGen/attr-disable-tail-calls.c
+++ b/test/CodeGen/attr-disable-tail-calls.c
@@ -1,11 +1,19 @@
-// RUN: %clang_cc1 -triple x86_64-apple-macosx10.7.0 %s -emit-llvm -mdisable-tail-calls -o - | FileCheck %s -check-prefix=CHECK -check-prefix=DISABLE
-// RUN: %clang_cc1 -triple x86_64-apple-macosx10.7.0 %s -emit-llvm -o - | FileCheck %s -check-prefix=CHECK -check-prefix=ENABLE
+// RUN: %clang_cc1 -triple x86_64-apple-macosx10.7.0 %s -emit-llvm -mdisable-tail-calls -o - | FileCheck %s -check-prefix=DISABLE
+// RUN: %clang_cc1 -triple x86_64-apple-macosx10.7.0 %s -emit-llvm -o - | FileCheck %s -check-prefix=ENABLE
 
-// CHECK: define i32 @f1() [[ATTR:#[0-9]+]] {
+// DISABLE: define i32 @f1() [[ATTRTRUE:#[0-9]+]] {
+// DISABLE: define i32 @f2() [[ATTRTRUE]] {
+// ENABLE: define i32 @f1() [[ATTRFALSE:#[0-9]+]] {
+// ENABLE: define i32 @f2() [[ATTRTRUE:#[0-9]+]] {
 
 int f1() {
   return 0;
 }
 
-// DISABLE: attributes [[ATTR]] = { {{.*}} "disable-tail-calls"="true" {{.*}} }
-// ENABLE: attributes [[ATTR]] = { {{.*}} "disable-tail-calls"="false" {{.*}} }
+int f2() __attribute__((disable_tail_calls)) {
+  return 0;
+}
+
+// DISABLE: attributes [[ATTRTRUE]] = { {{.*}}"disable-tail-calls"="true"{{.*}} }
+// ENABLE: attributes [[ATTRFALSE]] = { {{.*}}"disable-tail-calls"="false"{{.*}} }
+// ENABLE: attributes [[ATTRTRUE]] = { {{.*}}"disable-tail-calls"="true"{{.*}} }
diff --git a/test/CodeGen/attr-func-def.c b/test/CodeGen/attr-func-def.c
new file mode 100644
index 0000000..ceafa12
--- /dev/null
+++ b/test/CodeGen/attr-func-def.c
@@ -0,0 +1,18 @@
+// RUN: %clang_cc1 -triple x86_64-apple-macosx10.10.0 -emit-llvm -Oz -o - %s | FileCheck %s
+
+// CHECK: define i32 @foo2(i32 %a) [[ATTRS2:#[0-9]+]] {
+// CHECK: define i32 @foo1(i32 %a) [[ATTRS1:#[0-9]+]] {
+
+int foo1(int);
+
+int foo2(int a) {
+  return foo1(a + 2);
+}
+
+__attribute__((optnone))
+int foo1(int a) {
+    return a + 1;
+}
+
+// CHECK: attributes [[ATTRS2]] = { {{.*}}optsize{{.*}} }
+// CHECK: attributes [[ATTRS1]] = { {{.*}}optnone{{.*}} }
diff --git a/test/CodeGen/attr-minsize.cpp b/test/CodeGen/attr-minsize.cpp
index 0f07725..1e5c634 100644
--- a/test/CodeGen/attr-minsize.cpp
+++ b/test/CodeGen/attr-minsize.cpp
@@ -1,9 +1,9 @@
-// RUN: %clang_cc1 -Oz -emit-llvm %s -o - | FileCheck %s -check-prefix=Oz
-// RUN: %clang_cc1     -emit-llvm %s -o - | FileCheck %s -check-prefix=OTHER
-// RUN: %clang_cc1 -O1 -emit-llvm %s -o - | FileCheck %s -check-prefix=OTHER
-// RUN: %clang_cc1 -O2 -emit-llvm %s -o - | FileCheck %s -check-prefix=OTHER
-// RUN: %clang_cc1 -O3 -emit-llvm %s -o - | FileCheck %s -check-prefix=OTHER
-// RUN: %clang_cc1 -Os -emit-llvm %s -o - | FileCheck %s -check-prefix=OTHER
+// RUN: %clang_cc1 -Oz -disable-llvm-optzns -emit-llvm %s -o - | FileCheck %s -check-prefix=Oz
+// RUN: %clang_cc1     -disable-llvm-optzns -emit-llvm %s -o - | FileCheck %s -check-prefix=OTHER
+// RUN: %clang_cc1 -O1 -disable-llvm-optzns -emit-llvm %s -o - | FileCheck %s -check-prefix=OTHER
+// RUN: %clang_cc1 -O2 -disable-llvm-optzns -emit-llvm %s -o - | FileCheck %s -check-prefix=OTHER
+// RUN: %clang_cc1 -O3 -disable-llvm-optzns -emit-llvm %s -o - | FileCheck %s -check-prefix=OTHER
+// RUN: %clang_cc1 -Os -disable-llvm-optzns -emit-llvm %s -o - | FileCheck %s -check-prefix=OTHER
 // Check that we set the minsize attribute on each function
 // when Oz optimization level is set.
 
diff --git a/test/CodeGen/attr-mode-vector-types.c b/test/CodeGen/attr-mode-vector-types.c
new file mode 100644
index 0000000..3c028fb
--- /dev/null
+++ b/test/CodeGen/attr-mode-vector-types.c
@@ -0,0 +1,41 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-unknown %s -emit-llvm -o - | FileCheck %s
+
+typedef int __attribute__((mode(byte))) __attribute__((vector_size(4))) vec_t1;
+typedef int __attribute__((mode(QI))) __attribute__((vector_size(8))) vec_t2;
+typedef int __attribute__((mode(SI))) __attribute__((vector_size(16))) vec_t3;
+typedef int __attribute__((mode(DI))) __attribute__((vector_size(64)))vec_t4;
+typedef float __attribute__((mode(SF))) __attribute__((vector_size(128))) vec_t5;
+typedef float __attribute__((mode(DF))) __attribute__((vector_size(256))) vec_t6;
+
+void check() {
+  // CHECK: alloca <4 x i8>
+  vec_t1 v1;
+  // CHECK: alloca <8 x i8>
+  vec_t2 v2;
+  // CHECK: alloca <4 x i32>
+  vec_t3 v3;
+  // CHECK: alloca <8 x i64>
+  vec_t4 v4;
+  // CHECK: alloca <32 x float>
+  vec_t5 v5;
+  // CHECK: alloca <32 x double>
+  vec_t6 v6;
+}
+
+// CHECK: ret i32 4
+int check_size1() { return sizeof(vec_t1); }
+
+// CHECK: ret i32 8
+int check_size2() { return sizeof(vec_t2); }
+
+// CHECK: ret i32 16
+int check_size3() { return sizeof(vec_t3); }
+
+// CHECK: ret i32 64
+int check_size4() { return sizeof(vec_t4); }
+
+// CHECK: ret i32 128
+int check_size5() { return sizeof(vec_t5); }
+
+// CHECK: ret i32 256
+int check_size6() { return sizeof(vec_t6); }
diff --git a/test/CodeGen/attr-no-tail.c b/test/CodeGen/attr-no-tail.c
new file mode 100644
index 0000000..1c9aca6
--- /dev/null
+++ b/test/CodeGen/attr-no-tail.c
@@ -0,0 +1,33 @@
+// RUN: %clang_cc1 -triple x86_64-apple-macosx10.7.0 %s -emit-llvm -o - | FileCheck %s
+
+// CHECK: %{{[a-z0-9]+}} = notail call i32 @callee0(i32 %
+// CHECK: %{{[a-z0-9]+}} = notail call i32 @callee1(i32 %
+
+// Check that indirect calls do not have the notail marker.
+// CHECK: store i32 (i32)* @callee1, i32 (i32)** [[ALLOCA1:%[A-Za-z0-9]+]], align 8
+// CHECK: [[INDIRFUNC:%[0-9]+]] = load i32 (i32)*, i32 (i32)** [[ALLOCA1]], align 8
+// CHECK: %{{[a-z0-9]+}} = call i32 [[INDIRFUNC]](i32 %{{[0-9]+}}
+
+// CHECK: %{{[a-z0-9]+}} = call i32 @callee2(i32 %
+
+int callee0(int a) __attribute__((not_tail_called)) {
+  return a + 1;
+}
+
+int callee1(int) __attribute__((not_tail_called));
+
+int callee2(int);
+
+typedef int (*FuncTy)(int);
+
+int foo0(int a) {
+  if (a > 1)
+    return callee0(a);
+  if (a == 1)
+    return callee1(a);
+  if (a < 0) {
+    FuncTy F = callee1;
+    return (*F)(a);
+  }
+  return callee2(a);
+}
diff --git a/test/CodeGen/attr-nodebug.c b/test/CodeGen/attr-nodebug.c
index 66caa2b..8ffe336 100644
--- a/test/CodeGen/attr-nodebug.c
+++ b/test/CodeGen/attr-nodebug.c
@@ -1,12 +1,32 @@
-// RUN: %clang_cc1 -g -emit-llvm -o %t %s
-// RUN: not grep 'call void @llvm.dbg.func.start' %t
+// RUN: %clang_cc1 -debug-info-kind=limited -emit-llvm -o - %s | FileCheck %s
 
 void t1() __attribute__((nodebug));
 
 void t1()
 {
   int a = 10;
-  
   a++;
 }
 
+void t2()
+{
+  int b = 10;
+  b++;
+}
+
+// With nodebug, IR should have no llvm.dbg.* calls, or !dbg annotations.
+// CHECK-LABEL: @t1
+// CHECK-NOT:   dbg
+// CHECK:       }
+
+// For sanity, check those things do occur normally.
+// CHECK-LABEL: @t2
+// CHECK:       call{{.*}}llvm.dbg
+// CHECK:       !dbg
+// CHECK:       }
+
+// We should see a function description for t2 but not t1.
+// CHECK-NOT: DISubprogram(name: "t1"
+// CHECK:     DISubprogram(name: "t2"
+// CHECK-NOT: DISubprogram(name: "t1"
+
diff --git a/test/CodeGen/attr-noinline.c b/test/CodeGen/attr-noinline.c
index dbca71f..44eb1e8 100644
--- a/test/CodeGen/attr-noinline.c
+++ b/test/CodeGen/attr-noinline.c
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -g -emit-llvm -o %t %s
+// RUN: %clang_cc1 -debug-info-kind=limited -emit-llvm -o %t %s
 // RUN: grep 'noinline' %t
 
 void t1() __attribute__((noinline));
diff --git a/test/CodeGen/attr-target-ppc.c b/test/CodeGen/attr-target-ppc.c
new file mode 100644
index 0000000..d290174
--- /dev/null
+++ b/test/CodeGen/attr-target-ppc.c
@@ -0,0 +1,4 @@
+// RUN: not %clang_cc1 -triple powerpc64le-linux-gnu -emit-llvm %s -o -
+
+long __attribute__((target("power8-vector,no-vsx"))) foo (void) { return 0; }  // expected-error {{option '-mpower8-vector' cannot be specified with '-mno-vsx'}}
+
diff --git a/test/CodeGen/attr-target-x86-mmx.c b/test/CodeGen/attr-target-x86-mmx.c
new file mode 100644
index 0000000..6720c6b
--- /dev/null
+++ b/test/CodeGen/attr-target-x86-mmx.c
@@ -0,0 +1,22 @@
+// RUN: %clang_cc1 -triple i386-linux-gnu -emit-llvm %s -o - | FileCheck %s
+// Picking a cpu that doesn't have mmx or sse by default so we can enable it later.
+
+#define __MM_MALLOC_H
+
+#include <x86intrin.h>
+
+// Verify that when we turn on sse we also turn on mmx.
+void __attribute__((target("sse"))) shift(__m64 a, __m64 b, int c) {
+  _mm_slli_pi16(a, c);
+  _mm_slli_pi32(a, c);
+  _mm_slli_si64(a, c);
+
+  _mm_srli_pi16(a, c);
+  _mm_srli_pi32(a, c);
+  _mm_srli_si64(a, c);
+
+  _mm_srai_pi16(a, c);
+  _mm_srai_pi32(a, c);
+}
+
+// CHECK: "target-features"="+mmx,+sse"
diff --git a/test/CodeGen/attr-target-x86.c b/test/CodeGen/attr-target-x86.c
new file mode 100644
index 0000000..58e33d1
--- /dev/null
+++ b/test/CodeGen/attr-target-x86.c
@@ -0,0 +1,39 @@
+// RUN: %clang_cc1 -triple x86_64-linux-gnu -target-cpu x86-64 -emit-llvm %s -o - | FileCheck %s
+
+int baz(int a) { return 4; }
+
+int __attribute__((target("avx,sse4.2,arch=ivybridge"))) foo(int a) { return 4; }
+
+int __attribute__((target("tune=sandybridge"))) walrus(int a) { return 4; }
+int __attribute__((target("fpmath=387"))) koala(int a) { return 4; }
+
+int __attribute__((target("no-sse2"))) echidna(int a) { return 4; }
+
+int __attribute__((target("sse4"))) panda(int a) { return 4; }
+
+int bar(int a) { return baz(a) + foo(a); }
+
+int __attribute__((target("avx,      sse4.2,      arch=   ivybridge"))) qux(int a) { return 4; }
+int __attribute__((target("no-aes, arch=ivybridge"))) qax(int a) { return 4; }
+
+int __attribute__((target("no-mmx"))) qq(int a) { return 40; }
+
+// Check that we emit the additional subtarget and cpu features for foo and not for baz or bar.
+// CHECK: baz{{.*}} #0
+// CHECK: foo{{.*}} #1
+// We ignore the tune attribute so walrus should be identical to baz and bar.
+// CHECK: walrus{{.*}} #0
+// We're currently ignoring the fpmath attribute so koala should be identical to baz and bar.
+// CHECK: koala{{.*}} #0
+// CHECK: echidna{{.*}} #2
+// CHECK: panda{{.*}} #3
+// CHECK: bar{{.*}} #0
+// CHECK: qux{{.*}} #1
+// CHECK: qax{{.*}} #4
+// CHECK: qq{{.*}} #5
+// CHECK: #0 = {{.*}}"target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2"
+// CHECK: #1 = {{.*}}"target-cpu"="ivybridge" "target-features"="+aes,+avx,+cx16,+f16c,+fsgsbase,+fxsr,+mmx,+pclmul,+popcnt,+rdrnd,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+xsave,+xsaveopt"
+// CHECK: #2 = {{.*}}"target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,-aes,-avx,-avx2,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512pf,-avx512vl,-f16c,-fma,-fma4,-pclmul,-sha,-sse2,-sse3,-sse4.1,-sse4.2,-sse4a,-ssse3,-xop,-xsave,-xsaveopt"
+// CHECK: #3 = {{.*}}"target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3"
+// CHECK: #4 = {{.*}}"target-cpu"="ivybridge" "target-features"="+avx,+cx16,+f16c,+fsgsbase,+fxsr,+mmx,+pclmul,+popcnt,+rdrnd,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+xsave,+xsaveopt,-aes"
+// CHECK: #5 = {{.*}}"target-cpu"="x86-64" "target-features"="+fxsr,+sse,+sse2,-3dnow,-3dnowa,-mmx"
diff --git a/test/CodeGen/attr-target.c b/test/CodeGen/attr-target.c
deleted file mode 100644
index 8c0f335..0000000
--- a/test/CodeGen/attr-target.c
+++ /dev/null
@@ -1,25 +0,0 @@
-// RUN: %clang_cc1 -triple x86_64-linux-gnu -target-cpu x86-64 -emit-llvm %s -o - | FileCheck %s
-
-int baz(int a) { return 4; }
-
-int __attribute__((target("avx,sse4.2,arch=ivybridge"))) foo(int a) { return 4; }
-
-int __attribute__((target("tune=sandybridge"))) walrus(int a) { return 4; }
-int __attribute__((target("fpmath=387"))) koala(int a) { return 4; }
-
-int __attribute__((target("mno-sse2"))) echidna(int a) { return 4; }
-
-int bar(int a) { return baz(a) + foo(a); }
-
-// Check that we emit the additional subtarget and cpu features for foo and not for baz or bar.
-// CHECK: baz{{.*}} #0
-// CHECK: foo{{.*}} #1
-// We ignore the tune attribute so walrus should be identical to baz and bar.
-// CHECK: walrus{{.*}} #0
-// We're currently ignoring the fpmath attribute so koala should be identical to baz and bar.
-// CHECK: koala{{.*}} #0
-// CHECK: echidna{{.*}} #2
-// CHECK: bar{{.*}} #0
-// CHECK: #0 = {{.*}}"target-cpu"="x86-64" "target-features"="+sse,+sse2"
-// CHECK: #1 = {{.*}}"target-cpu"="ivybridge" "target-features"="+sse,+sse2,+avx,+sse4.2"
-// CHECK: #2 = {{.*}}"target-cpu"="x86-64" "target-features"="+sse,+sse2,-sse2"
diff --git a/test/CodeGen/attributes.c b/test/CodeGen/attributes.c
index 4da3eca..7bfc392 100644
--- a/test/CodeGen/attributes.c
+++ b/test/CodeGen/attributes.c
@@ -26,7 +26,7 @@
 // CHECK: @t12 = global i32 0, section "SECT"
 int t12 __attribute__((section("SECT")));
 
-// CHECK: @t9 = weak alias bitcast (void ()* @__t8 to void (...)*)
+// CHECK: @t9 = weak alias void (...), bitcast (void ()* @__t8 to void (...)*)
 void __t8() {}
 void t9() __attribute__((weak, alias("__t8")));
 
diff --git a/test/CodeGen/available-externally-hidden.cpp b/test/CodeGen/available-externally-hidden.cpp
new file mode 100644
index 0000000..88ebfa9
--- /dev/null
+++ b/test/CodeGen/available-externally-hidden.cpp
@@ -0,0 +1,32 @@
+// RUN: %clang_cc1 -O2 -fvisibility hidden -std=c++11 -emit-llvm -o - -triple x86_64-apple-darwin10 %s | FileCheck %s
+
+// Ensure that available_externally functions eliminated at -O2 are now
+// declarations, and are not emitted as hidden with -fvisibility=hidden,
+// but rather with default visibility.
+struct Filter {
+  virtual void Foo();
+  int a;
+};
+
+class Message{};
+class Sender {
+ public:
+  virtual bool Send(Message* msg) = 0;
+
+ protected:
+  virtual ~Sender() {}
+};
+
+// CHECK: declare zeroext i1 @_ZThn16_N17SyncMessageFilter4SendEP7Message
+class SyncMessageFilter : public Filter, public Sender {
+ public:
+  bool Send(Message* message) override;
+};
+
+class TestSyncMessageFilter : public SyncMessageFilter {
+};
+
+int main() {
+  TestSyncMessageFilter *f = new TestSyncMessageFilter;
+  f->Send(new Message);
+}
diff --git a/test/CodeGen/available-externally-suppress.c b/test/CodeGen/available-externally-suppress.c
index 390d201..a25a282 100644
--- a/test/CodeGen/available-externally-suppress.c
+++ b/test/CodeGen/available-externally-suppress.c
@@ -1,12 +1,18 @@
 // RUN: %clang_cc1 -emit-llvm -o - -triple x86_64-apple-darwin10 %s | FileCheck %s
+// RUN: %clang_cc1 -O2 -fno-inline -emit-llvm -o - -triple x86_64-apple-darwin10 %s | FileCheck %s
+// RUN: %clang_cc1 -flto -O2 -fno-inline -emit-llvm -o - -triple x86_64-apple-darwin10 %s | FileCheck %s -check-prefix=LTO
 
 // Ensure that we don't emit available_externally functions at -O0.
+// Also, they should not be emitted at -O2, unless -flto is present, in which
+// case we should preserve them for link-time inlining decisions.
 int x;
 
 inline void f0(int y) { x = y; }
 
 // CHECK-LABEL: define void @test()
 // CHECK: declare void @f0(i32)
+// LTO-LABEL: define void @test()
+// LTO: define available_externally void @f0
 void test() {
   f0(17);
 }
@@ -19,9 +25,13 @@
 }
 
 // CHECK: @test1
+// LTO: @test1
 int test1(int x) {
   // CHECK: br i1
   // CHECK-NOT: call {{.*}} @f1
   // CHECK: ret i32
+  // LTO: br i1
+  // LTO-NOT: call {{.*}} @f1
+  // LTO: ret i32
   return f1(x);
 }
diff --git a/test/CodeGen/avx-builtins.c b/test/CodeGen/avx-builtins.c
index 99d0633..ee0f58f 100644
--- a/test/CodeGen/avx-builtins.c
+++ b/test/CodeGen/avx-builtins.c
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 %s -O3 -triple=x86_64-apple-darwin -target-feature +avx -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 %s -triple=x86_64-apple-darwin -target-feature +avx -emit-llvm -o - | FileCheck %s
 
 // Don't include mm_malloc.h, it's system specific.
 #define __MM_MALLOC_H
@@ -96,19 +96,22 @@
 
 int test_extract_epi32(__m256i __a) {
   // CHECK-LABEL: @test_extract_epi32
-  // CHECK: extractelement <8 x i32> %{{.*}}, i32 0
+  // CHECK: [[SHIFT1:%[^ ]+]] = and i32 %{{.*}}, 7
+  // CHECK: extractelement <8 x i32> %{{.*}}, i32 [[SHIFT1]]
   return _mm256_extract_epi32(__a, 8);
 }
 
 int test_extract_epi16(__m256i __a) {
   // CHECK-LABEL: @test_extract_epi16
-  // CHECK: extractelement <16 x i16> %{{.*}}, i32 0
+  // CHECK: [[SHIFT2:%[^ ]+]] = and i32 %{{.*}}, 15
+  // CHECK: extractelement <16 x i16> %{{.*}}, i32 [[SHIFT2]]
   return _mm256_extract_epi16(__a, 16);
 }
 
 int test_extract_epi8(__m256i __a) {
   // CHECK-LABEL: @test_extract_epi8
-  // CHECK: extractelement <32 x i8> %{{.*}}, i32 0
+  // CHECK: [[SHIFT3:%[^ ]+]] = and i32 %{{.*}}, 31
+  // CHECK: extractelement <32 x i8> %{{.*}}, i32 [[SHIFT3]]
   return _mm256_extract_epi8(__a, 32);
 }
 
@@ -147,3 +150,21 @@
   // CHECK: insertelement <4 x i64> {{.*}}, i64 {{.*}}, i32 {{.*}}
   return _mm256_insert_epi64(__a, 42, 3);
 }
+
+__m256 test_mm256_undefined_ps() {
+  // CHECK-LABEL: @test_mm256_undefined_ps
+  // CHECK: ret <8 x float> undef
+  return _mm256_undefined_ps();
+}
+
+__m256d test_mm256_undefined_pd() {
+  // CHECK-LABEL: @test_mm256_undefined_pd
+  // CHECK: ret <4 x double> undef
+  return _mm256_undefined_pd();
+}
+
+__m256i test_mm256_undefined_si256() {
+  // CHECK-LABEL: @test_mm256_undefined_si256
+  // CHECK: ret <4 x i64> undef
+  return _mm256_undefined_si256();
+}
diff --git a/test/CodeGen/avx-cmp-builtins.c b/test/CodeGen/avx-cmp-builtins.c
index 5b205d7..30d1bd5 100644
--- a/test/CodeGen/avx-cmp-builtins.c
+++ b/test/CodeGen/avx-cmp-builtins.c
@@ -1,4 +1,5 @@
 // RUN: %clang_cc1 %s -O3 -triple=x86_64-apple-darwin -target-feature +avx -emit-llvm -o - | FileCheck %s
+// FIXME: The shufflevector instructions in test_cmpgt_sd are relying on O3 here.
 
 // Don't include mm_malloc.h, it's system specific.
 #define __MM_MALLOC_H
diff --git a/test/CodeGen/avx-shuffle-builtins.c b/test/CodeGen/avx-shuffle-builtins.c
index 913f9d2..22bee33 100644
--- a/test/CodeGen/avx-shuffle-builtins.c
+++ b/test/CodeGen/avx-shuffle-builtins.c
@@ -1,4 +1,5 @@
 // RUN: %clang_cc1 %s -O3 -triple=x86_64-apple-darwin -target-feature +avx -emit-llvm -o - | FileCheck %s
+// FIXME: This is testing optimized generation of shuffle instructions and should be fixed.
 
 // Don't include mm_malloc.h, it's system specific.
 #define __MM_MALLOC_H
diff --git a/test/CodeGen/avx2-builtins.c b/test/CodeGen/avx2-builtins.c
index e362871..89981bb 100644
--- a/test/CodeGen/avx2-builtins.c
+++ b/test/CodeGen/avx2-builtins.c
@@ -1,4 +1,5 @@
-// RUN: %clang_cc1 %s -O0 -triple=x86_64-apple-darwin -target-feature +avx2 -emit-llvm -o - -Werror | FileCheck %s
+// RUN: %clang_cc1 %s -triple=x86_64-apple-darwin -target-feature +avx2 -emit-llvm -o - -Werror | FileCheck %s
+// RUN: %clang_cc1 %s -triple=x86_64-apple-darwin -target-feature +avx2 -fno-signed-char -emit-llvm -o - -Werror | FileCheck %s
 
 // Don't include mm_malloc.h, it's system specific.
 #define __MM_MALLOC_H
@@ -601,28 +602,35 @@
   return _mm256_unpacklo_epi64(a, b);
 }
 
-__m256i test_mm256_stream_load_si256(__m256i *a) {
+__m256i test_mm256_stream_load_si256(__m256i const *a) {
   // CHECK: @llvm.x86.avx2.movntdqa
   return _mm256_stream_load_si256(a);
 }
 
 __m128 test_mm_broadcastss_ps(__m128 a) {
-  // CHECK: @llvm.x86.avx2.vbroadcast.ss.ps
+  // CHECK-LABEL: test_mm_broadcastss_ps
+  // CHECK-NOT: @llvm.x86.avx2.vbroadcast.ss.ps
+  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> zeroinitializer
   return _mm_broadcastss_ps(a);
 }
 
 __m128d test_mm_broadcastsd_pd(__m128d a) {
+  // CHECK-LABEL: test_mm_broadcastsd_pd
   // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> zeroinitializer
   return _mm_broadcastsd_pd(a);
 }
 
 __m256 test_mm256_broadcastss_ps(__m128 a) {
-  // CHECK: @llvm.x86.avx2.vbroadcast.ss.ps.256
+  // CHECK-LABEL: test_mm256_broadcastss_ps
+  // CHECK-NOT: @llvm.x86.avx2.vbroadcast.ss.ps.256
+  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <8 x i32> zeroinitializer
   return _mm256_broadcastss_ps(a);
 }
 
 __m256d test_mm256_broadcastsd_pd(__m128d a) {
-  // check: @llvm.x86.avx2.vbroadcast.sd.pd.256
+  // CHECK-LABEL: test_mm256_broadcastsd_pd
+  // CHECK-NOT: @llvm.x86.avx2.vbroadcast.sd.pd.256
+  // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <4 x i32> zeroinitializer
   return _mm256_broadcastsd_pd(a);
 }
 
@@ -646,42 +654,58 @@
 }
 
 __m256i test_mm256_broadcastb_epi8(__m128i a) {
-  // CHECK: @llvm.x86.avx2.pbroadcastb.256
+  // CHECK-LABEL: test_mm256_broadcastb_epi8
+  // CHECK-NOT: @llvm.x86.avx2.pbroadcastb.256
+  // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <32 x i32> zeroinitializer
   return _mm256_broadcastb_epi8(a);
 }
 
 __m256i test_mm256_broadcastw_epi16(__m128i a) {
-  // CHECK: @llvm.x86.avx2.pbroadcastw.256
+  // CHECK-LABEL: test_mm256_broadcastw_epi16
+  // CHECK-NOT: @llvm.x86.avx2.pbroadcastw.256
+  // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <16 x i32> zeroinitializer
   return _mm256_broadcastw_epi16(a);
 }
 
 __m256i test_mm256_broadcastd_epi32(__m128i a) {
-  // CHECK: @llvm.x86.avx2.pbroadcastd.256
+  // CHECK-LABEL: test_mm256_broadcastd_epi32
+  // CHECK-NOT: @llvm.x86.avx2.pbroadcastd.256
+  // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <8 x i32> zeroinitializer
   return _mm256_broadcastd_epi32(a);
 }
 
 __m256i test_mm256_broadcastq_epi64(__m128i a) {
-  // CHECK: @llvm.x86.avx2.pbroadcastq.256
+  // CHECK-LABEL: test_mm256_broadcastq_epi64
+  // CHECK-NOT: @llvm.x86.avx2.pbroadcastq.256
+  // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <4 x i32> zeroinitializer
   return _mm256_broadcastq_epi64(a);
 }
 
 __m128i test_mm_broadcastb_epi8(__m128i a) {
-  // CHECK: @llvm.x86.avx2.pbroadcastb.128
+  // CHECK-LABEL: test_mm_broadcastb_epi8
+  // CHECK-NOT: @llvm.x86.avx2.pbroadcastb.128
+  // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i32> zeroinitializer
   return _mm_broadcastb_epi8(a);
 }
 
 __m128i test_mm_broadcastw_epi16(__m128i a) {
-  // CHECK: @llvm.x86.avx2.pbroadcastw.128
+  // CHECK-LABEL: test_mm_broadcastw_epi16
+  // CHECK-NOT: @llvm.x86.avx2.pbroadcastw.128
+  // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i32> zeroinitializer
   return _mm_broadcastw_epi16(a);
 }
 
 __m128i test_mm_broadcastd_epi32(__m128i a) {
-  // CHECK: @llvm.x86.avx2.pbroadcastd.128
+  // CHECK-LABEL: test_mm_broadcastd_epi32
+  // CHECK-NOT: @llvm.x86.avx2.pbroadcastd.128
+  // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> zeroinitializer
   return _mm_broadcastd_epi32(a);
 }
 
 __m128i test_mm_broadcastq_epi64(__m128i a) {
-  // CHECK: @llvm.x86.avx2.pbroadcastq.128
+  // CHECK-LABEL: test_mm_broadcastq_epi64
+  // CHECK-NOT: @llvm.x86.avx2.pbroadcastq.128
+  // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i32> zeroinitializer
   return _mm_broadcastq_epi64(a);
 }
 
@@ -695,7 +719,7 @@
   return _mm256_permute4x64_pd(a, 25);
 }
 
-__m256 test_mm256_permutevar8x32_ps(__m256 a, __m256 b) {
+__m256 test_mm256_permutevar8x32_ps(__m256 a, __m256i b) {
   // CHECK: @llvm.x86.avx2.permps
   return _mm256_permutevar8x32_ps(a, b);
 }
@@ -849,11 +873,13 @@
   // CHECK: @llvm.x86.avx2.gather.d.pd.256
   return _mm256_mask_i32gather_pd(a, b, c, d, 2);
 }
+
 __m128d test_mm_mask_i64gather_pd(__m128d a, double const *b, __m128i c,
                                   __m128d d) {
   // CHECK: @llvm.x86.avx2.gather.q.pd
   return _mm_mask_i64gather_pd(a, b, c, d, 2);
 }
+
 __m256d test_mm256_mask_i64gather_pd(__m256d a, double const *b, __m256i c,
                                       __m256d d) {
   // CHECK: @llvm.x86.avx2.gather.q.pd.256
@@ -865,16 +891,19 @@
   // CHECK: @llvm.x86.avx2.gather.d.ps
   return _mm_mask_i32gather_ps(a, b, c, d, 2);
 }
+
 __m256 test_mm256_mask_i32gather_ps(__m256 a, float const *b, __m256i c,
                                      __m256 d) {
   // CHECK: @llvm.x86.avx2.gather.d.ps.256
   return _mm256_mask_i32gather_ps(a, b, c, d, 2);
 }
+
 __m128 test_mm_mask_i64gather_ps(__m128 a, float const *b, __m128i c,
                                  __m128 d) {
   // CHECK: @llvm.x86.avx2.gather.q.ps
   return _mm_mask_i64gather_ps(a, b, c, d, 2);
 }
+
 __m128 test_mm256_mask_i64gather_ps(__m128 a, float const *b, __m256i c,
                                     __m128 d) {
   // CHECK: @llvm.x86.avx2.gather.q.ps.256
@@ -886,16 +915,19 @@
   // CHECK: @llvm.x86.avx2.gather.d.d
   return _mm_mask_i32gather_epi32(a, b, c, d, 2);
 }
+
 __m256i test_mm256_mask_i32gather_epi32(__m256i a, int const *b, __m256i c,
                                         __m256i d) {
   // CHECK: @llvm.x86.avx2.gather.d.d.256
   return _mm256_mask_i32gather_epi32(a, b, c, d, 2);
 }
+
 __m128i test_mm_mask_i64gather_epi32(__m128i a, int const *b, __m128i c,
                                      __m128i d) {
   // CHECK: @llvm.x86.avx2.gather.q.d
   return _mm_mask_i64gather_epi32(a, b, c, d, 2);
 }
+
 __m128i test_mm256_mask_i64gather_epi32(__m128i a, int const *b, __m256i c,
                                         __m128i d) {
   // CHECK: @llvm.x86.avx2.gather.q.d.256
@@ -907,16 +939,19 @@
   // CHECK: @llvm.x86.avx2.gather.d.q
   return _mm_mask_i32gather_epi64(a, b, c, d, 2);
 }
+
 __m256i test_mm256_mask_i32gather_epi64(__m256i a, long long const *b, __m128i c,
                                         __m256i d) {
   // CHECK: @llvm.x86.avx2.gather.d.q.256
   return _mm256_mask_i32gather_epi64(a, b, c, d, 2);
 }
+
 __m128i test_mm_mask_i64gather_epi64(__m128i a, long long const *b, __m128i c,
                                      __m128i d) {
   // CHECK: @llvm.x86.avx2.gather.q.q
   return _mm_mask_i64gather_epi64(a, b, c, d, 2);
 }
+
 __m256i test_mm256_mask_i64gather_epi64(__m256i a, long long const *b, __m256i c,
                                         __m256i d) {
   // CHECK: @llvm.x86.avx2.gather.q.q.256
@@ -927,30 +962,37 @@
   // CHECK: @llvm.x86.avx2.gather.d.pd
   return _mm_i32gather_pd(b, c, 2);
 }
+
 __m256d test_mm256_i32gather_pd(double const *b, __m128i c) {
   // CHECK: @llvm.x86.avx2.gather.d.pd.256
   return _mm256_i32gather_pd(b, c, 2);
 }
+
 __m128d test_mm_i64gather_pd(double const *b, __m128i c) {
   // CHECK: @llvm.x86.avx2.gather.q.pd
   return _mm_i64gather_pd(b, c, 2);
 }
+
 __m256d test_mm256_i64gather_pd(double const *b, __m256i c) {
   // CHECK: @llvm.x86.avx2.gather.q.pd.256
   return _mm256_i64gather_pd(b, c, 2);
 }
+
 __m128 test_mm_i32gather_ps(float const *b, __m128i c) {
   // CHECK: @llvm.x86.avx2.gather.d.ps
   return _mm_i32gather_ps(b, c, 2);
 }
+
 __m256 test_mm256_i32gather_ps(float const *b, __m256i c) {
   // CHECK: @llvm.x86.avx2.gather.d.ps.256
   return _mm256_i32gather_ps(b, c, 2);
 }
+
 __m128 test_mm_i64gather_ps(float const *b, __m128i c) {
   // CHECK: @llvm.x86.avx2.gather.q.ps
   return _mm_i64gather_ps(b, c, 2);
 }
+
 __m128 test_mm256_i64gather_ps(float const *b, __m256i c) {
   // CHECK: @llvm.x86.avx2.gather.q.ps.256
   return _mm256_i64gather_ps(b, c, 2);
@@ -960,30 +1002,37 @@
   // CHECK: @llvm.x86.avx2.gather.d.d
   return _mm_i32gather_epi32(b, c, 2);
 }
+
 __m256i test_mm256_i32gather_epi32(int const *b, __m256i c) {
   // CHECK: @llvm.x86.avx2.gather.d.d.256
   return _mm256_i32gather_epi32(b, c, 2);
 }
+
 __m128i test_mm_i64gather_epi32(int const *b, __m128i c) {
   // CHECK: @llvm.x86.avx2.gather.q.d
   return _mm_i64gather_epi32(b, c, 2);
 }
+
 __m128i test_mm256_i64gather_epi32(int const *b, __m256i c) {
   // CHECK: @llvm.x86.avx2.gather.q.d.256
   return _mm256_i64gather_epi32(b, c, 2);
 }
+
 __m128i test_mm_i32gather_epi64(long long const *b, __m128i c) {
   // CHECK: @llvm.x86.avx2.gather.d.q
   return _mm_i32gather_epi64(b, c, 2);
 }
+
 __m256i test_mm256_i32gather_epi64(long long const *b, __m128i c) {
   // CHECK: @llvm.x86.avx2.gather.d.q.256
   return _mm256_i32gather_epi64(b, c, 2);
 }
+
 __m128i test_mm_i64gather_epi64(long long const *b, __m128i c) {
   // CHECK: @llvm.x86.avx2.gather.q.q
   return _mm_i64gather_epi64(b, c, 2);
 }
+
 __m256i test_mm256_i64gather_epi64(long long const *b, __m256i c) {
   // CHECK: @llvm.x86.avx2.gather.q.q.256
   return _mm256_i64gather_epi64(b, c, 2);
diff --git a/test/CodeGen/avx512bw-builtins.c b/test/CodeGen/avx512bw-builtins.c
index 452d737..7addd98 100644
--- a/test/CodeGen/avx512bw-builtins.c
+++ b/test/CodeGen/avx512bw-builtins.c
@@ -1,4 +1,8 @@
-// RUN: %clang_cc1 %s -O0 -triple=x86_64-apple-darwin -ffreestanding -target-feature +avx512bw -emit-llvm -o - -Werror | FileCheck %s
+// RUN: %clang_cc1 %s -triple=x86_64-apple-darwin -target-feature +avx512bw -emit-llvm -o - -Werror | FileCheck %s
+// RUN: %clang_cc1 %s -triple=x86_64-apple-darwin -target-feature +avx512bw -fno-signed-char -emit-llvm -o - -Werror | FileCheck %s
+
+// Don't include mm_malloc.h, it's system specific.
+#define __MM_MALLOC_H
 
 #include <immintrin.h>
 
@@ -427,3 +431,613 @@
   //CHECK: @llvm.x86.avx512.mask.pmull.w.512
   return _mm512_maskz_mullo_epi16(__U, __A, __B);
 }
+
+__m512i test_mm512_mask_blend_epi8(__mmask64 __U, __m512i __A, __m512i __W) {
+  // CHECK-LABEL: @test_mm512_mask_blend_epi8
+  // CHECK: @llvm.x86.avx512.mask.blend.b.512
+  return _mm512_mask_blend_epi8(__U,__A,__W); 
+}
+__m512i test_mm512_mask_blend_epi16(__mmask32 __U, __m512i __A, __m512i __W) {
+  // CHECK-LABEL: @test_mm512_mask_blend_epi16
+  // CHECK: @llvm.x86.avx512.mask.blend.w.512
+  return _mm512_mask_blend_epi16(__U,__A,__W); 
+}
+__m512i test_mm512_abs_epi8(__m512i __A) {
+  // CHECK-LABEL: @test_mm512_abs_epi8
+  // CHECK: @llvm.x86.avx512.mask.pabs.b.512
+  return _mm512_abs_epi8(__A); 
+}
+__m512i test_mm512_mask_abs_epi8(__m512i __W, __mmask64 __U, __m512i __A) {
+  // CHECK-LABEL: @test_mm512_mask_abs_epi8
+  // CHECK: @llvm.x86.avx512.mask.pabs.b.512
+  return _mm512_mask_abs_epi8(__W,__U,__A); 
+}
+__m512i test_mm512_maskz_abs_epi8(__mmask64 __U, __m512i __A) {
+  // CHECK-LABEL: @test_mm512_maskz_abs_epi8
+  // CHECK: @llvm.x86.avx512.mask.pabs.b.512
+  return _mm512_maskz_abs_epi8(__U,__A); 
+}
+__m512i test_mm512_abs_epi16(__m512i __A) {
+  // CHECK-LABEL: @test_mm512_abs_epi16
+  // CHECK: @llvm.x86.avx512.mask.pabs.w.512
+  return _mm512_abs_epi16(__A); 
+}
+__m512i test_mm512_mask_abs_epi16(__m512i __W, __mmask32 __U, __m512i __A) {
+  // CHECK-LABEL: @test_mm512_mask_abs_epi16
+  // CHECK: @llvm.x86.avx512.mask.pabs.w.512
+  return _mm512_mask_abs_epi16(__W,__U,__A); 
+}
+__m512i test_mm512_maskz_abs_epi16(__mmask32 __U, __m512i __A) {
+  // CHECK-LABEL: @test_mm512_maskz_abs_epi16
+  // CHECK: @llvm.x86.avx512.mask.pabs.w.512
+  return _mm512_maskz_abs_epi16(__U,__A); 
+}
+__m512i test_mm512_packs_epi32(__m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_packs_epi32
+  // CHECK: @llvm.x86.avx512.mask.packssdw.512
+  return _mm512_packs_epi32(__A,__B); 
+}
+__m512i test_mm512_maskz_packs_epi32(__mmask32 __M, __m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_maskz_packs_epi32
+  // CHECK: @llvm.x86.avx512.mask.packssdw.512
+  return _mm512_maskz_packs_epi32(__M,__A,__B); 
+}
+__m512i test_mm512_mask_packs_epi32(__m512i __W, __mmask32 __M, __m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_mask_packs_epi32
+  // CHECK: @llvm.x86.avx512.mask.packssdw.512
+  return _mm512_mask_packs_epi32(__W,__M,__A,__B); 
+}
+__m512i test_mm512_packs_epi16(__m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_packs_epi16
+  // CHECK: @llvm.x86.avx512.mask.packsswb.512
+  return _mm512_packs_epi16(__A,__B); 
+}
+__m512i test_mm512_mask_packs_epi16(__m512i __W, __mmask64 __M, __m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_mask_packs_epi16
+  // CHECK: @llvm.x86.avx512.mask.packsswb.512
+  return _mm512_mask_packs_epi16(__W,__M,__A,__B); 
+}
+__m512i test_mm512_maskz_packs_epi16(__mmask64 __M, __m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_maskz_packs_epi16
+  // CHECK: @llvm.x86.avx512.mask.packsswb.512
+  return _mm512_maskz_packs_epi16(__M,__A,__B); 
+}
+__m512i test_mm512_packus_epi32(__m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_packus_epi32
+  // CHECK: @llvm.x86.avx512.mask.packusdw.512
+  return _mm512_packus_epi32(__A,__B); 
+}
+__m512i test_mm512_maskz_packus_epi32(__mmask32 __M, __m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_maskz_packus_epi32
+  // CHECK: @llvm.x86.avx512.mask.packusdw.512
+  return _mm512_maskz_packus_epi32(__M,__A,__B); 
+}
+__m512i test_mm512_mask_packus_epi32(__m512i __W, __mmask32 __M, __m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_mask_packus_epi32
+  // CHECK: @llvm.x86.avx512.mask.packusdw.512
+  return _mm512_mask_packus_epi32(__W,__M,__A,__B); 
+}
+__m512i test_mm512_packus_epi16(__m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_packus_epi16
+  // CHECK: @llvm.x86.avx512.mask.packuswb.512
+  return _mm512_packus_epi16(__A,__B); 
+}
+__m512i test_mm512_mask_packus_epi16(__m512i __W, __mmask64 __M, __m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_mask_packus_epi16
+  // CHECK: @llvm.x86.avx512.mask.packuswb.512
+  return _mm512_mask_packus_epi16(__W,__M,__A,__B); 
+}
+__m512i test_mm512_maskz_packus_epi16(__mmask64 __M, __m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_maskz_packus_epi16
+  // CHECK: @llvm.x86.avx512.mask.packuswb.512
+  return _mm512_maskz_packus_epi16(__M,__A,__B); 
+}
+__m512i test_mm512_adds_epi8(__m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_adds_epi8
+  // CHECK: @llvm.x86.avx512.mask.padds.b.512
+  return _mm512_adds_epi8(__A,__B); 
+}
+__m512i test_mm512_mask_adds_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_mask_adds_epi8
+  // CHECK: @llvm.x86.avx512.mask.padds.b.512
+  return _mm512_mask_adds_epi8(__W,__U,__A,__B); 
+}
+__m512i test_mm512_maskz_adds_epi8(__mmask64 __U, __m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_maskz_adds_epi8
+  // CHECK: @llvm.x86.avx512.mask.padds.b.512
+  return _mm512_maskz_adds_epi8(__U,__A,__B); 
+}
+__m512i test_mm512_adds_epi16(__m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_adds_epi16
+  // CHECK: @llvm.x86.avx512.mask.padds.w.512
+  return _mm512_adds_epi16(__A,__B); 
+}
+__m512i test_mm512_mask_adds_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_mask_adds_epi16
+  // CHECK: @llvm.x86.avx512.mask.padds.w.512
+  return _mm512_mask_adds_epi16(__W,__U,__A,__B); 
+}
+__m512i test_mm512_maskz_adds_epi16(__mmask32 __U, __m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_maskz_adds_epi16
+  // CHECK: @llvm.x86.avx512.mask.padds.w.512
+  return _mm512_maskz_adds_epi16(__U,__A,__B); 
+}
+__m512i test_mm512_adds_epu8(__m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_adds_epu8
+  // CHECK: @llvm.x86.avx512.mask.paddus.b.512
+  return _mm512_adds_epu8(__A,__B); 
+}
+__m512i test_mm512_mask_adds_epu8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_mask_adds_epu8
+  // CHECK: @llvm.x86.avx512.mask.paddus.b.512
+  return _mm512_mask_adds_epu8(__W,__U,__A,__B); 
+}
+__m512i test_mm512_maskz_adds_epu8(__mmask64 __U, __m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_maskz_adds_epu8
+  // CHECK: @llvm.x86.avx512.mask.paddus.b.512
+  return _mm512_maskz_adds_epu8(__U,__A,__B); 
+}
+__m512i test_mm512_adds_epu16(__m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_adds_epu16
+  // CHECK: @llvm.x86.avx512.mask.paddus.w.512
+  return _mm512_adds_epu16(__A,__B); 
+}
+__m512i test_mm512_mask_adds_epu16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_mask_adds_epu16
+  // CHECK: @llvm.x86.avx512.mask.paddus.w.512
+  return _mm512_mask_adds_epu16(__W,__U,__A,__B); 
+}
+__m512i test_mm512_maskz_adds_epu16(__mmask32 __U, __m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_maskz_adds_epu16
+  // CHECK: @llvm.x86.avx512.mask.paddus.w.512
+  return _mm512_maskz_adds_epu16(__U,__A,__B); 
+}
+__m512i test_mm512_avg_epu8(__m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_avg_epu8
+  // CHECK: @llvm.x86.avx512.mask.pavg.b.512
+  return _mm512_avg_epu8(__A,__B); 
+}
+__m512i test_mm512_mask_avg_epu8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_mask_avg_epu8
+  // CHECK: @llvm.x86.avx512.mask.pavg.b.512
+  return _mm512_mask_avg_epu8(__W,__U,__A,__B); 
+}
+__m512i test_mm512_maskz_avg_epu8(__mmask64 __U, __m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_maskz_avg_epu8
+  // CHECK: @llvm.x86.avx512.mask.pavg.b.512
+  return _mm512_maskz_avg_epu8(__U,__A,__B); 
+}
+__m512i test_mm512_avg_epu16(__m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_avg_epu16
+  // CHECK: @llvm.x86.avx512.mask.pavg.w.512
+  return _mm512_avg_epu16(__A,__B); 
+}
+__m512i test_mm512_mask_avg_epu16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_mask_avg_epu16
+  // CHECK: @llvm.x86.avx512.mask.pavg.w.512
+  return _mm512_mask_avg_epu16(__W,__U,__A,__B); 
+}
+__m512i test_mm512_maskz_avg_epu16(__mmask32 __U, __m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_maskz_avg_epu16
+  // CHECK: @llvm.x86.avx512.mask.pavg.w.512
+  return _mm512_maskz_avg_epu16(__U,__A,__B); 
+}
+__m512i test_mm512_max_epi8(__m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_max_epi8
+  // CHECK: @llvm.x86.avx512.mask.pmaxs.b.512
+  return _mm512_max_epi8(__A,__B); 
+}
+__m512i test_mm512_maskz_max_epi8(__mmask64 __M, __m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_maskz_max_epi8
+  // CHECK: @llvm.x86.avx512.mask.pmaxs.b.512
+  return _mm512_maskz_max_epi8(__M,__A,__B); 
+}
+__m512i test_mm512_mask_max_epi8(__m512i __W, __mmask64 __M, __m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_mask_max_epi8
+  // CHECK: @llvm.x86.avx512.mask.pmaxs.b.512
+  return _mm512_mask_max_epi8(__W,__M,__A,__B); 
+}
+__m512i test_mm512_max_epi16(__m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_max_epi16
+  // CHECK: @llvm.x86.avx512.mask.pmaxs.w.512
+  return _mm512_max_epi16(__A,__B); 
+}
+__m512i test_mm512_maskz_max_epi16(__mmask32 __M, __m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_maskz_max_epi16
+  // CHECK: @llvm.x86.avx512.mask.pmaxs.w.512
+  return _mm512_maskz_max_epi16(__M,__A,__B); 
+}
+__m512i test_mm512_mask_max_epi16(__m512i __W, __mmask32 __M, __m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_mask_max_epi16
+  // CHECK: @llvm.x86.avx512.mask.pmaxs.w.512
+  return _mm512_mask_max_epi16(__W,__M,__A,__B); 
+}
+__m512i test_mm512_max_epu8(__m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_max_epu8
+  // CHECK: @llvm.x86.avx512.mask.pmaxu.b.512
+  return _mm512_max_epu8(__A,__B); 
+}
+__m512i test_mm512_maskz_max_epu8(__mmask64 __M, __m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_maskz_max_epu8
+  // CHECK: @llvm.x86.avx512.mask.pmaxu.b.512
+  return _mm512_maskz_max_epu8(__M,__A,__B); 
+}
+__m512i test_mm512_mask_max_epu8(__m512i __W, __mmask64 __M, __m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_mask_max_epu8
+  // CHECK: @llvm.x86.avx512.mask.pmaxu.b.512
+  return _mm512_mask_max_epu8(__W,__M,__A,__B); 
+}
+__m512i test_mm512_max_epu16(__m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_max_epu16
+  // CHECK: @llvm.x86.avx512.mask.pmaxu.w.512
+  return _mm512_max_epu16(__A,__B); 
+}
+__m512i test_mm512_maskz_max_epu16(__mmask32 __M, __m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_maskz_max_epu16
+  // CHECK: @llvm.x86.avx512.mask.pmaxu.w.512
+  return _mm512_maskz_max_epu16(__M,__A,__B); 
+}
+__m512i test_mm512_mask_max_epu16(__m512i __W, __mmask32 __M, __m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_mask_max_epu16
+  // CHECK: @llvm.x86.avx512.mask.pmaxu.w.512
+  return _mm512_mask_max_epu16(__W,__M,__A,__B); 
+}
+__m512i test_mm512_min_epi8(__m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_min_epi8
+  // CHECK: @llvm.x86.avx512.mask.pmins.b.512
+  return _mm512_min_epi8(__A,__B); 
+}
+__m512i test_mm512_maskz_min_epi8(__mmask64 __M, __m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_maskz_min_epi8
+  // CHECK: @llvm.x86.avx512.mask.pmins.b.512
+  return _mm512_maskz_min_epi8(__M,__A,__B); 
+}
+__m512i test_mm512_mask_min_epi8(__m512i __W, __mmask64 __M, __m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_mask_min_epi8
+  // CHECK: @llvm.x86.avx512.mask.pmins.b.512
+  return _mm512_mask_min_epi8(__W,__M,__A,__B); 
+}
+__m512i test_mm512_min_epi16(__m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_min_epi16
+  // CHECK: @llvm.x86.avx512.mask.pmins.w.512
+  return _mm512_min_epi16(__A,__B); 
+}
+__m512i test_mm512_maskz_min_epi16(__mmask32 __M, __m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_maskz_min_epi16
+  // CHECK: @llvm.x86.avx512.mask.pmins.w.512
+  return _mm512_maskz_min_epi16(__M,__A,__B); 
+}
+__m512i test_mm512_mask_min_epi16(__m512i __W, __mmask32 __M, __m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_mask_min_epi16
+  // CHECK: @llvm.x86.avx512.mask.pmins.w.512
+  return _mm512_mask_min_epi16(__W,__M,__A,__B); 
+}
+__m512i test_mm512_min_epu8(__m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_min_epu8
+  // CHECK: @llvm.x86.avx512.mask.pminu.b.512
+  return _mm512_min_epu8(__A,__B); 
+}
+__m512i test_mm512_maskz_min_epu8(__mmask64 __M, __m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_maskz_min_epu8
+  // CHECK: @llvm.x86.avx512.mask.pminu.b.512
+  return _mm512_maskz_min_epu8(__M,__A,__B); 
+}
+__m512i test_mm512_mask_min_epu8(__m512i __W, __mmask64 __M, __m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_mask_min_epu8
+  // CHECK: @llvm.x86.avx512.mask.pminu.b.512
+  return _mm512_mask_min_epu8(__W,__M,__A,__B); 
+}
+__m512i test_mm512_min_epu16(__m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_min_epu16
+  // CHECK: @llvm.x86.avx512.mask.pminu.w.512
+  return _mm512_min_epu16(__A,__B); 
+}
+__m512i test_mm512_maskz_min_epu16(__mmask32 __M, __m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_maskz_min_epu16
+  // CHECK: @llvm.x86.avx512.mask.pminu.w.512
+  return _mm512_maskz_min_epu16(__M,__A,__B); 
+}
+__m512i test_mm512_mask_min_epu16(__m512i __W, __mmask32 __M, __m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_mask_min_epu16
+  // CHECK: @llvm.x86.avx512.mask.pminu.w.512
+  return _mm512_mask_min_epu16(__W,__M,__A,__B); 
+}
+__m512i test_mm512_shuffle_epi8(__m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_shuffle_epi8
+  // CHECK: @llvm.x86.avx512.mask.pshuf.b.512
+  return _mm512_shuffle_epi8(__A,__B); 
+}
+__m512i test_mm512_mask_shuffle_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_mask_shuffle_epi8
+  // CHECK: @llvm.x86.avx512.mask.pshuf.b.512
+  return _mm512_mask_shuffle_epi8(__W,__U,__A,__B); 
+}
+__m512i test_mm512_maskz_shuffle_epi8(__mmask64 __U, __m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_maskz_shuffle_epi8
+  // CHECK: @llvm.x86.avx512.mask.pshuf.b.512
+  return _mm512_maskz_shuffle_epi8(__U,__A,__B); 
+}
+__m512i test_mm512_subs_epi8(__m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_subs_epi8
+  // CHECK: @llvm.x86.avx512.mask.psubs.b.512
+  return _mm512_subs_epi8(__A,__B); 
+}
+__m512i test_mm512_mask_subs_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_mask_subs_epi8
+  // CHECK: @llvm.x86.avx512.mask.psubs.b.512
+  return _mm512_mask_subs_epi8(__W,__U,__A,__B); 
+}
+__m512i test_mm512_maskz_subs_epi8(__mmask64 __U, __m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_maskz_subs_epi8
+  // CHECK: @llvm.x86.avx512.mask.psubs.b.512
+  return _mm512_maskz_subs_epi8(__U,__A,__B); 
+}
+__m512i test_mm512_subs_epi16(__m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_subs_epi16
+  // CHECK: @llvm.x86.avx512.mask.psubs.w.512
+  return _mm512_subs_epi16(__A,__B); 
+}
+__m512i test_mm512_mask_subs_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_mask_subs_epi16
+  // CHECK: @llvm.x86.avx512.mask.psubs.w.512
+  return _mm512_mask_subs_epi16(__W,__U,__A,__B); 
+}
+__m512i test_mm512_maskz_subs_epi16(__mmask32 __U, __m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_maskz_subs_epi16
+  // CHECK: @llvm.x86.avx512.mask.psubs.w.512
+  return _mm512_maskz_subs_epi16(__U,__A,__B); 
+}
+__m512i test_mm512_subs_epu8(__m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_subs_epu8
+  // CHECK: @llvm.x86.avx512.mask.psubus.b.512
+  return _mm512_subs_epu8(__A,__B); 
+}
+__m512i test_mm512_mask_subs_epu8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_mask_subs_epu8
+  // CHECK: @llvm.x86.avx512.mask.psubus.b.512
+  return _mm512_mask_subs_epu8(__W,__U,__A,__B); 
+}
+__m512i test_mm512_maskz_subs_epu8(__mmask64 __U, __m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_maskz_subs_epu8
+  // CHECK: @llvm.x86.avx512.mask.psubus.b.512
+  return _mm512_maskz_subs_epu8(__U,__A,__B); 
+}
+__m512i test_mm512_subs_epu16(__m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_subs_epu16
+  // CHECK: @llvm.x86.avx512.mask.psubus.w.512
+  return _mm512_subs_epu16(__A,__B); 
+}
+__m512i test_mm512_mask_subs_epu16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_mask_subs_epu16
+  // CHECK: @llvm.x86.avx512.mask.psubus.w.512
+  return _mm512_mask_subs_epu16(__W,__U,__A,__B); 
+}
+__m512i test_mm512_maskz_subs_epu16(__mmask32 __U, __m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_maskz_subs_epu16
+  // CHECK: @llvm.x86.avx512.mask.psubus.w.512
+  return _mm512_maskz_subs_epu16(__U,__A,__B); 
+}
+__m512i test_mm512_mask2_permutex2var_epi16(__m512i __A, __m512i __I, __mmask32 __U, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_mask2_permutex2var_epi16
+  // CHECK: @llvm.x86.avx512.mask.vpermi2var.hi.512
+  return _mm512_mask2_permutex2var_epi16(__A,__I,__U,__B); 
+}
+__m512i test_mm512_permutex2var_epi16(__m512i __A, __m512i __I, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_permutex2var_epi16
+  // CHECK: @llvm.x86.avx512.mask.vpermt2var.hi.512
+  return _mm512_permutex2var_epi16(__A,__I,__B); 
+}
+__m512i test_mm512_mask_permutex2var_epi16(__m512i __A, __mmask32 __U, __m512i __I, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_mask_permutex2var_epi16
+  // CHECK: @llvm.x86.avx512.mask.vpermt2var.hi.512
+  return _mm512_mask_permutex2var_epi16(__A,__U,__I,__B); 
+}
+__m512i test_mm512_maskz_permutex2var_epi16(__mmask32 __U, __m512i __A, __m512i __I, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_maskz_permutex2var_epi16
+  // CHECK: @llvm.x86.avx512.maskz.vpermt2var.hi.512
+  return _mm512_maskz_permutex2var_epi16(__U,__A,__I,__B); 
+}
+
+__m512i test_mm512_mulhrs_epi16(__m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_mulhrs_epi16
+  // CHECK: @llvm.x86.avx512.mask.pmul.hr.sw.512
+  return _mm512_mulhrs_epi16(__A,__B); 
+}
+__m512i test_mm512_mask_mulhrs_epi16(__m512i __W, __mmask32 __U, __m512i __A,        __m512i __B) {
+  // CHECK-LABEL: @test_mm512_mask_mulhrs_epi16
+  // CHECK: @llvm.x86.avx512.mask.pmul.hr.sw.512
+  return _mm512_mask_mulhrs_epi16(__W,__U,__A,__B); 
+}
+__m512i test_mm512_maskz_mulhrs_epi16(__mmask32 __U, __m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_maskz_mulhrs_epi16
+  // CHECK: @llvm.x86.avx512.mask.pmul.hr.sw.512
+  return _mm512_maskz_mulhrs_epi16(__U,__A,__B); 
+}
+__m512i test_mm512_mulhi_epi16(__m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_mulhi_epi16
+  // CHECK: @llvm.x86.avx512.mask.pmulh.w.512
+  return _mm512_mulhi_epi16(__A,__B); 
+}
+__m512i test_mm512_mask_mulhi_epi16(__m512i __W, __mmask32 __U, __m512i __A,       __m512i __B) {
+  // CHECK-LABEL: @test_mm512_mask_mulhi_epi16
+  // CHECK: @llvm.x86.avx512.mask.pmulh.w.512
+  return _mm512_mask_mulhi_epi16(__W,__U,__A,__B); 
+}
+__m512i test_mm512_maskz_mulhi_epi16(__mmask32 __U, __m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_maskz_mulhi_epi16
+  // CHECK: @llvm.x86.avx512.mask.pmulh.w.512
+  return _mm512_maskz_mulhi_epi16(__U,__A,__B); 
+}
+__m512i test_mm512_mulhi_epu16(__m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_mulhi_epu16
+  // CHECK: @llvm.x86.avx512.mask.pmulhu.w.512
+  return _mm512_mulhi_epu16(__A,__B); 
+}
+__m512i test_mm512_mask_mulhi_epu16(__m512i __W, __mmask32 __U, __m512i __A,       __m512i __B) {
+  // CHECK-LABEL: @test_mm512_mask_mulhi_epu16
+  // CHECK: @llvm.x86.avx512.mask.pmulhu.w.512
+  return _mm512_mask_mulhi_epu16(__W,__U,__A,__B); 
+}
+__m512i test_mm512_maskz_mulhi_epu16(__mmask32 __U, __m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_maskz_mulhi_epu16
+  // CHECK: @llvm.x86.avx512.mask.pmulhu.w.512
+  return _mm512_maskz_mulhi_epu16(__U,__A,__B); 
+}
+
+__m512i test_mm512_maddubs_epi16(__m512i __X, __m512i __Y) {
+  // CHECK-LABEL: @test_mm512_maddubs_epi16
+  // CHECK: @llvm.x86.avx512.mask.pmaddubs.w.512
+  return _mm512_maddubs_epi16(__X,__Y); 
+}
+__m512i test_mm512_mask_maddubs_epi16(__m512i __W, __mmask32 __U, __m512i __X,         __m512i __Y) {
+  // CHECK-LABEL: @test_mm512_mask_maddubs_epi16
+  // CHECK: @llvm.x86.avx512.mask.pmaddubs.w.512
+  return _mm512_mask_maddubs_epi16(__W,__U,__X,__Y); 
+}
+__m512i test_mm512_maskz_maddubs_epi16(__mmask32 __U, __m512i __X, __m512i __Y) {
+  // CHECK-LABEL: @test_mm512_maskz_maddubs_epi16
+  // CHECK: @llvm.x86.avx512.mask.pmaddubs.w.512
+  return _mm512_maskz_maddubs_epi16(__U,__X,__Y); 
+}
+__m512i test_mm512_madd_epi16(__m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_madd_epi16
+  // CHECK: @llvm.x86.avx512.mask.pmaddw.d.512
+  return _mm512_madd_epi16(__A,__B); 
+}
+__m512i test_mm512_mask_madd_epi16(__m512i __W, __mmask16 __U, __m512i __A,      __m512i __B) {
+  // CHECK-LABEL: @test_mm512_mask_madd_epi16
+  // CHECK: @llvm.x86.avx512.mask.pmaddw.d.512
+  return _mm512_mask_madd_epi16(__W,__U,__A,__B); 
+}
+__m512i test_mm512_maskz_madd_epi16(__mmask16 __U, __m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_maskz_madd_epi16
+  // CHECK: @llvm.x86.avx512.mask.pmaddw.d.512
+  return _mm512_maskz_madd_epi16(__U,__A,__B); 
+}
+
+__m256i test_mm512_cvtsepi16_epi8(__m512i __A) {
+  // CHECK-LABEL: @test_mm512_cvtsepi16_epi8
+  // CHECK: @llvm.x86.avx512.mask.pmovs.wb.512
+  return _mm512_cvtsepi16_epi8(__A); 
+}
+
+__m256i test_mm512_mask_cvtsepi16_epi8(__m256i __O, __mmask32 __M, __m512i __A) {
+  // CHECK-LABEL: @test_mm512_mask_cvtsepi16_epi8
+  // CHECK: @llvm.x86.avx512.mask.pmovs.wb.512
+  return _mm512_mask_cvtsepi16_epi8(__O, __M, __A); 
+}
+
+__m256i test_mm512_maskz_cvtsepi16_epi8(__mmask32 __M, __m512i __A) {
+  // CHECK-LABEL: @test_mm512_maskz_cvtsepi16_epi8
+  // CHECK: @llvm.x86.avx512.mask.pmovs.wb.512
+  return _mm512_maskz_cvtsepi16_epi8(__M, __A); 
+}
+
+__m256i test_mm512_cvtusepi16_epi8(__m512i __A) {
+  // CHECK-LABEL: @test_mm512_cvtusepi16_epi8
+  // CHECK: @llvm.x86.avx512.mask.pmovus.wb.512
+  return _mm512_cvtusepi16_epi8(__A); 
+}
+
+__m256i test_mm512_mask_cvtusepi16_epi8(__m256i __O, __mmask32 __M, __m512i __A) {
+  // CHECK-LABEL: @test_mm512_mask_cvtusepi16_epi8
+  // CHECK: @llvm.x86.avx512.mask.pmovus.wb.512
+  return _mm512_mask_cvtusepi16_epi8(__O, __M, __A); 
+}
+
+__m256i test_mm512_maskz_cvtusepi16_epi8(__mmask32 __M, __m512i __A) {
+  // CHECK-LABEL: @test_mm512_maskz_cvtusepi16_epi8
+  // CHECK: @llvm.x86.avx512.mask.pmovus.wb.512
+  return _mm512_maskz_cvtusepi16_epi8(__M, __A); 
+}
+
+__m256i test_mm512_cvtepi16_epi8(__m512i __A) {
+  // CHECK-LABEL: @test_mm512_cvtepi16_epi8
+  // CHECK: @llvm.x86.avx512.mask.pmov.wb.512
+  return _mm512_cvtepi16_epi8(__A); 
+}
+
+__m256i test_mm512_mask_cvtepi16_epi8(__m256i __O, __mmask32 __M, __m512i __A) {
+  // CHECK-LABEL: @test_mm512_mask_cvtepi16_epi8
+  // CHECK: @llvm.x86.avx512.mask.pmov.wb.512
+  return _mm512_mask_cvtepi16_epi8(__O, __M, __A); 
+}
+
+__m256i test_mm512_maskz_cvtepi16_epi8(__mmask32 __M, __m512i __A) {
+  // CHECK-LABEL: @test_mm512_maskz_cvtepi16_epi8
+  // CHECK: @llvm.x86.avx512.mask.pmov.wb.512
+  return _mm512_maskz_cvtepi16_epi8(__M, __A); 
+}
+
+__m512i test_mm512_unpackhi_epi8(__m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_unpackhi_epi8
+  // CHECK: @llvm.x86.avx512.mask.punpckhb.w.512
+  return _mm512_unpackhi_epi8(__A, __B); 
+}
+
+__m512i test_mm512_mask_unpackhi_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_mask_unpackhi_epi8
+  // CHECK: @llvm.x86.avx512.mask.punpckhb.w.512
+  return _mm512_mask_unpackhi_epi8(__W, __U, __A, __B); 
+}
+
+__m512i test_mm512_maskz_unpackhi_epi8(__mmask64 __U, __m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_maskz_unpackhi_epi8
+  // CHECK: @llvm.x86.avx512.mask.punpckhb.w.512
+  return _mm512_maskz_unpackhi_epi8(__U, __A, __B); 
+}
+
+__m512i test_mm512_unpackhi_epi16(__m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_unpackhi_epi16
+  // CHECK: @llvm.x86.avx512.mask.punpckhw.d.512
+  return _mm512_unpackhi_epi16(__A, __B); 
+}
+
+__m512i test_mm512_mask_unpackhi_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_mask_unpackhi_epi16
+  // CHECK: @llvm.x86.avx512.mask.punpckhw.d.512
+  return _mm512_mask_unpackhi_epi16(__W, __U, __A, __B); 
+}
+
+__m512i test_mm512_maskz_unpackhi_epi16(__mmask32 __U, __m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_maskz_unpackhi_epi16
+  // CHECK: @llvm.x86.avx512.mask.punpckhw.d.512
+  return _mm512_maskz_unpackhi_epi16(__U, __A, __B); 
+}
+
+__m512i test_mm512_unpacklo_epi8(__m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_unpacklo_epi8
+  // CHECK: @llvm.x86.avx512.mask.punpcklb.w.512
+  return _mm512_unpacklo_epi8(__A, __B); 
+}
+
+__m512i test_mm512_mask_unpacklo_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_mask_unpacklo_epi8
+  // CHECK: @llvm.x86.avx512.mask.punpcklb.w.512
+  return _mm512_mask_unpacklo_epi8(__W, __U, __A, __B); 
+}
+
+__m512i test_mm512_maskz_unpacklo_epi8(__mmask64 __U, __m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_maskz_unpacklo_epi8
+  // CHECK: @llvm.x86.avx512.mask.punpcklb.w.512
+  return _mm512_maskz_unpacklo_epi8(__U, __A, __B); 
+}
+
+__m512i test_mm512_unpacklo_epi16(__m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_unpacklo_epi16
+  // CHECK: @llvm.x86.avx512.mask.punpcklw.d.512
+  return _mm512_unpacklo_epi16(__A, __B); 
+}
+
+__m512i test_mm512_mask_unpacklo_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_mask_unpacklo_epi16
+  // CHECK: @llvm.x86.avx512.mask.punpcklw.d.512
+  return _mm512_mask_unpacklo_epi16(__W, __U, __A, __B); 
+}
+
+__m512i test_mm512_maskz_unpacklo_epi16(__mmask32 __U, __m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_maskz_unpacklo_epi16
+  // CHECK: @llvm.x86.avx512.mask.punpcklw.d.512
+  return _mm512_maskz_unpacklo_epi16(__U, __A, __B); 
+}
+
diff --git a/test/CodeGen/avx512cdintrin.c b/test/CodeGen/avx512cdintrin.c
new file mode 100644
index 0000000..1c8b19e
--- /dev/null
+++ b/test/CodeGen/avx512cdintrin.c
@@ -0,0 +1,67 @@
+// RUN: %clang_cc1 %s -triple=x86_64-apple-darwin -target-feature +avx512cd -emit-llvm -o - -Werror | FileCheck %s
+
+// Don't include mm_malloc.h, it's system specific.
+#define __MM_MALLOC_H
+
+#include <immintrin.h>
+
+__m512i test_mm512_conflict_epi64(__m512i __A) {
+  // CHECK-LABEL: @test_mm512_conflict_epi64
+  // CHECK: @llvm.x86.avx512.mask.conflict.q.512
+  return _mm512_conflict_epi64(__A); 
+}
+__m512i test_mm512_mask_conflict_epi64(__m512i __W, __mmask8 __U, __m512i __A) {
+  // CHECK-LABEL: @test_mm512_mask_conflict_epi64
+  // CHECK: @llvm.x86.avx512.mask.conflict.q.512
+  return _mm512_mask_conflict_epi64(__W,__U,__A); 
+}
+__m512i test_mm512_maskz_conflict_epi64(__mmask8 __U, __m512i __A) {
+  // CHECK-LABEL: @test_mm512_maskz_conflict_epi64
+  // CHECK: @llvm.x86.avx512.mask.conflict.q.512
+  return _mm512_maskz_conflict_epi64(__U,__A); 
+}
+__m512i test_mm512_conflict_epi32(__m512i __A) {
+  // CHECK-LABEL: @test_mm512_conflict_epi32
+  // CHECK: @llvm.x86.avx512.mask.conflict.d.512
+  return _mm512_conflict_epi32(__A); 
+}
+__m512i test_mm512_mask_conflict_epi32(__m512i __W, __mmask16 __U, __m512i __A) {
+  // CHECK-LABEL: @test_mm512_mask_conflict_epi32
+  // CHECK: @llvm.x86.avx512.mask.conflict.d.512
+  return _mm512_mask_conflict_epi32(__W,__U,__A); 
+}
+__m512i test_mm512_maskz_conflict_epi32(__mmask16 __U, __m512i __A) {
+  // CHECK-LABEL: @test_mm512_maskz_conflict_epi32
+  // CHECK: @llvm.x86.avx512.mask.conflict.d.512
+  return _mm512_maskz_conflict_epi32(__U,__A); 
+}
+__m512i test_mm512_lzcnt_epi32(__m512i __A) {
+  // CHECK-LABEL: @test_mm512_lzcnt_epi32
+  // CHECK: @llvm.x86.avx512.mask.lzcnt.d.512
+  return _mm512_lzcnt_epi32(__A); 
+}
+__m512i test_mm512_mask_lzcnt_epi32(__m512i __W, __mmask16 __U, __m512i __A) {
+  // CHECK-LABEL: @test_mm512_mask_lzcnt_epi32
+  // CHECK: @llvm.x86.avx512.mask.lzcnt.d.512
+  return _mm512_mask_lzcnt_epi32(__W,__U,__A); 
+}
+__m512i test_mm512_maskz_lzcnt_epi32(__mmask16 __U, __m512i __A) {
+  // CHECK-LABEL: @test_mm512_maskz_lzcnt_epi32
+  // CHECK: @llvm.x86.avx512.mask.lzcnt.d.512
+  return _mm512_maskz_lzcnt_epi32(__U,__A); 
+}
+__m512i test_mm512_lzcnt_epi64(__m512i __A) {
+  // CHECK-LABEL: @test_mm512_lzcnt_epi64
+  // CHECK: @llvm.x86.avx512.mask.lzcnt.q.512
+  return _mm512_lzcnt_epi64(__A); 
+}
+__m512i test_mm512_mask_lzcnt_epi64(__m512i __W, __mmask8 __U, __m512i __A) {
+  // CHECK-LABEL: @test_mm512_mask_lzcnt_epi64
+  // CHECK: @llvm.x86.avx512.mask.lzcnt.q.512
+  return _mm512_mask_lzcnt_epi64(__W,__U,__A); 
+}
+__m512i test_mm512_maskz_lzcnt_epi64(__mmask8 __U, __m512i __A) {
+  // CHECK-LABEL: @test_mm512_maskz_lzcnt_epi64
+  // CHECK: @llvm.x86.avx512.mask.lzcnt.q.512
+  return _mm512_maskz_lzcnt_epi64(__U,__A); 
+}
diff --git a/test/CodeGen/avx512dq-builtins.c b/test/CodeGen/avx512dq-builtins.c
index e35b243..fc09a28 100644
--- a/test/CodeGen/avx512dq-builtins.c
+++ b/test/CodeGen/avx512dq-builtins.c
@@ -1,6 +1,10 @@
-// RUN: %clang_cc1 %s -O0 -triple=x86_64-apple-darwin -ffreestanding -target-feature +avx512dq -emit-llvm -o - -Werror | FileCheck %s
+// RUN: %clang_cc1 %s -triple=x86_64-apple-darwin -target-feature +avx512dq -emit-llvm -o - -Werror | FileCheck %s
+
+// Don't include mm_malloc.h, it's system specific.
+#define __MM_MALLOC_H
 
 #include <immintrin.h>
+
 __m512i test_mm512_mullo_epi64 (__m512i __A, __m512i __B) {
   // CHECK-LABEL: @test_mm512_mullo_epi64
   // CHECK: mul <8 x i64>
@@ -162,3 +166,580 @@
   // CHECK: @llvm.x86.avx512.mask.andn.ps.512
   return (__m512) _mm512_maskz_andnot_ps(__U, __A, __B);
 }
+
+__m512i test_mm512_cvtpd_epi64(__m512d __A) {
+  // CHECK-LABEL: @test_mm512_cvtpd_epi64
+  // CHECK: @llvm.x86.avx512.mask.cvtpd2qq.512
+  return _mm512_cvtpd_epi64(__A); 
+}
+
+__m512i test_mm512_mask_cvtpd_epi64(__m512i __W, __mmask8 __U, __m512d __A) {
+  // CHECK-LABEL: @test_mm512_mask_cvtpd_epi64
+  // CHECK: @llvm.x86.avx512.mask.cvtpd2qq.512
+  return _mm512_mask_cvtpd_epi64(__W, __U, __A); 
+}
+
+__m512i test_mm512_maskz_cvtpd_epi64(__mmask8 __U, __m512d __A) {
+  // CHECK-LABEL: @test_mm512_maskz_cvtpd_epi64
+  // CHECK: @llvm.x86.avx512.mask.cvtpd2qq.512
+  return _mm512_maskz_cvtpd_epi64(__U, __A); 
+}
+
+__m512i test_mm512_cvt_roundpd_epi64(__m512d __A) {
+  // CHECK-LABEL: @test_mm512_cvt_roundpd_epi64
+  // CHECK: @llvm.x86.avx512.mask.cvtpd2qq.512
+  return _mm512_cvt_roundpd_epi64(__A, _MM_FROUND_TO_NEAREST_INT); 
+}
+
+__m512i test_mm512_mask_cvt_roundpd_epi64(__m512i __W, __mmask8 __U, __m512d __A) {
+  // CHECK-LABEL: @test_mm512_mask_cvt_roundpd_epi64
+  // CHECK: @llvm.x86.avx512.mask.cvtpd2qq.512
+  return _mm512_mask_cvt_roundpd_epi64(__W, __U, __A, _MM_FROUND_TO_NEAREST_INT); 
+}
+
+__m512i test_mm512_maskz_cvt_roundpd_epi64(__mmask8 __U, __m512d __A) {
+  // CHECK-LABEL: @test_mm512_maskz_cvt_roundpd_epi64
+  // CHECK: @llvm.x86.avx512.mask.cvtpd2qq.512
+  return _mm512_maskz_cvt_roundpd_epi64(__U, __A, _MM_FROUND_TO_NEAREST_INT); 
+}
+
+__m512i test_mm512_cvtpd_epu64(__m512d __A) {
+  // CHECK-LABEL: @test_mm512_cvtpd_epu64
+  // CHECK: @llvm.x86.avx512.mask.cvtpd2uqq.512
+  return _mm512_cvtpd_epu64(__A); 
+}
+
+__m512i test_mm512_mask_cvtpd_epu64(__m512i __W, __mmask8 __U, __m512d __A) {
+  // CHECK-LABEL: @test_mm512_mask_cvtpd_epu64
+  // CHECK: @llvm.x86.avx512.mask.cvtpd2uqq.512
+  return _mm512_mask_cvtpd_epu64(__W, __U, __A); 
+}
+
+__m512i test_mm512_maskz_cvtpd_epu64(__mmask8 __U, __m512d __A) {
+  // CHECK-LABEL: @test_mm512_maskz_cvtpd_epu64
+  // CHECK: @llvm.x86.avx512.mask.cvtpd2uqq.512
+  return _mm512_maskz_cvtpd_epu64(__U, __A); 
+}
+
+__m512i test_mm512_cvt_roundpd_epu64(__m512d __A) {
+  // CHECK-LABEL: @test_mm512_cvt_roundpd_epu64
+  // CHECK: @llvm.x86.avx512.mask.cvtpd2uqq.512
+  return _mm512_cvt_roundpd_epu64(__A, _MM_FROUND_TO_NEAREST_INT); 
+}
+
+__m512i test_mm512_mask_cvt_roundpd_epu64(__m512i __W, __mmask8 __U, __m512d __A) {
+  // CHECK-LABEL: @test_mm512_mask_cvt_roundpd_epu64
+  // CHECK: @llvm.x86.avx512.mask.cvtpd2uqq.512
+  return _mm512_mask_cvt_roundpd_epu64(__W, __U, __A, _MM_FROUND_TO_NEAREST_INT); 
+}
+
+__m512i test_mm512_maskz_cvt_roundpd_epu64(__mmask8 __U, __m512d __A) {
+  // CHECK-LABEL: @test_mm512_maskz_cvt_roundpd_epu64
+  // CHECK: @llvm.x86.avx512.mask.cvtpd2uqq.512
+  return _mm512_maskz_cvt_roundpd_epu64(__U, __A, _MM_FROUND_TO_NEAREST_INT); 
+}
+
+__m512i test_mm512_cvtps_epi64(__m256 __A) {
+  // CHECK-LABEL: @test_mm512_cvtps_epi64
+  // CHECK: @llvm.x86.avx512.mask.cvtps2qq.512
+  return _mm512_cvtps_epi64(__A); 
+}
+
+__m512i test_mm512_mask_cvtps_epi64(__m512i __W, __mmask8 __U, __m256 __A) {
+  // CHECK-LABEL: @test_mm512_mask_cvtps_epi64
+  // CHECK: @llvm.x86.avx512.mask.cvtps2qq.512
+  return _mm512_mask_cvtps_epi64(__W, __U, __A); 
+}
+
+__m512i test_mm512_maskz_cvtps_epi64(__mmask8 __U, __m256 __A) {
+  // CHECK-LABEL: @test_mm512_maskz_cvtps_epi64
+  // CHECK: @llvm.x86.avx512.mask.cvtps2qq.512
+  return _mm512_maskz_cvtps_epi64(__U, __A); 
+}
+
+__m512i test_mm512_cvt_roundps_epi64(__m256 __A) {
+  // CHECK-LABEL: @test_mm512_cvt_roundps_epi64
+  // CHECK: @llvm.x86.avx512.mask.cvtps2qq.512
+  return _mm512_cvt_roundps_epi64(__A, _MM_FROUND_TO_NEAREST_INT); 
+}
+
+__m512i test_mm512_mask_cvt_roundps_epi64(__m512i __W, __mmask8 __U, __m256 __A) {
+  // CHECK-LABEL: @test_mm512_mask_cvt_roundps_epi64
+  // CHECK: @llvm.x86.avx512.mask.cvtps2qq.512
+  return _mm512_mask_cvt_roundps_epi64(__W, __U, __A, _MM_FROUND_TO_NEAREST_INT); 
+}
+
+__m512i test_mm512_maskz_cvt_roundps_epi64(__mmask8 __U, __m256 __A) {
+  // CHECK-LABEL: @test_mm512_maskz_cvt_roundps_epi64
+  // CHECK: @llvm.x86.avx512.mask.cvtps2qq.512
+  return _mm512_maskz_cvt_roundps_epi64(__U, __A, _MM_FROUND_TO_NEAREST_INT); 
+}
+
+__m512i test_mm512_cvtps_epu64(__m256 __A) {
+  // CHECK-LABEL: @test_mm512_cvtps_epu64
+  // CHECK: @llvm.x86.avx512.mask.cvtps2uqq.512
+  return _mm512_cvtps_epu64(__A); 
+}
+
+__m512i test_mm512_mask_cvtps_epu64(__m512i __W, __mmask8 __U, __m256 __A) {
+  // CHECK-LABEL: @test_mm512_mask_cvtps_epu64
+  // CHECK: @llvm.x86.avx512.mask.cvtps2uqq.512
+  return _mm512_mask_cvtps_epu64(__W, __U, __A); 
+}
+
+__m512i test_mm512_maskz_cvtps_epu64(__mmask8 __U, __m256 __A) {
+  // CHECK-LABEL: @test_mm512_maskz_cvtps_epu64
+  // CHECK: @llvm.x86.avx512.mask.cvtps2uqq.512
+  return _mm512_maskz_cvtps_epu64(__U, __A); 
+}
+
+__m512i test_mm512_cvt_roundps_epu64(__m256 __A) {
+  // CHECK-LABEL: @test_mm512_cvt_roundps_epu64
+  // CHECK: @llvm.x86.avx512.mask.cvtps2uqq.512
+  return _mm512_cvt_roundps_epu64(__A, _MM_FROUND_TO_NEAREST_INT); 
+}
+
+__m512i test_mm512_mask_cvt_roundps_epu64(__m512i __W, __mmask8 __U, __m256 __A) {
+  // CHECK-LABEL: @test_mm512_mask_cvt_roundps_epu64
+  // CHECK: @llvm.x86.avx512.mask.cvtps2uqq.512
+  return _mm512_mask_cvt_roundps_epu64(__W, __U, __A, _MM_FROUND_TO_NEAREST_INT); 
+}
+
+__m512i test_mm512_maskz_cvt_roundps_epu64(__mmask8 __U, __m256 __A) {
+  // CHECK-LABEL: @test_mm512_maskz_cvt_roundps_epu64
+  // CHECK: @llvm.x86.avx512.mask.cvtps2uqq.512
+  return _mm512_maskz_cvt_roundps_epu64(__U, __A, _MM_FROUND_TO_NEAREST_INT); 
+}
+
+__m512d test_mm512_cvtepi64_pd(__m512i __A) {
+  // CHECK-LABEL: @test_mm512_cvtepi64_pd
+  // CHECK: @llvm.x86.avx512.mask.cvtqq2pd.512
+  return _mm512_cvtepi64_pd(__A); 
+}
+
+__m512d test_mm512_mask_cvtepi64_pd(__m512d __W, __mmask8 __U, __m512i __A) {
+  // CHECK-LABEL: @test_mm512_mask_cvtepi64_pd
+  // CHECK: @llvm.x86.avx512.mask.cvtqq2pd.512
+  return _mm512_mask_cvtepi64_pd(__W, __U, __A); 
+}
+
+__m512d test_mm512_maskz_cvtepi64_pd(__mmask8 __U, __m512i __A) {
+  // CHECK-LABEL: @test_mm512_maskz_cvtepi64_pd
+  // CHECK: @llvm.x86.avx512.mask.cvtqq2pd.512
+  return _mm512_maskz_cvtepi64_pd(__U, __A); 
+}
+
+__m512d test_mm512_cvt_roundepi64_pd(__m512i __A) {
+  // CHECK-LABEL: @test_mm512_cvt_roundepi64_pd
+  // CHECK: @llvm.x86.avx512.mask.cvtqq2pd.512
+  return _mm512_cvt_roundepi64_pd(__A, _MM_FROUND_TO_NEAREST_INT); 
+}
+
+__m512d test_mm512_mask_cvt_roundepi64_pd(__m512d __W, __mmask8 __U, __m512i __A) {
+  // CHECK-LABEL: @test_mm512_mask_cvt_roundepi64_pd
+  // CHECK: @llvm.x86.avx512.mask.cvtqq2pd.512
+  return _mm512_mask_cvt_roundepi64_pd(__W, __U, __A, _MM_FROUND_TO_NEAREST_INT); 
+}
+
+__m512d test_mm512_maskz_cvt_roundepi64_pd(__mmask8 __U, __m512i __A) {
+  // CHECK-LABEL: @test_mm512_maskz_cvt_roundepi64_pd
+  // CHECK: @llvm.x86.avx512.mask.cvtqq2pd.512
+  return _mm512_maskz_cvt_roundepi64_pd(__U, __A, _MM_FROUND_TO_NEAREST_INT); 
+}
+
+__m256 test_mm512_cvtepi64_ps(__m512i __A) {
+  // CHECK-LABEL: @test_mm512_cvtepi64_ps
+  // CHECK: @llvm.x86.avx512.mask.cvtqq2ps.512
+  return _mm512_cvtepi64_ps(__A); 
+}
+
+__m256 test_mm512_mask_cvtepi64_ps(__m256 __W, __mmask8 __U, __m512i __A) {
+  // CHECK-LABEL: @test_mm512_mask_cvtepi64_ps
+  // CHECK: @llvm.x86.avx512.mask.cvtqq2ps.512
+  return _mm512_mask_cvtepi64_ps(__W, __U, __A); 
+}
+
+__m256 test_mm512_maskz_cvtepi64_ps(__mmask8 __U, __m512i __A) {
+  // CHECK-LABEL: @test_mm512_maskz_cvtepi64_ps
+  // CHECK: @llvm.x86.avx512.mask.cvtqq2ps.512
+  return _mm512_maskz_cvtepi64_ps(__U, __A); 
+}
+
+__m256 test_mm512_cvt_roundepi64_ps(__m512i __A) {
+  // CHECK-LABEL: @test_mm512_cvt_roundepi64_ps
+  // CHECK: @llvm.x86.avx512.mask.cvtqq2ps.512
+  return _mm512_cvt_roundepi64_ps(__A, _MM_FROUND_TO_NEAREST_INT); 
+}
+
+__m256 test_mm512_mask_cvt_roundepi64_ps(__m256 __W, __mmask8 __U, __m512i __A) {
+  // CHECK-LABEL: @test_mm512_mask_cvt_roundepi64_ps
+  // CHECK: @llvm.x86.avx512.mask.cvtqq2ps.512
+  return _mm512_mask_cvt_roundepi64_ps(__W, __U, __A, _MM_FROUND_TO_NEAREST_INT); 
+}
+
+__m256 test_mm512_maskz_cvt_roundepi64_ps(__mmask8 __U, __m512i __A) {
+  // CHECK-LABEL: @test_mm512_maskz_cvt_roundepi64_ps
+  // CHECK: @llvm.x86.avx512.mask.cvtqq2ps.512
+  return _mm512_maskz_cvt_roundepi64_ps(__U, __A, _MM_FROUND_TO_NEAREST_INT); 
+}
+
+__m512i test_mm512_cvttpd_epi64(__m512d __A) {
+  // CHECK-LABEL: @test_mm512_cvttpd_epi64
+  // CHECK: @llvm.x86.avx512.mask.cvttpd2qq.512
+  return _mm512_cvttpd_epi64(__A); 
+}
+
+__m512i test_mm512_mask_cvttpd_epi64(__m512i __W, __mmask8 __U, __m512d __A) {
+  // CHECK-LABEL: @test_mm512_mask_cvttpd_epi64
+  // CHECK: @llvm.x86.avx512.mask.cvttpd2qq.512
+  return _mm512_mask_cvttpd_epi64(__W, __U, __A); 
+}
+
+__m512i test_mm512_maskz_cvttpd_epi64(__mmask8 __U, __m512d __A) {
+  // CHECK-LABEL: @test_mm512_maskz_cvttpd_epi64
+  // CHECK: @llvm.x86.avx512.mask.cvttpd2qq.512
+  return _mm512_maskz_cvttpd_epi64(__U, __A); 
+}
+
+__m512i test_mm512_cvtt_roundpd_epi64(__m512d __A) {
+  // CHECK-LABEL: @test_mm512_cvtt_roundpd_epi64
+  // CHECK: @llvm.x86.avx512.mask.cvttpd2qq.512
+  return _mm512_cvtt_roundpd_epi64(__A, _MM_FROUND_TO_NEAREST_INT); 
+}
+
+__m512i test_mm512_mask_cvtt_roundpd_epi64(__m512i __W, __mmask8 __U, __m512d __A) {
+  // CHECK-LABEL: @test_mm512_mask_cvtt_roundpd_epi64
+  // CHECK: @llvm.x86.avx512.mask.cvttpd2qq.512
+  return _mm512_mask_cvtt_roundpd_epi64(__W, __U, __A, _MM_FROUND_TO_NEAREST_INT); 
+}
+
+__m512i test_mm512_maskz_cvtt_roundpd_epi64(__mmask8 __U, __m512d __A) {
+  // CHECK-LABEL: @test_mm512_maskz_cvtt_roundpd_epi64
+  // CHECK: @llvm.x86.avx512.mask.cvttpd2qq.512
+  return _mm512_maskz_cvtt_roundpd_epi64(__U, __A, _MM_FROUND_TO_NEAREST_INT); 
+}
+
+__m512i test_mm512_cvttpd_epu64(__m512d __A) {
+  // CHECK-LABEL: @test_mm512_cvttpd_epu64
+  // CHECK: @llvm.x86.avx512.mask.cvttpd2uqq.512
+  return _mm512_cvttpd_epu64(__A); 
+}
+
+__m512i test_mm512_mask_cvttpd_epu64(__m512i __W, __mmask8 __U, __m512d __A) {
+  // CHECK-LABEL: @test_mm512_mask_cvttpd_epu64
+  // CHECK: @llvm.x86.avx512.mask.cvttpd2uqq.512
+  return _mm512_mask_cvttpd_epu64(__W, __U, __A); 
+}
+
+__m512i test_mm512_maskz_cvttpd_epu64(__mmask8 __U, __m512d __A) {
+  // CHECK-LABEL: @test_mm512_maskz_cvttpd_epu64
+  // CHECK: @llvm.x86.avx512.mask.cvttpd2uqq.512
+  return _mm512_maskz_cvttpd_epu64(__U, __A); 
+}
+
+__m512i test_mm512_cvtt_roundpd_epu64(__m512d __A) {
+  // CHECK-LABEL: @test_mm512_cvtt_roundpd_epu64
+  // CHECK: @llvm.x86.avx512.mask.cvttpd2uqq.512
+  return _mm512_cvtt_roundpd_epu64(__A, _MM_FROUND_TO_NEAREST_INT); 
+}
+
+__m512i test_mm512_mask_cvtt_roundpd_epu64(__m512i __W, __mmask8 __U, __m512d __A) {
+  // CHECK-LABEL: @test_mm512_mask_cvtt_roundpd_epu64
+  // CHECK: @llvm.x86.avx512.mask.cvttpd2uqq.512
+  return _mm512_mask_cvtt_roundpd_epu64(__W, __U, __A, _MM_FROUND_TO_NEAREST_INT); 
+}
+
+__m512i test_mm512_maskz_cvtt_roundpd_epu64(__mmask8 __U, __m512d __A) {
+  // CHECK-LABEL: @test_mm512_maskz_cvtt_roundpd_epu64
+  // CHECK: @llvm.x86.avx512.mask.cvttpd2uqq.512
+  return _mm512_maskz_cvtt_roundpd_epu64(__U, __A, _MM_FROUND_TO_NEAREST_INT); 
+}
+
+__m512i test_mm512_cvttps_epi64(__m256 __A) {
+  // CHECK-LABEL: @test_mm512_cvttps_epi64
+  // CHECK: @llvm.x86.avx512.mask.cvttps2qq.512
+  return _mm512_cvttps_epi64(__A); 
+}
+
+__m512i test_mm512_mask_cvttps_epi64(__m512i __W, __mmask8 __U, __m256 __A) {
+  // CHECK-LABEL: @test_mm512_mask_cvttps_epi64
+  // CHECK: @llvm.x86.avx512.mask.cvttps2qq.512
+  return _mm512_mask_cvttps_epi64(__W, __U, __A); 
+}
+
+__m512i test_mm512_maskz_cvttps_epi64(__mmask8 __U, __m256 __A) {
+  // CHECK-LABEL: @test_mm512_maskz_cvttps_epi64
+  // CHECK: @llvm.x86.avx512.mask.cvttps2qq.512
+  return _mm512_maskz_cvttps_epi64(__U, __A); 
+}
+
+__m512i test_mm512_cvtt_roundps_epi64(__m256 __A) {
+  // CHECK-LABEL: @test_mm512_cvtt_roundps_epi64
+  // CHECK: @llvm.x86.avx512.mask.cvttps2qq.512
+  return _mm512_cvtt_roundps_epi64(__A, _MM_FROUND_TO_NEAREST_INT); 
+}
+
+__m512i test_mm512_mask_cvtt_roundps_epi64(__m512i __W, __mmask8 __U, __m256 __A) {
+  // CHECK-LABEL: @test_mm512_mask_cvtt_roundps_epi64
+  // CHECK: @llvm.x86.avx512.mask.cvttps2qq.512
+  return _mm512_mask_cvtt_roundps_epi64(__W, __U, __A, _MM_FROUND_TO_NEAREST_INT); 
+}
+
+__m512i test_mm512_maskz_cvtt_roundps_epi64(__mmask8 __U, __m256 __A) {
+  // CHECK-LABEL: @test_mm512_maskz_cvtt_roundps_epi64
+  // CHECK: @llvm.x86.avx512.mask.cvttps2qq.512
+  return _mm512_maskz_cvtt_roundps_epi64(__U, __A, _MM_FROUND_TO_NEAREST_INT); 
+}
+
+__m512i test_mm512_cvttps_epu64(__m256 __A) {
+  // CHECK-LABEL: @test_mm512_cvttps_epu64
+  // CHECK: @llvm.x86.avx512.mask.cvttps2uqq.512
+  return _mm512_cvttps_epu64(__A); 
+}
+
+__m512i test_mm512_mask_cvttps_epu64(__m512i __W, __mmask8 __U, __m256 __A) {
+  // CHECK-LABEL: @test_mm512_mask_cvttps_epu64
+  // CHECK: @llvm.x86.avx512.mask.cvttps2uqq.512
+  return _mm512_mask_cvttps_epu64(__W, __U, __A); 
+}
+
+__m512i test_mm512_maskz_cvttps_epu64(__mmask8 __U, __m256 __A) {
+  // CHECK-LABEL: @test_mm512_maskz_cvttps_epu64
+  // CHECK: @llvm.x86.avx512.mask.cvttps2uqq.512
+  return _mm512_maskz_cvttps_epu64(__U, __A); 
+}
+
+__m512i test_mm512_cvtt_roundps_epu64(__m256 __A) {
+  // CHECK-LABEL: @test_mm512_cvtt_roundps_epu64
+  // CHECK: @llvm.x86.avx512.mask.cvttps2uqq.512
+  return _mm512_cvtt_roundps_epu64(__A, _MM_FROUND_TO_NEAREST_INT); 
+}
+
+__m512i test_mm512_mask_cvtt_roundps_epu64(__m512i __W, __mmask8 __U, __m256 __A) {
+  // CHECK-LABEL: @test_mm512_mask_cvtt_roundps_epu64
+  // CHECK: @llvm.x86.avx512.mask.cvttps2uqq.512
+  return _mm512_mask_cvtt_roundps_epu64(__W, __U, __A, _MM_FROUND_TO_NEAREST_INT); 
+}
+
+__m512i test_mm512_maskz_cvtt_roundps_epu64(__mmask8 __U, __m256 __A) {
+  // CHECK-LABEL: @test_mm512_maskz_cvtt_roundps_epu64
+  // CHECK: @llvm.x86.avx512.mask.cvttps2uqq.512
+  return _mm512_maskz_cvtt_roundps_epu64(__U, __A, _MM_FROUND_TO_NEAREST_INT); 
+}
+
+__m512d test_mm512_cvtepu64_pd(__m512i __A) {
+  // CHECK-LABEL: @test_mm512_cvtepu64_pd
+  // CHECK: @llvm.x86.avx512.mask.cvtuqq2pd.512
+  return _mm512_cvtepu64_pd(__A); 
+}
+
+__m512d test_mm512_mask_cvtepu64_pd(__m512d __W, __mmask8 __U, __m512i __A) {
+  // CHECK-LABEL: @test_mm512_mask_cvtepu64_pd
+  // CHECK: @llvm.x86.avx512.mask.cvtuqq2pd.512
+  return _mm512_mask_cvtepu64_pd(__W, __U, __A); 
+}
+
+__m512d test_mm512_maskz_cvtepu64_pd(__mmask8 __U, __m512i __A) {
+  // CHECK-LABEL: @test_mm512_maskz_cvtepu64_pd
+  // CHECK: @llvm.x86.avx512.mask.cvtuqq2pd.512
+  return _mm512_maskz_cvtepu64_pd(__U, __A); 
+}
+
+__m512d test_mm512_cvt_roundepu64_pd(__m512i __A) {
+  // CHECK-LABEL: @test_mm512_cvt_roundepu64_pd
+  // CHECK: @llvm.x86.avx512.mask.cvtuqq2pd.512
+  return _mm512_cvt_roundepu64_pd(__A, _MM_FROUND_TO_NEAREST_INT); 
+}
+
+__m512d test_mm512_mask_cvt_roundepu64_pd(__m512d __W, __mmask8 __U, __m512i __A) {
+  // CHECK-LABEL: @test_mm512_mask_cvt_roundepu64_pd
+  // CHECK: @llvm.x86.avx512.mask.cvtuqq2pd.512
+  return _mm512_mask_cvt_roundepu64_pd(__W, __U, __A, _MM_FROUND_TO_NEAREST_INT); 
+}
+
+__m512d test_mm512_maskz_cvt_roundepu64_pd(__mmask8 __U, __m512i __A) {
+  // CHECK-LABEL: @test_mm512_maskz_cvt_roundepu64_pd
+  // CHECK: @llvm.x86.avx512.mask.cvtuqq2pd.512
+  return _mm512_maskz_cvt_roundepu64_pd(__U, __A, _MM_FROUND_TO_NEAREST_INT); 
+}
+
+__m256 test_mm512_cvtepu64_ps(__m512i __A) {
+  // CHECK-LABEL: @test_mm512_cvtepu64_ps
+  // CHECK: @llvm.x86.avx512.mask.cvtuqq2ps.512
+  return _mm512_cvtepu64_ps(__A); 
+}
+
+__m256 test_mm512_mask_cvtepu64_ps(__m256 __W, __mmask8 __U, __m512i __A) {
+  // CHECK-LABEL: @test_mm512_mask_cvtepu64_ps
+  // CHECK: @llvm.x86.avx512.mask.cvtuqq2ps.512
+  return _mm512_mask_cvtepu64_ps(__W, __U, __A); 
+}
+
+__m256 test_mm512_maskz_cvtepu64_ps(__mmask8 __U, __m512i __A) {
+  // CHECK-LABEL: @test_mm512_maskz_cvtepu64_ps
+  // CHECK: @llvm.x86.avx512.mask.cvtuqq2ps.512
+  return _mm512_maskz_cvtepu64_ps(__U, __A); 
+}
+
+__m256 test_mm512_cvt_roundepu64_ps(__m512i __A) {
+  // CHECK-LABEL: @test_mm512_cvt_roundepu64_ps
+  // CHECK: @llvm.x86.avx512.mask.cvtuqq2ps.512
+  return _mm512_cvt_roundepu64_ps(__A, _MM_FROUND_TO_NEAREST_INT); 
+}
+
+__m256 test_mm512_mask_cvt_roundepu64_ps(__m256 __W, __mmask8 __U, __m512i __A) {
+  // CHECK-LABEL: @test_mm512_mask_cvt_roundepu64_ps
+  // CHECK: @llvm.x86.avx512.mask.cvtuqq2ps.512
+  return _mm512_mask_cvt_roundepu64_ps(__W, __U, __A, _MM_FROUND_TO_NEAREST_INT); 
+}
+
+__m256 test_mm512_maskz_cvt_roundepu64_ps(__mmask8 __U, __m512i __A) {
+  // CHECK-LABEL: @test_mm512_maskz_cvt_roundepu64_ps
+  // CHECK: @llvm.x86.avx512.mask.cvtuqq2ps.512
+  return _mm512_maskz_cvt_roundepu64_ps(__U, __A, _MM_FROUND_TO_NEAREST_INT); 
+}
+
+__m512d test_mm512_range_pd(__m512d __A, __m512d __B) {
+  // CHECK-LABEL: @test_mm512_range_pd
+  // CHECK: @llvm.x86.avx512.mask.range.pd.512
+  return _mm512_range_pd(__A, __B, 4); 
+}
+
+__m512d test_mm512_mask_range_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
+  // CHECK-LABEL: @test_mm512_mask_range_pd
+  // CHECK: @llvm.x86.avx512.mask.range.pd.512
+  return _mm512_mask_range_pd(__W, __U, __A, __B, 4); 
+}
+
+__m512d test_mm512_maskz_range_pd(__mmask8 __U, __m512d __A, __m512d __B) {
+  // CHECK-LABEL: @test_mm512_maskz_range_pd
+  // CHECK: @llvm.x86.avx512.mask.range.pd.512
+  return _mm512_maskz_range_pd(__U, __A, __B, 4); 
+}
+
+__m512d test_mm512_range_round_pd(__m512d __A, __m512d __B) {
+  // CHECK-LABEL: @test_mm512_range_round_pd
+  // CHECK: @llvm.x86.avx512.mask.range.pd.512
+  return _mm512_range_round_pd(__A, __B, 4, 8); 
+}
+
+__m512d test_mm512_mask_range_round_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
+  // CHECK-LABEL: @test_mm512_mask_range_round_pd
+  // CHECK: @llvm.x86.avx512.mask.range.pd.512
+  return _mm512_mask_range_round_pd(__W, __U, __A, __B, 4, 8); 
+}
+
+__m512d test_mm512_maskz_range_round_pd(__mmask8 __U, __m512d __A, __m512d __B) {
+  // CHECK-LABEL: @test_mm512_maskz_range_round_pd
+  // CHECK: @llvm.x86.avx512.mask.range.pd.512
+  return _mm512_maskz_range_round_pd(__U, __A, __B, 4, 8); 
+}
+
+__m512 test_mm512_range_ps(__m512 __A, __m512 __B) {
+  // CHECK-LABEL: @test_mm512_range_ps
+  // CHECK: @llvm.x86.avx512.mask.range.ps.512
+  return _mm512_range_ps(__A, __B, 4); 
+}
+
+__m512 test_mm512_mask_range_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
+  // CHECK-LABEL: @test_mm512_mask_range_ps
+  // CHECK: @llvm.x86.avx512.mask.range.ps.512
+  return _mm512_mask_range_ps(__W, __U, __A, __B, 4); 
+}
+
+__m512 test_mm512_maskz_range_ps(__mmask16 __U, __m512 __A, __m512 __B) {
+  // CHECK-LABEL: @test_mm512_maskz_range_ps
+  // CHECK: @llvm.x86.avx512.mask.range.ps.512
+  return _mm512_maskz_range_ps(__U, __A, __B, 4); 
+}
+
+__m512 test_mm512_range_round_ps(__m512 __A, __m512 __B) {
+  // CHECK-LABEL: @test_mm512_range_round_ps
+  // CHECK: @llvm.x86.avx512.mask.range.ps.512
+  return _mm512_range_round_ps(__A, __B, 4, 8); 
+}
+
+__m512 test_mm512_mask_range_round_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
+  // CHECK-LABEL: @test_mm512_mask_range_round_ps
+  // CHECK: @llvm.x86.avx512.mask.range.ps.512
+  return _mm512_mask_range_round_ps(__W, __U, __A, __B, 4, 8); 
+}
+
+__m512 test_mm512_maskz_range_round_ps(__mmask16 __U, __m512 __A, __m512 __B) {
+  // CHECK-LABEL: @test_mm512_maskz_range_round_ps
+  // CHECK: @llvm.x86.avx512.mask.range.ps.512
+  return _mm512_maskz_range_round_ps(__U, __A, __B, 4, 8); 
+}
+
+__m512d test_mm512_reduce_pd(__m512d __A) {
+  // CHECK-LABEL: @test_mm512_reduce_pd
+  // CHECK: @llvm.x86.avx512.mask.reduce.pd.512
+  return _mm512_reduce_pd(__A, 4); 
+}
+
+__m512d test_mm512_mask_reduce_pd(__m512d __W, __mmask8 __U, __m512d __A) {
+  // CHECK-LABEL: @test_mm512_mask_reduce_pd
+  // CHECK: @llvm.x86.avx512.mask.reduce.pd.512
+  return _mm512_mask_reduce_pd(__W, __U, __A, 4); 
+}
+
+__m512d test_mm512_maskz_reduce_pd(__mmask8 __U, __m512d __A) {
+  // CHECK-LABEL: @test_mm512_maskz_reduce_pd
+  // CHECK: @llvm.x86.avx512.mask.reduce.pd.512
+  return _mm512_maskz_reduce_pd(__U, __A, 4); 
+}
+
+__m512 test_mm512_reduce_ps(__m512 __A) {
+  // CHECK-LABEL: @test_mm512_reduce_ps
+  // CHECK: @llvm.x86.avx512.mask.reduce.ps.512
+  return _mm512_reduce_ps(__A, 4); 
+}
+
+__m512 test_mm512_mask_reduce_ps(__m512 __W, __mmask16 __U, __m512 __A) {
+  // CHECK-LABEL: @test_mm512_mask_reduce_ps
+  // CHECK: @llvm.x86.avx512.mask.reduce.ps.512
+  return _mm512_mask_reduce_ps(__W, __U, __A, 4); 
+}
+
+__m512 test_mm512_maskz_reduce_ps(__mmask16 __U, __m512 __A) {
+  // CHECK-LABEL: @test_mm512_maskz_reduce_ps
+  // CHECK: @llvm.x86.avx512.mask.reduce.ps.512
+  return _mm512_maskz_reduce_ps(__U, __A, 4); 
+}
+
+__m512d test_mm512_reduce_round_pd(__m512d __A) {
+  // CHECK-LABEL: @test_mm512_reduce_round_pd
+  // CHECK: @llvm.x86.avx512.mask.reduce.pd.512
+  return _mm512_reduce_round_pd(__A, 4, 8); 
+}
+
+__m512d test_mm512_mask_reduce_round_pd(__m512d __W, __mmask8 __U, __m512d __A) {
+  // CHECK-LABEL: @test_mm512_mask_reduce_round_pd
+  // CHECK: @llvm.x86.avx512.mask.reduce.pd.512
+  return _mm512_mask_reduce_round_pd(__W, __U, __A, 4, 8); 
+}
+
+__m512d test_mm512_maskz_reduce_round_pd(__mmask8 __U, __m512d __A) {
+  // CHECK-LABEL: @test_mm512_maskz_reduce_round_pd
+  // CHECK: @llvm.x86.avx512.mask.reduce.pd.512
+  return _mm512_maskz_reduce_round_pd(__U, __A, 4, 8);
+}
+
+__m512 test_mm512_reduce_round_ps(__m512 __A) {
+  // CHECK-LABEL: @test_mm512_reduce_round_ps
+  // CHECK: @llvm.x86.avx512.mask.reduce.ps.512
+  return _mm512_reduce_round_ps(__A, 4, 8); 
+}
+
+__m512 test_mm512_mask_reduce_round_ps(__m512 __W, __mmask16 __U, __m512 __A) {
+  // CHECK-LABEL: @test_mm512_mask_reduce_round_ps
+  // CHECK: @llvm.x86.avx512.mask.reduce.ps.512
+  return _mm512_mask_reduce_round_ps(__W, __U, __A, 4, 8); 
+}
+
+__m512 test_mm512_maskz_reduce_round_ps(__mmask16 __U, __m512 __A) {
+  // CHECK-LABEL: @test_mm512_maskz_reduce_round_ps
+  // CHECK: @llvm.x86.avx512.mask.reduce.ps.512
+  return _mm512_maskz_reduce_round_ps(__U, __A, 4, 8); 
+}
+
diff --git a/test/CodeGen/avx512er-builtins.c b/test/CodeGen/avx512er-builtins.c
index 993f177..7c6b050 100644
--- a/test/CodeGen/avx512er-builtins.c
+++ b/test/CodeGen/avx512er-builtins.c
@@ -1,4 +1,7 @@
-// RUN: %clang_cc1 %s -O0 -triple=x86_64-apple-darwin -ffreestanding -target-feature +avx512f -target-feature +avx512er -emit-llvm -o - -Werror | FileCheck %s
+// RUN: %clang_cc1 %s -triple=x86_64-apple-darwin -target-feature +avx512f -target-feature +avx512er -emit-llvm -o - -Werror | FileCheck %s
+
+// Don't include mm_malloc.h, it's system specific.
+#define __MM_MALLOC_H
 
 #include <immintrin.h>
 
@@ -9,14 +12,14 @@
 }
 
 __m512d test_mm512_mask_rsqrt28_round_pd(__m512d s, __mmask8 m, __m512d a) {
-  // check-label: @test_mm512_mask_rsqrt28_round_pd
-  // check: @llvm.x86.avx512.rsqrt28.pd
+  // CHECK-LABEL: @test_mm512_mask_rsqrt28_round_pd
+  // CHECK: @llvm.x86.avx512.rsqrt28.pd
   return _mm512_mask_rsqrt28_round_pd(s, m, a, _MM_FROUND_TO_NEAREST_INT);
 }
 
 __m512d test_mm512_maskz_rsqrt28_round_pd(__mmask8 m, __m512d a) {
-  // check-label: @test_mm512_maskz_rsqrt28_round_pd
-  // check: @llvm.x86.avx512.rsqrt28.pd
+  // CHECK-LABEL: @test_mm512_maskz_rsqrt28_round_pd
+  // CHECK: @llvm.x86.avx512.rsqrt28.pd
   return _mm512_maskz_rsqrt28_round_pd(m, a, _MM_FROUND_TO_NEAREST_INT);
 }
 
@@ -27,14 +30,14 @@
 }
 
 __m512d test_mm512_mask_rsqrt28_pd(__m512d s, __mmask8 m, __m512d a) {
-  // check-label: @test_mm512_mask_rsqrt28_pd
-  // check: @llvm.x86.avx512.rsqrt28.pd
+  // CHECK-LABEL: @test_mm512_mask_rsqrt28_pd
+  // CHECK: @llvm.x86.avx512.rsqrt28.pd
   return _mm512_mask_rsqrt28_pd(s, m, a);
 }
 
 __m512d test_mm512_maskz_rsqrt28_pd(__mmask8 m, __m512d a) {
-  // check-label: @test_mm512_maskz_rsqrt28_pd
-  // check: @llvm.x86.avx512.rsqrt28.pd
+  // CHECK-LABEL: @test_mm512_maskz_rsqrt28_pd
+  // CHECK: @llvm.x86.avx512.rsqrt28.pd
   return _mm512_maskz_rsqrt28_pd(m, a);
 }
 
@@ -75,38 +78,38 @@
 }
 
 __m128 test_mm_rsqrt28_round_ss(__m128 a, __m128 b) {
-  // check-label: @test_mm_rsqrt28_round_ss
-  // check: @llvm.x86.avx512.rsqrt28.ss
+  // CHECK-LABEL: @test_mm_rsqrt28_round_ss
+  // CHECK: @llvm.x86.avx512.rsqrt28.ss
   return _mm_rsqrt28_round_ss(a, b, _MM_FROUND_TO_NEAREST_INT);
 }
 
 __m128 test_mm_mask_rsqrt28_round_ss(__m128 s, __mmask16 m, __m128 a, __m128 b) {
-  // check-label: @test_mm_mask_rsqrt28_round_ss
-  // check: @llvm.x86.avx512.rsqrt28.ss
+  // CHECK-LABEL: @test_mm_mask_rsqrt28_round_ss
+  // CHECK: @llvm.x86.avx512.rsqrt28.ss
   return _mm_mask_rsqrt28_round_ss(s, m, a, b, _MM_FROUND_TO_NEAREST_INT);
 }
 
 __m128 test_mm_maskz_rsqrt28_round_ss(__mmask16 m, __m128 a, __m128 b) {
-  // check-label: @test_mm_maskz_rsqrt28_round_ss
-  // check: @llvm.x86.avx512.rsqrt28.ss
+  // CHECK-LABEL: @test_mm_maskz_rsqrt28_round_ss
+  // CHECK: @llvm.x86.avx512.rsqrt28.ss
   return _mm_maskz_rsqrt28_round_ss(m, a, b, _MM_FROUND_TO_NEAREST_INT);
 }
 
 __m128 test_mm_rsqrt28_ss(__m128 a, __m128 b) {
-  // check-label: @test_mm_rsqrt28_ss
-  // check: @llvm.x86.avx512.rsqrt28.ss
+  // CHECK-LABEL: @test_mm_rsqrt28_ss
+  // CHECK: @llvm.x86.avx512.rsqrt28.ss
   return _mm_rsqrt28_ss(a, b);
 }
 
 __m128 test_mm_mask_rsqrt28_ss(__m128 s, __mmask16 m, __m128 a, __m128 b) {
-  // check-label: @test_mm_mask_rsqrt28_ss
-  // check: @llvm.x86.avx512.rsqrt28.ss
+  // CHECK-LABEL: @test_mm_mask_rsqrt28_ss
+  // CHECK: @llvm.x86.avx512.rsqrt28.ss
   return _mm_mask_rsqrt28_ss(s, m, a, b);
 }
 
 __m128 test_mm_maskz_rsqrt28_ss(__mmask16 m, __m128 a, __m128 b) {
-  // check-label: @test_mm_maskz_rsqrt28_ss
-  // check: @llvm.x86.avx512.rsqrt28.ss
+  // CHECK-LABEL: @test_mm_maskz_rsqrt28_ss
+  // CHECK: @llvm.x86.avx512.rsqrt28.ss
   return _mm_maskz_rsqrt28_ss(m, a, b);
 }
 
@@ -135,14 +138,14 @@
 }
 
 __m512d test_mm512_mask_rcp28_round_pd(__m512d s, __mmask8 m, __m512d a) {
-  // check-label: @test_mm512_mask_rcp28_round_pd
-  // check: @llvm.x86.avx512.rcp28.pd
+  // CHECK-LABEL: @test_mm512_mask_rcp28_round_pd
+  // CHECK: @llvm.x86.avx512.rcp28.pd
   return _mm512_mask_rcp28_round_pd(s, m, a, _MM_FROUND_TO_NEAREST_INT);
 }
 
 __m512d test_mm512_maskz_rcp28_round_pd(__mmask8 m, __m512d a) {
-  // check-label: @test_mm512_maskz_rcp28_round_pd
-  // check: @llvm.x86.avx512.rcp28.pd
+  // CHECK-LABEL: @test_mm512_maskz_rcp28_round_pd
+  // CHECK: @llvm.x86.avx512.rcp28.pd
   return _mm512_maskz_rcp28_round_pd(m, a, _MM_FROUND_TO_NEAREST_INT);
 }
 
@@ -153,14 +156,14 @@
 }
 
 __m512d test_mm512_mask_rcp28_pd(__m512d s, __mmask8 m, __m512d a) {
-  // check-label: @test_mm512_mask_rcp28_pd
-  // check: @llvm.x86.avx512.rcp28.pd
+  // CHECK-LABEL: @test_mm512_mask_rcp28_pd
+  // CHECK: @llvm.x86.avx512.rcp28.pd
   return _mm512_mask_rcp28_pd(s, m, a);
 }
 
 __m512d test_mm512_maskz_rcp28_pd(__mmask8 m, __m512d a) {
-  // check-label: @test_mm512_maskz_rcp28_pd
-  // check: @llvm.x86.avx512.rcp28.pd
+  // CHECK-LABEL: @test_mm512_maskz_rcp28_pd
+  // CHECK: @llvm.x86.avx512.rcp28.pd
   return _mm512_maskz_rcp28_pd(m, a);
 }
 
@@ -201,38 +204,38 @@
 }
 
 __m128 test_mm_rcp28_round_ss(__m128 a, __m128 b) {
-  // check-label: @test_mm_rcp28_round_ss
-  // check: @llvm.x86.avx512.rcp28.ss
+  // CHECK-LABEL: @test_mm_rcp28_round_ss
+  // CHECK: @llvm.x86.avx512.rcp28.ss
   return _mm_rcp28_round_ss(a, b, _MM_FROUND_TO_NEAREST_INT);
 }
 
 __m128 test_mm_mask_rcp28_round_ss(__m128 s, __mmask16 m, __m128 a, __m128 b) {
-  // check-label: @test_mm_mask_rcp28_round_ss
-  // check: @llvm.x86.avx512.rcp28.ss
+  // CHECK-LABEL: @test_mm_mask_rcp28_round_ss
+  // CHECK: @llvm.x86.avx512.rcp28.ss
   return _mm_mask_rcp28_round_ss(s, m, a, b, _MM_FROUND_TO_NEAREST_INT);
 }
 
 __m128 test_mm_maskz_rcp28_round_ss(__mmask16 m, __m128 a, __m128 b) {
-  // check-label: @test_mm_maskz_rcp28_round_ss
-  // check: @llvm.x86.avx512.rcp28.ss
+  // CHECK-LABEL: @test_mm_maskz_rcp28_round_ss
+  // CHECK: @llvm.x86.avx512.rcp28.ss
   return _mm_maskz_rcp28_round_ss(m, a, b, _MM_FROUND_TO_NEAREST_INT);
 }
 
 __m128 test_mm_rcp28_ss(__m128 a, __m128 b) {
-  // check-label: @test_mm_rcp28_ss
-  // check: @llvm.x86.avx512.rcp28.ss
+  // CHECK-LABEL: @test_mm_rcp28_ss
+  // CHECK: @llvm.x86.avx512.rcp28.ss
   return _mm_rcp28_ss(a, b);
 }
 
 __m128 test_mm_mask_rcp28_ss(__m128 s, __mmask16 m, __m128 a, __m128 b) {
-  // check-label: @test_mm_mask_rcp28_ss
-  // check: @llvm.x86.avx512.rcp28.ss
+  // CHECK-LABEL: @test_mm_mask_rcp28_ss
+  // CHECK: @llvm.x86.avx512.rcp28.ss
   return _mm_mask_rcp28_ss(s, m, a, b);
 }
 
 __m128 test_mm_maskz_rcp28_ss(__mmask16 m, __m128 a, __m128 b) {
-  // check-label: @test_mm_maskz_rcp28_ss
-  // check: @llvm.x86.avx512.rcp28.ss
+  // CHECK-LABEL: @test_mm_maskz_rcp28_ss
+  // CHECK: @llvm.x86.avx512.rcp28.ss
   return _mm_maskz_rcp28_ss(m, a, b);
 }
 
@@ -279,14 +282,14 @@
 }
 
 __m512d test_mm512_mask_exp2a23_round_pd(__m512d s, __mmask8 m, __m512d a) {
-  // check-label: @test_mm512_mask_exp2a23_round_pd
-  // check: @llvm.x86.avx512.exp2.pd
+  // CHECK-LABEL: @test_mm512_mask_exp2a23_round_pd
+  // CHECK: @llvm.x86.avx512.exp2.pd
   return _mm512_mask_exp2a23_round_pd(s, m, a, _MM_FROUND_TO_NEAREST_INT);
 }
 
 __m512d test_mm512_maskz_exp2a23_round_pd(__mmask8 m, __m512d a) {
-  // check-label: @test_mm512_maskz_exp2a23_round_pd
-  // check: @llvm.x86.avx512.exp2.pd
+  // CHECK-LABEL: @test_mm512_maskz_exp2a23_round_pd
+  // CHECK: @llvm.x86.avx512.exp2.pd
   return _mm512_maskz_exp2a23_round_pd(m, a, _MM_FROUND_TO_NEAREST_INT);
 }
 
@@ -297,14 +300,14 @@
 }
 
 __m512d test_mm512_mask_exp2a23_pd(__m512d s, __mmask8 m, __m512d a) {
-  // check-label: @test_mm512_mask_exp2a23_pd
-  // check: @llvm.x86.avx512.exp2.pd
+  // CHECK-LABEL: @test_mm512_mask_exp2a23_pd
+  // CHECK: @llvm.x86.avx512.exp2.pd
   return _mm512_mask_exp2a23_pd(s, m, a);
 }
 
 __m512d test_mm512_maskz_exp2a23_pd(__mmask8 m, __m512d a) {
-  // check-label: @test_mm512_maskz_exp2a23_pd
-  // check: @llvm.x86.avx512.exp2.pd
+  // CHECK-LABEL: @test_mm512_maskz_exp2a23_pd
+  // CHECK: @llvm.x86.avx512.exp2.pd
   return _mm512_maskz_exp2a23_pd(m, a);
 }
 
diff --git a/test/CodeGen/avx512f-builtins.c b/test/CodeGen/avx512f-builtins.c
index a49a198..c1f4c0e 100644
--- a/test/CodeGen/avx512f-builtins.c
+++ b/test/CodeGen/avx512f-builtins.c
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 %s -O0 -triple=x86_64-apple-darwin -target-feature +avx512f -emit-llvm -o - -Werror | FileCheck %s
+// RUN: %clang_cc1 %s -triple=x86_64-apple-darwin -target-feature +avx512f -emit-llvm -o - -Werror | FileCheck %s
 
 // Don't include mm_malloc.h, it's system specific.
 #define __MM_MALLOC_H
@@ -201,11 +201,486 @@
   return _mm512_broadcastsd_pd(a);
 }
 
-__m512i test_mm512_fmadd_pd(__m512d a, __m512d b, __m512d c)
-{
+__m512d test_mm512_fmadd_round_pd(__m512d __A, __m512d __B, __m512d __C) {
+  // CHECK-LABEL: @test_mm512_fmadd_round_pd
+  // CHECK: @llvm.x86.avx512.mask.vfmadd.pd.512
+  return _mm512_fmadd_round_pd(__A, __B, __C, _MM_FROUND_TO_NEAREST_INT);
+}
+
+__m512d test_mm512_mask_fmadd_round_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) {
+  // CHECK-LABEL: @test_mm512_mask_fmadd_round_pd
+  // CHECK: @llvm.x86.avx512.mask.vfmadd.pd.512
+  return _mm512_mask_fmadd_round_pd(__A, __U, __B, __C, _MM_FROUND_TO_NEAREST_INT);
+}
+__m512d test_mm512_mask3_fmadd_round_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) {
+  // CHECK-LABEL: @test_mm512_mask3_fmadd_round_pd
+  // CHECK: @llvm.x86.avx512.mask3.vfmadd.pd.512
+  return _mm512_mask3_fmadd_round_pd(__A, __B, __C, __U, _MM_FROUND_TO_NEAREST_INT);
+}
+__m512d test_mm512_maskz_fmadd_round_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) {
+  // CHECK-LABEL: @test_mm512_maskz_fmadd_round_pd
+  // CHECK: @llvm.x86.avx512.maskz.vfmadd.pd.512
+  return _mm512_maskz_fmadd_round_pd(__U, __A, __B, __C, _MM_FROUND_TO_NEAREST_INT);
+}
+__m512d test_mm512_fmsub_round_pd(__m512d __A, __m512d __B, __m512d __C) {
+  // CHECK-LABEL: @test_mm512_fmsub_round_pd
+  // CHECK: @llvm.x86.avx512.mask.vfmadd.pd.512
+  return _mm512_fmsub_round_pd(__A, __B, __C, _MM_FROUND_TO_NEAREST_INT);
+}
+__m512d test_mm512_mask_fmsub_round_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) {
+  // CHECK-LABEL: @test_mm512_mask_fmsub_round_pd
+  // CHECK: @llvm.x86.avx512.mask.vfmadd.pd.512
+  return _mm512_mask_fmsub_round_pd(__A, __U, __B, __C, _MM_FROUND_TO_NEAREST_INT);
+}
+__m512d test_mm512_maskz_fmsub_round_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) {
+  // CHECK-LABEL: @test_mm512_maskz_fmsub_round_pd
+  // CHECK: @llvm.x86.avx512.maskz.vfmadd.pd.512
+  return _mm512_maskz_fmsub_round_pd(__U, __A, __B, __C, _MM_FROUND_TO_NEAREST_INT);
+}
+__m512d test_mm512_fnmadd_round_pd(__m512d __A, __m512d __B, __m512d __C) {
+  // CHECK-LABEL: @test_mm512_fnmadd_round_pd
+  // CHECK: @llvm.x86.avx512.mask.vfmadd.pd.512
+  return _mm512_fnmadd_round_pd(__A, __B, __C, _MM_FROUND_TO_NEAREST_INT);
+}
+__m512d test_mm512_mask3_fnmadd_round_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) {
+  // CHECK-LABEL: @test_mm512_mask3_fnmadd_round_pd
+  // CHECK: @llvm.x86.avx512.mask3.vfmadd.pd.512
+  return _mm512_mask3_fnmadd_round_pd(__A, __B, __C, __U, _MM_FROUND_TO_NEAREST_INT);
+}
+__m512d test_mm512_maskz_fnmadd_round_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) {
+  // CHECK-LABEL: @test_mm512_maskz_fnmadd_round_pd
+  // CHECK: @llvm.x86.avx512.maskz.vfmadd.pd.512
+  return _mm512_maskz_fnmadd_round_pd(__U, __A, __B, __C, _MM_FROUND_TO_NEAREST_INT);
+}
+__m512d test_mm512_fnmsub_round_pd(__m512d __A, __m512d __B, __m512d __C) {
+  // CHECK-LABEL: @test_mm512_fnmsub_round_pd
+  // CHECK: @llvm.x86.avx512.mask.vfmadd.pd.512
+  return _mm512_fnmsub_round_pd(__A, __B, __C, _MM_FROUND_TO_NEAREST_INT);
+}
+__m512d test_mm512_maskz_fnmsub_round_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) {
+  // CHECK-LABEL: @test_mm512_maskz_fnmsub_round_pd
+  // CHECK: @llvm.x86.avx512.maskz.vfmadd.pd.512
+  return _mm512_maskz_fnmsub_round_pd(__U, __A, __B, __C, _MM_FROUND_TO_NEAREST_INT);
+}
+__m512d test_mm512_fmadd_pd(__m512d __A, __m512d __B, __m512d __C) {
   // CHECK-LABEL: @test_mm512_fmadd_pd
-  // CHECK: @llvm.x86.fma.mask.vfmadd.pd.512
-  return _mm512_fmadd_pd(a, b, c);
+  // CHECK: @llvm.x86.avx512.mask.vfmadd.pd.512
+  return _mm512_fmadd_pd(__A, __B, __C);
+}
+__m512d test_mm512_mask_fmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) {
+  // CHECK-LABEL: @test_mm512_mask_fmadd_pd
+  // CHECK: @llvm.x86.avx512.mask.vfmadd.pd.512
+  return _mm512_mask_fmadd_pd(__A, __U, __B, __C);
+}
+__m512d test_mm512_mask3_fmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) {
+  // CHECK-LABEL: @test_mm512_mask3_fmadd_pd
+  // CHECK: @llvm.x86.avx512.mask3.vfmadd.pd.512
+  return _mm512_mask3_fmadd_pd(__A, __B, __C, __U);
+}
+__m512d test_mm512_maskz_fmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) {
+  // CHECK-LABEL: @test_mm512_maskz_fmadd_pd
+  // CHECK: @llvm.x86.avx512.maskz.vfmadd.pd.512
+  return _mm512_maskz_fmadd_pd(__U, __A, __B, __C);
+}
+__m512d test_mm512_fmsub_pd(__m512d __A, __m512d __B, __m512d __C) {
+  // CHECK-LABEL: @test_mm512_fmsub_pd
+  // CHECK: @llvm.x86.avx512.mask.vfmadd.pd.512
+  return _mm512_fmsub_pd(__A, __B, __C);
+}
+__m512d test_mm512_mask_fmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) {
+  // CHECK-LABEL: @test_mm512_mask_fmsub_pd
+  // CHECK: @llvm.x86.avx512.mask.vfmadd.pd.512
+  return _mm512_mask_fmsub_pd(__A, __U, __B, __C);
+}
+__m512d test_mm512_maskz_fmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) {
+  // CHECK-LABEL: @test_mm512_maskz_fmsub_pd
+  // CHECK: @llvm.x86.avx512.maskz.vfmadd.pd.512
+  return _mm512_maskz_fmsub_pd(__U, __A, __B, __C);
+}
+__m512d test_mm512_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C) {
+  // CHECK-LABEL: @test_mm512_fnmadd_pd
+  // CHECK: @llvm.x86.avx512.mask.vfmadd.pd.512
+  return _mm512_fnmadd_pd(__A, __B, __C);
+}
+__m512d test_mm512_mask3_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) {
+  // CHECK-LABEL: @test_mm512_mask3_fnmadd_pd
+  // CHECK: @llvm.x86.avx512.mask3.vfmadd.pd.512
+  return _mm512_mask3_fnmadd_pd(__A, __B, __C, __U);
+}
+__m512d test_mm512_maskz_fnmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) {
+  // CHECK-LABEL: @test_mm512_maskz_fnmadd_pd
+  // CHECK: @llvm.x86.avx512.maskz.vfmadd.pd.512
+  return _mm512_maskz_fnmadd_pd(__U, __A, __B, __C);
+}
+__m512d test_mm512_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C) {
+  // CHECK-LABEL: @test_mm512_fnmsub_pd
+  // CHECK: @llvm.x86.avx512.mask.vfmadd.pd.512
+  return _mm512_fnmsub_pd(__A, __B, __C);
+}
+__m512d test_mm512_maskz_fnmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) {
+  // CHECK-LABEL: @test_mm512_maskz_fnmsub_pd
+  // CHECK: @llvm.x86.avx512.maskz.vfmadd.pd.512
+  return _mm512_maskz_fnmsub_pd(__U, __A, __B, __C);
+}
+__m512 test_mm512_fmadd_round_ps(__m512 __A, __m512 __B, __m512 __C) {
+  // CHECK-LABEL: @test_mm512_fmadd_round_ps
+  // CHECK: @llvm.x86.avx512.mask.vfmadd.ps.512
+  return _mm512_fmadd_round_ps(__A, __B, __C, _MM_FROUND_TO_NEAREST_INT);
+}
+__m512 test_mm512_mask_fmadd_round_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) {
+  // CHECK-LABEL: @test_mm512_mask_fmadd_round_ps
+  // CHECK: @llvm.x86.avx512.mask.vfmadd.ps.512
+  return _mm512_mask_fmadd_round_ps(__A, __U, __B, __C, _MM_FROUND_TO_NEAREST_INT);
+}
+__m512 test_mm512_mask3_fmadd_round_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) {
+  // CHECK-LABEL: @test_mm512_mask3_fmadd_round_ps
+  // CHECK: @llvm.x86.avx512.mask3.vfmadd.ps.512
+  return _mm512_mask3_fmadd_round_ps(__A, __B, __C, __U, _MM_FROUND_TO_NEAREST_INT);
+}
+__m512 test_mm512_maskz_fmadd_round_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) {
+  // CHECK-LABEL: @test_mm512_maskz_fmadd_round_ps
+  // CHECK: @llvm.x86.avx512.maskz.vfmadd.ps.512
+  return _mm512_maskz_fmadd_round_ps(__U, __A, __B, __C, _MM_FROUND_TO_NEAREST_INT);
+}
+__m512 test_mm512_fmsub_round_ps(__m512 __A, __m512 __B, __m512 __C) {
+  // CHECK-LABEL: @test_mm512_fmsub_round_ps
+  // CHECK: @llvm.x86.avx512.mask.vfmadd.ps.512
+  return _mm512_fmsub_round_ps(__A, __B, __C, _MM_FROUND_TO_NEAREST_INT);
+}
+__m512 test_mm512_mask_fmsub_round_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) {
+  // CHECK-LABEL: @test_mm512_mask_fmsub_round_ps
+  // CHECK: @llvm.x86.avx512.mask.vfmadd.ps.512
+  return _mm512_mask_fmsub_round_ps(__A, __U, __B, __C, _MM_FROUND_TO_NEAREST_INT);
+}
+__m512 test_mm512_maskz_fmsub_round_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) {
+  // CHECK-LABEL: @test_mm512_maskz_fmsub_round_ps
+  // CHECK: @llvm.x86.avx512.maskz.vfmadd.ps.512
+  return _mm512_maskz_fmsub_round_ps(__U, __A, __B, __C, _MM_FROUND_TO_NEAREST_INT);
+}
+__m512 test_mm512_fnmadd_round_ps(__m512 __A, __m512 __B, __m512 __C) {
+  // CHECK-LABEL: @test_mm512_fnmadd_round_ps
+  // CHECK: @llvm.x86.avx512.mask.vfmadd.ps.512
+  return _mm512_fnmadd_round_ps(__A, __B, __C, _MM_FROUND_TO_NEAREST_INT);
+}
+__m512 test_mm512_mask3_fnmadd_round_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) {
+  // CHECK-LABEL: @test_mm512_mask3_fnmadd_round_ps
+  // CHECK: @llvm.x86.avx512.mask3.vfmadd.ps.512
+  return _mm512_mask3_fnmadd_round_ps(__A, __B, __C, __U, _MM_FROUND_TO_NEAREST_INT);
+}
+__m512 test_mm512_maskz_fnmadd_round_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) {
+  // CHECK-LABEL: @test_mm512_maskz_fnmadd_round_ps
+  // CHECK: @llvm.x86.avx512.maskz.vfmadd.ps.512
+  return _mm512_maskz_fnmadd_round_ps(__U, __A, __B, __C, _MM_FROUND_TO_NEAREST_INT);
+}
+__m512 test_mm512_fnmsub_round_ps(__m512 __A, __m512 __B, __m512 __C) {
+  // CHECK-LABEL: @test_mm512_fnmsub_round_ps
+  // CHECK: @llvm.x86.avx512.mask.vfmadd.ps.512
+  return _mm512_fnmsub_round_ps(__A, __B, __C, _MM_FROUND_TO_NEAREST_INT);
+}
+__m512 test_mm512_maskz_fnmsub_round_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) {
+  // CHECK-LABEL: @test_mm512_maskz_fnmsub_round_ps
+  // CHECK: @llvm.x86.avx512.maskz.vfmadd.ps.512
+  return _mm512_maskz_fnmsub_round_ps(__U, __A, __B, __C, _MM_FROUND_TO_NEAREST_INT);
+}
+__m512 test_mm512_fmadd_ps(__m512 __A, __m512 __B, __m512 __C) {
+  // CHECK-LABEL: @test_mm512_fmadd_ps
+  // CHECK: @llvm.x86.avx512.mask.vfmadd.ps.512
+  return _mm512_fmadd_ps(__A, __B, __C);
+}
+__m512 test_mm512_mask_fmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) {
+  // CHECK-LABEL: @test_mm512_mask_fmadd_ps
+  // CHECK: @llvm.x86.avx512.mask.vfmadd.ps.512
+  return _mm512_mask_fmadd_ps(__A, __U, __B, __C);
+}
+__m512 test_mm512_mask3_fmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) {
+  // CHECK-LABEL: @test_mm512_mask3_fmadd_ps
+  // CHECK: @llvm.x86.avx512.mask3.vfmadd.ps.512
+  return _mm512_mask3_fmadd_ps(__A, __B, __C, __U);
+}
+__m512 test_mm512_maskz_fmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) {
+  // CHECK-LABEL: @test_mm512_maskz_fmadd_ps
+  // CHECK: @llvm.x86.avx512.maskz.vfmadd.ps.512
+  return _mm512_maskz_fmadd_ps(__U, __A, __B, __C);
+}
+__m512 test_mm512_fmsub_ps(__m512 __A, __m512 __B, __m512 __C) {
+  // CHECK-LABEL: @test_mm512_fmsub_ps
+  // CHECK: @llvm.x86.avx512.mask.vfmadd.ps.512
+  return _mm512_fmsub_ps(__A, __B, __C);
+}
+__m512 test_mm512_mask_fmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) {
+  // CHECK-LABEL: @test_mm512_mask_fmsub_ps
+  // CHECK: @llvm.x86.avx512.mask.vfmadd.ps.512
+  return _mm512_mask_fmsub_ps(__A, __U, __B, __C);
+}
+__m512 test_mm512_maskz_fmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) {
+  // CHECK-LABEL: @test_mm512_maskz_fmsub_ps
+  // CHECK: @llvm.x86.avx512.maskz.vfmadd.ps.512
+  return _mm512_maskz_fmsub_ps(__U, __A, __B, __C);
+}
+__m512 test_mm512_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C) {
+  // CHECK-LABEL: @test_mm512_fnmadd_ps
+  // CHECK: @llvm.x86.avx512.mask.vfmadd.ps.512
+  return _mm512_fnmadd_ps(__A, __B, __C);
+}
+__m512 test_mm512_mask3_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) {
+  // CHECK-LABEL: @test_mm512_mask3_fnmadd_ps
+  // CHECK: @llvm.x86.avx512.mask3.vfmadd.ps.512
+  return _mm512_mask3_fnmadd_ps(__A, __B, __C, __U);
+}
+__m512 test_mm512_maskz_fnmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) {
+  // CHECK-LABEL: @test_mm512_maskz_fnmadd_ps
+  // CHECK: @llvm.x86.avx512.maskz.vfmadd.ps.512
+  return _mm512_maskz_fnmadd_ps(__U, __A, __B, __C);
+}
+__m512 test_mm512_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C) {
+  // CHECK-LABEL: @test_mm512_fnmsub_ps
+  // CHECK: @llvm.x86.avx512.mask.vfmadd.ps.512
+  return _mm512_fnmsub_ps(__A, __B, __C);
+}
+__m512 test_mm512_maskz_fnmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) {
+  // CHECK-LABEL: @test_mm512_maskz_fnmsub_ps
+  // CHECK: @llvm.x86.avx512.maskz.vfmadd.ps.512
+  return _mm512_maskz_fnmsub_ps(__U, __A, __B, __C);
+}
+__m512d test_mm512_fmaddsub_round_pd(__m512d __A, __m512d __B, __m512d __C) {
+  // CHECK-LABEL: @test_mm512_fmaddsub_round_pd
+  // CHECK: @llvm.x86.avx512.mask.vfmaddsub.pd.512
+  return _mm512_fmaddsub_round_pd(__A, __B, __C, _MM_FROUND_TO_NEAREST_INT);
+}
+__m512d test_mm512_mask_fmaddsub_round_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) {
+  // CHECK-LABEL: @test_mm512_mask_fmaddsub_round_pd
+  // CHECK: @llvm.x86.avx512.mask.vfmaddsub.pd.512
+  return _mm512_mask_fmaddsub_round_pd(__A, __U, __B, __C, _MM_FROUND_TO_NEAREST_INT);
+}
+__m512d test_mm512_mask3_fmaddsub_round_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) {
+  // CHECK-LABEL: @test_mm512_mask3_fmaddsub_round_pd
+  // CHECK: @llvm.x86.avx512.mask3.vfmaddsub.pd.512
+  return _mm512_mask3_fmaddsub_round_pd(__A, __B, __C, __U, _MM_FROUND_TO_NEAREST_INT);
+}
+__m512d test_mm512_maskz_fmaddsub_round_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) {
+  // CHECK-LABEL: @test_mm512_maskz_fmaddsub_round_pd
+  // CHECK: @llvm.x86.avx512.maskz.vfmaddsub.pd.512
+  return _mm512_maskz_fmaddsub_round_pd(__U, __A, __B, __C, _MM_FROUND_TO_NEAREST_INT);
+}
+__m512d test_mm512_fmsubadd_round_pd(__m512d __A, __m512d __B, __m512d __C) {
+  // CHECK-LABEL: @test_mm512_fmsubadd_round_pd
+  // CHECK: @llvm.x86.avx512.mask.vfmaddsub.pd.512
+  return _mm512_fmsubadd_round_pd(__A, __B, __C, _MM_FROUND_TO_NEAREST_INT);
+}
+__m512d test_mm512_mask_fmsubadd_round_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) {
+  // CHECK-LABEL: @test_mm512_mask_fmsubadd_round_pd
+  // CHECK: @llvm.x86.avx512.mask.vfmaddsub.pd.512
+  return _mm512_mask_fmsubadd_round_pd(__A, __U, __B, __C, _MM_FROUND_TO_NEAREST_INT);
+}
+__m512d test_mm512_maskz_fmsubadd_round_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) {
+  // CHECK-LABEL: @test_mm512_maskz_fmsubadd_round_pd
+  // CHECK: @llvm.x86.avx512.maskz.vfmaddsub.pd.512
+  return _mm512_maskz_fmsubadd_round_pd(__U, __A, __B, __C, _MM_FROUND_TO_NEAREST_INT);
+}
+__m512d test_mm512_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C) {
+  // CHECK-LABEL: @test_mm512_fmaddsub_pd
+  // CHECK: @llvm.x86.avx512.mask.vfmaddsub.pd.512
+  return _mm512_fmaddsub_pd(__A, __B, __C);
+}
+__m512d test_mm512_mask_fmaddsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) {
+  // CHECK-LABEL: @test_mm512_mask_fmaddsub_pd
+  // CHECK: @llvm.x86.avx512.mask.vfmaddsub.pd.512
+  return _mm512_mask_fmaddsub_pd(__A, __U, __B, __C);
+}
+__m512d test_mm512_mask3_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) {
+  // CHECK-LABEL: @test_mm512_mask3_fmaddsub_pd
+  // CHECK: @llvm.x86.avx512.mask3.vfmaddsub.pd.512
+  return _mm512_mask3_fmaddsub_pd(__A, __B, __C, __U);
+}
+__m512d test_mm512_maskz_fmaddsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) {
+  // CHECK-LABEL: @test_mm512_maskz_fmaddsub_pd
+  // CHECK: @llvm.x86.avx512.maskz.vfmaddsub.pd.512
+  return _mm512_maskz_fmaddsub_pd(__U, __A, __B, __C);
+}
+__m512d test_mm512_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C) {
+  // CHECK-LABEL: @test_mm512_fmsubadd_pd
+  // CHECK: @llvm.x86.avx512.mask.vfmaddsub.pd.512
+  return _mm512_fmsubadd_pd(__A, __B, __C);
+}
+__m512d test_mm512_mask_fmsubadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) {
+  // CHECK-LABEL: @test_mm512_mask_fmsubadd_pd
+  // CHECK: @llvm.x86.avx512.mask.vfmaddsub.pd.512
+  return _mm512_mask_fmsubadd_pd(__A, __U, __B, __C);
+}
+__m512d test_mm512_maskz_fmsubadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) {
+  // CHECK-LABEL: @test_mm512_maskz_fmsubadd_pd
+  // CHECK: @llvm.x86.avx512.maskz.vfmaddsub.pd.512
+  return _mm512_maskz_fmsubadd_pd(__U, __A, __B, __C);
+}
+__m512 test_mm512_fmaddsub_round_ps(__m512 __A, __m512 __B, __m512 __C) {
+  // CHECK-LABEL: @test_mm512_fmaddsub_round_ps
+  // CHECK: @llvm.x86.avx512.mask.vfmaddsub.ps.512
+  return _mm512_fmaddsub_round_ps(__A, __B, __C, _MM_FROUND_TO_NEAREST_INT);
+}
+__m512 test_mm512_mask_fmaddsub_round_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) {
+  // CHECK-LABEL: @test_mm512_mask_fmaddsub_round_ps
+  // CHECK: @llvm.x86.avx512.mask.vfmaddsub.ps.512
+  return _mm512_mask_fmaddsub_round_ps(__A, __U, __B, __C, _MM_FROUND_TO_NEAREST_INT);
+}
+__m512 test_mm512_mask3_fmaddsub_round_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) {
+  // CHECK-LABEL: @test_mm512_mask3_fmaddsub_round_ps
+  // CHECK: @llvm.x86.avx512.mask3.vfmaddsub.ps.512
+  return _mm512_mask3_fmaddsub_round_ps(__A, __B, __C, __U, _MM_FROUND_TO_NEAREST_INT);
+}
+__m512 test_mm512_maskz_fmaddsub_round_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) {
+  // CHECK-LABEL: @test_mm512_maskz_fmaddsub_round_ps
+  // CHECK: @llvm.x86.avx512.maskz.vfmaddsub.ps.512
+  return _mm512_maskz_fmaddsub_round_ps(__U, __A, __B, __C, _MM_FROUND_TO_NEAREST_INT);
+}
+__m512 test_mm512_fmsubadd_round_ps(__m512 __A, __m512 __B, __m512 __C) {
+  // CHECK-LABEL: @test_mm512_fmsubadd_round_ps
+  // CHECK: @llvm.x86.avx512.mask.vfmaddsub.ps.512
+  return _mm512_fmsubadd_round_ps(__A, __B, __C, _MM_FROUND_TO_NEAREST_INT);
+}
+__m512 test_mm512_mask_fmsubadd_round_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) {
+  // CHECK-LABEL: @test_mm512_mask_fmsubadd_round_ps
+  // CHECK: @llvm.x86.avx512.mask.vfmaddsub.ps.512
+  return _mm512_mask_fmsubadd_round_ps(__A, __U, __B, __C, _MM_FROUND_TO_NEAREST_INT);
+}
+__m512 test_mm512_maskz_fmsubadd_round_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) {
+  // CHECK-LABEL: @test_mm512_maskz_fmsubadd_round_ps
+  // CHECK: @llvm.x86.avx512.maskz.vfmaddsub.ps.512
+  return _mm512_maskz_fmsubadd_round_ps(__U, __A, __B, __C, _MM_FROUND_TO_NEAREST_INT);
+}
+__m512 test_mm512_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C) {
+  // CHECK-LABEL: @test_mm512_fmaddsub_ps
+  // CHECK: @llvm.x86.avx512.mask.vfmaddsub.ps.512
+  return _mm512_fmaddsub_ps(__A, __B, __C);
+}
+__m512 test_mm512_mask_fmaddsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) {
+  // CHECK-LABEL: @test_mm512_mask_fmaddsub_ps
+  // CHECK: @llvm.x86.avx512.mask.vfmaddsub.ps.512
+  return _mm512_mask_fmaddsub_ps(__A, __U, __B, __C);
+}
+__m512 test_mm512_mask3_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) {
+  // CHECK-LABEL: @test_mm512_mask3_fmaddsub_ps
+  // CHECK: @llvm.x86.avx512.mask3.vfmaddsub.ps.512
+  return _mm512_mask3_fmaddsub_ps(__A, __B, __C, __U);
+}
+__m512 test_mm512_maskz_fmaddsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) {
+  // CHECK-LABEL: @test_mm512_maskz_fmaddsub_ps
+  // CHECK: @llvm.x86.avx512.maskz.vfmaddsub.ps.512
+  return _mm512_maskz_fmaddsub_ps(__U, __A, __B, __C);
+}
+__m512 test_mm512_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C) {
+  // CHECK-LABEL: @test_mm512_fmsubadd_ps
+  // CHECK: @llvm.x86.avx512.mask.vfmaddsub.ps.512
+  return _mm512_fmsubadd_ps(__A, __B, __C);
+}
+__m512 test_mm512_mask_fmsubadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) {
+  // CHECK-LABEL: @test_mm512_mask_fmsubadd_ps
+  // CHECK: @llvm.x86.avx512.mask.vfmaddsub.ps.512
+  return _mm512_mask_fmsubadd_ps(__A, __U, __B, __C);
+}
+__m512 test_mm512_maskz_fmsubadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) {
+  // CHECK-LABEL: @test_mm512_maskz_fmsubadd_ps
+  // CHECK: @llvm.x86.avx512.maskz.vfmaddsub.ps.512
+  return _mm512_maskz_fmsubadd_ps(__U, __A, __B, __C);
+}
+__m512d test_mm512_mask3_fmsub_round_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) {
+  // CHECK-LABEL: @test_mm512_mask3_fmsub_round_pd
+  // CHECK: @llvm.x86.avx512.mask3.vfmsub.pd.512
+  return _mm512_mask3_fmsub_round_pd(__A, __B, __C, __U, _MM_FROUND_TO_NEAREST_INT);
+}
+__m512d test_mm512_mask3_fmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) {
+  // CHECK-LABEL: @test_mm512_mask3_fmsub_pd
+  // CHECK: @llvm.x86.avx512.mask3.vfmsub.pd.512
+  return _mm512_mask3_fmsub_pd(__A, __B, __C, __U);
+}
+__m512 test_mm512_mask3_fmsub_round_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) {
+  // CHECK-LABEL: @test_mm512_mask3_fmsub_round_ps
+  // CHECK: @llvm.x86.avx512.mask3.vfmsub.ps.512
+  return _mm512_mask3_fmsub_round_ps(__A, __B, __C, __U, _MM_FROUND_TO_NEAREST_INT);
+}
+__m512 test_mm512_mask3_fmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) {
+  // CHECK-LABEL: @test_mm512_mask3_fmsub_ps
+  // CHECK: @llvm.x86.avx512.mask3.vfmsub.ps.512
+  return _mm512_mask3_fmsub_ps(__A, __B, __C, __U);
+}
+__m512d test_mm512_mask3_fmsubadd_round_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) {
+  // CHECK-LABEL: @test_mm512_mask3_fmsubadd_round_pd
+  // CHECK: @llvm.x86.avx512.mask3.vfmsubadd.pd.512
+  return _mm512_mask3_fmsubadd_round_pd(__A, __B, __C, __U, _MM_FROUND_TO_NEAREST_INT);
+}
+__m512d test_mm512_mask3_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) {
+  // CHECK-LABEL: @test_mm512_mask3_fmsubadd_pd
+  // CHECK: @llvm.x86.avx512.mask3.vfmsubadd.pd.512
+  return _mm512_mask3_fmsubadd_pd(__A, __B, __C, __U);
+}
+__m512 test_mm512_mask3_fmsubadd_round_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) {
+  // CHECK-LABEL: @test_mm512_mask3_fmsubadd_round_ps
+  // CHECK: @llvm.x86.avx512.mask3.vfmsubadd.ps.512
+  return _mm512_mask3_fmsubadd_round_ps(__A, __B, __C, __U, _MM_FROUND_TO_NEAREST_INT);
+}
+__m512 test_mm512_mask3_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) {
+  // CHECK-LABEL: @test_mm512_mask3_fmsubadd_ps
+  // CHECK: @llvm.x86.avx512.mask3.vfmsubadd.ps.512
+  return _mm512_mask3_fmsubadd_ps(__A, __B, __C, __U);
+}
+__m512d test_mm512_mask_fnmadd_round_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) {
+  // CHECK-LABEL: @test_mm512_mask_fnmadd_round_pd
+  // CHECK: @llvm.x86.avx512.mask.vfnmadd.pd.512
+  return _mm512_mask_fnmadd_round_pd(__A, __U, __B, __C, _MM_FROUND_TO_NEAREST_INT);
+}
+__m512d test_mm512_mask_fnmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) {
+  // CHECK-LABEL: @test_mm512_mask_fnmadd_pd
+  // CHECK: @llvm.x86.avx512.mask.vfnmadd.pd.512
+  return _mm512_mask_fnmadd_pd(__A, __U, __B, __C);
+}
+__m512 test_mm512_mask_fnmadd_round_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) {
+  // CHECK-LABEL: @test_mm512_mask_fnmadd_round_ps
+  // CHECK: @llvm.x86.avx512.mask.vfnmadd.ps.512
+  return _mm512_mask_fnmadd_round_ps(__A, __U, __B, __C, _MM_FROUND_TO_NEAREST_INT);
+}
+__m512 test_mm512_mask_fnmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) {
+  // CHECK-LABEL: @test_mm512_mask_fnmadd_ps
+  // CHECK: @llvm.x86.avx512.mask.vfnmadd.ps.512
+  return _mm512_mask_fnmadd_ps(__A, __U, __B, __C);
+}
+__m512d test_mm512_mask_fnmsub_round_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) {
+  // CHECK-LABEL: @test_mm512_mask_fnmsub_round_pd
+  // CHECK: @llvm.x86.avx512.mask.vfnmsub.pd.512
+  return _mm512_mask_fnmsub_round_pd(__A, __U, __B, __C, _MM_FROUND_TO_NEAREST_INT);
+}
+__m512d test_mm512_mask3_fnmsub_round_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) {
+  // CHECK-LABEL: @test_mm512_mask3_fnmsub_round_pd
+  // CHECK: @llvm.x86.avx512.mask3.vfnmsub.pd.512
+  return _mm512_mask3_fnmsub_round_pd(__A, __B, __C, __U, _MM_FROUND_TO_NEAREST_INT);
+}
+__m512d test_mm512_mask_fnmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) {
+  // CHECK-LABEL: @test_mm512_mask_fnmsub_pd
+  // CHECK: @llvm.x86.avx512.mask.vfnmsub.pd.512
+  return _mm512_mask_fnmsub_pd(__A, __U, __B, __C);
+}
+__m512d test_mm512_mask3_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) {
+  // CHECK-LABEL: @test_mm512_mask3_fnmsub_pd
+  // CHECK: @llvm.x86.avx512.mask3.vfnmsub.pd.512
+  return _mm512_mask3_fnmsub_pd(__A, __B, __C, __U);
+}
+__m512 test_mm512_mask_fnmsub_round_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) {
+  // CHECK-LABEL: @test_mm512_mask_fnmsub_round_ps
+  // CHECK: @llvm.x86.avx512.mask.vfnmsub.ps.512
+  return _mm512_mask_fnmsub_round_ps(__A, __U, __B, __C, _MM_FROUND_TO_NEAREST_INT);
+}
+__m512 test_mm512_mask3_fnmsub_round_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) {
+  // CHECK-LABEL: @test_mm512_mask3_fnmsub_round_ps
+  // CHECK: @llvm.x86.avx512.mask3.vfnmsub.ps.512
+  return _mm512_mask3_fnmsub_round_ps(__A, __B, __C, __U, _MM_FROUND_TO_NEAREST_INT);
+}
+__m512 test_mm512_mask_fnmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) {
+  // CHECK-LABEL: @test_mm512_mask_fnmsub_ps
+  // CHECK: @llvm.x86.avx512.mask.vfnmsub.ps.512
+  return _mm512_mask_fnmsub_ps(__A, __U, __B, __C);
+}
+__m512 test_mm512_mask3_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) {
+  // CHECK-LABEL: @test_mm512_mask3_fnmsub_ps
+  // CHECK: @llvm.x86.avx512.mask3.vfnmsub.ps.512
+  return _mm512_mask3_fnmsub_ps(__A, __B, __C, __U);
 }
 
 __mmask16 test_mm512_cmpeq_epi32_mask(__m512i __a, __m512i __b) {
@@ -297,8 +772,8 @@
 }
 
 __mmask16 test_mm512_cmp_ps_mask(__m512 a, __m512 b) {
-  // check-label: @test_mm512_cmp_ps_mask
-  // check: @llvm.x86.avx512.mask.cmp.ps.512
+  // CHECK-LABEL: @test_mm512_cmp_ps_mask
+  // CHECK: @llvm.x86.avx512.mask.cmp.ps.512
   return _mm512_cmp_ps_mask(a, b, 0);
 }
 
@@ -321,8 +796,8 @@
 }
 
 __mmask8 test_mm512_cmp_pd_mask(__m512d a, __m512d b) {
-  // check-label: @test_mm512_cmp_pd_mask
-  // check: @llvm.x86.avx512.mask.cmp.pd.512
+  // CHECK-LABEL: @test_mm512_cmp_pd_mask
+  // CHECK: @llvm.x86.avx512.mask.cmp.pd.512
   return _mm512_cmp_pd_mask(a, b, 0);
 }
 
@@ -899,3 +1374,528 @@
   //CHECK: mul <16 x i32>
   return _mm512_mullo_epi32(__A,__B);
 }
+
+__m512d test_mm512_add_round_pd(__m512d __A, __m512d __B) {
+  // CHECK-LABEL: @test_mm512_add_round_pd
+  // CHECK: @llvm.x86.avx512.mask.add.pd.512
+  return _mm512_add_round_pd(__A,__B,_MM_FROUND_TO_NEAREST_INT); 
+}
+__m512d test_mm512_mask_add_round_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
+  // CHECK-LABEL: @test_mm512_mask_add_round_pd
+  // CHECK: @llvm.x86.avx512.mask.add.pd.512
+  return _mm512_mask_add_round_pd(__W,__U,__A,__B,_MM_FROUND_TO_NEAREST_INT); 
+}
+__m512d test_mm512_maskz_add_round_pd(__mmask8 __U, __m512d __A, __m512d __B) {
+  // CHECK-LABEL: @test_mm512_maskz_add_round_pd
+  // CHECK: @llvm.x86.avx512.mask.add.pd.512
+  return _mm512_maskz_add_round_pd(__U,__A,__B,_MM_FROUND_TO_NEAREST_INT); 
+}
+__m512d test_mm512_mask_add_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
+  // CHECK-LABEL: @test_mm512_mask_add_pd
+  // CHECK: @llvm.x86.avx512.mask.add.pd.512
+  return _mm512_mask_add_pd(__W,__U,__A,__B); 
+}
+__m512d test_mm512_maskz_add_pd(__mmask8 __U, __m512d __A, __m512d __B) {
+  // CHECK-LABEL: @test_mm512_maskz_add_pd
+  // CHECK: @llvm.x86.avx512.mask.add.pd.512
+  return _mm512_maskz_add_pd(__U,__A,__B); 
+}
+__m512 test_mm512_add_round_ps(__m512 __A, __m512 __B) {
+  // CHECK-LABEL: @test_mm512_add_round_ps
+  // CHECK: @llvm.x86.avx512.mask.add.ps.512
+  return _mm512_add_round_ps(__A,__B,_MM_FROUND_TO_NEAREST_INT); 
+}
+__m512 test_mm512_mask_add_round_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
+  // CHECK-LABEL: @test_mm512_mask_add_round_ps
+  // CHECK: @llvm.x86.avx512.mask.add.ps.512
+  return _mm512_mask_add_round_ps(__W,__U,__A,__B,_MM_FROUND_TO_NEAREST_INT); 
+}
+__m512 test_mm512_maskz_add_round_ps(__mmask16 __U, __m512 __A, __m512 __B) {
+  // CHECK-LABEL: @test_mm512_maskz_add_round_ps
+  // CHECK: @llvm.x86.avx512.mask.add.ps.512
+  return _mm512_maskz_add_round_ps(__U,__A,__B,_MM_FROUND_TO_NEAREST_INT); 
+}
+__m512 test_mm512_mask_add_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
+  // CHECK-LABEL: @test_mm512_mask_add_ps
+  // CHECK: @llvm.x86.avx512.mask.add.ps.512
+  return _mm512_mask_add_ps(__W,__U,__A,__B); 
+}
+__m512 test_mm512_maskz_add_ps(__mmask16 __U, __m512 __A, __m512 __B) {
+  // CHECK-LABEL: @test_mm512_maskz_add_ps
+  // CHECK: @llvm.x86.avx512.mask.add.ps.512
+  return _mm512_maskz_add_ps(__U,__A,__B); 
+}
+__m128 test_mm_add_round_ss(__m128 __A, __m128 __B) {
+  // CHECK-LABEL: @test_mm_add_round_ss
+  // CHECK: @llvm.x86.avx512.mask.add.ss.round
+  return _mm_add_round_ss(__A,__B,_MM_FROUND_TO_NEAREST_INT); 
+}
+__m128 test_mm_mask_add_round_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
+  // CHECK-LABEL: @test_mm_mask_add_round_ss
+  // CHECK: @llvm.x86.avx512.mask.add.ss.round
+  return _mm_mask_add_round_ss(__W,__U,__A,__B,_MM_FROUND_TO_NEAREST_INT); 
+}
+__m128 test_mm_maskz_add_round_ss(__mmask8 __U, __m128 __A, __m128 __B) {
+  // CHECK-LABEL: @test_mm_maskz_add_round_ss
+  // CHECK: @llvm.x86.avx512.mask.add.ss.round
+  return _mm_maskz_add_round_ss(__U,__A,__B,_MM_FROUND_TO_NEAREST_INT); 
+}
+__m128 test_mm_mask_add_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
+  // CHECK-LABEL: @test_mm_mask_add_ss
+  // CHECK: @llvm.x86.avx512.mask.add.ss.round
+  return _mm_mask_add_ss(__W,__U,__A,__B); 
+}
+__m128 test_mm_maskz_add_ss(__mmask8 __U, __m128 __A, __m128 __B) {
+  // CHECK-LABEL: @test_mm_maskz_add_ss
+  // CHECK: @llvm.x86.avx512.mask.add.ss.round
+  return _mm_maskz_add_ss(__U,__A,__B); 
+}
+__m128d test_mm_add_round_sd(__m128d __A, __m128d __B) {
+  // CHECK-LABEL: @test_mm_add_round_sd
+  // CHECK: @llvm.x86.avx512.mask.add.sd.round
+  return _mm_add_round_sd(__A,__B,_MM_FROUND_TO_NEAREST_INT); 
+}
+__m128d test_mm_mask_add_round_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
+  // CHECK-LABEL: @test_mm_mask_add_round_sd
+  // CHECK: @llvm.x86.avx512.mask.add.sd.round
+  return _mm_mask_add_round_sd(__W,__U,__A,__B,_MM_FROUND_TO_NEAREST_INT); 
+}
+__m128d test_mm_maskz_add_round_sd(__mmask8 __U, __m128d __A, __m128d __B) {
+  // CHECK-LABEL: @test_mm_maskz_add_round_sd
+  // CHECK: @llvm.x86.avx512.mask.add.sd.round
+  return _mm_maskz_add_round_sd(__U,__A,__B,_MM_FROUND_TO_NEAREST_INT); 
+}
+__m128d test_mm_mask_add_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
+  // CHECK-LABEL: @test_mm_mask_add_sd
+  // CHECK: @llvm.x86.avx512.mask.add.sd.round
+  return _mm_mask_add_sd(__W,__U,__A,__B); 
+}
+__m128d test_mm_maskz_add_sd(__mmask8 __U, __m128d __A, __m128d __B) {
+  // CHECK-LABEL: @test_mm_maskz_add_sd
+  // CHECK: @llvm.x86.avx512.mask.add.sd.round
+  return _mm_maskz_add_sd(__U,__A,__B); 
+}
+__m512d test_mm512_sub_round_pd(__m512d __A, __m512d __B) {
+  // CHECK-LABEL: @test_mm512_sub_round_pd
+  // CHECK: @llvm.x86.avx512.mask.sub.pd.512
+  return _mm512_sub_round_pd(__A,__B,_MM_FROUND_TO_NEAREST_INT); 
+}
+__m512d test_mm512_mask_sub_round_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
+  // CHECK-LABEL: @test_mm512_mask_sub_round_pd
+  // CHECK: @llvm.x86.avx512.mask.sub.pd.512
+  return _mm512_mask_sub_round_pd(__W,__U,__A,__B,_MM_FROUND_TO_NEAREST_INT); 
+}
+__m512d test_mm512_maskz_sub_round_pd(__mmask8 __U, __m512d __A, __m512d __B) {
+  // CHECK-LABEL: @test_mm512_maskz_sub_round_pd
+  // CHECK: @llvm.x86.avx512.mask.sub.pd.512
+  return _mm512_maskz_sub_round_pd(__U,__A,__B,_MM_FROUND_TO_NEAREST_INT); 
+}
+__m512d test_mm512_mask_sub_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
+  // CHECK-LABEL: @test_mm512_mask_sub_pd
+  // CHECK: @llvm.x86.avx512.mask.sub.pd.512
+  return _mm512_mask_sub_pd(__W,__U,__A,__B); 
+}
+__m512d test_mm512_maskz_sub_pd(__mmask8 __U, __m512d __A, __m512d __B) {
+  // CHECK-LABEL: @test_mm512_maskz_sub_pd
+  // CHECK: @llvm.x86.avx512.mask.sub.pd.512
+  return _mm512_maskz_sub_pd(__U,__A,__B); 
+}
+__m512 test_mm512_sub_round_ps(__m512 __A, __m512 __B) {
+  // CHECK-LABEL: @test_mm512_sub_round_ps
+  // CHECK: @llvm.x86.avx512.mask.sub.ps.512
+  return _mm512_sub_round_ps(__A,__B,_MM_FROUND_TO_NEAREST_INT); 
+}
+__m512 test_mm512_mask_sub_round_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
+  // CHECK-LABEL: @test_mm512_mask_sub_round_ps
+  // CHECK: @llvm.x86.avx512.mask.sub.ps.512
+  return _mm512_mask_sub_round_ps(__W,__U,__A,__B,_MM_FROUND_TO_NEAREST_INT); 
+}
+__m512 test_mm512_maskz_sub_round_ps(__mmask16 __U, __m512 __A, __m512 __B) {
+  // CHECK-LABEL: @test_mm512_maskz_sub_round_ps
+  // CHECK: @llvm.x86.avx512.mask.sub.ps.512
+  return _mm512_maskz_sub_round_ps(__U,__A,__B,_MM_FROUND_TO_NEAREST_INT); 
+}
+__m512 test_mm512_mask_sub_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
+  // CHECK-LABEL: @test_mm512_mask_sub_ps
+  // CHECK: @llvm.x86.avx512.mask.sub.ps.512
+  return _mm512_mask_sub_ps(__W,__U,__A,__B); 
+}
+__m512 test_mm512_maskz_sub_ps(__mmask16 __U, __m512 __A, __m512 __B) {
+  // CHECK-LABEL: @test_mm512_maskz_sub_ps
+  // CHECK: @llvm.x86.avx512.mask.sub.ps.512
+  return _mm512_maskz_sub_ps(__U,__A,__B); 
+}
+__m128 test_mm_sub_round_ss(__m128 __A, __m128 __B) {
+  // CHECK-LABEL: @test_mm_sub_round_ss
+  // CHECK: @llvm.x86.avx512.mask.sub.ss.round
+  return _mm_sub_round_ss(__A,__B,_MM_FROUND_TO_NEAREST_INT); 
+}
+__m128 test_mm_mask_sub_round_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
+  // CHECK-LABEL: @test_mm_mask_sub_round_ss
+  // CHECK: @llvm.x86.avx512.mask.sub.ss.round
+  return _mm_mask_sub_round_ss(__W,__U,__A,__B,_MM_FROUND_TO_NEAREST_INT); 
+}
+__m128 test_mm_maskz_sub_round_ss(__mmask8 __U, __m128 __A, __m128 __B) {
+  // CHECK-LABEL: @test_mm_maskz_sub_round_ss
+  // CHECK: @llvm.x86.avx512.mask.sub.ss.round
+  return _mm_maskz_sub_round_ss(__U,__A,__B,_MM_FROUND_TO_NEAREST_INT); 
+}
+__m128 test_mm_mask_sub_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
+  // CHECK-LABEL: @test_mm_mask_sub_ss
+  // CHECK: @llvm.x86.avx512.mask.sub.ss.round
+  return _mm_mask_sub_ss(__W,__U,__A,__B); 
+}
+__m128 test_mm_maskz_sub_ss(__mmask8 __U, __m128 __A, __m128 __B) {
+  // CHECK-LABEL: @test_mm_maskz_sub_ss
+  // CHECK: @llvm.x86.avx512.mask.sub.ss.round
+  return _mm_maskz_sub_ss(__U,__A,__B); 
+}
+__m128d test_mm_sub_round_sd(__m128d __A, __m128d __B) {
+  // CHECK-LABEL: @test_mm_sub_round_sd
+  // CHECK: @llvm.x86.avx512.mask.sub.sd.round
+  return _mm_sub_round_sd(__A,__B,_MM_FROUND_TO_NEAREST_INT); 
+}
+__m128d test_mm_mask_sub_round_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
+  // CHECK-LABEL: @test_mm_mask_sub_round_sd
+  // CHECK: @llvm.x86.avx512.mask.sub.sd.round
+  return _mm_mask_sub_round_sd(__W,__U,__A,__B,_MM_FROUND_TO_NEAREST_INT); 
+}
+__m128d test_mm_maskz_sub_round_sd(__mmask8 __U, __m128d __A, __m128d __B) {
+  // CHECK-LABEL: @test_mm_maskz_sub_round_sd
+  // CHECK: @llvm.x86.avx512.mask.sub.sd.round
+  return _mm_maskz_sub_round_sd(__U,__A,__B,_MM_FROUND_TO_NEAREST_INT); 
+}
+__m128d test_mm_mask_sub_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
+  // CHECK-LABEL: @test_mm_mask_sub_sd
+  // CHECK: @llvm.x86.avx512.mask.sub.sd.round
+  return _mm_mask_sub_sd(__W,__U,__A,__B); 
+}
+__m128d test_mm_maskz_sub_sd(__mmask8 __U, __m128d __A, __m128d __B) {
+  // CHECK-LABEL: @test_mm_maskz_sub_sd
+  // CHECK: @llvm.x86.avx512.mask.sub.sd.round
+  return _mm_maskz_sub_sd(__U,__A,__B); 
+}
+__m512d test_mm512_mul_round_pd(__m512d __A, __m512d __B) {
+  // CHECK-LABEL: @test_mm512_mul_round_pd
+  // CHECK: @llvm.x86.avx512.mask.mul.pd.512
+  return _mm512_mul_round_pd(__A,__B,_MM_FROUND_TO_NEAREST_INT); 
+}
+__m512d test_mm512_mask_mul_round_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
+  // CHECK-LABEL: @test_mm512_mask_mul_round_pd
+  // CHECK: @llvm.x86.avx512.mask.mul.pd.512
+  return _mm512_mask_mul_round_pd(__W,__U,__A,__B,_MM_FROUND_TO_NEAREST_INT); 
+}
+__m512d test_mm512_maskz_mul_round_pd(__mmask8 __U, __m512d __A, __m512d __B) {
+  // CHECK-LABEL: @test_mm512_maskz_mul_round_pd
+  // CHECK: @llvm.x86.avx512.mask.mul.pd.512
+  return _mm512_maskz_mul_round_pd(__U,__A,__B,_MM_FROUND_TO_NEAREST_INT); 
+}
+__m512d test_mm512_mask_mul_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
+  // CHECK-LABEL: @test_mm512_mask_mul_pd
+  // CHECK: @llvm.x86.avx512.mask.mul.pd.512
+  return _mm512_mask_mul_pd(__W,__U,__A,__B); 
+}
+__m512d test_mm512_maskz_mul_pd(__mmask8 __U, __m512d __A, __m512d __B) {
+  // CHECK-LABEL: @test_mm512_maskz_mul_pd
+  // CHECK: @llvm.x86.avx512.mask.mul.pd.512
+  return _mm512_maskz_mul_pd(__U,__A,__B); 
+}
+__m512 test_mm512_mul_round_ps(__m512 __A, __m512 __B) {
+  // CHECK-LABEL: @test_mm512_mul_round_ps
+  // CHECK: @llvm.x86.avx512.mask.mul.ps.512
+  return _mm512_mul_round_ps(__A,__B,_MM_FROUND_TO_NEAREST_INT); 
+}
+__m512 test_mm512_mask_mul_round_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
+  // CHECK-LABEL: @test_mm512_mask_mul_round_ps
+  // CHECK: @llvm.x86.avx512.mask.mul.ps.512
+  return _mm512_mask_mul_round_ps(__W,__U,__A,__B,_MM_FROUND_TO_NEAREST_INT); 
+}
+__m512 test_mm512_maskz_mul_round_ps(__mmask16 __U, __m512 __A, __m512 __B) {
+  // CHECK-LABEL: @test_mm512_maskz_mul_round_ps
+  // CHECK: @llvm.x86.avx512.mask.mul.ps.512
+  return _mm512_maskz_mul_round_ps(__U,__A,__B,_MM_FROUND_TO_NEAREST_INT); 
+}
+__m512 test_mm512_mask_mul_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
+  // CHECK-LABEL: @test_mm512_mask_mul_ps
+  // CHECK: @llvm.x86.avx512.mask.mul.ps.512
+  return _mm512_mask_mul_ps(__W,__U,__A,__B); 
+}
+__m512 test_mm512_maskz_mul_ps(__mmask16 __U, __m512 __A, __m512 __B) {
+  // CHECK-LABEL: @test_mm512_maskz_mul_ps
+  // CHECK: @llvm.x86.avx512.mask.mul.ps.512
+  return _mm512_maskz_mul_ps(__U,__A,__B); 
+}
+__m128 test_mm_mul_round_ss(__m128 __A, __m128 __B) {
+  // CHECK-LABEL: @test_mm_mul_round_ss
+  // CHECK: @llvm.x86.avx512.mask.mul.ss.round
+  return _mm_mul_round_ss(__A,__B,_MM_FROUND_TO_NEAREST_INT); 
+}
+__m128 test_mm_mask_mul_round_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
+  // CHECK-LABEL: @test_mm_mask_mul_round_ss
+  // CHECK: @llvm.x86.avx512.mask.mul.ss.round
+  return _mm_mask_mul_round_ss(__W,__U,__A,__B,_MM_FROUND_TO_NEAREST_INT); 
+}
+__m128 test_mm_maskz_mul_round_ss(__mmask8 __U, __m128 __A, __m128 __B) {
+  // CHECK-LABEL: @test_mm_maskz_mul_round_ss
+  // CHECK: @llvm.x86.avx512.mask.mul.ss.round
+  return _mm_maskz_mul_round_ss(__U,__A,__B,_MM_FROUND_TO_NEAREST_INT); 
+}
+__m128 test_mm_mask_mul_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
+  // CHECK-LABEL: @test_mm_mask_mul_ss
+  // CHECK: @llvm.x86.avx512.mask.mul.ss.round
+  return _mm_mask_mul_ss(__W,__U,__A,__B); 
+}
+__m128 test_mm_maskz_mul_ss(__mmask8 __U, __m128 __A, __m128 __B) {
+  // CHECK-LABEL: @test_mm_maskz_mul_ss
+  // CHECK: @llvm.x86.avx512.mask.mul.ss.round
+  return _mm_maskz_mul_ss(__U,__A,__B); 
+}
+__m128d test_mm_mul_round_sd(__m128d __A, __m128d __B) {
+  // CHECK-LABEL: @test_mm_mul_round_sd
+  // CHECK: @llvm.x86.avx512.mask.mul.sd.round
+  return _mm_mul_round_sd(__A,__B,_MM_FROUND_TO_NEAREST_INT); 
+}
+__m128d test_mm_mask_mul_round_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
+  // CHECK-LABEL: @test_mm_mask_mul_round_sd
+  // CHECK: @llvm.x86.avx512.mask.mul.sd.round
+  return _mm_mask_mul_round_sd(__W,__U,__A,__B,_MM_FROUND_TO_NEAREST_INT); 
+}
+__m128d test_mm_maskz_mul_round_sd(__mmask8 __U, __m128d __A, __m128d __B) {
+  // CHECK-LABEL: @test_mm_maskz_mul_round_sd
+  // CHECK: @llvm.x86.avx512.mask.mul.sd.round
+  return _mm_maskz_mul_round_sd(__U,__A,__B,_MM_FROUND_TO_NEAREST_INT); 
+}
+__m128d test_mm_mask_mul_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
+  // CHECK-LABEL: @test_mm_mask_mul_sd
+  // CHECK: @llvm.x86.avx512.mask.mul.sd.round
+  return _mm_mask_mul_sd(__W,__U,__A,__B); 
+}
+__m128d test_mm_maskz_mul_sd(__mmask8 __U, __m128d __A, __m128d __B) {
+  // CHECK-LABEL: @test_mm_maskz_mul_sd
+  // CHECK: @llvm.x86.avx512.mask.mul.sd.round
+  return _mm_maskz_mul_sd(__U,__A,__B); 
+}
+__m512d test_mm512_div_round_pd(__m512d __A, __m512d __B) {
+  // CHECK-LABEL: @test_mm512_div_round_pd
+  // CHECK: @llvm.x86.avx512.mask.div.pd.512
+  return _mm512_div_round_pd(__A,__B,_MM_FROUND_TO_NEAREST_INT); 
+}
+__m512d test_mm512_mask_div_round_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
+  // CHECK-LABEL: @test_mm512_mask_div_round_pd
+  // CHECK: @llvm.x86.avx512.mask.div.pd.512
+  return _mm512_mask_div_round_pd(__W,__U,__A,__B,_MM_FROUND_TO_NEAREST_INT); 
+}
+__m512d test_mm512_maskz_div_round_pd(__mmask8 __U, __m512d __A, __m512d __B) {
+  // CHECK-LABEL: @test_mm512_maskz_div_round_pd
+  // CHECK: @llvm.x86.avx512.mask.div.pd.512
+  return _mm512_maskz_div_round_pd(__U,__A,__B,_MM_FROUND_TO_NEAREST_INT); 
+}
+__m512d test_mm512_mask_div_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
+  // CHECK-LABEL: @test_mm512_mask_div_pd
+  // CHECK: @llvm.x86.avx512.mask.div.pd.512
+  return _mm512_mask_div_pd(__W,__U,__A,__B); 
+}
+__m512d test_mm512_maskz_div_pd(__mmask8 __U, __m512d __A, __m512d __B) {
+  // CHECK-LABEL: @test_mm512_maskz_div_pd
+  // CHECK: @llvm.x86.avx512.mask.div.pd.512
+  return _mm512_maskz_div_pd(__U,__A,__B); 
+}
+__m512 test_mm512_div_round_ps(__m512 __A, __m512 __B) {
+  // CHECK-LABEL: @test_mm512_div_round_ps
+  // CHECK: @llvm.x86.avx512.mask.div.ps.512
+  return _mm512_div_round_ps(__A,__B,_MM_FROUND_TO_NEAREST_INT); 
+}
+__m512 test_mm512_mask_div_round_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
+  // CHECK-LABEL: @test_mm512_mask_div_round_ps
+  // CHECK: @llvm.x86.avx512.mask.div.ps.512
+  return _mm512_mask_div_round_ps(__W,__U,__A,__B,_MM_FROUND_TO_NEAREST_INT); 
+}
+__m512 test_mm512_maskz_div_round_ps(__mmask16 __U, __m512 __A, __m512 __B) {
+  // CHECK-LABEL: @test_mm512_maskz_div_round_ps
+  // CHECK: @llvm.x86.avx512.mask.div.ps.512
+  return _mm512_maskz_div_round_ps(__U,__A,__B,_MM_FROUND_TO_NEAREST_INT); 
+}
+__m512 test_mm512_mask_div_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
+  // CHECK-LABEL: @test_mm512_mask_div_ps
+  // CHECK: @llvm.x86.avx512.mask.div.ps.512
+  return _mm512_mask_div_ps(__W,__U,__A,__B); 
+}
+__m512 test_mm512_maskz_div_ps(__mmask16 __U, __m512 __A, __m512 __B) {
+  // CHECK-LABEL: @test_mm512_maskz_div_ps
+  // CHECK: @llvm.x86.avx512.mask.div.ps.512
+  return _mm512_maskz_div_ps(__U,__A,__B); 
+}
+__m128 test_mm_div_round_ss(__m128 __A, __m128 __B) {
+  // CHECK-LABEL: @test_mm_div_round_ss
+  // CHECK: @llvm.x86.avx512.mask.div.ss.round
+  return _mm_div_round_ss(__A,__B,_MM_FROUND_TO_NEAREST_INT); 
+}
+__m128 test_mm_mask_div_round_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
+  // CHECK-LABEL: @test_mm_mask_div_round_ss
+  // CHECK: @llvm.x86.avx512.mask.div.ss.round
+  return _mm_mask_div_round_ss(__W,__U,__A,__B,_MM_FROUND_TO_NEAREST_INT); 
+}
+__m128 test_mm_maskz_div_round_ss(__mmask8 __U, __m128 __A, __m128 __B) {
+  // CHECK-LABEL: @test_mm_maskz_div_round_ss
+  // CHECK: @llvm.x86.avx512.mask.div.ss.round
+  return _mm_maskz_div_round_ss(__U,__A,__B,_MM_FROUND_TO_NEAREST_INT); 
+}
+__m128 test_mm_mask_div_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
+  // CHECK-LABEL: @test_mm_mask_div_ss
+  // CHECK: @llvm.x86.avx512.mask.div.ss.round
+  return _mm_mask_div_ss(__W,__U,__A,__B); 
+}
+__m128 test_mm_maskz_div_ss(__mmask8 __U, __m128 __A, __m128 __B) {
+  // CHECK-LABEL: @test_mm_maskz_div_ss
+  // CHECK: @llvm.x86.avx512.mask.div.ss.round
+  return _mm_maskz_div_ss(__U,__A,__B); 
+}
+__m128d test_mm_div_round_sd(__m128d __A, __m128d __B) {
+  // CHECK-LABEL: @test_mm_div_round_sd
+  // CHECK: @llvm.x86.avx512.mask.div.sd.round
+  return _mm_div_round_sd(__A,__B,_MM_FROUND_TO_NEAREST_INT); 
+}
+__m128d test_mm_mask_div_round_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
+  // CHECK-LABEL: @test_mm_mask_div_round_sd
+  // CHECK: @llvm.x86.avx512.mask.div.sd.round
+  return _mm_mask_div_round_sd(__W,__U,__A,__B,_MM_FROUND_TO_NEAREST_INT); 
+}
+__m128d test_mm_maskz_div_round_sd(__mmask8 __U, __m128d __A, __m128d __B) {
+  // CHECK-LABEL: @test_mm_maskz_div_round_sd
+  // CHECK: @llvm.x86.avx512.mask.div.sd.round
+  return _mm_maskz_div_round_sd(__U,__A,__B,_MM_FROUND_TO_NEAREST_INT); 
+}
+__m128d test_mm_mask_div_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
+  // CHECK-LABEL: @test_mm_mask_div_sd
+  // CHECK: @llvm.x86.avx512.mask.div.sd.round
+  return _mm_mask_div_sd(__W,__U,__A,__B); 
+}
+__m128d test_mm_maskz_div_sd(__mmask8 __U, __m128d __A, __m128d __B) {
+  // CHECK-LABEL: @test_mm_maskz_div_sd
+  // CHECK: @llvm.x86.avx512.mask.div.sd.round
+  return _mm_maskz_div_sd(__U,__A,__B); 
+}
+__m128 test_mm_max_round_ss(__m128 __A, __m128 __B) {
+  // CHECK-LABEL: @test_mm_max_round_ss
+  // CHECK: @llvm.x86.avx512.mask.max.ss.round
+  return _mm_max_round_ss(__A,__B,0x08); 
+}
+__m128 test_mm_mask_max_round_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
+  // CHECK-LABEL: @test_mm_mask_max_round_ss
+  // CHECK: @llvm.x86.avx512.mask.max.ss.round
+  return _mm_mask_max_round_ss(__W,__U,__A,__B,0x08); 
+}
+__m128 test_mm_maskz_max_round_ss(__mmask8 __U, __m128 __A, __m128 __B) {
+  // CHECK-LABEL: @test_mm_maskz_max_round_ss
+  // CHECK: @llvm.x86.avx512.mask.max.ss.round
+  return _mm_maskz_max_round_ss(__U,__A,__B,0x08); 
+}
+__m128 test_mm_mask_max_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
+  // CHECK-LABEL: @test_mm_mask_max_ss
+  // CHECK: @llvm.x86.avx512.mask.max.ss.round
+  return _mm_mask_max_ss(__W,__U,__A,__B); 
+}
+__m128 test_mm_maskz_max_ss(__mmask8 __U, __m128 __A, __m128 __B) {
+  // CHECK-LABEL: @test_mm_maskz_max_ss
+  // CHECK: @llvm.x86.avx512.mask.max.ss.round
+  return _mm_maskz_max_ss(__U,__A,__B); 
+}
+__m128d test_mm_max_round_sd(__m128d __A, __m128d __B) {
+  // CHECK-LABEL: @test_mm_max_round_sd
+  // CHECK: @llvm.x86.avx512.mask.max.sd.round
+  return _mm_max_round_sd(__A,__B,0x08); 
+}
+__m128d test_mm_mask_max_round_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
+  // CHECK-LABEL: @test_mm_mask_max_round_sd
+  // CHECK: @llvm.x86.avx512.mask.max.sd.round
+  return _mm_mask_max_round_sd(__W,__U,__A,__B,0x08); 
+}
+__m128d test_mm_maskz_max_round_sd(__mmask8 __U, __m128d __A, __m128d __B) {
+  // CHECK-LABEL: @test_mm_maskz_max_round_sd
+  // CHECK: @llvm.x86.avx512.mask.max.sd.round
+  return _mm_maskz_max_round_sd(__U,__A,__B,0x08); 
+}
+__m128d test_mm_mask_max_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
+  // CHECK-LABEL: @test_mm_mask_max_sd
+  // CHECK: @llvm.x86.avx512.mask.max.sd.round
+  return _mm_mask_max_sd(__W,__U,__A,__B); 
+}
+__m128d test_mm_maskz_max_sd(__mmask8 __U, __m128d __A, __m128d __B) {
+  // CHECK-LABEL: @test_mm_maskz_max_sd
+  // CHECK: @llvm.x86.avx512.mask.max.sd.round
+  return _mm_maskz_max_sd(__U,__A,__B); 
+}
+__m128 test_mm_min_round_ss(__m128 __A, __m128 __B) {
+  // CHECK-LABEL: @test_mm_min_round_ss
+  // CHECK: @llvm.x86.avx512.mask.min.ss.round
+  return _mm_min_round_ss(__A,__B,0x08); 
+}
+__m128 test_mm_mask_min_round_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
+  // CHECK-LABEL: @test_mm_mask_min_round_ss
+  // CHECK: @llvm.x86.avx512.mask.min.ss.round
+  return _mm_mask_min_round_ss(__W,__U,__A,__B,0x08); 
+}
+__m128 test_mm_maskz_min_round_ss(__mmask8 __U, __m128 __A, __m128 __B) {
+  // CHECK-LABEL: @test_mm_maskz_min_round_ss
+  // CHECK: @llvm.x86.avx512.mask.min.ss.round
+  return _mm_maskz_min_round_ss(__U,__A,__B,0x08); 
+}
+__m128 test_mm_mask_min_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
+  // CHECK-LABEL: @test_mm_mask_min_ss
+  // CHECK: @llvm.x86.avx512.mask.min.ss.round
+  return _mm_mask_min_ss(__W,__U,__A,__B); 
+}
+__m128 test_mm_maskz_min_ss(__mmask8 __U, __m128 __A, __m128 __B) {
+  // CHECK-LABEL: @test_mm_maskz_min_ss
+  // CHECK: @llvm.x86.avx512.mask.min.ss.round
+  return _mm_maskz_min_ss(__U,__A,__B); 
+}
+__m128d test_mm_min_round_sd(__m128d __A, __m128d __B) {
+  // CHECK-LABEL: @test_mm_min_round_sd
+  // CHECK: @llvm.x86.avx512.mask.min.sd.round
+  return _mm_min_round_sd(__A,__B,0x08); 
+}
+__m128d test_mm_mask_min_round_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
+  // CHECK-LABEL: @test_mm_mask_min_round_sd
+  // CHECK: @llvm.x86.avx512.mask.min.sd.round
+  return _mm_mask_min_round_sd(__W,__U,__A,__B,0x08); 
+}
+__m128d test_mm_maskz_min_round_sd(__mmask8 __U, __m128d __A, __m128d __B) {
+  // CHECK-LABEL: @test_mm_maskz_min_round_sd
+  // CHECK: @llvm.x86.avx512.mask.min.sd.round
+  return _mm_maskz_min_round_sd(__U,__A,__B,0x08); 
+}
+__m128d test_mm_mask_min_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
+  // CHECK-LABEL: @test_mm_mask_min_sd
+  // CHECK: @llvm.x86.avx512.mask.min.sd.round
+  return _mm_mask_min_sd(__W,__U,__A,__B); 
+}
+__m128d test_mm_maskz_min_sd(__mmask8 __U, __m128d __A, __m128d __B) {
+  // CHECK-LABEL: @test_mm_maskz_min_sd
+  // CHECK: @llvm.x86.avx512.mask.min.sd.round
+  return _mm_maskz_min_sd(__U,__A,__B); 
+}
+
+__m512 test_mm512_undefined() {
+  // CHECK-LABEL: @test_mm512_undefined
+  // CHECK: ret <16 x float> undef
+  return _mm512_undefined();
+}
+
+__m512 test_mm512_undefined_ps() {
+  // CHECK-LABEL: @test_mm512_undefined_ps
+  // CHECK: ret <16 x float> undef
+  return _mm512_undefined_ps();
+}
+
+__m512d test_mm512_undefined_pd() {
+  // CHECK-LABEL: @test_mm512_undefined_pd
+  // CHECK: ret <8 x double> undef
+  return _mm512_undefined_pd();
+}
+
+__m512i test_mm512_undefined_epi32() {
+  // CHECK-LABEL: @test_mm512_undefined_epi32
+  // CHECK: ret <8 x i64> undef
+  return _mm512_undefined_epi32();
+}
diff --git a/test/CodeGen/avx512vl-builtins.c b/test/CodeGen/avx512vl-builtins.c
index 9446d46..445513c 100644
--- a/test/CodeGen/avx512vl-builtins.c
+++ b/test/CodeGen/avx512vl-builtins.c
@@ -1,103 +1,10 @@
-// RUN: %clang_cc1 %s -O0 -triple=x86_64-apple-darwin -ffreestanding -target-feature +avx512f -target-feature +avx512vl -emit-llvm -o - -Werror | FileCheck %s
+// RUN: %clang_cc1 %s -triple=x86_64-apple-darwin -target-feature +avx512f -target-feature +avx512vl -emit-llvm -o - -Werror | FileCheck %s
+
+// Don't include mm_malloc.h, it's system specific.
+#define __MM_MALLOC_H
 
 #include <immintrin.h>
 
-__mmask8 test_mm256_cmpeq_epi32_mask(__m256i __a, __m256i __b) {
-  // CHECK-LABEL: @test_mm256_cmpeq_epi32_mask
-  // CHECK: @llvm.x86.avx512.mask.pcmpeq.d.256
-  return (__mmask8)_mm256_cmpeq_epi32_mask(__a, __b);
-}
-
-__mmask8 test_mm256_mask_cmpeq_epi32_mask(__mmask8 __u, __m256i __a, __m256i __b) {
-  // CHECK-LABEL: @test_mm256_mask_cmpeq_epi32_mask
-  // CHECK: @llvm.x86.avx512.mask.pcmpeq.d.256
-  return (__mmask8)_mm256_mask_cmpeq_epi32_mask(__u, __a, __b);
-}
-
-__mmask8 test_mm_cmpeq_epi32_mask(__m128i __a, __m128i __b) {
-  // CHECK-LABEL: @test_mm_cmpeq_epi32_mask
-  // CHECK: @llvm.x86.avx512.mask.pcmpeq.d.128
-  return (__mmask8)_mm_cmpeq_epi32_mask(__a, __b);
-}
-
-__mmask8 test_mm_mask_cmpeq_epi32_mask(__mmask8 __u, __m128i __a, __m128i __b) {
-  // CHECK-LABEL: @test_mm_mask_cmpeq_epi32_mask
-  // CHECK: @llvm.x86.avx512.mask.pcmpeq.d.128
-  return (__mmask8)_mm_mask_cmpeq_epi32_mask(__u, __a, __b);
-}
-
-__mmask8 test_mm256_cmpeq_epi64_mask(__m256i __a, __m256i __b) {
-  // CHECK-LABEL: @test_mm256_cmpeq_epi64_mask
-  // CHECK: @llvm.x86.avx512.mask.pcmpeq.q.256
-  return (__mmask8)_mm256_cmpeq_epi64_mask(__a, __b);
-}
-
-__mmask8 test_mm256_mask_cmpeq_epi64_mask(__mmask8 __u, __m256i __a, __m256i __b) {
-  // CHECK-LABEL: @test_mm256_mask_cmpeq_epi64_mask
-  // CHECK: @llvm.x86.avx512.mask.pcmpeq.q.256
-  return (__mmask8)_mm256_mask_cmpeq_epi64_mask(__u, __a, __b);
-}
-
-__mmask8 test_mm_cmpeq_epi64_mask(__m128i __a, __m128i __b) {
-  // CHECK-LABEL: @test_mm_cmpeq_epi64_mask
-  // CHECK: @llvm.x86.avx512.mask.pcmpeq.q.128
-  return (__mmask8)_mm_cmpeq_epi64_mask(__a, __b);
-}
-
-__mmask8 test_mm_mask_cmpeq_epi64_mask(__mmask8 __u, __m128i __a, __m128i __b) {
-  // CHECK-LABEL: @test_mm_mask_cmpeq_epi64_mask
-  // CHECK: @llvm.x86.avx512.mask.pcmpeq.q.128
-  return (__mmask8)_mm_mask_cmpeq_epi64_mask(__u, __a, __b);
-}
-
-__mmask8 test_mm256_cmpgt_epi32_mask(__m256i __a, __m256i __b) {
-  // CHECK-LABEL: @test_mm256_cmpgt_epi32_mask
-  // CHECK: @llvm.x86.avx512.mask.pcmpgt.d.256
-  return (__mmask8)_mm256_cmpgt_epi32_mask(__a, __b);
-}
-
-__mmask8 test_mm256_mask_cmpgt_epi32_mask(__mmask8 __u, __m256i __a, __m256i __b) {
-  // CHECK-LABEL: @test_mm256_mask_cmpgt_epi32_mask
-  // CHECK: @llvm.x86.avx512.mask.pcmpgt.d.256
-  return (__mmask8)_mm256_mask_cmpgt_epi32_mask(__u, __a, __b);
-}
-
-__mmask8 test_mm_cmpgt_epi32_mask(__m128i __a, __m128i __b) {
-  // CHECK-LABEL: @test_mm_cmpgt_epi32_mask
-  // CHECK: @llvm.x86.avx512.mask.pcmpgt.d.128
-  return (__mmask8)_mm_cmpgt_epi32_mask(__a, __b);
-}
-
-__mmask8 test_mm_mask_cmpgt_epi32_mask(__mmask8 __u, __m128i __a, __m128i __b) {
-  // CHECK-LABEL: @test_mm_mask_cmpgt_epi32_mask
-  // CHECK: @llvm.x86.avx512.mask.pcmpgt.d.128
-  return (__mmask8)_mm_mask_cmpgt_epi32_mask(__u, __a, __b);
-}
-
-__mmask8 test_mm256_cmpgt_epi64_mask(__m256i __a, __m256i __b) {
-  // CHECK-LABEL: @test_mm256_cmpgt_epi64_mask
-  // CHECK: @llvm.x86.avx512.mask.pcmpgt.q.256
-  return (__mmask8)_mm256_cmpgt_epi64_mask(__a, __b);
-}
-
-__mmask8 test_mm256_mask_cmpgt_epi64_mask(__mmask8 __u, __m256i __a, __m256i __b) {
-  // CHECK-LABEL: @test_mm256_mask_cmpgt_epi64_mask
-  // CHECK: @llvm.x86.avx512.mask.pcmpgt.q.256
-  return (__mmask8)_mm256_mask_cmpgt_epi64_mask(__u, __a, __b);
-}
-
-__mmask8 test_mm_cmpgt_epi64_mask(__m128i __a, __m128i __b) {
-  // CHECK-LABEL: @test_mm_cmpgt_epi64_mask
-  // CHECK: @llvm.x86.avx512.mask.pcmpgt.q.128
-  return (__mmask8)_mm_cmpgt_epi64_mask(__a, __b);
-}
-
-__mmask8 test_mm_mask_cmpgt_epi64_mask(__mmask8 __u, __m128i __a, __m128i __b) {
-  // CHECK-LABEL: @test_mm_mask_cmpgt_epi64_mask
-  // CHECK: @llvm.x86.avx512.mask.pcmpgt.q.128
-  return (__mmask8)_mm_mask_cmpgt_epi64_mask(__u, __a, __b);
-}
-
 __mmask8 test_mm_cmpeq_epu32_mask(__m128i __a, __m128i __b) {
   // CHECK-LABEL: @test_mm_cmpeq_epu32_mask
   // CHECK: @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> {{.*}}, <4 x i32> {{.*}}, i32 0, i8 -1)
@@ -1121,3 +1028,2129 @@
   // CHECK: @llvm.x86.avx512.mask.cmp.pd.128
   return _mm128_mask_cmp_pd_mask(m, __A, __B, 0);
 }
+
+
+
+
+__m128d test_mm_mask_fmadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) {
+  // CHECK-LABEL: @test_mm_mask_fmadd_pd
+  // CHECK: @llvm.x86.avx512.mask.vfmadd.pd.128
+  return _mm_mask_fmadd_pd(__A, __U, __B, __C);
+}
+
+__m128d test_mm_mask_fmsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) {
+  // CHECK-LABEL: @test_mm_mask_fmsub_pd
+  // CHECK: @llvm.x86.avx512.mask.vfmadd.pd.128
+  return _mm_mask_fmsub_pd(__A, __U, __B, __C);
+}
+
+__m128d test_mm_mask3_fmadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) {
+  // CHECK-LABEL: @test_mm_mask3_fmadd_pd
+  // CHECK: @llvm.x86.avx512.mask3.vfmadd.pd.128
+  return _mm_mask3_fmadd_pd(__A, __B, __C, __U);
+}
+
+__m128d test_mm_mask3_fnmadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) {
+  // CHECK-LABEL: @test_mm_mask3_fnmadd_pd
+  // CHECK: @llvm.x86.avx512.mask3.vfmadd.pd.128
+  return _mm_mask3_fnmadd_pd(__A, __B, __C, __U);
+}
+
+__m128d test_mm_maskz_fmadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) {
+  // CHECK-LABEL: @test_mm_maskz_fmadd_pd
+  // CHECK: @llvm.x86.avx512.maskz.vfmadd.pd.128
+  return _mm_maskz_fmadd_pd(__U, __A, __B, __C);
+}
+
+__m128d test_mm_maskz_fmsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) {
+  // CHECK-LABEL: @test_mm_maskz_fmsub_pd
+  // CHECK: @llvm.x86.avx512.maskz.vfmadd.pd.128
+  return _mm_maskz_fmsub_pd(__U, __A, __B, __C);
+}
+
+__m128d test_mm_maskz_fnmadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) {
+  // CHECK-LABEL: @test_mm_maskz_fnmadd_pd
+  // CHECK: @llvm.x86.avx512.maskz.vfmadd.pd.128
+  return _mm_maskz_fnmadd_pd(__U, __A, __B, __C);
+}
+
+__m128d test_mm_maskz_fnmsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) {
+  // CHECK-LABEL: @test_mm_maskz_fnmsub_pd
+  // CHECK: @llvm.x86.avx512.maskz.vfmadd.pd.128
+  return _mm_maskz_fnmsub_pd(__U, __A, __B, __C);
+}
+
+__m256d test_mm256_mask_fmadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) {
+  // CHECK-LABEL: @test_mm256_mask_fmadd_pd
+  // CHECK: @llvm.x86.avx512.mask.vfmadd.pd.256
+  return _mm256_mask_fmadd_pd(__A, __U, __B, __C);
+}
+
+__m256d test_mm256_mask_fmsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) {
+  // CHECK-LABEL: @test_mm256_mask_fmsub_pd
+  // CHECK: @llvm.x86.avx512.mask.vfmadd.pd.256
+  return _mm256_mask_fmsub_pd(__A, __U, __B, __C);
+}
+
+__m256d test_mm256_mask3_fmadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) {
+  // CHECK-LABEL: @test_mm256_mask3_fmadd_pd
+  // CHECK: @llvm.x86.avx512.mask3.vfmadd.pd.256
+  return _mm256_mask3_fmadd_pd(__A, __B, __C, __U);
+}
+
+__m256d test_mm256_mask3_fnmadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) {
+  // CHECK-LABEL: @test_mm256_mask3_fnmadd_pd
+  // CHECK: @llvm.x86.avx512.mask3.vfmadd.pd.256
+  return _mm256_mask3_fnmadd_pd(__A, __B, __C, __U);
+}
+
+__m256d test_mm256_maskz_fmadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) {
+  // CHECK-LABEL: @test_mm256_maskz_fmadd_pd
+  // CHECK: @llvm.x86.avx512.maskz.vfmadd.pd.256
+  return _mm256_maskz_fmadd_pd(__U, __A, __B, __C);
+}
+
+__m256d test_mm256_maskz_fmsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) {
+  // CHECK-LABEL: @test_mm256_maskz_fmsub_pd
+  // CHECK: @llvm.x86.avx512.maskz.vfmadd.pd.256
+  return _mm256_maskz_fmsub_pd(__U, __A, __B, __C);
+}
+
+__m256d test_mm256_maskz_fnmadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) {
+  // CHECK-LABEL: @test_mm256_maskz_fnmadd_pd
+  // CHECK: @llvm.x86.avx512.maskz.vfmadd.pd.256
+  return _mm256_maskz_fnmadd_pd(__U, __A, __B, __C);
+}
+
+__m256d test_mm256_maskz_fnmsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) {
+  // CHECK-LABEL: @test_mm256_maskz_fnmsub_pd
+  // CHECK: @llvm.x86.avx512.maskz.vfmadd.pd.256
+  return _mm256_maskz_fnmsub_pd(__U, __A, __B, __C);
+}
+
+__m128 test_mm_mask_fmadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) {
+  // CHECK-LABEL: @test_mm_mask_fmadd_ps
+  // CHECK: @llvm.x86.avx512.mask.vfmadd.ps.128
+  return _mm_mask_fmadd_ps(__A, __U, __B, __C);
+}
+
+__m128 test_mm_mask_fmsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) {
+  // CHECK-LABEL: @test_mm_mask_fmsub_ps
+  // CHECK: @llvm.x86.avx512.mask.vfmadd.ps.128
+  return _mm_mask_fmsub_ps(__A, __U, __B, __C);
+}
+
+__m128 test_mm_mask3_fmadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U) {
+  // CHECK-LABEL: @test_mm_mask3_fmadd_ps
+  // CHECK: @llvm.x86.avx512.mask3.vfmadd.ps.128
+  return _mm_mask3_fmadd_ps(__A, __B, __C, __U);
+}
+
+__m128 test_mm_mask3_fnmadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U) {
+  // CHECK-LABEL: @test_mm_mask3_fnmadd_ps
+  // CHECK: @llvm.x86.avx512.mask3.vfmadd.ps.128
+  return _mm_mask3_fnmadd_ps(__A, __B, __C, __U);
+}
+
+__m128 test_mm_maskz_fmadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) {
+  // CHECK-LABEL: @test_mm_maskz_fmadd_ps
+  // CHECK: @llvm.x86.avx512.maskz.vfmadd.ps.128
+  return _mm_maskz_fmadd_ps(__U, __A, __B, __C);
+}
+
+__m128 test_mm_maskz_fmsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) {
+  // CHECK-LABEL: @test_mm_maskz_fmsub_ps
+  // CHECK: @llvm.x86.avx512.maskz.vfmadd.ps.128
+  return _mm_maskz_fmsub_ps(__U, __A, __B, __C);
+}
+
+__m128 test_mm_maskz_fnmadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) {
+  // CHECK-LABEL: @test_mm_maskz_fnmadd_ps
+  // CHECK: @llvm.x86.avx512.maskz.vfmadd.ps.128
+  return _mm_maskz_fnmadd_ps(__U, __A, __B, __C);
+}
+
+__m128 test_mm_maskz_fnmsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) {
+  // CHECK-LABEL: @test_mm_maskz_fnmsub_ps
+  // CHECK: @llvm.x86.avx512.maskz.vfmadd.ps.128
+  return _mm_maskz_fnmsub_ps(__U, __A, __B, __C);
+}
+
+__m256 test_mm256_mask_fmadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C) {
+  // CHECK-LABEL: @test_mm256_mask_fmadd_ps
+  // CHECK: @llvm.x86.avx512.mask.vfmadd.ps.256
+  return _mm256_mask_fmadd_ps(__A, __U, __B, __C);
+}
+
+__m256 test_mm256_mask_fmsub_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C) {
+  // CHECK-LABEL: @test_mm256_mask_fmsub_ps
+  // CHECK: @llvm.x86.avx512.mask.vfmadd.ps.256
+  return _mm256_mask_fmsub_ps(__A, __U, __B, __C);
+}
+
+__m256 test_mm256_mask3_fmadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) {
+  // CHECK-LABEL: @test_mm256_mask3_fmadd_ps
+  // CHECK: @llvm.x86.avx512.mask3.vfmadd.ps.256
+  return _mm256_mask3_fmadd_ps(__A, __B, __C, __U);
+}
+
+__m256 test_mm256_mask3_fnmadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) {
+  // CHECK-LABEL: @test_mm256_mask3_fnmadd_ps
+  // CHECK: @llvm.x86.avx512.mask3.vfmadd.ps.256
+  return _mm256_mask3_fnmadd_ps(__A, __B, __C, __U);
+}
+
+__m256 test_mm256_maskz_fmadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) {
+  // CHECK-LABEL: @test_mm256_maskz_fmadd_ps
+  // CHECK: @llvm.x86.avx512.maskz.vfmadd.ps.256
+  return _mm256_maskz_fmadd_ps(__U, __A, __B, __C);
+}
+
+__m256 test_mm256_maskz_fmsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) {
+  // CHECK-LABEL: @test_mm256_maskz_fmsub_ps
+  // CHECK: @llvm.x86.avx512.maskz.vfmadd.ps.256
+  return _mm256_maskz_fmsub_ps(__U, __A, __B, __C);
+}
+
+__m256 test_mm256_maskz_fnmadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) {
+  // CHECK-LABEL: @test_mm256_maskz_fnmadd_ps
+  // CHECK: @llvm.x86.avx512.maskz.vfmadd.ps.256
+  return _mm256_maskz_fnmadd_ps(__U, __A, __B, __C);
+}
+
+__m256 test_mm256_maskz_fnmsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) {
+  // CHECK-LABEL: @test_mm256_maskz_fnmsub_ps
+  // CHECK: @llvm.x86.avx512.maskz.vfmadd.ps.256
+  return _mm256_maskz_fnmsub_ps(__U, __A, __B, __C);
+}
+
+__m128d test_mm_mask_fmaddsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) {
+  // CHECK-LABEL: @test_mm_mask_fmaddsub_pd
+  // CHECK: @llvm.x86.avx512.mask.vfmaddsub.pd.128
+  return _mm_mask_fmaddsub_pd(__A, __U, __B, __C);
+}
+
+__m128d test_mm_mask_fmsubadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) {
+  // CHECK-LABEL: @test_mm_mask_fmsubadd_pd
+  // CHECK: @llvm.x86.avx512.mask.vfmaddsub.pd.128
+  return _mm_mask_fmsubadd_pd(__A, __U, __B, __C);
+}
+
+__m128d test_mm_mask3_fmaddsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) {
+  // CHECK-LABEL: @test_mm_mask3_fmaddsub_pd
+  // CHECK: @llvm.x86.avx512.mask3.vfmaddsub.pd.128
+  return _mm_mask3_fmaddsub_pd(__A, __B, __C, __U);
+}
+
+__m128d test_mm_maskz_fmaddsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) {
+  // CHECK-LABEL: @test_mm_maskz_fmaddsub_pd
+  // CHECK: @llvm.x86.avx512.maskz.vfmaddsub.pd.128
+  return _mm_maskz_fmaddsub_pd(__U, __A, __B, __C);
+}
+
+__m128d test_mm_maskz_fmsubadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) {
+  // CHECK-LABEL: @test_mm_maskz_fmsubadd_pd
+  // CHECK: @llvm.x86.avx512.maskz.vfmaddsub.pd.128
+  return _mm_maskz_fmsubadd_pd(__U, __A, __B, __C);
+}
+
+__m256d test_mm256_mask_fmaddsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) {
+  // CHECK-LABEL: @test_mm256_mask_fmaddsub_pd
+  // CHECK: @llvm.x86.avx512.mask.vfmaddsub.pd.256
+  return _mm256_mask_fmaddsub_pd(__A, __U, __B, __C);
+}
+
+__m256d test_mm256_mask_fmsubadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) {
+  // CHECK-LABEL: @test_mm256_mask_fmsubadd_pd
+  // CHECK: @llvm.x86.avx512.mask.vfmaddsub.pd.256
+  return _mm256_mask_fmsubadd_pd(__A, __U, __B, __C);
+}
+
+__m256d test_mm256_mask3_fmaddsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) {
+  // CHECK-LABEL: @test_mm256_mask3_fmaddsub_pd
+  // CHECK: @llvm.x86.avx512.mask3.vfmaddsub.pd.256
+  return _mm256_mask3_fmaddsub_pd(__A, __B, __C, __U);
+}
+
+__m256d test_mm256_maskz_fmaddsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) {
+  // CHECK-LABEL: @test_mm256_maskz_fmaddsub_pd
+  // CHECK: @llvm.x86.avx512.maskz.vfmaddsub.pd.256
+  return _mm256_maskz_fmaddsub_pd(__U, __A, __B, __C);
+}
+
+__m256d test_mm256_maskz_fmsubadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) {
+  // CHECK-LABEL: @test_mm256_maskz_fmsubadd_pd
+  // CHECK: @llvm.x86.avx512.maskz.vfmaddsub.pd.256
+  return _mm256_maskz_fmsubadd_pd(__U, __A, __B, __C);
+}
+
+__m128 test_mm_mask_fmaddsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) {
+  // CHECK-LABEL: @test_mm_mask_fmaddsub_ps
+  // CHECK: @llvm.x86.avx512.mask.vfmaddsub.ps.128
+  return _mm_mask_fmaddsub_ps(__A, __U, __B, __C);
+}
+
+__m128 test_mm_mask_fmsubadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) {
+  // CHECK-LABEL: @test_mm_mask_fmsubadd_ps
+  // CHECK: @llvm.x86.avx512.mask.vfmaddsub.ps.128
+  return _mm_mask_fmsubadd_ps(__A, __U, __B, __C);
+}
+
+__m128 test_mm_mask3_fmaddsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U) {
+  // CHECK-LABEL: @test_mm_mask3_fmaddsub_ps
+  // CHECK: @llvm.x86.avx512.mask3.vfmaddsub.ps.128
+  return _mm_mask3_fmaddsub_ps(__A, __B, __C, __U);
+}
+
+__m128 test_mm_maskz_fmaddsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) {
+  // CHECK-LABEL: @test_mm_maskz_fmaddsub_ps
+  // CHECK: @llvm.x86.avx512.maskz.vfmaddsub.ps.128
+  return _mm_maskz_fmaddsub_ps(__U, __A, __B, __C);
+}
+
+__m128 test_mm_maskz_fmsubadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) {
+  // CHECK-LABEL: @test_mm_maskz_fmsubadd_ps
+  // CHECK: @llvm.x86.avx512.maskz.vfmaddsub.ps.128
+  return _mm_maskz_fmsubadd_ps(__U, __A, __B, __C);
+}
+
+__m256 test_mm256_mask_fmaddsub_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C) {
+  // CHECK-LABEL: @test_mm256_mask_fmaddsub_ps
+  // CHECK: @llvm.x86.avx512.mask.vfmaddsub.ps.256
+  return _mm256_mask_fmaddsub_ps(__A, __U, __B, __C);
+}
+
+__m256 test_mm256_mask_fmsubadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C) {
+  // CHECK-LABEL: @test_mm256_mask_fmsubadd_ps
+  // CHECK: @llvm.x86.avx512.mask.vfmaddsub.ps.256
+  return _mm256_mask_fmsubadd_ps(__A, __U, __B, __C);
+}
+
+__m256 test_mm256_mask3_fmaddsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) {
+  // CHECK-LABEL: @test_mm256_mask3_fmaddsub_ps
+  // CHECK: @llvm.x86.avx512.mask3.vfmaddsub.ps.256
+  return _mm256_mask3_fmaddsub_ps(__A, __B, __C, __U);
+}
+
+__m256 test_mm256_maskz_fmaddsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) {
+  // CHECK-LABEL: @test_mm256_maskz_fmaddsub_ps
+  // CHECK: @llvm.x86.avx512.maskz.vfmaddsub.ps.256
+  return _mm256_maskz_fmaddsub_ps(__U, __A, __B, __C);
+}
+
+__m256 test_mm256_maskz_fmsubadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) {
+  // CHECK-LABEL: @test_mm256_maskz_fmsubadd_ps
+  // CHECK: @llvm.x86.avx512.maskz.vfmaddsub.ps.256
+  return _mm256_maskz_fmsubadd_ps(__U, __A, __B, __C);
+}
+
+__m128d test_mm_mask3_fmsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) {
+  // CHECK-LABEL: @test_mm_mask3_fmsub_pd
+  // CHECK: @llvm.x86.avx512.mask3.vfmsub.pd.128
+  return _mm_mask3_fmsub_pd(__A, __B, __C, __U);
+}
+
+__m256d test_mm256_mask3_fmsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) {
+  // CHECK-LABEL: @test_mm256_mask3_fmsub_pd
+  // CHECK: @llvm.x86.avx512.mask3.vfmsub.pd.256
+  return _mm256_mask3_fmsub_pd(__A, __B, __C, __U);
+}
+
+__m128 test_mm_mask3_fmsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U) {
+  // CHECK-LABEL: @test_mm_mask3_fmsub_ps
+  // CHECK: @llvm.x86.avx512.mask3.vfmsub.ps.128
+  return _mm_mask3_fmsub_ps(__A, __B, __C, __U);
+}
+
+__m256 test_mm256_mask3_fmsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) {
+  // CHECK-LABEL: @test_mm256_mask3_fmsub_ps
+  // CHECK: @llvm.x86.avx512.mask3.vfmsub.ps.256
+  return _mm256_mask3_fmsub_ps(__A, __B, __C, __U);
+}
+
+__m128d test_mm_mask3_fmsubadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) {
+  // CHECK-LABEL: @test_mm_mask3_fmsubadd_pd
+  // CHECK: @llvm.x86.avx512.mask3.vfmsubadd.pd.128
+  return _mm_mask3_fmsubadd_pd(__A, __B, __C, __U);
+}
+
+__m256d test_mm256_mask3_fmsubadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) {
+  // CHECK-LABEL: @test_mm256_mask3_fmsubadd_pd
+  // CHECK: @llvm.x86.avx512.mask3.vfmsubadd.pd.256
+  return _mm256_mask3_fmsubadd_pd(__A, __B, __C, __U);
+}
+
+__m128 test_mm_mask3_fmsubadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U) {
+  // CHECK-LABEL: @test_mm_mask3_fmsubadd_ps
+  // CHECK: @llvm.x86.avx512.mask3.vfmsubadd.ps.128
+  return _mm_mask3_fmsubadd_ps(__A, __B, __C, __U);
+}
+
+__m256 test_mm256_mask3_fmsubadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) {
+  // CHECK-LABEL: @test_mm256_mask3_fmsubadd_ps
+  // CHECK: @llvm.x86.avx512.mask3.vfmsubadd.ps.256
+  return _mm256_mask3_fmsubadd_ps(__A, __B, __C, __U);
+}
+
+__m128d test_mm_mask_fnmadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) {
+  // CHECK-LABEL: @test_mm_mask_fnmadd_pd
+  // CHECK: @llvm.x86.avx512.mask.vfnmadd.pd.128
+  return _mm_mask_fnmadd_pd(__A, __U, __B, __C);
+}
+
+__m256d test_mm256_mask_fnmadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) {
+  // CHECK-LABEL: @test_mm256_mask_fnmadd_pd
+  // CHECK: @llvm.x86.avx512.mask.vfnmadd.pd.256
+  return _mm256_mask_fnmadd_pd(__A, __U, __B, __C);
+}
+
+__m128 test_mm_mask_fnmadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) {
+  // CHECK-LABEL: @test_mm_mask_fnmadd_ps
+  // CHECK: @llvm.x86.avx512.mask.vfnmadd.ps.128
+  return _mm_mask_fnmadd_ps(__A, __U, __B, __C);
+}
+
+__m256 test_mm256_mask_fnmadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C) {
+  // CHECK-LABEL: @test_mm256_mask_fnmadd_ps
+  // CHECK: @llvm.x86.avx512.mask.vfnmadd.ps.256
+  return _mm256_mask_fnmadd_ps(__A, __U, __B, __C);
+}
+
+__m128d test_mm_mask_fnmsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) {
+  // CHECK-LABEL: @test_mm_mask_fnmsub_pd
+  // CHECK: @llvm.x86.avx512.mask.vfnmsub.pd.128
+  return _mm_mask_fnmsub_pd(__A, __U, __B, __C);
+}
+
+__m128d test_mm_mask3_fnmsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) {
+  // CHECK-LABEL: @test_mm_mask3_fnmsub_pd
+  // CHECK: @llvm.x86.avx512.mask3.vfnmsub.pd.128
+  return _mm_mask3_fnmsub_pd(__A, __B, __C, __U);
+}
+
+__m256d test_mm256_mask_fnmsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) {
+  // CHECK-LABEL: @test_mm256_mask_fnmsub_pd
+  // CHECK: @llvm.x86.avx512.mask.vfnmsub.pd.256
+  return _mm256_mask_fnmsub_pd(__A, __U, __B, __C);
+}
+
+__m256d test_mm256_mask3_fnmsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) {
+  // CHECK-LABEL: @test_mm256_mask3_fnmsub_pd
+  // CHECK: @llvm.x86.avx512.mask3.vfnmsub.pd.256
+  return _mm256_mask3_fnmsub_pd(__A, __B, __C, __U);
+}
+
+__m128 test_mm_mask_fnmsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) {
+  // CHECK-LABEL: @test_mm_mask_fnmsub_ps
+  // CHECK: @llvm.x86.avx512.mask.vfnmsub.ps.128
+  return _mm_mask_fnmsub_ps(__A, __U, __B, __C);
+}
+
+__m128 test_mm_mask3_fnmsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U) {
+  // CHECK-LABEL: @test_mm_mask3_fnmsub_ps
+  // CHECK: @llvm.x86.avx512.mask3.vfnmsub.ps.128
+  return _mm_mask3_fnmsub_ps(__A, __B, __C, __U);
+}
+
+__m256 test_mm256_mask_fnmsub_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C) {
+  // CHECK-LABEL: @test_mm256_mask_fnmsub_ps
+  // CHECK: @llvm.x86.avx512.mask.vfnmsub.ps.256
+  return _mm256_mask_fnmsub_ps(__A, __U, __B, __C);
+}
+
+__m256 test_mm256_mask3_fnmsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) {
+  // CHECK-LABEL: @test_mm256_mask3_fnmsub_ps
+  // CHECK: @llvm.x86.avx512.mask3.vfnmsub.ps.256
+  return _mm256_mask3_fnmsub_ps(__A, __B, __C, __U);
+}
+
+__m128d test_mm_mask_add_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
+  // CHECK-LABEL: @test_mm_mask_add_pd
+  // CHECK: @llvm.x86.avx512.mask.add.pd.128
+  return _mm_mask_add_pd(__W,__U,__A,__B); 
+}
+__m128d test_mm_maskz_add_pd(__mmask8 __U, __m128d __A, __m128d __B) {
+  // CHECK-LABEL: @test_mm_maskz_add_pd
+  // CHECK: @llvm.x86.avx512.mask.add.pd.128
+  return _mm_maskz_add_pd(__U,__A,__B); 
+}
+__m256d test_mm256_mask_add_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
+  // CHECK-LABEL: @test_mm256_mask_add_pd
+  // CHECK: @llvm.x86.avx512.mask.add.pd.256
+  return _mm256_mask_add_pd(__W,__U,__A,__B); 
+}
+__m256d test_mm256_maskz_add_pd(__mmask8 __U, __m256d __A, __m256d __B) {
+  // CHECK-LABEL: @test_mm256_maskz_add_pd
+  // CHECK: @llvm.x86.avx512.mask.add.pd.256
+  return _mm256_maskz_add_pd(__U,__A,__B); 
+}
+__m128 test_mm_mask_add_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
+  // CHECK-LABEL: @test_mm_mask_add_ps
+  // CHECK: @llvm.x86.avx512.mask.add.ps.128
+  return _mm_mask_add_ps(__W,__U,__A,__B); 
+}
+__m128 test_mm_maskz_add_ps(__mmask8 __U, __m128 __A, __m128 __B) {
+  // CHECK-LABEL: @test_mm_maskz_add_ps
+  // CHECK: @llvm.x86.avx512.mask.add.ps.128
+  return _mm_maskz_add_ps(__U,__A,__B); 
+}
+__m256 test_mm256_mask_add_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
+  // CHECK-LABEL: @test_mm256_mask_add_ps
+  // CHECK: @llvm.x86.avx512.mask.add.ps.256
+  return _mm256_mask_add_ps(__W,__U,__A,__B); 
+}
+__m256 test_mm256_maskz_add_ps(__mmask8 __U, __m256 __A, __m256 __B) {
+  // CHECK-LABEL: @test_mm256_maskz_add_ps
+  // CHECK: @llvm.x86.avx512.mask.add.ps.256
+  return _mm256_maskz_add_ps(__U,__A,__B); 
+}
+__m128i test_mm_mask_blend_epi32(__mmask8 __U, __m128i __A, __m128i __W) {
+  // CHECK-LABEL: @test_mm_mask_blend_epi32
+  // CHECK: @llvm.x86.avx512.mask.blend.d.128
+  return _mm_mask_blend_epi32(__U,__A,__W); 
+}
+__m256i test_mm256_mask_blend_epi32(__mmask8 __U, __m256i __A, __m256i __W) {
+  // CHECK-LABEL: @test_mm256_mask_blend_epi32
+  // CHECK: @llvm.x86.avx512.mask.blend.d.256
+  return _mm256_mask_blend_epi32(__U,__A,__W); 
+}
+__m128d test_mm_mask_blend_pd(__mmask8 __U, __m128d __A, __m128d __W) {
+  // CHECK-LABEL: @test_mm_mask_blend_pd
+  // CHECK: @llvm.x86.avx512.mask.blend.pd.128
+  return _mm_mask_blend_pd(__U,__A,__W); 
+}
+__m256d test_mm256_mask_blend_pd(__mmask8 __U, __m256d __A, __m256d __W) {
+  // CHECK-LABEL: @test_mm256_mask_blend_pd
+  // CHECK: @llvm.x86.avx512.mask.blend.pd.256
+  return _mm256_mask_blend_pd(__U,__A,__W); 
+}
+__m128 test_mm_mask_blend_ps(__mmask8 __U, __m128 __A, __m128 __W) {
+  // CHECK-LABEL: @test_mm_mask_blend_ps
+  // CHECK: @llvm.x86.avx512.mask.blend.ps.128
+  return _mm_mask_blend_ps(__U,__A,__W); 
+}
+__m256 test_mm256_mask_blend_ps(__mmask8 __U, __m256 __A, __m256 __W) {
+  // CHECK-LABEL: @test_mm256_mask_blend_ps
+  // CHECK: @llvm.x86.avx512.mask.blend.ps.256
+  return _mm256_mask_blend_ps(__U,__A,__W); 
+}
+__m128i test_mm_mask_blend_epi64(__mmask8 __U, __m128i __A, __m128i __W) {
+  // CHECK-LABEL: @test_mm_mask_blend_epi64
+  // CHECK: @llvm.x86.avx512.mask.blend.q.128
+  return _mm_mask_blend_epi64(__U,__A,__W); 
+}
+__m256i test_mm256_mask_blend_epi64(__mmask8 __U, __m256i __A, __m256i __W) {
+  // CHECK-LABEL: @test_mm256_mask_blend_epi64
+  // CHECK: @llvm.x86.avx512.mask.blend.q.256
+  return _mm256_mask_blend_epi64(__U,__A,__W); 
+}
+__m128d test_mm_mask_compress_pd(__m128d __W, __mmask8 __U, __m128d __A) {
+  // CHECK-LABEL: @test_mm_mask_compress_pd
+  // CHECK: @llvm.x86.avx512.mask.compress.pd.128
+  return _mm_mask_compress_pd(__W,__U,__A); 
+}
+__m128d test_mm_maskz_compress_pd(__mmask8 __U, __m128d __A) {
+  // CHECK-LABEL: @test_mm_maskz_compress_pd
+  // CHECK: @llvm.x86.avx512.mask.compress.pd.128
+  return _mm_maskz_compress_pd(__U,__A); 
+}
+__m256d test_mm256_mask_compress_pd(__m256d __W, __mmask8 __U, __m256d __A) {
+  // CHECK-LABEL: @test_mm256_mask_compress_pd
+  // CHECK: @llvm.x86.avx512.mask.compress.pd.256
+  return _mm256_mask_compress_pd(__W,__U,__A); 
+}
+__m256d test_mm256_maskz_compress_pd(__mmask8 __U, __m256d __A) {
+  // CHECK-LABEL: @test_mm256_maskz_compress_pd
+  // CHECK: @llvm.x86.avx512.mask.compress.pd.256
+  return _mm256_maskz_compress_pd(__U,__A); 
+}
+__m128i test_mm_mask_compress_epi64(__m128i __W, __mmask8 __U, __m128i __A) {
+  // CHECK-LABEL: @test_mm_mask_compress_epi64
+  // CHECK: @llvm.x86.avx512.mask.compress.q.128
+  return _mm_mask_compress_epi64(__W,__U,__A); 
+}
+__m128i test_mm_maskz_compress_epi64(__mmask8 __U, __m128i __A) {
+  // CHECK-LABEL: @test_mm_maskz_compress_epi64
+  // CHECK: @llvm.x86.avx512.mask.compress.q.128
+  return _mm_maskz_compress_epi64(__U,__A); 
+}
+__m256i test_mm256_mask_compress_epi64(__m256i __W, __mmask8 __U, __m256i __A) {
+  // CHECK-LABEL: @test_mm256_mask_compress_epi64
+  // CHECK: @llvm.x86.avx512.mask.compress.q.256
+  return _mm256_mask_compress_epi64(__W,__U,__A); 
+}
+__m256i test_mm256_maskz_compress_epi64(__mmask8 __U, __m256i __A) {
+  // CHECK-LABEL: @test_mm256_maskz_compress_epi64
+  // CHECK: @llvm.x86.avx512.mask.compress.q.256
+  return _mm256_maskz_compress_epi64(__U,__A); 
+}
+__m128 test_mm_mask_compress_ps(__m128 __W, __mmask8 __U, __m128 __A) {
+  // CHECK-LABEL: @test_mm_mask_compress_ps
+  // CHECK: @llvm.x86.avx512.mask.compress.ps.128
+  return _mm_mask_compress_ps(__W,__U,__A); 
+}
+__m128 test_mm_maskz_compress_ps(__mmask8 __U, __m128 __A) {
+  // CHECK-LABEL: @test_mm_maskz_compress_ps
+  // CHECK: @llvm.x86.avx512.mask.compress.ps.128
+  return _mm_maskz_compress_ps(__U,__A); 
+}
+__m256 test_mm256_mask_compress_ps(__m256 __W, __mmask8 __U, __m256 __A) {
+  // CHECK-LABEL: @test_mm256_mask_compress_ps
+  // CHECK: @llvm.x86.avx512.mask.compress.ps.256
+  return _mm256_mask_compress_ps(__W,__U,__A); 
+}
+__m256 test_mm256_maskz_compress_ps(__mmask8 __U, __m256 __A) {
+  // CHECK-LABEL: @test_mm256_maskz_compress_ps
+  // CHECK: @llvm.x86.avx512.mask.compress.ps.256
+  return _mm256_maskz_compress_ps(__U,__A); 
+}
+__m128i test_mm_mask_compress_epi32(__m128i __W, __mmask8 __U, __m128i __A) {
+  // CHECK-LABEL: @test_mm_mask_compress_epi32
+  // CHECK: @llvm.x86.avx512.mask.compress.d.128
+  return _mm_mask_compress_epi32(__W,__U,__A); 
+}
+__m128i test_mm_maskz_compress_epi32(__mmask8 __U, __m128i __A) {
+  // CHECK-LABEL: @test_mm_maskz_compress_epi32
+  // CHECK: @llvm.x86.avx512.mask.compress.d.128
+  return _mm_maskz_compress_epi32(__U,__A); 
+}
+__m256i test_mm256_mask_compress_epi32(__m256i __W, __mmask8 __U, __m256i __A) {
+  // CHECK-LABEL: @test_mm256_mask_compress_epi32
+  // CHECK: @llvm.x86.avx512.mask.compress.d.256
+  return _mm256_mask_compress_epi32(__W,__U,__A); 
+}
+__m256i test_mm256_maskz_compress_epi32(__mmask8 __U, __m256i __A) {
+  // CHECK-LABEL: @test_mm256_maskz_compress_epi32
+  // CHECK: @llvm.x86.avx512.mask.compress.d.256
+  return _mm256_maskz_compress_epi32(__U,__A); 
+}
+void test_mm_mask_compressstoreu_pd(void *__P, __mmask8 __U, __m128d __A) {
+  // CHECK-LABEL: @test_mm_mask_compressstoreu_pd
+  // CHECK: @llvm.x86.avx512.mask.compress.store.pd.128
+  return _mm_mask_compressstoreu_pd(__P,__U,__A); 
+}
+void test_mm256_mask_compressstoreu_pd(void *__P, __mmask8 __U, __m256d __A) {
+  // CHECK-LABEL: @test_mm256_mask_compressstoreu_pd
+  // CHECK: @llvm.x86.avx512.mask.compress.store.pd.256
+  return _mm256_mask_compressstoreu_pd(__P,__U,__A); 
+}
+void test_mm_mask_compressstoreu_epi64(void *__P, __mmask8 __U, __m128i __A) {
+  // CHECK-LABEL: @test_mm_mask_compressstoreu_epi64
+  // CHECK: @llvm.x86.avx512.mask.compress.store.q.128
+  return _mm_mask_compressstoreu_epi64(__P,__U,__A); 
+}
+void test_mm256_mask_compressstoreu_epi64(void *__P, __mmask8 __U, __m256i __A) {
+  // CHECK-LABEL: @test_mm256_mask_compressstoreu_epi64
+  // CHECK: @llvm.x86.avx512.mask.compress.store.q.256
+  return _mm256_mask_compressstoreu_epi64(__P,__U,__A); 
+}
+void test_mm_mask_compressstoreu_ps(void *__P, __mmask8 __U, __m128 __A) {
+  // CHECK-LABEL: @test_mm_mask_compressstoreu_ps
+  // CHECK: @llvm.x86.avx512.mask.compress.store.ps.128
+  return _mm_mask_compressstoreu_ps(__P,__U,__A); 
+}
+void test_mm256_mask_compressstoreu_ps(void *__P, __mmask8 __U, __m256 __A) {
+  // CHECK-LABEL: @test_mm256_mask_compressstoreu_ps
+  // CHECK: @llvm.x86.avx512.mask.compress.store.ps.256
+  return _mm256_mask_compressstoreu_ps(__P,__U,__A); 
+}
+void test_mm_mask_compressstoreu_epi32(void *__P, __mmask8 __U, __m128i __A) {
+  // CHECK-LABEL: @test_mm_mask_compressstoreu_epi32
+  // CHECK: @llvm.x86.avx512.mask.compress.store.d.128
+  return _mm_mask_compressstoreu_epi32(__P,__U,__A); 
+}
+void test_mm256_mask_compressstoreu_epi32(void *__P, __mmask8 __U, __m256i __A) {
+  // CHECK-LABEL: @test_mm256_mask_compressstoreu_epi32
+  // CHECK: @llvm.x86.avx512.mask.compress.store.d.256
+  return _mm256_mask_compressstoreu_epi32(__P,__U,__A); 
+}
+__m128d test_mm_mask_cvtepi32_pd(__m128d __W, __mmask8 __U, __m128i __A) {
+  // CHECK-LABEL: @test_mm_mask_cvtepi32_pd
+  // CHECK: @llvm.x86.avx512.mask.cvtdq2pd.128
+  return _mm_mask_cvtepi32_pd(__W,__U,__A); 
+}
+__m128d test_mm_maskz_cvtepi32_pd(__mmask8 __U, __m128i __A) {
+  // CHECK-LABEL: @test_mm_maskz_cvtepi32_pd
+  // CHECK: @llvm.x86.avx512.mask.cvtdq2pd.128
+  return _mm_maskz_cvtepi32_pd(__U,__A); 
+}
+__m256d test_mm256_mask_cvtepi32_pd(__m256d __W, __mmask8 __U, __m128i __A) {
+  // CHECK-LABEL: @test_mm256_mask_cvtepi32_pd
+  // CHECK: @llvm.x86.avx512.mask.cvtdq2pd.256
+  return _mm256_mask_cvtepi32_pd(__W,__U,__A); 
+}
+__m256d test_mm256_maskz_cvtepi32_pd(__mmask8 __U, __m128i __A) {
+  // CHECK-LABEL: @test_mm256_maskz_cvtepi32_pd
+  // CHECK: @llvm.x86.avx512.mask.cvtdq2pd.256
+  return _mm256_maskz_cvtepi32_pd(__U,__A); 
+}
+__m128 test_mm_mask_cvtepi32_ps(__m128 __W, __mmask8 __U, __m128i __A) {
+  // CHECK-LABEL: @test_mm_mask_cvtepi32_ps
+  // CHECK: @llvm.x86.avx512.mask.cvtdq2ps.128
+  return _mm_mask_cvtepi32_ps(__W,__U,__A); 
+}
+__m128 test_mm_maskz_cvtepi32_ps(__mmask8 __U, __m128i __A) {
+  // CHECK-LABEL: @test_mm_maskz_cvtepi32_ps
+  // CHECK: @llvm.x86.avx512.mask.cvtdq2ps.128
+  return _mm_maskz_cvtepi32_ps(__U,__A); 
+}
+__m256 test_mm256_mask_cvtepi32_ps(__m256 __W, __mmask8 __U, __m256i __A) {
+  // CHECK-LABEL: @test_mm256_mask_cvtepi32_ps
+  // CHECK: @llvm.x86.avx512.mask.cvtdq2ps.256
+  return _mm256_mask_cvtepi32_ps(__W,__U,__A); 
+}
+__m256 test_mm256_maskz_cvtepi32_ps(__mmask8 __U, __m256i __A) {
+  // CHECK-LABEL: @test_mm256_maskz_cvtepi32_ps
+  // CHECK: @llvm.x86.avx512.mask.cvtdq2ps.256
+  return _mm256_maskz_cvtepi32_ps(__U,__A); 
+}
+__m128i test_mm_mask_cvtpd_epi32(__m128i __W, __mmask8 __U, __m128d __A) {
+  // CHECK-LABEL: @test_mm_mask_cvtpd_epi32
+  // CHECK: @llvm.x86.avx512.mask.cvtpd2dq.128
+  return _mm_mask_cvtpd_epi32(__W,__U,__A); 
+}
+__m128i test_mm_maskz_cvtpd_epi32(__mmask8 __U, __m128d __A) {
+  // CHECK-LABEL: @test_mm_maskz_cvtpd_epi32
+  // CHECK: @llvm.x86.avx512.mask.cvtpd2dq.128
+  return _mm_maskz_cvtpd_epi32(__U,__A); 
+}
+__m128i test_mm256_mask_cvtpd_epi32(__m128i __W, __mmask8 __U, __m256d __A) {
+  // CHECK-LABEL: @test_mm256_mask_cvtpd_epi32
+  // CHECK: @llvm.x86.avx512.mask.cvtpd2dq.256
+  return _mm256_mask_cvtpd_epi32(__W,__U,__A); 
+}
+__m128i test_mm256_maskz_cvtpd_epi32(__mmask8 __U, __m256d __A) {
+  // CHECK-LABEL: @test_mm256_maskz_cvtpd_epi32
+  // CHECK: @llvm.x86.avx512.mask.cvtpd2dq.256
+  return _mm256_maskz_cvtpd_epi32(__U,__A); 
+}
+__m128 test_mm_mask_cvtpd_ps(__m128 __W, __mmask8 __U, __m128d __A) {
+  // CHECK-LABEL: @test_mm_mask_cvtpd_ps
+  // CHECK: @llvm.x86.avx512.mask.cvtpd2ps
+  return _mm_mask_cvtpd_ps(__W,__U,__A); 
+}
+__m128 test_mm_maskz_cvtpd_ps(__mmask8 __U, __m128d __A) {
+  // CHECK-LABEL: @test_mm_maskz_cvtpd_ps
+  // CHECK: @llvm.x86.avx512.mask.cvtpd2ps
+  return _mm_maskz_cvtpd_ps(__U,__A); 
+}
+__m128 test_mm256_mask_cvtpd_ps(__m128 __W, __mmask8 __U, __m256d __A) {
+  // CHECK-LABEL: @test_mm256_mask_cvtpd_ps
+  // CHECK: @llvm.x86.avx512.mask.cvtpd2ps.256
+  return _mm256_mask_cvtpd_ps(__W,__U,__A); 
+}
+__m128 test_mm256_maskz_cvtpd_ps(__mmask8 __U, __m256d __A) {
+  // CHECK-LABEL: @test_mm256_maskz_cvtpd_ps
+  // CHECK: @llvm.x86.avx512.mask.cvtpd2ps.256
+  return _mm256_maskz_cvtpd_ps(__U,__A); 
+}
+__m128i test_mm_cvtpd_epu32(__m128d __A) {
+  // CHECK-LABEL: @test_mm_cvtpd_epu32
+  // CHECK: @llvm.x86.avx512.mask.cvtpd2udq.128
+  return _mm_cvtpd_epu32(__A); 
+}
+__m128i test_mm_mask_cvtpd_epu32(__m128i __W, __mmask8 __U, __m128d __A) {
+  // CHECK-LABEL: @test_mm_mask_cvtpd_epu32
+  // CHECK: @llvm.x86.avx512.mask.cvtpd2udq.128
+  return _mm_mask_cvtpd_epu32(__W,__U,__A); 
+}
+__m128i test_mm_maskz_cvtpd_epu32(__mmask8 __U, __m128d __A) {
+  // CHECK-LABEL: @test_mm_maskz_cvtpd_epu32
+  // CHECK: @llvm.x86.avx512.mask.cvtpd2udq.128
+  return _mm_maskz_cvtpd_epu32(__U,__A); 
+}
+__m128i test_mm256_cvtpd_epu32(__m256d __A) {
+  // CHECK-LABEL: @test_mm256_cvtpd_epu32
+  // CHECK: @llvm.x86.avx512.mask.cvtpd2udq.256
+  return _mm256_cvtpd_epu32(__A); 
+}
+__m128i test_mm256_mask_cvtpd_epu32(__m128i __W, __mmask8 __U, __m256d __A) {
+  // CHECK-LABEL: @test_mm256_mask_cvtpd_epu32
+  // CHECK: @llvm.x86.avx512.mask.cvtpd2udq.256
+  return _mm256_mask_cvtpd_epu32(__W,__U,__A); 
+}
+__m128i test_mm256_maskz_cvtpd_epu32(__mmask8 __U, __m256d __A) {
+  // CHECK-LABEL: @test_mm256_maskz_cvtpd_epu32
+  // CHECK: @llvm.x86.avx512.mask.cvtpd2udq.256
+  return _mm256_maskz_cvtpd_epu32(__U,__A); 
+}
+__m128i test_mm_mask_cvtps_epi32(__m128i __W, __mmask8 __U, __m128 __A) {
+  // CHECK-LABEL: @test_mm_mask_cvtps_epi32
+  // CHECK: @llvm.x86.avx512.mask.cvtps2dq.128
+  return _mm_mask_cvtps_epi32(__W,__U,__A); 
+}
+__m128i test_mm_maskz_cvtps_epi32(__mmask8 __U, __m128 __A) {
+  // CHECK-LABEL: @test_mm_maskz_cvtps_epi32
+  // CHECK: @llvm.x86.avx512.mask.cvtps2dq.128
+  return _mm_maskz_cvtps_epi32(__U,__A); 
+}
+__m256i test_mm256_mask_cvtps_epi32(__m256i __W, __mmask8 __U, __m256 __A) {
+  // CHECK-LABEL: @test_mm256_mask_cvtps_epi32
+  // CHECK: @llvm.x86.avx512.mask.cvtps2dq.256
+  return _mm256_mask_cvtps_epi32(__W,__U,__A); 
+}
+__m256i test_mm256_maskz_cvtps_epi32(__mmask8 __U, __m256 __A) {
+  // CHECK-LABEL: @test_mm256_maskz_cvtps_epi32
+  // CHECK: @llvm.x86.avx512.mask.cvtps2dq.256
+  return _mm256_maskz_cvtps_epi32(__U,__A); 
+}
+__m128d test_mm_mask_cvtps_pd(__m128d __W, __mmask8 __U, __m128 __A) {
+  // CHECK-LABEL: @test_mm_mask_cvtps_pd
+  // CHECK: @llvm.x86.avx512.mask.cvtps2pd.128
+  return _mm_mask_cvtps_pd(__W,__U,__A); 
+}
+__m128d test_mm_maskz_cvtps_pd(__mmask8 __U, __m128 __A) {
+  // CHECK-LABEL: @test_mm_maskz_cvtps_pd
+  // CHECK: @llvm.x86.avx512.mask.cvtps2pd.128
+  return _mm_maskz_cvtps_pd(__U,__A); 
+}
+__m256d test_mm256_mask_cvtps_pd(__m256d __W, __mmask8 __U, __m128 __A) {
+  // CHECK-LABEL: @test_mm256_mask_cvtps_pd
+  // CHECK: @llvm.x86.avx512.mask.cvtps2pd.256
+  return _mm256_mask_cvtps_pd(__W,__U,__A); 
+}
+__m256d test_mm256_maskz_cvtps_pd(__mmask8 __U, __m128 __A) {
+  // CHECK-LABEL: @test_mm256_maskz_cvtps_pd
+  // CHECK: @llvm.x86.avx512.mask.cvtps2pd.256
+  return _mm256_maskz_cvtps_pd(__U,__A); 
+}
+__m128i test_mm_cvtps_epu32(__m128 __A) {
+  // CHECK-LABEL: @test_mm_cvtps_epu32
+  // CHECK: @llvm.x86.avx512.mask.cvtps2udq.128
+  return _mm_cvtps_epu32(__A); 
+}
+__m128i test_mm_mask_cvtps_epu32(__m128i __W, __mmask8 __U, __m128 __A) {
+  // CHECK-LABEL: @test_mm_mask_cvtps_epu32
+  // CHECK: @llvm.x86.avx512.mask.cvtps2udq.128
+  return _mm_mask_cvtps_epu32(__W,__U,__A); 
+}
+__m128i test_mm_maskz_cvtps_epu32(__mmask8 __U, __m128 __A) {
+  // CHECK-LABEL: @test_mm_maskz_cvtps_epu32
+  // CHECK: @llvm.x86.avx512.mask.cvtps2udq.128
+  return _mm_maskz_cvtps_epu32(__U,__A); 
+}
+__m256i test_mm256_cvtps_epu32(__m256 __A) {
+  // CHECK-LABEL: @test_mm256_cvtps_epu32
+  // CHECK: @llvm.x86.avx512.mask.cvtps2udq.256
+  return _mm256_cvtps_epu32(__A); 
+}
+__m256i test_mm256_mask_cvtps_epu32(__m256i __W, __mmask8 __U, __m256 __A) {
+  // CHECK-LABEL: @test_mm256_mask_cvtps_epu32
+  // CHECK: @llvm.x86.avx512.mask.cvtps2udq.256
+  return _mm256_mask_cvtps_epu32(__W,__U,__A); 
+}
+__m256i test_mm256_maskz_cvtps_epu32(__mmask8 __U, __m256 __A) {
+  // CHECK-LABEL: @test_mm256_maskz_cvtps_epu32
+  // CHECK: @llvm.x86.avx512.mask.cvtps2udq.256
+  return _mm256_maskz_cvtps_epu32(__U,__A); 
+}
+__m128i test_mm_mask_cvttpd_epi32(__m128i __W, __mmask8 __U, __m128d __A) {
+  // CHECK-LABEL: @test_mm_mask_cvttpd_epi32
+  // CHECK: @llvm.x86.avx512.mask.cvttpd2dq.128
+  return _mm_mask_cvttpd_epi32(__W,__U,__A); 
+}
+__m128i test_mm_maskz_cvttpd_epi32(__mmask8 __U, __m128d __A) {
+  // CHECK-LABEL: @test_mm_maskz_cvttpd_epi32
+  // CHECK: @llvm.x86.avx512.mask.cvttpd2dq.128
+  return _mm_maskz_cvttpd_epi32(__U,__A); 
+}
+__m128i test_mm256_mask_cvttpd_epi32(__m128i __W, __mmask8 __U, __m256d __A) {
+  // CHECK-LABEL: @test_mm256_mask_cvttpd_epi32
+  // CHECK: @llvm.x86.avx512.mask.cvttpd2dq.256
+  return _mm256_mask_cvttpd_epi32(__W,__U,__A); 
+}
+__m128i test_mm256_maskz_cvttpd_epi32(__mmask8 __U, __m256d __A) {
+  // CHECK-LABEL: @test_mm256_maskz_cvttpd_epi32
+  // CHECK: @llvm.x86.avx512.mask.cvttpd2dq.256
+  return _mm256_maskz_cvttpd_epi32(__U,__A); 
+}
+__m128i test_mm_cvttpd_epu32(__m128d __A) {
+  // CHECK-LABEL: @test_mm_cvttpd_epu32
+  // CHECK: @llvm.x86.avx512.mask.cvttpd2udq.128
+  return _mm_cvttpd_epu32(__A); 
+}
+__m128i test_mm_mask_cvttpd_epu32(__m128i __W, __mmask8 __U, __m128d __A) {
+  // CHECK-LABEL: @test_mm_mask_cvttpd_epu32
+  // CHECK: @llvm.x86.avx512.mask.cvttpd2udq.128
+  return _mm_mask_cvttpd_epu32(__W,__U,__A); 
+}
+__m128i test_mm_maskz_cvttpd_epu32(__mmask8 __U, __m128d __A) {
+  // CHECK-LABEL: @test_mm_maskz_cvttpd_epu32
+  // CHECK: @llvm.x86.avx512.mask.cvttpd2udq.128
+  return _mm_maskz_cvttpd_epu32(__U,__A); 
+}
+__m128i test_mm256_cvttpd_epu32(__m256d __A) {
+  // CHECK-LABEL: @test_mm256_cvttpd_epu32
+  // CHECK: @llvm.x86.avx512.mask.cvttpd2udq.256
+  return _mm256_cvttpd_epu32(__A); 
+}
+__m128i test_mm256_mask_cvttpd_epu32(__m128i __W, __mmask8 __U, __m256d __A) {
+  // CHECK-LABEL: @test_mm256_mask_cvttpd_epu32
+  // CHECK: @llvm.x86.avx512.mask.cvttpd2udq.256
+  return _mm256_mask_cvttpd_epu32(__W,__U,__A); 
+}
+__m128i test_mm256_maskz_cvttpd_epu32(__mmask8 __U, __m256d __A) {
+  // CHECK-LABEL: @test_mm256_maskz_cvttpd_epu32
+  // CHECK: @llvm.x86.avx512.mask.cvttpd2udq.256
+  return _mm256_maskz_cvttpd_epu32(__U,__A); 
+}
+__m128i test_mm_mask_cvttps_epi32(__m128i __W, __mmask8 __U, __m128 __A) {
+  // CHECK-LABEL: @test_mm_mask_cvttps_epi32
+  // CHECK: @llvm.x86.avx512.mask.cvttps2dq.128
+  return _mm_mask_cvttps_epi32(__W,__U,__A); 
+}
+__m128i test_mm_maskz_cvttps_epi32(__mmask8 __U, __m128 __A) {
+  // CHECK-LABEL: @test_mm_maskz_cvttps_epi32
+  // CHECK: @llvm.x86.avx512.mask.cvttps2dq.128
+  return _mm_maskz_cvttps_epi32(__U,__A); 
+}
+__m256i test_mm256_mask_cvttps_epi32(__m256i __W, __mmask8 __U, __m256 __A) {
+  // CHECK-LABEL: @test_mm256_mask_cvttps_epi32
+  // CHECK: @llvm.x86.avx512.mask.cvttps2dq.256
+  return _mm256_mask_cvttps_epi32(__W,__U,__A); 
+}
+__m256i test_mm256_maskz_cvttps_epi32(__mmask8 __U, __m256 __A) {
+  // CHECK-LABEL: @test_mm256_maskz_cvttps_epi32
+  // CHECK: @llvm.x86.avx512.mask.cvttps2dq.256
+  return _mm256_maskz_cvttps_epi32(__U,__A); 
+}
+__m128i test_mm_cvttps_epu32(__m128 __A) {
+  // CHECK-LABEL: @test_mm_cvttps_epu32
+  // CHECK: @llvm.x86.avx512.mask.cvttps2udq.128
+  return _mm_cvttps_epu32(__A); 
+}
+__m128i test_mm_mask_cvttps_epu32(__m128i __W, __mmask8 __U, __m128 __A) {
+  // CHECK-LABEL: @test_mm_mask_cvttps_epu32
+  // CHECK: @llvm.x86.avx512.mask.cvttps2udq.128
+  return _mm_mask_cvttps_epu32(__W,__U,__A); 
+}
+__m128i test_mm_maskz_cvttps_epu32(__mmask8 __U, __m128 __A) {
+  // CHECK-LABEL: @test_mm_maskz_cvttps_epu32
+  // CHECK: @llvm.x86.avx512.mask.cvttps2udq.128
+  return _mm_maskz_cvttps_epu32(__U,__A); 
+}
+__m256i test_mm256_cvttps_epu32(__m256 __A) {
+  // CHECK-LABEL: @test_mm256_cvttps_epu32
+  // CHECK: @llvm.x86.avx512.mask.cvttps2udq.256
+  return _mm256_cvttps_epu32(__A); 
+}
+__m256i test_mm256_mask_cvttps_epu32(__m256i __W, __mmask8 __U, __m256 __A) {
+  // CHECK-LABEL: @test_mm256_mask_cvttps_epu32
+  // CHECK: @llvm.x86.avx512.mask.cvttps2udq.256
+  return _mm256_mask_cvttps_epu32(__W,__U,__A); 
+}
+__m256i test_mm256_maskz_cvttps_epu32(__mmask8 __U, __m256 __A) {
+  // CHECK-LABEL: @test_mm256_maskz_cvttps_epu32
+  // CHECK: @llvm.x86.avx512.mask.cvttps2udq.256
+  return _mm256_maskz_cvttps_epu32(__U,__A); 
+}
+__m128d test_mm_cvtepu32_pd(__m128i __A) {
+  // CHECK-LABEL: @test_mm_cvtepu32_pd
+  // CHECK: @llvm.x86.avx512.mask.cvtudq2pd.128
+  return _mm_cvtepu32_pd(__A); 
+}
+__m128d test_mm_mask_cvtepu32_pd(__m128d __W, __mmask8 __U, __m128i __A) {
+  // CHECK-LABEL: @test_mm_mask_cvtepu32_pd
+  // CHECK: @llvm.x86.avx512.mask.cvtudq2pd.128
+  return _mm_mask_cvtepu32_pd(__W,__U,__A); 
+}
+__m128d test_mm_maskz_cvtepu32_pd(__mmask8 __U, __m128i __A) {
+  // CHECK-LABEL: @test_mm_maskz_cvtepu32_pd
+  // CHECK: @llvm.x86.avx512.mask.cvtudq2pd.128
+  return _mm_maskz_cvtepu32_pd(__U,__A); 
+}
+__m256d test_mm256_cvtepu32_pd(__m128i __A) {
+  // CHECK-LABEL: @test_mm256_cvtepu32_pd
+  // CHECK: @llvm.x86.avx512.mask.cvtudq2pd.256
+  return _mm256_cvtepu32_pd(__A); 
+}
+__m256d test_mm256_mask_cvtepu32_pd(__m256d __W, __mmask8 __U, __m128i __A) {
+  // CHECK-LABEL: @test_mm256_mask_cvtepu32_pd
+  // CHECK: @llvm.x86.avx512.mask.cvtudq2pd.256
+  return _mm256_mask_cvtepu32_pd(__W,__U,__A); 
+}
+__m256d test_mm256_maskz_cvtepu32_pd(__mmask8 __U, __m128i __A) {
+  // CHECK-LABEL: @test_mm256_maskz_cvtepu32_pd
+  // CHECK: @llvm.x86.avx512.mask.cvtudq2pd.256
+  return _mm256_maskz_cvtepu32_pd(__U,__A); 
+}
+__m128 test_mm_cvtepu32_ps(__m128i __A) {
+  // CHECK-LABEL: @test_mm_cvtepu32_ps
+  // CHECK: @llvm.x86.avx512.mask.cvtudq2ps.128
+  return _mm_cvtepu32_ps(__A); 
+}
+__m128 test_mm_mask_cvtepu32_ps(__m128 __W, __mmask8 __U, __m128i __A) {
+  // CHECK-LABEL: @test_mm_mask_cvtepu32_ps
+  // CHECK: @llvm.x86.avx512.mask.cvtudq2ps.128
+  return _mm_mask_cvtepu32_ps(__W,__U,__A); 
+}
+__m128 test_mm_maskz_cvtepu32_ps(__mmask8 __U, __m128i __A) {
+  // CHECK-LABEL: @test_mm_maskz_cvtepu32_ps
+  // CHECK: @llvm.x86.avx512.mask.cvtudq2ps.128
+  return _mm_maskz_cvtepu32_ps(__U,__A); 
+}
+__m256 test_mm256_cvtepu32_ps(__m256i __A) {
+  // CHECK-LABEL: @test_mm256_cvtepu32_ps
+  // CHECK: @llvm.x86.avx512.mask.cvtudq2ps.256
+  return _mm256_cvtepu32_ps(__A); 
+}
+__m256 test_mm256_mask_cvtepu32_ps(__m256 __W, __mmask8 __U, __m256i __A) {
+  // CHECK-LABEL: @test_mm256_mask_cvtepu32_ps
+  // CHECK: @llvm.x86.avx512.mask.cvtudq2ps.256
+  return _mm256_mask_cvtepu32_ps(__W,__U,__A); 
+}
+__m256 test_mm256_maskz_cvtepu32_ps(__mmask8 __U, __m256i __A) {
+  // CHECK-LABEL: @test_mm256_maskz_cvtepu32_ps
+  // CHECK: @llvm.x86.avx512.mask.cvtudq2ps.256
+  return _mm256_maskz_cvtepu32_ps(__U,__A); 
+}
+__m128d test_mm_mask_div_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
+  // CHECK-LABEL: @test_mm_mask_div_pd
+  // CHECK: @llvm.x86.avx512.mask.div.pd.128
+  return _mm_mask_div_pd(__W,__U,__A,__B); 
+}
+__m128d test_mm_maskz_div_pd(__mmask8 __U, __m128d __A, __m128d __B) {
+  // CHECK-LABEL: @test_mm_maskz_div_pd
+  // CHECK: @llvm.x86.avx512.mask.div.pd.128
+  return _mm_maskz_div_pd(__U,__A,__B); 
+}
+__m256d test_mm256_mask_div_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
+  // CHECK-LABEL: @test_mm256_mask_div_pd
+  // CHECK: @llvm.x86.avx512.mask.div.pd.256
+  return _mm256_mask_div_pd(__W,__U,__A,__B); 
+}
+__m256d test_mm256_maskz_div_pd(__mmask8 __U, __m256d __A, __m256d __B) {
+  // CHECK-LABEL: @test_mm256_maskz_div_pd
+  // CHECK: @llvm.x86.avx512.mask.div.pd.256
+  return _mm256_maskz_div_pd(__U,__A,__B); 
+}
+__m128 test_mm_mask_div_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
+  // CHECK-LABEL: @test_mm_mask_div_ps
+  // CHECK: @llvm.x86.avx512.mask.div.ps.128
+  return _mm_mask_div_ps(__W,__U,__A,__B); 
+}
+__m128 test_mm_maskz_div_ps(__mmask8 __U, __m128 __A, __m128 __B) {
+  // CHECK-LABEL: @test_mm_maskz_div_ps
+  // CHECK: @llvm.x86.avx512.mask.div.ps.128
+  return _mm_maskz_div_ps(__U,__A,__B); 
+}
+__m256 test_mm256_mask_div_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
+  // CHECK-LABEL: @test_mm256_mask_div_ps
+  // CHECK: @llvm.x86.avx512.mask.div.ps.256
+  return _mm256_mask_div_ps(__W,__U,__A,__B); 
+}
+__m256 test_mm256_maskz_div_ps(__mmask8 __U, __m256 __A, __m256 __B) {
+  // CHECK-LABEL: @test_mm256_maskz_div_ps
+  // CHECK: @llvm.x86.avx512.mask.div.ps.256
+  return _mm256_maskz_div_ps(__U,__A,__B); 
+}
+__m128d test_mm_mask_expand_pd(__m128d __W, __mmask8 __U, __m128d __A) {
+  // CHECK-LABEL: @test_mm_mask_expand_pd
+  // CHECK: @llvm.x86.avx512.mask.expand.pd.128
+  return _mm_mask_expand_pd(__W,__U,__A); 
+}
+__m128d test_mm_maskz_expand_pd(__mmask8 __U, __m128d __A) {
+  // CHECK-LABEL: @test_mm_maskz_expand_pd
+  // CHECK: @llvm.x86.avx512.mask.expand.pd.128
+  return _mm_maskz_expand_pd(__U,__A); 
+}
+__m256d test_mm256_mask_expand_pd(__m256d __W, __mmask8 __U, __m256d __A) {
+  // CHECK-LABEL: @test_mm256_mask_expand_pd
+  // CHECK: @llvm.x86.avx512.mask.expand.pd.256
+  return _mm256_mask_expand_pd(__W,__U,__A); 
+}
+__m256d test_mm256_maskz_expand_pd(__mmask8 __U, __m256d __A) {
+  // CHECK-LABEL: @test_mm256_maskz_expand_pd
+  // CHECK: @llvm.x86.avx512.mask.expand.pd.256
+  return _mm256_maskz_expand_pd(__U,__A); 
+}
+__m128i test_mm_mask_expand_epi64(__m128i __W, __mmask8 __U, __m128i __A) {
+  // CHECK-LABEL: @test_mm_mask_expand_epi64
+  // CHECK: @llvm.x86.avx512.mask.expand.q.128
+  return _mm_mask_expand_epi64(__W,__U,__A); 
+}
+__m128i test_mm_maskz_expand_epi64(__mmask8 __U, __m128i __A) {
+  // CHECK-LABEL: @test_mm_maskz_expand_epi64
+  // CHECK: @llvm.x86.avx512.mask.expand.q.128
+  return _mm_maskz_expand_epi64(__U,__A); 
+}
+__m256i test_mm256_mask_expand_epi64(__m256i __W, __mmask8 __U, __m256i __A) {
+  // CHECK-LABEL: @test_mm256_mask_expand_epi64
+  // CHECK: @llvm.x86.avx512.mask.expand.q.256
+  return _mm256_mask_expand_epi64(__W,__U,__A); 
+}
+__m256i test_mm256_maskz_expand_epi64(__mmask8 __U, __m256i __A) {
+  // CHECK-LABEL: @test_mm256_maskz_expand_epi64
+  // CHECK: @llvm.x86.avx512.mask.expand.q.256
+  return _mm256_maskz_expand_epi64(__U,__A); 
+}
+__m128d test_mm_mask_expandloadu_pd(__m128d __W, __mmask8 __U, void const *__P) {
+  // CHECK-LABEL: @test_mm_mask_expandloadu_pd
+  // CHECK: @llvm.x86.avx512.mask.expand.load.pd.128
+  return _mm_mask_expandloadu_pd(__W,__U,__P); 
+}
+__m128d test_mm_maskz_expandloadu_pd(__mmask8 __U, void const *__P) {
+  // CHECK-LABEL: @test_mm_maskz_expandloadu_pd
+  // CHECK: @llvm.x86.avx512.mask.expand.load.pd.128
+  return _mm_maskz_expandloadu_pd(__U,__P); 
+}
+__m256d test_mm256_mask_expandloadu_pd(__m256d __W, __mmask8 __U, void const *__P) {
+  // CHECK-LABEL: @test_mm256_mask_expandloadu_pd
+  // CHECK: @llvm.x86.avx512.mask.expand.load.pd.256
+  return _mm256_mask_expandloadu_pd(__W,__U,__P); 
+}
+__m256d test_mm256_maskz_expandloadu_pd(__mmask8 __U, void const *__P) {
+  // CHECK-LABEL: @test_mm256_maskz_expandloadu_pd
+  // CHECK: @llvm.x86.avx512.mask.expand.load.pd.256
+  return _mm256_maskz_expandloadu_pd(__U,__P); 
+}
+__m128i test_mm_mask_expandloadu_epi64(__m128i __W, __mmask8 __U, void const *__P) {
+  // CHECK-LABEL: @test_mm_mask_expandloadu_epi64
+  // CHECK: @llvm.x86.avx512.mask.expand.load.q.128
+  return _mm_mask_expandloadu_epi64(__W,__U,__P); 
+}
+__m128i test_mm_maskz_expandloadu_epi64(__mmask8 __U, void const *__P) {
+  // CHECK-LABEL: @test_mm_maskz_expandloadu_epi64
+  // CHECK: @llvm.x86.avx512.mask.expand.load.q.128
+  return _mm_maskz_expandloadu_epi64(__U,__P); 
+}
+__m256i test_mm256_mask_expandloadu_epi64(__m256i __W, __mmask8 __U, void const *__P) {
+  // CHECK-LABEL: @test_mm256_mask_expandloadu_epi64
+  // CHECK: @llvm.x86.avx512.mask.expand.load.q.256
+  return _mm256_mask_expandloadu_epi64(__W,__U,__P); 
+}
+__m256i test_mm256_maskz_expandloadu_epi64(__mmask8 __U, void const *__P) {
+  // CHECK-LABEL: @test_mm256_maskz_expandloadu_epi64
+  // CHECK: @llvm.x86.avx512.mask.expand.load.q.256
+  return _mm256_maskz_expandloadu_epi64(__U,__P); 
+}
+__m128 test_mm_mask_expandloadu_ps(__m128 __W, __mmask8 __U, void const *__P) {
+  // CHECK-LABEL: @test_mm_mask_expandloadu_ps
+  // CHECK: @llvm.x86.avx512.mask.expand.load.ps.128
+  return _mm_mask_expandloadu_ps(__W,__U,__P); 
+}
+__m128 test_mm_maskz_expandloadu_ps(__mmask8 __U, void const *__P) {
+  // CHECK-LABEL: @test_mm_maskz_expandloadu_ps
+  // CHECK: @llvm.x86.avx512.mask.expand.load.ps.128
+  return _mm_maskz_expandloadu_ps(__U,__P); 
+}
+__m256 test_mm256_mask_expandloadu_ps(__m256 __W, __mmask8 __U, void const *__P) {
+  // CHECK-LABEL: @test_mm256_mask_expandloadu_ps
+  // CHECK: @llvm.x86.avx512.mask.expand.load.ps.256
+  return _mm256_mask_expandloadu_ps(__W,__U,__P); 
+}
+__m256 test_mm256_maskz_expandloadu_ps(__mmask8 __U, void const *__P) {
+  // CHECK-LABEL: @test_mm256_maskz_expandloadu_ps
+  // CHECK: @llvm.x86.avx512.mask.expand.load.ps.256
+  return _mm256_maskz_expandloadu_ps(__U,__P); 
+}
+__m128i test_mm_mask_expandloadu_epi32(__m128i __W, __mmask8 __U, void const *__P) {
+  // CHECK-LABEL: @test_mm_mask_expandloadu_epi32
+  // CHECK: @llvm.x86.avx512.mask.expand.load.d.128
+  return _mm_mask_expandloadu_epi32(__W,__U,__P); 
+}
+__m128i test_mm_maskz_expandloadu_epi32(__mmask8 __U, void const *__P) {
+  // CHECK-LABEL: @test_mm_maskz_expandloadu_epi32
+  // CHECK: @llvm.x86.avx512.mask.expand.load.d.128
+  return _mm_maskz_expandloadu_epi32(__U,__P); 
+}
+__m256i test_mm256_mask_expandloadu_epi32(__m256i __W, __mmask8 __U, void const *__P) {
+  // CHECK-LABEL: @test_mm256_mask_expandloadu_epi32
+  // CHECK: @llvm.x86.avx512.mask.expand.load.d.256
+  return _mm256_mask_expandloadu_epi32(__W,__U,__P); 
+}
+__m256i test_mm256_maskz_expandloadu_epi32(__mmask8 __U, void const *__P) {
+  // CHECK-LABEL: @test_mm256_maskz_expandloadu_epi32
+  // CHECK: @llvm.x86.avx512.mask.expand.load.d.256
+  return _mm256_maskz_expandloadu_epi32(__U,__P); 
+}
+__m128 test_mm_mask_expand_ps(__m128 __W, __mmask8 __U, __m128 __A) {
+  // CHECK-LABEL: @test_mm_mask_expand_ps
+  // CHECK: @llvm.x86.avx512.mask.expand.ps.128
+  return _mm_mask_expand_ps(__W,__U,__A); 
+}
+__m128 test_mm_maskz_expand_ps(__mmask8 __U, __m128 __A) {
+  // CHECK-LABEL: @test_mm_maskz_expand_ps
+  // CHECK: @llvm.x86.avx512.mask.expand.ps.128
+  return _mm_maskz_expand_ps(__U,__A); 
+}
+__m256 test_mm256_mask_expand_ps(__m256 __W, __mmask8 __U, __m256 __A) {
+  // CHECK-LABEL: @test_mm256_mask_expand_ps
+  // CHECK: @llvm.x86.avx512.mask.expand.ps.256
+  return _mm256_mask_expand_ps(__W,__U,__A); 
+}
+__m256 test_mm256_maskz_expand_ps(__mmask8 __U, __m256 __A) {
+  // CHECK-LABEL: @test_mm256_maskz_expand_ps
+  // CHECK: @llvm.x86.avx512.mask.expand.ps.256
+  return _mm256_maskz_expand_ps(__U,__A); 
+}
+__m128i test_mm_mask_expand_epi32(__m128i __W, __mmask8 __U, __m128i __A) {
+  // CHECK-LABEL: @test_mm_mask_expand_epi32
+  // CHECK: @llvm.x86.avx512.mask.expand.d.128
+  return _mm_mask_expand_epi32(__W,__U,__A); 
+}
+__m128i test_mm_maskz_expand_epi32(__mmask8 __U, __m128i __A) {
+  // CHECK-LABEL: @test_mm_maskz_expand_epi32
+  // CHECK: @llvm.x86.avx512.mask.expand.d.128
+  return _mm_maskz_expand_epi32(__U,__A); 
+}
+__m256i test_mm256_mask_expand_epi32(__m256i __W, __mmask8 __U, __m256i __A) {
+  // CHECK-LABEL: @test_mm256_mask_expand_epi32
+  // CHECK: @llvm.x86.avx512.mask.expand.d.256
+  return _mm256_mask_expand_epi32(__W,__U,__A); 
+}
+__m256i test_mm256_maskz_expand_epi32(__mmask8 __U, __m256i __A) {
+  // CHECK-LABEL: @test_mm256_maskz_expand_epi32
+  // CHECK: @llvm.x86.avx512.mask.expand.d.256
+  return _mm256_maskz_expand_epi32(__U,__A); 
+}
+__m128d test_mm_getexp_pd(__m128d __A) {
+  // CHECK-LABEL: @test_mm_getexp_pd
+  // CHECK: @llvm.x86.avx512.mask.getexp.pd.128
+  return _mm_getexp_pd(__A); 
+}
+__m128d test_mm_mask_getexp_pd(__m128d __W, __mmask8 __U, __m128d __A) {
+  // CHECK-LABEL: @test_mm_mask_getexp_pd
+  // CHECK: @llvm.x86.avx512.mask.getexp.pd.128
+  return _mm_mask_getexp_pd(__W,__U,__A); 
+}
+__m128d test_mm_maskz_getexp_pd(__mmask8 __U, __m128d __A) {
+  // CHECK-LABEL: @test_mm_maskz_getexp_pd
+  // CHECK: @llvm.x86.avx512.mask.getexp.pd.128
+  return _mm_maskz_getexp_pd(__U,__A); 
+}
+__m256d test_mm256_getexp_pd(__m256d __A) {
+  // CHECK-LABEL: @test_mm256_getexp_pd
+  // CHECK: @llvm.x86.avx512.mask.getexp.pd.256
+  return _mm256_getexp_pd(__A); 
+}
+__m256d test_mm256_mask_getexp_pd(__m256d __W, __mmask8 __U, __m256d __A) {
+  // CHECK-LABEL: @test_mm256_mask_getexp_pd
+  // CHECK: @llvm.x86.avx512.mask.getexp.pd.256
+  return _mm256_mask_getexp_pd(__W,__U,__A); 
+}
+__m256d test_mm256_maskz_getexp_pd(__mmask8 __U, __m256d __A) {
+  // CHECK-LABEL: @test_mm256_maskz_getexp_pd
+  // CHECK: @llvm.x86.avx512.mask.getexp.pd.256
+  return _mm256_maskz_getexp_pd(__U,__A); 
+}
+__m128 test_mm_getexp_ps(__m128 __A) {
+  // CHECK-LABEL: @test_mm_getexp_ps
+  // CHECK: @llvm.x86.avx512.mask.getexp.ps.128
+  return _mm_getexp_ps(__A); 
+}
+__m128 test_mm_mask_getexp_ps(__m128 __W, __mmask8 __U, __m128 __A) {
+  // CHECK-LABEL: @test_mm_mask_getexp_ps
+  // CHECK: @llvm.x86.avx512.mask.getexp.ps.128
+  return _mm_mask_getexp_ps(__W,__U,__A); 
+}
+__m128 test_mm_maskz_getexp_ps(__mmask8 __U, __m128 __A) {
+  // CHECK-LABEL: @test_mm_maskz_getexp_ps
+  // CHECK: @llvm.x86.avx512.mask.getexp.ps.128
+  return _mm_maskz_getexp_ps(__U,__A); 
+}
+__m256 test_mm256_getexp_ps(__m256 __A) {
+  // CHECK-LABEL: @test_mm256_getexp_ps
+  // CHECK: @llvm.x86.avx512.mask.getexp.ps.256
+  return _mm256_getexp_ps(__A); 
+}
+__m256 test_mm256_mask_getexp_ps(__m256 __W, __mmask8 __U, __m256 __A) {
+  // CHECK-LABEL: @test_mm256_mask_getexp_ps
+  // CHECK: @llvm.x86.avx512.mask.getexp.ps.256
+  return _mm256_mask_getexp_ps(__W,__U,__A); 
+}
+__m256 test_mm256_maskz_getexp_ps(__mmask8 __U, __m256 __A) {
+  // CHECK-LABEL: @test_mm256_maskz_getexp_ps
+  // CHECK: @llvm.x86.avx512.mask.getexp.ps.256
+  return _mm256_maskz_getexp_ps(__U,__A); 
+}
+__m128d test_mm_mask_max_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
+  // CHECK-LABEL: @test_mm_mask_max_pd
+  // CHECK: @llvm.x86.avx512.mask.max.pd
+  return _mm_mask_max_pd(__W,__U,__A,__B); 
+}
+__m128d test_mm_maskz_max_pd(__mmask8 __U, __m128d __A, __m128d __B) {
+  // CHECK-LABEL: @test_mm_maskz_max_pd
+  // CHECK: @llvm.x86.avx512.mask.max.pd
+  return _mm_maskz_max_pd(__U,__A,__B); 
+}
+__m256d test_mm256_mask_max_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
+  // CHECK-LABEL: @test_mm256_mask_max_pd
+  // CHECK: @llvm.x86.avx512.mask.max.pd.256
+  return _mm256_mask_max_pd(__W,__U,__A,__B); 
+}
+__m256d test_mm256_maskz_max_pd(__mmask8 __U, __m256d __A, __m256d __B) {
+  // CHECK-LABEL: @test_mm256_maskz_max_pd
+  // CHECK: @llvm.x86.avx512.mask.max.pd.256
+  return _mm256_maskz_max_pd(__U,__A,__B); 
+}
+__m128 test_mm_mask_max_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
+  // CHECK-LABEL: @test_mm_mask_max_ps
+  // CHECK: @llvm.x86.avx512.mask.max.ps
+  return _mm_mask_max_ps(__W,__U,__A,__B); 
+}
+__m128 test_mm_maskz_max_ps(__mmask8 __U, __m128 __A, __m128 __B) {
+  // CHECK-LABEL: @test_mm_maskz_max_ps
+  // CHECK: @llvm.x86.avx512.mask.max.ps
+  return _mm_maskz_max_ps(__U,__A,__B); 
+}
+__m256 test_mm256_mask_max_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
+  // CHECK-LABEL: @test_mm256_mask_max_ps
+  // CHECK: @llvm.x86.avx512.mask.max.ps.256
+  return _mm256_mask_max_ps(__W,__U,__A,__B); 
+}
+__m256 test_mm256_maskz_max_ps(__mmask8 __U, __m256 __A, __m256 __B) {
+  // CHECK-LABEL: @test_mm256_maskz_max_ps
+  // CHECK: @llvm.x86.avx512.mask.max.ps.256
+  return _mm256_maskz_max_ps(__U,__A,__B); 
+}
+__m128d test_mm_mask_min_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
+  // CHECK-LABEL: @test_mm_mask_min_pd
+  // CHECK: @llvm.x86.avx512.mask.min.pd
+  return _mm_mask_min_pd(__W,__U,__A,__B); 
+}
+__m128d test_mm_maskz_min_pd(__mmask8 __U, __m128d __A, __m128d __B) {
+  // CHECK-LABEL: @test_mm_maskz_min_pd
+  // CHECK: @llvm.x86.avx512.mask.min.pd
+  return _mm_maskz_min_pd(__U,__A,__B); 
+}
+__m256d test_mm256_mask_min_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
+  // CHECK-LABEL: @test_mm256_mask_min_pd
+  // CHECK: @llvm.x86.avx512.mask.min.pd.256
+  return _mm256_mask_min_pd(__W,__U,__A,__B); 
+}
+__m256d test_mm256_maskz_min_pd(__mmask8 __U, __m256d __A, __m256d __B) {
+  // CHECK-LABEL: @test_mm256_maskz_min_pd
+  // CHECK: @llvm.x86.avx512.mask.min.pd.256
+  return _mm256_maskz_min_pd(__U,__A,__B); 
+}
+__m128 test_mm_mask_min_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
+  // CHECK-LABEL: @test_mm_mask_min_ps
+  // CHECK: @llvm.x86.avx512.mask.min.ps
+  return _mm_mask_min_ps(__W,__U,__A,__B); 
+}
+__m128 test_mm_maskz_min_ps(__mmask8 __U, __m128 __A, __m128 __B) {
+  // CHECK-LABEL: @test_mm_maskz_min_ps
+  // CHECK: @llvm.x86.avx512.mask.min.ps
+  return _mm_maskz_min_ps(__U,__A,__B); 
+}
+__m256 test_mm256_mask_min_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
+  // CHECK-LABEL: @test_mm256_mask_min_ps
+  // CHECK: @llvm.x86.avx512.mask.min.ps.256
+  return _mm256_mask_min_ps(__W,__U,__A,__B); 
+}
+__m256 test_mm256_maskz_min_ps(__mmask8 __U, __m256 __A, __m256 __B) {
+  // CHECK-LABEL: @test_mm256_maskz_min_ps
+  // CHECK: @llvm.x86.avx512.mask.min.ps.256
+  return _mm256_maskz_min_ps(__U,__A,__B); 
+}
+__m128d test_mm_mask_mul_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
+  // CHECK-LABEL: @test_mm_mask_mul_pd
+  // CHECK: @llvm.x86.avx512.mask.mul.pd
+  return _mm_mask_mul_pd(__W,__U,__A,__B); 
+}
+__m128d test_mm_maskz_mul_pd(__mmask8 __U, __m128d __A, __m128d __B) {
+  // CHECK-LABEL: @test_mm_maskz_mul_pd
+  // CHECK: @llvm.x86.avx512.mask.mul.pd
+  return _mm_maskz_mul_pd(__U,__A,__B); 
+}
+__m256d test_mm256_mask_mul_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
+  // CHECK-LABEL: @test_mm256_mask_mul_pd
+  // CHECK: @llvm.x86.avx512.mask.mul.pd.256
+  return _mm256_mask_mul_pd(__W,__U,__A,__B); 
+}
+__m256d test_mm256_maskz_mul_pd(__mmask8 __U, __m256d __A, __m256d __B) {
+  // CHECK-LABEL: @test_mm256_maskz_mul_pd
+  // CHECK: @llvm.x86.avx512.mask.mul.pd.256
+  return _mm256_maskz_mul_pd(__U,__A,__B); 
+}
+__m128 test_mm_mask_mul_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
+  // CHECK-LABEL: @test_mm_mask_mul_ps
+  // CHECK: @llvm.x86.avx512.mask.mul.ps
+  return _mm_mask_mul_ps(__W,__U,__A,__B); 
+}
+__m128 test_mm_maskz_mul_ps(__mmask8 __U, __m128 __A, __m128 __B) {
+  // CHECK-LABEL: @test_mm_maskz_mul_ps
+  // CHECK: @llvm.x86.avx512.mask.mul.ps
+  return _mm_maskz_mul_ps(__U,__A,__B); 
+}
+__m256 test_mm256_mask_mul_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
+  // CHECK-LABEL: @test_mm256_mask_mul_ps
+  // CHECK: @llvm.x86.avx512.mask.mul.ps.256
+  return _mm256_mask_mul_ps(__W,__U,__A,__B); 
+}
+__m256 test_mm256_maskz_mul_ps(__mmask8 __U, __m256 __A, __m256 __B) {
+  // CHECK-LABEL: @test_mm256_maskz_mul_ps
+  // CHECK: @llvm.x86.avx512.mask.mul.ps.256
+  return _mm256_maskz_mul_ps(__U,__A,__B); 
+}
+__m128i test_mm_mask_abs_epi32(__m128i __W, __mmask8 __U, __m128i __A) {
+  // CHECK-LABEL: @test_mm_mask_abs_epi32
+  // CHECK: @llvm.x86.avx512.mask.pabs.d.128
+  return _mm_mask_abs_epi32(__W,__U,__A); 
+}
+__m128i test_mm_maskz_abs_epi32(__mmask8 __U, __m128i __A) {
+  // CHECK-LABEL: @test_mm_maskz_abs_epi32
+  // CHECK: @llvm.x86.avx512.mask.pabs.d.128
+  return _mm_maskz_abs_epi32(__U,__A); 
+}
+__m256i test_mm256_mask_abs_epi32(__m256i __W, __mmask8 __U, __m256i __A) {
+  // CHECK-LABEL: @test_mm256_mask_abs_epi32
+  // CHECK: @llvm.x86.avx512.mask.pabs.d.256
+  return _mm256_mask_abs_epi32(__W,__U,__A); 
+}
+__m256i test_mm256_maskz_abs_epi32(__mmask8 __U, __m256i __A) {
+  // CHECK-LABEL: @test_mm256_maskz_abs_epi32
+  // CHECK: @llvm.x86.avx512.mask.pabs.d.256
+  return _mm256_maskz_abs_epi32(__U,__A); 
+}
+__m128i test_mm_abs_epi64(__m128i __A) {
+  // CHECK-LABEL: @test_mm_abs_epi64
+  // CHECK: @llvm.x86.avx512.mask.pabs.q.128
+  return _mm_abs_epi64(__A); 
+}
+__m128i test_mm_mask_abs_epi64(__m128i __W, __mmask8 __U, __m128i __A) {
+  // CHECK-LABEL: @test_mm_mask_abs_epi64
+  // CHECK: @llvm.x86.avx512.mask.pabs.q.128
+  return _mm_mask_abs_epi64(__W,__U,__A); 
+}
+__m128i test_mm_maskz_abs_epi64(__mmask8 __U, __m128i __A) {
+  // CHECK-LABEL: @test_mm_maskz_abs_epi64
+  // CHECK: @llvm.x86.avx512.mask.pabs.q.128
+  return _mm_maskz_abs_epi64(__U,__A); 
+}
+__m256i test_mm256_abs_epi64(__m256i __A) {
+  // CHECK-LABEL: @test_mm256_abs_epi64
+  // CHECK: @llvm.x86.avx512.mask.pabs.q.256
+  return _mm256_abs_epi64(__A); 
+}
+__m256i test_mm256_mask_abs_epi64(__m256i __W, __mmask8 __U, __m256i __A) {
+  // CHECK-LABEL: @test_mm256_mask_abs_epi64
+  // CHECK: @llvm.x86.avx512.mask.pabs.q.256
+  return _mm256_mask_abs_epi64(__W,__U,__A); 
+}
+__m256i test_mm256_maskz_abs_epi64(__mmask8 __U, __m256i __A) {
+  // CHECK-LABEL: @test_mm256_maskz_abs_epi64
+  // CHECK: @llvm.x86.avx512.mask.pabs.q.256
+  return _mm256_maskz_abs_epi64(__U,__A); 
+}
+__m128i test_mm_maskz_max_epi32(__mmask8 __M, __m128i __A, __m128i __B) {
+  // CHECK-LABEL: @test_mm_maskz_max_epi32
+  // CHECK: @llvm.x86.avx512.mask.pmaxs.d.128
+  return _mm_maskz_max_epi32(__M,__A,__B); 
+}
+__m128i test_mm_mask_max_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
+  // CHECK-LABEL: @test_mm_mask_max_epi32
+  // CHECK: @llvm.x86.avx512.mask.pmaxs.d.128
+  return _mm_mask_max_epi32(__W,__M,__A,__B); 
+}
+__m256i test_mm256_maskz_max_epi32(__mmask8 __M, __m256i __A, __m256i __B) {
+  // CHECK-LABEL: @test_mm256_maskz_max_epi32
+  // CHECK: @llvm.x86.avx512.mask.pmaxs.d.256
+  return _mm256_maskz_max_epi32(__M,__A,__B); 
+}
+__m256i test_mm256_mask_max_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
+  // CHECK-LABEL: @test_mm256_mask_max_epi32
+  // CHECK: @llvm.x86.avx512.mask.pmaxs.d.256
+  return _mm256_mask_max_epi32(__W,__M,__A,__B); 
+}
+__m128i test_mm_maskz_max_epi64(__mmask8 __M, __m128i __A, __m128i __B) {
+  // CHECK-LABEL: @test_mm_maskz_max_epi64
+  // CHECK: @llvm.x86.avx512.mask.pmaxs.q.128
+  return _mm_maskz_max_epi64(__M,__A,__B); 
+}
+__m128i test_mm_mask_max_epi64(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
+  // CHECK-LABEL: @test_mm_mask_max_epi64
+  // CHECK: @llvm.x86.avx512.mask.pmaxs.q.128
+  return _mm_mask_max_epi64(__W,__M,__A,__B); 
+}
+__m128i test_mm_max_epi64(__m128i __A, __m128i __B) {
+  // CHECK-LABEL: @test_mm_max_epi64
+  // CHECK: @llvm.x86.avx512.mask.pmaxs.q.128
+  return _mm_max_epi64(__A,__B); 
+}
+__m256i test_mm256_maskz_max_epi64(__mmask8 __M, __m256i __A, __m256i __B) {
+  // CHECK-LABEL: @test_mm256_maskz_max_epi64
+  // CHECK: @llvm.x86.avx512.mask.pmaxs.q.256
+  return _mm256_maskz_max_epi64(__M,__A,__B); 
+}
+__m256i test_mm256_mask_max_epi64(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
+  // CHECK-LABEL: @test_mm256_mask_max_epi64
+  // CHECK: @llvm.x86.avx512.mask.pmaxs.q.256
+  return _mm256_mask_max_epi64(__W,__M,__A,__B); 
+}
+__m256i test_mm256_max_epi64(__m256i __A, __m256i __B) {
+  // CHECK-LABEL: @test_mm256_max_epi64
+  // CHECK: @llvm.x86.avx512.mask.pmaxs.q.256
+  return _mm256_max_epi64(__A,__B); 
+}
+__m128i test_mm_maskz_max_epu32(__mmask8 __M, __m128i __A, __m128i __B) {
+  // CHECK-LABEL: @test_mm_maskz_max_epu32
+  // CHECK: @llvm.x86.avx512.mask.pmaxu.d.128
+  return _mm_maskz_max_epu32(__M,__A,__B); 
+}
+__m128i test_mm_mask_max_epu32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
+  // CHECK-LABEL: @test_mm_mask_max_epu32
+  // CHECK: @llvm.x86.avx512.mask.pmaxu.d.128
+  return _mm_mask_max_epu32(__W,__M,__A,__B); 
+}
+__m256i test_mm256_maskz_max_epu32(__mmask8 __M, __m256i __A, __m256i __B) {
+  // CHECK-LABEL: @test_mm256_maskz_max_epu32
+  // CHECK: @llvm.x86.avx512.mask.pmaxu.d.256
+  return _mm256_maskz_max_epu32(__M,__A,__B); 
+}
+__m256i test_mm256_mask_max_epu32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
+  // CHECK-LABEL: @test_mm256_mask_max_epu32
+  // CHECK: @llvm.x86.avx512.mask.pmaxu.d.256
+  return _mm256_mask_max_epu32(__W,__M,__A,__B); 
+}
+__m128i test_mm_maskz_max_epu64(__mmask8 __M, __m128i __A, __m128i __B) {
+  // CHECK-LABEL: @test_mm_maskz_max_epu64
+  // CHECK: @llvm.x86.avx512.mask.pmaxu.q.128
+  return _mm_maskz_max_epu64(__M,__A,__B); 
+}
+__m128i test_mm_max_epu64(__m128i __A, __m128i __B) {
+  // CHECK-LABEL: @test_mm_max_epu64
+  // CHECK: @llvm.x86.avx512.mask.pmaxu.q.128
+  return _mm_max_epu64(__A,__B); 
+}
+__m128i test_mm_mask_max_epu64(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
+  // CHECK-LABEL: @test_mm_mask_max_epu64
+  // CHECK: @llvm.x86.avx512.mask.pmaxu.q.128
+  return _mm_mask_max_epu64(__W,__M,__A,__B); 
+}
+__m256i test_mm256_maskz_max_epu64(__mmask8 __M, __m256i __A, __m256i __B) {
+  // CHECK-LABEL: @test_mm256_maskz_max_epu64
+  // CHECK: @llvm.x86.avx512.mask.pmaxu.q.256
+  return _mm256_maskz_max_epu64(__M,__A,__B); 
+}
+__m256i test_mm256_max_epu64(__m256i __A, __m256i __B) {
+  // CHECK-LABEL: @test_mm256_max_epu64
+  // CHECK: @llvm.x86.avx512.mask.pmaxu.q.256
+  return _mm256_max_epu64(__A,__B); 
+}
+__m256i test_mm256_mask_max_epu64(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
+  // CHECK-LABEL: @test_mm256_mask_max_epu64
+  // CHECK: @llvm.x86.avx512.mask.pmaxu.q.256
+  return _mm256_mask_max_epu64(__W,__M,__A,__B); 
+}
+__m128i test_mm_maskz_min_epi32(__mmask8 __M, __m128i __A, __m128i __B) {
+  // CHECK-LABEL: @test_mm_maskz_min_epi32
+  // CHECK: @llvm.x86.avx512.mask.pmins.d.128
+  return _mm_maskz_min_epi32(__M,__A,__B); 
+}
+__m128i test_mm_mask_min_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
+  // CHECK-LABEL: @test_mm_mask_min_epi32
+  // CHECK: @llvm.x86.avx512.mask.pmins.d.128
+  return _mm_mask_min_epi32(__W,__M,__A,__B); 
+}
+__m256i test_mm256_maskz_min_epi32(__mmask8 __M, __m256i __A, __m256i __B) {
+  // CHECK-LABEL: @test_mm256_maskz_min_epi32
+  // CHECK: @llvm.x86.avx512.mask.pmins.d.256
+  return _mm256_maskz_min_epi32(__M,__A,__B); 
+}
+__m256i test_mm256_mask_min_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
+  // CHECK-LABEL: @test_mm256_mask_min_epi32
+  // CHECK: @llvm.x86.avx512.mask.pmins.d.256
+  return _mm256_mask_min_epi32(__W,__M,__A,__B); 
+}
+__m128i test_mm_min_epi64(__m128i __A, __m128i __B) {
+  // CHECK-LABEL: @test_mm_min_epi64
+  // CHECK: @llvm.x86.avx512.mask.pmins.q.128
+  return _mm_min_epi64(__A,__B); 
+}
+__m128i test_mm_mask_min_epi64(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
+  // CHECK-LABEL: @test_mm_mask_min_epi64
+  // CHECK: @llvm.x86.avx512.mask.pmins.q.128
+  return _mm_mask_min_epi64(__W,__M,__A,__B); 
+}
+__m128i test_mm_maskz_min_epi64(__mmask8 __M, __m128i __A, __m128i __B) {
+  // CHECK-LABEL: @test_mm_maskz_min_epi64
+  // CHECK: @llvm.x86.avx512.mask.pmins.q.128
+  return _mm_maskz_min_epi64(__M,__A,__B); 
+}
+__m256i test_mm256_min_epi64(__m256i __A, __m256i __B) {
+  // CHECK-LABEL: @test_mm256_min_epi64
+  // CHECK: @llvm.x86.avx512.mask.pmins.q.256
+  return _mm256_min_epi64(__A,__B); 
+}
+__m256i test_mm256_mask_min_epi64(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
+  // CHECK-LABEL: @test_mm256_mask_min_epi64
+  // CHECK: @llvm.x86.avx512.mask.pmins.q.256
+  return _mm256_mask_min_epi64(__W,__M,__A,__B); 
+}
+__m256i test_mm256_maskz_min_epi64(__mmask8 __M, __m256i __A, __m256i __B) {
+  // CHECK-LABEL: @test_mm256_maskz_min_epi64
+  // CHECK: @llvm.x86.avx512.mask.pmins.q.256
+  return _mm256_maskz_min_epi64(__M,__A,__B); 
+}
+__m128i test_mm_maskz_min_epu32(__mmask8 __M, __m128i __A, __m128i __B) {
+  // CHECK-LABEL: @test_mm_maskz_min_epu32
+  // CHECK: @llvm.x86.avx512.mask.pminu.d.128
+  return _mm_maskz_min_epu32(__M,__A,__B); 
+}
+__m128i test_mm_mask_min_epu32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
+  // CHECK-LABEL: @test_mm_mask_min_epu32
+  // CHECK: @llvm.x86.avx512.mask.pminu.d.128
+  return _mm_mask_min_epu32(__W,__M,__A,__B); 
+}
+__m256i test_mm256_maskz_min_epu32(__mmask8 __M, __m256i __A, __m256i __B) {
+  // CHECK-LABEL: @test_mm256_maskz_min_epu32
+  // CHECK: @llvm.x86.avx512.mask.pminu.d.256
+  return _mm256_maskz_min_epu32(__M,__A,__B); 
+}
+__m256i test_mm256_mask_min_epu32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
+  // CHECK-LABEL: @test_mm256_mask_min_epu32
+  // CHECK: @llvm.x86.avx512.mask.pminu.d.256
+  return _mm256_mask_min_epu32(__W,__M,__A,__B); 
+}
+__m128i test_mm_min_epu64(__m128i __A, __m128i __B) {
+  // CHECK-LABEL: @test_mm_min_epu64
+  // CHECK: @llvm.x86.avx512.mask.pminu.q.128
+  return _mm_min_epu64(__A,__B); 
+}
+__m128i test_mm_mask_min_epu64(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
+  // CHECK-LABEL: @test_mm_mask_min_epu64
+  // CHECK: @llvm.x86.avx512.mask.pminu.q.128
+  return _mm_mask_min_epu64(__W,__M,__A,__B); 
+}
+__m128i test_mm_maskz_min_epu64(__mmask8 __M, __m128i __A, __m128i __B) {
+  // CHECK-LABEL: @test_mm_maskz_min_epu64
+  // CHECK: @llvm.x86.avx512.mask.pminu.q.128
+  return _mm_maskz_min_epu64(__M,__A,__B); 
+}
+__m256i test_mm256_min_epu64(__m256i __A, __m256i __B) {
+  // CHECK-LABEL: @test_mm256_min_epu64
+  // CHECK: @llvm.x86.avx512.mask.pminu.q.256
+  return _mm256_min_epu64(__A,__B); 
+}
+__m256i test_mm256_mask_min_epu64(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
+  // CHECK-LABEL: @test_mm256_mask_min_epu64
+  // CHECK: @llvm.x86.avx512.mask.pminu.q.256
+  return _mm256_mask_min_epu64(__W,__M,__A,__B); 
+}
+__m256i test_mm256_maskz_min_epu64(__mmask8 __M, __m256i __A, __m256i __B) {
+  // CHECK-LABEL: @test_mm256_maskz_min_epu64
+  // CHECK: @llvm.x86.avx512.mask.pminu.q.256
+  return _mm256_maskz_min_epu64(__M,__A,__B); 
+}
+__m128d test_mm_roundscale_pd(__m128d __A) {
+  // CHECK-LABEL: @test_mm_roundscale_pd
+  // CHECK: @llvm.x86.avx512.mask.rndscale.pd.128
+  return _mm_roundscale_pd(__A,4); 
+}
+__m128d test_mm_mask_roundscale_pd(__m128d __W, __mmask8 __U, __m128d __A) {
+  // CHECK-LABEL: @test_mm_mask_roundscale_pd
+  // CHECK: @llvm.x86.avx512.mask.rndscale.pd.128
+  return _mm_mask_roundscale_pd(__W,__U,__A,4); 
+}
+__m128d test_mm_maskz_roundscale_pd(__mmask8 __U, __m128d __A) {
+  // CHECK-LABEL: @test_mm_maskz_roundscale_pd
+  // CHECK: @llvm.x86.avx512.mask.rndscale.pd.128
+  return _mm_maskz_roundscale_pd(__U,__A,4); 
+}
+__m256d test_mm256_roundscale_pd(__m256d __A) {
+  // CHECK-LABEL: @test_mm256_roundscale_pd
+  // CHECK: @llvm.x86.avx512.mask.rndscale.pd.256
+  return _mm256_roundscale_pd(__A,4); 
+}
+__m256d test_mm256_mask_roundscale_pd(__m256d __W, __mmask8 __U, __m256d __A) {
+  // CHECK-LABEL: @test_mm256_mask_roundscale_pd
+  // CHECK: @llvm.x86.avx512.mask.rndscale.pd.256
+  return _mm256_mask_roundscale_pd(__W,__U,__A,4); 
+}
+__m256d test_mm256_maskz_roundscale_pd(__mmask8 __U, __m256d __A) {
+  // CHECK-LABEL: @test_mm256_maskz_roundscale_pd
+  // CHECK: @llvm.x86.avx512.mask.rndscale.pd.256
+  return _mm256_maskz_roundscale_pd(__U,__A,4); 
+}
+__m128 test_mm_roundscale_ps(__m128 __A) {
+  // CHECK-LABEL: @test_mm_roundscale_ps
+  // CHECK: @llvm.x86.avx512.mask.rndscale.ps.128
+  return _mm_roundscale_ps(__A,4); 
+}
+__m128 test_mm_mask_roundscale_ps(__m128 __W, __mmask8 __U, __m128 __A) {
+  // CHECK-LABEL: @test_mm_mask_roundscale_ps
+  // CHECK: @llvm.x86.avx512.mask.rndscale.ps.128
+  return _mm_mask_roundscale_ps(__W,__U,__A,4); 
+}
+__m128 test_mm_maskz_roundscale_ps(__mmask8 __U, __m128 __A) {
+  // CHECK-LABEL: @test_mm_maskz_roundscale_ps
+  // CHECK: @llvm.x86.avx512.mask.rndscale.ps.128
+  return _mm_maskz_roundscale_ps(__U,__A, 4); 
+}
+__m256 test_mm256_roundscale_ps(__m256 __A) {
+  // CHECK-LABEL: @test_mm256_roundscale_ps
+  // CHECK: @llvm.x86.avx512.mask.rndscale.ps.256
+  return _mm256_roundscale_ps(__A,4); 
+}
+__m256 test_mm256_mask_roundscale_ps(__m256 __W, __mmask8 __U, __m256 __A) {
+  // CHECK-LABEL: @test_mm256_mask_roundscale_ps
+  // CHECK: @llvm.x86.avx512.mask.rndscale.ps.256
+  return _mm256_mask_roundscale_ps(__W,__U,__A,4); 
+}
+__m256 test_mm256_maskz_roundscale_ps(__mmask8 __U, __m256 __A) {
+  // CHECK-LABEL: @test_mm256_maskz_roundscale_ps
+  // CHECK: @llvm.x86.avx512.mask.rndscale.ps.256
+  return _mm256_maskz_roundscale_ps(__U,__A,4); 
+}
+__m128d test_mm_scalef_pd(__m128d __A, __m128d __B) {
+  // CHECK-LABEL: @test_mm_scalef_pd
+  // CHECK: @llvm.x86.avx512.mask.scalef.pd.128
+  return _mm_scalef_pd(__A,__B); 
+}
+__m128d test_mm_mask_scalef_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
+  // CHECK-LABEL: @test_mm_mask_scalef_pd
+  // CHECK: @llvm.x86.avx512.mask.scalef.pd.128
+  return _mm_mask_scalef_pd(__W,__U,__A,__B); 
+}
+__m128d test_mm_maskz_scalef_pd(__mmask8 __U, __m128d __A, __m128d __B) {
+  // CHECK-LABEL: @test_mm_maskz_scalef_pd
+  // CHECK: @llvm.x86.avx512.mask.scalef.pd.128
+  return _mm_maskz_scalef_pd(__U,__A,__B); 
+}
+__m256d test_mm256_scalef_pd(__m256d __A, __m256d __B) {
+  // CHECK-LABEL: @test_mm256_scalef_pd
+  // CHECK: @llvm.x86.avx512.mask.scalef.pd.256
+  return _mm256_scalef_pd(__A,__B); 
+}
+__m256d test_mm256_mask_scalef_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
+  // CHECK-LABEL: @test_mm256_mask_scalef_pd
+  // CHECK: @llvm.x86.avx512.mask.scalef.pd.256
+  return _mm256_mask_scalef_pd(__W,__U,__A,__B); 
+}
+__m256d test_mm256_maskz_scalef_pd(__mmask8 __U, __m256d __A, __m256d __B) {
+  // CHECK-LABEL: @test_mm256_maskz_scalef_pd
+  // CHECK: @llvm.x86.avx512.mask.scalef.pd.256
+  return _mm256_maskz_scalef_pd(__U,__A,__B); 
+}
+__m128 test_mm_scalef_ps(__m128 __A, __m128 __B) {
+  // CHECK-LABEL: @test_mm_scalef_ps
+  // CHECK: @llvm.x86.avx512.mask.scalef.ps.128
+  return _mm_scalef_ps(__A,__B); 
+}
+__m128 test_mm_mask_scalef_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
+  // CHECK-LABEL: @test_mm_mask_scalef_ps
+  // CHECK: @llvm.x86.avx512.mask.scalef.ps.128
+  return _mm_mask_scalef_ps(__W,__U,__A,__B); 
+}
+__m128 test_mm_maskz_scalef_ps(__mmask8 __U, __m128 __A, __m128 __B) {
+  // CHECK-LABEL: @test_mm_maskz_scalef_ps
+  // CHECK: @llvm.x86.avx512.mask.scalef.ps.128
+  return _mm_maskz_scalef_ps(__U,__A,__B); 
+}
+__m256 test_mm256_scalef_ps(__m256 __A, __m256 __B) {
+  // CHECK-LABEL: @test_mm256_scalef_ps
+  // CHECK: @llvm.x86.avx512.mask.scalef.ps.256
+  return _mm256_scalef_ps(__A,__B); 
+}
+__m256 test_mm256_mask_scalef_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
+  // CHECK-LABEL: @test_mm256_mask_scalef_ps
+  // CHECK: @llvm.x86.avx512.mask.scalef.ps.256
+  return _mm256_mask_scalef_ps(__W,__U,__A,__B); 
+}
+__m256 test_mm256_maskz_scalef_ps(__mmask8 __U, __m256 __A, __m256 __B) {
+  // CHECK-LABEL: @test_mm256_maskz_scalef_ps
+  // CHECK: @llvm.x86.avx512.mask.scalef.ps.256
+  return _mm256_maskz_scalef_ps(__U,__A,__B); 
+}
+void test_mm_i64scatter_pd(double *__addr, __m128i __index,  __m128d __v1) {
+  // CHECK-LABEL: @test_mm_i64scatter_pd
+  // CHECK: @llvm.x86.avx512.scatterdiv2.df
+  return _mm_i64scatter_pd(__addr,__index,__v1,2); 
+}
+void test_mm_mask_i64scatter_pd(double *__addr, __mmask8 __mask, __m128i __index, __m128d __v1) {
+  // CHECK-LABEL: @test_mm_mask_i64scatter_pd
+  // CHECK: @llvm.x86.avx512.scatterdiv2.df
+  return _mm_mask_i64scatter_pd(__addr,__mask,__index,__v1,2); 
+}
+void test_mm_i64scatter_epi64(long long *__addr, __m128i __index,  __m128i __v1) {
+  // CHECK-LABEL: @test_mm_i64scatter_epi64
+  // CHECK: @llvm.x86.avx512.scatterdiv2.di
+  return _mm_i64scatter_epi64(__addr,__index,__v1,2); 
+}
+void test_mm_mask_i64scatter_epi64(long long *__addr, __mmask8 __mask, __m128i __index, __m128i __v1) {
+  // CHECK-LABEL: @test_mm_mask_i64scatter_epi64
+  // CHECK: @llvm.x86.avx512.scatterdiv2.di
+  return _mm_mask_i64scatter_epi64(__addr,__mask,__index,__v1,2); 
+}
+void test_mm256_i64scatter_pd(double *__addr, __m256i __index,  __m256d __v1) {
+  // CHECK-LABEL: @test_mm256_i64scatter_pd
+  // CHECK: @llvm.x86.avx512.scatterdiv4.df
+  return _mm256_i64scatter_pd(__addr,__index,__v1,2); 
+}
+void test_mm256_mask_i64scatter_pd(double *__addr, __mmask8 __mask, __m256i __index, __m256d __v1) {
+  // CHECK-LABEL: @test_mm256_mask_i64scatter_pd
+  // CHECK: @llvm.x86.avx512.scatterdiv4.df
+  return _mm256_mask_i64scatter_pd(__addr,__mask,__index,__v1,2); 
+}
+void test_mm256_i64scatter_epi64(long long *__addr, __m256i __index,  __m256i __v1) {
+  // CHECK-LABEL: @test_mm256_i64scatter_epi64
+  // CHECK: @llvm.x86.avx512.scatterdiv4.di
+  return _mm256_i64scatter_epi64(__addr,__index,__v1,2); 
+}
+void test_mm256_mask_i64scatter_epi64(long long *__addr, __mmask8 __mask,  __m256i __index, __m256i __v1) {
+  // CHECK-LABEL: @test_mm256_mask_i64scatter_epi64
+  // CHECK: @llvm.x86.avx512.scatterdiv4.di
+  return _mm256_mask_i64scatter_epi64(__addr,__mask,__index,__v1,2); 
+}
+void test_mm_i64scatter_ps(float *__addr, __m128i __index, __m128 __v1) {
+  // CHECK-LABEL: @test_mm_i64scatter_ps
+  // CHECK: @llvm.x86.avx512.scatterdiv4.sf
+  return _mm_i64scatter_ps(__addr,__index,__v1,2); 
+}
+void test_mm_mask_i64scatter_ps(float *__addr, __mmask8 __mask, __m128i __index, __m128 __v1) {
+  // CHECK-LABEL: @test_mm_mask_i64scatter_ps
+  // CHECK: @llvm.x86.avx512.scatterdiv4.sf
+  return _mm_mask_i64scatter_ps(__addr,__mask,__index,__v1,2); 
+}
+void test_mm_i64scatter_epi32(int *__addr, __m128i __index,  __m128i __v1) {
+  // CHECK-LABEL: @test_mm_i64scatter_epi32
+  // CHECK: @llvm.x86.avx512.scatterdiv4.si
+  return _mm_i64scatter_epi32(__addr,__index,__v1,2); 
+}
+void test_mm_mask_i64scatter_epi32(int *__addr, __mmask8 __mask, __m128i __index, __m128i __v1) {
+  // CHECK-LABEL: @test_mm_mask_i64scatter_epi32
+  // CHECK: @llvm.x86.avx512.scatterdiv4.si
+  return _mm_mask_i64scatter_epi32(__addr,__mask,__index,__v1,2); 
+}
+void test_mm256_i64scatter_ps(float *__addr, __m256i __index,  __m128 __v1) {
+  // CHECK-LABEL: @test_mm256_i64scatter_ps
+  // CHECK: @llvm.x86.avx512.scatterdiv8.sf
+  return _mm256_i64scatter_ps(__addr,__index,__v1,2); 
+}
+void test_mm256_mask_i64scatter_ps(float *__addr, __mmask8 __mask, __m256i __index, __m128 __v1) {
+  // CHECK-LABEL: @test_mm256_mask_i64scatter_ps
+  // CHECK: @llvm.x86.avx512.scatterdiv8.sf
+  return _mm256_mask_i64scatter_ps(__addr,__mask,__index,__v1,2); 
+}
+void test_mm256_i64scatter_epi32(int *__addr, __m256i __index,  __m128i __v1) {
+  // CHECK-LABEL: @test_mm256_i64scatter_epi32
+  // CHECK: @llvm.x86.avx512.scatterdiv8.si
+  return _mm256_i64scatter_epi32(__addr,__index,__v1,2); 
+}
+void test_mm256_mask_i64scatter_epi32(int *__addr, __mmask8 __mask,  __m256i __index, __m128i __v1) {
+  // CHECK-LABEL: @test_mm256_mask_i64scatter_epi32
+  // CHECK: @llvm.x86.avx512.scatterdiv8.si
+  return _mm256_mask_i64scatter_epi32(__addr,__mask,__index,__v1,2); 
+}
+void test_mm_i32scatter_pd(double *__addr, __m128i __index,  __m128d __v1) {
+  // CHECK-LABEL: @test_mm_i32scatter_pd
+  // CHECK: @llvm.x86.avx512.scattersiv2.df
+  return _mm_i32scatter_pd(__addr,__index,__v1,2); 
+}
+void test_mm_mask_i32scatter_pd(double *__addr, __mmask8 __mask, __m128i __index, __m128d __v1) {
+  // CHECK-LABEL: @test_mm_mask_i32scatter_pd
+  // CHECK: @llvm.x86.avx512.scattersiv2.df
+  return _mm_mask_i32scatter_pd(__addr,__mask,__index,__v1,2); 
+}
+void test_mm_i32scatter_epi64(long long *__addr, __m128i __index,  __m128i __v1) {
+  // CHECK-LABEL: @test_mm_i32scatter_epi64
+  // CHECK: @llvm.x86.avx512.scattersiv2.di
+  return _mm_i32scatter_epi64(__addr,__index,__v1,2); 
+}
+void test_mm_mask_i32scatter_epi64(long long *__addr, __mmask8 __mask, __m128i __index, __m128i __v1) {
+  // CHECK-LABEL: @test_mm_mask_i32scatter_epi64
+  // CHECK: @llvm.x86.avx512.scattersiv2.di
+  return _mm_mask_i32scatter_epi64(__addr,__mask,__index,__v1,2); 
+}
+void test_mm256_i32scatter_pd(double *__addr, __m128i __index,  __m256d __v1) {
+  // CHECK-LABEL: @test_mm256_i32scatter_pd
+  // CHECK: @llvm.x86.avx512.scattersiv4.df
+  return _mm256_i32scatter_pd(__addr,__index,__v1,2); 
+}
+void test_mm256_mask_i32scatter_pd(double *__addr, __mmask8 __mask, __m128i __index, __m256d __v1) {
+  // CHECK-LABEL: @test_mm256_mask_i32scatter_pd
+  // CHECK: @llvm.x86.avx512.scattersiv4.df
+  return _mm256_mask_i32scatter_pd(__addr,__mask,__index,__v1,2); 
+}
+void test_mm256_i32scatter_epi64(long long *__addr, __m128i __index,  __m256i __v1) {
+  // CHECK-LABEL: @test_mm256_i32scatter_epi64
+  // CHECK: @llvm.x86.avx512.scattersiv4.di
+  return _mm256_i32scatter_epi64(__addr,__index,__v1,2); 
+}
+void test_mm256_mask_i32scatter_epi64(long long *__addr, __mmask8 __mask,  __m128i __index, __m256i __v1) {
+  // CHECK-LABEL: @test_mm256_mask_i32scatter_epi64
+  // CHECK: @llvm.x86.avx512.scattersiv4.di
+  return _mm256_mask_i32scatter_epi64(__addr,__mask,__index,__v1,2); 
+}
+void test_mm_i32scatter_ps(float *__addr, __m128i __index, __m128 __v1) {
+  // CHECK-LABEL: @test_mm_i32scatter_ps
+  // CHECK: @llvm.x86.avx512.scattersiv4.sf
+  return _mm_i32scatter_ps(__addr,__index,__v1,2); 
+}
+void test_mm_mask_i32scatter_ps(float *__addr, __mmask8 __mask, __m128i __index, __m128 __v1) {
+  // CHECK-LABEL: @test_mm_mask_i32scatter_ps
+  // CHECK: @llvm.x86.avx512.scattersiv4.sf
+  return _mm_mask_i32scatter_ps(__addr,__mask,__index,__v1,2); 
+}
+void test_mm_i32scatter_epi32(int *__addr, __m128i __index,  __m128i __v1) {
+  // CHECK-LABEL: @test_mm_i32scatter_epi32
+  // CHECK: @llvm.x86.avx512.scattersiv4.si
+  return _mm_i32scatter_epi32(__addr,__index,__v1,2); 
+}
+void test_mm_mask_i32scatter_epi32(int *__addr, __mmask8 __mask, __m128i __index, __m128i __v1) {
+  // CHECK-LABEL: @test_mm_mask_i32scatter_epi32
+  // CHECK: @llvm.x86.avx512.scattersiv4.si
+  return _mm_mask_i32scatter_epi32(__addr,__mask,__index,__v1,2); 
+}
+void test_mm256_i32scatter_ps(float *__addr, __m256i __index,  __m256 __v1) {
+  // CHECK-LABEL: @test_mm256_i32scatter_ps
+  // CHECK: @llvm.x86.avx512.scattersiv8.sf
+  return _mm256_i32scatter_ps(__addr,__index,__v1,2); 
+}
+void test_mm256_mask_i32scatter_ps(float *__addr, __mmask8 __mask, __m256i __index, __m256 __v1) {
+  // CHECK-LABEL: @test_mm256_mask_i32scatter_ps
+  // CHECK: @llvm.x86.avx512.scattersiv8.sf
+  return _mm256_mask_i32scatter_ps(__addr,__mask,__index,__v1,2); 
+}
+void test_mm256_i32scatter_epi32(int *__addr, __m256i __index,  __m256i __v1) {
+  // CHECK-LABEL: @test_mm256_i32scatter_epi32
+  // CHECK: @llvm.x86.avx512.scattersiv8.si
+  return _mm256_i32scatter_epi32(__addr,__index,__v1,2); 
+}
+void test_mm256_mask_i32scatter_epi32(int *__addr, __mmask8 __mask,  __m256i __index, __m256i __v1) {
+  // CHECK-LABEL: @test_mm256_mask_i32scatter_epi32
+  // CHECK: @llvm.x86.avx512.scattersiv8.si
+  return _mm256_mask_i32scatter_epi32(__addr,__mask,__index,__v1,2); 
+}
+__m128d test_mm_mask_sqrt_pd(__m128d __W, __mmask8 __U, __m128d __A) {
+  // CHECK-LABEL: @test_mm_mask_sqrt_pd
+  // CHECK: @llvm.x86.avx512.mask.sqrt.pd.128
+  return _mm_mask_sqrt_pd(__W,__U,__A); 
+}
+__m128d test_mm_maskz_sqrt_pd(__mmask8 __U, __m128d __A) {
+  // CHECK-LABEL: @test_mm_maskz_sqrt_pd
+  // CHECK: @llvm.x86.avx512.mask.sqrt.pd.128
+  return _mm_maskz_sqrt_pd(__U,__A); 
+}
+__m256d test_mm256_mask_sqrt_pd(__m256d __W, __mmask8 __U, __m256d __A) {
+  // CHECK-LABEL: @test_mm256_mask_sqrt_pd
+  // CHECK: @llvm.x86.avx512.mask.sqrt.pd.256
+  return _mm256_mask_sqrt_pd(__W,__U,__A); 
+}
+__m256d test_mm256_maskz_sqrt_pd(__mmask8 __U, __m256d __A) {
+  // CHECK-LABEL: @test_mm256_maskz_sqrt_pd
+  // CHECK: @llvm.x86.avx512.mask.sqrt.pd.256
+  return _mm256_maskz_sqrt_pd(__U,__A); 
+}
+__m128 test_mm_mask_sqrt_ps(__m128 __W, __mmask8 __U, __m128 __A) {
+  // CHECK-LABEL: @test_mm_mask_sqrt_ps
+  // CHECK: @llvm.x86.avx512.mask.sqrt.ps.128
+  return _mm_mask_sqrt_ps(__W,__U,__A); 
+}
+__m128 test_mm_maskz_sqrt_ps(__mmask8 __U, __m128 __A) {
+  // CHECK-LABEL: @test_mm_maskz_sqrt_ps
+  // CHECK: @llvm.x86.avx512.mask.sqrt.ps.128
+  return _mm_maskz_sqrt_ps(__U,__A); 
+}
+__m256 test_mm256_mask_sqrt_ps(__m256 __W, __mmask8 __U, __m256 __A) {
+  // CHECK-LABEL: @test_mm256_mask_sqrt_ps
+  // CHECK: @llvm.x86.avx512.mask.sqrt.ps.256
+  return _mm256_mask_sqrt_ps(__W,__U,__A); 
+}
+__m256 test_mm256_maskz_sqrt_ps(__mmask8 __U, __m256 __A) {
+  // CHECK-LABEL: @test_mm256_maskz_sqrt_ps
+  // CHECK: @llvm.x86.avx512.mask.sqrt.ps.256
+  return _mm256_maskz_sqrt_ps(__U,__A); 
+}
+__m128d test_mm_mask_sub_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
+  // CHECK-LABEL: @test_mm_mask_sub_pd
+  // CHECK: @llvm.x86.avx512.mask.sub.pd.128
+  return _mm_mask_sub_pd(__W,__U,__A,__B); 
+}
+__m128d test_mm_maskz_sub_pd(__mmask8 __U, __m128d __A, __m128d __B) {
+  // CHECK-LABEL: @test_mm_maskz_sub_pd
+  // CHECK: @llvm.x86.avx512.mask.sub.pd.128
+  return _mm_maskz_sub_pd(__U,__A,__B); 
+}
+__m256d test_mm256_mask_sub_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
+  // CHECK-LABEL: @test_mm256_mask_sub_pd
+  // CHECK: @llvm.x86.avx512.mask.sub.pd.256
+  return _mm256_mask_sub_pd(__W,__U,__A,__B); 
+}
+__m256d test_mm256_maskz_sub_pd(__mmask8 __U, __m256d __A, __m256d __B) {
+  // CHECK-LABEL: @test_mm256_maskz_sub_pd
+  // CHECK: @llvm.x86.avx512.mask.sub.pd.256
+  return _mm256_maskz_sub_pd(__U,__A,__B); 
+}
+__m128 test_mm_mask_sub_ps(__m128 __W, __mmask16 __U, __m128 __A, __m128 __B) {
+  // CHECK-LABEL: @test_mm_mask_sub_ps
+  // CHECK: @llvm.x86.avx512.mask.sub.ps.128
+  return _mm_mask_sub_ps(__W,__U,__A,__B); 
+}
+__m128 test_mm_maskz_sub_ps(__mmask16 __U, __m128 __A, __m128 __B) {
+  // CHECK-LABEL: @test_mm_maskz_sub_ps
+  // CHECK: @llvm.x86.avx512.mask.sub.ps.128
+  return _mm_maskz_sub_ps(__U,__A,__B); 
+}
+__m256 test_mm256_mask_sub_ps(__m256 __W, __mmask16 __U, __m256 __A, __m256 __B) {
+  // CHECK-LABEL: @test_mm256_mask_sub_ps
+  // CHECK: @llvm.x86.avx512.mask.sub.ps.256
+  return _mm256_mask_sub_ps(__W,__U,__A,__B); 
+}
+__m256 test_mm256_maskz_sub_ps(__mmask16 __U, __m256 __A, __m256 __B) {
+  // CHECK-LABEL: @test_mm256_maskz_sub_ps
+  // CHECK: @llvm.x86.avx512.mask.sub.ps.256
+  return _mm256_maskz_sub_ps(__U,__A,__B); 
+}
+__m128i test_mm_mask2_permutex2var_epi32(__m128i __A, __m128i __I, __mmask8 __U,  __m128i __B) {
+  // CHECK-LABEL: @test_mm_mask2_permutex2var_epi32
+  // CHECK: @llvm.x86.avx512.mask.vpermi2var.d.128
+  return _mm_mask2_permutex2var_epi32(__A,__I,__U,__B); 
+}
+__m256i test_mm256_mask2_permutex2var_epi32(__m256i __A, __m256i __I, __mmask8 __U, __m256i __B) {
+  // CHECK-LABEL: @test_mm256_mask2_permutex2var_epi32
+  // CHECK: @llvm.x86.avx512.mask.vpermi2var.d.256
+  return _mm256_mask2_permutex2var_epi32(__A,__I,__U,__B); 
+}
+__m128d test_mm_mask2_permutex2var_pd(__m128d __A, __m128i __I, __mmask8 __U, __m128d __B) {
+  // CHECK-LABEL: @test_mm_mask2_permutex2var_pd
+  // CHECK: @llvm.x86.avx512.mask.vpermi2var.pd.128
+  return _mm_mask2_permutex2var_pd(__A,__I,__U,__B); 
+}
+__m256d test_mm256_mask2_permutex2var_pd(__m256d __A, __m256i __I, __mmask8 __U,  __m256d __B) {
+  // CHECK-LABEL: @test_mm256_mask2_permutex2var_pd
+  // CHECK: @llvm.x86.avx512.mask.vpermi2var.pd.256
+  return _mm256_mask2_permutex2var_pd(__A,__I,__U,__B); 
+}
+__m128 test_mm_mask2_permutex2var_ps(__m128 __A, __m128i __I, __mmask8 __U, __m128 __B) {
+  // CHECK-LABEL: @test_mm_mask2_permutex2var_ps
+  // CHECK: @llvm.x86.avx512.mask.vpermi2var.ps.128
+  return _mm_mask2_permutex2var_ps(__A,__I,__U,__B); 
+}
+__m256 test_mm256_mask2_permutex2var_ps(__m256 __A, __m256i __I, __mmask8 __U,  __m256 __B) {
+  // CHECK-LABEL: @test_mm256_mask2_permutex2var_ps
+  // CHECK: @llvm.x86.avx512.mask.vpermi2var.ps.256
+  return _mm256_mask2_permutex2var_ps(__A,__I,__U,__B); 
+}
+__m128i test_mm_mask2_permutex2var_epi64(__m128i __A, __m128i __I, __mmask8 __U,  __m128i __B) {
+  // CHECK-LABEL: @test_mm_mask2_permutex2var_epi64
+  // CHECK: @llvm.x86.avx512.mask.vpermi2var.q.128
+  return _mm_mask2_permutex2var_epi64(__A,__I,__U,__B); 
+}
+__m256i test_mm256_mask2_permutex2var_epi64(__m256i __A, __m256i __I, __mmask8 __U, __m256i __B) {
+  // CHECK-LABEL: @test_mm256_mask2_permutex2var_epi64
+  // CHECK: @llvm.x86.avx512.mask.vpermi2var.q.256
+  return _mm256_mask2_permutex2var_epi64(__A,__I,__U,__B); 
+}
+__m128i test_mm_permutex2var_epi32(__m128i __A, __m128i __I, __m128i __B) {
+  // CHECK-LABEL: @test_mm_permutex2var_epi32
+  // CHECK: @llvm.x86.avx512.mask.vpermt2var.d.128
+  return _mm_permutex2var_epi32(__A,__I,__B); 
+}
+__m128i test_mm_mask_permutex2var_epi32(__m128i __A, __mmask8 __U, __m128i __I, __m128i __B) {
+  // CHECK-LABEL: @test_mm_mask_permutex2var_epi32
+  // CHECK: @llvm.x86.avx512.mask.vpermt2var.d.128
+  return _mm_mask_permutex2var_epi32(__A,__U,__I,__B); 
+}
+__m128i test_mm_maskz_permutex2var_epi32(__mmask8 __U, __m128i __A, __m128i __I,  __m128i __B) {
+  // CHECK-LABEL: @test_mm_maskz_permutex2var_epi32
+  // CHECK: @llvm.x86.avx512.maskz.vpermt2var.d.128
+  return _mm_maskz_permutex2var_epi32(__U,__A,__I,__B); 
+}
+__m256i test_mm256_permutex2var_epi32(__m256i __A, __m256i __I, __m256i __B) {
+  // CHECK-LABEL: @test_mm256_permutex2var_epi32
+  // CHECK: @llvm.x86.avx512.mask.vpermt2var.d.256
+  return _mm256_permutex2var_epi32(__A,__I,__B); 
+}
+__m256i test_mm256_mask_permutex2var_epi32(__m256i __A, __mmask8 __U, __m256i __I, __m256i __B) {
+  // CHECK-LABEL: @test_mm256_mask_permutex2var_epi32
+  // CHECK: @llvm.x86.avx512.mask.vpermt2var.d.256
+  return _mm256_mask_permutex2var_epi32(__A,__U,__I,__B); 
+}
+__m256i test_mm256_maskz_permutex2var_epi32(__mmask8 __U, __m256i __A, __m256i __I, __m256i __B) {
+  // CHECK-LABEL: @test_mm256_maskz_permutex2var_epi32
+  // CHECK: @llvm.x86.avx512.maskz.vpermt2var.d.256
+  return _mm256_maskz_permutex2var_epi32(__U,__A,__I,__B); 
+}
+__m128d test_mm_permutex2var_pd(__m128d __A, __m128i __I, __m128d __B) {
+  // CHECK-LABEL: @test_mm_permutex2var_pd
+  // CHECK: @llvm.x86.avx512.mask.vpermt2var.pd.128
+  return _mm_permutex2var_pd(__A,__I,__B); 
+}
+__m128d test_mm_mask_permutex2var_pd(__m128d __A, __mmask8 __U, __m128i __I, __m128d __B) {
+  // CHECK-LABEL: @test_mm_mask_permutex2var_pd
+  // CHECK: @llvm.x86.avx512.mask.vpermt2var.pd.128
+  return _mm_mask_permutex2var_pd(__A,__U,__I,__B); 
+}
+__m128d test_mm_maskz_permutex2var_pd(__mmask8 __U, __m128d __A, __m128i __I, __m128d __B) {
+  // CHECK-LABEL: @test_mm_maskz_permutex2var_pd
+  // CHECK: @llvm.x86.avx512.maskz.vpermt2var.pd.128
+  return _mm_maskz_permutex2var_pd(__U,__A,__I,__B); 
+}
+__m256d test_mm256_permutex2var_pd(__m256d __A, __m256i __I, __m256d __B) {
+  // CHECK-LABEL: @test_mm256_permutex2var_pd
+  // CHECK: @llvm.x86.avx512.mask.vpermt2var.pd.256
+  return _mm256_permutex2var_pd(__A,__I,__B); 
+}
+__m256d test_mm256_mask_permutex2var_pd(__m256d __A, __mmask8 __U, __m256i __I, __m256d __B) {
+  // CHECK-LABEL: @test_mm256_mask_permutex2var_pd
+  // CHECK: @llvm.x86.avx512.mask.vpermt2var.pd.256
+  return _mm256_mask_permutex2var_pd(__A,__U,__I,__B); 
+}
+__m256d test_mm256_maskz_permutex2var_pd(__mmask8 __U, __m256d __A, __m256i __I,  __m256d __B) {
+  // CHECK-LABEL: @test_mm256_maskz_permutex2var_pd
+  // CHECK: @llvm.x86.avx512.maskz.vpermt2var.pd.256
+  return _mm256_maskz_permutex2var_pd(__U,__A,__I,__B); 
+}
+__m128 test_mm_permutex2var_ps(__m128 __A, __m128i __I, __m128 __B) {
+  // CHECK-LABEL: @test_mm_permutex2var_ps
+  // CHECK: @llvm.x86.avx512.mask.vpermt2var.ps.128
+  return _mm_permutex2var_ps(__A,__I,__B); 
+}
+__m128 test_mm_mask_permutex2var_ps(__m128 __A, __mmask8 __U, __m128i __I, __m128 __B) {
+  // CHECK-LABEL: @test_mm_mask_permutex2var_ps
+  // CHECK: @llvm.x86.avx512.mask.vpermt2var.ps.128
+  return _mm_mask_permutex2var_ps(__A,__U,__I,__B); 
+}
+__m128 test_mm_maskz_permutex2var_ps(__mmask8 __U, __m128 __A, __m128i __I, __m128 __B) {
+  // CHECK-LABEL: @test_mm_maskz_permutex2var_ps
+  // CHECK: @llvm.x86.avx512.maskz.vpermt2var.ps.128
+  return _mm_maskz_permutex2var_ps(__U,__A,__I,__B); 
+}
+__m256 test_mm256_permutex2var_ps(__m256 __A, __m256i __I, __m256 __B) {
+  // CHECK-LABEL: @test_mm256_permutex2var_ps
+  // CHECK: @llvm.x86.avx512.mask.vpermt2var.ps.256
+  return _mm256_permutex2var_ps(__A,__I,__B); 
+}
+__m256 test_mm256_mask_permutex2var_ps(__m256 __A, __mmask8 __U, __m256i __I, __m256 __B) {
+  // CHECK-LABEL: @test_mm256_mask_permutex2var_ps
+  // CHECK: @llvm.x86.avx512.mask.vpermt2var.ps.256
+  return _mm256_mask_permutex2var_ps(__A,__U,__I,__B); 
+}
+__m256 test_mm256_maskz_permutex2var_ps(__mmask8 __U, __m256 __A, __m256i __I, __m256 __B) {
+  // CHECK-LABEL: @test_mm256_maskz_permutex2var_ps
+  // CHECK: @llvm.x86.avx512.maskz.vpermt2var.ps.256
+  return _mm256_maskz_permutex2var_ps(__U,__A,__I,__B); 
+}
+__m128i test_mm_permutex2var_epi64(__m128i __A, __m128i __I, __m128i __B) {
+  // CHECK-LABEL: @test_mm_permutex2var_epi64
+  // CHECK: @llvm.x86.avx512.mask.vpermt2var.q.128
+  return _mm_permutex2var_epi64(__A,__I,__B); 
+}
+__m128i test_mm_mask_permutex2var_epi64(__m128i __A, __mmask8 __U, __m128i __I, __m128i __B) {
+  // CHECK-LABEL: @test_mm_mask_permutex2var_epi64
+  // CHECK: @llvm.x86.avx512.mask.vpermt2var.q.128
+  return _mm_mask_permutex2var_epi64(__A,__U,__I,__B); 
+}
+__m128i test_mm_maskz_permutex2var_epi64(__mmask8 __U, __m128i __A, __m128i __I, __m128i __B) {
+  // CHECK-LABEL: @test_mm_maskz_permutex2var_epi64
+  // CHECK: @llvm.x86.avx512.maskz.vpermt2var.q.128
+  return _mm_maskz_permutex2var_epi64(__U,__A,__I,__B); 
+}
+__m256i test_mm256_permutex2var_epi64(__m256i __A, __m256i __I, __m256i __B) {
+  // CHECK-LABEL: @test_mm256_permutex2var_epi64
+  // CHECK: @llvm.x86.avx512.mask.vpermt2var.q.256
+  return _mm256_permutex2var_epi64(__A,__I,__B); 
+}
+__m256i test_mm256_mask_permutex2var_epi64(__m256i __A, __mmask8 __U, __m256i __I, __m256i __B) {
+  // CHECK-LABEL: @test_mm256_mask_permutex2var_epi64
+  // CHECK: @llvm.x86.avx512.mask.vpermt2var.q.256
+  return _mm256_mask_permutex2var_epi64(__A,__U,__I,__B); 
+}
+__m256i test_mm256_maskz_permutex2var_epi64(__mmask8 __U, __m256i __A, __m256i __I, __m256i __B) {
+  // CHECK-LABEL: @test_mm256_maskz_permutex2var_epi64
+  // CHECK: @llvm.x86.avx512.maskz.vpermt2var.q.256
+  return _mm256_maskz_permutex2var_epi64(__U,__A,__I,__B); 
+}
diff --git a/test/CodeGen/avx512vlbw-builtins.c b/test/CodeGen/avx512vlbw-builtins.c
index a4a1244..11155f6 100644
--- a/test/CodeGen/avx512vlbw-builtins.c
+++ b/test/CodeGen/avx512vlbw-builtins.c
@@ -1,4 +1,8 @@
-// RUN: %clang_cc1 %s -O0 -triple=x86_64-apple-darwin -ffreestanding -target-feature +avx512bw -target-feature +avx512vl -emit-llvm -o - -Werror | FileCheck %s
+// RUN: %clang_cc1 %s -triple=x86_64-apple-darwin -target-feature +avx512bw -target-feature +avx512vl -emit-llvm -o - -Werror | FileCheck %s
+// RUN: %clang_cc1 %s -triple=x86_64-apple-darwin -target-feature +avx512bw -target-feature +avx512vl -fno-signed-char -emit-llvm -o - -Werror | FileCheck %s
+
+// Don't include mm_malloc.h, it's system specific.
+#define __MM_MALLOC_H
 
 #include <immintrin.h>
 
@@ -792,3 +796,912 @@
   //CHECK: @llvm.x86.avx512.mask.pmull.w.128
   return _mm_maskz_mullo_epi16(__U , __A, __B);
 }
+
+
+__m128i test_mm_mask_blend_epi8(__mmask16 __U, __m128i __A, __m128i __W) {
+  // CHECK-LABEL: @test_mm_mask_blend_epi8
+  // CHECK: @llvm.x86.avx512.mask.blend.b.128
+  return _mm_mask_blend_epi8(__U,__A,__W); 
+}
+__m256i test_mm256_mask_blend_epi8(__mmask32 __U, __m256i __A, __m256i __W) {
+  // CHECK-LABEL: @test_mm256_mask_blend_epi8
+  // CHECK: @llvm.x86.avx512.mask.blend.b.256
+  return _mm256_mask_blend_epi8(__U,__A,__W); 
+}
+
+__m128i test_mm_mask_blend_epi16(__mmask8 __U, __m128i __A, __m128i __W) {
+  // CHECK-LABEL: @test_mm_mask_blend_epi16
+  // CHECK: @llvm.x86.avx512.mask.blend.w.128
+  return _mm_mask_blend_epi16(__U,__A,__W); 
+}
+
+__m256i test_mm256_mask_blend_epi16(__mmask16 __U, __m256i __A, __m256i __W) {
+  // CHECK-LABEL: @test_mm256_mask_blend_epi16
+  // CHECK: @llvm.x86.avx512.mask.blend.w.256
+  return _mm256_mask_blend_epi16(__U,__A,__W); 
+}
+
+__m128i test_mm_mask_abs_epi8(__m128i __W, __mmask16 __U, __m128i __A) {
+  // CHECK-LABEL: @test_mm_mask_abs_epi8
+  // CHECK: @llvm.x86.avx512.mask.pabs.b.128
+  return _mm_mask_abs_epi8(__W,__U,__A); 
+}
+
+__m128i test_mm_maskz_abs_epi8(__mmask16 __U, __m128i __A) {
+  // CHECK-LABEL: @test_mm_maskz_abs_epi8
+  // CHECK: @llvm.x86.avx512.mask.pabs.b.128
+  return _mm_maskz_abs_epi8(__U,__A); 
+}
+
+__m256i test_mm256_mask_abs_epi8(__m256i __W, __mmask32 __U, __m256i __A) {
+  // CHECK-LABEL: @test_mm256_mask_abs_epi8
+  // CHECK: @llvm.x86.avx512.mask.pabs.b.256
+  return _mm256_mask_abs_epi8(__W,__U,__A); 
+}
+
+__m256i test_mm256_maskz_abs_epi8(__mmask32 __U, __m256i __A) {
+  // CHECK-LABEL: @test_mm256_maskz_abs_epi8
+  // CHECK: @llvm.x86.avx512.mask.pabs.b.256
+  return _mm256_maskz_abs_epi8(__U,__A); 
+}
+
+__m128i test_mm_mask_abs_epi16(__m128i __W, __mmask8 __U, __m128i __A) {
+  // CHECK-LABEL: @test_mm_mask_abs_epi16
+  // CHECK: @llvm.x86.avx512.mask.pabs.w.128
+  return _mm_mask_abs_epi16(__W,__U,__A); 
+}
+
+__m128i test_mm_maskz_abs_epi16(__mmask8 __U, __m128i __A) {
+  // CHECK-LABEL: @test_mm_maskz_abs_epi16
+  // CHECK: @llvm.x86.avx512.mask.pabs.w.128
+  return _mm_maskz_abs_epi16(__U,__A); 
+}
+
+__m256i test_mm256_mask_abs_epi16(__m256i __W, __mmask16 __U, __m256i __A) {
+  // CHECK-LABEL: @test_mm256_mask_abs_epi16
+  // CHECK: @llvm.x86.avx512.mask.pabs.w.256
+  return _mm256_mask_abs_epi16(__W,__U,__A); 
+}
+
+__m256i test_mm256_maskz_abs_epi16(__mmask16 __U, __m256i __A) {
+  // CHECK-LABEL: @test_mm256_maskz_abs_epi16
+  // CHECK: @llvm.x86.avx512.mask.pabs.w.256
+  return _mm256_maskz_abs_epi16(__U,__A); 
+}
+
+__m128i test_mm_maskz_packs_epi32(__mmask8 __M, __m128i __A, __m128i __B) {
+  // CHECK-LABEL: @test_mm_maskz_packs_epi32
+  // CHECK: @llvm.x86.avx512.mask.packssdw.128
+  return _mm_maskz_packs_epi32(__M,__A,__B); 
+}
+__m128i test_mm_mask_packs_epi32(__m128i __W, __mmask16 __M, __m128i __A,          __m128i __B) {
+  // CHECK-LABEL: @test_mm_mask_packs_epi32
+  // CHECK: @llvm.x86.avx512.mask.packssdw.128
+  return _mm_mask_packs_epi32(__W,__M,__A,__B); 
+}
+__m256i test_mm256_maskz_packs_epi32(__mmask16 __M, __m256i __A, __m256i __B) {
+  // CHECK-LABEL: @test_mm256_maskz_packs_epi32
+  // CHECK: @llvm.x86.avx512.mask.packssdw.256
+  return _mm256_maskz_packs_epi32(__M,__A,__B); 
+}
+__m256i test_mm256_mask_packs_epi32(__m256i __W, __mmask16 __M, __m256i __A,       __m256i __B) {
+  // CHECK-LABEL: @test_mm256_mask_packs_epi32
+  // CHECK: @llvm.x86.avx512.mask.packssdw.256
+  return _mm256_mask_packs_epi32(__W,__M,__A,__B); 
+}
+__m128i test_mm_maskz_packs_epi16(__mmask16 __M, __m128i __A, __m128i __B) {
+  // CHECK-LABEL: @test_mm_maskz_packs_epi16
+  // CHECK: @llvm.x86.avx512.mask.packsswb.128
+  return _mm_maskz_packs_epi16(__M,__A,__B); 
+}
+__m128i test_mm_mask_packs_epi16(__m128i __W, __mmask16 __M, __m128i __A,          __m128i __B) {
+  // CHECK-LABEL: @test_mm_mask_packs_epi16
+  // CHECK: @llvm.x86.avx512.mask.packsswb.128
+  return _mm_mask_packs_epi16(__W,__M,__A,__B); 
+}
+__m256i test_mm256_maskz_packs_epi16(__mmask32 __M, __m256i __A, __m256i __B) {
+  // CHECK-LABEL: @test_mm256_maskz_packs_epi16
+  // CHECK: @llvm.x86.avx512.mask.packsswb.256
+  return _mm256_maskz_packs_epi16(__M,__A,__B); 
+}
+__m256i test_mm256_mask_packs_epi16(__m256i __W, __mmask32 __M, __m256i __A,       __m256i __B) {
+  // CHECK-LABEL: @test_mm256_mask_packs_epi16
+  // CHECK: @llvm.x86.avx512.mask.packsswb.256
+  return _mm256_mask_packs_epi16(__W,__M,__A,__B); 
+}
+
+__m128i test_mm_mask_packus_epi32(__m128i __W, __mmask16 __M, __m128i __A,           __m128i __B) {
+  // CHECK-LABEL: @test_mm_mask_packus_epi32
+  // CHECK: @llvm.x86.avx512.mask.packusdw.128
+  return _mm_mask_packus_epi32(__W,__M,__A,__B); 
+}
+
+__m128i test_mm_maskz_packus_epi32(__mmask8 __M, __m128i __A, __m128i __B) {
+  // CHECK-LABEL: @test_mm_maskz_packus_epi32
+  // CHECK: @llvm.x86.avx512.mask.packusdw.128
+  return _mm_maskz_packus_epi32(__M,__A,__B); 
+}
+
+__m256i test_mm256_maskz_packus_epi32(__mmask16 __M, __m256i __A, __m256i __B) {
+  // CHECK-LABEL: @test_mm256_maskz_packus_epi32
+  // CHECK: @llvm.x86.avx512.mask.packusdw.256
+  return _mm256_maskz_packus_epi32(__M,__A,__B); 
+}
+
+__m256i test_mm256_mask_packus_epi32(__m256i __W, __mmask16 __M, __m256i __A,        __m256i __B) {
+  // CHECK-LABEL: @test_mm256_mask_packus_epi32
+  // CHECK: @llvm.x86.avx512.mask.packusdw.256
+  return _mm256_mask_packus_epi32(__W,__M,__A,__B); 
+}
+
+__m128i test_mm_maskz_packus_epi16(__mmask16 __M, __m128i __A, __m128i __B) {
+  // CHECK-LABEL: @test_mm_maskz_packus_epi16
+  // CHECK: @llvm.x86.avx512.mask.packuswb.128
+  return _mm_maskz_packus_epi16(__M,__A,__B); 
+}
+
+__m128i test_mm_mask_packus_epi16(__m128i __W, __mmask16 __M, __m128i __A,           __m128i __B) {
+  // CHECK-LABEL: @test_mm_mask_packus_epi16
+  // CHECK: @llvm.x86.avx512.mask.packuswb.128
+  return _mm_mask_packus_epi16(__W,__M,__A,__B); 
+}
+
+__m256i test_mm256_maskz_packus_epi16(__mmask32 __M, __m256i __A, __m256i __B) {
+  // CHECK-LABEL: @test_mm256_maskz_packus_epi16
+  // CHECK: @llvm.x86.avx512.mask.packuswb.256
+  return _mm256_maskz_packus_epi16(__M,__A,__B); 
+}
+
+__m256i test_mm256_mask_packus_epi16(__m256i __W, __mmask32 __M, __m256i __A,        __m256i __B) {
+  // CHECK-LABEL: @test_mm256_mask_packus_epi16
+  // CHECK: @llvm.x86.avx512.mask.packuswb.256
+  return _mm256_mask_packus_epi16(__W,__M,__A,__B); 
+}
+
+__m128i test_mm_mask_adds_epi8(__m128i __W, __mmask16 __U, __m128i __A,        __m128i __B) {
+  // CHECK-LABEL: @test_mm_mask_adds_epi8
+  // CHECK: @llvm.x86.avx512.mask.padds.b.128
+  return _mm_mask_adds_epi8(__W,__U,__A,__B); 
+}
+__m128i test_mm_maskz_adds_epi8(__mmask16 __U, __m128i __A, __m128i __B) {
+  // CHECK-LABEL: @test_mm_maskz_adds_epi8
+  // CHECK: @llvm.x86.avx512.mask.padds.b.128
+  return _mm_maskz_adds_epi8(__U,__A,__B); 
+}
+__m256i test_mm256_mask_adds_epi8(__m256i __W, __mmask32 __U, __m256i __A,           __m256i __B) {
+  // CHECK-LABEL: @test_mm256_mask_adds_epi8
+  // CHECK: @llvm.x86.avx512.mask.padds.b.256
+  return _mm256_mask_adds_epi8(__W,__U,__A,__B); 
+}
+__m256i test_mm256_maskz_adds_epi8(__mmask32 __U, __m256i __A, __m256i __B) {
+  // CHECK-LABEL: @test_mm256_maskz_adds_epi8
+  // CHECK: @llvm.x86.avx512.mask.padds.b.256
+  return _mm256_maskz_adds_epi8(__U,__A,__B); 
+}
+__m128i test_mm_mask_adds_epi16(__m128i __W, __mmask8 __U, __m128i __A,         __m128i __B) {
+  // CHECK-LABEL: @test_mm_mask_adds_epi16
+  // CHECK: @llvm.x86.avx512.mask.padds.w.128
+  return _mm_mask_adds_epi16(__W,__U,__A,__B); 
+}
+__m128i test_mm_maskz_adds_epi16(__mmask8 __U, __m128i __A, __m128i __B) {
+  // CHECK-LABEL: @test_mm_maskz_adds_epi16
+  // CHECK: @llvm.x86.avx512.mask.padds.w.128
+  return _mm_maskz_adds_epi16(__U,__A,__B); 
+}
+__m256i test_mm256_mask_adds_epi16(__m256i __W, __mmask16 __U, __m256i __A,      __m256i __B) {
+  // CHECK-LABEL: @test_mm256_mask_adds_epi16
+  // CHECK: @llvm.x86.avx512.mask.padds.w.256
+  return _mm256_mask_adds_epi16(__W,__U,__A,__B); 
+}
+__m256i test_mm256_maskz_adds_epi16(__mmask16 __U, __m256i __A, __m256i __B) {
+  // CHECK-LABEL: @test_mm256_maskz_adds_epi16
+  // CHECK: @llvm.x86.avx512.mask.padds.w.256
+  return _mm256_maskz_adds_epi16(__U,__A,__B); 
+}
+__m128i test_mm_mask_adds_epu8(__m128i __W, __mmask16 __U, __m128i __A,        __m128i __B) {
+  // CHECK-LABEL: @test_mm_mask_adds_epu8
+  // CHECK: @llvm.x86.avx512.mask.paddus.b.128
+  return _mm_mask_adds_epu8(__W,__U,__A,__B); 
+}
+__m128i test_mm_maskz_adds_epu8(__mmask16 __U, __m128i __A, __m128i __B) {
+  // CHECK-LABEL: @test_mm_maskz_adds_epu8
+  // CHECK: @llvm.x86.avx512.mask.paddus.b.128
+  return _mm_maskz_adds_epu8(__U,__A,__B); 
+}
+__m256i test_mm256_mask_adds_epu8(__m256i __W, __mmask32 __U, __m256i __A,           __m256i __B) {
+  // CHECK-LABEL: @test_mm256_mask_adds_epu8
+  // CHECK: @llvm.x86.avx512.mask.paddus.b.256
+  return _mm256_mask_adds_epu8(__W,__U,__A,__B); 
+}
+__m256i test_mm256_maskz_adds_epu8(__mmask32 __U, __m256i __A, __m256i __B) {
+  // CHECK-LABEL: @test_mm256_maskz_adds_epu8
+  // CHECK: @llvm.x86.avx512.mask.paddus.b.256
+  return _mm256_maskz_adds_epu8(__U,__A,__B); 
+}
+__m128i test_mm_mask_adds_epu16(__m128i __W, __mmask8 __U, __m128i __A,         __m128i __B) {
+  // CHECK-LABEL: @test_mm_mask_adds_epu16
+  // CHECK: @llvm.x86.avx512.mask.paddus.w.128
+  return _mm_mask_adds_epu16(__W,__U,__A,__B); 
+}
+__m128i test_mm_maskz_adds_epu16(__mmask8 __U, __m128i __A, __m128i __B) {
+  // CHECK-LABEL: @test_mm_maskz_adds_epu16
+  // CHECK: @llvm.x86.avx512.mask.paddus.w.128
+  return _mm_maskz_adds_epu16(__U,__A,__B); 
+}
+__m256i test_mm256_mask_adds_epu16(__m256i __W, __mmask16 __U, __m256i __A,      __m256i __B) {
+  // CHECK-LABEL: @test_mm256_mask_adds_epu16
+  // CHECK: @llvm.x86.avx512.mask.paddus.w.256
+  return _mm256_mask_adds_epu16(__W,__U,__A,__B); 
+}
+__m256i test_mm256_maskz_adds_epu16(__mmask16 __U, __m256i __A, __m256i __B) {
+  // CHECK-LABEL: @test_mm256_maskz_adds_epu16
+  // CHECK: @llvm.x86.avx512.mask.paddus.w.256
+  return _mm256_maskz_adds_epu16(__U,__A,__B); 
+}
+__m128i test_mm_mask_avg_epu8(__m128i __W, __mmask16 __U, __m128i __A,       __m128i __B) {
+  // CHECK-LABEL: @test_mm_mask_avg_epu8
+  // CHECK: @llvm.x86.avx512.mask.pavg.b.128
+  return _mm_mask_avg_epu8(__W,__U,__A,__B); 
+}
+__m128i test_mm_maskz_avg_epu8(__mmask16 __U, __m128i __A, __m128i __B) {
+  // CHECK-LABEL: @test_mm_maskz_avg_epu8
+  // CHECK: @llvm.x86.avx512.mask.pavg.b.128
+  return _mm_maskz_avg_epu8(__U,__A,__B); 
+}
+__m256i test_mm256_mask_avg_epu8(__m256i __W, __mmask32 __U, __m256i __A,          __m256i __B) {
+  // CHECK-LABEL: @test_mm256_mask_avg_epu8
+  // CHECK: @llvm.x86.avx512.mask.pavg.b.256
+  return _mm256_mask_avg_epu8(__W,__U,__A,__B); 
+}
+__m256i test_mm256_maskz_avg_epu8(__mmask32 __U, __m256i __A, __m256i __B) {
+  // CHECK-LABEL: @test_mm256_maskz_avg_epu8
+  // CHECK: @llvm.x86.avx512.mask.pavg.b.256
+  return _mm256_maskz_avg_epu8(__U,__A,__B); 
+}
+__m128i test_mm_mask_avg_epu16(__m128i __W, __mmask8 __U, __m128i __A,        __m128i __B) {
+  // CHECK-LABEL: @test_mm_mask_avg_epu16
+  // CHECK: @llvm.x86.avx512.mask.pavg.w.128
+  return _mm_mask_avg_epu16(__W,__U,__A,__B); 
+}
+__m128i test_mm_maskz_avg_epu16(__mmask8 __U, __m128i __A, __m128i __B) {
+  // CHECK-LABEL: @test_mm_maskz_avg_epu16
+  // CHECK: @llvm.x86.avx512.mask.pavg.w.128
+  return _mm_maskz_avg_epu16(__U,__A,__B); 
+}
+__m256i test_mm256_mask_avg_epu16(__m256i __W, __mmask16 __U, __m256i __A,           __m256i __B) {
+  // CHECK-LABEL: @test_mm256_mask_avg_epu16
+  // CHECK: @llvm.x86.avx512.mask.pavg.w.256
+  return _mm256_mask_avg_epu16(__W,__U,__A,__B); 
+}
+__m256i test_mm256_maskz_avg_epu16(__mmask16 __U, __m256i __A, __m256i __B) {
+  // CHECK-LABEL: @test_mm256_maskz_avg_epu16
+  // CHECK: @llvm.x86.avx512.mask.pavg.w.256
+  return _mm256_maskz_avg_epu16(__U,__A,__B); 
+}
+__m128i test_mm_maskz_max_epi8(__mmask16 __M, __m128i __A, __m128i __B) {
+  // CHECK-LABEL: @test_mm_maskz_max_epi8
+  // CHECK: @llvm.x86.avx512.mask.pmaxs.b.128
+  return _mm_maskz_max_epi8(__M,__A,__B); 
+}
+__m128i test_mm_mask_max_epi8(__m128i __W, __mmask16 __M, __m128i __A,       __m128i __B) {
+  // CHECK-LABEL: @test_mm_mask_max_epi8
+  // CHECK: @llvm.x86.avx512.mask.pmaxs.b.128
+  return _mm_mask_max_epi8(__W,__M,__A,__B); 
+}
+__m256i test_mm256_maskz_max_epi8(__mmask32 __M, __m256i __A, __m256i __B) {
+  // CHECK-LABEL: @test_mm256_maskz_max_epi8
+  // CHECK: @llvm.x86.avx512.mask.pmaxs.b.256
+  return _mm256_maskz_max_epi8(__M,__A,__B); 
+}
+__m256i test_mm256_mask_max_epi8(__m256i __W, __mmask32 __M, __m256i __A,          __m256i __B) {
+  // CHECK-LABEL: @test_mm256_mask_max_epi8
+  // CHECK: @llvm.x86.avx512.mask.pmaxs.b.256
+  return _mm256_mask_max_epi8(__W,__M,__A,__B); 
+}
+__m128i test_mm_maskz_max_epi16(__mmask8 __M, __m128i __A, __m128i __B) {
+  // CHECK-LABEL: @test_mm_maskz_max_epi16
+  // CHECK: @llvm.x86.avx512.mask.pmaxs.w.128
+  return _mm_maskz_max_epi16(__M,__A,__B); 
+}
+__m128i test_mm_mask_max_epi16(__m128i __W, __mmask8 __M, __m128i __A,        __m128i __B) {
+  // CHECK-LABEL: @test_mm_mask_max_epi16
+  // CHECK: @llvm.x86.avx512.mask.pmaxs.w.128
+  return _mm_mask_max_epi16(__W,__M,__A,__B); 
+}
+__m256i test_mm256_maskz_max_epi16(__mmask16 __M, __m256i __A, __m256i __B) {
+  // CHECK-LABEL: @test_mm256_maskz_max_epi16
+  // CHECK: @llvm.x86.avx512.mask.pmaxs.w.256
+  return _mm256_maskz_max_epi16(__M,__A,__B); 
+}
+__m256i test_mm256_mask_max_epi16(__m256i __W, __mmask16 __M, __m256i __A,           __m256i __B) {
+  // CHECK-LABEL: @test_mm256_mask_max_epi16
+  // CHECK: @llvm.x86.avx512.mask.pmaxs.w.256
+  return _mm256_mask_max_epi16(__W,__M,__A,__B); 
+}
+__m128i test_mm_maskz_max_epu8(__mmask16 __M, __m128i __A, __m128i __B) {
+  // CHECK-LABEL: @test_mm_maskz_max_epu8
+  // CHECK: @llvm.x86.avx512.mask.pmaxu.b.128
+  return _mm_maskz_max_epu8(__M,__A,__B); 
+}
+__m128i test_mm_mask_max_epu8(__m128i __W, __mmask16 __M, __m128i __A,       __m128i __B) {
+  // CHECK-LABEL: @test_mm_mask_max_epu8
+  // CHECK: @llvm.x86.avx512.mask.pmaxu.b.128
+  return _mm_mask_max_epu8(__W,__M,__A,__B); 
+}
+__m256i test_mm256_maskz_max_epu8(__mmask32 __M, __m256i __A, __m256i __B) {
+  // CHECK-LABEL: @test_mm256_maskz_max_epu8
+  // CHECK: @llvm.x86.avx512.mask.pmaxu.b.256
+  return _mm256_maskz_max_epu8(__M,__A,__B); 
+}
+__m256i test_mm256_mask_max_epu8(__m256i __W, __mmask32 __M, __m256i __A,          __m256i __B) {
+  // CHECK-LABEL: @test_mm256_mask_max_epu8
+  // CHECK: @llvm.x86.avx512.mask.pmaxu.b.256
+  return _mm256_mask_max_epu8(__W,__M,__A,__B); 
+}
+__m128i test_mm_maskz_max_epu16(__mmask8 __M, __m128i __A, __m128i __B) {
+  // CHECK-LABEL: @test_mm_maskz_max_epu16
+  // CHECK: @llvm.x86.avx512.mask.pmaxu.w.128
+  return _mm_maskz_max_epu16(__M,__A,__B); 
+}
+__m128i test_mm_mask_max_epu16(__m128i __W, __mmask8 __M, __m128i __A,        __m128i __B) {
+  // CHECK-LABEL: @test_mm_mask_max_epu16
+  // CHECK: @llvm.x86.avx512.mask.pmaxu.w.128
+  return _mm_mask_max_epu16(__W,__M,__A,__B); 
+}
+__m256i test_mm256_maskz_max_epu16(__mmask16 __M, __m256i __A, __m256i __B) {
+  // CHECK-LABEL: @test_mm256_maskz_max_epu16
+  // CHECK: @llvm.x86.avx512.mask.pmaxu.w.256
+  return _mm256_maskz_max_epu16(__M,__A,__B); 
+}
+__m256i test_mm256_mask_max_epu16(__m256i __W, __mmask16 __M, __m256i __A,           __m256i __B) {
+  // CHECK-LABEL: @test_mm256_mask_max_epu16
+  // CHECK: @llvm.x86.avx512.mask.pmaxu.w.256
+  return _mm256_mask_max_epu16(__W,__M,__A,__B); 
+}
+__m128i test_mm_maskz_min_epi8(__mmask16 __M, __m128i __A, __m128i __B) {
+  // CHECK-LABEL: @test_mm_maskz_min_epi8
+  // CHECK: @llvm.x86.avx512.mask.pmins.b.128
+  return _mm_maskz_min_epi8(__M,__A,__B); 
+}
+__m128i test_mm_mask_min_epi8(__m128i __W, __mmask16 __M, __m128i __A,       __m128i __B) {
+  // CHECK-LABEL: @test_mm_mask_min_epi8
+  // CHECK: @llvm.x86.avx512.mask.pmins.b.128
+  return _mm_mask_min_epi8(__W,__M,__A,__B); 
+}
+__m256i test_mm256_maskz_min_epi8(__mmask32 __M, __m256i __A, __m256i __B) {
+  // CHECK-LABEL: @test_mm256_maskz_min_epi8
+  // CHECK: @llvm.x86.avx512.mask.pmins.b.256
+  return _mm256_maskz_min_epi8(__M,__A,__B); 
+}
+__m256i test_mm256_mask_min_epi8(__m256i __W, __mmask32 __M, __m256i __A,          __m256i __B) {
+  // CHECK-LABEL: @test_mm256_mask_min_epi8
+  // CHECK: @llvm.x86.avx512.mask.pmins.b.256
+  return _mm256_mask_min_epi8(__W,__M,__A,__B); 
+}
+__m128i test_mm_maskz_min_epi16(__mmask8 __M, __m128i __A, __m128i __B) {
+  // CHECK-LABEL: @test_mm_maskz_min_epi16
+  // CHECK: @llvm.x86.avx512.mask.pmins.w.128
+  return _mm_maskz_min_epi16(__M,__A,__B); 
+}
+__m128i test_mm_mask_min_epi16(__m128i __W, __mmask8 __M, __m128i __A,        __m128i __B) {
+  // CHECK-LABEL: @test_mm_mask_min_epi16
+  // CHECK: @llvm.x86.avx512.mask.pmins.w.128
+  return _mm_mask_min_epi16(__W,__M,__A,__B); 
+}
+__m256i test_mm256_maskz_min_epi16(__mmask16 __M, __m256i __A, __m256i __B) {
+  // CHECK-LABEL: @test_mm256_maskz_min_epi16
+  // CHECK: @llvm.x86.avx512.mask.pmins.w.256
+  return _mm256_maskz_min_epi16(__M,__A,__B); 
+}
+__m256i test_mm256_mask_min_epi16(__m256i __W, __mmask16 __M, __m256i __A,           __m256i __B) {
+  // CHECK-LABEL: @test_mm256_mask_min_epi16
+  // CHECK: @llvm.x86.avx512.mask.pmins.w.256
+  return _mm256_mask_min_epi16(__W,__M,__A,__B); 
+}
+__m128i test_mm_maskz_min_epu8(__mmask16 __M, __m128i __A, __m128i __B) {
+  // CHECK-LABEL: @test_mm_maskz_min_epu8
+  // CHECK: @llvm.x86.avx512.mask.pminu.b.128
+  return _mm_maskz_min_epu8(__M,__A,__B); 
+}
+__m128i test_mm_mask_min_epu8(__m128i __W, __mmask16 __M, __m128i __A,       __m128i __B) {
+  // CHECK-LABEL: @test_mm_mask_min_epu8
+  // CHECK: @llvm.x86.avx512.mask.pminu.b.128
+  return _mm_mask_min_epu8(__W,__M,__A,__B); 
+}
+__m256i test_mm256_maskz_min_epu8(__mmask32 __M, __m256i __A, __m256i __B) {
+  // CHECK-LABEL: @test_mm256_maskz_min_epu8
+  // CHECK: @llvm.x86.avx512.mask.pminu.b.256
+  return _mm256_maskz_min_epu8(__M,__A,__B); 
+}
+__m256i test_mm256_mask_min_epu8(__m256i __W, __mmask32 __M, __m256i __A,          __m256i __B) {
+  // CHECK-LABEL: @test_mm256_mask_min_epu8
+  // CHECK: @llvm.x86.avx512.mask.pminu.b.256
+  return _mm256_mask_min_epu8(__W,__M,__A,__B); 
+}
+__m128i test_mm_maskz_min_epu16(__mmask8 __M, __m128i __A, __m128i __B) {
+  // CHECK-LABEL: @test_mm_maskz_min_epu16
+  // CHECK: @llvm.x86.avx512.mask.pminu.w.128
+  return _mm_maskz_min_epu16(__M,__A,__B); 
+}
+__m128i test_mm_mask_min_epu16(__m128i __W, __mmask8 __M, __m128i __A,        __m128i __B) {
+  // CHECK-LABEL: @test_mm_mask_min_epu16
+  // CHECK: @llvm.x86.avx512.mask.pminu.w.128
+  return _mm_mask_min_epu16(__W,__M,__A,__B); 
+}
+__m256i test_mm256_maskz_min_epu16(__mmask16 __M, __m256i __A, __m256i __B) {
+  // CHECK-LABEL: @test_mm256_maskz_min_epu16
+  // CHECK: @llvm.x86.avx512.mask.pminu.w.256
+  return _mm256_maskz_min_epu16(__M,__A,__B); 
+}
+__m256i test_mm256_mask_min_epu16(__m256i __W, __mmask16 __M, __m256i __A,           __m256i __B) {
+  // CHECK-LABEL: @test_mm256_mask_min_epu16
+  // CHECK: @llvm.x86.avx512.mask.pminu.w.256
+  return _mm256_mask_min_epu16(__W,__M,__A,__B); 
+}
+__m128i test_mm_mask_shuffle_epi8(__m128i __W, __mmask16 __U, __m128i __A,           __m128i __B) {
+  // CHECK-LABEL: @test_mm_mask_shuffle_epi8
+  // CHECK: @llvm.x86.avx512.mask.pshuf.b.128
+  return _mm_mask_shuffle_epi8(__W,__U,__A,__B); 
+}
+__m128i test_mm_maskz_shuffle_epi8(__mmask16 __U, __m128i __A, __m128i __B) {
+  // CHECK-LABEL: @test_mm_maskz_shuffle_epi8
+  // CHECK: @llvm.x86.avx512.mask.pshuf.b.128
+  return _mm_maskz_shuffle_epi8(__U,__A,__B); 
+}
+__m256i test_mm256_mask_shuffle_epi8(__m256i __W, __mmask32 __U, __m256i __A,        __m256i __B) {
+  // CHECK-LABEL: @test_mm256_mask_shuffle_epi8
+  // CHECK: @llvm.x86.avx512.mask.pshuf.b.256
+  return _mm256_mask_shuffle_epi8(__W,__U,__A,__B); 
+}
+__m256i test_mm256_maskz_shuffle_epi8(__mmask32 __U, __m256i __A, __m256i __B) {
+  // CHECK-LABEL: @test_mm256_maskz_shuffle_epi8
+  // CHECK: @llvm.x86.avx512.mask.pshuf.b.256
+  return _mm256_maskz_shuffle_epi8(__U,__A,__B); 
+}
+__m128i test_mm_mask_subs_epi8(__m128i __W, __mmask16 __U, __m128i __A,        __m128i __B) {
+  // CHECK-LABEL: @test_mm_mask_subs_epi8
+  // CHECK: @llvm.x86.avx512.mask.psubs.b.128
+  return _mm_mask_subs_epi8(__W,__U,__A,__B); 
+}
+__m128i test_mm_maskz_subs_epi8(__mmask16 __U, __m128i __A, __m128i __B) {
+  // CHECK-LABEL: @test_mm_maskz_subs_epi8
+  // CHECK: @llvm.x86.avx512.mask.psubs.b.128
+  return _mm_maskz_subs_epi8(__U,__A,__B); 
+}
+__m256i test_mm256_mask_subs_epi8(__m256i __W, __mmask32 __U, __m256i __A,           __m256i __B) {
+  // CHECK-LABEL: @test_mm256_mask_subs_epi8
+  // CHECK: @llvm.x86.avx512.mask.psubs.b.256
+  return _mm256_mask_subs_epi8(__W,__U,__A,__B); 
+}
+__m256i test_mm256_maskz_subs_epi8(__mmask32 __U, __m256i __A, __m256i __B) {
+  // CHECK-LABEL: @test_mm256_maskz_subs_epi8
+  // CHECK: @llvm.x86.avx512.mask.psubs.b.256
+  return _mm256_maskz_subs_epi8(__U,__A,__B); 
+}
+__m128i test_mm_mask_subs_epi16(__m128i __W, __mmask8 __U, __m128i __A,         __m128i __B) {
+  // CHECK-LABEL: @test_mm_mask_subs_epi16
+  // CHECK: @llvm.x86.avx512.mask.psubs.w.128
+  return _mm_mask_subs_epi16(__W,__U,__A,__B); 
+}
+__m128i test_mm_maskz_subs_epi16(__mmask8 __U, __m128i __A, __m128i __B) {
+  // CHECK-LABEL: @test_mm_maskz_subs_epi16
+  // CHECK: @llvm.x86.avx512.mask.psubs.w.128
+  return _mm_maskz_subs_epi16(__U,__A,__B); 
+}
+__m256i test_mm256_mask_subs_epi16(__m256i __W, __mmask16 __U, __m256i __A,      __m256i __B) {
+  // CHECK-LABEL: @test_mm256_mask_subs_epi16
+  // CHECK: @llvm.x86.avx512.mask.psubs.w.256
+  return _mm256_mask_subs_epi16(__W,__U,__A,__B); 
+}
+__m256i test_mm256_maskz_subs_epi16(__mmask16 __U, __m256i __A, __m256i __B) {
+  // CHECK-LABEL: @test_mm256_maskz_subs_epi16
+  // CHECK: @llvm.x86.avx512.mask.psubs.w.256
+  return _mm256_maskz_subs_epi16(__U,__A,__B); 
+}
+__m128i test_mm_mask_subs_epu8(__m128i __W, __mmask16 __U, __m128i __A,        __m128i __B) {
+  // CHECK-LABEL: @test_mm_mask_subs_epu8
+  // CHECK: @llvm.x86.avx512.mask.psubus.b.128
+  return _mm_mask_subs_epu8(__W,__U,__A,__B); 
+}
+__m128i test_mm_maskz_subs_epu8(__mmask16 __U, __m128i __A, __m128i __B) {
+  // CHECK-LABEL: @test_mm_maskz_subs_epu8
+  // CHECK: @llvm.x86.avx512.mask.psubus.b.128
+  return _mm_maskz_subs_epu8(__U,__A,__B); 
+}
+__m256i test_mm256_mask_subs_epu8(__m256i __W, __mmask32 __U, __m256i __A,           __m256i __B) {
+  // CHECK-LABEL: @test_mm256_mask_subs_epu8
+  // CHECK: @llvm.x86.avx512.mask.psubus.b.256
+  return _mm256_mask_subs_epu8(__W,__U,__A,__B); 
+}
+__m256i test_mm256_maskz_subs_epu8(__mmask32 __U, __m256i __A, __m256i __B) {
+  // CHECK-LABEL: @test_mm256_maskz_subs_epu8
+  // CHECK: @llvm.x86.avx512.mask.psubus.b.256
+  return _mm256_maskz_subs_epu8(__U,__A,__B); 
+}
+__m128i test_mm_mask_subs_epu16(__m128i __W, __mmask8 __U, __m128i __A,         __m128i __B) {
+  // CHECK-LABEL: @test_mm_mask_subs_epu16
+  // CHECK: @llvm.x86.avx512.mask.psubus.w.128
+  return _mm_mask_subs_epu16(__W,__U,__A,__B); 
+}
+__m128i test_mm_maskz_subs_epu16(__mmask8 __U, __m128i __A, __m128i __B) {
+  // CHECK-LABEL: @test_mm_maskz_subs_epu16
+  // CHECK: @llvm.x86.avx512.mask.psubus.w.128
+  return _mm_maskz_subs_epu16(__U,__A,__B); 
+}
+__m256i test_mm256_mask_subs_epu16(__m256i __W, __mmask16 __U, __m256i __A,      __m256i __B) {
+  // CHECK-LABEL: @test_mm256_mask_subs_epu16
+  // CHECK: @llvm.x86.avx512.mask.psubus.w.256
+  return _mm256_mask_subs_epu16(__W,__U,__A,__B); 
+}
+__m256i test_mm256_maskz_subs_epu16(__mmask16 __U, __m256i __A, __m256i __B) {
+  // CHECK-LABEL: @test_mm256_maskz_subs_epu16
+  // CHECK: @llvm.x86.avx512.mask.psubus.w.256
+  return _mm256_maskz_subs_epu16(__U,__A,__B); 
+}
+
+
+__m128i test_mm_mask2_permutex2var_epi16(__m128i __A, __m128i __I, __mmask8 __U,            __m128i __B) {
+  // CHECK-LABEL: @test_mm_mask2_permutex2var_epi16
+  // CHECK: @llvm.x86.avx512.mask.vpermi2var.hi.128
+  return _mm_mask2_permutex2var_epi16(__A,__I,__U,__B); 
+}
+__m256i test_mm256_mask2_permutex2var_epi16(__m256i __A, __m256i __I,         __mmask16 __U, __m256i __B) {
+  // CHECK-LABEL: @test_mm256_mask2_permutex2var_epi16
+  // CHECK: @llvm.x86.avx512.mask.vpermi2var.hi.256
+  return _mm256_mask2_permutex2var_epi16(__A,__I,__U,__B); 
+}
+__m128i test_mm_permutex2var_epi16(__m128i __A, __m128i __I, __m128i __B) {
+  // CHECK-LABEL: @test_mm_permutex2var_epi16
+  // CHECK: @llvm.x86.avx512.mask.vpermt2var.hi.128
+  return _mm_permutex2var_epi16(__A,__I,__B); 
+}
+__m128i test_mm_mask_permutex2var_epi16(__m128i __A, __mmask8 __U, __m128i __I,           __m128i __B) {
+  // CHECK-LABEL: @test_mm_mask_permutex2var_epi16
+  // CHECK: @llvm.x86.avx512.mask.vpermt2var.hi.128
+  return _mm_mask_permutex2var_epi16(__A,__U,__I,__B); 
+}
+__m128i test_mm_maskz_permutex2var_epi16(__mmask8 __U, __m128i __A, __m128i __I,            __m128i __B) {
+  // CHECK-LABEL: @test_mm_maskz_permutex2var_epi16
+  // CHECK: @llvm.x86.avx512.maskz.vpermt2var.hi.128
+  return _mm_maskz_permutex2var_epi16(__U,__A,__I,__B); 
+}
+
+__m256i test_mm256_permutex2var_epi16(__m256i __A, __m256i __I, __m256i __B) {
+  // CHECK-LABEL: @test_mm256_permutex2var_epi16
+  // CHECK: @llvm.x86.avx512.mask.vpermt2var.hi.256
+  return _mm256_permutex2var_epi16(__A,__I,__B); 
+}
+__m256i test_mm256_mask_permutex2var_epi16(__m256i __A, __mmask16 __U,        __m256i __I, __m256i __B) {
+  // CHECK-LABEL: @test_mm256_mask_permutex2var_epi16
+  // CHECK: @llvm.x86.avx512.mask.vpermt2var.hi.256
+  return _mm256_mask_permutex2var_epi16(__A,__U,__I,__B); 
+}
+__m256i test_mm256_maskz_permutex2var_epi16(__mmask16 __U, __m256i __A,         __m256i __I, __m256i __B) {
+  // CHECK-LABEL: @test_mm256_maskz_permutex2var_epi16
+  // CHECK: @llvm.x86.avx512.maskz.vpermt2var.hi.256
+  return _mm256_maskz_permutex2var_epi16(__U,__A,__I,__B); 
+}
+__m128i test_mm_mask_maddubs_epi16(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y) {
+  // CHECK-LABEL: @test_mm_mask_maddubs_epi16
+  // CHECK: @llvm.x86.avx512.mask.pmaddubs.w.128
+  return _mm_mask_maddubs_epi16(__W, __U, __X, __Y); 
+}
+
+__m128i test_mm_maskz_maddubs_epi16(__mmask8 __U, __m128i __X, __m128i __Y) {
+  // CHECK-LABEL: @test_mm_maskz_maddubs_epi16
+  // CHECK: @llvm.x86.avx512.mask.pmaddubs.w.128
+  return _mm_maskz_maddubs_epi16(__U, __X, __Y); 
+}
+
+__m256i test_mm256_mask_maddubs_epi16(__m256i __W, __mmask16 __U, __m256i __X, __m256i __Y) {
+  // CHECK-LABEL: @test_mm256_mask_maddubs_epi16
+  // CHECK: @llvm.x86.avx512.mask.pmaddubs.w.256
+  return _mm256_mask_maddubs_epi16(__W, __U, __X, __Y); 
+}
+
+__m256i test_mm256_maskz_maddubs_epi16(__mmask16 __U, __m256i __X, __m256i __Y) {
+  // CHECK-LABEL: @test_mm256_maskz_maddubs_epi16
+  // CHECK: @llvm.x86.avx512.mask.pmaddubs.w.256
+  return _mm256_maskz_maddubs_epi16(__U, __X, __Y); 
+}
+
+__m128i test_mm_mask_madd_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
+  // CHECK-LABEL: @test_mm_mask_madd_epi16
+  // CHECK: @llvm.x86.avx512.mask.pmaddw.d.128
+  return _mm_mask_madd_epi16(__W, __U, __A, __B); 
+}
+
+__m128i test_mm_maskz_madd_epi16(__mmask8 __U, __m128i __A, __m128i __B) {
+  // CHECK-LABEL: @test_mm_maskz_madd_epi16
+  // CHECK: @llvm.x86.avx512.mask.pmaddw.d.128
+  return _mm_maskz_madd_epi16(__U, __A, __B); 
+}
+
+__m256i test_mm256_mask_madd_epi16(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) {
+  // CHECK-LABEL: @test_mm256_mask_madd_epi16
+  // CHECK: @llvm.x86.avx512.mask.pmaddw.d.256
+  return _mm256_mask_madd_epi16(__W, __U, __A, __B); 
+}
+
+__m256i test_mm256_maskz_madd_epi16(__mmask8 __U, __m256i __A, __m256i __B) {
+  // CHECK-LABEL: @test_mm256_maskz_madd_epi16
+  // CHECK: @llvm.x86.avx512.mask.pmaddw.d.256
+  return _mm256_maskz_madd_epi16(__U, __A, __B); 
+}
+
+__m128i test_mm_cvtsepi16_epi8(__m128i __A) {
+  // CHECK-LABEL: @test_mm_cvtsepi16_epi8
+  // CHECK: @llvm.x86.avx512.mask.pmovs.wb.128
+  return _mm_cvtsepi16_epi8(__A); 
+}
+
+__m128i test_mm_mask_cvtsepi16_epi8(__m128i __O, __mmask8 __M, __m128i __A) {
+  // CHECK-LABEL: @test_mm_mask_cvtsepi16_epi8
+  // CHECK: @llvm.x86.avx512.mask.pmovs.wb.128
+  return _mm_mask_cvtsepi16_epi8(__O, __M, __A); 
+}
+
+__m128i test_mm_maskz_cvtsepi16_epi8(__mmask8 __M, __m128i __A) {
+  // CHECK-LABEL: @test_mm_maskz_cvtsepi16_epi8
+  // CHECK: @llvm.x86.avx512.mask.pmovs.wb.128
+  return _mm_maskz_cvtsepi16_epi8(__M, __A); 
+}
+
+__m128i test_mm256_cvtsepi16_epi8(__m256i __A) {
+  // CHECK-LABEL: @test_mm256_cvtsepi16_epi8
+  // CHECK: @llvm.x86.avx512.mask.pmovs.wb.256
+  return _mm256_cvtsepi16_epi8(__A); 
+}
+
+__m128i test_mm256_mask_cvtsepi16_epi8(__m128i __O, __mmask16 __M, __m256i __A) {
+  // CHECK-LABEL: @test_mm256_mask_cvtsepi16_epi8
+  // CHECK: @llvm.x86.avx512.mask.pmovs.wb.256
+  return _mm256_mask_cvtsepi16_epi8(__O, __M, __A); 
+}
+
+__m128i test_mm256_maskz_cvtsepi16_epi8(__mmask16 __M, __m256i __A) {
+  // CHECK-LABEL: @test_mm256_maskz_cvtsepi16_epi8
+  // CHECK: @llvm.x86.avx512.mask.pmovs.wb.256
+  return _mm256_maskz_cvtsepi16_epi8(__M, __A); 
+}
+
+__m128i test_mm_cvtusepi16_epi8(__m128i __A) {
+  // CHECK-LABEL: @test_mm_cvtusepi16_epi8
+  // CHECK: @llvm.x86.avx512.mask.pmovus.wb.128
+  return _mm_cvtusepi16_epi8(__A); 
+}
+
+__m128i test_mm_mask_cvtusepi16_epi8(__m128i __O, __mmask8 __M, __m128i __A) {
+  // CHECK-LABEL: @test_mm_mask_cvtusepi16_epi8
+  // CHECK: @llvm.x86.avx512.mask.pmovus.wb.128
+  return _mm_mask_cvtusepi16_epi8(__O, __M, __A); 
+}
+
+__m128i test_mm_maskz_cvtusepi16_epi8(__mmask8 __M, __m128i __A) {
+  // CHECK-LABEL: @test_mm_maskz_cvtusepi16_epi8
+  // CHECK: @llvm.x86.avx512.mask.pmovus.wb.128
+  return _mm_maskz_cvtusepi16_epi8(__M, __A); 
+}
+
+__m128i test_mm256_cvtusepi16_epi8(__m256i __A) {
+  // CHECK-LABEL: @test_mm256_cvtusepi16_epi8
+  // CHECK: @llvm.x86.avx512.mask.pmovus.wb.256
+  return _mm256_cvtusepi16_epi8(__A); 
+}
+
+__m128i test_mm256_mask_cvtusepi16_epi8(__m128i __O, __mmask16 __M, __m256i __A) {
+  // CHECK-LABEL: @test_mm256_mask_cvtusepi16_epi8
+  // CHECK: @llvm.x86.avx512.mask.pmovus.wb.256
+  return _mm256_mask_cvtusepi16_epi8(__O, __M, __A); 
+}
+
+__m128i test_mm256_maskz_cvtusepi16_epi8(__mmask16 __M, __m256i __A) {
+  // CHECK-LABEL: @test_mm256_maskz_cvtusepi16_epi8
+  // CHECK: @llvm.x86.avx512.mask.pmovus.wb.256
+  return _mm256_maskz_cvtusepi16_epi8(__M, __A); 
+}
+
+__m128i test_mm_cvtepi16_epi8(__m128i __A) {
+  // CHECK-LABEL: @test_mm_cvtepi16_epi8
+  // CHECK: @llvm.x86.avx512.mask.pmov.wb.128
+  return _mm_cvtepi16_epi8(__A); 
+}
+
+__m128i test_mm_mask_cvtepi16_epi8(__m128i __O, __mmask8 __M, __m128i __A) {
+  // CHECK-LABEL: @test_mm_mask_cvtepi16_epi8
+  // CHECK: @llvm.x86.avx512.mask.pmov.wb.128
+  return _mm_mask_cvtepi16_epi8(__O, __M, __A); 
+}
+
+__m128i test_mm_maskz_cvtepi16_epi8(__mmask8 __M, __m128i __A) {
+  // CHECK-LABEL: @test_mm_maskz_cvtepi16_epi8
+  // CHECK: @llvm.x86.avx512.mask.pmov.wb.128
+  return _mm_maskz_cvtepi16_epi8(__M, __A); 
+}
+
+__m128i test_mm256_cvtepi16_epi8(__m256i __A) {
+  // CHECK-LABEL: @test_mm256_cvtepi16_epi8
+  // CHECK: @llvm.x86.avx512.mask.pmov.wb.256
+  return _mm256_cvtepi16_epi8(__A); 
+}
+
+__m128i test_mm256_mask_cvtepi16_epi8(__m128i __O, __mmask16 __M, __m256i __A) {
+  // CHECK-LABEL: @test_mm256_mask_cvtepi16_epi8
+  // CHECK: @llvm.x86.avx512.mask.pmov.wb.256
+  return _mm256_mask_cvtepi16_epi8(__O, __M, __A); 
+}
+
+__m128i test_mm256_maskz_cvtepi16_epi8(__mmask16 __M, __m256i __A) {
+  // CHECK-LABEL: @test_mm256_maskz_cvtepi16_epi8
+  // CHECK: @llvm.x86.avx512.mask.pmov.wb.256
+  return _mm256_maskz_cvtepi16_epi8(__M, __A); 
+}
+
+__m128i test_mm_mask_mulhrs_epi16(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y) {
+  // CHECK-LABEL: @test_mm_mask_mulhrs_epi16
+  // CHECK: @llvm.x86.avx512.mask.pmul.hr.sw.128
+  return _mm_mask_mulhrs_epi16(__W, __U, __X, __Y); 
+}
+
+__m128i test_mm_maskz_mulhrs_epi16(__mmask8 __U, __m128i __X, __m128i __Y) {
+  // CHECK-LABEL: @test_mm_maskz_mulhrs_epi16
+  // CHECK: @llvm.x86.avx512.mask.pmul.hr.sw.128
+  return _mm_maskz_mulhrs_epi16(__U, __X, __Y); 
+}
+
+__m256i test_mm256_mask_mulhrs_epi16(__m256i __W, __mmask16 __U, __m256i __X, __m256i __Y) {
+  // CHECK-LABEL: @test_mm256_mask_mulhrs_epi16
+  // CHECK: @llvm.x86.avx512.mask.pmul.hr.sw.256
+  return _mm256_mask_mulhrs_epi16(__W, __U, __X, __Y); 
+}
+
+__m256i test_mm256_maskz_mulhrs_epi16(__mmask16 __U, __m256i __X, __m256i __Y) {
+  // CHECK-LABEL: @test_mm256_maskz_mulhrs_epi16
+  // CHECK: @llvm.x86.avx512.mask.pmul.hr.sw.256
+  return _mm256_maskz_mulhrs_epi16(__U, __X, __Y); 
+}
+
+__m128i test_mm_mask_mulhi_epu16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
+  // CHECK-LABEL: @test_mm_mask_mulhi_epu16
+  // CHECK: @llvm.x86.avx512.mask.pmulhu.w.128
+  return _mm_mask_mulhi_epu16(__W, __U, __A, __B); 
+}
+
+__m128i test_mm_maskz_mulhi_epu16(__mmask8 __U, __m128i __A, __m128i __B) {
+  // CHECK-LABEL: @test_mm_maskz_mulhi_epu16
+  // CHECK: @llvm.x86.avx512.mask.pmulhu.w.128
+  return _mm_maskz_mulhi_epu16(__U, __A, __B); 
+}
+
+__m256i test_mm256_mask_mulhi_epu16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) {
+  // CHECK-LABEL: @test_mm256_mask_mulhi_epu16
+  // CHECK: @llvm.x86.avx512.mask.pmulhu.w.256
+  return _mm256_mask_mulhi_epu16(__W, __U, __A, __B); 
+}
+
+__m256i test_mm256_maskz_mulhi_epu16(__mmask16 __U, __m256i __A, __m256i __B) {
+  // CHECK-LABEL: @test_mm256_maskz_mulhi_epu16
+  // CHECK: @llvm.x86.avx512.mask.pmulhu.w.256
+  return _mm256_maskz_mulhi_epu16(__U, __A, __B); 
+}
+
+__m128i test_mm_mask_mulhi_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
+  // CHECK-LABEL: @test_mm_mask_mulhi_epi16
+  // CHECK: @llvm.x86.avx512.mask.pmulh.w.128
+  return _mm_mask_mulhi_epi16(__W, __U, __A, __B); 
+}
+
+__m128i test_mm_maskz_mulhi_epi16(__mmask8 __U, __m128i __A, __m128i __B) {
+  // CHECK-LABEL: @test_mm_maskz_mulhi_epi16
+  // CHECK: @llvm.x86.avx512.mask.pmulh.w.128
+  return _mm_maskz_mulhi_epi16(__U, __A, __B); 
+}
+
+__m256i test_mm256_mask_mulhi_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) {
+  // CHECK-LABEL: @test_mm256_mask_mulhi_epi16
+  // CHECK: @llvm.x86.avx512.mask.pmulh.w.256
+  return _mm256_mask_mulhi_epi16(__W, __U, __A, __B); 
+}
+
+__m256i test_mm256_maskz_mulhi_epi16(__mmask16 __U, __m256i __A, __m256i __B) {
+  // CHECK-LABEL: @test_mm256_maskz_mulhi_epi16
+  // CHECK: @llvm.x86.avx512.mask.pmulh.w.256
+  return _mm256_maskz_mulhi_epi16(__U, __A, __B); 
+}
+
+__m128i test_mm_mask_unpackhi_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) {
+  // CHECK-LABEL: @test_mm_mask_unpackhi_epi8
+  // CHECK: @llvm.x86.avx512.mask.punpckhb.w.128
+  return _mm_mask_unpackhi_epi8(__W, __U, __A, __B); 
+}
+
+__m128i test_mm_maskz_unpackhi_epi8(__mmask16 __U, __m128i __A, __m128i __B) {
+  // CHECK-LABEL: @test_mm_maskz_unpackhi_epi8
+  // CHECK: @llvm.x86.avx512.mask.punpckhb.w.128
+  return _mm_maskz_unpackhi_epi8(__U, __A, __B); 
+}
+
+__m256i test_mm256_mask_unpackhi_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) {
+  // CHECK-LABEL: @test_mm256_mask_unpackhi_epi8
+  // CHECK: @llvm.x86.avx512.mask.punpckhb.w.256
+  return _mm256_mask_unpackhi_epi8(__W, __U, __A, __B); 
+}
+
+__m256i test_mm256_maskz_unpackhi_epi8(__mmask32 __U, __m256i __A, __m256i __B) {
+  // CHECK-LABEL: @test_mm256_maskz_unpackhi_epi8
+  // CHECK: @llvm.x86.avx512.mask.punpckhb.w.256
+  return _mm256_maskz_unpackhi_epi8(__U, __A, __B); 
+}
+
+__m128i test_mm_mask_unpackhi_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
+  // CHECK-LABEL: @test_mm_mask_unpackhi_epi16
+  // CHECK: @llvm.x86.avx512.mask.punpckhw.d.128
+  return _mm_mask_unpackhi_epi16(__W, __U, __A, __B); 
+}
+
+__m128i test_mm_maskz_unpackhi_epi16(__mmask8 __U, __m128i __A, __m128i __B) {
+  // CHECK-LABEL: @test_mm_maskz_unpackhi_epi16
+  // CHECK: @llvm.x86.avx512.mask.punpckhw.d.128
+  return _mm_maskz_unpackhi_epi16(__U, __A, __B); 
+}
+
+__m256i test_mm256_mask_unpackhi_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) {
+  // CHECK-LABEL: @test_mm256_mask_unpackhi_epi16
+  // CHECK: @llvm.x86.avx512.mask.punpckhw.d.256
+  return _mm256_mask_unpackhi_epi16(__W, __U, __A, __B); 
+}
+
+__m256i test_mm256_maskz_unpackhi_epi16(__mmask16 __U, __m256i __A, __m256i __B) {
+  // CHECK-LABEL: @test_mm256_maskz_unpackhi_epi16
+  // CHECK: @llvm.x86.avx512.mask.punpckhw.d.256
+  return _mm256_maskz_unpackhi_epi16(__U, __A, __B); 
+}
+
+__m128i test_mm_mask_unpacklo_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) {
+  // CHECK-LABEL: @test_mm_mask_unpacklo_epi8
+  // CHECK: @llvm.x86.avx512.mask.punpcklb.w.128
+  return _mm_mask_unpacklo_epi8(__W, __U, __A, __B); 
+}
+
+__m128i test_mm_maskz_unpacklo_epi8(__mmask16 __U, __m128i __A, __m128i __B) {
+  // CHECK-LABEL: @test_mm_maskz_unpacklo_epi8
+  // CHECK: @llvm.x86.avx512.mask.punpcklb.w.128
+  return _mm_maskz_unpacklo_epi8(__U, __A, __B); 
+}
+
+__m256i test_mm256_mask_unpacklo_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) {
+  // CHECK-LABEL: @test_mm256_mask_unpacklo_epi8
+  // CHECK: @llvm.x86.avx512.mask.punpcklb.w.256
+  return _mm256_mask_unpacklo_epi8(__W, __U, __A, __B); 
+}
+
+__m256i test_mm256_maskz_unpacklo_epi8(__mmask32 __U, __m256i __A, __m256i __B) {
+  // CHECK-LABEL: @test_mm256_maskz_unpacklo_epi8
+  // CHECK: @llvm.x86.avx512.mask.punpcklb.w.256
+  return _mm256_maskz_unpacklo_epi8(__U, __A, __B); 
+}
+
+__m128i test_mm_mask_unpacklo_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
+  // CHECK-LABEL: @test_mm_mask_unpacklo_epi16
+  // CHECK: @llvm.x86.avx512.mask.punpcklw.d.128
+  return _mm_mask_unpacklo_epi16(__W, __U, __A, __B); 
+}
+
+__m128i test_mm_maskz_unpacklo_epi16(__mmask8 __U, __m128i __A, __m128i __B) {
+  // CHECK-LABEL: @test_mm_maskz_unpacklo_epi16
+  // CHECK: @llvm.x86.avx512.mask.punpcklw.d.128
+  return _mm_maskz_unpacklo_epi16(__U, __A, __B); 
+}
+
+__m256i test_mm256_mask_unpacklo_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) {
+  // CHECK-LABEL: @test_mm256_mask_unpacklo_epi16
+  // CHECK: @llvm.x86.avx512.mask.punpcklw.d.256
+  return _mm256_mask_unpacklo_epi16(__W, __U, __A, __B); 
+}
+
+__m256i test_mm256_maskz_unpacklo_epi16(__mmask16 __U, __m256i __A, __m256i __B) {
+  // CHECK-LABEL: @test_mm256_maskz_unpacklo_epi16
+  // CHECK: @llvm.x86.avx512.mask.punpcklw.d.256
+  return _mm256_maskz_unpacklo_epi16(__U, __A, __B); 
+}
+
diff --git a/test/CodeGen/avx512vldq-builtins.c b/test/CodeGen/avx512vldq-builtins.c
index a9b6dbf..69bdc7a 100644
--- a/test/CodeGen/avx512vldq-builtins.c
+++ b/test/CodeGen/avx512vldq-builtins.c
@@ -1,4 +1,7 @@
-// RUN: %clang_cc1 %s -O0 -triple=x86_64-apple-darwin -ffreestanding -target-feature +avx512dq -target-feature +avx512vl -emit-llvm -o - -Werror | FileCheck %s
+// RUN: %clang_cc1 %s -triple=x86_64-apple-darwin -target-feature +avx512dq -target-feature +avx512vl -emit-llvm -o - -Werror | FileCheck %s
+
+// Don't include mm_malloc.h, it's system specific.
+#define __MM_MALLOC_H
 
 #include <immintrin.h>
 
@@ -229,3 +232,579 @@
   // CHECK: @llvm.x86.avx512.mask.or.ps.128
   return (__m128) _mm_maskz_or_ps(__U, __A, __B);
 }
+
+__m128i test_mm_cvtpd_epi64(__m128d __A) {
+  // CHECK-LABEL: @test_mm_cvtpd_epi64
+  // CHECK: @llvm.x86.avx512.mask.cvtpd2qq.128
+  return _mm_cvtpd_epi64(__A); 
+}
+
+__m128i test_mm_mask_cvtpd_epi64(__m128i __W, __mmask8 __U, __m128d __A) {
+  // CHECK-LABEL: @test_mm_mask_cvtpd_epi64
+  // CHECK: @llvm.x86.avx512.mask.cvtpd2qq.128
+  return _mm_mask_cvtpd_epi64(__W, __U, __A); 
+}
+
+__m128i test_mm_maskz_cvtpd_epi64(__mmask8 __U, __m128d __A) {
+  // CHECK-LABEL: @test_mm_maskz_cvtpd_epi64
+  // CHECK: @llvm.x86.avx512.mask.cvtpd2qq.128
+  return _mm_maskz_cvtpd_epi64(__U, __A); 
+}
+
+__m256i test_mm256_cvtpd_epi64(__m256d __A) {
+  // CHECK-LABEL: @test_mm256_cvtpd_epi64
+  // CHECK: @llvm.x86.avx512.mask.cvtpd2qq.256
+  return _mm256_cvtpd_epi64(__A); 
+}
+
+__m256i test_mm256_mask_cvtpd_epi64(__m256i __W, __mmask8 __U, __m256d __A) {
+  // CHECK-LABEL: @test_mm256_mask_cvtpd_epi64
+  // CHECK: @llvm.x86.avx512.mask.cvtpd2qq.256
+  return _mm256_mask_cvtpd_epi64(__W, __U, __A); 
+}
+
+__m256i test_mm256_maskz_cvtpd_epi64(__mmask8 __U, __m256d __A) {
+  // CHECK-LABEL: @test_mm256_maskz_cvtpd_epi64
+  // CHECK: @llvm.x86.avx512.mask.cvtpd2qq.256
+  return _mm256_maskz_cvtpd_epi64(__U, __A); 
+}
+
+__m128i test_mm_cvtpd_epu64(__m128d __A) {
+  // CHECK-LABEL: @test_mm_cvtpd_epu64
+  // CHECK: @llvm.x86.avx512.mask.cvtpd2uqq.128
+  return _mm_cvtpd_epu64(__A); 
+}
+
+__m128i test_mm_mask_cvtpd_epu64(__m128i __W, __mmask8 __U, __m128d __A) {
+  // CHECK-LABEL: @test_mm_mask_cvtpd_epu64
+  // CHECK: @llvm.x86.avx512.mask.cvtpd2uqq.128
+  return _mm_mask_cvtpd_epu64(__W, __U, __A); 
+}
+
+__m128i test_mm_maskz_cvtpd_epu64(__mmask8 __U, __m128d __A) {
+  // CHECK-LABEL: @test_mm_maskz_cvtpd_epu64
+  // CHECK: @llvm.x86.avx512.mask.cvtpd2uqq.128
+  return _mm_maskz_cvtpd_epu64(__U, __A); 
+}
+
+__m256i test_mm256_cvtpd_epu64(__m256d __A) {
+  // CHECK-LABEL: @test_mm256_cvtpd_epu64
+  // CHECK: @llvm.x86.avx512.mask.cvtpd2uqq.256
+  return _mm256_cvtpd_epu64(__A); 
+}
+
+__m256i test_mm256_mask_cvtpd_epu64(__m256i __W, __mmask8 __U, __m256d __A) {
+  // CHECK-LABEL: @test_mm256_mask_cvtpd_epu64
+  // CHECK: @llvm.x86.avx512.mask.cvtpd2uqq.256
+  return _mm256_mask_cvtpd_epu64(__W, __U, __A); 
+}
+
+__m256i test_mm256_maskz_cvtpd_epu64(__mmask8 __U, __m256d __A) {
+  // CHECK-LABEL: @test_mm256_maskz_cvtpd_epu64
+  // CHECK: @llvm.x86.avx512.mask.cvtpd2uqq.256
+  return _mm256_maskz_cvtpd_epu64(__U, __A); 
+}
+
+__m128i test_mm_cvtps_epi64(__m128 __A) {
+  // CHECK-LABEL: @test_mm_cvtps_epi64
+  // CHECK: @llvm.x86.avx512.mask.cvtps2qq.128
+  return _mm_cvtps_epi64(__A); 
+}
+
+__m128i test_mm_mask_cvtps_epi64(__m128i __W, __mmask8 __U, __m128 __A) {
+  // CHECK-LABEL: @test_mm_mask_cvtps_epi64
+  // CHECK: @llvm.x86.avx512.mask.cvtps2qq.128
+  return _mm_mask_cvtps_epi64(__W, __U, __A); 
+}
+
+__m128i test_mm_maskz_cvtps_epi64(__mmask8 __U, __m128 __A) {
+  // CHECK-LABEL: @test_mm_maskz_cvtps_epi64
+  // CHECK: @llvm.x86.avx512.mask.cvtps2qq.128
+  return _mm_maskz_cvtps_epi64(__U, __A); 
+}
+
+__m256i test_mm256_cvtps_epi64(__m128 __A) {
+  // CHECK-LABEL: @test_mm256_cvtps_epi64
+  // CHECK: @llvm.x86.avx512.mask.cvtps2qq.256
+  return _mm256_cvtps_epi64(__A); 
+}
+
+__m256i test_mm256_mask_cvtps_epi64(__m256i __W, __mmask8 __U, __m128 __A) {
+  // CHECK-LABEL: @test_mm256_mask_cvtps_epi64
+  // CHECK: @llvm.x86.avx512.mask.cvtps2qq.256
+  return _mm256_mask_cvtps_epi64(__W, __U, __A); 
+}
+
+__m256i test_mm256_maskz_cvtps_epi64(__mmask8 __U, __m128 __A) {
+  // CHECK-LABEL: @test_mm256_maskz_cvtps_epi64
+  // CHECK: @llvm.x86.avx512.mask.cvtps2qq.256
+  return _mm256_maskz_cvtps_epi64(__U, __A); 
+}
+
+__m128i test_mm_cvtps_epu64(__m128 __A) {
+  // CHECK-LABEL: @test_mm_cvtps_epu64
+  // CHECK: @llvm.x86.avx512.mask.cvtps2uqq.128
+  return _mm_cvtps_epu64(__A); 
+}
+
+__m128i test_mm_mask_cvtps_epu64(__m128i __W, __mmask8 __U, __m128 __A) {
+  // CHECK-LABEL: @test_mm_mask_cvtps_epu64
+  // CHECK: @llvm.x86.avx512.mask.cvtps2uqq.128
+  return _mm_mask_cvtps_epu64(__W, __U, __A); 
+}
+
+__m128i test_mm_maskz_cvtps_epu64(__mmask8 __U, __m128 __A) {
+  // CHECK-LABEL: @test_mm_maskz_cvtps_epu64
+  // CHECK: @llvm.x86.avx512.mask.cvtps2uqq.128
+  return _mm_maskz_cvtps_epu64(__U, __A); 
+}
+
+__m256i test_mm256_cvtps_epu64(__m128 __A) {
+  // CHECK-LABEL: @test_mm256_cvtps_epu64
+  // CHECK: @llvm.x86.avx512.mask.cvtps2uqq.256
+  return _mm256_cvtps_epu64(__A); 
+}
+
+__m256i test_mm256_mask_cvtps_epu64(__m256i __W, __mmask8 __U, __m128 __A) {
+  // CHECK-LABEL: @test_mm256_mask_cvtps_epu64
+  // CHECK: @llvm.x86.avx512.mask.cvtps2uqq.256
+  return _mm256_mask_cvtps_epu64(__W, __U, __A); 
+}
+
+__m256i test_mm256_maskz_cvtps_epu64(__mmask8 __U, __m128 __A) {
+  // CHECK-LABEL: @test_mm256_maskz_cvtps_epu64
+  // CHECK: @llvm.x86.avx512.mask.cvtps2uqq.256
+  return _mm256_maskz_cvtps_epu64(__U, __A); 
+}
+
+__m128d test_mm_cvtepi64_pd(__m128i __A) {
+  // CHECK-LABEL: @test_mm_cvtepi64_pd
+  // CHECK: @llvm.x86.avx512.mask.cvtqq2pd.128
+  return _mm_cvtepi64_pd(__A); 
+}
+
+__m128d test_mm_mask_cvtepi64_pd(__m128d __W, __mmask8 __U, __m128i __A) {
+  // CHECK-LABEL: @test_mm_mask_cvtepi64_pd
+  // CHECK: @llvm.x86.avx512.mask.cvtqq2pd.128
+  return _mm_mask_cvtepi64_pd(__W, __U, __A); 
+}
+
+__m128d test_mm_maskz_cvtepi64_pd(__mmask8 __U, __m128i __A) {
+  // CHECK-LABEL: @test_mm_maskz_cvtepi64_pd
+  // CHECK: @llvm.x86.avx512.mask.cvtqq2pd.128
+  return _mm_maskz_cvtepi64_pd(__U, __A); 
+}
+
+__m256d test_mm256_cvtepi64_pd(__m256i __A) {
+  // CHECK-LABEL: @test_mm256_cvtepi64_pd
+  // CHECK: @llvm.x86.avx512.mask.cvtqq2pd.256
+  return _mm256_cvtepi64_pd(__A); 
+}
+
+__m256d test_mm256_mask_cvtepi64_pd(__m256d __W, __mmask8 __U, __m256i __A) {
+  // CHECK-LABEL: @test_mm256_mask_cvtepi64_pd
+  // CHECK: @llvm.x86.avx512.mask.cvtqq2pd.256
+  return _mm256_mask_cvtepi64_pd(__W, __U, __A); 
+}
+
+__m256d test_mm256_maskz_cvtepi64_pd(__mmask8 __U, __m256i __A) {
+  // CHECK-LABEL: @test_mm256_maskz_cvtepi64_pd
+  // CHECK: @llvm.x86.avx512.mask.cvtqq2pd.256
+  return _mm256_maskz_cvtepi64_pd(__U, __A); 
+}
+
+__m128 test_mm_cvtepi64_ps(__m128i __A) {
+  // CHECK-LABEL: @test_mm_cvtepi64_ps
+  // CHECK: @llvm.x86.avx512.mask.cvtqq2ps.128
+  return _mm_cvtepi64_ps(__A); 
+}
+
+__m128 test_mm_mask_cvtepi64_ps(__m128 __W, __mmask8 __U, __m128i __A) {
+  // CHECK-LABEL: @test_mm_mask_cvtepi64_ps
+  // CHECK: @llvm.x86.avx512.mask.cvtqq2ps.128
+  return _mm_mask_cvtepi64_ps(__W, __U, __A); 
+}
+
+__m128 test_mm_maskz_cvtepi64_ps(__mmask8 __U, __m128i __A) {
+  // CHECK-LABEL: @test_mm_maskz_cvtepi64_ps
+  // CHECK: @llvm.x86.avx512.mask.cvtqq2ps.128
+  return _mm_maskz_cvtepi64_ps(__U, __A); 
+}
+
+__m128 test_mm256_cvtepi64_ps(__m256i __A) {
+  // CHECK-LABEL: @test_mm256_cvtepi64_ps
+  // CHECK: @llvm.x86.avx512.mask.cvtqq2ps.256
+  return _mm256_cvtepi64_ps(__A); 
+}
+
+__m128 test_mm256_mask_cvtepi64_ps(__m128 __W, __mmask8 __U, __m256i __A) {
+  // CHECK-LABEL: @test_mm256_mask_cvtepi64_ps
+  // CHECK: @llvm.x86.avx512.mask.cvtqq2ps.256
+  return _mm256_mask_cvtepi64_ps(__W, __U, __A); 
+}
+
+__m128 test_mm256_maskz_cvtepi64_ps(__mmask8 __U, __m256i __A) {
+  // CHECK-LABEL: @test_mm256_maskz_cvtepi64_ps
+  // CHECK: @llvm.x86.avx512.mask.cvtqq2ps.256
+  return _mm256_maskz_cvtepi64_ps(__U, __A); 
+}
+
+__m128i test_mm_cvttpd_epi64(__m128d __A) {
+  // CHECK-LABEL: @test_mm_cvttpd_epi64
+  // CHECK: @llvm.x86.avx512.mask.cvttpd2qq.128
+  return _mm_cvttpd_epi64(__A); 
+}
+
+__m128i test_mm_mask_cvttpd_epi64(__m128i __W, __mmask8 __U, __m128d __A) {
+  // CHECK-LABEL: @test_mm_mask_cvttpd_epi64
+  // CHECK: @llvm.x86.avx512.mask.cvttpd2qq.128
+  return _mm_mask_cvttpd_epi64(__W, __U, __A); 
+}
+
+__m128i test_mm_maskz_cvttpd_epi64(__mmask8 __U, __m128d __A) {
+  // CHECK-LABEL: @test_mm_maskz_cvttpd_epi64
+  // CHECK: @llvm.x86.avx512.mask.cvttpd2qq.128
+  return _mm_maskz_cvttpd_epi64(__U, __A); 
+}
+
+__m256i test_mm256_cvttpd_epi64(__m256d __A) {
+  // CHECK-LABEL: @test_mm256_cvttpd_epi64
+  // CHECK: @llvm.x86.avx512.mask.cvttpd2qq.256
+  return _mm256_cvttpd_epi64(__A); 
+}
+
+__m256i test_mm256_mask_cvttpd_epi64(__m256i __W, __mmask8 __U, __m256d __A) {
+  // CHECK-LABEL: @test_mm256_mask_cvttpd_epi64
+  // CHECK: @llvm.x86.avx512.mask.cvttpd2qq.256
+  return _mm256_mask_cvttpd_epi64(__W, __U, __A); 
+}
+
+__m256i test_mm256_maskz_cvttpd_epi64(__mmask8 __U, __m256d __A) {
+  // CHECK-LABEL: @test_mm256_maskz_cvttpd_epi64
+  // CHECK: @llvm.x86.avx512.mask.cvttpd2qq.256
+  return _mm256_maskz_cvttpd_epi64(__U, __A); 
+}
+
+__m128i test_mm_cvttpd_epu64(__m128d __A) {
+  // CHECK-LABEL: @test_mm_cvttpd_epu64
+  // CHECK: @llvm.x86.avx512.mask.cvttpd2uqq.128
+  return _mm_cvttpd_epu64(__A); 
+}
+
+__m128i test_mm_mask_cvttpd_epu64(__m128i __W, __mmask8 __U, __m128d __A) {
+  // CHECK-LABEL: @test_mm_mask_cvttpd_epu64
+  // CHECK: @llvm.x86.avx512.mask.cvttpd2uqq.128
+  return _mm_mask_cvttpd_epu64(__W, __U, __A); 
+}
+
+__m128i test_mm_maskz_cvttpd_epu64(__mmask8 __U, __m128d __A) {
+  // CHECK-LABEL: @test_mm_maskz_cvttpd_epu64
+  // CHECK: @llvm.x86.avx512.mask.cvttpd2uqq.128
+  return _mm_maskz_cvttpd_epu64(__U, __A); 
+}
+
+__m256i test_mm256_cvttpd_epu64(__m256d __A) {
+  // CHECK-LABEL: @test_mm256_cvttpd_epu64
+  // CHECK: @llvm.x86.avx512.mask.cvttpd2uqq.256
+  return _mm256_cvttpd_epu64(__A); 
+}
+
+__m256i test_mm256_mask_cvttpd_epu64(__m256i __W, __mmask8 __U, __m256d __A) {
+  // CHECK-LABEL: @test_mm256_mask_cvttpd_epu64
+  // CHECK: @llvm.x86.avx512.mask.cvttpd2uqq.256
+  return _mm256_mask_cvttpd_epu64(__W, __U, __A); 
+}
+
+__m256i test_mm256_maskz_cvttpd_epu64(__mmask8 __U, __m256d __A) {
+  // CHECK-LABEL: @test_mm256_maskz_cvttpd_epu64
+  // CHECK: @llvm.x86.avx512.mask.cvttpd2uqq.256
+  return _mm256_maskz_cvttpd_epu64(__U, __A); 
+}
+
+__m128i test_mm_cvttps_epi64(__m128 __A) {
+  // CHECK-LABEL: @test_mm_cvttps_epi64
+  // CHECK: @llvm.x86.avx512.mask.cvttps2qq.128
+  return _mm_cvttps_epi64(__A); 
+}
+
+__m128i test_mm_mask_cvttps_epi64(__m128i __W, __mmask8 __U, __m128 __A) {
+  // CHECK-LABEL: @test_mm_mask_cvttps_epi64
+  // CHECK: @llvm.x86.avx512.mask.cvttps2qq.128
+  return _mm_mask_cvttps_epi64(__W, __U, __A); 
+}
+
+__m128i test_mm_maskz_cvttps_epi64(__mmask8 __U, __m128 __A) {
+  // CHECK-LABEL: @test_mm_maskz_cvttps_epi64
+  // CHECK: @llvm.x86.avx512.mask.cvttps2qq.128
+  return _mm_maskz_cvttps_epi64(__U, __A); 
+}
+
+__m256i test_mm256_cvttps_epi64(__m128 __A) {
+  // CHECK-LABEL: @test_mm256_cvttps_epi64
+  // CHECK: @llvm.x86.avx512.mask.cvttps2qq.256
+  return _mm256_cvttps_epi64(__A); 
+}
+
+__m256i test_mm256_mask_cvttps_epi64(__m256i __W, __mmask8 __U, __m128 __A) {
+  // CHECK-LABEL: @test_mm256_mask_cvttps_epi64
+  // CHECK: @llvm.x86.avx512.mask.cvttps2qq.256
+  return _mm256_mask_cvttps_epi64(__W, __U, __A); 
+}
+
+__m256i test_mm256_maskz_cvttps_epi64(__mmask8 __U, __m128 __A) {
+  // CHECK-LABEL: @test_mm256_maskz_cvttps_epi64
+  // CHECK: @llvm.x86.avx512.mask.cvttps2qq.256
+  return _mm256_maskz_cvttps_epi64(__U, __A); 
+}
+
+__m128i test_mm_cvttps_epu64(__m128 __A) {
+  // CHECK-LABEL: @test_mm_cvttps_epu64
+  // CHECK: @llvm.x86.avx512.mask.cvttps2uqq.128
+  return _mm_cvttps_epu64(__A); 
+}
+
+__m128i test_mm_mask_cvttps_epu64(__m128i __W, __mmask8 __U, __m128 __A) {
+  // CHECK-LABEL: @test_mm_mask_cvttps_epu64
+  // CHECK: @llvm.x86.avx512.mask.cvttps2uqq.128
+  return _mm_mask_cvttps_epu64(__W, __U, __A); 
+}
+
+__m128i test_mm_maskz_cvttps_epu64(__mmask8 __U, __m128 __A) {
+  // CHECK-LABEL: @test_mm_maskz_cvttps_epu64
+  // CHECK: @llvm.x86.avx512.mask.cvttps2uqq.128
+  return _mm_maskz_cvttps_epu64(__U, __A); 
+}
+
+__m256i test_mm256_cvttps_epu64(__m128 __A) {
+  // CHECK-LABEL: @test_mm256_cvttps_epu64
+  // CHECK: @llvm.x86.avx512.mask.cvttps2uqq.256
+  return _mm256_cvttps_epu64(__A); 
+}
+
+__m256i test_mm256_mask_cvttps_epu64(__m256i __W, __mmask8 __U, __m128 __A) {
+  // CHECK-LABEL: @test_mm256_mask_cvttps_epu64
+  // CHECK: @llvm.x86.avx512.mask.cvttps2uqq.256
+  return _mm256_mask_cvttps_epu64(__W, __U, __A); 
+}
+
+__m256i test_mm256_maskz_cvttps_epu64(__mmask8 __U, __m128 __A) {
+  // CHECK-LABEL: @test_mm256_maskz_cvttps_epu64
+  // CHECK: @llvm.x86.avx512.mask.cvttps2uqq.256
+  return _mm256_maskz_cvttps_epu64(__U, __A); 
+}
+
+__m128d test_mm_cvtepu64_pd(__m128i __A) {
+  // CHECK-LABEL: @test_mm_cvtepu64_pd
+  // CHECK: @llvm.x86.avx512.mask.cvtuqq2pd.128
+  return _mm_cvtepu64_pd(__A); 
+}
+
+__m128d test_mm_mask_cvtepu64_pd(__m128d __W, __mmask8 __U, __m128i __A) {
+  // CHECK-LABEL: @test_mm_mask_cvtepu64_pd
+  // CHECK: @llvm.x86.avx512.mask.cvtuqq2pd.128
+  return _mm_mask_cvtepu64_pd(__W, __U, __A); 
+}
+
+__m128d test_mm_maskz_cvtepu64_pd(__mmask8 __U, __m128i __A) {
+  // CHECK-LABEL: @test_mm_maskz_cvtepu64_pd
+  // CHECK: @llvm.x86.avx512.mask.cvtuqq2pd.128
+  return _mm_maskz_cvtepu64_pd(__U, __A); 
+}
+
+__m256d test_mm256_cvtepu64_pd(__m256i __A) {
+  // CHECK-LABEL: @test_mm256_cvtepu64_pd
+  // CHECK: @llvm.x86.avx512.mask.cvtuqq2pd.256
+  return _mm256_cvtepu64_pd(__A); 
+}
+
+__m256d test_mm256_mask_cvtepu64_pd(__m256d __W, __mmask8 __U, __m256i __A) {
+  // CHECK-LABEL: @test_mm256_mask_cvtepu64_pd
+  // CHECK: @llvm.x86.avx512.mask.cvtuqq2pd.256
+  return _mm256_mask_cvtepu64_pd(__W, __U, __A); 
+}
+
+__m256d test_mm256_maskz_cvtepu64_pd(__mmask8 __U, __m256i __A) {
+  // CHECK-LABEL: @test_mm256_maskz_cvtepu64_pd
+  // CHECK: @llvm.x86.avx512.mask.cvtuqq2pd.256
+  return _mm256_maskz_cvtepu64_pd(__U, __A); 
+}
+
+__m128 test_mm_cvtepu64_ps(__m128i __A) {
+  // CHECK-LABEL: @test_mm_cvtepu64_ps
+  // CHECK: @llvm.x86.avx512.mask.cvtuqq2ps.128
+  return _mm_cvtepu64_ps(__A); 
+}
+
+__m128 test_mm_mask_cvtepu64_ps(__m128 __W, __mmask8 __U, __m128i __A) {
+  // CHECK-LABEL: @test_mm_mask_cvtepu64_ps
+  // CHECK: @llvm.x86.avx512.mask.cvtuqq2ps.128
+  return _mm_mask_cvtepu64_ps(__W, __U, __A); 
+}
+
+__m128 test_mm_maskz_cvtepu64_ps(__mmask8 __U, __m128i __A) {
+  // CHECK-LABEL: @test_mm_maskz_cvtepu64_ps
+  // CHECK: @llvm.x86.avx512.mask.cvtuqq2ps.128
+  return _mm_maskz_cvtepu64_ps(__U, __A); 
+}
+
+__m128 test_mm256_cvtepu64_ps(__m256i __A) {
+  // CHECK-LABEL: @test_mm256_cvtepu64_ps
+  // CHECK: @llvm.x86.avx512.mask.cvtuqq2ps.256
+  return _mm256_cvtepu64_ps(__A); 
+}
+
+__m128 test_mm256_mask_cvtepu64_ps(__m128 __W, __mmask8 __U, __m256i __A) {
+  // CHECK-LABEL: @test_mm256_mask_cvtepu64_ps
+  // CHECK: @llvm.x86.avx512.mask.cvtuqq2ps.256
+  return _mm256_mask_cvtepu64_ps(__W, __U, __A); 
+}
+
+__m128 test_mm256_maskz_cvtepu64_ps(__mmask8 __U, __m256i __A) {
+  // CHECK-LABEL: @test_mm256_maskz_cvtepu64_ps
+  // CHECK: @llvm.x86.avx512.mask.cvtuqq2ps.256
+  return _mm256_maskz_cvtepu64_ps(__U, __A); 
+}
+
+__m128d test_mm_range_pd(__m128d __A, __m128d __B) {
+  // CHECK-LABEL: @test_mm_range_pd
+  // CHECK: @llvm.x86.avx512.mask.range.pd.128
+  return _mm_range_pd(__A, __B, 4); 
+}
+
+__m128d test_mm_mask_range_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
+  // CHECK-LABEL: @test_mm_mask_range_pd
+  // CHECK: @llvm.x86.avx512.mask.range.pd.128
+  return _mm_mask_range_pd(__W, __U, __A, __B, 4); 
+}
+
+__m128d test_mm_maskz_range_pd(__mmask8 __U, __m128d __A, __m128d __B) {
+  // CHECK-LABEL: @test_mm_maskz_range_pd
+  // CHECK: @llvm.x86.avx512.mask.range.pd.128
+  return _mm_maskz_range_pd(__U, __A, __B, 4); 
+}
+
+__m256d test_mm256_range_pd(__m256d __A, __m256d __B) {
+  // CHECK-LABEL: @test_mm256_range_pd
+  // CHECK: @llvm.x86.avx512.mask.range.pd.256
+  return _mm256_range_pd(__A, __B, 4); 
+}
+
+__m256d test_mm256_mask_range_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
+  // CHECK-LABEL: @test_mm256_mask_range_pd
+  // CHECK: @llvm.x86.avx512.mask.range.pd.256
+  return _mm256_mask_range_pd(__W, __U, __A, __B, 4); 
+}
+
+__m256d test_mm256_maskz_range_pd(__mmask8 __U, __m256d __A, __m256d __B) {
+  // CHECK-LABEL: @test_mm256_maskz_range_pd
+  // CHECK: @llvm.x86.avx512.mask.range.pd.256
+  return _mm256_maskz_range_pd(__U, __A, __B, 4); 
+}
+
+__m128 test_mm_range_ps(__m128 __A, __m128 __B) {
+  // CHECK-LABEL: @test_mm_range_ps
+  // CHECK: @llvm.x86.avx512.mask.range.ps.128
+  return _mm_range_ps(__A, __B, 4); 
+}
+
+__m128 test_mm_mask_range_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
+  // CHECK-LABEL: @test_mm_mask_range_ps
+  // CHECK: @llvm.x86.avx512.mask.range.ps.128
+  return _mm_mask_range_ps(__W, __U, __A, __B, 4); 
+}
+
+__m128 test_mm_maskz_range_ps(__mmask8 __U, __m128 __A, __m128 __B) {
+  // CHECK-LABEL: @test_mm_maskz_range_ps
+  // CHECK: @llvm.x86.avx512.mask.range.ps.128
+  return _mm_maskz_range_ps(__U, __A, __B, 4); 
+}
+
+__m256 test_mm256_range_ps(__m256 __A, __m256 __B) {
+  // CHECK-LABEL: @test_mm256_range_ps
+  // CHECK: @llvm.x86.avx512.mask.range.ps.256
+  return _mm256_range_ps(__A, __B, 4); 
+}
+
+__m256 test_mm256_mask_range_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
+  // CHECK-LABEL: @test_mm256_mask_range_ps
+  // CHECK: @llvm.x86.avx512.mask.range.ps.256
+  return _mm256_mask_range_ps(__W, __U, __A, __B, 4); 
+}
+
+__m256 test_mm256_maskz_range_ps(__mmask8 __U, __m256 __A, __m256 __B) {
+  // CHECK-LABEL: @test_mm256_maskz_range_ps
+  // CHECK: @llvm.x86.avx512.mask.range.ps.256
+  return _mm256_maskz_range_ps(__U, __A, __B, 4); 
+}
+
+__m128d test_mm_reduce_pd(__m128d __A) {
+  // CHECK-LABEL: @test_mm_reduce_pd
+  // CHECK: @llvm.x86.avx512.mask.reduce.pd.128
+  return _mm_reduce_pd(__A, 4); 
+}
+
+__m128d test_mm_mask_reduce_pd(__m128d __W, __mmask8 __U, __m128d __A) {
+  // CHECK-LABEL: @test_mm_mask_reduce_pd
+  // CHECK: @llvm.x86.avx512.mask.reduce.pd.128
+  return _mm_mask_reduce_pd(__W, __U, __A, 4); 
+}
+
+__m128d test_mm_maskz_reduce_pd(__mmask8 __U, __m128d __A) {
+  // CHECK-LABEL: @test_mm_maskz_reduce_pd
+  // CHECK: @llvm.x86.avx512.mask.reduce.pd.128
+  return _mm_maskz_reduce_pd(__U, __A, 4); 
+}
+
+__m256d test_mm256_reduce_pd(__m256d __A) {
+  // CHECK-LABEL: @test_mm256_reduce_pd
+  // CHECK: @llvm.x86.avx512.mask.reduce.pd.256
+  return _mm256_reduce_pd(__A, 4); 
+}
+
+__m256d test_mm256_mask_reduce_pd(__m256d __W, __mmask8 __U, __m256d __A) {
+  // CHECK-LABEL: @test_mm256_mask_reduce_pd
+  // CHECK: @llvm.x86.avx512.mask.reduce.pd.256
+  return _mm256_mask_reduce_pd(__W, __U, __A, 4); 
+}
+
+__m256d test_mm256_maskz_reduce_pd(__mmask8 __U, __m256d __A) {
+  // CHECK-LABEL: @test_mm256_maskz_reduce_pd
+  // CHECK: @llvm.x86.avx512.mask.reduce.pd.256
+  return _mm256_maskz_reduce_pd(__U, __A, 4); 
+}
+
+__m128 test_mm_reduce_ps(__m128 __A) {
+  // CHECK-LABEL: @test_mm_reduce_ps
+  // CHECK: @llvm.x86.avx512.mask.reduce.ps.128
+  return _mm_reduce_ps(__A, 4); 
+}
+
+__m128 test_mm_mask_reduce_ps(__m128 __W, __mmask8 __U, __m128 __A) {
+  // CHECK-LABEL: @test_mm_mask_reduce_ps
+  // CHECK: @llvm.x86.avx512.mask.reduce.ps.128
+  return _mm_mask_reduce_ps(__W, __U, __A, 4); 
+}
+
+__m128 test_mm_maskz_reduce_ps(__mmask8 __U, __m128 __A) {
+  // CHECK-LABEL: @test_mm_maskz_reduce_ps
+  // CHECK: @llvm.x86.avx512.mask.reduce.ps.128
+  return _mm_maskz_reduce_ps(__U, __A, 4); 
+}
+
+__m256 test_mm256_reduce_ps(__m256 __A) {
+  // CHECK-LABEL: @test_mm256_reduce_ps
+  // CHECK: @llvm.x86.avx512.mask.reduce.ps.256
+  return _mm256_reduce_ps(__A, 4); 
+}
+
+__m256 test_mm256_mask_reduce_ps(__m256 __W, __mmask8 __U, __m256 __A) {
+  // CHECK-LABEL: @test_mm256_mask_reduce_ps
+  // CHECK: @llvm.x86.avx512.mask.reduce.ps.256
+  return _mm256_mask_reduce_ps(__W, __U, __A, 4); 
+}
+
+__m256 test_mm256_maskz_reduce_ps(__mmask8 __U, __m256 __A) {
+  // CHECK-LABEL: @test_mm256_maskz_reduce_ps
+  // CHECK: @llvm.x86.avx512.mask.reduce.ps.256
+  return _mm256_maskz_reduce_ps(__U, __A, 4); 
+}
diff --git a/test/CodeGen/big-atomic-ops.c b/test/CodeGen/big-atomic-ops.c
index 28b7b5d..6a7a700 100644
--- a/test/CodeGen/big-atomic-ops.c
+++ b/test/CodeGen/big-atomic-ops.c
@@ -1,5 +1,5 @@
 // RUN: %clang_cc1 %s -emit-llvm -o - -triple=x86_64-apple-macosx10.9.0 | FileCheck %s
-
+// REQUIRES: x86-registered-target
 // Also test serialization of atomic operations here, to avoid duplicating the
 // test.
 // RUN: %clang_cc1 %s -emit-pch -o %t -triple=x86_64-apple-macosx10.9.0
diff --git a/test/CodeGen/bitfield-2.c b/test/CodeGen/bitfield-2.c
index c5154fc..9d66957 100644
--- a/test/CodeGen/bitfield-2.c
+++ b/test/CodeGen/bitfield-2.c
@@ -14,7 +14,7 @@
 // CHECK-RECORD:   LLVMType:%struct.s0 = type { [3 x i8] }
 // CHECK-RECORD:   IsZeroInitializable:1
 // CHECK-RECORD:   BitFields:[
-// CHECK-RECORD:     <CGBitFieldInfo Offset:0 Size:24 IsSigned:1 StorageSize:24 StorageAlignment:1>
+// CHECK-RECORD:     <CGBitFieldInfo Offset:0 Size:24 IsSigned:1 StorageSize:24 StorageOffset:0>
 struct __attribute((packed)) s0 {
   int f0 : 24;
 };
@@ -54,8 +54,8 @@
 // CHECK-RECORD:   LLVMType:%struct.s1 = type { [3 x i8] }
 // CHECK-RECORD:   IsZeroInitializable:1
 // CHECK-RECORD:   BitFields:[
-// CHECK-RECORD:     <CGBitFieldInfo Offset:0 Size:10 IsSigned:1 StorageSize:24 StorageAlignment:1>
-// CHECK-RECORD:     <CGBitFieldInfo Offset:10 Size:10 IsSigned:1 StorageSize:24 StorageAlignment:1>
+// CHECK-RECORD:     <CGBitFieldInfo Offset:0 Size:10 IsSigned:1 StorageSize:24 StorageOffset:0>
+// CHECK-RECORD:     <CGBitFieldInfo Offset:10 Size:10 IsSigned:1 StorageSize:24 StorageOffset:0>
 
 #pragma pack(push)
 #pragma pack(1)
@@ -102,7 +102,7 @@
 // CHECK-RECORD:   LLVMType:%union.u2 = type { i8 }
 // CHECK-RECORD:   IsZeroInitializable:1
 // CHECK-RECORD:   BitFields:[
-// CHECK-RECORD:     <CGBitFieldInfo Offset:0 Size:3 IsSigned:0 StorageSize:8 StorageAlignment:1>
+// CHECK-RECORD:     <CGBitFieldInfo Offset:0 Size:3 IsSigned:0 StorageSize:8 StorageOffset:0>
 
 union __attribute__((packed)) u2 {
   unsigned long long f0 : 3;
@@ -237,7 +237,7 @@
 /***/
 
 struct s6 {
-  _Bool f0 : 2;
+  unsigned f0 : 2;
 };
 
 struct s6 g6 = { 0xF };
@@ -274,8 +274,8 @@
 // CHECK-RECORD:   LLVMType:%struct.s7 = type { i32, i32, i32, i8, i32, [12 x i8] }
 // CHECK-RECORD:   IsZeroInitializable:1
 // CHECK-RECORD:   BitFields:[
-// CHECK-RECORD:     <CGBitFieldInfo Offset:0 Size:5 IsSigned:1 StorageSize:8 StorageAlignment:4>
-// CHECK-RECORD:     <CGBitFieldInfo Offset:0 Size:29 IsSigned:1 StorageSize:32 StorageAlignment:16>
+// CHECK-RECORD:     <CGBitFieldInfo Offset:0 Size:5 IsSigned:1 StorageSize:8 StorageOffset:12>
+// CHECK-RECORD:     <CGBitFieldInfo Offset:0 Size:29 IsSigned:1 StorageSize:32 StorageOffset:16>
 
 struct __attribute__((aligned(16))) s7 {
   int a, b, c;
diff --git a/test/CodeGen/block-byref-aggr.c b/test/CodeGen/block-byref-aggr.c
index 910f6da..7d146a2 100644
--- a/test/CodeGen/block-byref-aggr.c
+++ b/test/CodeGen/block-byref-aggr.c
@@ -16,7 +16,7 @@
 // CHECK:      [[A:%.*]] = alloca [[BYREF:%.*]], align 8
 // CHECK-NEXT: [[TEMP:%.*]] = alloca [[AGG]], align 4
 // CHECK:      [[RESULT:%.*]] = call i32 @makeAgg()
-// CHECK-NEXT: [[T0:%.*]] = getelementptr [[AGG]], [[AGG]]* [[TEMP]], i32 0, i32 0
+// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[AGG]], [[AGG]]* [[TEMP]], i32 0, i32 0
 // CHECK-NEXT: store i32 [[RESULT]], i32* [[T0]]
 //   Check that we properly assign into the forwarding pointer.
 // CHECK-NEXT: [[A_FORWARDING:%.*]] = getelementptr inbounds [[BYREF]], [[BYREF]]* [[A]], i32 0, i32 1
@@ -42,7 +42,7 @@
 // CHECK-NEXT: [[B:%.*]] = alloca [[B_BYREF:%.*]], align 8
 // CHECK-NEXT: [[TEMP:%.*]] = alloca [[AGG]], align 4
 // CHECK:      [[RESULT:%.*]] = call i32 @makeAgg()
-// CHECK-NEXT: [[T0:%.*]] = getelementptr [[AGG]], [[AGG]]* [[TEMP]], i32 0, i32 0
+// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[AGG]], [[AGG]]* [[TEMP]], i32 0, i32 0
 // CHECK-NEXT: store i32 [[RESULT]], i32* [[T0]]
 //   Check that we properly assign into the forwarding pointer, first for b:
 // CHECK-NEXT: [[B_FORWARDING:%.*]] = getelementptr inbounds [[B_BYREF]], [[B_BYREF]]* [[B]], i32 0, i32 1
diff --git a/test/CodeGen/block-with-perdefinedexpr.c b/test/CodeGen/block-with-perdefinedexpr.c
index 68fdea6..94d67c3 100644
--- a/test/CodeGen/block-with-perdefinedexpr.c
+++ b/test/CodeGen/block-with-perdefinedexpr.c
@@ -5,6 +5,7 @@
 
 void handler( );
 
+__attribute__((used))
 static void (^spd)() = ^()
 {
  handler( ^(){ syslog("%s", __FUNCTION__); } );
diff --git a/test/CodeGen/bmi2-builtins.c b/test/CodeGen/bmi2-builtins.c
index 5aa54fd..b4e3fec 100644
--- a/test/CodeGen/bmi2-builtins.c
+++ b/test/CodeGen/bmi2-builtins.c
@@ -1,5 +1,5 @@
-// RUN: %clang_cc1 %s -O3 -triple=x86_64-apple-darwin -target-feature +bmi2 -emit-llvm -o - | FileCheck %s
-// RUN: %clang_cc1 %s -O3 -triple=i386-apple-darwin -target-feature +bmi2 -emit-llvm -o - | FileCheck %s --check-prefix=B32
+// RUN: %clang_cc1 %s -triple=x86_64-apple-darwin -target-feature +bmi2 -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 %s -triple=i386-apple-darwin -target-feature +bmi2 -emit-llvm -o - | FileCheck %s --check-prefix=B32
 
 // Don't include mm_malloc.h, it's system specific.
 #define __MM_MALLOC_H
@@ -24,9 +24,9 @@
 unsigned int test_mulx_u32(unsigned int __X, unsigned int __Y,
                                  unsigned int *__P) {
   // CHECK: @test_mulx_u32
-  // CHECK-NOT: mul nuw i64
+  // CHECK-NOT: mul i64
   // B32: @test_mulx_u32
-  // B32: mul nuw i64
+  // B32: mul i64
   return _mulx_u32(__X, __Y, __P);
 }
 
@@ -48,6 +48,6 @@
 unsigned long long test_mulx_u64(unsigned long long __X, unsigned long long __Y,
                                  unsigned long long *__P) {
   // CHECK: @test_mulx_u64
-  // CHECK: mul nuw i128
+  // CHECK: mul i128
   return _mulx_u64(__X, __Y, __P);
 }
diff --git a/test/CodeGen/bounds-checking.c b/test/CodeGen/bounds-checking.c
index d93cd3e..90b7f0f 100644
--- a/test/CodeGen/bounds-checking.c
+++ b/test/CodeGen/bounds-checking.c
@@ -1,5 +1,5 @@
 // RUN: %clang_cc1 -fsanitize=local-bounds -emit-llvm -triple x86_64-apple-darwin10 %s -o - | FileCheck %s
-// RUN: %clang_cc1 -fsanitize=array-bounds -O -fsanitize-undefined-trap-on-error -emit-llvm -triple x86_64-apple-darwin10 -DNO_DYNAMIC %s -o - | FileCheck %s
+// RUN: %clang_cc1 -fsanitize=array-bounds -O -fsanitize-trap=array-bounds -emit-llvm -triple x86_64-apple-darwin10 -DNO_DYNAMIC %s -o - | FileCheck %s
 
 // CHECK-LABEL: @f
 double f(int b, int i) {
diff --git a/test/CodeGen/builtin-cpu-supports.c b/test/CodeGen/builtin-cpu-supports.c
new file mode 100644
index 0000000..9681392
--- /dev/null
+++ b/test/CodeGen/builtin-cpu-supports.c
@@ -0,0 +1,16 @@
+// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -emit-llvm < %s| FileCheck %s
+
+// Test that the structure definition, the GEP offsets, the name of the global,
+// the bit extraction, and the icmp are all correct.
+extern void a(const char *);
+
+int main() {
+  if (__builtin_cpu_supports("sse4.2"))
+    a("sse4.2");
+
+  // CHECK: [[LOAD:%[^ ]+]] = load i32, i32* getelementptr inbounds ({ i32, i32, i32, [1 x i32] }, { i32, i32, i32, [1 x i32] }* @__cpu_model, i32 0, i32 3, i32 0)
+  // CHECK: [[AND:%[^ ]+]] = and i32 [[LOAD]], 256
+  // CHECK: = icmp ne i32 [[AND]], 0
+
+  return 0;
+}
diff --git a/test/CodeGen/builtin-unpredictable.c b/test/CodeGen/builtin-unpredictable.c
new file mode 100644
index 0000000..653f231
--- /dev/null
+++ b/test/CodeGen/builtin-unpredictable.c
@@ -0,0 +1,46 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-unknown -emit-llvm -disable-llvm-optzns -o - %s -O1 | FileCheck %s
+// RUN: %clang_cc1 -triple x86_64-unknown-unknown -emit-llvm -o - %s -O0 | FileCheck %s --check-prefix=CHECK_O0
+
+// When optimizing, the builtin should be converted to metadata.
+// When not optimizing, there should be no metadata created for the builtin.
+// In both cases, the builtin should be removed from the code.
+
+void foo();
+void branch(int x) {
+// CHECK-LABEL: define void @branch(
+
+// CHECK-NOT: builtin_unpredictable
+// CHECK: !unpredictable [[METADATA:.+]]
+
+// CHECK_O0-NOT: builtin_unpredictable
+// CHECK_O0-NOT: !unpredictable 
+
+  if (__builtin_unpredictable(x > 0))
+    foo ();
+}
+
+int unpredictable_switch(int x) {
+// CHECK-LABEL: @unpredictable_switch(
+
+// CHECK-NOT: builtin_unpredictable
+// CHECK: !unpredictable [[METADATA:.+]]
+
+// CHECK_O0-NOT: builtin_unpredictable
+// CHECK_O0-NOT: !unpredictable 
+
+  switch(__builtin_unpredictable(x)) {
+  default:
+    return 0;
+  case 0:
+  case 1:
+  case 2:
+    return 1;
+  case 5:
+    return 5;
+  };
+
+  return 0;
+}
+
+// CHECK: [[METADATA]] = !{}
+
diff --git a/test/CodeGen/builtins-arm.c b/test/CodeGen/builtins-arm.c
index 2b81856..4cec84c 100644
--- a/test/CodeGen/builtins-arm.c
+++ b/test/CodeGen/builtins-arm.c
@@ -85,6 +85,44 @@
 // CHECK: call {{.*}} @llvm.prefetch(i8* %{{.*}}, i32 1, i32 3, i32 0)
 }
 
+unsigned mrc() {
+  // CHECK: define i32 @mrc()
+  // CHECK: [[R:%.*]] = {{.*}} call i32 @llvm.arm.mrc(i32 15, i32 0, i32 13, i32 0, i32 3)
+  // CHECK-NEXT: ret i32 [[R]]
+  return __builtin_arm_mrc(15, 0, 13, 0, 3);
+}
+
+unsigned mrc2() {
+  // CHECK: define i32 @mrc2()
+  // CHECK: [[R:%.*]] = {{.*}} call i32 @llvm.arm.mrc2(i32 15, i32 0, i32 13, i32 0, i32 3)
+  // CHECK-NEXT: ret i32 [[R]]
+  return __builtin_arm_mrc2(15, 0, 13, 0, 3);
+}
+
+void mcr(unsigned a) {
+  // CHECK: define void @mcr(i32 [[A:%.*]])
+  // CHECK: call void @llvm.arm.mcr(i32 15, i32 0, i32 [[A]], i32 13, i32 0, i32 3)
+  __builtin_arm_mcr(15, 0, a, 13, 0, 3);
+}
+
+void mcr2(unsigned a) {
+  // CHECK: define void @mcr2(i32 [[A:%.*]])
+  // CHECK: call void @llvm.arm.mcr2(i32 15, i32 0, i32 [[A]], i32 13, i32 0, i32 3)
+  __builtin_arm_mcr2(15, 0, a, 13, 0, 3);
+}
+
+void mcrr(unsigned a, unsigned b) {
+  // CHECK: define void @mcrr(i32 [[A:%.*]], i32 [[B:%.*]])
+  // CHECK: call void @llvm.arm.mcrr(i32 15, i32 0, i32 [[A]], i32 [[B]], i32 0)
+  __builtin_arm_mcrr(15, 0, a, b, 0);
+}
+
+void mcrr2(unsigned a, unsigned b) {
+  // CHECK: define void @mcrr2(i32 [[A:%.*]], i32 [[B:%.*]])
+  // CHECK: call void @llvm.arm.mcrr2(i32 15, i32 0, i32 [[A]], i32 [[B]], i32 0)
+  __builtin_arm_mcrr2(15, 0, a, b, 0);
+}
+
 unsigned rsr() {
   // CHECK: [[V0:[%A-Za-z0-9.]+]] = {{.*}} call i32 @llvm.read_register.i32(metadata !7)
   // CHECK-NEXT: ret i32 [[V0]]
diff --git a/test/CodeGen/builtins-arm64.c b/test/CodeGen/builtins-arm64.c
index b37387a..16e22d7 100644
--- a/test/CodeGen/builtins-arm64.c
+++ b/test/CodeGen/builtins-arm64.c
@@ -5,6 +5,11 @@
 // CHECK: call {{.*}} @__clear_cache
 }
 
+void *tp (void) {
+  return __builtin_thread_pointer ();
+// CHECK: call {{.*}} @llvm.aarch64.thread.pointer()
+}
+
 // CHECK: call {{.*}} @llvm.aarch64.rbit.i32(i32 %a)
 unsigned rbit(unsigned a) {
   return __builtin_arm_rbit(a);
@@ -45,37 +50,37 @@
 }
 
 unsigned rsr() {
-  // CHECK: [[V0:[%A-Za-z0-9.]+]] = {{.*}} call i64 @llvm.read_register.i64(metadata !1)
+  // CHECK: [[V0:[%A-Za-z0-9.]+]] = {{.*}} call i64 @llvm.read_register.i64(metadata ![[M0:[0-9]]])
   // CHECK-NEXT: trunc i64 [[V0]] to i32
   return __builtin_arm_rsr("1:2:3:4:5");
 }
 
 unsigned long rsr64() {
-  // CHECK: call i64 @llvm.read_register.i64(metadata !1)
+  // CHECK: call i64 @llvm.read_register.i64(metadata ![[M0:[0-9]]])
   return __builtin_arm_rsr64("1:2:3:4:5");
 }
 
 void *rsrp() {
-  // CHECK: [[V0:[%A-Za-z0-9.]+]] = {{.*}} call i64 @llvm.read_register.i64(metadata !1)
+  // CHECK: [[V0:[%A-Za-z0-9.]+]] = {{.*}} call i64 @llvm.read_register.i64(metadata ![[M0:[0-9]]])
   // CHECK-NEXT: inttoptr i64 [[V0]] to i8*
   return __builtin_arm_rsrp("1:2:3:4:5");
 }
 
 void wsr(unsigned v) {
   // CHECK: [[V0:[%A-Za-z0-9.]+]] = zext i32 %v to i64
-  // CHECK-NEXT: call void @llvm.write_register.i64(metadata !1, i64 [[V0]])
+  // CHECK-NEXT: call void @llvm.write_register.i64(metadata ![[M0:[0-9]]], i64 [[V0]])
   __builtin_arm_wsr("1:2:3:4:5", v);
 }
 
 void wsr64(unsigned long v) {
-  // CHECK: call void @llvm.write_register.i64(metadata !1, i64 %v)
+  // CHECK: call void @llvm.write_register.i64(metadata ![[M0:[0-9]]], i64 %v)
   __builtin_arm_wsr64("1:2:3:4:5", v);
 }
 
 void wsrp(void *v) {
   // CHECK: [[V0:[%A-Za-z0-9.]+]] = ptrtoint i8* %v to i64
-  // CHECK-NEXT: call void @llvm.write_register.i64(metadata !1, i64 [[V0]])
+  // CHECK-NEXT: call void @llvm.write_register.i64(metadata ![[M0:[0-9]]], i64 [[V0]])
   __builtin_arm_wsrp("1:2:3:4:5", v);
 }
 
-// CHECK: !1 = !{!"1:2:3:4:5"}
+// CHECK: ![[M0]] = !{!"1:2:3:4:5"}
diff --git a/test/CodeGen/builtins-nvptx.c b/test/CodeGen/builtins-nvptx.c
index 5f91f7a..745e74f 100644
--- a/test/CodeGen/builtins-nvptx.c
+++ b/test/CodeGen/builtins-nvptx.c
@@ -1,8 +1,13 @@
 // REQUIRES: nvptx-registered-target
-// RUN: %clang_cc1 -triple nvptx-unknown-unknown -S -emit-llvm -o - %s | FileCheck %s
-// RUN: %clang_cc1 -triple nvptx64-unknown-unknown -S -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple nvptx-unknown-unknown -fcuda-is-device -S -emit-llvm -o - -x cuda %s | FileCheck %s
+// RUN: %clang_cc1 -triple nvptx64-unknown-unknown -fcuda-is-device -S -emit-llvm -o - -x cuda %s | FileCheck %s
 
-int read_tid() {
+#define __device__ __attribute__((device))
+#define __global__ __attribute__((global))
+#define __shared__ __attribute__((shared))
+#define __constant__ __attribute__((constant))
+
+__device__ int read_tid() {
 
 // CHECK: call i32 @llvm.ptx.read.tid.x()
 // CHECK: call i32 @llvm.ptx.read.tid.y()
@@ -18,7 +23,7 @@
 
 }
 
-int read_ntid() {
+__device__ int read_ntid() {
 
 // CHECK: call i32 @llvm.ptx.read.ntid.x()
 // CHECK: call i32 @llvm.ptx.read.ntid.y()
@@ -34,7 +39,7 @@
 
 }
 
-int read_ctaid() {
+__device__ int read_ctaid() {
 
 // CHECK: call i32 @llvm.ptx.read.ctaid.x()
 // CHECK: call i32 @llvm.ptx.read.ctaid.y()
@@ -50,7 +55,7 @@
 
 }
 
-int read_nctaid() {
+__device__ int read_nctaid() {
 
 // CHECK: call i32 @llvm.ptx.read.nctaid.x()
 // CHECK: call i32 @llvm.ptx.read.nctaid.y()
@@ -66,7 +71,7 @@
 
 }
 
-int read_ids() {
+__device__ int read_ids() {
 
 // CHECK: call i32 @llvm.ptx.read.laneid()
 // CHECK: call i32 @llvm.ptx.read.warpid()
@@ -86,7 +91,7 @@
 
 }
 
-int read_lanemasks() {
+__device__ int read_lanemasks() {
 
 // CHECK: call i32 @llvm.ptx.read.lanemask.eq()
 // CHECK: call i32 @llvm.ptx.read.lanemask.le()
@@ -104,20 +109,18 @@
 
 }
 
-
-long read_clocks() {
+__device__ long long read_clocks() {
 
 // CHECK: call i32 @llvm.ptx.read.clock()
 // CHECK: call i64 @llvm.ptx.read.clock64()
 
   int a = __builtin_ptx_read_clock();
-  long b = __builtin_ptx_read_clock64();
+  long long b = __builtin_ptx_read_clock64();
 
-  return (long)a + b;
-
+  return a + b;
 }
 
-int read_pms() {
+__device__ int read_pms() {
 
 // CHECK: call i32 @llvm.ptx.read.pm0()
 // CHECK: call i32 @llvm.ptx.read.pm1()
@@ -133,7 +136,7 @@
 
 }
 
-void sync() {
+__device__ void sync() {
 
 // CHECK: call void @llvm.ptx.bar.sync(i32 0)
 
@@ -146,7 +149,7 @@
 
 // The idea is not to test all intrinsics, just that Clang is recognizing the
 // builtins defined in BuiltinsNVPTX.def
-void nvvm_math(float f1, float f2, double d1, double d2) {
+__device__ void nvvm_math(float f1, float f2, double d1, double d2) {
 // CHECK: call float @llvm.nvvm.fmax.f
   float t1 = __nvvm_fmax_f(f1, f2);
 // CHECK: call float @llvm.nvvm.fmin.f
@@ -176,3 +179,98 @@
 // CHECK: call void @llvm.nvvm.barrier0()
   __nvvm_bar0();
 }
+
+__device__ int di;
+__shared__ int si;
+__device__ long dl;
+__shared__ long sl;
+__device__ long long dll;
+__shared__ long long sll;
+
+// Check for atomic intrinsics
+// CHECK-LABEL: nvvm_atom
+__device__ void nvvm_atom(float *fp, float f, int *ip, int i, long *lp, long l,
+                          long long *llp, long long ll) {
+  // CHECK: atomicrmw add
+  __nvvm_atom_add_gen_i(ip, i);
+  // CHECK: atomicrmw add
+  __nvvm_atom_add_gen_l(&dl, l);
+  // CHECK: atomicrmw add
+  __nvvm_atom_add_gen_ll(&sll, ll);
+
+  // CHECK: atomicrmw sub
+  __nvvm_atom_sub_gen_i(ip, i);
+  // CHECK: atomicrmw sub
+  __nvvm_atom_sub_gen_l(&dl, l);
+  // CHECK: atomicrmw sub
+  __nvvm_atom_sub_gen_ll(&sll, ll);
+
+  // CHECK: atomicrmw and
+  __nvvm_atom_and_gen_i(ip, i);
+  // CHECK: atomicrmw and
+  __nvvm_atom_and_gen_l(&dl, l);
+  // CHECK: atomicrmw and
+  __nvvm_atom_and_gen_ll(&sll, ll);
+
+  // CHECK: atomicrmw or
+  __nvvm_atom_or_gen_i(ip, i);
+  // CHECK: atomicrmw or
+  __nvvm_atom_or_gen_l(&dl, l);
+  // CHECK: atomicrmw or
+  __nvvm_atom_or_gen_ll(&sll, ll);
+
+  // CHECK: atomicrmw xor
+  __nvvm_atom_xor_gen_i(ip, i);
+  // CHECK: atomicrmw xor
+  __nvvm_atom_xor_gen_l(&dl, l);
+  // CHECK: atomicrmw xor
+  __nvvm_atom_xor_gen_ll(&sll, ll);
+
+  // CHECK: atomicrmw xchg
+  __nvvm_atom_xchg_gen_i(ip, i);
+  // CHECK: atomicrmw xchg
+  __nvvm_atom_xchg_gen_l(&dl, l);
+  // CHECK: atomicrmw xchg
+  __nvvm_atom_xchg_gen_ll(&sll, ll);
+
+  // CHECK: atomicrmw max i32*
+  __nvvm_atom_max_gen_i(ip, i);
+  // CHECK: atomicrmw umax i32*
+  __nvvm_atom_max_gen_ui((unsigned int *)ip, i);
+  // CHECK: atomicrmw max
+  __nvvm_atom_max_gen_l(&dl, l);
+  // CHECK: atomicrmw umax
+  __nvvm_atom_max_gen_ul((unsigned long *)&dl, l);
+  // CHECK: atomicrmw max i64*
+  __nvvm_atom_max_gen_ll(&sll, ll);
+  // CHECK: atomicrmw umax i64*
+  __nvvm_atom_max_gen_ull((unsigned long long *)&sll, ll);
+
+  // CHECK: atomicrmw min i32*
+  __nvvm_atom_min_gen_i(ip, i);
+  // CHECK: atomicrmw umin i32*
+  __nvvm_atom_min_gen_ui((unsigned int *)ip, i);
+  // CHECK: atomicrmw min
+  __nvvm_atom_min_gen_l(&dl, l);
+  // CHECK: atomicrmw umin
+  __nvvm_atom_min_gen_ul((unsigned long *)&dl, l);
+  // CHECK: atomicrmw min i64*
+  __nvvm_atom_min_gen_ll(&sll, ll);
+  // CHECK: atomicrmw umin i64*
+  __nvvm_atom_min_gen_ull((unsigned long long *)&sll, ll);
+
+  // CHECK: cmpxchg
+  // CHECK-NEXT: extractvalue { i32, i1 } {{%[0-9]+}}, 0
+  __nvvm_atom_cas_gen_i(ip, 0, i);
+  // CHECK: cmpxchg
+  // CHECK-NEXT: extractvalue { {{i32|i64}}, i1 } {{%[0-9]+}}, 0
+  __nvvm_atom_cas_gen_l(&dl, 0, l);
+  // CHECK: cmpxchg
+  // CHECK-NEXT: extractvalue { i64, i1 } {{%[0-9]+}}, 0
+  __nvvm_atom_cas_gen_ll(&sll, 0, ll);
+
+  // CHECK: call float @llvm.nvvm.atomic.load.add.f32.p0f32
+  __nvvm_atom_add_gen_f(fp, f);
+
+  // CHECK: ret
+}
diff --git a/test/CodeGen/builtins-overflow.c b/test/CodeGen/builtins-overflow.c
index 5c5500d..c8d828d 100644
--- a/test/CodeGen/builtins-overflow.c
+++ b/test/CodeGen/builtins-overflow.c
@@ -11,6 +11,171 @@
 extern int IntErrorCode;
 extern long LongErrorCode;
 extern long long LongLongErrorCode;
+void overflowed(void);
+
+unsigned test_add_overflow_uint_uint_uint(unsigned x, unsigned y) {
+  // CHECK-LABEL: define i32 @test_add_overflow_uint_uint_uint
+  // CHECK-NOT: ext
+  // CHECK: [[S:%.+]] = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %{{.+}}, i32 %{{.+}})
+  // CHECK-DAG: [[Q:%.+]] = extractvalue { i32, i1 } [[S]], 0
+  // CHECK-DAG: [[C:%.+]] = extractvalue { i32, i1 } [[S]], 1
+  // CHECK: store i32 [[Q]], i32*
+  // CHECK: br i1 [[C]]
+  unsigned r;
+  if (__builtin_add_overflow(x, y, &r))
+    overflowed();
+  return r;
+}
+
+int test_add_overflow_int_int_int(int x, int y) {
+  // CHECK-LABEL: define i32 @test_add_overflow_int_int_int
+  // CHECK-NOT: ext
+  // CHECK: [[S:%.+]] = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %{{.+}}, i32 %{{.+}})
+  // CHECK-DAG: [[C:%.+]] = extractvalue { i32, i1 } [[S]], 1
+  // CHECK-DAG: [[Q:%.+]] = extractvalue { i32, i1 } [[S]], 0
+  // CHECK: store i32 [[Q]], i32*
+  // CHECK: br i1 [[C]]
+  int r;
+  if (__builtin_add_overflow(x, y, &r))
+    overflowed();
+  return r;
+}
+
+unsigned test_sub_overflow_uint_uint_uint(unsigned x, unsigned y) {
+  // CHECK-LABEL: define i32 @test_sub_overflow_uint_uint_uint
+  // CHECK-NOT: ext
+  // CHECK: [[S:%.+]] = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %{{.+}}, i32 %{{.+}})
+  // CHECK-DAG: [[Q:%.+]] = extractvalue { i32, i1 } [[S]], 0
+  // CHECK-DAG: [[C:%.+]] = extractvalue { i32, i1 } [[S]], 1
+  // CHECK: store i32 [[Q]], i32*
+  // CHECK: br i1 [[C]]
+  unsigned r;
+  if (__builtin_sub_overflow(x, y, &r))
+    overflowed();
+  return r;
+}
+
+int test_sub_overflow_int_int_int(int x, int y) {
+  // CHECK-LABEL: define i32 @test_sub_overflow_int_int_int
+  // CHECK-NOT: ext
+  // CHECK: [[S:%.+]] = call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 %{{.+}}, i32 %{{.+}})
+  // CHECK-DAG: [[C:%.+]] = extractvalue { i32, i1 } [[S]], 1
+  // CHECK-DAG: [[Q:%.+]] = extractvalue { i32, i1 } [[S]], 0
+  // CHECK: store i32 [[Q]], i32*
+  // CHECK: br i1 [[C]]
+  int r;
+  if (__builtin_sub_overflow(x, y, &r))
+    overflowed();
+  return r;
+}
+
+unsigned test_mul_overflow_uint_uint_uint(unsigned x, unsigned y) {
+  // CHECK-LABEL: define i32 @test_mul_overflow_uint_uint_uint
+  // CHECK-NOT: ext
+  // CHECK: [[S:%.+]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %{{.+}}, i32 %{{.+}})
+  // CHECK-DAG: [[Q:%.+]] = extractvalue { i32, i1 } [[S]], 0
+  // CHECK-DAG: [[C:%.+]] = extractvalue { i32, i1 } [[S]], 1
+  // CHECK: store i32 [[Q]], i32*
+  // CHECK: br i1 [[C]]
+  unsigned r;
+  if (__builtin_mul_overflow(x, y, &r))
+    overflowed();
+  return r;
+}
+
+int test_mul_overflow_int_int_int(int x, int y) {
+  // CHECK-LABEL: define i32 @test_mul_overflow_int_int_int
+  // CHECK-NOT: ext
+  // CHECK: [[S:%.+]] = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 %{{.+}}, i32 %{{.+}})
+  // CHECK-DAG: [[C:%.+]] = extractvalue { i32, i1 } [[S]], 1
+  // CHECK-DAG: [[Q:%.+]] = extractvalue { i32, i1 } [[S]], 0
+  // CHECK: store i32 [[Q]], i32*
+  // CHECK: br i1 [[C]]
+  int r;
+  if (__builtin_mul_overflow(x, y, &r))
+    overflowed();
+  return r;
+}
+
+int test_add_overflow_uint_int_int(unsigned x, int y) {
+  // CHECK-LABEL: define i32 @test_add_overflow_uint_int_int
+  // CHECK: [[XE:%.+]] = zext i32 %{{.+}} to i33
+  // CHECK: [[YE:%.+]] = sext i32 %{{.+}} to i33
+  // CHECK: [[S:%.+]] = call { i33, i1 } @llvm.sadd.with.overflow.i33(i33 [[XE]], i33 [[YE]])
+  // CHECK-DAG: [[Q:%.+]] = extractvalue { i33, i1 } [[S]], 0
+  // CHECK-DAG: [[C1:%.+]] = extractvalue { i33, i1 } [[S]], 1
+  // CHECK: [[QT:%.+]] = trunc i33 [[Q]] to i32
+  // CHECK: [[QTE:%.+]] = sext i32 [[QT]] to i33
+  // CHECK: [[C2:%.+]] = icmp ne i33 [[Q]], [[QTE]]
+  // CHECK: [[C3:%.+]] = or i1 [[C1]], [[C2]]
+  // CHECK: store i32 [[QT]], i32*
+  // CHECK: br i1 [[C3]]
+  int r;
+  if (__builtin_add_overflow(x, y, &r))
+    overflowed();
+  return r;
+}
+
+_Bool test_add_overflow_uint_uint_bool(unsigned x, unsigned y) {
+  // CHECK-LABEL: define {{.*}} i1 @test_add_overflow_uint_uint_bool
+  // CHECK-NOT: ext
+  // CHECK: [[S:%.+]] = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %{{.+}}, i32 %{{.+}})
+  // CHECK-DAG: [[Q:%.+]] = extractvalue { i32, i1 } [[S]], 0
+  // CHECK-DAG: [[C1:%.+]] = extractvalue { i32, i1 } [[S]], 1
+  // CHECK: [[QT:%.+]] = trunc i32 [[Q]] to i1
+  // CHECK: [[QTE:%.+]] = zext i1 [[QT]] to i32
+  // CHECK: [[C2:%.+]] = icmp ne i32 [[Q]], [[QTE]]
+  // CHECK: [[C3:%.+]] = or i1 [[C1]], [[C2]]
+  // CHECK: [[QT2:%.+]] = zext i1 [[QT]] to i8
+  // CHECK: store i8 [[QT2]], i8*
+  // CHECK: br i1 [[C3]]
+  _Bool r;
+  if (__builtin_add_overflow(x, y, &r))
+    overflowed();
+  return r;
+}
+
+unsigned test_add_overflow_bool_bool_uint(_Bool x, _Bool y) {
+  // CHECK-LABEL: define i32 @test_add_overflow_bool_bool_uint
+  // CHECK: [[XE:%.+]] = zext i1 %{{.+}} to i32
+  // CHECK: [[YE:%.+]] = zext i1 %{{.+}} to i32
+  // CHECK: [[S:%.+]] = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 [[XE]], i32 [[YE]])
+  // CHECK-DAG: [[Q:%.+]] = extractvalue { i32, i1 } [[S]], 0
+  // CHECK-DAG: [[C:%.+]] = extractvalue { i32, i1 } [[S]], 1
+  // CHECK: store i32 [[Q]], i32*
+  // CHECK: br i1 [[C]]
+  unsigned r;
+  if (__builtin_add_overflow(x, y, &r))
+    overflowed();
+  return r;
+}
+
+_Bool test_add_overflow_bool_bool_bool(_Bool x, _Bool y) {
+  // CHECK-LABEL: define {{.*}} i1 @test_add_overflow_bool_bool_bool
+  // CHECK: [[S:%.+]] = call { i1, i1 } @llvm.uadd.with.overflow.i1(i1 %{{.+}}, i1 %{{.+}})
+  // CHECK-DAG: [[Q:%.+]] = extractvalue { i1, i1 } [[S]], 0
+  // CHECK-DAG: [[C:%.+]] = extractvalue { i1, i1 } [[S]], 1
+  // CHECK: [[QT2:%.+]] = zext i1 [[Q]] to i8
+  // CHECK: store i8 [[QT2]], i8*
+  // CHECK: br i1 [[C]]
+  _Bool r;
+  if (__builtin_add_overflow(x, y, &r))
+    overflowed();
+  return r;
+}
+
+int test_add_overflow_volatile(int x, int y) {
+  // CHECK-LABEL: define i32 @test_add_overflow_volatile
+  // CHECK: [[S:%.+]] = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %{{.+}}, i32 %{{.+}})
+  // CHECK-DAG: [[Q:%.+]] = extractvalue { i32, i1 } [[S]], 0
+  // CHECK-DAG: [[C:%.+]] = extractvalue { i32, i1 } [[S]], 1
+  // CHECK: store volatile i32 [[Q]], i32*
+  // CHECK: br i1 [[C]]
+  volatile int result;
+  if (__builtin_add_overflow(x, y, &result))
+    overflowed();
+  return result;
+}
 
 unsigned test_uadd_overflow(unsigned x, unsigned y) {
 // CHECK: @test_uadd_overflow
diff --git a/test/CodeGen/builtins-ppc-altivec.c b/test/CodeGen/builtins-ppc-altivec.c
index c6aa3c2..9539d6c 100644
--- a/test/CodeGen/builtins-ppc-altivec.c
+++ b/test/CodeGen/builtins-ppc-altivec.c
@@ -940,6 +940,30 @@
 // CHECK-LE: @llvm.ppc.altivec.vcmpeqfp
 
   /* vec_cmpge */
+  res_vbc = vec_cmpge(vsc, vsc);
+// CHECK: @llvm.ppc.altivec.vcmpgtsb
+// CHECK-LE: @llvm.ppc.altivec.vcmpgtsb
+
+  res_vbc = vec_cmpge(vuc, vuc);
+// CHECK: @llvm.ppc.altivec.vcmpgtub
+// CHECK-LE: @llvm.ppc.altivec.vcmpgtub
+
+  res_vbs = vec_cmpge(vs, vs);
+// CHECK: @llvm.ppc.altivec.vcmpgtsh
+// CHECK-LE: @llvm.ppc.altivec.vcmpgtsh
+
+  res_vbs = vec_cmpge(vus, vus);
+// CHECK: @llvm.ppc.altivec.vcmpgtuh
+// CHECK-LE: @llvm.ppc.altivec.vcmpgtuh
+
+  res_vbi = vec_cmpge(vi, vi);
+// CHECK: @llvm.ppc.altivec.vcmpgtsw
+// CHECK-LE: @llvm.ppc.altivec.vcmpgtsw
+
+  res_vbi = vec_cmpge(vui, vui);
+// CHECK: @llvm.ppc.altivec.vcmpgtuw
+// CHECK-LE: @llvm.ppc.altivec.vcmpgtuw
+
   res_vbi = vec_cmpge(vf, vf);
 // CHECK: @llvm.ppc.altivec.vcmpgefp
 // CHECK-LE: @llvm.ppc.altivec.vcmpgefp
@@ -1010,6 +1034,30 @@
 // CHECK-LE: @llvm.ppc.altivec.vcmpgtfp
 
   /* vec_cmple */
+  res_vbc = vec_cmple(vsc, vsc);
+// CHECK: @llvm.ppc.altivec.vcmpgtsb
+// CHECK-LE: @llvm.ppc.altivec.vcmpgtsb
+
+  res_vbc = vec_cmple(vuc, vuc);
+// CHECK: @llvm.ppc.altivec.vcmpgtub
+// CHECK-LE: @llvm.ppc.altivec.vcmpgtub
+
+  res_vbs = vec_cmple(vs, vs);
+// CHECK: @llvm.ppc.altivec.vcmpgtsh
+// CHECK-LE: @llvm.ppc.altivec.vcmpgtsh
+
+  res_vbs = vec_cmple(vus, vus);
+// CHECK: @llvm.ppc.altivec.vcmpgtuh
+// CHECK-LE: @llvm.ppc.altivec.vcmpgtuh
+
+  res_vbi = vec_cmple(vi, vi);
+// CHECK: @llvm.ppc.altivec.vcmpgtsw
+// CHECK-LE: @llvm.ppc.altivec.vcmpgtsw
+
+  res_vbi = vec_cmple(vui, vui);
+// CHECK: @llvm.ppc.altivec.vcmpgtuw
+// CHECK-LE: @llvm.ppc.altivec.vcmpgtuw
+
   res_vbi = vec_cmple(vf, vf);
 // CHECK: @llvm.ppc.altivec.vcmpgefp
 // CHECK-LE: @llvm.ppc.altivec.vcmpgefp
@@ -1081,6 +1129,31 @@
 // CHECK: @llvm.ppc.altivec.vctuxs
 // CHECK-LE: @llvm.ppc.altivec.vctuxs
 
+  /* vec_div */
+  res_vsc = vec_div(vsc, vsc);
+// CHECK: sdiv <16 x i8>
+// CHECK-LE: sdiv <16 x i8>
+
+  res_vuc = vec_div(vuc, vuc);
+// CHECK: udiv <16 x i8>
+// CHECK-LE: udiv <16 x i8>
+
+  res_vs = vec_div(vs, vs);
+// CHECK: sdiv <8 x i16>
+// CHECK-LE: sdiv <8 x i16>
+
+  res_vus = vec_div(vus, vus);
+// CHECK: udiv <8 x i16>
+// CHECK-LE: udiv <8 x i16>
+
+  res_vi = vec_div(vi, vi);
+// CHECK: sdiv <4 x i32>
+// CHECK-LE: sdiv <4 x i32>
+
+  res_vui = vec_div(vui, vui);
+// CHECK: udiv <4 x i32>
+// CHECK-LE: udiv <4 x i32>
+
   /* vec_dss */
   vec_dss(0);
 // CHECK: @llvm.ppc.altivec.dss
@@ -2127,6 +2200,31 @@
 // CHECK: @llvm.ppc.altivec.mtvscr
 // CHECK-LE: @llvm.ppc.altivec.mtvscr
 
+  /* vec_mul */
+  res_vsc = vec_mul(vsc, vsc);
+// CHECK: mul <16 x i8>
+// CHECK-LE: mul <16 x i8>
+
+  res_vuc = vec_mul(vuc, vuc);
+// CHECK: mul <16 x i8>
+// CHECK-LE: mul <16 x i8>
+
+  res_vs = vec_mul(vs, vs);
+// CHECK: mul <8 x i16>
+// CHECK-LE: mul <8 x i16>
+
+  res_vus = vec_mul(vus, vus);
+// CHECK: mul <8 x i16>
+// CHECK-LE: mul <8 x i16>
+
+  res_vi = vec_mul(vi, vi);
+// CHECK: mul <4 x i32>
+// CHECK-LE: mul <4 x i32>
+
+  res_vui = vec_mul(vui, vui);
+// CHECK: mul <4 x i32>
+// CHECK-LE: mul <4 x i32>
+
   /* vec_mule */
   res_vs  = vec_mule(vsc, vsc);
 // CHECK: @llvm.ppc.altivec.vmulesb
@@ -3257,67 +3355,225 @@
 
   /* vec_sld */
   res_vsc = vec_sld(vsc, vsc, 0);
+// CHECK: add nsw i32 {{[0-9a-zA-Z%.]+}}, 1
+// CHECK: add nsw i32 {{[0-9a-zA-Z%.]+}}, 2
+// CHECK: add nsw i32 {{[0-9a-zA-Z%.]+}}, 3
+// CHECK: add nsw i32 {{[0-9a-zA-Z%.]+}}, 15
 // CHECK: @llvm.ppc.altivec.vperm
+// CHECK-LE: sub nsw i32 16
+// CHECK-LE: sub nsw i32 17
+// CHECK-LE: sub nsw i32 18
+// CHECK-LE: sub nsw i32 31
 // CHECK-LE: @llvm.ppc.altivec.vperm
 
   res_vuc = vec_sld(vuc, vuc, 0);
+// CHECK: add nsw i32 {{[0-9a-zA-Z%.]+}}, 1
+// CHECK: add nsw i32 {{[0-9a-zA-Z%.]+}}, 2
+// CHECK: add nsw i32 {{[0-9a-zA-Z%.]+}}, 3
+// CHECK: add nsw i32 {{[0-9a-zA-Z%.]+}}, 15
 // CHECK: @llvm.ppc.altivec.vperm
+// CHECK-LE: sub nsw i32 16
+// CHECK-LE: sub nsw i32 17
+// CHECK-LE: sub nsw i32 18
+// CHECK-LE: sub nsw i32 31
 // CHECK-LE: @llvm.ppc.altivec.vperm
 
   res_vs  = vec_sld(vs, vs, 0);
+// CHECK: add nsw i32 {{[0-9a-zA-Z%.]+}}, 1
+// CHECK: add nsw i32 {{[0-9a-zA-Z%.]+}}, 2
+// CHECK: add nsw i32 {{[0-9a-zA-Z%.]+}}, 3
+// CHECK: add nsw i32 {{[0-9a-zA-Z%.]+}}, 15
 // CHECK: @llvm.ppc.altivec.vperm
+// CHECK-LE: sub nsw i32 16
+// CHECK-LE: sub nsw i32 17
+// CHECK-LE: sub nsw i32 18
+// CHECK-LE: sub nsw i32 31
 // CHECK-LE: @llvm.ppc.altivec.vperm
 
   res_vus = vec_sld(vus, vus, 0);
+// CHECK: add nsw i32 {{[0-9a-zA-Z%.]+}}, 1
+// CHECK: add nsw i32 {{[0-9a-zA-Z%.]+}}, 2
+// CHECK: add nsw i32 {{[0-9a-zA-Z%.]+}}, 3
+// CHECK: add nsw i32 {{[0-9a-zA-Z%.]+}}, 15
 // CHECK: @llvm.ppc.altivec.vperm
+// CHECK-LE: sub nsw i32 16
+// CHECK-LE: sub nsw i32 17
+// CHECK-LE: sub nsw i32 18
+// CHECK-LE: sub nsw i32 31
 // CHECK-LE: @llvm.ppc.altivec.vperm
 
+  res_vbs = vec_sld(vbs, vbs, 0);
+// CHECK: add nsw i32 {{[0-9a-zA-Z%.]+}}, 1
+// CHECK: add nsw i32 {{[0-9a-zA-Z%.]+}}, 2
+// CHECK: add nsw i32 {{[0-9a-zA-Z%.]+}}, 3
+// CHECK: add nsw i32 {{[0-9a-zA-Z%.]+}}, 15
+// CHECK: [[T1:%.+]] = bitcast <8 x i16> {{.+}} to <4 x i32>
+// CHECK: [[T2:%.+]] = bitcast <8 x i16> {{.+}} to <4 x i32>
+// CHECK: call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> [[T1]], <4 x i32> [[T2]], <16 x i8>
+// CHECK-LE: sub nsw i32 16
+// CHECK-LE: sub nsw i32 17
+// CHECK-LE: sub nsw i32 18
+// CHECK-LE: sub nsw i32 31
+// CHECK-LE: xor <16 x i8>
+// CHECK-LE: [[T1:%.+]] = bitcast <8 x i16> {{.+}} to <4 x i32>
+// CHECK-LE: [[T2:%.+]] = bitcast <8 x i16> {{.+}} to <4 x i32>
+// CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> [[T1]], <4 x i32> [[T2]], <16 x i8>
+
   res_vp  = vec_sld(vp, vp, 0);
+// CHECK: add nsw i32 {{[0-9a-zA-Z%.]+}}, 1
+// CHECK: add nsw i32 {{[0-9a-zA-Z%.]+}}, 2
+// CHECK: add nsw i32 {{[0-9a-zA-Z%.]+}}, 3
+// CHECK: add nsw i32 {{[0-9a-zA-Z%.]+}}, 15
 // CHECK: @llvm.ppc.altivec.vperm
+// CHECK-LE: sub nsw i32 16
+// CHECK-LE: sub nsw i32 17
+// CHECK-LE: sub nsw i32 18
+// CHECK-LE: sub nsw i32 31
 // CHECK-LE: @llvm.ppc.altivec.vperm
 
   res_vi  = vec_sld(vi, vi, 0);
+// CHECK: add nsw i32 {{[0-9a-zA-Z%.]+}}, 1
+// CHECK: add nsw i32 {{[0-9a-zA-Z%.]+}}, 2
+// CHECK: add nsw i32 {{[0-9a-zA-Z%.]+}}, 3
+// CHECK: add nsw i32 {{[0-9a-zA-Z%.]+}}, 15
 // CHECK: @llvm.ppc.altivec.vperm
+// CHECK-LE: sub nsw i32 16
+// CHECK-LE: sub nsw i32 17
+// CHECK-LE: sub nsw i32 18
+// CHECK-LE: sub nsw i32 31
 // CHECK-LE: @llvm.ppc.altivec.vperm
 
   res_vui = vec_sld(vui, vui, 0);
+// CHECK: add nsw i32 {{[0-9a-zA-Z%.]+}}, 1
+// CHECK: add nsw i32 {{[0-9a-zA-Z%.]+}}, 2
+// CHECK: add nsw i32 {{[0-9a-zA-Z%.]+}}, 3
+// CHECK: add nsw i32 {{[0-9a-zA-Z%.]+}}, 15
 // CHECK: @llvm.ppc.altivec.vperm
+// CHECK-LE: sub nsw i32 16
+// CHECK-LE: sub nsw i32 17
+// CHECK-LE: sub nsw i32 18
+// CHECK-LE: sub nsw i32 31
 // CHECK-LE: @llvm.ppc.altivec.vperm
 
+  res_vbi = vec_sld(vbi, vbi, 0);
+// CHECK: add nsw i32 {{[0-9a-zA-Z%.]+}}, 1
+// CHECK: add nsw i32 {{[0-9a-zA-Z%.]+}}, 2
+// CHECK: add nsw i32 {{[0-9a-zA-Z%.]+}}, 3
+// CHECK: add nsw i32 {{[0-9a-zA-Z%.]+}}, 15
+// CHECK: call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> {{.+}}, <4 x i32> {{.+}}, <16 x i8>
+// CHECK-LE: sub nsw i32 16
+// CHECK-LE: sub nsw i32 17
+// CHECK-LE: sub nsw i32 18
+// CHECK-LE: sub nsw i32 31
+// CHECK-LE: xor <16 x i8>
+// CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> {{.+}}, <4 x i32> {{.+}}, <16 x i8>
+
   res_vf  = vec_sld(vf, vf, 0);
+// CHECK: add nsw i32 {{[0-9a-zA-Z%.]+}}, 1
+// CHECK: add nsw i32 {{[0-9a-zA-Z%.]+}}, 2
+// CHECK: add nsw i32 {{[0-9a-zA-Z%.]+}}, 3
+// CHECK: add nsw i32 {{[0-9a-zA-Z%.]+}}, 15
 // CHECK: @llvm.ppc.altivec.vperm
+// CHECK-LE: sub nsw i32 16
+// CHECK-LE: sub nsw i32 17
+// CHECK-LE: sub nsw i32 18
+// CHECK-LE: sub nsw i32 31
 // CHECK-LE: @llvm.ppc.altivec.vperm
 
   res_vsc = vec_vsldoi(vsc, vsc, 0);
+// CHECK: add nsw i32 {{[0-9a-zA-Z%.]+}}, 1
+// CHECK: add nsw i32 {{[0-9a-zA-Z%.]+}}, 2
+// CHECK: add nsw i32 {{[0-9a-zA-Z%.]+}}, 3
+// CHECK: add nsw i32 {{[0-9a-zA-Z%.]+}}, 15
 // CHECK: @llvm.ppc.altivec.vperm
+// CHECK-LE: sub nsw i32 16
+// CHECK-LE: sub nsw i32 17
+// CHECK-LE: sub nsw i32 18
+// CHECK-LE: sub nsw i32 31
 // CHECK-LE: @llvm.ppc.altivec.vperm
 
   res_vuc = vec_vsldoi(vuc, vuc, 0);
+// CHECK: add nsw i32 {{[0-9a-zA-Z%.]+}}, 1
+// CHECK: add nsw i32 {{[0-9a-zA-Z%.]+}}, 2
+// CHECK: add nsw i32 {{[0-9a-zA-Z%.]+}}, 3
+// CHECK: add nsw i32 {{[0-9a-zA-Z%.]+}}, 15
 // CHECK: @llvm.ppc.altivec.vperm
+// CHECK-LE: sub nsw i32 16
+// CHECK-LE: sub nsw i32 17
+// CHECK-LE: sub nsw i32 18
+// CHECK-LE: sub nsw i32 31
 // CHECK-LE: @llvm.ppc.altivec.vperm
 
   res_vs  = vec_vsldoi(vs, vs, 0);
+// CHECK: add nsw i32 {{[0-9a-zA-Z%.]+}}, 1
+// CHECK: add nsw i32 {{[0-9a-zA-Z%.]+}}, 2
+// CHECK: add nsw i32 {{[0-9a-zA-Z%.]+}}, 3
+// CHECK: add nsw i32 {{[0-9a-zA-Z%.]+}}, 15
 // CHECK: @llvm.ppc.altivec.vperm
+// CHECK-LE: sub nsw i32 16
+// CHECK-LE: sub nsw i32 17
+// CHECK-LE: sub nsw i32 18
+// CHECK-LE: sub nsw i32 31
 // CHECK-LE: @llvm.ppc.altivec.vperm
 
   res_vus = vec_vsldoi(vus, vus, 0);
+// CHECK: add nsw i32 {{[0-9a-zA-Z%.]+}}, 1
+// CHECK: add nsw i32 {{[0-9a-zA-Z%.]+}}, 2
+// CHECK: add nsw i32 {{[0-9a-zA-Z%.]+}}, 3
+// CHECK: add nsw i32 {{[0-9a-zA-Z%.]+}}, 15
 // CHECK: @llvm.ppc.altivec.vperm
+// CHECK-LE: sub nsw i32 16
+// CHECK-LE: sub nsw i32 17
+// CHECK-LE: sub nsw i32 18
+// CHECK-LE: sub nsw i32 31
 // CHECK-LE: @llvm.ppc.altivec.vperm
 
   res_vp  = vec_vsldoi(vp, vp, 0);
+// CHECK: add nsw i32 {{[0-9a-zA-Z%.]+}}, 1
+// CHECK: add nsw i32 {{[0-9a-zA-Z%.]+}}, 2
+// CHECK: add nsw i32 {{[0-9a-zA-Z%.]+}}, 3
+// CHECK: add nsw i32 {{[0-9a-zA-Z%.]+}}, 15
 // CHECK: @llvm.ppc.altivec.vperm
+// CHECK-LE: sub nsw i32 16
+// CHECK-LE: sub nsw i32 17
+// CHECK-LE: sub nsw i32 18
+// CHECK-LE: sub nsw i32 31
 // CHECK-LE: @llvm.ppc.altivec.vperm
 
   res_vi  = vec_vsldoi(vi, vi, 0);
+// CHECK: add nsw i32 {{[0-9a-zA-Z%.]+}}, 1
+// CHECK: add nsw i32 {{[0-9a-zA-Z%.]+}}, 2
+// CHECK: add nsw i32 {{[0-9a-zA-Z%.]+}}, 3
+// CHECK: add nsw i32 {{[0-9a-zA-Z%.]+}}, 15
 // CHECK: @llvm.ppc.altivec.vperm
+// CHECK-LE: sub nsw i32 16
+// CHECK-LE: sub nsw i32 17
+// CHECK-LE: sub nsw i32 18
+// CHECK-LE: sub nsw i32 31
 // CHECK-LE: @llvm.ppc.altivec.vperm
 
   res_vui = vec_vsldoi(vui, vui, 0);
+// CHECK: add nsw i32 {{[0-9a-zA-Z%.]+}}, 1
+// CHECK: add nsw i32 {{[0-9a-zA-Z%.]+}}, 2
+// CHECK: add nsw i32 {{[0-9a-zA-Z%.]+}}, 3
+// CHECK: add nsw i32 {{[0-9a-zA-Z%.]+}}, 15
 // CHECK: @llvm.ppc.altivec.vperm
+// CHECK-LE: sub nsw i32 16
+// CHECK-LE: sub nsw i32 17
+// CHECK-LE: sub nsw i32 18
+// CHECK-LE: sub nsw i32 31
 // CHECK-LE: @llvm.ppc.altivec.vperm
 
   res_vf  = vec_vsldoi(vf, vf, 0);
+// CHECK: add nsw i32 {{[0-9a-zA-Z%.]+}}, 1
+// CHECK: add nsw i32 {{[0-9a-zA-Z%.]+}}, 2
+// CHECK: add nsw i32 {{[0-9a-zA-Z%.]+}}, 3
+// CHECK: add nsw i32 {{[0-9a-zA-Z%.]+}}, 15
 // CHECK: @llvm.ppc.altivec.vperm
+// CHECK-LE: sub nsw i32 16
+// CHECK-LE: sub nsw i32 17
+// CHECK-LE: sub nsw i32 18
+// CHECK-LE: sub nsw i32 31
 // CHECK-LE: @llvm.ppc.altivec.vperm
 
   /* vec_sll */
@@ -3802,28 +4058,28 @@
 
   /* vec_sr */
   res_vsc = vec_sr(vsc, vuc);
-// CHECK: shr <16 x i8>
-// CHECK-LE: shr <16 x i8>
+// CHECK: lshr <16 x i8>
+// CHECK-LE: lshr <16 x i8>
 
   res_vuc = vec_sr(vuc, vuc);
-// CHECK: shr <16 x i8>
-// CHECK-LE: shr <16 x i8>
+// CHECK: lshr <16 x i8>
+// CHECK-LE: lshr <16 x i8>
 
   res_vs  = vec_sr(vs, vus);
-// CHECK: shr <8 x i16>
-// CHECK-LE: shr <8 x i16>
+// CHECK: lshr <8 x i16>
+// CHECK-LE: lshr <8 x i16>
 
   res_vus = vec_sr(vus, vus);
-// CHECK: shr <8 x i16>
-// CHECK-LE: shr <8 x i16>
+// CHECK: lshr <8 x i16>
+// CHECK-LE: lshr <8 x i16>
 
   res_vi  = vec_sr(vi, vui);
-// CHECK: shr <4 x i32>
-// CHECK-LE: shr <4 x i32>
+// CHECK: lshr <4 x i32>
+// CHECK-LE: lshr <4 x i32>
 
   res_vui = vec_sr(vui, vui);
-// CHECK: shr <4 x i32>
-// CHECK-LE: shr <4 x i32>
+// CHECK: lshr <4 x i32>
+// CHECK-LE: lshr <4 x i32>
 
   res_vsc = vec_vsrb(vsc, vuc);
 // CHECK: shr <16 x i8>
@@ -5458,6 +5714,10 @@
 // CHECK: extractelement <16 x i8>
 // CHECK-LE: extractelement <16 x i8>
 
+  res_uc = vec_extract(vbc, param_i);
+// CHECK: extractelement <16 x i8>
+// CHECK-LE: extractelement <16 x i8>
+
   res_s  = vec_extract(vs, param_i);
 // CHECK: extractelement <8 x i16>
 // CHECK-LE: extractelement <8 x i16>
@@ -5466,6 +5726,10 @@
 // CHECK: extractelement <8 x i16>
 // CHECK-LE: extractelement <8 x i16>
 
+  res_us = vec_extract(vbs, param_i);
+// CHECK: extractelement <8 x i16>
+// CHECK-LE: extractelement <8 x i16>
+
   res_i  = vec_extract(vi, param_i);
 // CHECK: extractelement <4 x i32>
 // CHECK-LE: extractelement <4 x i32>
@@ -5474,6 +5738,10 @@
 // CHECK: extractelement <4 x i32>
 // CHECK-LE: extractelement <4 x i32>
 
+  res_ui = vec_extract(vbi, param_i);
+// CHECK: extractelement <4 x i32>
+// CHECK-LE: extractelement <4 x i32>
+
   res_f  = vec_extract(vf, param_i);
 // CHECK: extractelement <4 x float>
 // CHECK-LE: extractelement <4 x float>
@@ -5487,6 +5755,10 @@
 // CHECK: insertelement <16 x i8>
 // CHECK-LE: insertelement <16 x i8>
 
+  res_vbc = vec_insert(param_uc, vbc, param_i);
+// CHECK: insertelement <16 x i8>
+// CHECK-LE: insertelement <16 x i8>
+
   res_vs  = vec_insert(param_s, vs, param_i);
 // CHECK: insertelement <8 x i16>
 // CHECK-LE: insertelement <8 x i16>
@@ -5495,6 +5767,10 @@
 // CHECK: insertelement <8 x i16>
 // CHECK-LE: insertelement <8 x i16>
 
+  res_vbs = vec_insert(param_us, vbs, param_i);
+// CHECK: insertelement <8 x i16>
+// CHECK-LE: insertelement <8 x i16>
+
   res_vi  = vec_insert(param_i, vi, param_i);
 // CHECK: insertelement <4 x i32>
 // CHECK-LE: insertelement <4 x i32>
@@ -5503,6 +5779,10 @@
 // CHECK: insertelement <4 x i32>
 // CHECK-LE: insertelement <4 x i32>
 
+  res_vbi = vec_insert(param_ui, vbi, param_i);
+// CHECK: insertelement <4 x i32>
+// CHECK-LE: insertelement <4 x i32>
+
   res_vf  = vec_insert(param_f, vf, param_i);
 // CHECK: insertelement <4 x float>
 // CHECK-LE: insertelement <4 x float>
diff --git a/test/CodeGen/builtins-ppc-crypto.c b/test/CodeGen/builtins-ppc-crypto.c
index 0ade413..60bdc49 100644
--- a/test/CodeGen/builtins-ppc-crypto.c
+++ b/test/CodeGen/builtins-ppc-crypto.c
@@ -6,10 +6,6 @@
 // RUN: %clang_cc1 -faltivec -triple powerpc64-unknown-unknown \
 // RUN: -target-feature +crypto -target-feature +power8-vector \
 // RUN: -emit-llvm %s -o - | FileCheck %s
-
-// RUN: %clang_cc1 -faltivec -triple powerpc-unknown-unknown \
-// RUN: -target-feature +crypto -target-feature +power8-vector \
-// RUN: -emit-llvm %s -o - | FileCheck %s
 #include <altivec.h>
 #define B_INIT1 { 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, \
                   0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x10 };
@@ -34,7 +30,7 @@
   vector unsigned char a = B_INIT1
   vector unsigned char b = B_INIT2
   return __builtin_altivec_crypto_vpmsumb(a, b);
-// CHECK @llvm.ppc.altivec.crypto.vpmsumb
+// CHECK: @llvm.ppc.altivec.crypto.vpmsumb
 }
 
 // CHECK-LABEL: define <8 x i16> @test_vpmsumh
@@ -43,7 +39,7 @@
   vector unsigned short a = H_INIT1
   vector unsigned short b = H_INIT2
   return __builtin_altivec_crypto_vpmsumh(a, b);
-// CHECK @llvm.ppc.altivec.crypto.vpmsumh
+// CHECK: @llvm.ppc.altivec.crypto.vpmsumh
 }
 
 // CHECK-LABEL: define <4 x i32> @test_vpmsumw
@@ -52,7 +48,7 @@
   vector unsigned int a = W_INIT1
   vector unsigned int b = W_INIT2
   return __builtin_altivec_crypto_vpmsumw(a, b);
-// CHECK @llvm.ppc.altivec.crypto.vpmsumw
+// CHECK: @llvm.ppc.altivec.crypto.vpmsumw
 }
 
 // CHECK-LABEL: define <2 x i64> @test_vpmsumd
@@ -61,7 +57,7 @@
   vector unsigned long long a = D_INIT1
   vector unsigned long long b = D_INIT2
   return __builtin_altivec_crypto_vpmsumd(a, b);
-// CHECK @llvm.ppc.altivec.crypto.vpmsumd
+// CHECK: @llvm.ppc.altivec.crypto.vpmsumd
 }
 
 // CHECK-LABEL: define <2 x i64> @test_vsbox
@@ -130,7 +126,7 @@
 // CHECK: @llvm.ppc.altivec.crypto.vcipherlast
 }
 
-// CHECK: @llvm.ppc.altivec.crypto.vncipher
+// CHECK-LABEL: @test_vncipher
 vector unsigned long long test_vncipher(void)
 {
   vector unsigned long long a = D_INIT1
@@ -172,7 +168,7 @@
   vector unsigned char a = B_INIT1
   vector unsigned char b = B_INIT2
   return __builtin_crypto_vpmsumb(a, b);
-// CHECK @llvm.ppc.altivec.crypto.vpmsumb
+// CHECK: @llvm.ppc.altivec.crypto.vpmsumb
 }
 
 // CHECK-LABEL: define <8 x i16> @test_vpmsumh_e
@@ -181,7 +177,7 @@
   vector unsigned short a = H_INIT1
   vector unsigned short b = H_INIT2
   return __builtin_crypto_vpmsumb(a, b);
-// CHECK @llvm.ppc.altivec.crypto.vpmsumh
+// CHECK: @llvm.ppc.altivec.crypto.vpmsumh
 }
 
 // CHECK-LABEL: define <4 x i32> @test_vpmsumw_e
@@ -190,7 +186,7 @@
   vector unsigned int a = W_INIT1
   vector unsigned int b = W_INIT2
   return __builtin_crypto_vpmsumb(a, b);
-// CHECK @llvm.ppc.altivec.crypto.vpmsumw
+// CHECK: @llvm.ppc.altivec.crypto.vpmsumw
 }
 
 // CHECK-LABEL: define <2 x i64> @test_vpmsumd_e
@@ -199,7 +195,7 @@
   vector unsigned long long a = D_INIT1
   vector unsigned long long b = D_INIT2
   return __builtin_crypto_vpmsumb(a, b);
-// CHECK @llvm.ppc.altivec.crypto.vpmsumd
+// CHECK: @llvm.ppc.altivec.crypto.vpmsumd
 }
 
 // CHECK-LABEL: define <2 x i64> @test_vsbox_e
@@ -227,6 +223,7 @@
   vector unsigned short b = H_INIT2
   vector unsigned short c = H_INIT2
   return __builtin_crypto_vpermxor(a, b, c);
+// CHECK: @llvm.ppc.altivec.crypto.vpermxor
 }
 
 // CHECK-LABEL: define <4 x i32> @test_vpermxorw_e
@@ -301,3 +298,99 @@
 // CHECK: @llvm.ppc.altivec.crypto.vshasigmad
 }
 
+// CHECK-LABEL: @test_vec_sbox_be
+vector unsigned char test_vec_sbox_be(void)
+{
+  vector unsigned char a = B_INIT1
+  return vec_sbox_be(a);
+// CHECK: @llvm.ppc.altivec.crypto.vsbox
+}
+
+// CHECK-LABEL: @test_vec_cipher_be
+vector unsigned char test_vec_cipher_be(void)
+{
+  vector unsigned char a = B_INIT1
+  vector unsigned char b = B_INIT2
+  return vec_cipher_be(a, b);
+// CHECK: @llvm.ppc.altivec.crypto.vcipher
+}
+
+// CHECK-LABEL: @test_vec_cipherlast_be
+vector unsigned char test_vec_cipherlast_be(void)
+{
+  vector unsigned char a = B_INIT1
+  vector unsigned char b = B_INIT2
+  return vec_cipherlast_be(a, b);
+// CHECK: @llvm.ppc.altivec.crypto.vcipherlast
+}
+
+// CHECK-LABEL: @test_vec_ncipher_be
+vector unsigned char test_vec_ncipher_be(void)
+{
+  vector unsigned char a = B_INIT1
+  vector unsigned char b = B_INIT2
+  return vec_ncipher_be(a, b);
+// CHECK: @llvm.ppc.altivec.crypto.vncipher
+}
+
+// CHECK-LABEL: @test_vec_ncipherlast_be
+vector unsigned char test_vec_ncipherlast_be(void)
+{
+  vector unsigned char a = B_INIT1
+  vector unsigned char b = B_INIT2
+  return vec_ncipherlast_be(a, b);
+// CHECK: @llvm.ppc.altivec.crypto.vncipherlast
+}
+
+// CHECK-LABEL: @test_vec_shasigma_bew
+vector unsigned int test_vec_shasigma_bew(void)
+{
+  vector unsigned int a = W_INIT1
+  return vec_shasigma_be(a, 1, 15);
+// CHECK: @llvm.ppc.altivec.crypto.vshasigmaw
+}
+
+// CHECK-LABEL: @test_vec_shasigma_bed
+vector unsigned long long test_vec_shasigma_bed(void)
+{
+  vector unsigned long long a = D_INIT2
+  return vec_shasigma_be(a, 1, 15);
+// CHECK: @llvm.ppc.altivec.crypto.vshasigmad
+}
+
+// CHECK-LABEL: @test_vec_pmsum_beb
+vector unsigned short test_vec_pmsum_beb(void)
+{
+  vector unsigned char a = B_INIT1
+  vector unsigned char b = B_INIT2
+  return vec_pmsum_be(a, b);
+// CHECK: @llvm.ppc.altivec.crypto.vpmsumb
+}
+
+// CHECK-LABEL: @test_vec_pmsum_beh
+vector unsigned int test_vec_pmsum_beh(void)
+{
+  vector unsigned short a = H_INIT1
+  vector unsigned short b = H_INIT2
+  return vec_pmsum_be(a, b);
+// CHECK: @llvm.ppc.altivec.crypto.vpmsumh
+}
+
+// CHECK-LABEL: @test_vec_pmsum_bew
+vector unsigned long long test_vec_pmsum_bew(void)
+{
+  vector unsigned int a = W_INIT1
+  vector unsigned int b = W_INIT2
+  return vec_pmsum_be(a, b);
+// CHECK: @llvm.ppc.altivec.crypto.vpmsumw
+}
+
+// CHECK-LABEL: @test_vec_pmsum_bed
+vector unsigned __int128 test_vec_pmsum_bed(void)
+{
+  vector unsigned long long a = D_INIT1
+  vector unsigned long long b = D_INIT2
+  return vec_pmsum_be(a, b);
+// CHECK: @llvm.ppc.altivec.crypto.vpmsumd
+}
+
diff --git a/test/CodeGen/builtins-ppc-p7.c b/test/CodeGen/builtins-ppc-p7.c
index f584410..237a58f 100644
--- a/test/CodeGen/builtins-ppc-p7.c
+++ b/test/CodeGen/builtins-ppc-p7.c
@@ -11,7 +11,7 @@
   int a = 74;
   int b = 32;
   return __builtin_divwe(a, b);
-// CHECK @llvm.ppc.divwe
+// CHECK: @llvm.ppc.divwe
 }
 
 // CHECK-LABEL: define zeroext i32 @test_divweu
@@ -20,7 +20,7 @@
   unsigned int a = 74;
   unsigned int b = 32;
   return __builtin_divweu(a, b);
-// CHECK @llvm.ppc.divweu
+// CHECK: @llvm.ppc.divweu
 }
 
 // CHECK-LABEL: define i64 @test_divde
@@ -29,7 +29,7 @@
   long long a = 74LL;
   long long b = 32LL;
   return __builtin_divde(a, b);
-// CHECK @llvm.ppc.divde
+// CHECK: @llvm.ppc.divde
 }
 
 // CHECK-LABEL: define i64 @test_divdeu
@@ -38,7 +38,7 @@
   unsigned long long a = 74ULL;
   unsigned long long b = 32ULL;
   return __builtin_divdeu(a, b);
-// CHECK @llvm.ppc.divdeu
+// CHECK: @llvm.ppc.divdeu
 }
 
 // CHECK-LABEL: define i64 @test_bpermd
@@ -47,6 +47,6 @@
   long long a = 74LL;
   long long b = 32LL;
   return __builtin_bpermd(a, b);
-// CHECK @llvm.ppc.bpermd
+// CHECK: @llvm.ppc.bpermd
 }
 
diff --git a/test/CodeGen/builtins-ppc-p8vector.c b/test/CodeGen/builtins-ppc-p8vector.c
index ac40790..29503f0 100644
--- a/test/CodeGen/builtins-ppc-p8vector.c
+++ b/test/CodeGen/builtins-ppc-p8vector.c
@@ -1,30 +1,154 @@
 // REQUIRES: powerpc-registered-target
 // RUN: %clang_cc1 -faltivec -target-feature +power8-vector -triple powerpc64-unknown-unknown -emit-llvm %s -o - | FileCheck %s
 // RUN: %clang_cc1 -faltivec -target-feature +power8-vector -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s -check-prefix=CHECK-LE
-// RUN: not %clang_cc1 -faltivec -triple powerpc64-unknown-unknown -emit-llvm %s -o - 2>&1 | FileCheck %s -check-prefix=CHECK-PPC
+// RUN: not %clang_cc1 -faltivec -target-feature +vsx -triple powerpc64-unknown-unknown -emit-llvm %s -o - 2>&1 | FileCheck %s -check-prefix=CHECK-PPC
+// -target-feature +vsx is added above to avoid errors about "vector double" and
+// to generate the correct errors for functions that are only overloaded with
+// VSX (vec_cmpge, vec_cmple). Without this option there is only one overload,
+// so it is selected and no ambiguity error is emitted.
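+//
+// For illustration only (this snippet is not part of the compiled test code):
+// with +vsx but without +power8-vector, a call such as
+//   vector bool long long r = vec_cmpge(vsll, vsll);
+// is diagnosed as ambiguous, which is what the CHECK-PPC lines below rely on;
+// without +vsx the single remaining overload would simply be selected instead.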
 
+void dummy() { }
+signed int si;
+signed long long sll;
+unsigned long long ull;
+signed __int128 sx;
+unsigned __int128 ux;
+double d;
 vector signed char vsc = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5 };
 vector unsigned char vuc = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5 };
-vector int vi = { -1, 2, -3, 4 };
+vector bool char vbc = { 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1 };
+
+vector signed short vss = { 0, 1, 2, 3, 4, 5, 6, 7 };
+vector unsigned short vus = { 0, 1, 2, 3, 4, 5, 6, 7 };
+vector bool short vbs = { 1, 1, 0, 0, 0, 0, 1, 1 };
+
+vector signed int vsi = { -1, 2, -3, 4 };
 vector unsigned int vui = { 1, 2, 3, 4 };
 vector bool int vbi = {0, -1, -1, 0};
-vector bool long long vbll = { 1, 0 };
+
 vector signed long long vsll = { 1, 2 };
 vector unsigned long long vull = { 1, 2 };
+vector bool long long vbll = { 1, 0 };
+
+vector signed __int128 vsx = { 1 };
+vector unsigned __int128 vux = { 1 };
+
+vector float vfa = { 1.e-4f, -132.23f, -22.1, 32.00f };
+vector double vda = { 1.e-11, -132.23e10 };
 
 int res_i;
+double res_d;
+signed long long res_sll;
+unsigned long long res_ull;
+
 vector signed char res_vsc;
 vector unsigned char res_vuc;
-vector int res_vi;
+vector bool char res_vbc;
+
+vector signed short res_vss;
+vector unsigned short res_vus;
+vector bool short res_vbs;
+
+vector signed int res_vsi;
 vector unsigned int res_vui;
 vector bool int res_vbi;
-vector bool long long res_vbll;
+
 vector signed long long res_vsll;
 vector unsigned long long res_vull;
+vector bool long long res_vbll;
+
+vector signed __int128 res_vsx;
+vector unsigned __int128 res_vux;
+
+vector float res_vf;
+vector double res_vd;
 
 // CHECK-LABEL: define void @test1
 void test1() {
 
+  /* vec_abs */
+  res_vsll = vec_abs(vsll);
+// CHECK: call <2 x i64> @llvm.ppc.altivec.vmaxsd(<2 x i64> %{{[0-9]*}}, <2 x i64>
+// CHECK-LE: call <2 x i64> @llvm.ppc.altivec.vmaxsd(<2 x i64> %{{[0-9]*}}, <2 x i64>
+// CHECK-PPC: error: call to 'vec_abs' is ambiguous
+
+  res_vd = vec_abs(vda);
+// CHECK: store <2 x i64> <i64 9223372036854775807, i64 9223372036854775807>, <2 x i64>*
+// CHECK: and <2 x i64>
+// CHECK-LE: store <2 x i64> <i64 9223372036854775807, i64 9223372036854775807>, <2 x i64>*
+// CHECK-LE: and <2 x i64>
+// CHECK-PPC: error: call to 'vec_abs' is ambiguous
+
+  /* vec_add */
+  res_vsll = vec_add(vsll, vsll);
+// CHECK: add <2 x i64>
+// CHECK-LE: add <2 x i64>
+// CHECK-PPC: error: call to 'vec_add' is ambiguous
+
+  res_vull = vec_add(vull, vull);
+// CHECK: add <2 x i64>
+// CHECK-LE: add <2 x i64>
+// CHECK-PPC: error: call to 'vec_add' is ambiguous
+
+  /* vec_addc */
+  res_vsi = vec_addc(vsi, vsi);
+// CHECK: @llvm.ppc.altivec.vaddcuw
+// CHECK-LE: @llvm.ppc.altivec.vaddcuw
+
+  res_vui = vec_addc(vui, vui);
+// CHECK: @llvm.ppc.altivec.vaddcuw
+// CHECK-LE: @llvm.ppc.altivec.vaddcuw
+
+  res_vsx = vec_addc(vsx, vsx);
+// CHECK: @llvm.ppc.altivec.vaddcuq
+// CHECK-LE: @llvm.ppc.altivec.vaddcuq
+
+  res_vux = vec_addc(vux, vux);
+// CHECK: @llvm.ppc.altivec.vaddcuq
+// CHECK-LE: @llvm.ppc.altivec.vaddcuq
+
+  /* vec_adde */
+  res_vsx = vec_adde(vsx, vsx, vsx);
+// CHECK: @llvm.ppc.altivec.vaddeuqm
+// CHECK-LE: @llvm.ppc.altivec.vaddeuqm
+
+  res_vux = vec_adde(vux, vux, vux);
+// CHECK: @llvm.ppc.altivec.vaddeuqm
+// CHECK-LE: @llvm.ppc.altivec.vaddeuqm
+
+  /* vec_addec */
+  res_vsx = vec_addec(vsx, vsx, vsx);
+// CHECK: @llvm.ppc.altivec.vaddecuq
+// CHECK-LE: @llvm.ppc.altivec.vaddecuq
+
+  /* vec_mergee */
+  res_vbi = vec_mergee(vbi, vbi);
+// CHECK: @llvm.ppc.altivec.vperm
+// CHECK-LE: @llvm.ppc.altivec.vperm
+  
+  res_vsi = vec_mergee(vsi, vsi);
+// CHECK: @llvm.ppc.altivec.vperm
+// CHECK-LE: @llvm.ppc.altivec.vperm
+
+  res_vui = vec_mergee(vui, vui);
+// CHECK: @llvm.ppc.altivec.vperm
+// CHECK-LE: @llvm.ppc.altivec.vperm
+// CHECK-PPC: warning: implicit declaration of function 'vec_mergee'
+
+  /* vec_mergeo */
+  res_vbi = vec_mergeo(vbi, vbi);
+// CHECK: @llvm.ppc.altivec.vperm
+// CHECK-LE: @llvm.ppc.altivec.vperm
+
+  res_vsi = vec_mergeo(vsi, vsi);
+// CHECK: @llvm.ppc.altivec.vperm
+// CHECK-LE: @llvm.ppc.altivec.vperm
+
+  res_vui = vec_mergeo(vui, vui);
+// CHECK: @llvm.ppc.altivec.vperm
+// CHECK-LE: @llvm.ppc.altivec.vperm
+// CHECK-PPC: warning: implicit declaration of function 'vec_mergeo'
+  
   /* vec_cmpeq */
   res_vbll = vec_cmpeq(vsll, vsll);
 // CHECK: @llvm.ppc.altivec.vcmpequd
@@ -36,6 +160,28 @@
 // CHECK-LE: @llvm.ppc.altivec.vcmpequd
 // CHECK-PPC: error: call to 'vec_cmpeq' is ambiguous
 
+  /* vec_cmpge */
+  res_vbll = vec_cmpge(vsll, vsll);
+// CHECK: @llvm.ppc.altivec.vcmpgtsd
+// CHECK-LE: @llvm.ppc.altivec.vcmpgtsd
+// CHECK-PPC: error: call to 'vec_cmpge' is ambiguous
+
+  res_vbll = vec_cmpge(vull, vull);
+// CHECK: @llvm.ppc.altivec.vcmpgtud
+// CHECK-LE: @llvm.ppc.altivec.vcmpgtud
+// CHECK-PPC: error: call to 'vec_cmpge' is ambiguous
+
+  /* vec_cmple */
+  res_vbll = vec_cmple(vsll, vsll);
+// CHECK: @llvm.ppc.altivec.vcmpgtsd
+// CHECK-LE: @llvm.ppc.altivec.vcmpgtsd
+// CHECK-PPC: error: call to 'vec_cmple' is ambiguous
+
+  res_vbll = vec_cmple(vull, vull);
+// CHECK: @llvm.ppc.altivec.vcmpgtud
+// CHECK-LE: @llvm.ppc.altivec.vcmpgtud
+// CHECK-PPC: error: call to 'vec_cmple' is ambiguous
+
   /* vec_cmpgt */
   res_vbll = vec_cmpgt(vsll, vsll);
 // CHECK: @llvm.ppc.altivec.vcmpgtsd
@@ -47,6 +193,231 @@
 // CHECK-LE: @llvm.ppc.altivec.vcmpgtud
 // CHECK-PPC: error: call to 'vec_cmpgt' is ambiguous
 
+  /* vec_cmplt */
+  res_vbll = vec_cmplt(vsll, vsll);
+// CHECK: call <2 x i64> @llvm.ppc.altivec.vcmpgtsd(<2 x i64> %{{[0-9]*}}, <2 x i64> %{{[0-9]*}})
+// CHECK-LE: call <2 x i64> @llvm.ppc.altivec.vcmpgtsd(<2 x i64> %{{[0-9]*}}, <2 x i64> %{{[0-9]*}})
+// CHECK-PPC: error: call to 'vec_cmplt' is ambiguous
+
+  res_vbll = vec_cmplt(vull, vull);
+// CHECK: call <2 x i64> @llvm.ppc.altivec.vcmpgtud(<2 x i64> %{{[0-9]*}}, <2 x i64> %{{[0-9]*}})
+// CHECK-LE: call <2 x i64> @llvm.ppc.altivec.vcmpgtud(<2 x i64> %{{[0-9]*}}, <2 x i64> %{{[0-9]*}})
+// CHECK-PPC: error: call to 'vec_cmplt' is ambiguous
+
+  /* vec_double */
+  res_vd = vec_double(vsll);
+// CHECK: sitofp i64 {{.+}} to double
+// CHECK-LE: sitofp i64 {{.+}} to double
+
+  res_vd = vec_double(vull);
+// CHECK: uitofp i64 {{.+}} to double
+// CHECK-LE: uitofp i64 {{.+}} to double
+
+  /* vec_eqv */
+  res_vsc =  vec_eqv(vsc, vsc);
+// CHECK: [[T1:%.+]] = bitcast <16 x i8> {{.+}} to <4 x i32>
+// CHECK: [[T2:%.+]] = bitcast <16 x i8> {{.+}} to <4 x i32>
+// CHECK: [[T3:%.+]] = call <4 x i32> @llvm.ppc.vsx.xxleqv(<4 x i32> [[T1]], <4 x i32> [[T2]])
+// CHECK: bitcast <4 x i32> [[T3]] to <16 x i8>
+// CHECK-LE: [[T1:%.+]] = bitcast <16 x i8> {{.+}} to <4 x i32>
+// CHECK-LE: [[T2:%.+]] = bitcast <16 x i8> {{.+}} to <4 x i32>
+// CHECK-LE: [[T3:%.+]] = call <4 x i32> @llvm.ppc.vsx.xxleqv(<4 x i32> [[T1]], <4 x i32> [[T2]])
+// CHECK-LE: bitcast <4 x i32> [[T3]] to <16 x i8>
+// CHECK-PPC: error: assigning to
+
+  res_vsc =  vec_eqv(vbc, vbc);
+// CHECK: [[T1:%.+]] = bitcast <16 x i8> {{.+}} to <4 x i32>
+// CHECK: [[T2:%.+]] = bitcast <16 x i8> {{.+}} to <4 x i32>
+// CHECK: [[T3:%.+]] = call <4 x i32> @llvm.ppc.vsx.xxleqv(<4 x i32> [[T1]], <4 x i32> [[T2]])
+// CHECK: bitcast <4 x i32> [[T3]] to <16 x i8>
+// CHECK-LE: [[T1:%.+]] = bitcast <16 x i8> {{.+}} to <4 x i32>
+// CHECK-LE: [[T2:%.+]] = bitcast <16 x i8> {{.+}} to <4 x i32>
+// CHECK-LE: [[T3:%.+]] = call <4 x i32> @llvm.ppc.vsx.xxleqv(<4 x i32> [[T1]], <4 x i32> [[T2]])
+// CHECK-LE: bitcast <4 x i32> [[T3]] to <16 x i8>
+// CHECK-PPC: error: assigning to
+
+  res_vuc =  vec_eqv(vuc, vuc);
+// CHECK: [[T1:%.+]] = bitcast <16 x i8> {{.+}} to <4 x i32>
+// CHECK: [[T2:%.+]] = bitcast <16 x i8> {{.+}} to <4 x i32>
+// CHECK: [[T3:%.+]] = call <4 x i32> @llvm.ppc.vsx.xxleqv(<4 x i32> [[T1]], <4 x i32> [[T2]])
+// CHECK: bitcast <4 x i32> [[T3]] to <16 x i8>
+// CHECK-LE: [[T1:%.+]] = bitcast <16 x i8> {{.+}} to <4 x i32>
+// CHECK-LE: [[T2:%.+]] = bitcast <16 x i8> {{.+}} to <4 x i32>
+// CHECK-LE: [[T3:%.+]] = call <4 x i32> @llvm.ppc.vsx.xxleqv(<4 x i32> [[T1]], <4 x i32> [[T2]])
+// CHECK-LE: bitcast <4 x i32> [[T3]] to <16 x i8>
+// CHECK-PPC: error: assigning to
+
+  res_vss =  vec_eqv(vss, vss);
+// CHECK: [[T1:%.+]] = bitcast <8 x i16> {{.+}} to <4 x i32>
+// CHECK: [[T2:%.+]] = bitcast <8 x i16> {{.+}} to <4 x i32>
+// CHECK: [[T3:%.+]] = call <4 x i32> @llvm.ppc.vsx.xxleqv(<4 x i32> [[T1]], <4 x i32> [[T2]])
+// CHECK: bitcast <4 x i32> [[T3]] to <8 x i16>
+// CHECK-LE: [[T1:%.+]] = bitcast <8 x i16> {{.+}} to <4 x i32>
+// CHECK-LE: [[T2:%.+]] = bitcast <8 x i16> {{.+}} to <4 x i32>
+// CHECK-LE: [[T3:%.+]] = call <4 x i32> @llvm.ppc.vsx.xxleqv(<4 x i32> [[T1]], <4 x i32> [[T2]])
+// CHECK-LE: bitcast <4 x i32> [[T3]] to <8 x i16>
+// CHECK-PPC: error: assigning to
+
+  res_vss =  vec_eqv(vbs, vbs);
+// CHECK: [[T1:%.+]] = bitcast <8 x i16> {{.+}} to <4 x i32>
+// CHECK: [[T2:%.+]] = bitcast <8 x i16> {{.+}} to <4 x i32>
+// CHECK: [[T3:%.+]] = call <4 x i32> @llvm.ppc.vsx.xxleqv(<4 x i32> [[T1]], <4 x i32> [[T2]])
+// CHECK: bitcast <4 x i32> [[T3]] to <8 x i16>
+// CHECK-LE: [[T1:%.+]] = bitcast <8 x i16> {{.+}} to <4 x i32>
+// CHECK-LE: [[T2:%.+]] = bitcast <8 x i16> {{.+}} to <4 x i32>
+// CHECK-LE: [[T3:%.+]] = call <4 x i32> @llvm.ppc.vsx.xxleqv(<4 x i32> [[T1]], <4 x i32> [[T2]])
+// CHECK-LE: bitcast <4 x i32> [[T3]] to <8 x i16>
+// CHECK-PPC: error: assigning to
+
+  res_vus =  vec_eqv(vus, vus);
+// CHECK: [[T1:%.+]] = bitcast <8 x i16> {{.+}} to <4 x i32>
+// CHECK: [[T2:%.+]] = bitcast <8 x i16> {{.+}} to <4 x i32>
+// CHECK: [[T3:%.+]] = call <4 x i32> @llvm.ppc.vsx.xxleqv(<4 x i32> [[T1]], <4 x i32> [[T2]])
+// CHECK: bitcast <4 x i32> [[T3]] to <8 x i16>
+// CHECK-LE: [[T1:%.+]] = bitcast <8 x i16> {{.+}} to <4 x i32>
+// CHECK-LE: [[T2:%.+]] = bitcast <8 x i16> {{.+}} to <4 x i32>
+// CHECK-LE: [[T3:%.+]] = call <4 x i32> @llvm.ppc.vsx.xxleqv(<4 x i32> [[T1]], <4 x i32> [[T2]])
+// CHECK-LE: bitcast <4 x i32> [[T3]] to <8 x i16>
+// CHECK-PPC: error: assigning to
+
+  res_vsi =  vec_eqv(vsi, vsi);
+// CHECK: call <4 x i32> @llvm.ppc.vsx.xxleqv(<4 x i32> {{.*}}, <4 x i32> {{.+}})
+// CHECK-LE: call <4 x i32> @llvm.ppc.vsx.xxleqv(<4 x i32> {{.*}}, <4 x i32> {{.+}})
+// CHECK-PPC: error: assigning to
+
+  res_vsi =  vec_eqv(vbi, vbi);
+// CHECK: call <4 x i32> @llvm.ppc.vsx.xxleqv(<4 x i32> {{.*}}, <4 x i32> {{.+}})
+// CHECK-LE: call <4 x i32> @llvm.ppc.vsx.xxleqv(<4 x i32> {{.*}}, <4 x i32> {{.+}})
+// CHECK-PPC: error: assigning to
+
+  res_vui =  vec_eqv(vui, vui);
+// CHECK: call <4 x i32> @llvm.ppc.vsx.xxleqv(<4 x i32> {{.*}}, <4 x i32> {{.+}})
+// CHECK-LE: call <4 x i32> @llvm.ppc.vsx.xxleqv(<4 x i32> {{.*}}, <4 x i32> {{.+}})
+// CHECK-PPC: error: assigning to
+
+  res_vsll =  vec_eqv(vsll, vsll);
+// CHECK: [[T1:%.+]] = bitcast <2 x i64> {{.+}} to <4 x i32>
+// CHECK: [[T2:%.+]] = bitcast <2 x i64> {{.+}} to <4 x i32>
+// CHECK: [[T3:%.+]] = call <4 x i32> @llvm.ppc.vsx.xxleqv(<4 x i32> [[T1]], <4 x i32> [[T2]])
+// CHECK: bitcast <4 x i32> [[T3]] to <2 x i64>
+// CHECK-LE: [[T1:%.+]] = bitcast <2 x i64> {{.+}} to <4 x i32>
+// CHECK-LE: [[T2:%.+]] = bitcast <2 x i64> {{.+}} to <4 x i32>
+// CHECK-LE: [[T3:%.+]] = call <4 x i32> @llvm.ppc.vsx.xxleqv(<4 x i32> [[T1]], <4 x i32> [[T2]])
+// CHECK-LE: bitcast <4 x i32> [[T3]] to <2 x i64>
+// CHECK-PPC: error: assigning to
+
+  res_vsll =  vec_eqv(vbll, vbll);
+// CHECK: [[T1:%.+]] = bitcast <2 x i64> {{.+}} to <4 x i32>
+// CHECK: [[T2:%.+]] = bitcast <2 x i64> {{.+}} to <4 x i32>
+// CHECK: [[T3:%.+]] = call <4 x i32> @llvm.ppc.vsx.xxleqv(<4 x i32> [[T1]], <4 x i32> [[T2]])
+// CHECK: bitcast <4 x i32> [[T3]] to <2 x i64>
+// CHECK-LE: [[T1:%.+]] = bitcast <2 x i64> {{.+}} to <4 x i32>
+// CHECK-LE: [[T2:%.+]] = bitcast <2 x i64> {{.+}} to <4 x i32>
+// CHECK-LE: [[T3:%.+]] = call <4 x i32> @llvm.ppc.vsx.xxleqv(<4 x i32> [[T1]], <4 x i32> [[T2]])
+// CHECK-LE: bitcast <4 x i32> [[T3]] to <2 x i64>
+// CHECK-PPC: error: assigning to
+
+  res_vull =  vec_eqv(vull, vull);
+// CHECK: [[T1:%.+]] = bitcast <2 x i64> {{.+}} to <4 x i32>
+// CHECK: [[T2:%.+]] = bitcast <2 x i64> {{.+}} to <4 x i32>
+// CHECK: [[T3:%.+]] = call <4 x i32> @llvm.ppc.vsx.xxleqv(<4 x i32> [[T1]], <4 x i32> [[T2]])
+// CHECK: bitcast <4 x i32> [[T3]] to <2 x i64>
+// CHECK-LE: [[T1:%.+]] = bitcast <2 x i64> {{.+}} to <4 x i32>
+// CHECK-LE: [[T2:%.+]] = bitcast <2 x i64> {{.+}} to <4 x i32>
+// CHECK-LE: [[T3:%.+]] = call <4 x i32> @llvm.ppc.vsx.xxleqv(<4 x i32> [[T1]], <4 x i32> [[T2]])
+// CHECK-LE: bitcast <4 x i32> [[T3]] to <2 x i64>
+// CHECK-PPC: error: assigning to
+
+  res_vf = vec_eqv(vfa, vfa);
+// CHECK: [[T1:%.+]] = bitcast <4 x float> {{.+}} to <4 x i32>
+// CHECK: [[T2:%.+]] = bitcast <4 x float> {{.+}} to <4 x i32>
+// CHECK: [[T3:%.+]] = call <4 x i32> @llvm.ppc.vsx.xxleqv(<4 x i32> [[T1]], <4 x i32> [[T2]])
+// CHECK: bitcast <4 x i32> [[T3]] to <4 x float>
+// CHECK-LE: [[T1:%.+]] = bitcast <4 x float> {{.+}} to <4 x i32>
+// CHECK-LE: [[T2:%.+]] = bitcast <4 x float> {{.+}} to <4 x i32>
+// CHECK-LE: [[T3:%.+]] = call <4 x i32> @llvm.ppc.vsx.xxleqv(<4 x i32> [[T1]], <4 x i32> [[T2]])
+// CHECK-LE: bitcast <4 x i32> [[T3]] to <4 x float>
+// CHECK-PPC: error: assigning to
+
+  res_vd = vec_eqv(vda, vda);
+// CHECK: [[T1:%.+]] = bitcast <2 x double> {{.+}} to <4 x i32>
+// CHECK: [[T2:%.+]] = bitcast <2 x double> {{.+}} to <4 x i32>
+// CHECK: [[T3:%.+]] = call <4 x i32> @llvm.ppc.vsx.xxleqv(<4 x i32> [[T1]], <4 x i32> [[T2]])
+// CHECK: bitcast <4 x i32> [[T3]] to <2 x double>
+// CHECK-LE: [[T1:%.+]] = bitcast <2 x double> {{.+}} to <4 x i32>
+// CHECK-LE: [[T2:%.+]] = bitcast <2 x double> {{.+}} to <4 x i32>
+// CHECK-LE: [[T3:%.+]] = call <4 x i32> @llvm.ppc.vsx.xxleqv(<4 x i32> [[T1]], <4 x i32> [[T2]])
+// CHECK-LE: bitcast <4 x i32> [[T3]] to <2 x double>
+// CHECK-PPC: error: assigning to
+
+  /* vec_extract */
+  res_sll = vec_extract(vsll, si);
+// CHECK: extractelement <2 x i64>
+// CHECK-LE: extractelement <2 x i64>
+
+  res_ull = vec_extract(vull, si);
+// CHECK: extractelement <2 x i64>
+// CHECK-LE: extractelement <2 x i64>
+
+  res_ull = vec_extract(vbll, si);
+// CHECK: extractelement <2 x i64>
+// CHECK-LE: extractelement <2 x i64>
+
+  res_d = vec_extract(vda, si);
+// CHECK: extractelement <2 x double>
+// CHECK-LE: extractelement <2 x double>
+
+  /* vec_insert */
+  res_vsll = vec_insert(sll, vsll, si);
+// CHECK: insertelement <2 x i64>
+// CHECK-LE: insertelement <2 x i64>
+
+  res_vbll = vec_insert(ull, vbll, si);
+// CHECK: insertelement <2 x i64>
+// CHECK-LE: insertelement <2 x i64>
+
+  res_vull = vec_insert(ull, vull, si);
+// CHECK: insertelement <2 x i64>
+// CHECK-LE: insertelement <2 x i64>
+
+  res_vd = vec_insert(d, vda, si);
+// CHECK: insertelement <2 x double>
+// CHECK-LE: insertelement <2 x double>
+
+  /* vec_cntlz */
+  res_vsc = vec_cntlz(vsc);
+// CHECK: call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %{{.+}}, i1 false)
+// CHECK-LE: call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %{{.+}}, i1 false)
+// CHECK-PPC: warning: implicit declaration of function 'vec_cntlz' is invalid in C99
+
+  res_vuc = vec_cntlz(vuc);
+// CHECK: call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %{{.+}}, i1 false)
+// CHECK-LE: call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %{{.+}}, i1 false)
+
+  res_vss = vec_cntlz(vss);
+// CHECK: call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %{{.+}}, i1 false)
+// CHECK-LE: call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %{{.+}}, i1 false)
+
+  res_vus = vec_cntlz(vus);
+// CHECK: call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %{{.+}}, i1 false)
+// CHECK-LE: call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %{{.+}}, i1 false)
+
+  res_vsi = vec_cntlz(vsi);
+// CHECK: call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %{{.+}}, i1 false)
+// CHECK-LE: call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %{{.+}}, i1 false)
+
+  res_vui = vec_cntlz(vui);
+// CHECK: call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %{{.+}}, i1 false)
+// CHECK-LE: call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %{{.+}}, i1 false)
+
+  res_vsll = vec_cntlz(vsll);
+// CHECK: call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %{{.+}}, i1 false)
+// CHECK-LE: call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %{{.+}}, i1 false)
+
+  res_vull = vec_cntlz(vull);
+// CHECK: call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %{{.+}}, i1 false)
+// CHECK-LE: call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %{{.+}}, i1 false)
+
   /* ----------------------- predicates --------------------------- */
   /* vec_all_eq */
   res_i = vec_all_eq(vsll, vsll);
@@ -84,6 +455,10 @@
 // CHECK-LE: @llvm.ppc.altivec.vcmpequd.p
 // CHECK-PPC: error: call to 'vec_all_eq' is ambiguous
 
+  res_i = vec_all_eq(vda, vda);
+// CHECK: @llvm.ppc.vsx.xvcmpeqdp.p
+// CHECK-LE: @llvm.ppc.vsx.xvcmpeqdp.p
+
   /* vec_all_ne */
   res_i = vec_all_ne(vsll, vsll);
 // CHECK: @llvm.ppc.altivec.vcmpequd.p
@@ -120,6 +495,24 @@
 // CHECK-LE: @llvm.ppc.altivec.vcmpequd.p
 // CHECK-PPC: error: call to 'vec_all_ne' is ambiguous
 
+  dummy();
+// CHECK: @dummy
+
+  res_i = vec_all_ne(vda, vda);
+// CHECK: @llvm.ppc.vsx.xvcmpeqdp.p
+// CHECK-LE: @llvm.ppc.vsx.xvcmpeqdp.p
+
+  dummy();
+// CHECK: @dummy
+
+  res_i = vec_all_nge(vda, vda);
+// CHECK: @llvm.ppc.vsx.xvcmpgedp.p
+// CHECK-LE: @llvm.ppc.vsx.xvcmpgedp.p
+
+  res_i = vec_all_ngt(vda, vda);
+// CHECK: @llvm.ppc.vsx.xvcmpgtdp.p
+// CHECK-LE: @llvm.ppc.vsx.xvcmpgtdp.p
+
   /* vec_any_eq */
   res_i = vec_any_eq(vsll, vsll);
 // CHECK: @llvm.ppc.altivec.vcmpequd.p
@@ -156,6 +549,10 @@
 // CHECK-LE: @llvm.ppc.altivec.vcmpequd.p
 // CHECK-PPC: error: call to 'vec_any_eq' is ambiguous
 
+  res_i = vec_any_eq(vda, vda);
+// CHECK: @llvm.ppc.vsx.xvcmpeqdp.p
+// CHECK-LE: @llvm.ppc.vsx.xvcmpeqdp.p
+
   /* vec_any_ne */
   res_i = vec_any_ne(vsll, vsll);
 // CHECK: @llvm.ppc.altivec.vcmpequd.p
@@ -192,6 +589,10 @@
 // CHECK-LE: @llvm.ppc.altivec.vcmpequd.p
 // CHECK-PPC: error: call to 'vec_any_ne' is ambiguous
 
+  res_i = vec_any_ne(vda, vda);
+// CHECK: @llvm.ppc.vsx.xvcmpeqdp.p
+// CHECK-LE: @llvm.ppc.vsx.xvcmpeqdp.p
+
   /* vec_all_ge */
   res_i = vec_all_ge(vsll, vsll);
 // CHECK: @llvm.ppc.altivec.vcmpgtsd.p
@@ -228,6 +629,10 @@
 // CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p
 // CHECK-PPC: error: call to 'vec_all_ge' is ambiguous
 
+  res_i = vec_all_ge(vda, vda);
+// CHECK: @llvm.ppc.vsx.xvcmpgedp.p
+// CHECK-LE: @llvm.ppc.vsx.xvcmpgedp.p
+
   /* vec_all_gt */
   res_i = vec_all_gt(vsll, vsll);
 // CHECK: @llvm.ppc.altivec.vcmpgtsd.p
@@ -264,6 +669,10 @@
 // CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p
 // CHECK-PPC: error: call to 'vec_all_gt' is ambiguous
 
+  res_i = vec_all_gt(vda, vda);
+// CHECK: @llvm.ppc.vsx.xvcmpgtdp.p
+// CHECK-LE: @llvm.ppc.vsx.xvcmpgtdp.p
+
   /* vec_all_le */
   res_i = vec_all_le(vsll, vsll);
 // CHECK: @llvm.ppc.altivec.vcmpgtsd.p
@@ -300,6 +709,10 @@
 // CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p
 // CHECK-PPC: error: call to 'vec_all_le' is ambiguous
 
+  res_i = vec_all_le(vda, vda);
+// CHECK: @llvm.ppc.vsx.xvcmpgedp.p
+// CHECK-LE: @llvm.ppc.vsx.xvcmpgedp.p
+
   /* vec_all_lt */
   res_i = vec_all_lt(vsll, vsll);
 // CHECK: @llvm.ppc.altivec.vcmpgtsd.p
@@ -336,6 +749,14 @@
 // CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p
 // CHECK-PPC: error: call to 'vec_all_lt' is ambiguous
 
+  res_i = vec_all_lt(vda, vda);
+// CHECK: @llvm.ppc.vsx.xvcmpgtdp.p
+// CHECK-LE: @llvm.ppc.vsx.xvcmpgtdp.p
+
+  res_i = vec_all_nan(vda);
+// CHECK: @llvm.ppc.vsx.xvcmpeqdp.p
+// CHECK-LE: @llvm.ppc.vsx.xvcmpeqdp.p
+
   /* vec_any_ge */
   res_i = vec_any_ge(vsll, vsll);
 // CHECK: @llvm.ppc.altivec.vcmpgtsd.p
@@ -372,6 +793,10 @@
 // CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p
 // CHECK-PPC: error: call to 'vec_any_ge' is ambiguous
 
+  res_i = vec_any_ge(vda, vda);
+// CHECK: @llvm.ppc.vsx.xvcmpgedp.p
+// CHECK-LE: @llvm.ppc.vsx.xvcmpgedp.p
+
   /* vec_any_gt */
   res_i = vec_any_gt(vsll, vsll);
 // CHECK: @llvm.ppc.altivec.vcmpgtsd.p
@@ -408,6 +833,10 @@
 // CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p
 // CHECK-PPC: error: call to 'vec_any_gt' is ambiguous
 
+  res_i = vec_any_gt(vda, vda);
+// CHECK: @llvm.ppc.vsx.xvcmpgtdp.p
+// CHECK-LE: @llvm.ppc.vsx.xvcmpgtdp.p
+
   /* vec_any_le */
   res_i = vec_any_le(vsll, vsll);
 // CHECK: @llvm.ppc.altivec.vcmpgtsd.p
@@ -444,6 +873,10 @@
 // CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p
 // CHECK-PPC: error: call to 'vec_any_le' is ambiguous
 
+  res_i = vec_any_le(vda, vda);
+// CHECK: @llvm.ppc.vsx.xvcmpgedp.p
+// CHECK-LE: @llvm.ppc.vsx.xvcmpgedp.p
+
   /* vec_any_lt */
   res_i = vec_any_lt(vsll, vsll);
 // CHECK: @llvm.ppc.altivec.vcmpgtsd.p
@@ -480,6 +913,10 @@
 // CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p
 // CHECK-PPC: error: call to 'vec_any_lt' is ambiguous
 
+  res_i = vec_any_lt(vda, vda);
+// CHECK: @llvm.ppc.vsx.xvcmpgtdp.p
+// CHECK-LE: @llvm.ppc.vsx.xvcmpgtdp.p
+
   /* vec_max */
   res_vsll = vec_max(vsll, vsll);
 // CHECK: @llvm.ppc.altivec.vmaxsd
@@ -511,6 +948,15 @@
 // CHECK-LE: @llvm.ppc.altivec.vmaxud
 // CHECK-PPC: error: call to 'vec_max' is ambiguous
 
+  /* vec_mergeh */
+  res_vbll = vec_mergeh(vbll, vbll);
+// CHECK: @llvm.ppc.altivec.vperm
+// CHECK-LE: @llvm.ppc.altivec.vperm
+
+  res_vbll = vec_mergel(vbll, vbll);
+// CHECK: @llvm.ppc.altivec.vperm
+// CHECK-LE: @llvm.ppc.altivec.vperm
+
   /* vec_min */
   res_vsll = vec_min(vsll, vsll);
 // CHECK: @llvm.ppc.altivec.vminsd
@@ -543,7 +989,7 @@
 // CHECK-PPC: error: call to 'vec_min' is ambiguous
 
   /* vec_mule */
-  res_vsll = vec_mule(vi, vi);
+  res_vsll = vec_mule(vsi, vsi);
 // CHECK: @llvm.ppc.altivec.vmulesw
 // CHECK-LE: @llvm.ppc.altivec.vmulosw
 // CHECK-PPC: error: call to 'vec_mule' is ambiguous
@@ -554,7 +1000,7 @@
 // CHECK-PPC: error: call to 'vec_mule' is ambiguous
 
   /* vec_mulo */
-  res_vsll = vec_mulo(vi, vi);
+  res_vsll = vec_mulo(vsi, vsi);
 // CHECK: @llvm.ppc.altivec.vmulosw
 // CHECK-LE: @llvm.ppc.altivec.vmulesw
 // CHECK-PPC: error: call to 'vec_mulo' is ambiguous
@@ -565,7 +1011,7 @@
 // CHECK-PPC: error: call to 'vec_mulo' is ambiguous
 
   /* vec_packs */
-  res_vi = vec_packs(vsll, vsll);
+  res_vsi = vec_packs(vsll, vsll);
 // CHECK: @llvm.ppc.altivec.vpksdss
 // CHECK-LE: @llvm.ppc.altivec.vpksdss
 // CHECK-PPC: error: call to 'vec_packs' is ambiguous
@@ -610,8 +1056,8 @@
 
   /* vec_sr */
   res_vsll = vec_sr(vsll, vull);
-// CHECK: ashr <2 x i64>
-// CHECK-LE: ashr <2 x i64>
+// CHECK: lshr <2 x i64>
+// CHECK-LE: lshr <2 x i64>
 // CHECK-PPC: error: call to 'vec_sr' is ambiguous
 
   res_vull = vec_sr(vull, vull);
@@ -630,8 +1076,30 @@
 // CHECK-LE: ashr <2 x i64>
 // CHECK-PPC: error: call to 'vec_sra' is ambiguous
 
+  /* vec_splats */
+  res_vsll = vec_splats(sll);
+// CHECK: insertelement <2 x i64>
+// CHECK-LE: insertelement <2 x i64>
+
+  res_vull = vec_splats(ull);
+// CHECK: insertelement <2 x i64>
+// CHECK-LE: insertelement <2 x i64>
+
+  res_vsx = vec_splats(sx);
+// CHECK: insertelement <1 x i128>
+// CHECK-LE: insertelement <1 x i128>
+
+  res_vux = vec_splats(ux);
+// CHECK: insertelement <1 x i128>
+// CHECK-LE: insertelement <1 x i128>
+
+  res_vd = vec_splats(d);
+// CHECK: insertelement <2 x double>
+// CHECK-LE: insertelement <2 x double>
+
+
   /* vec_unpackh */
-  res_vsll = vec_unpackh(vi);
+  res_vsll = vec_unpackh(vsi);
 // CHECK: llvm.ppc.altivec.vupkhsw
 // CHECK-LE: llvm.ppc.altivec.vupklsw
 // CHECK-PPC: error: call to 'vec_unpackh' is ambiguous
@@ -642,7 +1110,7 @@
 // CHECK-PPC: error: call to 'vec_unpackh' is ambiguous
 
   /* vec_unpackl */
-  res_vsll = vec_unpackl(vi);
+  res_vsll = vec_unpackl(vsi);
 // CHECK: llvm.ppc.altivec.vupklsw
 // CHECK-LE: llvm.ppc.altivec.vupkhsw
 // CHECK-PPC: error: call to 'vec_unpackl' is ambiguous
@@ -653,7 +1121,7 @@
 // CHECK-PPC: error: call to 'vec_unpackl' is ambiguous
 
   /* vec_vpksdss */
-  res_vi = vec_vpksdss(vsll, vsll);
+  res_vsi = vec_vpksdss(vsll, vsll);
 // CHECK: llvm.ppc.altivec.vpksdss
 // CHECK-LE: llvm.ppc.altivec.vpksdss
 // CHECK-PPC: warning: implicit declaration of function 'vec_vpksdss'
@@ -665,7 +1133,7 @@
 // CHECK-PPC: warning: implicit declaration of function 'vec_vpksdus'
 
   /* vec_vpkudum */
-  res_vi = vec_vpkudum(vsll, vsll);
+  res_vsi = vec_vpkudum(vsll, vsll);
 // CHECK: vperm
 // CHECK-LE: vperm
 // CHECK-PPC: warning: implicit declaration of function 'vec_vpkudum'
@@ -680,7 +1148,7 @@
 // CHECK-PPC: warning: implicit declaration of function 'vec_vpkudus'
 
   /* vec_vupkhsw */
-  res_vsll = vec_vupkhsw(vi);
+  res_vsll = vec_vupkhsw(vsi);
 // CHECK: llvm.ppc.altivec.vupkhsw
 // CHECK-LE: llvm.ppc.altivec.vupklsw
 // CHECK-PPC: warning: implicit declaration of function 'vec_vupkhsw'
@@ -690,7 +1158,7 @@
 // CHECK-LE: llvm.ppc.altivec.vupklsw
 
   /* vec_vupklsw */
-  res_vsll = vec_vupklsw(vi);
+  res_vsll = vec_vupklsw(vsi);
 // CHECK: llvm.ppc.altivec.vupklsw
 // CHECK-LE: llvm.ppc.altivec.vupkhsw
 // CHECK-PPC: warning: implicit declaration of function 'vec_vupklsw'
@@ -741,6 +1209,271 @@
 // CHECK: @llvm.ppc.altivec.vminud
 // CHECK-LE: @llvm.ppc.altivec.vminud
 
+  /* vec_nand */
+  res_vsc = vec_nand(vsc, vsc);
+// CHECK: [[T1:%.+]] = and <16 x i8>
+// CHECK: xor <16 x i8> [[T1]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+// CHECK-LE: [[T1:%.+]] = and <16 x i8>
+// CHECK-LE: xor <16 x i8> [[T1]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+// CHECK-PPC: warning: implicit declaration of function 'vec_nand' is invalid in C99
+
+  res_vsc = vec_nand(vbc, vbc);
+// CHECK: [[T1:%.+]] = and <16 x i8>
+// CHECK: xor <16 x i8> [[T1]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+// CHECK-LE: [[T1:%.+]] = and <16 x i8>
+// CHECK-LE: xor <16 x i8> [[T1]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+  
+  res_vuc = vec_nand(vuc, vuc);
+// CHECK: [[T1:%.+]] = and <16 x i8>
+// CHECK: xor <16 x i8> [[T1]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+// CHECK-LE: [[T1:%.+]] = and <16 x i8>
+// CHECK-LE: xor <16 x i8> [[T1]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+  
+  res_vss = vec_nand(vss, vss);
+// CHECK: [[T1:%.+]] = and <8 x i16>
+// CHECK: xor <8 x i16> [[T1]], <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+// CHECK-LE: [[T1:%.+]] = and <8 x i16>
+// CHECK-LE: xor <8 x i16> [[T1]], <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+
+  res_vss = vec_nand(vbs, vbs);
+// CHECK: [[T1:%.+]] = and <8 x i16>
+// CHECK: xor <8 x i16> [[T1]], <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+// CHECK-LE: [[T1:%.+]] = and <8 x i16>
+// CHECK-LE: xor <8 x i16> [[T1]], <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+
+  res_vus = vec_nand(vus, vus);
+// CHECK: [[T1:%.+]] = and <8 x i16>
+// CHECK: xor <8 x i16> [[T1]], <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+// CHECK-LE: [[T1:%.+]] = and <8 x i16>
+// CHECK-LE: xor <8 x i16> [[T1]], <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+
+  res_vsi = vec_nand(vsi, vsi);
+// CHECK: [[T1:%.+]] = and <4 x i32>
+// CHECK: xor <4 x i32> [[T1]], <i32 -1, i32 -1, i32 -1, i32 -1>
+// CHECK-LE: [[T1:%.+]] = and <4 x i32>
+// CHECK-LE: xor <4 x i32> [[T1]], <i32 -1, i32 -1, i32 -1, i32 -1>
+
+  res_vsi = vec_nand(vbi, vbi);
+// CHECK: [[T1:%.+]] = and <4 x i32>
+// CHECK: xor <4 x i32> [[T1]], <i32 -1, i32 -1, i32 -1, i32 -1>
+// CHECK-LE: [[T1:%.+]] = and <4 x i32>
+// CHECK-LE: xor <4 x i32> [[T1]], <i32 -1, i32 -1, i32 -1, i32 -1>
+
+  res_vui = vec_nand(vui, vui);
+// CHECK: [[T1:%.+]] = and <4 x i32>
+// CHECK: xor <4 x i32> [[T1]], <i32 -1, i32 -1, i32 -1, i32 -1>
+// CHECK-LE: [[T1:%.+]] = and <4 x i32>
+// CHECK-LE: xor <4 x i32> [[T1]], <i32 -1, i32 -1, i32 -1, i32 -1>
+
+  res_vsll = vec_nand(vsll, vsll);
+// CHECK: [[T1:%.+]] = and <2 x i64>
+// CHECK: xor <2 x i64> [[T1]], <i64 -1, i64 -1>
+// CHECK-LE: [[T1:%.+]] = and <2 x i64>
+// CHECK-LE: xor <2 x i64> [[T1]], <i64 -1, i64 -1>
+
+  res_vsll = vec_nand(vbll, vbll);
+// CHECK: [[T1:%.+]] = and <2 x i64>
+// CHECK: xor <2 x i64> [[T1]], <i64 -1, i64 -1>
+// CHECK-LE: [[T1:%.+]] = and <2 x i64>
+// CHECK-LE: xor <2 x i64> [[T1]], <i64 -1, i64 -1>
+
+  res_vull = vec_nand(vull, vull);
+// CHECK: [[T1:%.+]] = and <2 x i64>
+// CHECK: xor <2 x i64> [[T1]], <i64 -1, i64 -1>
+// CHECK-LE: [[T1:%.+]] = and <2 x i64>
+// CHECK-LE: xor <2 x i64> [[T1]], <i64 -1, i64 -1>
+
+  /* vec_orc */
+  res_vsc = vec_orc(vsc, vsc);
+// CHECK: [[T1:%.+]] = xor <16 x i8> {{%.+}}, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+// CHECK: or <16 x i8> {{%.+}}, [[T1]]
+// CHECK-LE: [[T1:%.+]] = xor <16 x i8> {{%.+}}, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+// CHECK-LE: or <16 x i8> {{%.+}}, [[T1]]
+// CHECK-PPC: warning: implicit declaration of function 'vec_orc' is invalid in C99
+
+  res_vsc = vec_orc(vsc, vbc);
+// CHECK: [[T1:%.+]] = xor <16 x i8> {{%.+}}, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+// CHECK: or <16 x i8> {{%.+}}, [[T1]]
+// CHECK-LE: [[T1:%.+]] = xor <16 x i8> {{%.+}}, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+// CHECK-LE: or <16 x i8> {{%.+}}, [[T1]]
+
+  res_vsc = vec_orc(vbc, vsc);
+// CHECK: [[T1:%.+]] = xor <16 x i8> {{%.+}}, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+// CHECK: or <16 x i8> {{%.+}}, [[T1]]
+// CHECK-LE: [[T1:%.+]] = xor <16 x i8> {{%.+}}, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+// CHECK-LE: or <16 x i8> {{%.+}}, [[T1]]
+
+  res_vuc = vec_orc(vuc, vuc);
+// CHECK: [[T1:%.+]] = xor <16 x i8> {{%.+}}, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+// CHECK: or <16 x i8> {{%.+}}, [[T1]]
+// CHECK-LE: [[T1:%.+]] = xor <16 x i8> {{%.+}}, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+// CHECK-LE: or <16 x i8> {{%.+}}, [[T1]]
+
+  res_vuc = vec_orc(vuc, vbc);
+// CHECK: [[T1:%.+]] = xor <16 x i8> {{%.+}}, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+// CHECK: or <16 x i8> {{%.+}}, [[T1]]
+// CHECK-LE: [[T1:%.+]] = xor <16 x i8> {{%.+}}, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+// CHECK-LE: or <16 x i8> {{%.+}}, [[T1]]
+
+  res_vuc = vec_orc(vbc, vuc);
+// CHECK: [[T1:%.+]] = xor <16 x i8> {{%.+}}, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+// CHECK: or <16 x i8> {{%.+}}, [[T1]]
+// CHECK-LE: [[T1:%.+]] = xor <16 x i8> {{%.+}}, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+// CHECK-LE: or <16 x i8> {{%.+}}, [[T1]]
+
+  res_vbc = vec_orc(vbc, vbc);
+// CHECK: [[T1:%.+]] = xor <16 x i8> {{%.+}}, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+// CHECK: or <16 x i8> {{%.+}}, [[T1]]
+// CHECK-LE: [[T1:%.+]] = xor <16 x i8> {{%.+}}, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+// CHECK-LE: or <16 x i8> {{%.+}}, [[T1]]
+
+  res_vss = vec_orc(vss, vss);
+// CHECK: [[T1:%.+]] = xor <8 x i16> {{%.+}}, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+// CHECK: or <8 x i16> {{%.+}}, [[T1]]
+// CHECK-LE: [[T1:%.+]] = xor <8 x i16> {{%.+}}, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+// CHECK-LE: or <8 x i16> {{%.+}}, [[T1]]
+
+  res_vss = vec_orc(vss, vbs);
+// CHECK: [[T1:%.+]] = xor <8 x i16> {{%.+}}, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+// CHECK: or <8 x i16> {{%.+}}, [[T1]]
+// CHECK-LE: [[T1:%.+]] = xor <8 x i16> {{%.+}}, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+// CHECK-LE: or <8 x i16> {{%.+}}, [[T1]]
+
+  res_vss = vec_orc(vbs, vss);
+// CHECK: [[T1:%.+]] = xor <8 x i16> {{%.+}}, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+// CHECK: or <8 x i16> {{%.+}}, [[T1]]
+// CHECK-LE: [[T1:%.+]] = xor <8 x i16> {{%.+}}, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+// CHECK-LE: or <8 x i16> {{%.+}}, [[T1]]
+
+  res_vus = vec_orc(vus, vus);
+// CHECK: [[T1:%.+]] = xor <8 x i16> {{%.+}}, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+// CHECK: or <8 x i16> {{%.+}}, [[T1]]
+// CHECK-LE: [[T1:%.+]] = xor <8 x i16> {{%.+}}, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+// CHECK-LE: or <8 x i16> {{%.+}}, [[T1]]
+
+  res_vus = vec_orc(vus, vbs);
+// CHECK: [[T1:%.+]] = xor <8 x i16> {{%.+}}, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+// CHECK: or <8 x i16> {{%.+}}, [[T1]]
+// CHECK-LE: [[T1:%.+]] = xor <8 x i16> {{%.+}}, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+// CHECK-LE: or <8 x i16> {{%.+}}, [[T1]]
+
+  res_vus = vec_orc(vbs, vus);
+// CHECK: [[T1:%.+]] = xor <8 x i16> {{%.+}}, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+// CHECK: or <8 x i16> {{%.+}}, [[T1]]
+// CHECK-LE: [[T1:%.+]] = xor <8 x i16> {{%.+}}, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+// CHECK-LE: or <8 x i16> {{%.+}}, [[T1]]
+
+  res_vbs = vec_orc(vbs, vbs);
+// CHECK: [[T1:%.+]] = xor <8 x i16> {{%.+}}, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+// CHECK: or <8 x i16> {{%.+}}, [[T1]]
+// CHECK-LE: [[T1:%.+]] = xor <8 x i16> {{%.+}}, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+// CHECK-LE: or <8 x i16> {{%.+}}, [[T1]]
+
+  res_vsi = vec_orc(vsi, vsi);
+// CHECK: [[T1:%.+]] = xor <4 x i32> {{%.+}}, <i32 -1, i32 -1, i32 -1, i32 -1>
+// CHECK: or <4 x i32> {{%.+}}, [[T1]]
+// CHECK-LE: [[T1:%.+]] = xor <4 x i32> {{%.+}}, <i32 -1, i32 -1, i32 -1, i32 -1>
+// CHECK-LE: or <4 x i32> {{%.+}}, [[T1]]
+
+  res_vsi = vec_orc(vsi, vbi);
+// CHECK: [[T1:%.+]] = xor <4 x i32> {{%.+}}, <i32 -1, i32 -1, i32 -1, i32 -1>
+// CHECK: or <4 x i32> {{%.+}}, [[T1]]
+// CHECK-LE: [[T1:%.+]] = xor <4 x i32> {{%.+}}, <i32 -1, i32 -1, i32 -1, i32 -1>
+// CHECK-LE: or <4 x i32> {{%.+}}, [[T1]]
+
+  res_vsi = vec_orc(vbi, vsi);
+// CHECK: [[T1:%.+]] = xor <4 x i32> {{%.+}}, <i32 -1, i32 -1, i32 -1, i32 -1>
+// CHECK: or <4 x i32> {{%.+}}, [[T1]]
+// CHECK-LE: [[T1:%.+]] = xor <4 x i32> {{%.+}}, <i32 -1, i32 -1, i32 -1, i32 -1>
+// CHECK-LE: or <4 x i32> {{%.+}}, [[T1]]
+
+  res_vui = vec_orc(vui, vui);
+// CHECK: [[T1:%.+]] = xor <4 x i32> {{%.+}}, <i32 -1, i32 -1, i32 -1, i32 -1>
+// CHECK: or <4 x i32> {{%.+}}, [[T1]]
+// CHECK-LE: [[T1:%.+]] = xor <4 x i32> {{%.+}}, <i32 -1, i32 -1, i32 -1, i32 -1>
+// CHECK-LE: or <4 x i32> {{%.+}}, [[T1]]
+
+  res_vui = vec_orc(vui, vbi);
+// CHECK: [[T1:%.+]] = xor <4 x i32> {{%.+}}, <i32 -1, i32 -1, i32 -1, i32 -1>
+// CHECK: or <4 x i32> {{%.+}}, [[T1]]
+// CHECK-LE: [[T1:%.+]] = xor <4 x i32> {{%.+}}, <i32 -1, i32 -1, i32 -1, i32 -1>
+// CHECK-LE: or <4 x i32> {{%.+}}, [[T1]]
+
+  res_vui = vec_orc(vbi, vui);
+// CHECK: [[T1:%.+]] = xor <4 x i32> {{%.+}}, <i32 -1, i32 -1, i32 -1, i32 -1>
+// CHECK: or <4 x i32> {{%.+}}, [[T1]]
+// CHECK-LE: [[T1:%.+]] = xor <4 x i32> {{%.+}}, <i32 -1, i32 -1, i32 -1, i32 -1>
+// CHECK-LE: or <4 x i32> {{%.+}}, [[T1]]
+
+  res_vbi = vec_orc(vbi, vbi);
+// CHECK: [[T1:%.+]] = xor <4 x i32> {{%.+}}, <i32 -1, i32 -1, i32 -1, i32 -1>
+// CHECK: or <4 x i32> {{%.+}}, [[T1]]
+// CHECK-LE: [[T1:%.+]] = xor <4 x i32> {{%.+}}, <i32 -1, i32 -1, i32 -1, i32 -1>
+// CHECK-LE: or <4 x i32> {{%.+}}, [[T1]]
+
+  res_vsll = vec_orc(vsll, vsll);
+// CHECK: [[T1:%.+]] = xor <2 x i64> {{%.+}}, <i64 -1, i64 -1>
+// CHECK: or <2 x i64> {{%.+}}, [[T1]]
+// CHECK-LE: [[T1:%.+]] = xor <2 x i64> {{%.+}}, <i64 -1, i64 -1>
+// CHECK-LE: or <2 x i64> {{%.+}}, [[T1]]
+
+  res_vsll = vec_orc(vsll, vbll);
+// CHECK: [[T1:%.+]] = xor <2 x i64> {{%.+}}, <i64 -1, i64 -1>
+// CHECK: or <2 x i64> {{%.+}}, [[T1]]
+// CHECK-LE: [[T1:%.+]] = xor <2 x i64> {{%.+}}, <i64 -1, i64 -1>
+// CHECK-LE: or <2 x i64> {{%.+}}, [[T1]]
+
+  res_vsll = vec_orc(vbll, vsll);
+// CHECK: [[T1:%.+]] = xor <2 x i64> {{%.+}}, <i64 -1, i64 -1>
+// CHECK: or <2 x i64> {{%.+}}, [[T1]]
+// CHECK-LE: [[T1:%.+]] = xor <2 x i64> {{%.+}}, <i64 -1, i64 -1>
+// CHECK-LE: or <2 x i64> {{%.+}}, [[T1]]
+
+  res_vull = vec_orc(vull, vull);
+// CHECK: [[T1:%.+]] = xor <2 x i64> {{%.+}}, <i64 -1, i64 -1>
+// CHECK: or <2 x i64> {{%.+}}, [[T1]]
+// CHECK-LE: [[T1:%.+]] = xor <2 x i64> {{%.+}}, <i64 -1, i64 -1>
+// CHECK-LE: or <2 x i64> {{%.+}}, [[T1]]
+
+  res_vull = vec_orc(vull, vbll);
+// CHECK: [[T1:%.+]] = xor <2 x i64> {{%.+}}, <i64 -1, i64 -1>
+// CHECK: or <2 x i64> {{%.+}}, [[T1]]
+// CHECK-LE: [[T1:%.+]] = xor <2 x i64> {{%.+}}, <i64 -1, i64 -1>
+// CHECK-LE: or <2 x i64> {{%.+}}, [[T1]]
+
+  res_vull = vec_orc(vbll, vull);
+// CHECK: [[T1:%.+]] = xor <2 x i64> {{%.+}}, <i64 -1, i64 -1>
+// CHECK: or <2 x i64> {{%.+}}, [[T1]]
+// CHECK-LE: [[T1:%.+]] = xor <2 x i64> {{%.+}}, <i64 -1, i64 -1>
+// CHECK-LE: or <2 x i64> {{%.+}}, [[T1]]
+
+  res_vbll = vec_orc(vbll, vbll);
+// CHECK: [[T1:%.+]] = xor <2 x i64> {{%.+}}, <i64 -1, i64 -1>
+// CHECK: or <2 x i64> {{%.+}}, [[T1]]
+// CHECK-LE: [[T1:%.+]] = xor <2 x i64> {{%.+}}, <i64 -1, i64 -1>
+// CHECK-LE: or <2 x i64> {{%.+}}, [[T1]]
+
+  /* vec_sub */
+  res_vsll = vec_sub(vsll, vsll);
+// CHECK: sub <2 x i64>
+// CHECK-LE: sub <2 x i64>
+
+  res_vull = vec_sub(vull, vull);
+// CHECK: sub <2 x i64>
+// CHECK-LE: sub <2 x i64>
+
+  res_vd = vec_sub(vda, vda);
+// CHECK: fsub <2 x double>
+// CHECK-LE: fsub <2 x double>
+
+  res_vsx = vec_sub(vsx, vsx);
+// CHECK: sub <1 x i128>
+// CHECK-LE: sub <1 x i128>
+
+  res_vux = vec_sub(vux, vux);
+// CHECK: sub <1 x i128>
+// CHECK-LE: sub <1 x i128>
+
   /* vec_vbpermq */
   res_vsll = vec_vbpermq(vsc, vsc);
 // CHECK: llvm.ppc.altivec.vbpermq
@@ -760,4 +1493,14 @@
 // CHECK: llvm.ppc.altivec.vgbbd
 // CHECK-LE: llvm.ppc.altivec.vgbbd
 // CHECK-PPC: warning: implicit declaration of function 'vec_vgbbd'
+
+  res_vuc = vec_gb(vuc);
+// CHECK: llvm.ppc.altivec.vgbbd
+// CHECK-LE: llvm.ppc.altivec.vgbbd
+// CHECK-PPC: warning: implicit declaration of function 'vec_gb'
+
+  res_vull = vec_bperm(vux, vux);
+// CHECK: llvm.ppc.altivec.vbpermq
+// CHECK-LE: llvm.ppc.altivec.vbpermq
+// CHECK-PPC: warning: implicit declaration of function 'vec_bperm'
 }
diff --git a/test/CodeGen/builtins-ppc-vsx.c b/test/CodeGen/builtins-ppc-vsx.c
index 631cb6c..9a40d30 100644
--- a/test/CodeGen/builtins-ppc-vsx.c
+++ b/test/CodeGen/builtins-ppc-vsx.c
@@ -1,5 +1,6 @@
 // REQUIRES: powerpc-registered-target
 // RUN: %clang_cc1 -faltivec -target-feature +vsx -triple powerpc64-unknown-unknown -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -faltivec -target-feature +vsx -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s -check-prefix=CHECK-LE
 
 vector unsigned char vuc = { 8,  9, 10, 11, 12, 13, 14, 15,
                              0,  1,  2,  3,  4,  5,  6,  7};
@@ -16,278 +17,832 @@
 vector double res_vd;
 vector signed int res_vsi;
 vector unsigned int res_vui;
+vector bool int res_vbi;
 vector bool long long res_vbll;
 vector signed long long res_vsll;
 vector unsigned long long res_vull;
 double res_d;
 
+void dummy() { }
+
 void test1() {
 // CHECK-LABEL: define void @test1
+// CHECK-LE-LABEL: define void @test1
+
+  res_vd = vec_add(vd, vd);
+// CHECK: fadd <2 x double>
+// CHECK-LE: fadd <2 x double>
+
+  res_vd = vec_and(vbll, vd);
+// CHECK: and <2 x i64>
+// CHECK: bitcast <2 x i64> %{{[0-9]*}} to <2 x double>
+// CHECK-LE: and <2 x i64>
+// CHECK-LE: bitcast <2 x i64> %{{[0-9]*}} to <2 x double>
+
+  res_vd = vec_and(vd, vbll);
+// CHECK: and <2 x i64>
+// CHECK: bitcast <2 x i64> %{{[0-9]*}} to <2 x double>
+// CHECK-LE: and <2 x i64>
+// CHECK-LE: bitcast <2 x i64> %{{[0-9]*}} to <2 x double>
+
+  res_vd = vec_and(vd, vd);
+// CHECK: and <2 x i64>
+// CHECK: bitcast <2 x i64> %{{[0-9]*}} to <2 x double>
+// CHECK-LE: and <2 x i64>
+// CHECK-LE: bitcast <2 x i64> %{{[0-9]*}} to <2 x double>
+
+  dummy();
+// CHECK: call void @dummy()
+// CHECK-LE: call void @dummy()
+
+  res_vd = vec_andc(vbll, vd);
+// CHECK: bitcast <2 x double> %{{[0-9]*}} to <2 x i64>
+// CHECK: xor <2 x i64> %{{[0-9]*}}, <i64 -1, i64 -1>
+// CHECK: and <2 x i64>
+// CHECK: bitcast <2 x i64> %{{[0-9]*}} to <2 x double>
+// CHECK-LE: bitcast <2 x double> %{{[0-9]*}} to <2 x i64>
+// CHECK-LE: xor <2 x i64> %{{[0-9]*}}, <i64 -1, i64 -1>
+// CHECK-LE: and <2 x i64>
+// CHECK-LE: bitcast <2 x i64> %{{[0-9]*}} to <2 x double>
+
+  dummy();
+// CHECK: call void @dummy()
+// CHECK-LE: call void @dummy()
+
+  res_vd = vec_andc(vd, vbll);
+// CHECK: bitcast <2 x double> %{{[0-9]*}} to <2 x i64>
+// CHECK: xor <2 x i64> %{{[0-9]*}}, <i64 -1, i64 -1>
+// CHECK: and <2 x i64>
+// CHECK: bitcast <2 x i64> %{{[0-9]*}} to <2 x double>
+// CHECK-LE: bitcast <2 x double> %{{[0-9]*}} to <2 x i64>
+// CHECK-LE: xor <2 x i64> %{{[0-9]*}}, <i64 -1, i64 -1>
+// CHECK-LE: and <2 x i64>
+// CHECK-LE: bitcast <2 x i64> %{{[0-9]*}} to <2 x double>
+
+  dummy();
+// CHECK: call void @dummy()
+
+  res_vd = vec_andc(vd, vd);
+// CHECK: bitcast <2 x double> %{{[0-9]*}} to <2 x i64>
+// CHECK: xor <2 x i64> %{{[0-9]*}}, <i64 -1, i64 -1>
+// CHECK: and <2 x i64>
+// CHECK: bitcast <2 x i64> %{{[0-9]*}} to <2 x double>
+
+  dummy();
+// CHECK: call void @dummy()
+// CHECK-LE: call void @dummy()
+
+  res_vd = vec_ceil(vd);
+// CHECK: call <2 x double> @llvm.ceil.v2f64(<2 x double> %{{[0-9]*}})
+// CHECK-LE: call <2 x double> @llvm.ceil.v2f64(<2 x double> %{{[0-9]*}})
+
+  res_vf = vec_ceil(vf);
+// CHECK: call <4 x float> @llvm.ceil.v4f32(<4 x float> %{{[0-9]*}})
+// CHECK-LE: call <4 x float> @llvm.ceil.v4f32(<4 x float> %{{[0-9]*}})
+
+  res_vbll = vec_cmpeq(vd, vd);
+// CHECK: call <2 x i64> @llvm.ppc.vsx.xvcmpeqdp(<2 x double> %{{[0-9]*}}, <2 x double> %{{[0-9]*}})
+// CHECK-LE: call <2 x i64> @llvm.ppc.vsx.xvcmpeqdp(<2 x double> %{{[0-9]*}}, <2 x double> %{{[0-9]*}})
+
+  res_vbi = vec_cmpeq(vf, vf);
+// CHECK: call <4 x i32> @llvm.ppc.vsx.xvcmpeqsp(<4 x float> %{{[0-9]*}}, <4 x float> %{{[0-9]*}})
+// CHECK-LE: call <4 x i32> @llvm.ppc.vsx.xvcmpeqsp(<4 x float> %{{[0-9]*}}, <4 x float> %{{[0-9]*}})
+
+  res_vbll = vec_cmpge(vd, vd);
+// CHECK: call <2 x i64> @llvm.ppc.vsx.xvcmpgedp(<2 x double> %{{[0-9]*}}, <2 x double> %{{[0-9]*}})
+// CHECK-LE: call <2 x i64> @llvm.ppc.vsx.xvcmpgedp(<2 x double> %{{[0-9]*}}, <2 x double> %{{[0-9]*}})
+
+  res_vbi = vec_cmpge(vf, vf);
+// CHECK: call <4 x i32> @llvm.ppc.vsx.xvcmpgesp(<4 x float> %{{[0-9]*}}, <4 x float> %{{[0-9]*}})
+// CHECK-LE: call <4 x i32> @llvm.ppc.vsx.xvcmpgesp(<4 x float> %{{[0-9]*}}, <4 x float> %{{[0-9]*}})
+
+  res_vbll = vec_cmpgt(vd, vd);
+// CHECK: call <2 x i64> @llvm.ppc.vsx.xvcmpgtdp(<2 x double> %{{[0-9]*}}, <2 x double> %{{[0-9]*}})
+// CHECK-LE: call <2 x i64> @llvm.ppc.vsx.xvcmpgtdp(<2 x double> %{{[0-9]*}}, <2 x double> %{{[0-9]*}})
+
+  res_vbi = vec_cmpgt(vf, vf);
+// CHECK: call <4 x i32> @llvm.ppc.vsx.xvcmpgtsp(<4 x float> %{{[0-9]*}}, <4 x float> %{{[0-9]*}})
+// CHECK-LE: call <4 x i32> @llvm.ppc.vsx.xvcmpgtsp(<4 x float> %{{[0-9]*}}, <4 x float> %{{[0-9]*}})
+
+  res_vbll = vec_cmple(vd, vd);
+// CHECK: call <2 x i64> @llvm.ppc.vsx.xvcmpgedp(<2 x double> %{{[0-9]*}}, <2 x double> %{{[0-9]*}})
+// CHECK-LE: call <2 x i64> @llvm.ppc.vsx.xvcmpgedp(<2 x double> %{{[0-9]*}}, <2 x double> %{{[0-9]*}})
+
+  res_vbi = vec_cmple(vf, vf);
+// CHECK: call <4 x i32> @llvm.ppc.vsx.xvcmpgesp(<4 x float> %{{[0-9]*}}, <4 x float> %{{[0-9]*}})
+// CHECK-LE: call <4 x i32> @llvm.ppc.vsx.xvcmpgesp(<4 x float> %{{[0-9]*}}, <4 x float> %{{[0-9]*}})
+
+  res_vbll = vec_cmplt(vd, vd);
+// CHECK: call <2 x i64> @llvm.ppc.vsx.xvcmpgtdp(<2 x double> %{{[0-9]*}}, <2 x double> %{{[0-9]*}})
+// CHECK-LE: call <2 x i64> @llvm.ppc.vsx.xvcmpgtdp(<2 x double> %{{[0-9]*}}, <2 x double> %{{[0-9]*}})
+
+  res_vbi = vec_cmplt(vf, vf);
+// CHECK: call <4 x i32> @llvm.ppc.vsx.xvcmpgtsp(<4 x float> %{{[0-9]*}}, <4 x float> %{{[0-9]*}})
+// CHECK-LE: call <4 x i32> @llvm.ppc.vsx.xvcmpgtsp(<4 x float> %{{[0-9]*}}, <4 x float> %{{[0-9]*}})
+
+  /* vec_cpsgn */
+  res_vf = vec_cpsgn(vf, vf);
+// CHECK: call <4 x float> @llvm.copysign.v4f32(<4 x float> %{{.+}}, <4 x float> %{{.+}})
+// CHECK-LE: call <4 x float> @llvm.copysign.v4f32(<4 x float> %{{.+}}, <4 x float> %{{.+}})
+
+  res_vd = vec_cpsgn(vd, vd);
+// CHECK: call <2 x double> @llvm.copysign.v2f64(<2 x double> %{{.+}}, <2 x double> %{{.+}})
+// CHECK-LE: call <2 x double> @llvm.copysign.v2f64(<2 x double> %{{.+}}, <2 x double> %{{.+}})
 
   /* vec_div */
+  res_vsll = vec_div(vsll, vsll);
+// CHECK: sdiv <2 x i64>
+// CHECK-LE: sdiv <2 x i64>
+
+  res_vull = vec_div(vull, vull);
+// CHECK: udiv <2 x i64>
+// CHECK-LE: udiv <2 x i64>
+
   res_vf = vec_div(vf, vf);
-// CHECK: @llvm.ppc.vsx.xvdivsp
+// CHECK: fdiv <4 x float>
+// CHECK-LE: fdiv <4 x float>
 
   res_vd = vec_div(vd, vd);
-// CHECK: @llvm.ppc.vsx.xvdivdp
+// CHECK: fdiv <2 x double>
+// CHECK-LE: fdiv <2 x double>
 
   /* vec_max */
   res_vf = vec_max(vf, vf);
 // CHECK: @llvm.ppc.vsx.xvmaxsp
+// CHECK-LE: @llvm.ppc.vsx.xvmaxsp
 
   res_vd = vec_max(vd, vd);
 // CHECK: @llvm.ppc.vsx.xvmaxdp
+// CHECK-LE: @llvm.ppc.vsx.xvmaxdp
 
   res_vf = vec_vmaxfp(vf, vf);
 // CHECK: @llvm.ppc.vsx.xvmaxsp
+// CHECK-LE: @llvm.ppc.vsx.xvmaxsp
 
   /* vec_min */
   res_vf = vec_min(vf, vf);
 // CHECK: @llvm.ppc.vsx.xvminsp
+// CHECK-LE: @llvm.ppc.vsx.xvminsp
 
   res_vd = vec_min(vd, vd);
 // CHECK: @llvm.ppc.vsx.xvmindp
+// CHECK-LE: @llvm.ppc.vsx.xvmindp
 
   res_vf = vec_vminfp(vf, vf);
 // CHECK: @llvm.ppc.vsx.xvminsp
+// CHECK-LE: @llvm.ppc.vsx.xvminsp
 
   res_d = __builtin_vsx_xsmaxdp(d, d);
 // CHECK: @llvm.ppc.vsx.xsmaxdp
+// CHECK-LE: @llvm.ppc.vsx.xsmaxdp
 
   res_d = __builtin_vsx_xsmindp(d, d);
 // CHECK: @llvm.ppc.vsx.xsmindp
+// CHECK-LE: @llvm.ppc.vsx.xsmindp
 
   /* vec_perm */
   res_vsll = vec_perm(vsll, vsll, vuc);
 // CHECK: @llvm.ppc.altivec.vperm
+// CHECK-LE: @llvm.ppc.altivec.vperm
 
   res_vull = vec_perm(vull, vull, vuc);
 // CHECK: @llvm.ppc.altivec.vperm
+// CHECK-LE: @llvm.ppc.altivec.vperm
+
+  res_vbll = vec_perm(vbll, vbll, vuc);
+// CHECK: [[T1:%.+]] = bitcast <2 x i64> {{.+}} to <4 x i32>
+// CHECK: [[T2:%.+]] = bitcast <2 x i64> {{.+}} to <4 x i32>
+// CHECK: call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> [[T1]], <4 x i32> [[T2]], <16 x i8>
+// CHECK-LE: xor <16 x i8>
+// CHECK-LE: [[T1:%.+]] = bitcast <2 x i64> {{.+}} to <4 x i32>
+// CHECK-LE: [[T2:%.+]] = bitcast <2 x i64> {{.+}} to <4 x i32>
+// CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> [[T1]], <4 x i32> [[T2]], <16 x i8>
+
+  res_vf = vec_round(vf);
+// CHECK: call <4 x float> @llvm.round.v4f32(<4 x float>
+// CHECK-LE: call <4 x float> @llvm.round.v4f32(<4 x float>
+
+  res_vd = vec_round(vd);
+// CHECK: call <2 x double> @llvm.round.v2f64(<2 x double>
+// CHECK-LE: call <2 x double> @llvm.round.v2f64(<2 x double>
 
   res_vd = vec_perm(vd, vd, vuc);
 // CHECK: @llvm.ppc.altivec.vperm
+// CHECK-LE: @llvm.ppc.altivec.vperm
+
+  res_vd = vec_splat(vd, 1);
+// CHECK: [[T1:%.+]] = bitcast <2 x double> {{.+}} to <4 x i32>
+// CHECK: [[T2:%.+]] = bitcast <2 x double> {{.+}} to <4 x i32>
+// CHECK: call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> [[T1]], <4 x i32> [[T2]], <16 x i8>
+// CHECK-LE: xor <16 x i8>
+// CHECK-LE: [[T1:%.+]] = bitcast <2 x double> {{.+}} to <4 x i32>
+// CHECK-LE: [[T2:%.+]] = bitcast <2 x double> {{.+}} to <4 x i32>
+// CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> [[T1]], <4 x i32> [[T2]], <16 x i8>
+
+  res_vbll = vec_splat(vbll, 1);
+// CHECK: [[T1:%.+]] = bitcast <2 x i64> {{.+}} to <4 x i32>
+// CHECK: [[T2:%.+]] = bitcast <2 x i64> {{.+}} to <4 x i32>
+// CHECK: call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> [[T1]], <4 x i32> [[T2]], <16 x i8>
+// CHECK-LE: xor <16 x i8>
+// CHECK-LE: [[T1:%.+]] = bitcast <2 x i64> {{.+}} to <4 x i32>
+// CHECK-LE: [[T2:%.+]] = bitcast <2 x i64> {{.+}} to <4 x i32>
+// CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> [[T1]], <4 x i32> [[T2]], <16 x i8>
+
+  res_vsll = vec_splat(vsll, 1);
+// CHECK: [[T1:%.+]] = bitcast <2 x i64> {{.+}} to <4 x i32>
+// CHECK: [[T2:%.+]] = bitcast <2 x i64> {{.+}} to <4 x i32>
+// CHECK: call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> [[T1]], <4 x i32> [[T2]], <16 x i8>
+// CHECK-LE: xor <16 x i8>
+// CHECK-LE: [[T1:%.+]] = bitcast <2 x i64> {{.+}} to <4 x i32>
+// CHECK-LE: [[T2:%.+]] = bitcast <2 x i64> {{.+}} to <4 x i32>
+// CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> [[T1]], <4 x i32> [[T2]], <16 x i8>
+
+  res_vull = vec_splat(vull, 1);
+// CHECK: [[T1:%.+]] = bitcast <2 x i64> {{.+}} to <4 x i32>
+// CHECK: [[T2:%.+]] = bitcast <2 x i64> {{.+}} to <4 x i32>
+// CHECK: call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> [[T1]], <4 x i32> [[T2]], <16 x i8>
+// CHECK-LE: xor <16 x i8>
+// CHECK-LE: [[T1:%.+]] = bitcast <2 x i64> {{.+}} to <4 x i32>
+// CHECK-LE: [[T2:%.+]] = bitcast <2 x i64> {{.+}} to <4 x i32>
+// CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> [[T1]], <4 x i32> [[T2]], <16 x i8>
+
+  res_vsi = vec_pack(vsll, vsll);
+// CHECK: @llvm.ppc.altivec.vperm
+// CHECK-LE: @llvm.ppc.altivec.vperm
+
+  res_vui = vec_pack(vull, vull);
+// CHECK: @llvm.ppc.altivec.vperm
+// CHECK-LE: @llvm.ppc.altivec.vperm
+
+  res_vbi = vec_pack(vbll, vbll);
+// CHECK: @llvm.ppc.altivec.vperm
+// CHECK-LE: @llvm.ppc.altivec.vperm
 
   res_vsll = vec_vperm(vsll, vsll, vuc);
 // CHECK: @llvm.ppc.altivec.vperm
+// CHECK-LE: @llvm.ppc.altivec.vperm
 
   res_vull = vec_vperm(vull, vull, vuc);
 // CHECK: @llvm.ppc.altivec.vperm
+// CHECK-LE: @llvm.ppc.altivec.vperm
 
   res_vd = vec_vperm(vd, vd, vuc);
 // CHECK: @llvm.ppc.altivec.vperm
+// CHECK-LE: @llvm.ppc.altivec.vperm
 
   /* vec_vsx_ld */
 
   res_vsi = vec_vsx_ld(0, &vsi);
 // CHECK: @llvm.ppc.vsx.lxvw4x
+// CHECK-LE: @llvm.ppc.vsx.lxvw4x
 
   res_vui = vec_vsx_ld(0, &vui);
 // CHECK: @llvm.ppc.vsx.lxvw4x
+// CHECK-LE: @llvm.ppc.vsx.lxvw4x
 
   res_vf = vec_vsx_ld (0, &vf);
 // CHECK: @llvm.ppc.vsx.lxvw4x
+// CHECK-LE: @llvm.ppc.vsx.lxvw4x
 
   res_vsll = vec_vsx_ld(0, &vsll);
 // CHECK: @llvm.ppc.vsx.lxvd2x
+// CHECK-LE: @llvm.ppc.vsx.lxvd2x
 
   res_vull = vec_vsx_ld(0, &vull);
 // CHECK: @llvm.ppc.vsx.lxvd2x
+// CHECK-LE: @llvm.ppc.vsx.lxvd2x
 
   res_vd = vec_vsx_ld(0, &vd);
 // CHECK: @llvm.ppc.vsx.lxvd2x
+// CHECK-LE: @llvm.ppc.vsx.lxvd2x
 
   /* vec_vsx_st */
 
   vec_vsx_st(vsi, 0, &res_vsi);
 // CHECK: @llvm.ppc.vsx.stxvw4x
+// CHECK-LE: @llvm.ppc.vsx.stxvw4x
 
   vec_vsx_st(vui, 0, &res_vui);
 // CHECK: @llvm.ppc.vsx.stxvw4x
+// CHECK-LE: @llvm.ppc.vsx.stxvw4x
 
   vec_vsx_st(vf, 0, &res_vf);
 // CHECK: @llvm.ppc.vsx.stxvw4x
+// CHECK-LE: @llvm.ppc.vsx.stxvw4x
 
   vec_vsx_st(vsll, 0, &res_vsll);
 // CHECK: @llvm.ppc.vsx.stxvd2x
+// CHECK-LE: @llvm.ppc.vsx.stxvd2x
 
   vec_vsx_st(vull, 0, &res_vull);
 // CHECK: @llvm.ppc.vsx.stxvd2x
+// CHECK-LE: @llvm.ppc.vsx.stxvd2x
 
   vec_vsx_st(vd, 0, &res_vd);
 // CHECK: @llvm.ppc.vsx.stxvd2x
+// CHECK-LE: @llvm.ppc.vsx.stxvd2x
 
   /* vec_and */
   res_vsll = vec_and(vsll, vsll);
 // CHECK: and <2 x i64>
+// CHECK-LE: and <2 x i64>
 
   res_vsll = vec_and(vbll, vsll);
 // CHECK: and <2 x i64>
+// CHECK-LE: and <2 x i64>
 
   res_vsll = vec_and(vsll, vbll);
 // CHECK: and <2 x i64>
+// CHECK-LE: and <2 x i64>
 
   res_vull = vec_and(vull, vull);
 // CHECK: and <2 x i64>
+// CHECK-LE: and <2 x i64>
 
   res_vull = vec_and(vbll, vull);
 // CHECK: and <2 x i64>
+// CHECK-LE: and <2 x i64>
 
   res_vull = vec_and(vull, vbll);
 // CHECK: and <2 x i64>
+// CHECK-LE: and <2 x i64>
 
   res_vbll = vec_and(vbll, vbll);
 // CHECK: and <2 x i64>
+// CHECK-LE: and <2 x i64>
 
   /* vec_vand */
   res_vsll = vec_vand(vsll, vsll);
 // CHECK: and <2 x i64>
+// CHECK-LE: and <2 x i64>
 
   res_vsll = vec_vand(vbll, vsll);
 // CHECK: and <2 x i64>
+// CHECK-LE: and <2 x i64>
 
   res_vsll = vec_vand(vsll, vbll);
 // CHECK: and <2 x i64>
+// CHECK-LE: and <2 x i64>
 
   res_vull = vec_vand(vull, vull);
 // CHECK: and <2 x i64>
+// CHECK-LE: and <2 x i64>
 
   res_vull = vec_vand(vbll, vull);
 // CHECK: and <2 x i64>
+// CHECK-LE: and <2 x i64>
 
   res_vull = vec_vand(vull, vbll);
 // CHECK: and <2 x i64>
+// CHECK-LE: and <2 x i64>
 
   res_vbll = vec_vand(vbll, vbll);
 // CHECK: and <2 x i64>
+// CHECK-LE: and <2 x i64>
 
   /* vec_andc */
   res_vsll = vec_andc(vsll, vsll);
 // CHECK: xor <2 x i64>
 // CHECK: and <2 x i64>
+// CHECK-LE: xor <2 x i64>
+// CHECK-LE: and <2 x i64>
 
   res_vsll = vec_andc(vbll, vsll);
 // CHECK: xor <2 x i64>
 // CHECK: and <2 x i64>
+// CHECK-LE: xor <2 x i64>
+// CHECK-LE: and <2 x i64>
 
   res_vsll = vec_andc(vsll, vbll);
 // CHECK: xor <2 x i64>
 // CHECK: and <2 x i64>
+// CHECK-LE: xor <2 x i64>
+// CHECK-LE: and <2 x i64>
 
   res_vull = vec_andc(vull, vull);
 // CHECK: xor <2 x i64>
 // CHECK: and <2 x i64>
+// CHECK-LE: xor <2 x i64>
+// CHECK-LE: and <2 x i64>
 
   res_vull = vec_andc(vbll, vull);
 // CHECK: xor <2 x i64>
 // CHECK: and <2 x i64>
+// CHECK-LE: xor <2 x i64>
+// CHECK-LE: and <2 x i64>
 
   res_vull = vec_andc(vull, vbll);
 // CHECK: xor <2 x i64>
 // CHECK: and <2 x i64>
+// CHECK-LE: xor <2 x i64>
+// CHECK-LE: and <2 x i64>
 
   res_vbll = vec_andc(vbll, vbll);
 // CHECK: xor <2 x i64>
 // CHECK: and <2 x i64>
+// CHECK-LE: xor <2 x i64>
+// CHECK-LE: and <2 x i64>
+
+  res_vf = vec_floor(vf);
+// CHECK: call <4 x float> @llvm.floor.v4f32(<4 x float> %{{[0-9]+}})
+// CHECK-LE: call <4 x float> @llvm.floor.v4f32(<4 x float> %{{[0-9]+}})
+
+  res_vd = vec_floor(vd);
+// CHECK: call <2 x double> @llvm.floor.v2f64(<2 x double> %{{[0-9]+}})
+// CHECK-LE: call <2 x double> @llvm.floor.v2f64(<2 x double> %{{[0-9]+}})
+
+  res_vf = vec_madd(vf, vf, vf);
+// CHECK: call <4 x float> @llvm.fma.v4f32(<4 x float> %{{[0-9]+}}, <4 x float> %{{[0-9]+}}, <4 x float> %{{[0-9]+}})
+// CHECK-LE: call <4 x float> @llvm.fma.v4f32(<4 x float> %{{[0-9]+}}, <4 x float> %{{[0-9]+}}, <4 x float> %{{[0-9]+}})
+
+  res_vd = vec_madd(vd, vd, vd);
+// CHECK: call <2 x double> @llvm.fma.v2f64(<2 x double> %{{[0-9]+}}, <2 x double> %{{[0-9]+}}, <2 x double> %{{[0-9]+}})
+// CHECK-LE: call <2 x double> @llvm.fma.v2f64(<2 x double> %{{[0-9]+}}, <2 x double> %{{[0-9]+}}, <2 x double> %{{[0-9]+}})
+
+  /* vec_mergeh */
+  res_vsll = vec_mergeh(vsll, vsll);
+// CHECK: @llvm.ppc.altivec.vperm
+// CHECK-LE: @llvm.ppc.altivec.vperm
+
+  res_vsll = vec_mergeh(vsll, vbll);
+// CHECK: @llvm.ppc.altivec.vperm
+// CHECK-LE: @llvm.ppc.altivec.vperm
+
+  res_vsll = vec_mergeh(vbll, vsll);
+// CHECK: @llvm.ppc.altivec.vperm
+// CHECK-LE: @llvm.ppc.altivec.vperm
+
+  res_vull = vec_mergeh(vull, vull);
+// CHECK: @llvm.ppc.altivec.vperm
+// CHECK-LE: @llvm.ppc.altivec.vperm
+
+  res_vull = vec_mergeh(vull, vbll);
+// CHECK: @llvm.ppc.altivec.vperm
+// CHECK-LE: @llvm.ppc.altivec.vperm
+
+  res_vull = vec_mergeh(vbll, vull);
+// CHECK: @llvm.ppc.altivec.vperm
+// CHECK-LE: @llvm.ppc.altivec.vperm
+
+  /* vec_mergel */
+  res_vsll = vec_mergel(vsll, vsll);
+// CHECK: @llvm.ppc.altivec.vperm
+// CHECK-LE: @llvm.ppc.altivec.vperm
+
+  res_vsll = vec_mergel(vsll, vbll);
+// CHECK: @llvm.ppc.altivec.vperm
+// CHECK-LE: @llvm.ppc.altivec.vperm
+
+  res_vsll = vec_mergel(vbll, vsll);
+// CHECK: @llvm.ppc.altivec.vperm
+// CHECK-LE: @llvm.ppc.altivec.vperm
+
+  res_vull = vec_mergel(vull, vull);
+// CHECK: @llvm.ppc.altivec.vperm
+// CHECK-LE: @llvm.ppc.altivec.vperm
+
+  res_vull = vec_mergel(vull, vbll);
+// CHECK: @llvm.ppc.altivec.vperm
+// CHECK-LE: @llvm.ppc.altivec.vperm
+
+  res_vull = vec_mergel(vbll, vull);
+// CHECK: @llvm.ppc.altivec.vperm
+// CHECK-LE: @llvm.ppc.altivec.vperm
+
+  /* vec_msub */
+  res_vf = vec_msub(vf, vf, vf);
+// CHECK: fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{[0-9]+}}
+// CHECK-NEXT: call <4 x float> @llvm.fma.v4f32(<4 x float> %{{[0-9]+}}, <4 x float> %{{[0-9]+}}, <4 x float>
+// CHECK-LE: fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{[0-9]+}}
+// CHECK-LE-NEXT: call <4 x float> @llvm.fma.v4f32(<4 x float> %{{[0-9]+}}, <4 x float> %{{[0-9]+}}, <4 x float>
+
+  res_vd = vec_msub(vd, vd, vd);
+// CHECK: fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %{{[0-9]+}}
+// CHECK-NEXT: call <2 x double> @llvm.fma.v2f64(<2 x double> %{{[0-9]+}}, <2 x double> %{{[0-9]+}}, <2 x double>
+// CHECK-LE: fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %{{[0-9]+}}
+// CHECK-LE-NEXT: call <2 x double> @llvm.fma.v2f64(<2 x double> %{{[0-9]+}}, <2 x double> %{{[0-9]+}}, <2 x double>
+
+  res_vsll = vec_mul(vsll, vsll);
+// CHECK: mul <2 x i64>
+// CHECK-LE: mul <2 x i64>
+
+  res_vull = vec_mul(vull, vull);
+// CHECK: mul <2 x i64>
+// CHECK-LE: mul <2 x i64>
+
+  res_vf = vec_mul(vf, vf);
+// CHECK: fmul <4 x float> %{{[0-9]+}}, %{{[0-9]+}}
+// CHECK-LE: fmul <4 x float> %{{[0-9]+}}, %{{[0-9]+}}
+
+  res_vd = vec_mul(vd, vd);
+// CHECK: fmul <2 x double> %{{[0-9]+}}, %{{[0-9]+}}
+// CHECK-LE: fmul <2 x double> %{{[0-9]+}}, %{{[0-9]+}}
+
+  res_vf = vec_nearbyint(vf);
+// CHECK: call <4 x float> @llvm.round.v4f32(<4 x float> %{{[0-9]+}})
+// CHECK-LE: call <4 x float> @llvm.round.v4f32(<4 x float> %{{[0-9]+}})
+
+  res_vd = vec_nearbyint(vd);
+// CHECK: call <2 x double> @llvm.round.v2f64(<2 x double> %{{[0-9]+}})
+// CHECK-LE: call <2 x double> @llvm.round.v2f64(<2 x double> %{{[0-9]+}})
+
+  res_vf = vec_nmadd(vf, vf, vf);
+// CHECK: [[FM:[0-9]+]] = call <4 x float> @llvm.fma.v4f32(<4 x float> %{{[0-9]+}}, <4 x float> %{{[0-9]+}}, <4 x float> %{{[0-9]+}})
+// CHECK-NEXT: fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %[[FM]]
+// CHECK-LE: [[FM:[0-9]+]] = call <4 x float> @llvm.fma.v4f32(<4 x float> %{{[0-9]+}}, <4 x float> %{{[0-9]+}}, <4 x float> %{{[0-9]+}})
+// CHECK-LE-NEXT: fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %[[FM]]
+
+  res_vd = vec_nmadd(vd, vd, vd);
+// CHECK: [[FM:[0-9]+]] = call <2 x double> @llvm.fma.v2f64(<2 x double> %{{[0-9]+}}, <2 x double> %{{[0-9]+}}, <2 x double> %{{[0-9]+}})
+// CHECK-NEXT: fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %[[FM]]
+// CHECK-LE: [[FM:[0-9]+]] = call <2 x double> @llvm.fma.v2f64(<2 x double> %{{[0-9]+}}, <2 x double> %{{[0-9]+}}, <2 x double> %{{[0-9]+}})
+// CHECK-LE-NEXT: fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %[[FM]]
+
+  res_vf = vec_nmsub(vf, vf, vf);
+// CHECK: fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{[0-9]+}}
+// CHECK-NEXT: call <4 x float> @llvm.fma.v4f32(<4 x float> %{{[0-9]+}}, <4 x float> %{{[0-9]+}}, <4 x float>
+// CHECK: fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{[0-9]+}}
+// CHECK-LE: fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{[0-9]+}}
+// CHECK-LE-NEXT: call <4 x float> @llvm.fma.v4f32(<4 x float> %{{[0-9]+}}, <4 x float> %{{[0-9]+}}, <4 x float>
+// CHECK-LE: fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{[0-9]+}}
+
+  res_vd = vec_nmsub(vd, vd, vd);
+// CHECK: fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %{{[0-9]+}}
+// CHECK-NEXT: [[FM:[0-9]+]] = call <2 x double> @llvm.fma.v2f64(<2 x double> %{{[0-9]+}}, <2 x double> %{{[0-9]+}}, <2 x double>
+// CHECK-NEXT: fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %[[FM]]
+// CHECK-LE: fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %{{[0-9]+}}
+// CHECK-LE-NEXT: [[FM:[0-9]+]] = call <2 x double> @llvm.fma.v2f64(<2 x double> %{{[0-9]+}}, <2 x double> %{{[0-9]+}}, <2 x double>
+// CHECK-LE-NEXT: fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %[[FM]]
 
   /* vec_nor */
   res_vsll = vec_nor(vsll, vsll);
 // CHECK: or <2 x i64>
 // CHECK: xor <2 x i64>
+// CHECK-LE: or <2 x i64>
+// CHECK-LE: xor <2 x i64>
 
   res_vull = vec_nor(vull, vull);
 // CHECK: or <2 x i64>
 // CHECK: xor <2 x i64>
+// CHECK-LE: or <2 x i64>
+// CHECK-LE: xor <2 x i64>
 
   res_vull = vec_nor(vbll, vbll);
 // CHECK: or <2 x i64>
 // CHECK: xor <2 x i64>
+// CHECK-LE: or <2 x i64>
+// CHECK-LE: xor <2 x i64>
+
+  res_vd = vec_nor(vd, vd);
+// CHECK: bitcast <2 x double> %{{[0-9]+}} to <2 x i64>
+// CHECK: [[OR:%.+]] = or <2 x i64> %{{[0-9]+}}, %{{[0-9]+}}
+// CHECK-NEXT: xor <2 x i64> [[OR]], <i64 -1, i64 -1>
+// CHECK-LE: bitcast <2 x double> %{{[0-9]+}} to <2 x i64>
+// CHECK-LE: [[OR:%.+]] = or <2 x i64> %{{[0-9]+}}, %{{[0-9]+}}
+// CHECK-LE-NEXT: xor <2 x i64> [[OR]], <i64 -1, i64 -1>
 
   /* vec_or */
   res_vsll = vec_or(vsll, vsll);
 // CHECK: or <2 x i64>
+// CHECK-LE: or <2 x i64>
 
   res_vsll = vec_or(vbll, vsll);
 // CHECK: or <2 x i64>
+// CHECK-LE: or <2 x i64>
 
   res_vsll = vec_or(vsll, vbll);
 // CHECK: or <2 x i64>
+// CHECK-LE: or <2 x i64>
 
   res_vull = vec_or(vull, vull);
 // CHECK: or <2 x i64>
+// CHECK-LE: or <2 x i64>
 
   res_vull = vec_or(vbll, vull);
 // CHECK: or <2 x i64>
+// CHECK-LE: or <2 x i64>
 
   res_vull = vec_or(vull, vbll);
 // CHECK: or <2 x i64>
+// CHECK-LE: or <2 x i64>
 
   res_vbll = vec_or(vbll, vbll);
 // CHECK: or <2 x i64>
+// CHECK-LE: or <2 x i64>
+
+  res_vd = vec_or(vd, vd);
+// CHECK: bitcast <2 x double> %{{[0-9]+}} to <2 x i64>
+// CHECK: or <2 x i64> %{{[0-9]+}}, %{{[0-9]+}}
+// CHECK-LE: bitcast <2 x double> %{{[0-9]+}} to <2 x i64>
+// CHECK-LE: or <2 x i64> %{{[0-9]+}}, %{{[0-9]+}}
+
+  res_vd = vec_or(vbll, vd);
+// CHECK: [[T1:%.+]] = bitcast <2 x double> %{{[0-9]+}} to <2 x i64>
+// CHECK: [[T2:%.+]] = or <2 x i64> %{{[0-9]+}}, [[T1]]
+// CHECK: bitcast <2 x i64> [[T2]] to <2 x double>
+// CHECK-LE: [[T1:%.+]] = bitcast <2 x double> %{{[0-9]+}} to <2 x i64>
+// CHECK-LE: [[T2:%.+]] = or <2 x i64> %{{[0-9]+}}, [[T1]]
+// CHECK-LE: bitcast <2 x i64> [[T2]] to <2 x double>
+
+  res_vd = vec_or(vd, vbll);
+// CHECK: [[T1:%.+]] = bitcast <2 x double> %{{[0-9]+}} to <2 x i64>
+// CHECK: [[T2:%.+]] = or <2 x i64> [[T1]], %{{[0-9]+}}
+// CHECK: bitcast <2 x i64> [[T2]] to <2 x double>
+// CHECK-LE: [[T1:%.+]] = bitcast <2 x double> %{{[0-9]+}} to <2 x i64>
+// CHECK-LE: [[T2:%.+]] = or <2 x i64> [[T1]], %{{[0-9]+}}
+// CHECK-LE: bitcast <2 x i64> [[T2]] to <2 x double>
+
+  res_vf = vec_re(vf);
+// CHECK: call <4 x float> @llvm.ppc.vsx.xvresp(<4 x float>
+// CHECK-LE: call <4 x float> @llvm.ppc.vsx.xvresp(<4 x float>
+
+  res_vd = vec_re(vd);
+// CHECK: call <2 x double> @llvm.ppc.vsx.xvredp(<2 x double>
+// CHECK-LE: call <2 x double> @llvm.ppc.vsx.xvredp(<2 x double>
+
+  res_vf = vec_rint(vf);
+// CHECK: call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %{{[0-9]+}})
+// CHECK-LE: call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %{{[0-9]+}})
+
+  res_vd = vec_rint(vd);
+// CHECK: call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %{{[0-9]+}})
+// CHECK-LE: call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %{{[0-9]+}})
+
+  res_vf = vec_rsqrte(vf);
+// CHECK: call <4 x float> @llvm.ppc.vsx.xvrsqrtesp(<4 x float> %{{[0-9]+}})
+// CHECK-LE: call <4 x float> @llvm.ppc.vsx.xvrsqrtesp(<4 x float> %{{[0-9]+}})
+
+  res_vd = vec_rsqrte(vd);
+// CHECK: call <2 x double> @llvm.ppc.vsx.xvrsqrtedp(<2 x double> %{{[0-9]+}})
+// CHECK-LE: call <2 x double> @llvm.ppc.vsx.xvrsqrtedp(<2 x double> %{{[0-9]+}})
+
+  dummy();
+// CHECK: call void @dummy()
+// CHECK-LE: call void @dummy()
+
+  res_vf = vec_sel(vd, vd, vbll);
+// CHECK: xor <2 x i64> %{{[0-9]+}}, <i64 -1, i64 -1>
+// CHECK: and <2 x i64> %{{[0-9]+}},
+// CHECK: and <2 x i64> %{{[0-9]+}}, %{{[0-9]+}}
+// CHECK: or <2 x i64>
+// CHECK: bitcast <2 x i64> %{{[0-9]+}} to <2 x double>
+// CHECK-LE: xor <2 x i64> %{{[0-9]+}}, <i64 -1, i64 -1>
+// CHECK-LE: and <2 x i64> %{{[0-9]+}},
+// CHECK-LE: and <2 x i64> %{{[0-9]+}}, %{{[0-9]+}}
+// CHECK-LE: or <2 x i64>
+// CHECK-LE: bitcast <2 x i64> %{{[0-9]+}} to <2 x double>
+
+  dummy();
+// CHECK: call void @dummy()
+// CHECK-LE: call void @dummy()
+
+  res_vd = vec_sel(vd, vd, vull);
+// CHECK: xor <2 x i64> %{{[0-9]+}}, <i64 -1, i64 -1>
+// CHECK: and <2 x i64> %{{[0-9]+}},
+// CHECK: and <2 x i64> %{{[0-9]+}}, %{{[0-9]+}}
+// CHECK: or <2 x i64>
+// CHECK: bitcast <2 x i64> %{{[0-9]+}} to <2 x double>
+// CHECK-LE: xor <2 x i64> %{{[0-9]+}}, <i64 -1, i64 -1>
+// CHECK-LE: and <2 x i64> %{{[0-9]+}},
+// CHECK-LE: and <2 x i64> %{{[0-9]+}}, %{{[0-9]+}}
+// CHECK-LE: or <2 x i64>
+// CHECK-LE: bitcast <2 x i64> %{{[0-9]+}} to <2 x double>
+
+  res_vf = vec_sqrt(vf);
+// CHECK: call <4 x float> @llvm.sqrt.v4f32(<4 x float> %{{[0-9]+}})
+// CHECK-LE: call <4 x float> @llvm.sqrt.v4f32(<4 x float> %{{[0-9]+}})
+
+  res_vd = vec_sqrt(vd);
+// CHECK: call <2 x double> @llvm.sqrt.v2f64(<2 x double> %{{[0-9]+}})
+// CHECK-LE: call <2 x double> @llvm.sqrt.v2f64(<2 x double> %{{[0-9]+}})
+
+  res_vd = vec_sub(vd, vd);
+// CHECK: fsub <2 x double> %{{[0-9]+}}, %{{[0-9]+}}
+// CHECK-LE: fsub <2 x double> %{{[0-9]+}}, %{{[0-9]+}}
+
+  res_vf = vec_trunc(vf);
+// CHECK: call <4 x float> @llvm.trunc.v4f32(<4 x float> %{{[0-9]+}})
+// CHECK-LE: call <4 x float> @llvm.trunc.v4f32(<4 x float> %{{[0-9]+}})
+
+  res_vd = vec_trunc(vd);
+// CHECK: call <2 x double> @llvm.trunc.v2f64(<2 x double> %{{[0-9]+}})
+// CHECK-LE: call <2 x double> @llvm.trunc.v2f64(<2 x double> %{{[0-9]+}})
 
   /* vec_vor */
   res_vsll = vec_vor(vsll, vsll);
 // CHECK: or <2 x i64>
+// CHECK-LE: or <2 x i64>
 
   res_vsll = vec_vor(vbll, vsll);
 // CHECK: or <2 x i64>
+// CHECK-LE: or <2 x i64>
 
   res_vsll = vec_vor(vsll, vbll);
 // CHECK: or <2 x i64>
+// CHECK-LE: or <2 x i64>
 
   res_vull = vec_vor(vull, vull);
 // CHECK: or <2 x i64>
+// CHECK-LE: or <2 x i64>
 
   res_vull = vec_vor(vbll, vull);
 // CHECK: or <2 x i64>
+// CHECK-LE: or <2 x i64>
 
   res_vull = vec_vor(vull, vbll);
 // CHECK: or <2 x i64>
+// CHECK-LE: or <2 x i64>
 
   res_vbll = vec_vor(vbll, vbll);
 // CHECK: or <2 x i64>
+// CHECK-LE: or <2 x i64>
 
   /* vec_xor */
   res_vsll = vec_xor(vsll, vsll);
 // CHECK: xor <2 x i64>
+// CHECK-LE: xor <2 x i64>
 
   res_vsll = vec_xor(vbll, vsll);
 // CHECK: xor <2 x i64>
+// CHECK-LE: xor <2 x i64>
 
   res_vsll = vec_xor(vsll, vbll);
 // CHECK: xor <2 x i64>
+// CHECK-LE: xor <2 x i64>
 
   res_vull = vec_xor(vull, vull);
 // CHECK: xor <2 x i64>
+// CHECK-LE: xor <2 x i64>
 
   res_vull = vec_xor(vbll, vull);
 // CHECK: xor <2 x i64>
+// CHECK-LE: xor <2 x i64>
 
   res_vull = vec_xor(vull, vbll);
 // CHECK: xor <2 x i64>
+// CHECK-LE: xor <2 x i64>
 
   res_vbll = vec_xor(vbll, vbll);
 // CHECK: xor <2 x i64>
+// CHECK-LE: xor <2 x i64>
+
+  dummy();
+// CHECK: call void @dummy()
+// CHECK-LE: call void @dummy()
+
+  res_vd = vec_xor(vd, vd);
+// CHECK: [[X1:%.+]] = xor <2 x i64> %{{[0-9]+}}, %{{[0-9]+}}
+// CHECK: bitcast <2 x i64> [[X1]] to <2 x double>
+// CHECK-LE: [[X1:%.+]] = xor <2 x i64> %{{[0-9]+}}, %{{[0-9]+}}
+// CHECK-LE: bitcast <2 x i64> [[X1]] to <2 x double>
+
+  dummy();
+// CHECK: call void @dummy()
+// CHECK-LE: call void @dummy()
+
+  res_vd = vec_xor(vd, vbll);
+// CHECK: [[X1:%.+]] = xor <2 x i64> %{{[0-9]+}}, %{{[0-9]+}}
+// CHECK: bitcast <2 x i64> [[X1]] to <2 x double>
+// CHECK-LE: [[X1:%.+]] = xor <2 x i64> %{{[0-9]+}}, %{{[0-9]+}}
+// CHECK-LE: bitcast <2 x i64> [[X1]] to <2 x double>
+
+  dummy();
+// CHECK: call void @dummy()
+// CHECK-LE: call void @dummy()
+
+  res_vd = vec_xor(vbll, vd);
+// CHECK: [[X1:%.+]] = xor <2 x i64> %{{[0-9]+}}, %{{[0-9]+}}
+// CHECK: bitcast <2 x i64> [[X1]] to <2 x double>
+// CHECK-LE: [[X1:%.+]] = xor <2 x i64> %{{[0-9]+}}, %{{[0-9]+}}
+// CHECK-LE: bitcast <2 x i64> [[X1]] to <2 x double>
 
   /* vec_vxor */
   res_vsll = vec_vxor(vsll, vsll);
 // CHECK: xor <2 x i64>
+// CHECK-LE: xor <2 x i64>
 
   res_vsll = vec_vxor(vbll, vsll);
 // CHECK: xor <2 x i64>
+// CHECK-LE: xor <2 x i64>
 
   res_vsll = vec_vxor(vsll, vbll);
 // CHECK: xor <2 x i64>
+// CHECK-LE: xor <2 x i64>
 
   res_vull = vec_vxor(vull, vull);
 // CHECK: xor <2 x i64>
+// CHECK-LE: xor <2 x i64>
 
   res_vull = vec_vxor(vbll, vull);
 // CHECK: xor <2 x i64>
+// CHECK-LE: xor <2 x i64>
 
   res_vull = vec_vxor(vull, vbll);
 // CHECK: xor <2 x i64>
+// CHECK-LE: xor <2 x i64>
 
   res_vbll = vec_vxor(vbll, vbll);
 // CHECK: xor <2 x i64>
+// CHECK-LE: xor <2 x i64>
 
 }
diff --git a/test/CodeGen/builtins-ppc.c b/test/CodeGen/builtins-ppc.c
index 9ef5e37..1f17787 100644
--- a/test/CodeGen/builtins-ppc.c
+++ b/test/CodeGen/builtins-ppc.c
@@ -7,3 +7,10 @@
   res = __builtin_eh_return_data_regno(0);  // CHECK: store volatile i32 3
   res = __builtin_eh_return_data_regno(1);  // CHECK: store volatile i32 4
 }
+
+// CHECK-LABEL: define i64 @test_builtin_ppc_get_timebase
+long long test_builtin_ppc_get_timebase() {
+  // CHECK: call i64 @llvm.readcyclecounter()
+  return __builtin_ppc_get_timebase();
+}
+
diff --git a/test/CodeGen/builtins-systemz-zvector-error.c b/test/CodeGen/builtins-systemz-zvector-error.c
new file mode 100644
index 0000000..8d5380d
--- /dev/null
+++ b/test/CodeGen/builtins-systemz-zvector-error.c
@@ -0,0 +1,576 @@
+// REQUIRES: systemz-registered-target
+// RUN: %clang_cc1 -target-cpu z13 -triple s390x-linux-gnu \
+// RUN: -fzvector -fno-lax-vector-conversions \
+// RUN: -Wall -Wno-unused -Werror -fsyntax-only -verify %s
+
+#include <vecintrin.h>
+
+volatile vector signed char vsc;
+volatile vector signed short vss;
+volatile vector signed int vsi;
+volatile vector signed long long vsl;
+volatile vector unsigned char vuc;
+volatile vector unsigned short vus;
+volatile vector unsigned int vui;
+volatile vector unsigned long long vul;
+volatile vector bool char vbc;
+volatile vector bool short vbs;
+volatile vector bool int vbi;
+volatile vector bool long long vbl;
+volatile vector double vd;
+
+volatile signed char sc;
+volatile signed short ss;
+volatile signed int si;
+volatile signed long long sl;
+volatile unsigned char uc;
+volatile unsigned short us;
+volatile unsigned int ui;
+volatile unsigned long long ul;
+volatile double d;
+
+const void * volatile cptr;
+const signed char * volatile cptrsc;
+const signed short * volatile cptrss;
+const signed int * volatile cptrsi;
+const signed long long * volatile cptrsl;
+const unsigned char * volatile cptruc;
+const unsigned short * volatile cptrus;
+const unsigned int * volatile cptrui;
+const unsigned long long * volatile cptrul;
+const float * volatile cptrf;
+const double * volatile cptrd;
+
+void * volatile ptr;
+signed char * volatile ptrsc;
+signed short * volatile ptrss;
+signed int * volatile ptrsi;
+signed long long * volatile ptrsl;
+unsigned char * volatile ptruc;
+unsigned short * volatile ptrus;
+unsigned int * volatile ptrui;
+unsigned long long * volatile ptrul;
+float * volatile ptrf;
+double * volatile ptrd;
+
+volatile unsigned int len;
+volatile int idx;
+int cc;
+
+void test_core(void) {
+  len = __lcbb(cptr, idx);   // expected-error {{no matching function}}
+                             // expected-note@vecintrin.h:* {{must be a constant power of 2 from 64 to 4096}}
+  len = __lcbb(cptr, 200);   // expected-error {{no matching function}}
+                             // expected-note@vecintrin.h:* {{must be a constant power of 2 from 64 to 4096}}
+  len = __lcbb(cptr, 32);    // expected-error {{no matching function}}
+                             // expected-note@vecintrin.h:* {{must be a constant power of 2 from 64 to 4096}}
+  len = __lcbb(cptr, 8192);  // expected-error {{no matching function}}
+                             // expected-note@vecintrin.h:* {{must be a constant power of 2 from 64 to 4096}}
+
+  vsl = vec_permi(vsl, vsl, idx); // expected-error {{no matching function}}
+                                  // expected-note@vecintrin.h:* 3 {{candidate function not viable}}
+                                  // expected-note@vecintrin.h:* 1 {{must be a constant integer from 0 to 3}}
+  vsl = vec_permi(vsl, vsl, -1);  // expected-error {{no matching function}}
+                                  // expected-note@vecintrin.h:* 3 {{candidate function not viable}}
+                                  // expected-note@vecintrin.h:* 1 {{must be a constant integer from 0 to 3}}
+  vsl = vec_permi(vsl, vsl, 4);   // expected-error {{no matching function}}
+                                  // expected-note@vecintrin.h:* 3 {{candidate function not viable}}
+                                  // expected-note@vecintrin.h:* 1 {{must be a constant integer from 0 to 3}}
+  vul = vec_permi(vul, vul, idx); // expected-error {{no matching function}}
+                                  // expected-note@vecintrin.h:* 2 {{candidate function not viable}}
+                                  // expected-note@vecintrin.h:* 2 {{must be a constant integer from 0 to 3}}
+  vul = vec_permi(vul, vul, -1);  // expected-error {{no matching function}}
+                                  // expected-note@vecintrin.h:* 2 {{candidate function not viable}}
+                                  // expected-note@vecintrin.h:* 2 {{must be a constant integer from 0 to 3}}
+  vul = vec_permi(vul, vul, 4);   // expected-error {{no matching function}}
+                                  // expected-note@vecintrin.h:* 2 {{candidate function not viable}}
+                                  // expected-note@vecintrin.h:* 2 {{must be a constant integer from 0 to 3}}
+  vbl = vec_permi(vbl, vbl, idx); // expected-error {{no matching function}}
+                                  // expected-note@vecintrin.h:* 2 {{candidate function not viable}}
+                                  // expected-note@vecintrin.h:* 2 {{must be a constant integer from 0 to 3}}
+  vbl = vec_permi(vbl, vbl, -1);  // expected-error {{no matching function}}
+                                  // expected-note@vecintrin.h:* 2 {{candidate function not viable}}
+                                  // expected-note@vecintrin.h:* 2 {{must be a constant integer from 0 to 3}}
+  vbl = vec_permi(vbl, vbl, 4);   // expected-error {{no matching function}}
+                                  // expected-note@vecintrin.h:* 2 {{candidate function not viable}}
+                                  // expected-note@vecintrin.h:* 2 {{must be a constant integer from 0 to 3}}
+  vd = vec_permi(vd, vd, idx);    // expected-error {{no matching function}}
+                                  // expected-note@vecintrin.h:* 3 {{candidate function not viable}}
+                                  // expected-note@vecintrin.h:* 1 {{must be a constant integer from 0 to 3}}
+  vd = vec_permi(vd, vd, -1);     // expected-error {{no matching function}}
+                                  // expected-note@vecintrin.h:* 3 {{candidate function not viable}}
+                                  // expected-note@vecintrin.h:* 1 {{must be a constant integer from 0 to 3}}
+  vd = vec_permi(vd, vd, 4);      // expected-error {{no matching function}}
+                                  // expected-note@vecintrin.h:* 3 {{candidate function not viable}}
+                                  // expected-note@vecintrin.h:* 1 {{must be a constant integer from 0 to 3}}
+
+  vsi = vec_gather_element(vsi, vui, cptrsi, idx); // expected-error {{no matching function}}
+                                                   // expected-note@vecintrin.h:* 6 {{candidate function not viable}}
+                                                   // expected-note@vecintrin.h:* 1 {{must be a constant integer from 0 to 3}}
+  vsi = vec_gather_element(vsi, vui, cptrsi, -1);  // expected-error {{no matching function}}
+                                                   // expected-note@vecintrin.h:* 6 {{candidate function not viable}}
+                                                   // expected-note@vecintrin.h:* 1 {{must be a constant integer from 0 to 3}}
+  vsi = vec_gather_element(vsi, vui, cptrsi, 4);   // expected-error {{no matching function}}
+                                                   // expected-note@vecintrin.h:* 6 {{candidate function not viable}}
+                                                   // expected-note@vecintrin.h:* 1 {{must be a constant integer from 0 to 3}}
+  vui = vec_gather_element(vui, vui, cptrui, idx); // expected-error {{no matching function}}
+                                                   // expected-note@vecintrin.h:* 5 {{candidate function not viable}}
+                                                   // expected-note@vecintrin.h:* 2 {{must be a constant integer from 0 to 3}}
+  vui = vec_gather_element(vui, vui, cptrui, -1);  // expected-error {{no matching function}}
+                                                   // expected-note@vecintrin.h:* 5 {{candidate function not viable}}
+                                                   // expected-note@vecintrin.h:* 2 {{must be a constant integer from 0 to 3}}
+  vui = vec_gather_element(vui, vui, cptrui, 4);   // expected-error {{no matching function}}
+                                                   // expected-note@vecintrin.h:* 5 {{candidate function not viable}}
+                                                   // expected-note@vecintrin.h:* 2 {{must be a constant integer from 0 to 3}}
+  vbi = vec_gather_element(vbi, vui, cptrui, idx); // expected-error {{no matching function}}
+                                                   // expected-note@vecintrin.h:* 5 {{candidate function not viable}}
+                                                   // expected-note@vecintrin.h:* 2 {{must be a constant integer from 0 to 3}}
+  vbi = vec_gather_element(vbi, vui, cptrui, -1);  // expected-error {{no matching function}}
+                                                   // expected-note@vecintrin.h:* 5 {{candidate function not viable}}
+                                                   // expected-note@vecintrin.h:* 2 {{must be a constant integer from 0 to 3}}
+  vbi = vec_gather_element(vbi, vui, cptrui, 4);   // expected-error {{no matching function}}
+                                                   // expected-note@vecintrin.h:* 5 {{candidate function not viable}}
+                                                   // expected-note@vecintrin.h:* 2 {{must be a constant integer from 0 to 3}}
+  vsl = vec_gather_element(vsl, vul, cptrsl, idx); // expected-error {{no matching function}}
+                                                   // expected-note@vecintrin.h:* 6 {{candidate function not viable}}
+                                                   // expected-note@vecintrin.h:* 1 {{must be a constant integer from 0 to 1}}
+  vsl = vec_gather_element(vsl, vul, cptrsl, -1);  // expected-error {{no matching function}}
+                                                   // expected-note@vecintrin.h:* 6 {{candidate function not viable}}
+                                                   // expected-note@vecintrin.h:* 1 {{must be a constant integer from 0 to 1}}
+  vsl = vec_gather_element(vsl, vul, cptrsl, 2);   // expected-error {{no matching function}}
+                                                   // expected-note@vecintrin.h:* 6 {{candidate function not viable}}
+                                                   // expected-note@vecintrin.h:* 1 {{must be a constant integer from 0 to 1}}
+  vul = vec_gather_element(vul, vul, cptrul, idx); // expected-error {{no matching function}}
+                                                   // expected-note@vecintrin.h:* 5 {{candidate function not viable}}
+                                                   // expected-note@vecintrin.h:* 2 {{must be a constant integer from 0 to 1}}
+  vul = vec_gather_element(vul, vul, cptrul, -1);  // expected-error {{no matching function}}
+                                                   // expected-note@vecintrin.h:* 5 {{candidate function not viable}}
+                                                   // expected-note@vecintrin.h:* 2 {{must be a constant integer from 0 to 1}}
+  vul = vec_gather_element(vul, vul, cptrul, 2);   // expected-error {{no matching function}}
+                                                   // expected-note@vecintrin.h:* 5 {{candidate function not viable}}
+                                                   // expected-note@vecintrin.h:* 2 {{must be a constant integer from 0 to 1}}
+  vbl = vec_gather_element(vbl, vul, cptrul, idx); // expected-error {{no matching function}}
+                                                   // expected-note@vecintrin.h:* 5 {{candidate function not viable}}
+                                                   // expected-note@vecintrin.h:* 2 {{must be a constant integer from 0 to 1}}
+  vbl = vec_gather_element(vbl, vul, cptrul, -1);  // expected-error {{no matching function}}
+                                                   // expected-note@vecintrin.h:* 5 {{candidate function not viable}}
+                                                   // expected-note@vecintrin.h:* 2 {{must be a constant integer from 0 to 1}}
+  vbl = vec_gather_element(vbl, vul, cptrul, 2);   // expected-error {{no matching function}}
+                                                   // expected-note@vecintrin.h:* 5 {{candidate function not viable}}
+                                                   // expected-note@vecintrin.h:* 2 {{must be a constant integer from 0 to 1}}
+  vd = vec_gather_element(vd, vul, cptrd, idx);    // expected-error {{no matching function}}
+                                                   // expected-note@vecintrin.h:* 6 {{candidate function not viable}}
+                                                   // expected-note@vecintrin.h:* 1 {{must be a constant integer from 0 to 1}}
+  vd = vec_gather_element(vd, vul, cptrd, -1);     // expected-error {{no matching function}}
+                                                   // expected-note@vecintrin.h:* 6 {{candidate function not viable}}
+                                                   // expected-note@vecintrin.h:* 1 {{must be a constant integer from 0 to 1}}
+  vd = vec_gather_element(vd, vul, cptrd, 2);      // expected-error {{no matching function}}
+                                                   // expected-note@vecintrin.h:* 6 {{candidate function not viable}}
+                                                   // expected-note@vecintrin.h:* 1 {{must be a constant integer from 0 to 1}}
+
+  vec_scatter_element(vsi, vui, ptrsi, idx); // expected-error {{no matching function}}
+                                             // expected-note@vecintrin.h:* 6 {{candidate function not viable}}
+                                             // expected-note@vecintrin.h:* 1 {{must be a constant integer from 0 to 3}}
+  vec_scatter_element(vsi, vui, ptrsi, -1);  // expected-error {{no matching function}}
+                                             // expected-note@vecintrin.h:* 6 {{candidate function not viable}}
+                                             // expected-note@vecintrin.h:* 1 {{must be a constant integer from 0 to 3}}
+  vec_scatter_element(vsi, vui, ptrsi, 4);   // expected-error {{no matching function}}
+                                             // expected-note@vecintrin.h:* 6 {{candidate function not viable}}
+                                             // expected-note@vecintrin.h:* 1 {{must be a constant integer from 0 to 3}}
+  vec_scatter_element(vui, vui, ptrui, idx); // expected-error {{no matching function}}
+                                             // expected-note@vecintrin.h:* 5 {{candidate function not viable}}
+                                             // expected-note@vecintrin.h:* 2 {{must be a constant integer from 0 to 3}}
+  vec_scatter_element(vui, vui, ptrui, -1);  // expected-error {{no matching function}}
+                                             // expected-note@vecintrin.h:* 5 {{candidate function not viable}}
+                                             // expected-note@vecintrin.h:* 2 {{must be a constant integer from 0 to 3}}
+  vec_scatter_element(vui, vui, ptrui, 4);   // expected-error {{no matching function}}
+                                             // expected-note@vecintrin.h:* 5 {{candidate function not viable}}
+                                             // expected-note@vecintrin.h:* 2 {{must be a constant integer from 0 to 3}}
+  vec_scatter_element(vbi, vui, ptrui, idx); // expected-error {{no matching function}}
+                                             // expected-note@vecintrin.h:* 5 {{candidate function not viable}}
+                                             // expected-note@vecintrin.h:* 2 {{must be a constant integer from 0 to 3}}
+  vec_scatter_element(vbi, vui, ptrui, -1);  // expected-error {{no matching function}}
+                                             // expected-note@vecintrin.h:* 5 {{candidate function not viable}}
+                                             // expected-note@vecintrin.h:* 2 {{must be a constant integer from 0 to 3}}
+  vec_scatter_element(vbi, vui, ptrui, 4);   // expected-error {{no matching function}}
+                                             // expected-note@vecintrin.h:* 5 {{candidate function not viable}}
+                                             // expected-note@vecintrin.h:* 2 {{must be a constant integer from 0 to 3}}
+  vec_scatter_element(vsl, vul, ptrsl, idx); // expected-error {{no matching function}}
+                                             // expected-note@vecintrin.h:* 6 {{candidate function not viable}}
+                                             // expected-note@vecintrin.h:* 1 {{must be a constant integer from 0 to 1}}
+  vec_scatter_element(vsl, vul, ptrsl, -1);  // expected-error {{no matching function}}
+                                             // expected-note@vecintrin.h:* 6 {{candidate function not viable}}
+                                             // expected-note@vecintrin.h:* 1 {{must be a constant integer from 0 to 1}}
+  vec_scatter_element(vsl, vul, ptrsl, 2);   // expected-error {{no matching function}}
+                                             // expected-note@vecintrin.h:* 6 {{candidate function not viable}}
+                                             // expected-note@vecintrin.h:* 1 {{must be a constant integer from 0 to 1}}
+  vec_scatter_element(vul, vul, ptrul, idx); // expected-error {{no matching function}}
+                                             // expected-note@vecintrin.h:* 5 {{candidate function not viable}}
+                                             // expected-note@vecintrin.h:* 2 {{must be a constant integer from 0 to 1}}
+  vec_scatter_element(vul, vul, ptrul, -1);  // expected-error {{no matching function}}
+                                             // expected-note@vecintrin.h:* 5 {{candidate function not viable}}
+                                             // expected-note@vecintrin.h:* 2 {{must be a constant integer from 0 to 1}}
+  vec_scatter_element(vul, vul, ptrul, 2);   // expected-error {{no matching function}}
+                                             // expected-note@vecintrin.h:* 5 {{candidate function not viable}}
+                                             // expected-note@vecintrin.h:* 2 {{must be a constant integer from 0 to 1}}
+  vec_scatter_element(vbl, vul, ptrul, idx); // expected-error {{no matching function}}
+                                             // expected-note@vecintrin.h:* 5 {{candidate function not viable}}
+                                             // expected-note@vecintrin.h:* 2 {{must be a constant integer from 0 to 1}}
+  vec_scatter_element(vbl, vul, ptrul, -1);  // expected-error {{no matching function}}
+                                             // expected-note@vecintrin.h:* 5 {{candidate function not viable}}
+                                             // expected-note@vecintrin.h:* 2 {{must be a constant integer from 0 to 1}}
+  vec_scatter_element(vbl, vul, ptrul, 2);   // expected-error {{no matching function}}
+                                             // expected-note@vecintrin.h:* 5 {{candidate function not viable}}
+                                             // expected-note@vecintrin.h:* 2 {{must be a constant integer from 0 to 1}}
+  vec_scatter_element(vd, vul, ptrd, idx);   // expected-error {{no matching function}}
+                                             // expected-note@vecintrin.h:* 6 {{candidate function not viable}}
+                                             // expected-note@vecintrin.h:* 1 {{must be a constant integer from 0 to 1}}
+  vec_scatter_element(vd, vul, ptrd, -1);    // expected-error {{no matching function}}
+                                             // expected-note@vecintrin.h:* 6 {{candidate function not viable}}
+                                             // expected-note@vecintrin.h:* 1 {{must be a constant integer from 0 to 1}}
+  vec_scatter_element(vd, vul, ptrd, 2);     // expected-error {{no matching function}}
+                                             // expected-note@vecintrin.h:* 6 {{candidate function not viable}}
+                                             // expected-note@vecintrin.h:* 1 {{must be a constant integer from 0 to 1}}
+
+  vsc = vec_load_bndry(cptrsc, idx);   // expected-error {{no matching function}}
+                                       // expected-note@vecintrin.h:* 8 {{candidate function not viable}}
+                                       // expected-note@vecintrin.h:* 1 {{must be a constant power of 2 from 64 to 4096}}
+  vsc = vec_load_bndry(cptrsc, 200);   // expected-error {{no matching function}}
+                                       // expected-note@vecintrin.h:* 8 {{candidate function not viable}}
+                                       // expected-note@vecintrin.h:* 1 {{must be a constant power of 2 from 64 to 4096}}
+  vsc = vec_load_bndry(cptrsc, 32);    // expected-error {{no matching function}}
+                                       // expected-note@vecintrin.h:* 8 {{candidate function not viable}}
+                                       // expected-note@vecintrin.h:* 1 {{must be a constant power of 2 from 64 to 4096}}
+  vsc = vec_load_bndry(cptrsc, 8192);  // expected-error {{no matching function}}
+                                       // expected-note@vecintrin.h:* 8 {{candidate function not viable}}
+                                       // expected-note@vecintrin.h:* 1 {{must be a constant power of 2 from 64 to 4096}}
+  vuc = vec_load_bndry(cptruc, idx);   // expected-error {{no matching function}}
+                                       // expected-note@vecintrin.h:* 8 {{candidate function not viable}}
+                                       // expected-note@vecintrin.h:* 1 {{must be a constant power of 2 from 64 to 4096}}
+  vss = vec_load_bndry(cptrss, idx);   // expected-error {{no matching function}}
+                                       // expected-note@vecintrin.h:* 8 {{candidate function not viable}}
+                                       // expected-note@vecintrin.h:* 1 {{must be a constant power of 2 from 64 to 4096}}
+  vus = vec_load_bndry(cptrus, idx);   // expected-error {{no matching function}}
+                                       // expected-note@vecintrin.h:* 8 {{candidate function not viable}}
+                                       // expected-note@vecintrin.h:* 1 {{must be a constant power of 2 from 64 to 4096}}
+  vsi = vec_load_bndry(cptrsi, idx);   // expected-error {{no matching function}}
+                                       // expected-note@vecintrin.h:* 8 {{candidate function not viable}}
+                                       // expected-note@vecintrin.h:* 1 {{must be a constant power of 2 from 64 to 4096}}
+  vui = vec_load_bndry(cptrui, idx);   // expected-error {{no matching function}}
+                                       // expected-note@vecintrin.h:* 8 {{candidate function not viable}}
+                                       // expected-note@vecintrin.h:* 1 {{must be a constant power of 2 from 64 to 4096}}
+  vsl = vec_load_bndry(cptrsl, idx);   // expected-error {{no matching function}}
+                                       // expected-note@vecintrin.h:* 8 {{candidate function not viable}}
+                                       // expected-note@vecintrin.h:* 1 {{must be a constant power of 2 from 64 to 4096}}
+  vul = vec_load_bndry(cptrul, idx);   // expected-error {{no matching function}}
+                                       // expected-note@vecintrin.h:* 8 {{candidate function not viable}}
+                                       // expected-note@vecintrin.h:* 1 {{must be a constant power of 2 from 64 to 4096}}
+
+  vuc = vec_genmask(idx);  // expected-error {{no matching function}}
+                           // expected-note@vecintrin.h:* {{must be a constant integer}}
+
+  vuc = vec_genmasks_8(0, idx);    // expected-error {{no matching function}}
+                                   // expected-note@vecintrin.h:* {{must be a constant integer}}
+  vuc = vec_genmasks_8(idx, 0);    // expected-error {{no matching function}}
+                                   // expected-note@vecintrin.h:* {{must be a constant integer}}
+  vuc = vec_genmasks_8(idx, idx);  // expected-error {{no matching function}}
+                                   // expected-note@vecintrin.h:* {{must be a constant integer}}
+  vus = vec_genmasks_16(0, idx);   // expected-error {{no matching function}}
+                                   // expected-note@vecintrin.h:* {{must be a constant integer}}
+  vus = vec_genmasks_16(idx, 0);   // expected-error {{no matching function}}
+                                   // expected-note@vecintrin.h:* {{must be a constant integer}}
+  vus = vec_genmasks_16(idx, idx); // expected-error {{no matching function}}
+                                   // expected-note@vecintrin.h:* {{must be a constant integer}}
+  vui = vec_genmasks_32(0, idx);   // expected-error {{no matching function}}
+                                   // expected-note@vecintrin.h:* {{must be a constant integer}}
+  vui = vec_genmasks_32(idx, 0);   // expected-error {{no matching function}}
+                                   // expected-note@vecintrin.h:* {{must be a constant integer}}
+  vui = vec_genmasks_32(idx, idx); // expected-error {{no matching function}}
+                                   // expected-note@vecintrin.h:* {{must be a constant integer}}
+  vul = vec_genmasks_64(0, idx);   // expected-error {{no matching function}}
+                                   // expected-note@vecintrin.h:* {{must be a constant integer}}
+  vul = vec_genmasks_64(idx, 0);   // expected-error {{no matching function}}
+                                   // expected-note@vecintrin.h:* {{must be a constant integer}}
+  vul = vec_genmasks_64(idx, idx); // expected-error {{no matching function}}
+                                   // expected-note@vecintrin.h:* {{must be a constant integer}}
+
+  vsc = vec_splat(vsc, idx); // expected-error {{no matching function}}
+                             // expected-note@vecintrin.h:* 12 {{candidate function not viable}}
+                             // expected-note@vecintrin.h:* 1 {{must be a constant integer from 0 to 15}}
+  vsc = vec_splat(vsc, -1);  // expected-error {{no matching function}}
+                             // expected-note@vecintrin.h:* 12 {{candidate function not viable}}
+                             // expected-note@vecintrin.h:* 1 {{must be a constant integer from 0 to 15}}
+  vsc = vec_splat(vsc, 16);  // expected-error {{no matching function}}
+                             // expected-note@vecintrin.h:* 12 {{candidate function not viable}}
+                             // expected-note@vecintrin.h:* 1 {{must be a constant integer from 0 to 15}}
+  vuc = vec_splat(vuc, idx); // expected-error {{no matching function}}
+                             // expected-note@vecintrin.h:* 11 {{candidate function not viable}}
+                             // expected-note@vecintrin.h:* 2 {{must be a constant integer from 0 to 15}}
+  vuc = vec_splat(vuc, -1);  // expected-error {{no matching function}}
+                             // expected-note@vecintrin.h:* 11 {{candidate function not viable}}
+                             // expected-note@vecintrin.h:* 2 {{must be a constant integer from 0 to 15}}
+  vuc = vec_splat(vuc, 16);  // expected-error {{no matching function}}
+                             // expected-note@vecintrin.h:* 11 {{candidate function not viable}}
+                             // expected-note@vecintrin.h:* 2 {{must be a constant integer from 0 to 15}}
+  vbc = vec_splat(vbc, idx); // expected-error {{no matching function}}
+                             // expected-note@vecintrin.h:* 11 {{candidate function not viable}}
+                             // expected-note@vecintrin.h:* 2 {{must be a constant integer from 0 to 15}}
+  vbc = vec_splat(vbc, -1);  // expected-error {{no matching function}}
+                             // expected-note@vecintrin.h:* 11 {{candidate function not viable}}
+                             // expected-note@vecintrin.h:* 2 {{must be a constant integer from 0 to 15}}
+  vbc = vec_splat(vbc, 16);  // expected-error {{no matching function}}
+                             // expected-note@vecintrin.h:* 11 {{candidate function not viable}}
+                             // expected-note@vecintrin.h:* 2 {{must be a constant integer from 0 to 15}}
+  vss = vec_splat(vss, idx); // expected-error {{no matching function}}
+                             // expected-note@vecintrin.h:* 12 {{candidate function not viable}}
+                             // expected-note@vecintrin.h:* 1 {{must be a constant integer from 0 to 7}}
+  vss = vec_splat(vss, -1);  // expected-error {{no matching function}}
+                             // expected-note@vecintrin.h:* 12 {{candidate function not viable}}
+                             // expected-note@vecintrin.h:* 1 {{must be a constant integer from 0 to 7}}
+  vss = vec_splat(vss, 8);   // expected-error {{no matching function}}
+                             // expected-note@vecintrin.h:* 12 {{candidate function not viable}}
+                             // expected-note@vecintrin.h:* 1 {{must be a constant integer from 0 to 7}}
+  vus = vec_splat(vus, idx); // expected-error {{no matching function}}
+                             // expected-note@vecintrin.h:* 11 {{candidate function not viable}}
+                             // expected-note@vecintrin.h:* 2 {{must be a constant integer from 0 to 7}}
+  vus = vec_splat(vus, -1);  // expected-error {{no matching function}}
+                             // expected-note@vecintrin.h:* 11 {{candidate function not viable}}
+                             // expected-note@vecintrin.h:* 2 {{must be a constant integer from 0 to 7}}
+  vus = vec_splat(vus, 8);   // expected-error {{no matching function}}
+                             // expected-note@vecintrin.h:* 11 {{candidate function not viable}}
+                             // expected-note@vecintrin.h:* 2 {{must be a constant integer from 0 to 7}}
+  vbs = vec_splat(vbs, idx); // expected-error {{no matching function}}
+                             // expected-note@vecintrin.h:* 11 {{candidate function not viable}}
+                             // expected-note@vecintrin.h:* 2 {{must be a constant integer from 0 to 7}}
+  vbs = vec_splat(vbs, -1);  // expected-error {{no matching function}}
+                             // expected-note@vecintrin.h:* 11 {{candidate function not viable}}
+                             // expected-note@vecintrin.h:* 2 {{must be a constant integer from 0 to 7}}
+  vbs = vec_splat(vbs, 8);   // expected-error {{no matching function}}
+                             // expected-note@vecintrin.h:* 11 {{candidate function not viable}}
+                             // expected-note@vecintrin.h:* 2 {{must be a constant integer from 0 to 7}}
+  vsi = vec_splat(vsi, idx); // expected-error {{no matching function}}
+                             // expected-note@vecintrin.h:* 12 {{candidate function not viable}}
+                             // expected-note@vecintrin.h:* 1 {{must be a constant integer from 0 to 3}}
+  vsi = vec_splat(vsi, -1);  // expected-error {{no matching function}}
+                             // expected-note@vecintrin.h:* 12 {{candidate function not viable}}
+                             // expected-note@vecintrin.h:* 1 {{must be a constant integer from 0 to 3}}
+  vsi = vec_splat(vsi, 4);   // expected-error {{no matching function}}
+                             // expected-note@vecintrin.h:* 12 {{candidate function not viable}}
+                             // expected-note@vecintrin.h:* 1 {{must be a constant integer from 0 to 3}}
+  vui = vec_splat(vui, idx); // expected-error {{no matching function}}
+                             // expected-note@vecintrin.h:* 11 {{candidate function not viable}}
+                             // expected-note@vecintrin.h:* 2 {{must be a constant integer from 0 to 3}}
+  vui = vec_splat(vui, -1);  // expected-error {{no matching function}}
+                             // expected-note@vecintrin.h:* 11 {{candidate function not viable}}
+                             // expected-note@vecintrin.h:* 2 {{must be a constant integer from 0 to 3}}
+  vui = vec_splat(vui, 4);   // expected-error {{no matching function}}
+                             // expected-note@vecintrin.h:* 11 {{candidate function not viable}}
+                             // expected-note@vecintrin.h:* 2 {{must be a constant integer from 0 to 3}}
+  vbi = vec_splat(vbi, idx); // expected-error {{no matching function}}
+                             // expected-note@vecintrin.h:* 11 {{candidate function not viable}}
+                             // expected-note@vecintrin.h:* 2 {{must be a constant integer from 0 to 3}}
+  vbi = vec_splat(vbi, -1);  // expected-error {{no matching function}}
+                             // expected-note@vecintrin.h:* 11 {{candidate function not viable}}
+                             // expected-note@vecintrin.h:* 2 {{must be a constant integer from 0 to 3}}
+  vbi = vec_splat(vbi, 4);   // expected-error {{no matching function}}
+                             // expected-note@vecintrin.h:* 11 {{candidate function not viable}}
+                             // expected-note@vecintrin.h:* 2 {{must be a constant integer from 0 to 3}}
+  vsl = vec_splat(vsl, idx); // expected-error {{no matching function}}
+                             // expected-note@vecintrin.h:* 12 {{candidate function not viable}}
+                             // expected-note@vecintrin.h:* 1 {{must be a constant integer from 0 to 1}}
+  vsl = vec_splat(vsl, -1);  // expected-error {{no matching function}}
+                             // expected-note@vecintrin.h:* 12 {{candidate function not viable}}
+                             // expected-note@vecintrin.h:* 1 {{must be a constant integer from 0 to 1}}
+  vsl = vec_splat(vsl, 2);   // expected-error {{no matching function}}
+                             // expected-note@vecintrin.h:* 12 {{candidate function not viable}}
+                             // expected-note@vecintrin.h:* 1 {{must be a constant integer from 0 to 1}}
+  vul = vec_splat(vul, idx); // expected-error {{no matching function}}
+                             // expected-note@vecintrin.h:* 11 {{candidate function not viable}}
+                             // expected-note@vecintrin.h:* 2 {{must be a constant integer from 0 to 1}}
+  vul = vec_splat(vul, -1);  // expected-error {{no matching function}}
+                             // expected-note@vecintrin.h:* 11 {{candidate function not viable}}
+                             // expected-note@vecintrin.h:* 2 {{must be a constant integer from 0 to 1}}
+  vul = vec_splat(vul, 2);   // expected-error {{no matching function}}
+                             // expected-note@vecintrin.h:* 11 {{candidate function not viable}}
+                             // expected-note@vecintrin.h:* 2 {{must be a constant integer from 0 to 1}}
+  vbl = vec_splat(vbl, idx); // expected-error {{no matching function}}
+                             // expected-note@vecintrin.h:* 11 {{candidate function not viable}}
+                             // expected-note@vecintrin.h:* 2 {{must be a constant integer from 0 to 1}}
+  vbl = vec_splat(vbl, -1);  // expected-error {{no matching function}}
+                             // expected-note@vecintrin.h:* 11 {{candidate function not viable}}
+                             // expected-note@vecintrin.h:* 2 {{must be a constant integer from 0 to 1}}
+  vbl = vec_splat(vbl, 2);   // expected-error {{no matching function}}
+                             // expected-note@vecintrin.h:* 11 {{candidate function not viable}}
+                             // expected-note@vecintrin.h:* 2 {{must be a constant integer from 0 to 1}}
+  vd = vec_splat(vd, idx);   // expected-error {{no matching function}}
+                             // expected-note@vecintrin.h:* 12 {{candidate function not viable}}
+                             // expected-note@vecintrin.h:* 1 {{must be a constant integer from 0 to 1}}
+  vd = vec_splat(vd, -1);    // expected-error {{no matching function}}
+                             // expected-note@vecintrin.h:* 12 {{candidate function not viable}}
+                             // expected-note@vecintrin.h:* 1 {{must be a constant integer from 0 to 1}}
+  vd = vec_splat(vd, 2);     // expected-error {{no matching function}}
+                             // expected-note@vecintrin.h:* 12 {{candidate function not viable}}
+                             // expected-note@vecintrin.h:* 1 {{must be a constant integer from 0 to 1}}
+
+  vsc = vec_splat_s8(idx);  // expected-error {{no matching function}}
+                            // expected-note@vecintrin.h:* {{must be a constant integer}}
+  vuc = vec_splat_u8(idx);  // expected-error {{no matching function}}
+                            // expected-note@vecintrin.h:* {{must be a constant integer}}
+  vss = vec_splat_s16(idx); // expected-error {{no matching function}}
+                            // expected-note@vecintrin.h:* {{must be a constant integer}}
+  vus = vec_splat_u16(idx); // expected-error {{no matching function}}
+                            // expected-note@vecintrin.h:* {{must be a constant integer}}
+  vsi = vec_splat_s32(idx); // expected-error {{no matching function}}
+                            // expected-note@vecintrin.h:* {{must be a constant integer}}
+  vui = vec_splat_u32(idx); // expected-error {{no matching function}}
+                            // expected-note@vecintrin.h:* {{must be a constant integer}}
+  vsl = vec_splat_s64(idx); // expected-error {{no matching function}}
+                            // expected-note@vecintrin.h:* {{must be a constant integer}}
+  vul = vec_splat_u64(idx); // expected-error {{no matching function}}
+                            // expected-note@vecintrin.h:* {{must be a constant integer}}
+}
+
+void test_integer(void) {
+  vsc = vec_rl_mask(vsc, vuc, idx); // expected-error {{no matching function}}
+                                    // expected-note@vecintrin.h:* 7 {{candidate function not viable}}
+                                    // expected-note@vecintrin.h:* 1 {{must be a constant integer}}
+  vuc = vec_rl_mask(vuc, vuc, idx); // expected-error {{no matching function}}
+                                    // expected-note@vecintrin.h:* 7 {{candidate function not viable}}
+                                    // expected-note@vecintrin.h:* 1 {{must be a constant integer}}
+  vss = vec_rl_mask(vss, vus, idx); // expected-error {{no matching function}}
+                                    // expected-note@vecintrin.h:* 7 {{candidate function not viable}}
+                                    // expected-note@vecintrin.h:* 1 {{must be a constant integer}}
+  vus = vec_rl_mask(vus, vus, idx); // expected-error {{no matching function}}
+                                    // expected-note@vecintrin.h:* 7 {{candidate function not viable}}
+                                    // expected-note@vecintrin.h:* 1 {{must be a constant integer}}
+  vsi = vec_rl_mask(vsi, vui, idx); // expected-error {{no matching function}}
+                                    // expected-note@vecintrin.h:* 7 {{candidate function not viable}}
+                                    // expected-note@vecintrin.h:* 1 {{must be a constant integer}}
+  vui = vec_rl_mask(vui, vui, idx); // expected-error {{no matching function}}
+                                    // expected-note@vecintrin.h:* 7 {{candidate function not viable}}
+                                    // expected-note@vecintrin.h:* 1 {{must be a constant integer}}
+  vsl = vec_rl_mask(vsl, vul, idx); // expected-error {{no matching function}}
+                                    // expected-note@vecintrin.h:* 7 {{candidate function not viable}}
+                                    // expected-note@vecintrin.h:* 1 {{must be a constant integer}}
+  vul = vec_rl_mask(vul, vul, idx); // expected-error {{no matching function}}
+                                    // expected-note@vecintrin.h:* 7 {{candidate function not viable}}
+                                    // expected-note@vecintrin.h:* 1 {{must be a constant integer}}
+
+  vsc = vec_sld(vsc, vsc, idx); // expected-error {{no matching function}}
+                                // expected-note@vecintrin.h:* 8 {{candidate function not viable}}
+                                // expected-note@vecintrin.h:* 1 {{must be a constant integer from 0 to 15}}
+  vsc = vec_sld(vsc, vsc, -1);  // expected-error {{no matching function}}
+                                // expected-note@vecintrin.h:* 8 {{candidate function not viable}}
+                                // expected-note@vecintrin.h:* 1 {{must be a constant integer from 0 to 15}}
+  vsc = vec_sld(vsc, vsc, 16);  // expected-error {{no matching function}}
+                                // expected-note@vecintrin.h:* 8 {{candidate function not viable}}
+                                // expected-note@vecintrin.h:* 1 {{must be a constant integer from 0 to 15}}
+  vuc = vec_sld(vuc, vuc, idx); // expected-error {{no matching function}}
+                                // expected-note@vecintrin.h:* 8 {{candidate function not viable}}
+                                // expected-note@vecintrin.h:* 1 {{must be a constant integer from 0 to 15}}
+  vuc = vec_sld(vuc, vuc, -1);  // expected-error {{no matching function}}
+                                // expected-note@vecintrin.h:* 8 {{candidate function not viable}}
+                                // expected-note@vecintrin.h:* 1 {{must be a constant integer from 0 to 15}}
+  vuc = vec_sld(vuc, vuc, 16);  // expected-error {{no matching function}}
+                                // expected-note@vecintrin.h:* 8 {{candidate function not viable}}
+                                // expected-note@vecintrin.h:* 1 {{must be a constant integer from 0 to 15}}
+  vss = vec_sld(vss, vss, idx); // expected-error {{no matching function}}
+                                // expected-note@vecintrin.h:* 8 {{candidate function not viable}}
+                                // expected-note@vecintrin.h:* 1 {{must be a constant integer from 0 to 15}}
+  vus = vec_sld(vus, vus, idx); // expected-error {{no matching function}}
+                                // expected-note@vecintrin.h:* 8 {{candidate function not viable}}
+                                // expected-note@vecintrin.h:* 1 {{must be a constant integer from 0 to 15}}
+  vsi = vec_sld(vsi, vsi, idx); // expected-error {{no matching function}}
+                                // expected-note@vecintrin.h:* 8 {{candidate function not viable}}
+                                // expected-note@vecintrin.h:* 1 {{must be a constant integer from 0 to 15}}
+  vui = vec_sld(vui, vui, idx); // expected-error {{no matching function}}
+                                // expected-note@vecintrin.h:* 8 {{candidate function not viable}}
+                                // expected-note@vecintrin.h:* 1 {{must be a constant integer from 0 to 15}}
+  vsl = vec_sld(vsl, vsl, idx); // expected-error {{no matching function}}
+                                // expected-note@vecintrin.h:* 8 {{candidate function not viable}}
+                                // expected-note@vecintrin.h:* 1 {{must be a constant integer from 0 to 15}}
+  vul = vec_sld(vul, vul, idx); // expected-error {{no matching function}}
+                                // expected-note@vecintrin.h:* 8 {{candidate function not viable}}
+                                // expected-note@vecintrin.h:* 1 {{must be a constant integer from 0 to 15}}
+  vd = vec_sld(vd, vd, idx);    // expected-error {{no matching function}}
+                                // expected-note@vecintrin.h:* 8 {{candidate function not viable}}
+                                // expected-note@vecintrin.h:* 1 {{must be a constant integer from 0 to 15}}
+
+  vsc = vec_sldw(vsc, vsc, idx); // expected-error {{no matching function}}
+                                 // expected-note@vecintrin.h:* 8 {{candidate function not viable}}
+                                 // expected-note@vecintrin.h:* 1 {{must be a constant integer from 0 to 3}}
+  vsc = vec_sldw(vsc, vsc, -1);  // expected-error {{no matching function}}
+                                 // expected-note@vecintrin.h:* 8 {{candidate function not viable}}
+                                 // expected-note@vecintrin.h:* 1 {{must be a constant integer from 0 to 3}}
+  vsc = vec_sldw(vsc, vsc, 4);   // expected-error {{no matching function}}
+                                 // expected-note@vecintrin.h:* 8 {{candidate function not viable}}
+                                 // expected-note@vecintrin.h:* 1 {{must be a constant integer from 0 to 3}}
+  vuc = vec_sldw(vuc, vuc, idx); // expected-error {{no matching function}}
+                                 // expected-note@vecintrin.h:* 8 {{candidate function not viable}}
+                                 // expected-note@vecintrin.h:* 1 {{must be a constant integer from 0 to 3}}
+  vuc = vec_sldw(vuc, vuc, -1);  // expected-error {{no matching function}}
+                                 // expected-note@vecintrin.h:* 8 {{candidate function not viable}}
+                                 // expected-note@vecintrin.h:* 1 {{must be a constant integer from 0 to 3}}
+  vuc = vec_sldw(vuc, vuc, 4);   // expected-error {{no matching function}}
+                                 // expected-note@vecintrin.h:* 8 {{candidate function not viable}}
+                                 // expected-note@vecintrin.h:* 1 {{must be a constant integer from 0 to 3}}
+  vss = vec_sldw(vss, vss, idx); // expected-error {{no matching function}}
+                                 // expected-note@vecintrin.h:* 8 {{candidate function not viable}}
+                                 // expected-note@vecintrin.h:* 1 {{must be a constant integer from 0 to 3}}
+  vus = vec_sldw(vus, vus, idx); // expected-error {{no matching function}}
+                                 // expected-note@vecintrin.h:* 8 {{candidate function not viable}}
+                                 // expected-note@vecintrin.h:* 1 {{must be a constant integer from 0 to 3}}
+  vsi = vec_sldw(vsi, vsi, idx); // expected-error {{no matching function}}
+                                 // expected-note@vecintrin.h:* 8 {{candidate function not viable}}
+                                 // expected-note@vecintrin.h:* 1 {{must be a constant integer from 0 to 3}}
+  vui = vec_sldw(vui, vui, idx); // expected-error {{no matching function}}
+                                 // expected-note@vecintrin.h:* 8 {{candidate function not viable}}
+                                 // expected-note@vecintrin.h:* 1 {{must be a constant integer from 0 to 3}}
+  vsl = vec_sldw(vsl, vsl, idx); // expected-error {{no matching function}}
+                                 // expected-note@vecintrin.h:* 8 {{candidate function not viable}}
+                                 // expected-note@vecintrin.h:* 1 {{must be a constant integer from 0 to 3}}
+  vul = vec_sldw(vul, vul, idx); // expected-error {{no matching function}}
+                                 // expected-note@vecintrin.h:* 8 {{candidate function not viable}}
+                                 // expected-note@vecintrin.h:* 1 {{must be a constant integer from 0 to 3}}
+  vd = vec_sldw(vd, vd, idx);    // expected-error {{no matching function}}
+                                 // expected-note@vecintrin.h:* 8 {{candidate function not viable}}
+                                 // expected-note@vecintrin.h:* 1 {{must be a constant integer from 0 to 3}}
+}
+
+void test_float(void) {
+  vd = vec_ctd(vsl, idx); // expected-error {{no matching function}}
+                          // expected-note@vecintrin.h:* 1 {{candidate function not viable}}
+                          // expected-note@vecintrin.h:* 1 {{must be a constant integer from 0 to 31}}
+  vd = vec_ctd(vsl, -1);  // expected-error {{no matching function}}
+                          // expected-note@vecintrin.h:* 1 {{candidate function not viable}}
+                          // expected-note@vecintrin.h:* 1 {{must be a constant integer from 0 to 31}}
+  vd = vec_ctd(vsl, 32);  // expected-error {{no matching function}}
+                          // expected-note@vecintrin.h:* 1 {{candidate function not viable}}
+                          // expected-note@vecintrin.h:* 1 {{must be a constant integer from 0 to 31}}
+  vd = vec_ctd(vul, idx); // expected-error {{no matching function}}
+                          // expected-note@vecintrin.h:* 1 {{candidate function not viable}}
+                          // expected-note@vecintrin.h:* 1 {{must be a constant integer from 0 to 31}}
+  vd = vec_ctd(vul, -1);  // expected-error {{no matching function}}
+                          // expected-note@vecintrin.h:* 1 {{candidate function not viable}}
+                          // expected-note@vecintrin.h:* 1 {{must be a constant integer from 0 to 31}}
+  vd = vec_ctd(vul, 32);  // expected-error {{no matching function}}
+                          // expected-note@vecintrin.h:* 1 {{candidate function not viable}}
+                          // expected-note@vecintrin.h:* 1 {{must be a constant integer from 0 to 31}}
+
+  vsl = vec_ctsl(vd, idx); // expected-error {{no matching function}}
+                           // expected-note@vecintrin.h:* 1 {{must be a constant integer from 0 to 31}}
+  vsl = vec_ctsl(vd, -1);  // expected-error {{no matching function}}
+                           // expected-note@vecintrin.h:* 1 {{must be a constant integer from 0 to 31}}
+  vsl = vec_ctsl(vd, 32);  // expected-error {{no matching function}}
+                           // expected-note@vecintrin.h:* 1 {{must be a constant integer from 0 to 31}}
+  vul = vec_ctul(vd, idx); // expected-error {{no matching function}}
+                           // expected-note@vecintrin.h:* 1 {{must be a constant integer from 0 to 31}}
+  vul = vec_ctul(vd, -1);  // expected-error {{no matching function}}
+                           // expected-note@vecintrin.h:* 1 {{must be a constant integer from 0 to 31}}
+  vul = vec_ctul(vd, 32);  // expected-error {{no matching function}}
+                           // expected-note@vecintrin.h:* 1 {{must be a constant integer from 0 to 31}}
+
+  vbl = vec_fp_test_data_class(vd, idx, &cc);  // expected-error {{must be a constant integer}}
+  vbl = vec_fp_test_data_class(vd, -1, &cc);   // expected-error {{should be a value from 0 to 4095}}
+  vbl = vec_fp_test_data_class(vd, 4096, &cc); // expected-error {{should be a value from 0 to 4095}}
+}
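
The Sema test above checks that the zvector element-selection, shift, and conversion intrinsics only accept integer constant expressions within their documented ranges. A minimal sketch of what that means from user code, assuming compilation with -target s390x-linux-gnu -march=z13 -fzvector (flags chosen for illustration, not taken from the patch itself):

    #include <vecintrin.h>

    /* Sketch only: vec_splat on a 4-element vector takes a constant index
       in [0, 3]; a literal satisfies the constraint. */
    vector signed int splat_third_element(vector signed int v) {
      return vec_splat(v, 2);
    }

    /* Sketch only: passing a runtime value instead of a constant is the
       case the test above expects to fail with "no matching function".
    vector signed int splat_any(vector signed int v, int idx) {
      return vec_splat(v, idx);   // rejected: index must be a constant
    }
    */

The CodeGen test that follows covers the complementary side: each intrinsic invoked with valid constant operands and the LLVM IR it is expected to lower to.
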
diff --git a/test/CodeGen/builtins-systemz-zvector.c b/test/CodeGen/builtins-systemz-zvector.c
new file mode 100644
index 0000000..6d554af
--- /dev/null
+++ b/test/CodeGen/builtins-systemz-zvector.c
@@ -0,0 +1,2967 @@
+// REQUIRES: systemz-registered-target
+// RUN: %clang_cc1 -target-cpu z13 -triple s390x-linux-gnu \
+// RUN: -O -fzvector -fno-lax-vector-conversions \
+// RUN: -Wall -Wno-unused -Werror -emit-llvm %s -o - | FileCheck %s
+
+#include <vecintrin.h>
+
+volatile vector signed char vsc;
+volatile vector signed short vss;
+volatile vector signed int vsi;
+volatile vector signed long long vsl;
+volatile vector unsigned char vuc;
+volatile vector unsigned short vus;
+volatile vector unsigned int vui;
+volatile vector unsigned long long vul;
+volatile vector bool char vbc;
+volatile vector bool short vbs;
+volatile vector bool int vbi;
+volatile vector bool long long vbl;
+volatile vector double vd;
+
+volatile signed char sc;
+volatile signed short ss;
+volatile signed int si;
+volatile signed long long sl;
+volatile unsigned char uc;
+volatile unsigned short us;
+volatile unsigned int ui;
+volatile unsigned long long ul;
+volatile double d;
+
+const void * volatile cptr;
+const signed char * volatile cptrsc;
+const signed short * volatile cptrss;
+const signed int * volatile cptrsi;
+const signed long long * volatile cptrsl;
+const unsigned char * volatile cptruc;
+const unsigned short * volatile cptrus;
+const unsigned int * volatile cptrui;
+const unsigned long long * volatile cptrul;
+const float * volatile cptrf;
+const double * volatile cptrd;
+
+void * volatile ptr;
+signed char * volatile ptrsc;
+signed short * volatile ptrss;
+signed int * volatile ptrsi;
+signed long long * volatile ptrsl;
+unsigned char * volatile ptruc;
+unsigned short * volatile ptrus;
+unsigned int * volatile ptrui;
+unsigned long long * volatile ptrul;
+float * volatile ptrf;
+double * volatile ptrd;
+
+volatile unsigned int len;
+volatile int idx;
+int cc;
+
+void test_core(void) {
+  len = __lcbb(cptr, 64);
+  // CHECK: call i32 @llvm.s390.lcbb(i8* %{{.*}}, i32 0)
+  len = __lcbb(cptr, 128);
+  // CHECK: call i32 @llvm.s390.lcbb(i8* %{{.*}}, i32 1)
+  len = __lcbb(cptr, 256);
+  // CHECK: call i32 @llvm.s390.lcbb(i8* %{{.*}}, i32 2)
+  len = __lcbb(cptr, 512);
+  // CHECK: call i32 @llvm.s390.lcbb(i8* %{{.*}}, i32 3)
+  len = __lcbb(cptr, 1024);
+  // CHECK: call i32 @llvm.s390.lcbb(i8* %{{.*}}, i32 4)
+  len = __lcbb(cptr, 2048);
+  // CHECK: call i32 @llvm.s390.lcbb(i8* %{{.*}}, i32 5)
+  len = __lcbb(cptr, 4096);
+  // CHECK: call i32 @llvm.s390.lcbb(i8* %{{.*}}, i32 6)
+
+  sc = vec_extract(vsc, idx);
+  // CHECK: extractelement <16 x i8> %{{.*}}, i32 %{{.*}}
+  uc = vec_extract(vuc, idx);
+  // CHECK: extractelement <16 x i8> %{{.*}}, i32 %{{.*}}
+  uc = vec_extract(vbc, idx);
+  // CHECK: extractelement <16 x i8> %{{.*}}, i32 %{{.*}}
+  ss = vec_extract(vss, idx);
+  // CHECK: extractelement <8 x i16> %{{.*}}, i32 %{{.*}}
+  us = vec_extract(vus, idx);
+  // CHECK: extractelement <8 x i16> %{{.*}}, i32 %{{.*}}
+  us = vec_extract(vbs, idx);
+  // CHECK: extractelement <8 x i16> %{{.*}}, i32 %{{.*}}
+  si = vec_extract(vsi, idx);
+  // CHECK: extractelement <4 x i32> %{{.*}}, i32 %{{.*}}
+  ui = vec_extract(vui, idx);
+  // CHECK: extractelement <4 x i32> %{{.*}}, i32 %{{.*}}
+  ui = vec_extract(vbi, idx);
+  // CHECK: extractelement <4 x i32> %{{.*}}, i32 %{{.*}}
+  sl = vec_extract(vsl, idx);
+  // CHECK: extractelement <2 x i64> %{{.*}}, i32 %{{.*}}
+  ul = vec_extract(vul, idx);
+  // CHECK: extractelement <2 x i64> %{{.*}}, i32 %{{.*}}
+  ul = vec_extract(vbl, idx);
+  // CHECK: extractelement <2 x i64> %{{.*}}, i32 %{{.*}}
+  d = vec_extract(vd, idx);
+  // CHECK: extractelement <2 x double> %{{.*}}, i32 %{{.*}}
+
+  vsc = vec_insert(sc, vsc, idx);
+  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 %{{.*}}
+  vuc = vec_insert(uc, vuc, idx);
+  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 %{{.*}}
+  vuc = vec_insert(uc, vbc, idx);
+  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 %{{.*}}
+  vss = vec_insert(ss, vss, idx);
+  // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 %{{.*}}
+  vus = vec_insert(us, vus, idx);
+  // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 %{{.*}}
+  vus = vec_insert(us, vbs, idx);
+  // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 %{{.*}}
+  vsi = vec_insert(si, vsi, idx);
+  // CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}
+  vui = vec_insert(ui, vui, idx);
+  // CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}
+  vui = vec_insert(ui, vbi, idx);
+  // CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}
+  vsl = vec_insert(sl, vsl, idx);
+  // CHECK: insertelement <2 x i64> %{{.*}}, i64 %{{.*}}, i32 %{{.*}}
+  vul = vec_insert(ul, vul, idx);
+  // CHECK: insertelement <2 x i64> %{{.*}}, i64 %{{.*}}, i32 %{{.*}}
+  vul = vec_insert(ul, vbl, idx);
+  // CHECK: insertelement <2 x i64> %{{.*}}, i64 %{{.*}}, i32 %{{.*}}
+  vd = vec_insert(d, vd, idx);
+  // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 %{{.*}}
+
+  vsc = vec_promote(sc, idx);
+  // CHECK: insertelement <16 x i8> undef, i8 %{{.*}}, i32 %{{.*}}
+  vuc = vec_promote(uc, idx);
+  // CHECK: insertelement <16 x i8> undef, i8 %{{.*}}, i32 %{{.*}}
+  vss = vec_promote(ss, idx);
+  // CHECK: insertelement <8 x i16> undef, i16 %{{.*}}, i32 %{{.*}}
+  vus = vec_promote(us, idx);
+  // CHECK: insertelement <8 x i16> undef, i16 %{{.*}}, i32 %{{.*}}
+  vsi = vec_promote(si, idx);
+  // CHECK: insertelement <4 x i32> undef, i32 %{{.*}}, i32 %{{.*}}
+  vui = vec_promote(ui, idx);
+  // CHECK: insertelement <4 x i32> undef, i32 %{{.*}}, i32 %{{.*}}
+  vsl = vec_promote(sl, idx);
+  // CHECK: insertelement <2 x i64> undef, i64 %{{.*}}, i32 %{{.*}}
+  vul = vec_promote(ul, idx);
+  // CHECK: insertelement <2 x i64> undef, i64 %{{.*}}, i32 %{{.*}}
+  vd = vec_promote(d, idx);
+  // CHECK: insertelement <2 x double> undef, double %{{.*}}, i32 %{{.*}}
+
+  vsc = vec_insert_and_zero(cptrsc);
+  // CHECK: insertelement <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 undef, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, i8 %{{.*}}, i32 7
+  vuc = vec_insert_and_zero(cptruc);
+  // CHECK: insertelement <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 undef, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, i8 %{{.*}}, i32 7
+  vss = vec_insert_and_zero(cptrss);
+  // CHECK: insertelement <8 x i16> <i16 0, i16 0, i16 0, i16 undef, i16 0, i16 0, i16 0, i16 0>, i16 %{{.*}}, i32 3
+  vus = vec_insert_and_zero(cptrus);
+  // CHECK: insertelement <8 x i16> <i16 0, i16 0, i16 0, i16 undef, i16 0, i16 0, i16 0, i16 0>, i16 %{{.*}}, i32 3
+  vsi = vec_insert_and_zero(cptrsi);
+  // CHECK: insertelement <4 x i32> <i32 0, i32 undef, i32 0, i32 0>, i32 %{{.*}}, i32 1
+  vui = vec_insert_and_zero(cptrui);
+  // CHECK: insertelement <4 x i32> <i32 0, i32 undef, i32 0, i32 0>, i32 %{{.*}}, i32 1
+  vsl = vec_insert_and_zero(cptrsl);
+  // CHECK: insertelement <2 x i64> <i64 undef, i64 0>, i64 %{{.*}}, i32 0
+  vul = vec_insert_and_zero(cptrul);
+  // CHECK: insertelement <2 x i64> <i64 undef, i64 0>, i64 %{{.*}}, i32 0
+  vd = vec_insert_and_zero(cptrd);
+  // CHECK: insertelement <2 x double> <double undef, double 0.000000e+00>, double %{{.*}}, i32 0
+
+  vsc = vec_perm(vsc, vsc, vuc);
+  // CHECK: call <16 x i8> @llvm.s390.vperm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vuc = vec_perm(vuc, vuc, vuc);
+  // CHECK: call <16 x i8> @llvm.s390.vperm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vbc = vec_perm(vbc, vbc, vuc);
+  // CHECK: call <16 x i8> @llvm.s390.vperm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vss = vec_perm(vss, vss, vuc);
+  // CHECK: call <16 x i8> @llvm.s390.vperm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vus = vec_perm(vus, vus, vuc);
+  // CHECK: call <16 x i8> @llvm.s390.vperm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vbs = vec_perm(vbs, vbs, vuc);
+  // CHECK: call <16 x i8> @llvm.s390.vperm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vsi = vec_perm(vsi, vsi, vuc);
+  // CHECK: call <16 x i8> @llvm.s390.vperm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vui = vec_perm(vui, vui, vuc);
+  // CHECK: call <16 x i8> @llvm.s390.vperm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vbi = vec_perm(vbi, vbi, vuc);
+  // CHECK: call <16 x i8> @llvm.s390.vperm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vsl = vec_perm(vsl, vsl, vuc);
+  // CHECK: call <16 x i8> @llvm.s390.vperm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vul = vec_perm(vul, vul, vuc);
+  // CHECK: call <16 x i8> @llvm.s390.vperm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vbl = vec_perm(vbl, vbl, vuc);
+  // CHECK: call <16 x i8> @llvm.s390.vperm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vd = vec_perm(vd, vd, vuc);
+  // CHECK: call <16 x i8> @llvm.s390.vperm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+
+  vsl = vec_permi(vsl, vsl, 0);
+  // CHECK: call <2 x i64> @llvm.s390.vpdi(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, i32 0)
+  vsl = vec_permi(vsl, vsl, 1);
+  // CHECK: call <2 x i64> @llvm.s390.vpdi(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, i32 1)
+  vsl = vec_permi(vsl, vsl, 2);
+  // CHECK: call <2 x i64> @llvm.s390.vpdi(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, i32 4)
+  vsl = vec_permi(vsl, vsl, 3);
+  // CHECK: call <2 x i64> @llvm.s390.vpdi(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, i32 5)
+  vul = vec_permi(vul, vul, 0);
+  // CHECK: call <2 x i64> @llvm.s390.vpdi(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, i32 0)
+  vul = vec_permi(vul, vul, 1);
+  // CHECK: call <2 x i64> @llvm.s390.vpdi(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, i32 1)
+  vul = vec_permi(vul, vul, 2);
+  // CHECK: call <2 x i64> @llvm.s390.vpdi(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, i32 4)
+  vul = vec_permi(vul, vul, 3);
+  // CHECK: call <2 x i64> @llvm.s390.vpdi(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, i32 5)
+  vbl = vec_permi(vbl, vbl, 0);
+  // CHECK: call <2 x i64> @llvm.s390.vpdi(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, i32 0)
+  vbl = vec_permi(vbl, vbl, 1);
+  // CHECK: call <2 x i64> @llvm.s390.vpdi(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, i32 1)
+  vbl = vec_permi(vbl, vbl, 2);
+  // CHECK: call <2 x i64> @llvm.s390.vpdi(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, i32 4)
+  vbl = vec_permi(vbl, vbl, 3);
+  // CHECK: call <2 x i64> @llvm.s390.vpdi(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, i32 5)
+  vd = vec_permi(vd, vd, 0);
+  // CHECK: call <2 x i64> @llvm.s390.vpdi(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, i32 0)
+  vd = vec_permi(vd, vd, 1);
+  // CHECK: call <2 x i64> @llvm.s390.vpdi(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, i32 1)
+  vd = vec_permi(vd, vd, 2);
+  // CHECK: call <2 x i64> @llvm.s390.vpdi(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, i32 4)
+  vd = vec_permi(vd, vd, 3);
+  // CHECK: call <2 x i64> @llvm.s390.vpdi(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, i32 5)
+
+  vsc = vec_sel(vsc, vsc, vuc);
+  vsc = vec_sel(vsc, vsc, vbc);
+  vuc = vec_sel(vuc, vuc, vuc);
+  vuc = vec_sel(vuc, vuc, vbc);
+  vbc = vec_sel(vbc, vbc, vuc);
+  vbc = vec_sel(vbc, vbc, vbc);
+  vss = vec_sel(vss, vss, vus);
+  vss = vec_sel(vss, vss, vbs);
+  vus = vec_sel(vus, vus, vus);
+  vus = vec_sel(vus, vus, vbs);
+  vbs = vec_sel(vbs, vbs, vus);
+  vbs = vec_sel(vbs, vbs, vbs);
+  vsi = vec_sel(vsi, vsi, vui);
+  vsi = vec_sel(vsi, vsi, vbi);
+  vui = vec_sel(vui, vui, vui);
+  vui = vec_sel(vui, vui, vbi);
+  vbi = vec_sel(vbi, vbi, vui);
+  vbi = vec_sel(vbi, vbi, vbi);
+  vsl = vec_sel(vsl, vsl, vul);
+  vsl = vec_sel(vsl, vsl, vbl);
+  vul = vec_sel(vul, vul, vul);
+  vul = vec_sel(vul, vul, vbl);
+  vbl = vec_sel(vbl, vbl, vul);
+  vbl = vec_sel(vbl, vbl, vbl);
+  vd = vec_sel(vd, vd, vul);
+  vd = vec_sel(vd, vd, vbl);
+
+  vsi = vec_gather_element(vsi, vui, cptrsi, 0);
+  vsi = vec_gather_element(vsi, vui, cptrsi, 1);
+  vsi = vec_gather_element(vsi, vui, cptrsi, 2);
+  vsi = vec_gather_element(vsi, vui, cptrsi, 3);
+  vui = vec_gather_element(vui, vui, cptrui, 0);
+  vui = vec_gather_element(vui, vui, cptrui, 1);
+  vui = vec_gather_element(vui, vui, cptrui, 2);
+  vui = vec_gather_element(vui, vui, cptrui, 3);
+  vbi = vec_gather_element(vbi, vui, cptrui, 0);
+  vbi = vec_gather_element(vbi, vui, cptrui, 1);
+  vbi = vec_gather_element(vbi, vui, cptrui, 2);
+  vbi = vec_gather_element(vbi, vui, cptrui, 3);
+  vsl = vec_gather_element(vsl, vul, cptrsl, 0);
+  vsl = vec_gather_element(vsl, vul, cptrsl, 1);
+  vul = vec_gather_element(vul, vul, cptrul, 0);
+  vul = vec_gather_element(vul, vul, cptrul, 1);
+  vbl = vec_gather_element(vbl, vul, cptrul, 0);
+  vbl = vec_gather_element(vbl, vul, cptrul, 1);
+  vd = vec_gather_element(vd, vul, cptrd, 0);
+  vd = vec_gather_element(vd, vul, cptrd, 1);
+
+  vec_scatter_element(vsi, vui, ptrsi, 0);
+  vec_scatter_element(vsi, vui, ptrsi, 1);
+  vec_scatter_element(vsi, vui, ptrsi, 2);
+  vec_scatter_element(vsi, vui, ptrsi, 3);
+  vec_scatter_element(vui, vui, ptrui, 0);
+  vec_scatter_element(vui, vui, ptrui, 1);
+  vec_scatter_element(vui, vui, ptrui, 2);
+  vec_scatter_element(vui, vui, ptrui, 3);
+  vec_scatter_element(vbi, vui, ptrui, 0);
+  vec_scatter_element(vbi, vui, ptrui, 1);
+  vec_scatter_element(vbi, vui, ptrui, 2);
+  vec_scatter_element(vbi, vui, ptrui, 3);
+  vec_scatter_element(vsl, vul, ptrsl, 0);
+  vec_scatter_element(vsl, vul, ptrsl, 1);
+  vec_scatter_element(vul, vul, ptrul, 0);
+  vec_scatter_element(vul, vul, ptrul, 1);
+  vec_scatter_element(vbl, vul, ptrul, 0);
+  vec_scatter_element(vbl, vul, ptrul, 1);
+  vec_scatter_element(vd, vul, ptrd, 0);
+  vec_scatter_element(vd, vul, ptrd, 1);
+
+  vsc = vec_xld2(idx, cptrsc);
+  vuc = vec_xld2(idx, cptruc);
+  vss = vec_xld2(idx, cptrss);
+  vus = vec_xld2(idx, cptrus);
+  vsi = vec_xld2(idx, cptrsi);
+  vui = vec_xld2(idx, cptrui);
+  vsl = vec_xld2(idx, cptrsl);
+  vul = vec_xld2(idx, cptrul);
+  vd = vec_xld2(idx, cptrd);
+
+  vsc = vec_xlw4(idx, cptrsc);
+  vuc = vec_xlw4(idx, cptruc);
+  vss = vec_xlw4(idx, cptrss);
+  vus = vec_xlw4(idx, cptrus);
+  vsi = vec_xlw4(idx, cptrsi);
+  vui = vec_xlw4(idx, cptrui);
+
+  vec_xstd2(vsc, idx, ptrsc);
+  vec_xstd2(vuc, idx, ptruc);
+  vec_xstd2(vss, idx, ptrss);
+  vec_xstd2(vus, idx, ptrus);
+  vec_xstd2(vsi, idx, ptrsi);
+  vec_xstd2(vui, idx, ptrui);
+  vec_xstd2(vsl, idx, ptrsl);
+  vec_xstd2(vul, idx, ptrul);
+  vec_xstd2(vd, idx, ptrd);
+
+  vec_xstw4(vsc, idx, ptrsc);
+  vec_xstw4(vuc, idx, ptruc);
+  vec_xstw4(vss, idx, ptrss);
+  vec_xstw4(vus, idx, ptrus);
+  vec_xstw4(vsi, idx, ptrsi);
+  vec_xstw4(vui, idx, ptrui);
+
+  vsc = vec_load_bndry(cptrsc, 64);
+  // CHECK: call <16 x i8> @llvm.s390.vlbb(i8* %{{.*}}, i32 0)
+  vuc = vec_load_bndry(cptruc, 64);
+  // CHECK: call <16 x i8> @llvm.s390.vlbb(i8* %{{.*}}, i32 0)
+  vss = vec_load_bndry(cptrss, 64);
+  // CHECK: call <16 x i8> @llvm.s390.vlbb(i8* %{{.*}}, i32 0)
+  vus = vec_load_bndry(cptrus, 64);
+  // CHECK: call <16 x i8> @llvm.s390.vlbb(i8* %{{.*}}, i32 0)
+  vsi = vec_load_bndry(cptrsi, 64);
+  // CHECK: call <16 x i8> @llvm.s390.vlbb(i8* %{{.*}}, i32 0)
+  vui = vec_load_bndry(cptrui, 64);
+  // CHECK: call <16 x i8> @llvm.s390.vlbb(i8* %{{.*}}, i32 0)
+  vsl = vec_load_bndry(cptrsl, 64);
+  // CHECK: call <16 x i8> @llvm.s390.vlbb(i8* %{{.*}}, i32 0)
+  vul = vec_load_bndry(cptrul, 64);
+  // CHECK: call <16 x i8> @llvm.s390.vlbb(i8* %{{.*}}, i32 0)
+  vd = vec_load_bndry(cptrd, 64);
+  // CHECK: call <16 x i8> @llvm.s390.vlbb(i8* %{{.*}}, i32 0)
+  vsc = vec_load_bndry(cptrsc, 128);
+  // CHECK: call <16 x i8> @llvm.s390.vlbb(i8* %{{.*}}, i32 1)
+  vsc = vec_load_bndry(cptrsc, 256);
+  // CHECK: call <16 x i8> @llvm.s390.vlbb(i8* %{{.*}}, i32 2)
+  vsc = vec_load_bndry(cptrsc, 512);
+  // CHECK: call <16 x i8> @llvm.s390.vlbb(i8* %{{.*}}, i32 3)
+  vsc = vec_load_bndry(cptrsc, 1024);
+  // CHECK: call <16 x i8> @llvm.s390.vlbb(i8* %{{.*}}, i32 4)
+  vsc = vec_load_bndry(cptrsc, 2048);
+  // CHECK: call <16 x i8> @llvm.s390.vlbb(i8* %{{.*}}, i32 5)
+  vsc = vec_load_bndry(cptrsc, 4096);
+  // CHECK: call <16 x i8> @llvm.s390.vlbb(i8* %{{.*}}, i32 6)
+
+  vsc = vec_load_len(cptrsc, idx);
+  // CHECK: call <16 x i8> @llvm.s390.vll(i32 %{{.*}}, i8* %{{.*}})
+  vuc = vec_load_len(cptruc, idx);
+  // CHECK: call <16 x i8> @llvm.s390.vll(i32 %{{.*}}, i8* %{{.*}})
+  vss = vec_load_len(cptrss, idx);
+  // CHECK: call <16 x i8> @llvm.s390.vll(i32 %{{.*}}, i8* %{{.*}})
+  vus = vec_load_len(cptrus, idx);
+  // CHECK: call <16 x i8> @llvm.s390.vll(i32 %{{.*}}, i8* %{{.*}})
+  vsi = vec_load_len(cptrsi, idx);
+  // CHECK: call <16 x i8> @llvm.s390.vll(i32 %{{.*}}, i8* %{{.*}})
+  vui = vec_load_len(cptrui, idx);
+  // CHECK: call <16 x i8> @llvm.s390.vll(i32 %{{.*}}, i8* %{{.*}})
+  vsl = vec_load_len(cptrsl, idx);
+  // CHECK: call <16 x i8> @llvm.s390.vll(i32 %{{.*}}, i8* %{{.*}})
+  vul = vec_load_len(cptrul, idx);
+  // CHECK: call <16 x i8> @llvm.s390.vll(i32 %{{.*}}, i8* %{{.*}})
+  vd = vec_load_len(cptrd, idx);
+  // CHECK: call <16 x i8> @llvm.s390.vll(i32 %{{.*}}, i8* %{{.*}})
+
+  vec_store_len(vsc, ptrsc, idx);
+  // CHECK: call void @llvm.s390.vstl(<16 x i8> %{{.*}}, i32 %{{.*}}, i8* %{{.*}})
+  vec_store_len(vuc, ptruc, idx);
+  // CHECK: call void @llvm.s390.vstl(<16 x i8> %{{.*}}, i32 %{{.*}}, i8* %{{.*}})
+  vec_store_len(vss, ptrss, idx);
+  // CHECK: call void @llvm.s390.vstl(<16 x i8> %{{.*}}, i32 %{{.*}}, i8* %{{.*}})
+  vec_store_len(vus, ptrus, idx);
+  // CHECK: call void @llvm.s390.vstl(<16 x i8> %{{.*}}, i32 %{{.*}}, i8* %{{.*}})
+  vec_store_len(vsi, ptrsi, idx);
+  // CHECK: call void @llvm.s390.vstl(<16 x i8> %{{.*}}, i32 %{{.*}}, i8* %{{.*}})
+  vec_store_len(vui, ptrui, idx);
+  // CHECK: call void @llvm.s390.vstl(<16 x i8> %{{.*}}, i32 %{{.*}}, i8* %{{.*}})
+  vec_store_len(vsl, ptrsl, idx);
+  // CHECK: call void @llvm.s390.vstl(<16 x i8> %{{.*}}, i32 %{{.*}}, i8* %{{.*}})
+  vec_store_len(vul, ptrul, idx);
+  // CHECK: call void @llvm.s390.vstl(<16 x i8> %{{.*}}, i32 %{{.*}}, i8* %{{.*}})
+  vec_store_len(vd, ptrd, idx);
+  // CHECK: call void @llvm.s390.vstl(<16 x i8> %{{.*}}, i32 %{{.*}}, i8* %{{.*}})
+
+  vsl = vec_load_pair(sl, sl);
+  vul = vec_load_pair(ul, ul);
+
+  vuc = vec_genmask(0);
+  // CHECK: <16 x i8> zeroinitializer
+  vuc = vec_genmask(0x8000);
+  // CHECK: <16 x i8> <i8 -1, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>
+  vuc = vec_genmask(0xffff);
+  // CHECK: <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+
+  vuc = vec_genmasks_8(0, 7);
+  // CHECK: <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+  vuc = vec_genmasks_8(1, 4);
+  // CHECK: <16 x i8> <i8 120, i8 120, i8 120, i8 120, i8 120, i8 120, i8 120, i8 120, i8 120, i8 120, i8 120, i8 120, i8 120, i8 120, i8 120, i8 120>
+  vuc = vec_genmasks_8(6, 2);
+  // CHECK: <16 x i8> <i8 -29, i8 -29, i8 -29, i8 -29, i8 -29, i8 -29, i8 -29, i8 -29, i8 -29, i8 -29, i8 -29, i8 -29, i8 -29, i8 -29, i8 -29, i8 -29>
+  vus = vec_genmasks_16(0, 15);
+  // CHECK: <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+  vus = vec_genmasks_16(2, 11);
+  // CHECK: <8 x i16> <i16 16368, i16 16368, i16 16368, i16 16368, i16 16368, i16 16368, i16 16368, i16 16368>
+  vus = vec_genmasks_16(9, 2);
+  // CHECK: <8 x i16> <i16 -8065, i16 -8065, i16 -8065, i16 -8065, i16 -8065, i16 -8065, i16 -8065, i16 -8065>
+  vui = vec_genmasks_32(0, 31);
+  // CHECK: <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>
+  vui = vec_genmasks_32(7, 20);
+  // CHECK: <4 x i32> <i32 33552384, i32 33552384, i32 33552384, i32 33552384>
+  vui = vec_genmasks_32(25, 4);
+  // CHECK: <4 x i32> <i32 -134217601, i32 -134217601, i32 -134217601, i32 -134217601>
+  vul = vec_genmasks_64(0, 63);
+  // CHECK: <2 x i64> <i64 -1, i64 -1>
+  vul = vec_genmasks_64(3, 40);
+  // CHECK: <2 x i64> <i64 2305843009205305344, i64 2305843009205305344>
+  vul = vec_genmasks_64(30, 11);
+  // CHECK: <2 x i64> <i64 -4503582447501313, i64 -4503582447501313>
+
+  vsc = vec_splat(vsc, 0);
+  // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> undef, <16 x i32> zeroinitializer
+  vsc = vec_splat(vsc, 15);
+  // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> undef, <16 x i32> <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
+  vuc = vec_splat(vuc, 0);
+  // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> undef, <16 x i32> zeroinitializer
+  vuc = vec_splat(vuc, 15);
+  // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> undef, <16 x i32> <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
+  vbc = vec_splat(vbc, 0);
+  // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> undef, <16 x i32> zeroinitializer
+  vbc = vec_splat(vbc, 15);
+  // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> undef, <16 x i32> <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
+  vss = vec_splat(vss, 0);
+  // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> undef, <8 x i32> zeroinitializer
+  vss = vec_splat(vss, 7);
+  // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> undef, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+  vus = vec_splat(vus, 0);
+  // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> undef, <8 x i32> zeroinitializer
+  vus = vec_splat(vus, 7);
+  // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> undef, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+  vbs = vec_splat(vbs, 0);
+  // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> undef, <8 x i32> zeroinitializer
+  vbs = vec_splat(vbs, 7);
+  // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> undef, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+  vsi = vec_splat(vsi, 0);
+  // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> zeroinitializer
+  vsi = vec_splat(vsi, 3);
+  // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+  vui = vec_splat(vui, 0);
+  // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> zeroinitializer
+  vui = vec_splat(vui, 3);
+  // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+  vbi = vec_splat(vbi, 0);
+  // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> zeroinitializer
+  vbi = vec_splat(vbi, 3);
+  // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+  vsl = vec_splat(vsl, 0);
+  // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> undef, <2 x i32> zeroinitializer
+  vsl = vec_splat(vsl, 1);
+  // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
+  vul = vec_splat(vul, 0);
+  // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> undef, <2 x i32> zeroinitializer
+  vul = vec_splat(vul, 1);
+  // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
+  vbl = vec_splat(vbl, 0);
+  // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> undef, <2 x i32> zeroinitializer
+  vbl = vec_splat(vbl, 1);
+  // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
+  vd = vec_splat(vd, 0);
+  // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> undef, <2 x i32> zeroinitializer
+  vd = vec_splat(vd, 1);
+  // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> undef, <2 x i32> <i32 1, i32 1>
+
+  vsc = vec_splat_s8(-128);
+  // CHECK: <16 x i8> <i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128>
+  vsc = vec_splat_s8(127);
+  // CHECK: <16 x i8> <i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127>
+  vuc = vec_splat_u8(1);
+  // CHECK: <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+  vuc = vec_splat_u8(254);
+  // CHECK: <16 x i8> <i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2>
+  vss = vec_splat_s16(-32768);
+  // CHECK: <8 x i16> <i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768>
+  vss = vec_splat_s16(32767);
+  // CHECK: <8 x i16> <i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767>
+  vus = vec_splat_u16(1);
+  // CHECK: <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+  vus = vec_splat_u16(65534);
+  // CHECK: <8 x i16> <i16 -2, i16 -2, i16 -2, i16 -2, i16 -2, i16 -2, i16 -2, i16 -2>
+  vsi = vec_splat_s32(-32768);
+  // CHECK: <4 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768>
+  vsi = vec_splat_s32(32767);
+  // CHECK: <4 x i32> <i32 32767, i32 32767, i32 32767, i32 32767>
+  vui = vec_splat_u32(-32768);
+  // CHECK: <4 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768>
+  vui = vec_splat_u32(32767);
+  // CHECK: <4 x i32> <i32 32767, i32 32767, i32 32767, i32 32767>
+  vsl = vec_splat_s64(-32768);
+  // CHECK: <2 x i64> <i64 -32768, i64 -32768>
+  vsl = vec_splat_s64(32767);
+  // CHECK: <2 x i64> <i64 32767, i64 32767>
+  vul = vec_splat_u64(-32768);
+  // CHECK: <2 x i64> <i64 -32768, i64 -32768>
+  vul = vec_splat_u64(32767);
+  // CHECK: <2 x i64> <i64 32767, i64 32767>
+
+  vsc = vec_splats(sc);
+  // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> undef, <16 x i32> zeroinitializer
+  vuc = vec_splats(uc);
+  // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> undef, <16 x i32> zeroinitializer
+  vss = vec_splats(ss);
+  // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> undef, <8 x i32> zeroinitializer
+  vus = vec_splats(us);
+  // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> undef, <8 x i32> zeroinitializer
+  vsi = vec_splats(si);
+  // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> zeroinitializer
+  vui = vec_splats(ui);
+  // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> zeroinitializer
+  vsl = vec_splats(sl);
+  // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> undef, <2 x i32> zeroinitializer
+  vul = vec_splats(ul);
+  // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> undef, <2 x i32> zeroinitializer
+  vd = vec_splats(d);
+  // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> undef, <2 x i32> zeroinitializer
+
+  vsl = vec_extend_s64(vsc);
+  vsl = vec_extend_s64(vss);
+  vsl = vec_extend_s64(vsi);
+
+  vsc = vec_mergeh(vsc, vsc);
+  // shufflevector <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
+  vuc = vec_mergeh(vuc, vuc);
+  // shufflevector <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
+  vbc = vec_mergeh(vbc, vbc);
+  // shufflevector <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
+  vss = vec_mergeh(vss, vss);
+  // shufflevector <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
+  vus = vec_mergeh(vus, vus);
+  // shufflevector <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
+  vbs = vec_mergeh(vbs, vbs);
+  // shufflevector <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
+  vsi = vec_mergeh(vsi, vsi);
+  // shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
+  vui = vec_mergeh(vui, vui);
+  // shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
+  vbi = vec_mergeh(vbi, vbi);
+  // shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
+  vsl = vec_mergeh(vsl, vsl);
+  // shufflevector <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i32> <i32 0, i32 2>
+  vul = vec_mergeh(vul, vul);
+  // shufflevector <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i32> <i32 0, i32 2>
+  vbl = vec_mergeh(vbl, vbl);
+  // shufflevector <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i32> <i32 0, i32 2>
+  vd = vec_mergeh(vd, vd);
+  // shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> <i32 0, i32 2>
+
+  vsc = vec_mergel(vsc, vsc);
+  // shufflevector <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
+  vuc = vec_mergel(vuc, vuc);
+  // shufflevector <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
+  vbc = vec_mergel(vbc, vbc);
+  // shufflevector <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
+  vss = vec_mergel(vss, vss);
+  // shufflevector <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
+  vus = vec_mergel(vus, vus);
+  // shufflevector <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
+  vbs = vec_mergel(vbs, vbs);
+  // shufflevector <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
+  vsi = vec_mergel(vsi, vsi);
+  // shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
+  vui = vec_mergel(vui, vui);
+  // shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
+  vbi = vec_mergel(vbi, vbi);
+  // shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
+  vsl = vec_mergel(vsl, vsl);
+  // shufflevector <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i32> <i32 1, i32 3>
+  vul = vec_mergel(vul, vul);
+  // shufflevector <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i32> <i32 1, i32 3>
+  vbl = vec_mergel(vbl, vbl);
+  // shufflevector <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i32> <i32 1, i32 3>
+  vd = vec_mergel(vd, vd);
+  // shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> <i32 1, i32 3>
+
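+  // vec_pack just truncates each wide element and concatenates the results,
+  // so it also becomes a shufflevector; the odd element indices presumably
+  // select the low-order halves in this big-endian layout.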
+  vsc = vec_pack(vss, vss);
+  // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
+  vuc = vec_pack(vus, vus);
+  // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
+  vbc = vec_pack(vbs, vbs);
+  // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
+  vss = vec_pack(vsi, vsi);
+  // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+  vus = vec_pack(vui, vui);
+  // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+  vbs = vec_pack(vbi, vbi);
+  // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+  vsi = vec_pack(vsl, vsl);
+  // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+  vui = vec_pack(vul, vul);
+  // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+  vbi = vec_pack(vbl, vbl);
+  // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+
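+  // The saturating packs use the vpks*/vpkls* intrinsics; the *_cc variants
+  // additionally return the condition code as the i32 member of the struct.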
+  vsc = vec_packs(vss, vss);
+  // CHECK: call <16 x i8> @llvm.s390.vpksh(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+  vuc = vec_packs(vus, vus);
+  // CHECK: call <16 x i8> @llvm.s390.vpklsh(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+  vss = vec_packs(vsi, vsi);
+  // CHECK: call <8 x i16> @llvm.s390.vpksf(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+  vus = vec_packs(vui, vui);
+  // CHECK: call <8 x i16> @llvm.s390.vpklsf(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+  vsi = vec_packs(vsl, vsl);
+  // CHECK: call <4 x i32> @llvm.s390.vpksg(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
+  vui = vec_packs(vul, vul);
+  // CHECK: call <4 x i32> @llvm.s390.vpklsg(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
+
+  vsc = vec_packs_cc(vss, vss, &cc);
+  // CHECK: call { <16 x i8>, i32 } @llvm.s390.vpkshs(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+  vuc = vec_packs_cc(vus, vus, &cc);
+  // CHECK: call { <16 x i8>, i32 } @llvm.s390.vpklshs(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+  vss = vec_packs_cc(vsi, vsi, &cc);
+  // CHECK: call { <8 x i16>, i32 } @llvm.s390.vpksfs(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+  vus = vec_packs_cc(vui, vui, &cc);
+  // CHECK: call { <8 x i16>, i32 } @llvm.s390.vpklsfs(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+  vsi = vec_packs_cc(vsl, vsl, &cc);
+  // CHECK: call { <4 x i32>, i32 } @llvm.s390.vpksgs(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
+  vui = vec_packs_cc(vul, vul, &cc);
+  // CHECK: call { <4 x i32>, i32 } @llvm.s390.vpklsgs(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
+
+  vuc = vec_packsu(vss, vss);
+  // CHECK: call <16 x i8> @llvm.s390.vpklsh(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+  vuc = vec_packsu(vus, vus);
+  // CHECK: call <16 x i8> @llvm.s390.vpklsh(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+  vus = vec_packsu(vsi, vsi);
+  // CHECK: call <8 x i16> @llvm.s390.vpklsf(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+  vus = vec_packsu(vui, vui);
+  // CHECK: call <8 x i16> @llvm.s390.vpklsf(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+  vui = vec_packsu(vsl, vsl);
+  // CHECK: call <4 x i32> @llvm.s390.vpklsg(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
+  vui = vec_packsu(vul, vul);
+  // CHECK: call <4 x i32> @llvm.s390.vpklsg(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
+
+  vuc = vec_packsu_cc(vus, vus, &cc);
+  // CHECK: call { <16 x i8>, i32 } @llvm.s390.vpklshs(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+  vus = vec_packsu_cc(vui, vui, &cc);
+  // CHECK: call { <8 x i16>, i32 } @llvm.s390.vpklsfs(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+  vui = vec_packsu_cc(vul, vul, &cc);
+  // CHECK: call { <4 x i32>, i32 } @llvm.s390.vpklsgs(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
+
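+  // vec_unpackh/vec_unpackl widen the high/low element halves: signed and
+  // bool inputs go through the vuph*/vupl* forms, unsigned inputs through
+  // the logical vuplh*/vupll* forms.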
+  vss = vec_unpackh(vsc);
+  // CHECK: call <8 x i16> @llvm.s390.vuphb(<16 x i8> %{{.*}})
+  vus = vec_unpackh(vuc);
+  // CHECK: call <8 x i16> @llvm.s390.vuplhb(<16 x i8> %{{.*}})
+  vbs = vec_unpackh(vbc);
+  // CHECK: call <8 x i16> @llvm.s390.vuphb(<16 x i8> %{{.*}})
+  vsi = vec_unpackh(vss);
+  // CHECK: call <4 x i32> @llvm.s390.vuphh(<8 x i16> %{{.*}})
+  vui = vec_unpackh(vus);
+  // CHECK: call <4 x i32> @llvm.s390.vuplhh(<8 x i16> %{{.*}})
+  vbi = vec_unpackh(vbs);
+  // CHECK: call <4 x i32> @llvm.s390.vuphh(<8 x i16> %{{.*}})
+  vsl = vec_unpackh(vsi);
+  // CHECK: call <2 x i64> @llvm.s390.vuphf(<4 x i32> %{{.*}})
+  vul = vec_unpackh(vui);
+  // CHECK: call <2 x i64> @llvm.s390.vuplhf(<4 x i32> %{{.*}})
+  vbl = vec_unpackh(vbi);
+  // CHECK: call <2 x i64> @llvm.s390.vuphf(<4 x i32> %{{.*}})
+
+  vss = vec_unpackl(vsc);
+  // CHECK: call <8 x i16> @llvm.s390.vuplb(<16 x i8> %{{.*}})
+  vus = vec_unpackl(vuc);
+  // CHECK: call <8 x i16> @llvm.s390.vupllb(<16 x i8> %{{.*}})
+  vbs = vec_unpackl(vbc);
+  // CHECK: call <8 x i16> @llvm.s390.vuplb(<16 x i8> %{{.*}})
+  vsi = vec_unpackl(vss);
+  // CHECK: call <4 x i32> @llvm.s390.vuplhw(<8 x i16> %{{.*}})
+  vui = vec_unpackl(vus);
+  // CHECK: call <4 x i32> @llvm.s390.vupllh(<8 x i16> %{{.*}})
+  vbi = vec_unpackl(vbs);
+  // CHECK: call <4 x i32> @llvm.s390.vuplhw(<8 x i16> %{{.*}})
+  vsl = vec_unpackl(vsi);
+  // CHECK: call <2 x i64> @llvm.s390.vuplf(<4 x i32> %{{.*}})
+  vul = vec_unpackl(vui);
+  // CHECK: call <2 x i64> @llvm.s390.vupllf(<4 x i32> %{{.*}})
+  vbl = vec_unpackl(vbi);
+  // CHECK: call <2 x i64> @llvm.s390.vuplf(<4 x i32> %{{.*}})
+}
+
+void test_compare(void) {
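+  // The element-wise vec_cmp* builtins lower to ordinary icmp/fcmp
+  // instructions; only the all/any predicates further down need the
+  // CC-setting compare intrinsics.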
+  vbc = vec_cmpeq(vsc, vsc);
+  // CHECK: icmp eq <16 x i8> %{{.*}}, %{{.*}}
+  vbc = vec_cmpeq(vuc, vuc);
+  // CHECK: icmp eq <16 x i8> %{{.*}}, %{{.*}}
+  vbc = vec_cmpeq(vbc, vbc);
+  // CHECK: icmp eq <16 x i8> %{{.*}}, %{{.*}}
+  vbs = vec_cmpeq(vss, vss);
+  // CHECK: icmp eq <8 x i16> %{{.*}}, %{{.*}}
+  vbs = vec_cmpeq(vus, vus);
+  // CHECK: icmp eq <8 x i16> %{{.*}}, %{{.*}}
+  vbs = vec_cmpeq(vbs, vbs);
+  // CHECK: icmp eq <8 x i16> %{{.*}}, %{{.*}}
+  vbi = vec_cmpeq(vsi, vsi);
+  // CHECK: icmp eq <4 x i32> %{{.*}}, %{{.*}}
+  vbi = vec_cmpeq(vui, vui);
+  // CHECK: icmp eq <4 x i32> %{{.*}}, %{{.*}}
+  vbi = vec_cmpeq(vbi, vbi);
+  // CHECK: icmp eq <4 x i32> %{{.*}}, %{{.*}}
+  vbl = vec_cmpeq(vsl, vsl);
+  // CHECK: icmp eq <2 x i64> %{{.*}}, %{{.*}}
+  vbl = vec_cmpeq(vul, vul);
+  // CHECK: icmp eq <2 x i64> %{{.*}}, %{{.*}}
+  vbl = vec_cmpeq(vbl, vbl);
+  // CHECK: icmp eq <2 x i64> %{{.*}}, %{{.*}}
+  vbl = vec_cmpeq(vd, vd);
+  // CHECK: fcmp oeq <2 x double> %{{.*}}, %{{.*}}
+
+  vbc = vec_cmpge(vsc, vsc);
+  // CHECK: icmp sge <16 x i8> %{{.*}}, %{{.*}}
+  vbc = vec_cmpge(vuc, vuc);
+  // CHECK: icmp uge <16 x i8> %{{.*}}, %{{.*}}
+  vbs = vec_cmpge(vss, vss);
+  // CHECK: icmp sge <8 x i16> %{{.*}}, %{{.*}}
+  vbs = vec_cmpge(vus, vus);
+  // CHECK: icmp uge <8 x i16> %{{.*}}, %{{.*}}
+  vbi = vec_cmpge(vsi, vsi);
+  // CHECK: icmp sge <4 x i32> %{{.*}}, %{{.*}}
+  vbi = vec_cmpge(vui, vui);
+  // CHECK: icmp uge <4 x i32> %{{.*}}, %{{.*}}
+  vbl = vec_cmpge(vsl, vsl);
+  // CHECK: icmp sge <2 x i64> %{{.*}}, %{{.*}}
+  vbl = vec_cmpge(vul, vul);
+  // CHECK: icmp uge <2 x i64> %{{.*}}, %{{.*}}
+  vbl = vec_cmpge(vd, vd);
+  // CHECK: fcmp oge <2 x double> %{{.*}}, %{{.*}}
+
+  vbc = vec_cmpgt(vsc, vsc);
+  // CHECK: icmp sgt <16 x i8> %{{.*}}, %{{.*}}
+  vbc = vec_cmpgt(vuc, vuc);
+  // CHECK: icmp ugt <16 x i8> %{{.*}}, %{{.*}}
+  vbs = vec_cmpgt(vss, vss);
+  // CHECK: icmp sgt <8 x i16> %{{.*}}, %{{.*}}
+  vbs = vec_cmpgt(vus, vus);
+  // CHECK: icmp ugt <8 x i16> %{{.*}}, %{{.*}}
+  vbi = vec_cmpgt(vsi, vsi);
+  // CHECK: icmp sgt <4 x i32> %{{.*}}, %{{.*}}
+  vbi = vec_cmpgt(vui, vui);
+  // CHECK: icmp ugt <4 x i32> %{{.*}}, %{{.*}}
+  vbl = vec_cmpgt(vsl, vsl);
+  // CHECK: icmp sgt <2 x i64> %{{.*}}, %{{.*}}
+  vbl = vec_cmpgt(vul, vul);
+  // CHECK: icmp ugt <2 x i64> %{{.*}}, %{{.*}}
+  vbl = vec_cmpgt(vd, vd);
+  // CHECK: fcmp ogt <2 x double> %{{.*}}, %{{.*}}
+
+  vbc = vec_cmple(vsc, vsc);
+  // CHECK: icmp sle <16 x i8> %{{.*}}, %{{.*}}
+  vbc = vec_cmple(vuc, vuc);
+  // CHECK: icmp ule <16 x i8> %{{.*}}, %{{.*}}
+  vbs = vec_cmple(vss, vss);
+  // CHECK: icmp sle <8 x i16> %{{.*}}, %{{.*}}
+  vbs = vec_cmple(vus, vus);
+  // CHECK: icmp ule <8 x i16> %{{.*}}, %{{.*}}
+  vbi = vec_cmple(vsi, vsi);
+  // CHECK: icmp sle <4 x i32> %{{.*}}, %{{.*}}
+  vbi = vec_cmple(vui, vui);
+  // CHECK: icmp ule <4 x i32> %{{.*}}, %{{.*}}
+  vbl = vec_cmple(vsl, vsl);
+  // CHECK: icmp sle <2 x i64> %{{.*}}, %{{.*}}
+  vbl = vec_cmple(vul, vul);
+  // CHECK: icmp ule <2 x i64> %{{.*}}, %{{.*}}
+  vbl = vec_cmple(vd, vd);
+  // CHECK: fcmp ole <2 x double> %{{.*}}, %{{.*}}
+
+  vbc = vec_cmplt(vsc, vsc);
+  // CHECK: icmp slt <16 x i8> %{{.*}}, %{{.*}}
+  vbc = vec_cmplt(vuc, vuc);
+  // CHECK: icmp ult <16 x i8> %{{.*}}, %{{.*}}
+  vbs = vec_cmplt(vss, vss);
+  // CHECK: icmp slt <8 x i16> %{{.*}}, %{{.*}}
+  vbs = vec_cmplt(vus, vus);
+  // CHECK: icmp ult <8 x i16> %{{.*}}, %{{.*}}
+  vbi = vec_cmplt(vsi, vsi);
+  // CHECK: icmp slt <4 x i32> %{{.*}}, %{{.*}}
+  vbi = vec_cmplt(vui, vui);
+  // CHECK: icmp ult <4 x i32> %{{.*}}, %{{.*}}
+  vbl = vec_cmplt(vsl, vsl);
+  // CHECK: icmp slt <2 x i64> %{{.*}}, %{{.*}}
+  vbl = vec_cmplt(vul, vul);
+  // CHECK: icmp ult <2 x i64> %{{.*}}, %{{.*}}
+  vbl = vec_cmplt(vd, vd);
+  // CHECK: fcmp olt <2 x double> %{{.*}}, %{{.*}}
+
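+  // The vec_all_*/vec_any_* predicates call the CC-setting compare intrinsics
+  // (vceq*s, vch*s, vchl*s, vfc*dbs) and derive their result from the returned
+  // condition code, which is why e.g. vec_all_ne checks the same vceq*s call
+  // as vec_all_eq, and the le/lt forms reuse the greater-than intrinsics.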
+  idx = vec_all_eq(vsc, vsc);
+  // CHECK: call { <16 x i8>, i32 } @llvm.s390.vceqbs(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  idx = vec_all_eq(vsc, vbc);
+  // CHECK: call { <16 x i8>, i32 } @llvm.s390.vceqbs(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  idx = vec_all_eq(vbc, vsc);
+  // CHECK: call { <16 x i8>, i32 } @llvm.s390.vceqbs(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  idx = vec_all_eq(vuc, vuc);
+  // CHECK: call { <16 x i8>, i32 } @llvm.s390.vceqbs(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  idx = vec_all_eq(vuc, vbc);
+  // CHECK: call { <16 x i8>, i32 } @llvm.s390.vceqbs(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  idx = vec_all_eq(vbc, vuc);
+  // CHECK: call { <16 x i8>, i32 } @llvm.s390.vceqbs(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  idx = vec_all_eq(vbc, vbc);
+  // CHECK: call { <16 x i8>, i32 } @llvm.s390.vceqbs(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  idx = vec_all_eq(vss, vss);
+  // CHECK: call { <8 x i16>, i32 } @llvm.s390.vceqhs(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+  idx = vec_all_eq(vss, vbs);
+  // CHECK: call { <8 x i16>, i32 } @llvm.s390.vceqhs(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+  idx = vec_all_eq(vbs, vss);
+  // CHECK: call { <8 x i16>, i32 } @llvm.s390.vceqhs(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+  idx = vec_all_eq(vus, vus);
+  // CHECK: call { <8 x i16>, i32 } @llvm.s390.vceqhs(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+  idx = vec_all_eq(vus, vbs);
+  // CHECK: call { <8 x i16>, i32 } @llvm.s390.vceqhs(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+  idx = vec_all_eq(vbs, vus);
+  // CHECK: call { <8 x i16>, i32 } @llvm.s390.vceqhs(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+  idx = vec_all_eq(vbs, vbs);
+  // CHECK: call { <8 x i16>, i32 } @llvm.s390.vceqhs(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+  idx = vec_all_eq(vsi, vsi);
+  // CHECK: call { <4 x i32>, i32 } @llvm.s390.vceqfs(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+  idx = vec_all_eq(vsi, vbi);
+  // CHECK: call { <4 x i32>, i32 } @llvm.s390.vceqfs(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+  idx = vec_all_eq(vbi, vsi);
+  // CHECK: call { <4 x i32>, i32 } @llvm.s390.vceqfs(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+  idx = vec_all_eq(vui, vui);
+  // CHECK: call { <4 x i32>, i32 } @llvm.s390.vceqfs(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+  idx = vec_all_eq(vui, vbi);
+  // CHECK: call { <4 x i32>, i32 } @llvm.s390.vceqfs(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+  idx = vec_all_eq(vbi, vui);
+  // CHECK: call { <4 x i32>, i32 } @llvm.s390.vceqfs(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+  idx = vec_all_eq(vbi, vbi);
+  // CHECK: call { <4 x i32>, i32 } @llvm.s390.vceqfs(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+  idx = vec_all_eq(vsl, vsl);
+  // CHECK: call { <2 x i64>, i32 } @llvm.s390.vceqgs(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
+  idx = vec_all_eq(vsl, vbl);
+  // CHECK: call { <2 x i64>, i32 } @llvm.s390.vceqgs(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
+  idx = vec_all_eq(vbl, vsl);
+  // CHECK: call { <2 x i64>, i32 } @llvm.s390.vceqgs(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
+  idx = vec_all_eq(vul, vul);
+  // CHECK: call { <2 x i64>, i32 } @llvm.s390.vceqgs(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
+  idx = vec_all_eq(vul, vbl);
+  // CHECK: call { <2 x i64>, i32 } @llvm.s390.vceqgs(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
+  idx = vec_all_eq(vbl, vul);
+  // CHECK: call { <2 x i64>, i32 } @llvm.s390.vceqgs(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
+  idx = vec_all_eq(vbl, vbl);
+  // CHECK: call { <2 x i64>, i32 } @llvm.s390.vceqgs(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
+  idx = vec_all_eq(vd, vd);
+  // CHECK: call { <2 x i64>, i32 } @llvm.s390.vfcedbs(<2 x double> %{{.*}}, <2 x double> %{{.*}})
+
+  idx = vec_all_ne(vsc, vsc);
+  // CHECK: call { <16 x i8>, i32 } @llvm.s390.vceqbs(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  idx = vec_all_ne(vsc, vbc);
+  // CHECK: call { <16 x i8>, i32 } @llvm.s390.vceqbs(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  idx = vec_all_ne(vbc, vsc);
+  // CHECK: call { <16 x i8>, i32 } @llvm.s390.vceqbs(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  idx = vec_all_ne(vuc, vuc);
+  // CHECK: call { <16 x i8>, i32 } @llvm.s390.vceqbs(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  idx = vec_all_ne(vuc, vbc);
+  // CHECK: call { <16 x i8>, i32 } @llvm.s390.vceqbs(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  idx = vec_all_ne(vbc, vuc);
+  // CHECK: call { <16 x i8>, i32 } @llvm.s390.vceqbs(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  idx = vec_all_ne(vbc, vbc);
+  // CHECK: call { <16 x i8>, i32 } @llvm.s390.vceqbs(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  idx = vec_all_ne(vss, vss);
+  // CHECK: call { <8 x i16>, i32 } @llvm.s390.vceqhs(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+  idx = vec_all_ne(vss, vbs);
+  // CHECK: call { <8 x i16>, i32 } @llvm.s390.vceqhs(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+  idx = vec_all_ne(vbs, vss);
+  // CHECK: call { <8 x i16>, i32 } @llvm.s390.vceqhs(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+  idx = vec_all_ne(vus, vus);
+  // CHECK: call { <8 x i16>, i32 } @llvm.s390.vceqhs(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+  idx = vec_all_ne(vus, vbs);
+  // CHECK: call { <8 x i16>, i32 } @llvm.s390.vceqhs(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+  idx = vec_all_ne(vbs, vus);
+  // CHECK: call { <8 x i16>, i32 } @llvm.s390.vceqhs(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+  idx = vec_all_ne(vbs, vbs);
+  // CHECK: call { <8 x i16>, i32 } @llvm.s390.vceqhs(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+  idx = vec_all_ne(vsi, vsi);
+  // CHECK: call { <4 x i32>, i32 } @llvm.s390.vceqfs(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+  idx = vec_all_ne(vsi, vbi);
+  // CHECK: call { <4 x i32>, i32 } @llvm.s390.vceqfs(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+  idx = vec_all_ne(vbi, vsi);
+  // CHECK: call { <4 x i32>, i32 } @llvm.s390.vceqfs(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+  idx = vec_all_ne(vui, vui);
+  // CHECK: call { <4 x i32>, i32 } @llvm.s390.vceqfs(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+  idx = vec_all_ne(vui, vbi);
+  // CHECK: call { <4 x i32>, i32 } @llvm.s390.vceqfs(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+  idx = vec_all_ne(vbi, vui);
+  // CHECK: call { <4 x i32>, i32 } @llvm.s390.vceqfs(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+  idx = vec_all_ne(vbi, vbi);
+  // CHECK: call { <4 x i32>, i32 } @llvm.s390.vceqfs(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+  idx = vec_all_ne(vsl, vsl);
+  // CHECK: call { <2 x i64>, i32 } @llvm.s390.vceqgs(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
+  idx = vec_all_ne(vsl, vbl);
+  // CHECK: call { <2 x i64>, i32 } @llvm.s390.vceqgs(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
+  idx = vec_all_ne(vbl, vsl);
+  // CHECK: call { <2 x i64>, i32 } @llvm.s390.vceqgs(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
+  idx = vec_all_ne(vul, vul);
+  // CHECK: call { <2 x i64>, i32 } @llvm.s390.vceqgs(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
+  idx = vec_all_ne(vul, vbl);
+  // CHECK: call { <2 x i64>, i32 } @llvm.s390.vceqgs(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
+  idx = vec_all_ne(vbl, vul);
+  // CHECK: call { <2 x i64>, i32 } @llvm.s390.vceqgs(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
+  idx = vec_all_ne(vbl, vbl);
+  // CHECK: call { <2 x i64>, i32 } @llvm.s390.vceqgs(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
+  idx = vec_all_ne(vd, vd);
+  // CHECK: call { <2 x i64>, i32 } @llvm.s390.vfcedbs(<2 x double> %{{.*}}, <2 x double> %{{.*}})
+
+  idx = vec_all_ge(vsc, vsc);
+  // CHECK: call { <16 x i8>, i32 } @llvm.s390.vchbs(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  idx = vec_all_ge(vsc, vbc);
+  // CHECK: call { <16 x i8>, i32 } @llvm.s390.vchbs(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  idx = vec_all_ge(vbc, vsc);
+  // CHECK: call { <16 x i8>, i32 } @llvm.s390.vchbs(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  idx = vec_all_ge(vuc, vuc);
+  // CHECK: call { <16 x i8>, i32 } @llvm.s390.vchlbs(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  idx = vec_all_ge(vuc, vbc);
+  // CHECK: call { <16 x i8>, i32 } @llvm.s390.vchlbs(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  idx = vec_all_ge(vbc, vuc);
+  // CHECK: call { <16 x i8>, i32 } @llvm.s390.vchlbs(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  idx = vec_all_ge(vbc, vbc);
+  // CHECK: call { <16 x i8>, i32 } @llvm.s390.vchlbs(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  idx = vec_all_ge(vss, vss);
+  // CHECK: call { <8 x i16>, i32 } @llvm.s390.vchhs(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+  idx = vec_all_ge(vss, vbs);
+  // CHECK: call { <8 x i16>, i32 } @llvm.s390.vchhs(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+  idx = vec_all_ge(vbs, vss);
+  // CHECK: call { <8 x i16>, i32 } @llvm.s390.vchhs(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+  idx = vec_all_ge(vus, vus);
+  // CHECK: call { <8 x i16>, i32 } @llvm.s390.vchlhs(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+  idx = vec_all_ge(vus, vbs);
+  // CHECK: call { <8 x i16>, i32 } @llvm.s390.vchlhs(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+  idx = vec_all_ge(vbs, vus);
+  // CHECK: call { <8 x i16>, i32 } @llvm.s390.vchlhs(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+  idx = vec_all_ge(vbs, vbs);
+  // CHECK: call { <8 x i16>, i32 } @llvm.s390.vchlhs(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+  idx = vec_all_ge(vsi, vsi);
+  // CHECK: call { <4 x i32>, i32 } @llvm.s390.vchfs(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+  idx = vec_all_ge(vsi, vbi);
+  // CHECK: call { <4 x i32>, i32 } @llvm.s390.vchfs(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+  idx = vec_all_ge(vbi, vsi);
+  // CHECK: call { <4 x i32>, i32 } @llvm.s390.vchfs(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+  idx = vec_all_ge(vui, vui);
+  // CHECK: call { <4 x i32>, i32 } @llvm.s390.vchlfs(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+  idx = vec_all_ge(vui, vbi);
+  // CHECK: call { <4 x i32>, i32 } @llvm.s390.vchlfs(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+  idx = vec_all_ge(vbi, vui);
+  // CHECK: call { <4 x i32>, i32 } @llvm.s390.vchlfs(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+  idx = vec_all_ge(vbi, vbi);
+  // CHECK: call { <4 x i32>, i32 } @llvm.s390.vchlfs(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+  idx = vec_all_ge(vsl, vsl);
+  // CHECK: call { <2 x i64>, i32 } @llvm.s390.vchgs(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
+  idx = vec_all_ge(vsl, vbl);
+  // CHECK: call { <2 x i64>, i32 } @llvm.s390.vchgs(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
+  idx = vec_all_ge(vbl, vsl);
+  // CHECK: call { <2 x i64>, i32 } @llvm.s390.vchgs(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
+  idx = vec_all_ge(vul, vul);
+  // CHECK: call { <2 x i64>, i32 } @llvm.s390.vchlgs(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
+  idx = vec_all_ge(vul, vbl);
+  // CHECK: call { <2 x i64>, i32 } @llvm.s390.vchlgs(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
+  idx = vec_all_ge(vbl, vul);
+  // CHECK: call { <2 x i64>, i32 } @llvm.s390.vchlgs(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
+  idx = vec_all_ge(vbl, vbl);
+  // CHECK: call { <2 x i64>, i32 } @llvm.s390.vchlgs(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
+  idx = vec_all_ge(vd, vd);
+  // CHECK: call { <2 x i64>, i32 } @llvm.s390.vfchedbs(<2 x double> %{{.*}}, <2 x double> %{{.*}})
+
+  idx = vec_all_gt(vsc, vsc);
+  // CHECK: call { <16 x i8>, i32 } @llvm.s390.vchbs(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  idx = vec_all_gt(vsc, vbc);
+  // CHECK: call { <16 x i8>, i32 } @llvm.s390.vchbs(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  idx = vec_all_gt(vbc, vsc);
+  // CHECK: call { <16 x i8>, i32 } @llvm.s390.vchbs(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  idx = vec_all_gt(vuc, vuc);
+  // CHECK: call { <16 x i8>, i32 } @llvm.s390.vchlbs(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  idx = vec_all_gt(vuc, vbc);
+  // CHECK: call { <16 x i8>, i32 } @llvm.s390.vchlbs(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  idx = vec_all_gt(vbc, vuc);
+  // CHECK: call { <16 x i8>, i32 } @llvm.s390.vchlbs(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  idx = vec_all_gt(vbc, vbc);
+  // CHECK: call { <16 x i8>, i32 } @llvm.s390.vchlbs(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  idx = vec_all_gt(vss, vss);
+  // CHECK: call { <8 x i16>, i32 } @llvm.s390.vchhs(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+  idx = vec_all_gt(vss, vbs);
+  // CHECK: call { <8 x i16>, i32 } @llvm.s390.vchhs(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+  idx = vec_all_gt(vbs, vss);
+  // CHECK: call { <8 x i16>, i32 } @llvm.s390.vchhs(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+  idx = vec_all_gt(vus, vus);
+  // CHECK: call { <8 x i16>, i32 } @llvm.s390.vchlhs(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+  idx = vec_all_gt(vus, vbs);
+  // CHECK: call { <8 x i16>, i32 } @llvm.s390.vchlhs(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+  idx = vec_all_gt(vbs, vus);
+  // CHECK: call { <8 x i16>, i32 } @llvm.s390.vchlhs(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+  idx = vec_all_gt(vbs, vbs);
+  // CHECK: call { <8 x i16>, i32 } @llvm.s390.vchlhs(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+  idx = vec_all_gt(vsi, vsi);
+  // CHECK: call { <4 x i32>, i32 } @llvm.s390.vchfs(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+  idx = vec_all_gt(vsi, vbi);
+  // CHECK: call { <4 x i32>, i32 } @llvm.s390.vchfs(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+  idx = vec_all_gt(vbi, vsi);
+  // CHECK: call { <4 x i32>, i32 } @llvm.s390.vchfs(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+  idx = vec_all_gt(vui, vui);
+  // CHECK: call { <4 x i32>, i32 } @llvm.s390.vchlfs(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+  idx = vec_all_gt(vui, vbi);
+  // CHECK: call { <4 x i32>, i32 } @llvm.s390.vchlfs(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+  idx = vec_all_gt(vbi, vui);
+  // CHECK: call { <4 x i32>, i32 } @llvm.s390.vchlfs(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+  idx = vec_all_gt(vbi, vbi);
+  // CHECK: call { <4 x i32>, i32 } @llvm.s390.vchlfs(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+  idx = vec_all_gt(vsl, vsl);
+  // CHECK: call { <2 x i64>, i32 } @llvm.s390.vchgs(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
+  idx = vec_all_gt(vsl, vbl);
+  // CHECK: call { <2 x i64>, i32 } @llvm.s390.vchgs(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
+  idx = vec_all_gt(vbl, vsl);
+  // CHECK: call { <2 x i64>, i32 } @llvm.s390.vchgs(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
+  idx = vec_all_gt(vul, vul);
+  // CHECK: call { <2 x i64>, i32 } @llvm.s390.vchlgs(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
+  idx = vec_all_gt(vul, vbl);
+  // CHECK: call { <2 x i64>, i32 } @llvm.s390.vchlgs(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
+  idx = vec_all_gt(vbl, vul);
+  // CHECK: call { <2 x i64>, i32 } @llvm.s390.vchlgs(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
+  idx = vec_all_gt(vbl, vbl);
+  // CHECK: call { <2 x i64>, i32 } @llvm.s390.vchlgs(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
+  idx = vec_all_gt(vd, vd);
+  // CHECK: call { <2 x i64>, i32 } @llvm.s390.vfchdbs(<2 x double> %{{.*}}, <2 x double> %{{.*}})
+
+  idx = vec_all_le(vsc, vsc);
+  // CHECK: call { <16 x i8>, i32 } @llvm.s390.vchbs(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  idx = vec_all_le(vsc, vbc);
+  // CHECK: call { <16 x i8>, i32 } @llvm.s390.vchbs(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  idx = vec_all_le(vbc, vsc);
+  // CHECK: call { <16 x i8>, i32 } @llvm.s390.vchbs(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  idx = vec_all_le(vuc, vuc);
+  // CHECK: call { <16 x i8>, i32 } @llvm.s390.vchlbs(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  idx = vec_all_le(vuc, vbc);
+  // CHECK: call { <16 x i8>, i32 } @llvm.s390.vchlbs(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  idx = vec_all_le(vbc, vuc);
+  // CHECK: call { <16 x i8>, i32 } @llvm.s390.vchlbs(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  idx = vec_all_le(vbc, vbc);
+  // CHECK: call { <16 x i8>, i32 } @llvm.s390.vchlbs(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  idx = vec_all_le(vss, vss);
+  // CHECK: call { <8 x i16>, i32 } @llvm.s390.vchhs(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+  idx = vec_all_le(vss, vbs);
+  // CHECK: call { <8 x i16>, i32 } @llvm.s390.vchhs(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+  idx = vec_all_le(vbs, vss);
+  // CHECK: call { <8 x i16>, i32 } @llvm.s390.vchhs(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+  idx = vec_all_le(vus, vus);
+  // CHECK: call { <8 x i16>, i32 } @llvm.s390.vchlhs(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+  idx = vec_all_le(vus, vbs);
+  // CHECK: call { <8 x i16>, i32 } @llvm.s390.vchlhs(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+  idx = vec_all_le(vbs, vus);
+  // CHECK: call { <8 x i16>, i32 } @llvm.s390.vchlhs(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+  idx = vec_all_le(vbs, vbs);
+  // CHECK: call { <8 x i16>, i32 } @llvm.s390.vchlhs(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+  idx = vec_all_le(vsi, vsi);
+  // CHECK: call { <4 x i32>, i32 } @llvm.s390.vchfs(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+  idx = vec_all_le(vsi, vbi);
+  // CHECK: call { <4 x i32>, i32 } @llvm.s390.vchfs(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+  idx = vec_all_le(vbi, vsi);
+  // CHECK: call { <4 x i32>, i32 } @llvm.s390.vchfs(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+  idx = vec_all_le(vui, vui);
+  // CHECK: call { <4 x i32>, i32 } @llvm.s390.vchlfs(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+  idx = vec_all_le(vui, vbi);
+  // CHECK: call { <4 x i32>, i32 } @llvm.s390.vchlfs(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+  idx = vec_all_le(vbi, vui);
+  // CHECK: call { <4 x i32>, i32 } @llvm.s390.vchlfs(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+  idx = vec_all_le(vbi, vbi);
+  // CHECK: call { <4 x i32>, i32 } @llvm.s390.vchlfs(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+  idx = vec_all_le(vsl, vsl);
+  // CHECK: call { <2 x i64>, i32 } @llvm.s390.vchgs(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
+  idx = vec_all_le(vsl, vbl);
+  // CHECK: call { <2 x i64>, i32 } @llvm.s390.vchgs(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
+  idx = vec_all_le(vbl, vsl);
+  // CHECK: call { <2 x i64>, i32 } @llvm.s390.vchgs(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
+  idx = vec_all_le(vul, vul);
+  // CHECK: call { <2 x i64>, i32 } @llvm.s390.vchlgs(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
+  idx = vec_all_le(vul, vbl);
+  // CHECK: call { <2 x i64>, i32 } @llvm.s390.vchlgs(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
+  idx = vec_all_le(vbl, vul);
+  // CHECK: call { <2 x i64>, i32 } @llvm.s390.vchlgs(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
+  idx = vec_all_le(vbl, vbl);
+  // CHECK: call { <2 x i64>, i32 } @llvm.s390.vchlgs(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
+  idx = vec_all_le(vd, vd);
+  // CHECK: call { <2 x i64>, i32 } @llvm.s390.vfchedbs(<2 x double> %{{.*}}, <2 x double> %{{.*}})
+
+  idx = vec_all_lt(vsc, vsc);
+  // CHECK: call { <16 x i8>, i32 } @llvm.s390.vchbs(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  idx = vec_all_lt(vsc, vbc);
+  // CHECK: call { <16 x i8>, i32 } @llvm.s390.vchbs(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  idx = vec_all_lt(vbc, vsc);
+  // CHECK: call { <16 x i8>, i32 } @llvm.s390.vchbs(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  idx = vec_all_lt(vuc, vuc);
+  // CHECK: call { <16 x i8>, i32 } @llvm.s390.vchlbs(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  idx = vec_all_lt(vuc, vbc);
+  // CHECK: call { <16 x i8>, i32 } @llvm.s390.vchlbs(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  idx = vec_all_lt(vbc, vuc);
+  // CHECK: call { <16 x i8>, i32 } @llvm.s390.vchlbs(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  idx = vec_all_lt(vbc, vbc);
+  // CHECK: call { <16 x i8>, i32 } @llvm.s390.vchlbs(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  idx = vec_all_lt(vss, vss);
+  // CHECK: call { <8 x i16>, i32 } @llvm.s390.vchhs(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+  idx = vec_all_lt(vss, vbs);
+  // CHECK: call { <8 x i16>, i32 } @llvm.s390.vchhs(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+  idx = vec_all_lt(vbs, vss);
+  // CHECK: call { <8 x i16>, i32 } @llvm.s390.vchhs(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+  idx = vec_all_lt(vus, vus);
+  // CHECK: call { <8 x i16>, i32 } @llvm.s390.vchlhs(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+  idx = vec_all_lt(vus, vbs);
+  // CHECK: call { <8 x i16>, i32 } @llvm.s390.vchlhs(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+  idx = vec_all_lt(vbs, vus);
+  // CHECK: call { <8 x i16>, i32 } @llvm.s390.vchlhs(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+  idx = vec_all_lt(vbs, vbs);
+  // CHECK: call { <8 x i16>, i32 } @llvm.s390.vchlhs(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+  idx = vec_all_lt(vsi, vsi);
+  // CHECK: call { <4 x i32>, i32 } @llvm.s390.vchfs(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+  idx = vec_all_lt(vsi, vbi);
+  // CHECK: call { <4 x i32>, i32 } @llvm.s390.vchfs(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+  idx = vec_all_lt(vbi, vsi);
+  // CHECK: call { <4 x i32>, i32 } @llvm.s390.vchfs(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+  idx = vec_all_lt(vui, vui);
+  // CHECK: call { <4 x i32>, i32 } @llvm.s390.vchlfs(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+  idx = vec_all_lt(vui, vbi);
+  // CHECK: call { <4 x i32>, i32 } @llvm.s390.vchlfs(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+  idx = vec_all_lt(vbi, vui);
+  // CHECK: call { <4 x i32>, i32 } @llvm.s390.vchlfs(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+  idx = vec_all_lt(vbi, vbi);
+  // CHECK: call { <4 x i32>, i32 } @llvm.s390.vchlfs(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+  idx = vec_all_lt(vsl, vsl);
+  // CHECK: call { <2 x i64>, i32 } @llvm.s390.vchgs(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
+  idx = vec_all_lt(vsl, vbl);
+  // CHECK: call { <2 x i64>, i32 } @llvm.s390.vchgs(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
+  idx = vec_all_lt(vbl, vsl);
+  // CHECK: call { <2 x i64>, i32 } @llvm.s390.vchgs(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
+  idx = vec_all_lt(vul, vul);
+  // CHECK: call { <2 x i64>, i32 } @llvm.s390.vchlgs(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
+  idx = vec_all_lt(vul, vbl);
+  // CHECK: call { <2 x i64>, i32 } @llvm.s390.vchlgs(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
+  idx = vec_all_lt(vbl, vul);
+  // CHECK: call { <2 x i64>, i32 } @llvm.s390.vchlgs(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
+  idx = vec_all_lt(vbl, vbl);
+  // CHECK: call { <2 x i64>, i32 } @llvm.s390.vchlgs(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
+  idx = vec_all_lt(vd, vd);
+  // CHECK: call { <2 x i64>, i32 } @llvm.s390.vfchdbs(<2 x double> %{{.*}}, <2 x double> %{{.*}})
+
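+  // The negated predicates (nge, ngt, nle, nlt) reuse the same floating-point
+  // compare intrinsics; only the condition-code test is inverted.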
+  idx = vec_all_nge(vd, vd);
+  // CHECK: call { <2 x i64>, i32 } @llvm.s390.vfchedbs(<2 x double> %{{.*}}, <2 x double> %{{.*}})
+  idx = vec_all_ngt(vd, vd);
+  // CHECK: call { <2 x i64>, i32 } @llvm.s390.vfchdbs(<2 x double> %{{.*}}, <2 x double> %{{.*}})
+  idx = vec_all_nle(vd, vd);
+  // CHECK: call { <2 x i64>, i32 } @llvm.s390.vfchedbs(<2 x double> %{{.*}}, <2 x double> %{{.*}})
+  idx = vec_all_nlt(vd, vd);
+  // CHECK: call { <2 x i64>, i32 } @llvm.s390.vfchdbs(<2 x double> %{{.*}}, <2 x double> %{{.*}})
+
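+  // vec_all_nan/vec_all_numeric use the test-data-class intrinsic; mask 15
+  // presumably selects the four NaN classes, with the two builtins differing
+  // only in how the condition code is interpreted.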
+  idx = vec_all_nan(vd);
+  // CHECK: call { <2 x i64>, i32 } @llvm.s390.vftcidb(<2 x double> %{{.*}}, i32 15)
+  idx = vec_all_numeric(vd);
+  // CHECK: call { <2 x i64>, i32 } @llvm.s390.vftcidb(<2 x double> %{{.*}}, i32 15)
+
+  idx = vec_any_eq(vsc, vsc);
+  // CHECK: call { <16 x i8>, i32 } @llvm.s390.vceqbs(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  idx = vec_any_eq(vsc, vbc);
+  // CHECK: call { <16 x i8>, i32 } @llvm.s390.vceqbs(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  idx = vec_any_eq(vbc, vsc);
+  // CHECK: call { <16 x i8>, i32 } @llvm.s390.vceqbs(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  idx = vec_any_eq(vuc, vuc);
+  // CHECK: call { <16 x i8>, i32 } @llvm.s390.vceqbs(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  idx = vec_any_eq(vuc, vbc);
+  // CHECK: call { <16 x i8>, i32 } @llvm.s390.vceqbs(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  idx = vec_any_eq(vbc, vuc);
+  // CHECK: call { <16 x i8>, i32 } @llvm.s390.vceqbs(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  idx = vec_any_eq(vbc, vbc);
+  // CHECK: call { <16 x i8>, i32 } @llvm.s390.vceqbs(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  idx = vec_any_eq(vss, vss);
+  // CHECK: call { <8 x i16>, i32 } @llvm.s390.vceqhs(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+  idx = vec_any_eq(vss, vbs);
+  // CHECK: call { <8 x i16>, i32 } @llvm.s390.vceqhs(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+  idx = vec_any_eq(vbs, vss);
+  // CHECK: call { <8 x i16>, i32 } @llvm.s390.vceqhs(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+  idx = vec_any_eq(vus, vus);
+  // CHECK: call { <8 x i16>, i32 } @llvm.s390.vceqhs(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+  idx = vec_any_eq(vus, vbs);
+  // CHECK: call { <8 x i16>, i32 } @llvm.s390.vceqhs(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+  idx = vec_any_eq(vbs, vus);
+  // CHECK: call { <8 x i16>, i32 } @llvm.s390.vceqhs(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+  idx = vec_any_eq(vbs, vbs);
+  // CHECK: call { <8 x i16>, i32 } @llvm.s390.vceqhs(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+  idx = vec_any_eq(vsi, vsi);
+  // CHECK: call { <4 x i32>, i32 } @llvm.s390.vceqfs(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+  idx = vec_any_eq(vsi, vbi);
+  // CHECK: call { <4 x i32>, i32 } @llvm.s390.vceqfs(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+  idx = vec_any_eq(vbi, vsi);
+  // CHECK: call { <4 x i32>, i32 } @llvm.s390.vceqfs(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+  idx = vec_any_eq(vui, vui);
+  // CHECK: call { <4 x i32>, i32 } @llvm.s390.vceqfs(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+  idx = vec_any_eq(vui, vbi);
+  // CHECK: call { <4 x i32>, i32 } @llvm.s390.vceqfs(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+  idx = vec_any_eq(vbi, vui);
+  // CHECK: call { <4 x i32>, i32 } @llvm.s390.vceqfs(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+  idx = vec_any_eq(vbi, vbi);
+  // CHECK: call { <4 x i32>, i32 } @llvm.s390.vceqfs(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+  idx = vec_any_eq(vsl, vsl);
+  // CHECK: call { <2 x i64>, i32 } @llvm.s390.vceqgs(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
+  idx = vec_any_eq(vsl, vbl);
+  // CHECK: call { <2 x i64>, i32 } @llvm.s390.vceqgs(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
+  idx = vec_any_eq(vbl, vsl);
+  // CHECK: call { <2 x i64>, i32 } @llvm.s390.vceqgs(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
+  idx = vec_any_eq(vul, vul);
+  // CHECK: call { <2 x i64>, i32 } @llvm.s390.vceqgs(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
+  idx = vec_any_eq(vul, vbl);
+  // CHECK: call { <2 x i64>, i32 } @llvm.s390.vceqgs(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
+  idx = vec_any_eq(vbl, vul);
+  // CHECK: call { <2 x i64>, i32 } @llvm.s390.vceqgs(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
+  idx = vec_any_eq(vbl, vbl);
+  // CHECK: call { <2 x i64>, i32 } @llvm.s390.vceqgs(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
+  idx = vec_any_eq(vd, vd);
+  // CHECK: call { <2 x i64>, i32 } @llvm.s390.vfcedbs(<2 x double> %{{.*}}, <2 x double> %{{.*}})
+
+  idx = vec_any_ne(vsc, vsc);
+  // CHECK: call { <16 x i8>, i32 } @llvm.s390.vceqbs(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  idx = vec_any_ne(vsc, vbc);
+  // CHECK: call { <16 x i8>, i32 } @llvm.s390.vceqbs(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  idx = vec_any_ne(vbc, vsc);
+  // CHECK: call { <16 x i8>, i32 } @llvm.s390.vceqbs(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  idx = vec_any_ne(vuc, vuc);
+  // CHECK: call { <16 x i8>, i32 } @llvm.s390.vceqbs(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  idx = vec_any_ne(vuc, vbc);
+  // CHECK: call { <16 x i8>, i32 } @llvm.s390.vceqbs(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  idx = vec_any_ne(vbc, vuc);
+  // CHECK: call { <16 x i8>, i32 } @llvm.s390.vceqbs(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  idx = vec_any_ne(vbc, vbc);
+  // CHECK: call { <16 x i8>, i32 } @llvm.s390.vceqbs(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  idx = vec_any_ne(vss, vss);
+  // CHECK: call { <8 x i16>, i32 } @llvm.s390.vceqhs(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+  idx = vec_any_ne(vss, vbs);
+  // CHECK: call { <8 x i16>, i32 } @llvm.s390.vceqhs(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+  idx = vec_any_ne(vbs, vss);
+  // CHECK: call { <8 x i16>, i32 } @llvm.s390.vceqhs(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+  idx = vec_any_ne(vus, vus);
+  // CHECK: call { <8 x i16>, i32 } @llvm.s390.vceqhs(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+  idx = vec_any_ne(vus, vbs);
+  // CHECK: call { <8 x i16>, i32 } @llvm.s390.vceqhs(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+  idx = vec_any_ne(vbs, vus);
+  // CHECK: call { <8 x i16>, i32 } @llvm.s390.vceqhs(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+  idx = vec_any_ne(vbs, vbs);
+  // CHECK: call { <8 x i16>, i32 } @llvm.s390.vceqhs(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+  idx = vec_any_ne(vsi, vsi);
+  // CHECK: call { <4 x i32>, i32 } @llvm.s390.vceqfs(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+  idx = vec_any_ne(vsi, vbi);
+  // CHECK: call { <4 x i32>, i32 } @llvm.s390.vceqfs(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+  idx = vec_any_ne(vbi, vsi);
+  // CHECK: call { <4 x i32>, i32 } @llvm.s390.vceqfs(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+  idx = vec_any_ne(vui, vui);
+  // CHECK: call { <4 x i32>, i32 } @llvm.s390.vceqfs(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+  idx = vec_any_ne(vui, vbi);
+  // CHECK: call { <4 x i32>, i32 } @llvm.s390.vceqfs(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+  idx = vec_any_ne(vbi, vui);
+  // CHECK: call { <4 x i32>, i32 } @llvm.s390.vceqfs(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+  idx = vec_any_ne(vbi, vbi);
+  // CHECK: call { <4 x i32>, i32 } @llvm.s390.vceqfs(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+  idx = vec_any_ne(vsl, vsl);
+  // CHECK: call { <2 x i64>, i32 } @llvm.s390.vceqgs(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
+  idx = vec_any_ne(vsl, vbl);
+  // CHECK: call { <2 x i64>, i32 } @llvm.s390.vceqgs(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
+  idx = vec_any_ne(vbl, vsl);
+  // CHECK: call { <2 x i64>, i32 } @llvm.s390.vceqgs(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
+  idx = vec_any_ne(vul, vul);
+  // CHECK: call { <2 x i64>, i32 } @llvm.s390.vceqgs(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
+  idx = vec_any_ne(vul, vbl);
+  // CHECK: call { <2 x i64>, i32 } @llvm.s390.vceqgs(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
+  idx = vec_any_ne(vbl, vul);
+  // CHECK: call { <2 x i64>, i32 } @llvm.s390.vceqgs(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
+  idx = vec_any_ne(vbl, vbl);
+  // CHECK: call { <2 x i64>, i32 } @llvm.s390.vceqgs(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
+  idx = vec_any_ne(vd, vd);
+  // CHECK: call { <2 x i64>, i32 } @llvm.s390.vfcedbs(<2 x double> %{{.*}}, <2 x double> %{{.*}})
+
+  idx = vec_any_ge(vsc, vsc);
+  // CHECK: call { <16 x i8>, i32 } @llvm.s390.vchbs(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  idx = vec_any_ge(vsc, vbc);
+  // CHECK: call { <16 x i8>, i32 } @llvm.s390.vchbs(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  idx = vec_any_ge(vbc, vsc);
+  // CHECK: call { <16 x i8>, i32 } @llvm.s390.vchbs(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  idx = vec_any_ge(vuc, vuc);
+  // CHECK: call { <16 x i8>, i32 } @llvm.s390.vchlbs(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  idx = vec_any_ge(vuc, vbc);
+  // CHECK: call { <16 x i8>, i32 } @llvm.s390.vchlbs(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  idx = vec_any_ge(vbc, vuc);
+  // CHECK: call { <16 x i8>, i32 } @llvm.s390.vchlbs(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  idx = vec_any_ge(vbc, vbc);
+  // CHECK: call { <16 x i8>, i32 } @llvm.s390.vchlbs(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  idx = vec_any_ge(vss, vss);
+  // CHECK: call { <8 x i16>, i32 } @llvm.s390.vchhs(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+  idx = vec_any_ge(vss, vbs);
+  // CHECK: call { <8 x i16>, i32 } @llvm.s390.vchhs(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+  idx = vec_any_ge(vbs, vss);
+  // CHECK: call { <8 x i16>, i32 } @llvm.s390.vchhs(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+  idx = vec_any_ge(vus, vus);
+  // CHECK: call { <8 x i16>, i32 } @llvm.s390.vchlhs(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+  idx = vec_any_ge(vus, vbs);
+  // CHECK: call { <8 x i16>, i32 } @llvm.s390.vchlhs(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+  idx = vec_any_ge(vbs, vus);
+  // CHECK: call { <8 x i16>, i32 } @llvm.s390.vchlhs(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+  idx = vec_any_ge(vbs, vbs);
+  // CHECK: call { <8 x i16>, i32 } @llvm.s390.vchlhs(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+  idx = vec_any_ge(vsi, vsi);
+  // CHECK: call { <4 x i32>, i32 } @llvm.s390.vchfs(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+  idx = vec_any_ge(vsi, vbi);
+  // CHECK: call { <4 x i32>, i32 } @llvm.s390.vchfs(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+  idx = vec_any_ge(vbi, vsi);
+  // CHECK: call { <4 x i32>, i32 } @llvm.s390.vchfs(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+  idx = vec_any_ge(vui, vui);
+  // CHECK: call { <4 x i32>, i32 } @llvm.s390.vchlfs(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+  idx = vec_any_ge(vui, vbi);
+  // CHECK: call { <4 x i32>, i32 } @llvm.s390.vchlfs(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+  idx = vec_any_ge(vbi, vui);
+  // CHECK: call { <4 x i32>, i32 } @llvm.s390.vchlfs(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+  idx = vec_any_ge(vbi, vbi);
+  // CHECK: call { <4 x i32>, i32 } @llvm.s390.vchlfs(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+  idx = vec_any_ge(vsl, vsl);
+  // CHECK: call { <2 x i64>, i32 } @llvm.s390.vchgs(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
+  idx = vec_any_ge(vsl, vbl);
+  // CHECK: call { <2 x i64>, i32 } @llvm.s390.vchgs(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
+  idx = vec_any_ge(vbl, vsl);
+  // CHECK: call { <2 x i64>, i32 } @llvm.s390.vchgs(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
+  idx = vec_any_ge(vul, vul);
+  // CHECK: call { <2 x i64>, i32 } @llvm.s390.vchlgs(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
+  idx = vec_any_ge(vul, vbl);
+  // CHECK: call { <2 x i64>, i32 } @llvm.s390.vchlgs(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
+  idx = vec_any_ge(vbl, vul);
+  // CHECK: call { <2 x i64>, i32 } @llvm.s390.vchlgs(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
+  idx = vec_any_ge(vbl, vbl);
+  // CHECK: call { <2 x i64>, i32 } @llvm.s390.vchlgs(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
+  idx = vec_any_ge(vd, vd);
+  // CHECK: call { <2 x i64>, i32 } @llvm.s390.vfchedbs(<2 x double> %{{.*}}, <2 x double> %{{.*}})
+
+  idx = vec_any_gt(vsc, vsc);
+  // CHECK: call { <16 x i8>, i32 } @llvm.s390.vchbs(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  idx = vec_any_gt(vsc, vbc);
+  // CHECK: call { <16 x i8>, i32 } @llvm.s390.vchbs(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  idx = vec_any_gt(vbc, vsc);
+  // CHECK: call { <16 x i8>, i32 } @llvm.s390.vchbs(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  idx = vec_any_gt(vuc, vuc);
+  // CHECK: call { <16 x i8>, i32 } @llvm.s390.vchlbs(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  idx = vec_any_gt(vuc, vbc);
+  // CHECK: call { <16 x i8>, i32 } @llvm.s390.vchlbs(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  idx = vec_any_gt(vbc, vuc);
+  // CHECK: call { <16 x i8>, i32 } @llvm.s390.vchlbs(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  idx = vec_any_gt(vbc, vbc);
+  // CHECK: call { <16 x i8>, i32 } @llvm.s390.vchlbs(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  idx = vec_any_gt(vss, vss);
+  // CHECK: call { <8 x i16>, i32 } @llvm.s390.vchhs(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+  idx = vec_any_gt(vss, vbs);
+  // CHECK: call { <8 x i16>, i32 } @llvm.s390.vchhs(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+  idx = vec_any_gt(vbs, vss);
+  // CHECK: call { <8 x i16>, i32 } @llvm.s390.vchhs(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+  idx = vec_any_gt(vus, vus);
+  // CHECK: call { <8 x i16>, i32 } @llvm.s390.vchlhs(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+  idx = vec_any_gt(vus, vbs);
+  // CHECK: call { <8 x i16>, i32 } @llvm.s390.vchlhs(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+  idx = vec_any_gt(vbs, vus);
+  // CHECK: call { <8 x i16>, i32 } @llvm.s390.vchlhs(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+  idx = vec_any_gt(vbs, vbs);
+  // CHECK: call { <8 x i16>, i32 } @llvm.s390.vchlhs(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+  idx = vec_any_gt(vsi, vsi);
+  // CHECK: call { <4 x i32>, i32 } @llvm.s390.vchfs(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+  idx = vec_any_gt(vsi, vbi);
+  // CHECK: call { <4 x i32>, i32 } @llvm.s390.vchfs(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+  idx = vec_any_gt(vbi, vsi);
+  // CHECK: call { <4 x i32>, i32 } @llvm.s390.vchfs(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+  idx = vec_any_gt(vui, vui);
+  // CHECK: call { <4 x i32>, i32 } @llvm.s390.vchlfs(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+  idx = vec_any_gt(vui, vbi);
+  // CHECK: call { <4 x i32>, i32 } @llvm.s390.vchlfs(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+  idx = vec_any_gt(vbi, vui);
+  // CHECK: call { <4 x i32>, i32 } @llvm.s390.vchlfs(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+  idx = vec_any_gt(vbi, vbi);
+  // CHECK: call { <4 x i32>, i32 } @llvm.s390.vchlfs(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+  idx = vec_any_gt(vsl, vsl);
+  // CHECK: call { <2 x i64>, i32 } @llvm.s390.vchgs(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
+  idx = vec_any_gt(vsl, vbl);
+  // CHECK: call { <2 x i64>, i32 } @llvm.s390.vchgs(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
+  idx = vec_any_gt(vbl, vsl);
+  // CHECK: call { <2 x i64>, i32 } @llvm.s390.vchgs(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
+  idx = vec_any_gt(vul, vul);
+  // CHECK: call { <2 x i64>, i32 } @llvm.s390.vchlgs(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
+  idx = vec_any_gt(vul, vbl);
+  // CHECK: call { <2 x i64>, i32 } @llvm.s390.vchlgs(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
+  idx = vec_any_gt(vbl, vul);
+  // CHECK: call { <2 x i64>, i32 } @llvm.s390.vchlgs(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
+  idx = vec_any_gt(vbl, vbl);
+  // CHECK: call { <2 x i64>, i32 } @llvm.s390.vchlgs(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
+  idx = vec_any_gt(vd, vd);
+  // CHECK: call { <2 x i64>, i32 } @llvm.s390.vfchdbs(<2 x double> %{{.*}}, <2 x double> %{{.*}})
+
+  idx = vec_any_le(vsc, vsc);
+  // CHECK: call { <16 x i8>, i32 } @llvm.s390.vchbs(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  idx = vec_any_le(vsc, vbc);
+  // CHECK: call { <16 x i8>, i32 } @llvm.s390.vchbs(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  idx = vec_any_le(vbc, vsc);
+  // CHECK: call { <16 x i8>, i32 } @llvm.s390.vchbs(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  idx = vec_any_le(vuc, vuc);
+  // CHECK: call { <16 x i8>, i32 } @llvm.s390.vchlbs(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  idx = vec_any_le(vuc, vbc);
+  // CHECK: call { <16 x i8>, i32 } @llvm.s390.vchlbs(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  idx = vec_any_le(vbc, vuc);
+  // CHECK: call { <16 x i8>, i32 } @llvm.s390.vchlbs(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  idx = vec_any_le(vbc, vbc);
+  // CHECK: call { <16 x i8>, i32 } @llvm.s390.vchlbs(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  idx = vec_any_le(vss, vss);
+  // CHECK: call { <8 x i16>, i32 } @llvm.s390.vchhs(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+  idx = vec_any_le(vss, vbs);
+  // CHECK: call { <8 x i16>, i32 } @llvm.s390.vchhs(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+  idx = vec_any_le(vbs, vss);
+  // CHECK: call { <8 x i16>, i32 } @llvm.s390.vchhs(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+  idx = vec_any_le(vus, vus);
+  // CHECK: call { <8 x i16>, i32 } @llvm.s390.vchlhs(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+  idx = vec_any_le(vus, vbs);
+  // CHECK: call { <8 x i16>, i32 } @llvm.s390.vchlhs(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+  idx = vec_any_le(vbs, vus);
+  // CHECK: call { <8 x i16>, i32 } @llvm.s390.vchlhs(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+  idx = vec_any_le(vbs, vbs);
+  // CHECK: call { <8 x i16>, i32 } @llvm.s390.vchlhs(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+  idx = vec_any_le(vsi, vsi);
+  // CHECK: call { <4 x i32>, i32 } @llvm.s390.vchfs(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+  idx = vec_any_le(vsi, vbi);
+  // CHECK: call { <4 x i32>, i32 } @llvm.s390.vchfs(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+  idx = vec_any_le(vbi, vsi);
+  // CHECK: call { <4 x i32>, i32 } @llvm.s390.vchfs(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+  idx = vec_any_le(vui, vui);
+  // CHECK: call { <4 x i32>, i32 } @llvm.s390.vchlfs(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+  idx = vec_any_le(vui, vbi);
+  // CHECK: call { <4 x i32>, i32 } @llvm.s390.vchlfs(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+  idx = vec_any_le(vbi, vui);
+  // CHECK: call { <4 x i32>, i32 } @llvm.s390.vchlfs(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+  idx = vec_any_le(vbi, vbi);
+  // CHECK: call { <4 x i32>, i32 } @llvm.s390.vchlfs(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+  idx = vec_any_le(vsl, vsl);
+  // CHECK: call { <2 x i64>, i32 } @llvm.s390.vchgs(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
+  idx = vec_any_le(vsl, vbl);
+  // CHECK: call { <2 x i64>, i32 } @llvm.s390.vchgs(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
+  idx = vec_any_le(vbl, vsl);
+  // CHECK: call { <2 x i64>, i32 } @llvm.s390.vchgs(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
+  idx = vec_any_le(vul, vul);
+  // CHECK: call { <2 x i64>, i32 } @llvm.s390.vchlgs(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
+  idx = vec_any_le(vul, vbl);
+  // CHECK: call { <2 x i64>, i32 } @llvm.s390.vchlgs(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
+  idx = vec_any_le(vbl, vul);
+  // CHECK: call { <2 x i64>, i32 } @llvm.s390.vchlgs(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
+  idx = vec_any_le(vbl, vbl);
+  // CHECK: call { <2 x i64>, i32 } @llvm.s390.vchlgs(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
+  idx = vec_any_le(vd, vd);
+  // CHECK: call { <2 x i64>, i32 } @llvm.s390.vfchedbs(<2 x double> %{{.*}}, <2 x double> %{{.*}})
+
+  idx = vec_any_lt(vsc, vsc);
+  // CHECK: call { <16 x i8>, i32 } @llvm.s390.vchbs(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  idx = vec_any_lt(vsc, vbc);
+  // CHECK: call { <16 x i8>, i32 } @llvm.s390.vchbs(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  idx = vec_any_lt(vbc, vsc);
+  // CHECK: call { <16 x i8>, i32 } @llvm.s390.vchbs(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  idx = vec_any_lt(vuc, vuc);
+  // CHECK: call { <16 x i8>, i32 } @llvm.s390.vchlbs(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  idx = vec_any_lt(vuc, vbc);
+  // CHECK: call { <16 x i8>, i32 } @llvm.s390.vchlbs(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  idx = vec_any_lt(vbc, vuc);
+  // CHECK: call { <16 x i8>, i32 } @llvm.s390.vchlbs(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  idx = vec_any_lt(vbc, vbc);
+  // CHECK: call { <16 x i8>, i32 } @llvm.s390.vchlbs(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  idx = vec_any_lt(vss, vss);
+  // CHECK: call { <8 x i16>, i32 } @llvm.s390.vchhs(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+  idx = vec_any_lt(vss, vbs);
+  // CHECK: call { <8 x i16>, i32 } @llvm.s390.vchhs(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+  idx = vec_any_lt(vbs, vss);
+  // CHECK: call { <8 x i16>, i32 } @llvm.s390.vchhs(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+  idx = vec_any_lt(vus, vus);
+  // CHECK: call { <8 x i16>, i32 } @llvm.s390.vchlhs(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+  idx = vec_any_lt(vus, vbs);
+  // CHECK: call { <8 x i16>, i32 } @llvm.s390.vchlhs(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+  idx = vec_any_lt(vbs, vus);
+  // CHECK: call { <8 x i16>, i32 } @llvm.s390.vchlhs(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+  idx = vec_any_lt(vbs, vbs);
+  // CHECK: call { <8 x i16>, i32 } @llvm.s390.vchlhs(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+  idx = vec_any_lt(vsi, vsi);
+  // CHECK: call { <4 x i32>, i32 } @llvm.s390.vchfs(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+  idx = vec_any_lt(vsi, vbi);
+  // CHECK: call { <4 x i32>, i32 } @llvm.s390.vchfs(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+  idx = vec_any_lt(vbi, vsi);
+  // CHECK: call { <4 x i32>, i32 } @llvm.s390.vchfs(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+  idx = vec_any_lt(vui, vui);
+  // CHECK: call { <4 x i32>, i32 } @llvm.s390.vchlfs(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+  idx = vec_any_lt(vui, vbi);
+  // CHECK: call { <4 x i32>, i32 } @llvm.s390.vchlfs(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+  idx = vec_any_lt(vbi, vui);
+  // CHECK: call { <4 x i32>, i32 } @llvm.s390.vchlfs(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+  idx = vec_any_lt(vbi, vbi);
+  // CHECK: call { <4 x i32>, i32 } @llvm.s390.vchlfs(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+  idx = vec_any_lt(vsl, vsl);
+  // CHECK: call { <2 x i64>, i32 } @llvm.s390.vchgs(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
+  idx = vec_any_lt(vsl, vbl);
+  // CHECK: call { <2 x i64>, i32 } @llvm.s390.vchgs(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
+  idx = vec_any_lt(vbl, vsl);
+  // CHECK: call { <2 x i64>, i32 } @llvm.s390.vchgs(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
+  idx = vec_any_lt(vul, vul);
+  // CHECK: call { <2 x i64>, i32 } @llvm.s390.vchlgs(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
+  idx = vec_any_lt(vul, vbl);
+  // CHECK: call { <2 x i64>, i32 } @llvm.s390.vchlgs(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
+  idx = vec_any_lt(vbl, vul);
+  // CHECK: call { <2 x i64>, i32 } @llvm.s390.vchlgs(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
+  idx = vec_any_lt(vbl, vbl);
+  // CHECK: call { <2 x i64>, i32 } @llvm.s390.vchlgs(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
+  idx = vec_any_lt(vd, vd);
+  // CHECK: call { <2 x i64>, i32 } @llvm.s390.vfchdbs(<2 x double> %{{.*}}, <2 x double> %{{.*}})
+
+  idx = vec_any_nge(vd, vd);
+  // CHECK: call { <2 x i64>, i32 } @llvm.s390.vfchedbs(<2 x double> %{{.*}}, <2 x double> %{{.*}})
+  idx = vec_any_ngt(vd, vd);
+  // CHECK: call { <2 x i64>, i32 } @llvm.s390.vfchdbs(<2 x double> %{{.*}}, <2 x double> %{{.*}})
+  idx = vec_any_nle(vd, vd);
+  // CHECK: call { <2 x i64>, i32 } @llvm.s390.vfchedbs(<2 x double> %{{.*}}, <2 x double> %{{.*}})
+  idx = vec_any_nlt(vd, vd);
+  // CHECK: call { <2 x i64>, i32 } @llvm.s390.vfchdbs(<2 x double> %{{.*}}, <2 x double> %{{.*}})
+
+  idx = vec_any_nan(vd);
+  // CHECK: call { <2 x i64>, i32 } @llvm.s390.vftcidb(<2 x double> %{{.*}}, i32 15)
+  idx = vec_any_numeric(vd);
+  // CHECK: call { <2 x i64>, i32 } @llvm.s390.vftcidb(<2 x double> %{{.*}}, i32 15)
+}
+
+void test_integer(void) {
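+  // vec_andc and vec_nor expand to plain bitwise IR (and-with-complement,
+  // or followed by not), so there is nothing target-specific to check here.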
+  vsc = vec_andc(vsc, vsc);
+  vsc = vec_andc(vsc, vbc);
+  vsc = vec_andc(vbc, vsc);
+  vuc = vec_andc(vuc, vuc);
+  vuc = vec_andc(vuc, vbc);
+  vuc = vec_andc(vbc, vuc);
+  vbc = vec_andc(vbc, vbc);
+  vss = vec_andc(vss, vss);
+  vss = vec_andc(vss, vbs);
+  vss = vec_andc(vbs, vss);
+  vus = vec_andc(vus, vus);
+  vus = vec_andc(vus, vbs);
+  vus = vec_andc(vbs, vus);
+  vbs = vec_andc(vbs, vbs);
+  vsi = vec_andc(vsi, vsi);
+  vsi = vec_andc(vsi, vbi);
+  vsi = vec_andc(vbi, vsi);
+  vui = vec_andc(vui, vui);
+  vui = vec_andc(vui, vbi);
+  vui = vec_andc(vbi, vui);
+  vbi = vec_andc(vbi, vbi);
+  vsl = vec_andc(vsl, vsl);
+  vsl = vec_andc(vsl, vbl);
+  vsl = vec_andc(vbl, vsl);
+  vul = vec_andc(vul, vul);
+  vul = vec_andc(vul, vbl);
+  vul = vec_andc(vbl, vul);
+  vbl = vec_andc(vbl, vbl);
+  vd = vec_andc(vd, vd);
+  vd = vec_andc(vd, vbl);
+  vd = vec_andc(vbl, vd);
+
+  vsc = vec_nor(vsc, vsc);
+  vsc = vec_nor(vsc, vbc);
+  vsc = vec_nor(vbc, vsc);
+  vuc = vec_nor(vuc, vuc);
+  vuc = vec_nor(vuc, vbc);
+  vuc = vec_nor(vbc, vuc);
+  vbc = vec_nor(vbc, vbc);
+  vss = vec_nor(vss, vss);
+  vss = vec_nor(vss, vbs);
+  vss = vec_nor(vbs, vss);
+  vus = vec_nor(vus, vus);
+  vus = vec_nor(vus, vbs);
+  vus = vec_nor(vbs, vus);
+  vbs = vec_nor(vbs, vbs);
+  vsi = vec_nor(vsi, vsi);
+  vsi = vec_nor(vsi, vbi);
+  vsi = vec_nor(vbi, vsi);
+  vui = vec_nor(vui, vui);
+  vui = vec_nor(vui, vbi);
+  vui = vec_nor(vbi, vui);
+  vbi = vec_nor(vbi, vbi);
+  vsl = vec_nor(vsl, vsl);
+  vsl = vec_nor(vsl, vbl);
+  vsl = vec_nor(vbl, vsl);
+  vul = vec_nor(vul, vul);
+  vul = vec_nor(vul, vbl);
+  vul = vec_nor(vbl, vul);
+  vbl = vec_nor(vbl, vbl);
+  vd = vec_nor(vd, vd);
+  vd = vec_nor(vd, vbl);
+  vd = vec_nor(vbl, vd);
+
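+  // Leading/trailing zero counts and population count map onto the generic
+  // llvm.ctlz/llvm.cttz/llvm.ctpop intrinsics (with is_zero_undef false).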
+  vuc = vec_cntlz(vsc);
+  // CHECK: call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %{{.*}}, i1 false)
+  vuc = vec_cntlz(vuc);
+  // CHECK: call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %{{.*}}, i1 false)
+  vus = vec_cntlz(vss);
+  // CHECK: call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %{{.*}}, i1 false)
+  vus = vec_cntlz(vus);
+  // CHECK: call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %{{.*}}, i1 false)
+  vui = vec_cntlz(vsi);
+  // CHECK: call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %{{.*}}, i1 false)
+  vui = vec_cntlz(vui);
+  // CHECK: call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %{{.*}}, i1 false)
+  vul = vec_cntlz(vsl);
+  // CHECK: call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %{{.*}}, i1 false)
+  vul = vec_cntlz(vul);
+  // CHECK: call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %{{.*}}, i1 false)
+
+  vuc = vec_cnttz(vsc);
+  // CHECK: call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %{{.*}}, i1 false)
+  vuc = vec_cnttz(vuc);
+  // CHECK: call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %{{.*}}, i1 false)
+  vus = vec_cnttz(vss);
+  // CHECK: call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %{{.*}}, i1 false)
+  vus = vec_cnttz(vus);
+  // CHECK: call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %{{.*}}, i1 false)
+  vui = vec_cnttz(vsi);
+  // CHECK: call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %{{.*}}, i1 false)
+  vui = vec_cnttz(vui);
+  // CHECK: call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %{{.*}}, i1 false)
+  vul = vec_cnttz(vsl);
+  // CHECK: call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %{{.*}}, i1 false)
+  vul = vec_cnttz(vul);
+  // CHECK: call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %{{.*}}, i1 false)
+
+  vuc = vec_popcnt(vsc);
+  // CHECK: call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %{{.*}})
+  vuc = vec_popcnt(vuc);
+  // CHECK: call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %{{.*}})
+  vus = vec_popcnt(vss);
+  // CHECK: call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %{{.*}})
+  vus = vec_popcnt(vus);
+  // CHECK: call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %{{.*}})
+  vui = vec_popcnt(vsi);
+  // CHECK: call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %{{.*}})
+  vui = vec_popcnt(vui);
+  // CHECK: call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %{{.*}})
+  vul = vec_popcnt(vsl);
+  // CHECK: call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %{{.*}})
+  vul = vec_popcnt(vul);
+  // CHECK: call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %{{.*}})
+
+  vsc = vec_rl(vsc, vuc);
+  // CHECK: call <16 x i8> @llvm.s390.verllvb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vuc = vec_rl(vuc, vuc);
+  // CHECK: call <16 x i8> @llvm.s390.verllvb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vss = vec_rl(vss, vus);
+  // CHECK: call <8 x i16> @llvm.s390.verllvh(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+  vus = vec_rl(vus, vus);
+  // CHECK: call <8 x i16> @llvm.s390.verllvh(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+  vsi = vec_rl(vsi, vui);
+  // CHECK: call <4 x i32> @llvm.s390.verllvf(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+  vui = vec_rl(vui, vui);
+  // CHECK: call <4 x i32> @llvm.s390.verllvf(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+  vsl = vec_rl(vsl, vul);
+  // CHECK: call <2 x i64> @llvm.s390.verllvg(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
+  vul = vec_rl(vul, vul);
+  // CHECK: call <2 x i64> @llvm.s390.verllvg(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
+
+  vsc = vec_rli(vsc, ul);
+  // CHECK: call <16 x i8> @llvm.s390.verllb(<16 x i8> %{{.*}}, i32 %{{.*}})
+  vuc = vec_rli(vuc, ul);
+  // CHECK: call <16 x i8> @llvm.s390.verllb(<16 x i8> %{{.*}}, i32 %{{.*}})
+  vss = vec_rli(vss, ul);
+  // CHECK: call <8 x i16> @llvm.s390.verllh(<8 x i16> %{{.*}}, i32 %{{.*}})
+  vus = vec_rli(vus, ul);
+  // CHECK: call <8 x i16> @llvm.s390.verllh(<8 x i16> %{{.*}}, i32 %{{.*}})
+  vsi = vec_rli(vsi, ul);
+  // CHECK: call <4 x i32> @llvm.s390.verllf(<4 x i32> %{{.*}}, i32 %{{.*}})
+  vui = vec_rli(vui, ul);
+  // CHECK: call <4 x i32> @llvm.s390.verllf(<4 x i32> %{{.*}}, i32 %{{.*}})
+  vsl = vec_rli(vsl, ul);
+  // CHECK: call <2 x i64> @llvm.s390.verllg(<2 x i64> %{{.*}}, i32 %{{.*}})
+  vul = vec_rli(vul, ul);
+  // CHECK: call <2 x i64> @llvm.s390.verllg(<2 x i64> %{{.*}}, i32 %{{.*}})
+
+  vsc = vec_rl_mask(vsc, vuc, 0);
+  // CHECK: call <16 x i8> @llvm.s390.verimb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i32 0)
+  vsc = vec_rl_mask(vsc, vuc, 255);
+  // CHECK: call <16 x i8> @llvm.s390.verimb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i32 255)
+  vuc = vec_rl_mask(vuc, vuc, 0);
+  // CHECK: call <16 x i8> @llvm.s390.verimb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i32 0)
+  vuc = vec_rl_mask(vuc, vuc, 255);
+  // CHECK: call <16 x i8> @llvm.s390.verimb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i32 255)
+  vss = vec_rl_mask(vss, vus, 0);
+  // CHECK: call <8 x i16> @llvm.s390.verimh(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, i32 0)
+  vss = vec_rl_mask(vss, vus, 255);
+  // CHECK: call <8 x i16> @llvm.s390.verimh(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, i32 255)
+  vus = vec_rl_mask(vus, vus, 0);
+  // CHECK: call <8 x i16> @llvm.s390.verimh(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, i32 0)
+  vus = vec_rl_mask(vus, vus, 255);
+  // CHECK: call <8 x i16> @llvm.s390.verimh(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, i32 255)
+  vsi = vec_rl_mask(vsi, vui, 0);
+  // CHECK: call <4 x i32> @llvm.s390.verimf(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, i32 0)
+  vsi = vec_rl_mask(vsi, vui, 255);
+  // CHECK: call <4 x i32> @llvm.s390.verimf(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, i32 255)
+  vui = vec_rl_mask(vui, vui, 0);
+  // CHECK: call <4 x i32> @llvm.s390.verimf(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, i32 0)
+  vui = vec_rl_mask(vui, vui, 255);
+  // CHECK: call <4 x i32> @llvm.s390.verimf(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, i32 255)
+  vsl = vec_rl_mask(vsl, vul, 0);
+  // CHECK: call <2 x i64> @llvm.s390.verimg(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, i32 0)
+  vsl = vec_rl_mask(vsl, vul, 255);
+  // CHECK: call <2 x i64> @llvm.s390.verimg(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, i32 255)
+  vul = vec_rl_mask(vul, vul, 0);
+  // CHECK: call <2 x i64> @llvm.s390.verimg(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, i32 0)
+  vul = vec_rl_mask(vul, vul, 255);
+  // CHECK: call <2 x i64> @llvm.s390.verimg(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, i32 255)
+
+  vsc = vec_sll(vsc, vuc);
+  // CHECK: call <16 x i8> @llvm.s390.vsl(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vsc = vec_sll(vsc, vus);
+  // CHECK: call <16 x i8> @llvm.s390.vsl(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vsc = vec_sll(vsc, vui);
+  // CHECK: call <16 x i8> @llvm.s390.vsl(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vuc = vec_sll(vuc, vuc);
+  // CHECK: call <16 x i8> @llvm.s390.vsl(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vuc = vec_sll(vuc, vus);
+  // CHECK: call <16 x i8> @llvm.s390.vsl(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vuc = vec_sll(vuc, vui);
+  // CHECK: call <16 x i8> @llvm.s390.vsl(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vbc = vec_sll(vbc, vuc);
+  // CHECK: call <16 x i8> @llvm.s390.vsl(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vbc = vec_sll(vbc, vus);
+  // CHECK: call <16 x i8> @llvm.s390.vsl(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vbc = vec_sll(vbc, vui);
+  // CHECK: call <16 x i8> @llvm.s390.vsl(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vss = vec_sll(vss, vuc);
+  // CHECK: call <16 x i8> @llvm.s390.vsl(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vss = vec_sll(vss, vus);
+  // CHECK: call <16 x i8> @llvm.s390.vsl(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vss = vec_sll(vss, vui);
+  // CHECK: call <16 x i8> @llvm.s390.vsl(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vus = vec_sll(vus, vuc);
+  // CHECK: call <16 x i8> @llvm.s390.vsl(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vus = vec_sll(vus, vus);
+  // CHECK: call <16 x i8> @llvm.s390.vsl(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vus = vec_sll(vus, vui);
+  // CHECK: call <16 x i8> @llvm.s390.vsl(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vbs = vec_sll(vbs, vuc);
+  // CHECK: call <16 x i8> @llvm.s390.vsl(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vbs = vec_sll(vbs, vus);
+  // CHECK: call <16 x i8> @llvm.s390.vsl(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vbs = vec_sll(vbs, vui);
+  // CHECK: call <16 x i8> @llvm.s390.vsl(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vsi = vec_sll(vsi, vuc);
+  // CHECK: call <16 x i8> @llvm.s390.vsl(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vsi = vec_sll(vsi, vus);
+  // CHECK: call <16 x i8> @llvm.s390.vsl(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vsi = vec_sll(vsi, vui);
+  // CHECK: call <16 x i8> @llvm.s390.vsl(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vui = vec_sll(vui, vuc);
+  // CHECK: call <16 x i8> @llvm.s390.vsl(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vui = vec_sll(vui, vus);
+  // CHECK: call <16 x i8> @llvm.s390.vsl(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vui = vec_sll(vui, vui);
+  // CHECK: call <16 x i8> @llvm.s390.vsl(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vbi = vec_sll(vbi, vuc);
+  // CHECK: call <16 x i8> @llvm.s390.vsl(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vbi = vec_sll(vbi, vus);
+  // CHECK: call <16 x i8> @llvm.s390.vsl(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vbi = vec_sll(vbi, vui);
+  // CHECK: call <16 x i8> @llvm.s390.vsl(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vsl = vec_sll(vsl, vuc);
+  // CHECK: call <16 x i8> @llvm.s390.vsl(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vsl = vec_sll(vsl, vus);
+  // CHECK: call <16 x i8> @llvm.s390.vsl(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vsl = vec_sll(vsl, vui);
+  // CHECK: call <16 x i8> @llvm.s390.vsl(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vul = vec_sll(vul, vuc);
+  // CHECK: call <16 x i8> @llvm.s390.vsl(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vul = vec_sll(vul, vus);
+  // CHECK: call <16 x i8> @llvm.s390.vsl(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vul = vec_sll(vul, vui);
+  // CHECK: call <16 x i8> @llvm.s390.vsl(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vbl = vec_sll(vbl, vuc);
+  // CHECK: call <16 x i8> @llvm.s390.vsl(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vbl = vec_sll(vbl, vus);
+  // CHECK: call <16 x i8> @llvm.s390.vsl(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vbl = vec_sll(vbl, vui);
+  // CHECK: call <16 x i8> @llvm.s390.vsl(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+
+  vsc = vec_slb(vsc, vsc);
+  // CHECK: call <16 x i8> @llvm.s390.vslb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vsc = vec_slb(vsc, vuc);
+  // CHECK: call <16 x i8> @llvm.s390.vslb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vuc = vec_slb(vuc, vsc);
+  // CHECK: call <16 x i8> @llvm.s390.vslb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vuc = vec_slb(vuc, vuc);
+  // CHECK: call <16 x i8> @llvm.s390.vslb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vss = vec_slb(vss, vss);
+  // CHECK: call <16 x i8> @llvm.s390.vslb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vss = vec_slb(vss, vus);
+  // CHECK: call <16 x i8> @llvm.s390.vslb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vus = vec_slb(vus, vss);
+  // CHECK: call <16 x i8> @llvm.s390.vslb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vus = vec_slb(vus, vus);
+  // CHECK: call <16 x i8> @llvm.s390.vslb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vsi = vec_slb(vsi, vsi);
+  // CHECK: call <16 x i8> @llvm.s390.vslb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vsi = vec_slb(vsi, vui);
+  // CHECK: call <16 x i8> @llvm.s390.vslb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vui = vec_slb(vui, vsi);
+  // CHECK: call <16 x i8> @llvm.s390.vslb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vui = vec_slb(vui, vui);
+  // CHECK: call <16 x i8> @llvm.s390.vslb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vsl = vec_slb(vsl, vsl);
+  // CHECK: call <16 x i8> @llvm.s390.vslb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vsl = vec_slb(vsl, vul);
+  // CHECK: call <16 x i8> @llvm.s390.vslb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vul = vec_slb(vul, vsl);
+  // CHECK: call <16 x i8> @llvm.s390.vslb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vul = vec_slb(vul, vul);
+  // CHECK: call <16 x i8> @llvm.s390.vslb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vd = vec_slb(vd, vsl);
+  // CHECK: call <16 x i8> @llvm.s390.vslb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vd = vec_slb(vd, vul);
+  // CHECK: call <16 x i8> @llvm.s390.vslb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+
+  vsc = vec_sld(vsc, vsc, 0);
+  // CHECK: call <16 x i8> @llvm.s390.vsldb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i32 0)
+  vsc = vec_sld(vsc, vsc, 15);
+  // CHECK: call <16 x i8> @llvm.s390.vsldb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i32 15)
+  vuc = vec_sld(vuc, vuc, 0);
+  // CHECK: call <16 x i8> @llvm.s390.vsldb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i32 0)
+  vuc = vec_sld(vuc, vuc, 15);
+  // CHECK: call <16 x i8> @llvm.s390.vsldb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i32 15)
+  vss = vec_sld(vss, vss, 0);
+  // CHECK: call <16 x i8> @llvm.s390.vsldb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i32 0)
+  vss = vec_sld(vss, vss, 15);
+  // CHECK: call <16 x i8> @llvm.s390.vsldb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i32 15)
+  vus = vec_sld(vus, vus, 0);
+  // CHECK: call <16 x i8> @llvm.s390.vsldb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i32 0)
+  vus = vec_sld(vus, vus, 15);
+  // CHECK: call <16 x i8> @llvm.s390.vsldb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i32 15)
+  vsi = vec_sld(vsi, vsi, 0);
+  // CHECK: call <16 x i8> @llvm.s390.vsldb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i32 0)
+  vsi = vec_sld(vsi, vsi, 15);
+  // CHECK: call <16 x i8> @llvm.s390.vsldb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i32 15)
+  vui = vec_sld(vui, vui, 0);
+  // CHECK: call <16 x i8> @llvm.s390.vsldb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i32 0)
+  vui = vec_sld(vui, vui, 15);
+  // CHECK: call <16 x i8> @llvm.s390.vsldb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i32 15)
+  vsl = vec_sld(vsl, vsl, 0);
+  // CHECK: call <16 x i8> @llvm.s390.vsldb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i32 0)
+  vsl = vec_sld(vsl, vsl, 15);
+  // CHECK: call <16 x i8> @llvm.s390.vsldb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i32 15)
+  vul = vec_sld(vul, vul, 0);
+  // CHECK: call <16 x i8> @llvm.s390.vsldb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i32 0)
+  vul = vec_sld(vul, vul, 15);
+  // CHECK: call <16 x i8> @llvm.s390.vsldb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i32 15)
+  vd = vec_sld(vd, vd, 0);
+  // CHECK: call <16 x i8> @llvm.s390.vsldb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i32 0)
+  vd = vec_sld(vd, vd, 15);
+  // CHECK: call <16 x i8> @llvm.s390.vsldb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i32 15)
+
+  vsc = vec_sldw(vsc, vsc, 0);
+  // CHECK: call <16 x i8> @llvm.s390.vsldb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i32 0)
+  vsc = vec_sldw(vsc, vsc, 3);
+  // CHECK: call <16 x i8> @llvm.s390.vsldb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i32 12)
+  vuc = vec_sldw(vuc, vuc, 0);
+  // CHECK: call <16 x i8> @llvm.s390.vsldb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i32 0)
+  vuc = vec_sldw(vuc, vuc, 3);
+  // CHECK: call <16 x i8> @llvm.s390.vsldb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i32 12)
+  vss = vec_sldw(vss, vss, 0);
+  // CHECK: call <16 x i8> @llvm.s390.vsldb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i32 0)
+  vss = vec_sldw(vss, vss, 3);
+  // CHECK: call <16 x i8> @llvm.s390.vsldb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i32 12)
+  vus = vec_sldw(vus, vus, 0);
+  // CHECK: call <16 x i8> @llvm.s390.vsldb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i32 0)
+  vus = vec_sldw(vus, vus, 3);
+  // CHECK: call <16 x i8> @llvm.s390.vsldb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i32 12)
+  vsi = vec_sldw(vsi, vsi, 0);
+  // CHECK: call <16 x i8> @llvm.s390.vsldb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i32 0)
+  vsi = vec_sldw(vsi, vsi, 3);
+  // CHECK: call <16 x i8> @llvm.s390.vsldb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i32 12)
+  vui = vec_sldw(vui, vui, 0);
+  // CHECK: call <16 x i8> @llvm.s390.vsldb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i32 0)
+  vui = vec_sldw(vui, vui, 3);
+  // CHECK: call <16 x i8> @llvm.s390.vsldb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i32 12)
+  vsl = vec_sldw(vsl, vsl, 0);
+  // CHECK: call <16 x i8> @llvm.s390.vsldb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i32 0)
+  vsl = vec_sldw(vsl, vsl, 3);
+  // CHECK: call <16 x i8> @llvm.s390.vsldb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i32 12)
+  vul = vec_sldw(vul, vul, 0);
+  // CHECK: call <16 x i8> @llvm.s390.vsldb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i32 0)
+  vul = vec_sldw(vul, vul, 3);
+  // CHECK: call <16 x i8> @llvm.s390.vsldb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i32 12)
+  vd = vec_sldw(vd, vd, 0);
+  // CHECK: call <16 x i8> @llvm.s390.vsldb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i32 0)
+  vd = vec_sldw(vd, vd, 3);
+  // CHECK: call <16 x i8> @llvm.s390.vsldb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i32 12)
+
+  vsc = vec_sral(vsc, vuc);
+  // CHECK: call <16 x i8> @llvm.s390.vsra(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vsc = vec_sral(vsc, vus);
+  // CHECK: call <16 x i8> @llvm.s390.vsra(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vsc = vec_sral(vsc, vui);
+  // CHECK: call <16 x i8> @llvm.s390.vsra(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vuc = vec_sral(vuc, vuc);
+  // CHECK: call <16 x i8> @llvm.s390.vsra(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vuc = vec_sral(vuc, vus);
+  // CHECK: call <16 x i8> @llvm.s390.vsra(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vuc = vec_sral(vuc, vui);
+  // CHECK: call <16 x i8> @llvm.s390.vsra(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vbc = vec_sral(vbc, vuc);
+  // CHECK: call <16 x i8> @llvm.s390.vsra(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vbc = vec_sral(vbc, vus);
+  // CHECK: call <16 x i8> @llvm.s390.vsra(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vbc = vec_sral(vbc, vui);
+  // CHECK: call <16 x i8> @llvm.s390.vsra(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vss = vec_sral(vss, vuc);
+  // CHECK: call <16 x i8> @llvm.s390.vsra(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vss = vec_sral(vss, vus);
+  // CHECK: call <16 x i8> @llvm.s390.vsra(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vss = vec_sral(vss, vui);
+  // CHECK: call <16 x i8> @llvm.s390.vsra(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vus = vec_sral(vus, vuc);
+  // CHECK: call <16 x i8> @llvm.s390.vsra(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vus = vec_sral(vus, vus);
+  // CHECK: call <16 x i8> @llvm.s390.vsra(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vus = vec_sral(vus, vui);
+  // CHECK: call <16 x i8> @llvm.s390.vsra(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vbs = vec_sral(vbs, vuc);
+  // CHECK: call <16 x i8> @llvm.s390.vsra(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vbs = vec_sral(vbs, vus);
+  // CHECK: call <16 x i8> @llvm.s390.vsra(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vbs = vec_sral(vbs, vui);
+  // CHECK: call <16 x i8> @llvm.s390.vsra(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vsi = vec_sral(vsi, vuc);
+  // CHECK: call <16 x i8> @llvm.s390.vsra(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vsi = vec_sral(vsi, vus);
+  // CHECK: call <16 x i8> @llvm.s390.vsra(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vsi = vec_sral(vsi, vui);
+  // CHECK: call <16 x i8> @llvm.s390.vsra(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vui = vec_sral(vui, vuc);
+  // CHECK: call <16 x i8> @llvm.s390.vsra(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vui = vec_sral(vui, vus);
+  // CHECK: call <16 x i8> @llvm.s390.vsra(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vui = vec_sral(vui, vui);
+  // CHECK: call <16 x i8> @llvm.s390.vsra(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vbi = vec_sral(vbi, vuc);
+  // CHECK: call <16 x i8> @llvm.s390.vsra(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vbi = vec_sral(vbi, vus);
+  // CHECK: call <16 x i8> @llvm.s390.vsra(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vbi = vec_sral(vbi, vui);
+  // CHECK: call <16 x i8> @llvm.s390.vsra(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vsl = vec_sral(vsl, vuc);
+  // CHECK: call <16 x i8> @llvm.s390.vsra(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vsl = vec_sral(vsl, vus);
+  // CHECK: call <16 x i8> @llvm.s390.vsra(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vsl = vec_sral(vsl, vui);
+  // CHECK: call <16 x i8> @llvm.s390.vsra(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vul = vec_sral(vul, vuc);
+  // CHECK: call <16 x i8> @llvm.s390.vsra(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vul = vec_sral(vul, vus);
+  // CHECK: call <16 x i8> @llvm.s390.vsra(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vul = vec_sral(vul, vui);
+  // CHECK: call <16 x i8> @llvm.s390.vsra(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vbl = vec_sral(vbl, vuc);
+  // CHECK: call <16 x i8> @llvm.s390.vsra(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vbl = vec_sral(vbl, vus);
+  // CHECK: call <16 x i8> @llvm.s390.vsra(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vbl = vec_sral(vbl, vui);
+  // CHECK: call <16 x i8> @llvm.s390.vsra(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+
+  vsc = vec_srab(vsc, vsc);
+  // CHECK: call <16 x i8> @llvm.s390.vsrab(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vsc = vec_srab(vsc, vuc);
+  // CHECK: call <16 x i8> @llvm.s390.vsrab(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vuc = vec_srab(vuc, vsc);
+  // CHECK: call <16 x i8> @llvm.s390.vsrab(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vuc = vec_srab(vuc, vuc);
+  // CHECK: call <16 x i8> @llvm.s390.vsrab(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vss = vec_srab(vss, vss);
+  // CHECK: call <16 x i8> @llvm.s390.vsrab(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vss = vec_srab(vss, vus);
+  // CHECK: call <16 x i8> @llvm.s390.vsrab(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vus = vec_srab(vus, vss);
+  // CHECK: call <16 x i8> @llvm.s390.vsrab(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vus = vec_srab(vus, vus);
+  // CHECK: call <16 x i8> @llvm.s390.vsrab(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vsi = vec_srab(vsi, vsi);
+  // CHECK: call <16 x i8> @llvm.s390.vsrab(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vsi = vec_srab(vsi, vui);
+  // CHECK: call <16 x i8> @llvm.s390.vsrab(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vui = vec_srab(vui, vsi);
+  // CHECK: call <16 x i8> @llvm.s390.vsrab(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vui = vec_srab(vui, vui);
+  // CHECK: call <16 x i8> @llvm.s390.vsrab(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vsl = vec_srab(vsl, vsl);
+  // CHECK: call <16 x i8> @llvm.s390.vsrab(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vsl = vec_srab(vsl, vul);
+  // CHECK: call <16 x i8> @llvm.s390.vsrab(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vul = vec_srab(vul, vsl);
+  // CHECK: call <16 x i8> @llvm.s390.vsrab(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vul = vec_srab(vul, vul);
+  // CHECK: call <16 x i8> @llvm.s390.vsrab(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vd = vec_srab(vd, vsl);
+  // CHECK: call <16 x i8> @llvm.s390.vsrab(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vd = vec_srab(vd, vul);
+  // CHECK: call <16 x i8> @llvm.s390.vsrab(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+
+  vsc = vec_srl(vsc, vuc);
+  // CHECK: call <16 x i8> @llvm.s390.vsrl(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vsc = vec_srl(vsc, vus);
+  // CHECK: call <16 x i8> @llvm.s390.vsrl(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vsc = vec_srl(vsc, vui);
+  // CHECK: call <16 x i8> @llvm.s390.vsrl(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vuc = vec_srl(vuc, vuc);
+  // CHECK: call <16 x i8> @llvm.s390.vsrl(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vuc = vec_srl(vuc, vus);
+  // CHECK: call <16 x i8> @llvm.s390.vsrl(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vuc = vec_srl(vuc, vui);
+  // CHECK: call <16 x i8> @llvm.s390.vsrl(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vbc = vec_srl(vbc, vuc);
+  // CHECK: call <16 x i8> @llvm.s390.vsrl(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vbc = vec_srl(vbc, vus);
+  // CHECK: call <16 x i8> @llvm.s390.vsrl(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vbc = vec_srl(vbc, vui);
+  // CHECK: call <16 x i8> @llvm.s390.vsrl(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vss = vec_srl(vss, vuc);
+  // CHECK: call <16 x i8> @llvm.s390.vsrl(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vss = vec_srl(vss, vus);
+  // CHECK: call <16 x i8> @llvm.s390.vsrl(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vss = vec_srl(vss, vui);
+  // CHECK: call <16 x i8> @llvm.s390.vsrl(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vus = vec_srl(vus, vuc);
+  // CHECK: call <16 x i8> @llvm.s390.vsrl(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vus = vec_srl(vus, vus);
+  // CHECK: call <16 x i8> @llvm.s390.vsrl(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vus = vec_srl(vus, vui);
+  // CHECK: call <16 x i8> @llvm.s390.vsrl(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vbs = vec_srl(vbs, vuc);
+  // CHECK: call <16 x i8> @llvm.s390.vsrl(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vbs = vec_srl(vbs, vus);
+  // CHECK: call <16 x i8> @llvm.s390.vsrl(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vbs = vec_srl(vbs, vui);
+  // CHECK: call <16 x i8> @llvm.s390.vsrl(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vsi = vec_srl(vsi, vuc);
+  // CHECK: call <16 x i8> @llvm.s390.vsrl(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vsi = vec_srl(vsi, vus);
+  // CHECK: call <16 x i8> @llvm.s390.vsrl(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vsi = vec_srl(vsi, vui);
+  // CHECK: call <16 x i8> @llvm.s390.vsrl(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vui = vec_srl(vui, vuc);
+  // CHECK: call <16 x i8> @llvm.s390.vsrl(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vui = vec_srl(vui, vus);
+  // CHECK: call <16 x i8> @llvm.s390.vsrl(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vui = vec_srl(vui, vui);
+  // CHECK: call <16 x i8> @llvm.s390.vsrl(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vbi = vec_srl(vbi, vuc);
+  // CHECK: call <16 x i8> @llvm.s390.vsrl(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vbi = vec_srl(vbi, vus);
+  // CHECK: call <16 x i8> @llvm.s390.vsrl(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vbi = vec_srl(vbi, vui);
+  // CHECK: call <16 x i8> @llvm.s390.vsrl(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vsl = vec_srl(vsl, vuc);
+  // CHECK: call <16 x i8> @llvm.s390.vsrl(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vsl = vec_srl(vsl, vus);
+  // CHECK: call <16 x i8> @llvm.s390.vsrl(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vsl = vec_srl(vsl, vui);
+  // CHECK: call <16 x i8> @llvm.s390.vsrl(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vul = vec_srl(vul, vuc);
+  // CHECK: call <16 x i8> @llvm.s390.vsrl(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vul = vec_srl(vul, vus);
+  // CHECK: call <16 x i8> @llvm.s390.vsrl(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vul = vec_srl(vul, vui);
+  // CHECK: call <16 x i8> @llvm.s390.vsrl(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vbl = vec_srl(vbl, vuc);
+  // CHECK: call <16 x i8> @llvm.s390.vsrl(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vbl = vec_srl(vbl, vus);
+  // CHECK: call <16 x i8> @llvm.s390.vsrl(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vbl = vec_srl(vbl, vui);
+  // CHECK: call <16 x i8> @llvm.s390.vsrl(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+
+  vsc = vec_srb(vsc, vsc);
+  // CHECK: call <16 x i8> @llvm.s390.vsrlb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vsc = vec_srb(vsc, vuc);
+  // CHECK: call <16 x i8> @llvm.s390.vsrlb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vuc = vec_srb(vuc, vsc);
+  // CHECK: call <16 x i8> @llvm.s390.vsrlb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vuc = vec_srb(vuc, vuc);
+  // CHECK: call <16 x i8> @llvm.s390.vsrlb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vss = vec_srb(vss, vss);
+  // CHECK: call <16 x i8> @llvm.s390.vsrlb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vss = vec_srb(vss, vus);
+  // CHECK: call <16 x i8> @llvm.s390.vsrlb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vus = vec_srb(vus, vss);
+  // CHECK: call <16 x i8> @llvm.s390.vsrlb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vus = vec_srb(vus, vus);
+  // CHECK: call <16 x i8> @llvm.s390.vsrlb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vsi = vec_srb(vsi, vsi);
+  // CHECK: call <16 x i8> @llvm.s390.vsrlb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vsi = vec_srb(vsi, vui);
+  // CHECK: call <16 x i8> @llvm.s390.vsrlb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vui = vec_srb(vui, vsi);
+  // CHECK: call <16 x i8> @llvm.s390.vsrlb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vui = vec_srb(vui, vui);
+  // CHECK: call <16 x i8> @llvm.s390.vsrlb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vsl = vec_srb(vsl, vsl);
+  // CHECK: call <16 x i8> @llvm.s390.vsrlb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vsl = vec_srb(vsl, vul);
+  // CHECK: call <16 x i8> @llvm.s390.vsrlb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vul = vec_srb(vul, vsl);
+  // CHECK: call <16 x i8> @llvm.s390.vsrlb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vul = vec_srb(vul, vul);
+  // CHECK: call <16 x i8> @llvm.s390.vsrlb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vd = vec_srb(vd, vsl);
+  // CHECK: call <16 x i8> @llvm.s390.vsrlb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vd = vec_srb(vd, vul);
+  // CHECK: call <16 x i8> @llvm.s390.vsrlb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+
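+  // Note: vec_abs, vec_max and vec_min are implemented with generic
+  // compare/select operations rather than target intrinsics; hence no
+  // intrinsic CHECK lines for the calls below.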
+  vsc = vec_abs(vsc);
+  vss = vec_abs(vss);
+  vsi = vec_abs(vsi);
+  vsl = vec_abs(vsl);
+
+  vsc = vec_max(vsc, vsc);
+  vsc = vec_max(vsc, vbc);
+  vsc = vec_max(vbc, vsc);
+  vuc = vec_max(vuc, vuc);
+  vuc = vec_max(vuc, vbc);
+  vuc = vec_max(vbc, vuc);
+  vss = vec_max(vss, vss);
+  vss = vec_max(vss, vbs);
+  vss = vec_max(vbs, vss);
+  vus = vec_max(vus, vus);
+  vus = vec_max(vus, vbs);
+  vus = vec_max(vbs, vus);
+  vsi = vec_max(vsi, vsi);
+  vsi = vec_max(vsi, vbi);
+  vsi = vec_max(vbi, vsi);
+  vui = vec_max(vui, vui);
+  vui = vec_max(vui, vbi);
+  vui = vec_max(vbi, vui);
+  vsl = vec_max(vsl, vsl);
+  vsl = vec_max(vsl, vbl);
+  vsl = vec_max(vbl, vsl);
+  vul = vec_max(vul, vul);
+  vul = vec_max(vul, vbl);
+  vul = vec_max(vbl, vul);
+  vd = vec_max(vd, vd);
+
+  vsc = vec_min(vsc, vsc);
+  vsc = vec_min(vsc, vbc);
+  vsc = vec_min(vbc, vsc);
+  vuc = vec_min(vuc, vuc);
+  vuc = vec_min(vuc, vbc);
+  vuc = vec_min(vbc, vuc);
+  vss = vec_min(vss, vss);
+  vss = vec_min(vss, vbs);
+  vss = vec_min(vbs, vss);
+  vus = vec_min(vus, vus);
+  vus = vec_min(vus, vbs);
+  vus = vec_min(vbs, vus);
+  vsi = vec_min(vsi, vsi);
+  vsi = vec_min(vsi, vbi);
+  vsi = vec_min(vbi, vsi);
+  vui = vec_min(vui, vui);
+  vui = vec_min(vui, vbi);
+  vui = vec_min(vbi, vui);
+  vsl = vec_min(vsl, vsl);
+  vsl = vec_min(vsl, vbl);
+  vsl = vec_min(vbl, vsl);
+  vul = vec_min(vul, vul);
+  vul = vec_min(vul, vbl);
+  vul = vec_min(vbl, vul);
+  vd = vec_min(vd, vd);
+
+  vuc = vec_addc(vuc, vuc);
+  // CHECK: call <16 x i8> @llvm.s390.vaccb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vus = vec_addc(vus, vus);
+  // CHECK: call <8 x i16> @llvm.s390.vacch(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+  vui = vec_addc(vui, vui);
+  // CHECK: call <4 x i32> @llvm.s390.vaccf(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+  vul = vec_addc(vul, vul);
+  // CHECK: call <2 x i64> @llvm.s390.vaccg(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
+
+  vuc = vec_add_u128(vuc, vuc);
+  // CHECK: call <16 x i8> @llvm.s390.vaq(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vuc = vec_addc_u128(vuc, vuc);
+  // CHECK: call <16 x i8> @llvm.s390.vaccq(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vuc = vec_adde_u128(vuc, vuc, vuc);
+  // CHECK: call <16 x i8> @llvm.s390.vacq(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vuc = vec_addec_u128(vuc, vuc, vuc);
+  // CHECK: call <16 x i8> @llvm.s390.vacccq(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+
+  vsc = vec_avg(vsc, vsc);
+  // CHECK: call <16 x i8> @llvm.s390.vavgb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vuc = vec_avg(vuc, vuc);
+  // CHECK: call <16 x i8> @llvm.s390.vavglb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vss = vec_avg(vss, vss);
+  // CHECK: call <8 x i16> @llvm.s390.vavgh(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+  vus = vec_avg(vus, vus);
+  // CHECK: call <8 x i16> @llvm.s390.vavglh(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+  vsi = vec_avg(vsi, vsi);
+  // CHECK: call <4 x i32> @llvm.s390.vavgf(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+  vui = vec_avg(vui, vui);
+  // CHECK: call <4 x i32> @llvm.s390.vavglf(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+  vsl = vec_avg(vsl, vsl);
+  // CHECK: call <2 x i64> @llvm.s390.vavgg(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
+  vul = vec_avg(vul, vul);
+  // CHECK: call <2 x i64> @llvm.s390.vavglg(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
+
+  vui = vec_checksum(vui, vui);
+  // CHECK: call <4 x i32> @llvm.s390.vcksm(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+
+  vus = vec_gfmsum(vuc, vuc);
+  // CHECK: call <8 x i16> @llvm.s390.vgfmb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vui = vec_gfmsum(vus, vus);
+  // CHECK: call <4 x i32> @llvm.s390.vgfmh(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+  vul = vec_gfmsum(vui, vui);
+  // CHECK: call <2 x i64> @llvm.s390.vgfmf(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+  vuc = vec_gfmsum_128(vul, vul);
+  // CHECK: call <16 x i8> @llvm.s390.vgfmg(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
+
+  vus = vec_gfmsum_accum(vuc, vuc, vus);
+  // CHECK: call <8 x i16> @llvm.s390.vgfmab(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <8 x i16> %{{.*}})
+  vui = vec_gfmsum_accum(vus, vus, vui);
+  // CHECK: call <4 x i32> @llvm.s390.vgfmah(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <4 x i32> %{{.*}})
+  vul = vec_gfmsum_accum(vui, vui, vul);
+  // CHECK: call <2 x i64> @llvm.s390.vgfmaf(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <2 x i64> %{{.*}})
+  vuc = vec_gfmsum_accum_128(vul, vul, vuc);
+  // CHECK: call <16 x i8> @llvm.s390.vgfmag(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <16 x i8> %{{.*}})
+
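+  // Note: vec_mladd likewise expands to generic multiply/add IR, with no
+  // target intrinsic to CHECK.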
+  vsc = vec_mladd(vsc, vsc, vsc);
+  vsc = vec_mladd(vuc, vsc, vsc);
+  vsc = vec_mladd(vsc, vuc, vuc);
+  vuc = vec_mladd(vuc, vuc, vuc);
+  vss = vec_mladd(vss, vss, vss);
+  vss = vec_mladd(vus, vss, vss);
+  vss = vec_mladd(vss, vus, vus);
+  vus = vec_mladd(vus, vus, vus);
+  vsi = vec_mladd(vsi, vsi, vsi);
+  vsi = vec_mladd(vui, vsi, vsi);
+  vsi = vec_mladd(vsi, vui, vui);
+  vui = vec_mladd(vui, vui, vui);
+
+  vsc = vec_mhadd(vsc, vsc, vsc);
+  // CHECK: call <16 x i8> @llvm.s390.vmahb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vuc = vec_mhadd(vuc, vuc, vuc);
+  // CHECK: call <16 x i8> @llvm.s390.vmalhb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vss = vec_mhadd(vss, vss, vss);
+  // CHECK: call <8 x i16> @llvm.s390.vmahh(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+  vus = vec_mhadd(vus, vus, vus);
+  // CHECK: call <8 x i16> @llvm.s390.vmalhh(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+  vsi = vec_mhadd(vsi, vsi, vsi);
+  // CHECK: call <4 x i32> @llvm.s390.vmahf(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+  vui = vec_mhadd(vui, vui, vui);
+  // CHECK: call <4 x i32> @llvm.s390.vmalhf(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+
+  vss = vec_meadd(vsc, vsc, vss);
+  // CHECK: call <8 x i16> @llvm.s390.vmaeb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <8 x i16> %{{.*}})
+  vus = vec_meadd(vuc, vuc, vus);
+  // CHECK: call <8 x i16> @llvm.s390.vmaleb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <8 x i16> %{{.*}})
+  vsi = vec_meadd(vss, vss, vsi);
+  // CHECK: call <4 x i32> @llvm.s390.vmaeh(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <4 x i32> %{{.*}})
+  vui = vec_meadd(vus, vus, vui);
+  // CHECK: call <4 x i32> @llvm.s390.vmaleh(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <4 x i32> %{{.*}})
+  vsl = vec_meadd(vsi, vsi, vsl);
+  // CHECK: call <2 x i64> @llvm.s390.vmaef(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <2 x i64> %{{.*}})
+  vul = vec_meadd(vui, vui, vul);
+  // CHECK: call <2 x i64> @llvm.s390.vmalef(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <2 x i64> %{{.*}})
+
+  vss = vec_moadd(vsc, vsc, vss);
+  // CHECK: call <8 x i16> @llvm.s390.vmaob(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <8 x i16> %{{.*}})
+  vus = vec_moadd(vuc, vuc, vus);
+  // CHECK: call <8 x i16> @llvm.s390.vmalob(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <8 x i16> %{{.*}})
+  vsi = vec_moadd(vss, vss, vsi);
+  // CHECK: call <4 x i32> @llvm.s390.vmaoh(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <4 x i32> %{{.*}})
+  vui = vec_moadd(vus, vus, vui);
+  // CHECK: call <4 x i32> @llvm.s390.vmaloh(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <4 x i32> %{{.*}})
+  vsl = vec_moadd(vsi, vsi, vsl);
+  // CHECK: call <2 x i64> @llvm.s390.vmaof(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <2 x i64> %{{.*}})
+  vul = vec_moadd(vui, vui, vul);
+  // CHECK: call <2 x i64> @llvm.s390.vmalof(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <2 x i64> %{{.*}})
+
+  vsc = vec_mulh(vsc, vsc);
+  // CHECK: call <16 x i8> @llvm.s390.vmhb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vuc = vec_mulh(vuc, vuc);
+  // CHECK: call <16 x i8> @llvm.s390.vmlhb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vss = vec_mulh(vss, vss);
+  // CHECK: call <8 x i16> @llvm.s390.vmhh(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+  vus = vec_mulh(vus, vus);
+  // CHECK: call <8 x i16> @llvm.s390.vmlhh(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+  vsi = vec_mulh(vsi, vsi);
+  // CHECK: call <4 x i32> @llvm.s390.vmhf(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+  vui = vec_mulh(vui, vui);
+  // CHECK: call <4 x i32> @llvm.s390.vmlhf(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+
+  vss = vec_mule(vsc, vsc);
+  // CHECK: call <8 x i16> @llvm.s390.vmeb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vus = vec_mule(vuc, vuc);
+  // CHECK: call <8 x i16> @llvm.s390.vmleb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vsi = vec_mule(vss, vss);
+  // CHECK: call <4 x i32> @llvm.s390.vmeh(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+  vui = vec_mule(vus, vus);
+  // CHECK: call <4 x i32> @llvm.s390.vmleh(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+  vsl = vec_mule(vsi, vsi);
+  // CHECK: call <2 x i64> @llvm.s390.vmef(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+  vul = vec_mule(vui, vui);
+  // CHECK: call <2 x i64> @llvm.s390.vmlef(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+
+  vss = vec_mulo(vsc, vsc);
+  // CHECK: call <8 x i16> @llvm.s390.vmob(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vus = vec_mulo(vuc, vuc);
+  // CHECK: call <8 x i16> @llvm.s390.vmlob(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vsi = vec_mulo(vss, vss);
+  // CHECK: call <4 x i32> @llvm.s390.vmoh(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+  vui = vec_mulo(vus, vus);
+  // CHECK: call <4 x i32> @llvm.s390.vmloh(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+  vsl = vec_mulo(vsi, vsi);
+  // CHECK: call <2 x i64> @llvm.s390.vmof(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+  vul = vec_mulo(vui, vui);
+  // CHECK: call <2 x i64> @llvm.s390.vmlof(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+
+  vuc = vec_subc(vuc, vuc);
+  // CHECK: call <16 x i8> @llvm.s390.vscbib(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vus = vec_subc(vus, vus);
+  // CHECK: call <8 x i16> @llvm.s390.vscbih(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+  vui = vec_subc(vui, vui);
+  // CHECK: call <4 x i32> @llvm.s390.vscbif(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+  vul = vec_subc(vul, vul);
+  // CHECK: call <2 x i64> @llvm.s390.vscbig(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
+
+  vuc = vec_sub_u128(vuc, vuc);
+  // CHECK: call <16 x i8> @llvm.s390.vsq(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vuc = vec_subc_u128(vuc, vuc);
+  // CHECK: call <16 x i8> @llvm.s390.vscbiq(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vuc = vec_sube_u128(vuc, vuc, vuc);
+  // CHECK: call <16 x i8> @llvm.s390.vsbiq(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vuc = vec_subec_u128(vuc, vuc, vuc);
+  // CHECK: call <16 x i8> @llvm.s390.vsbcbiq(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+
+  vui = vec_sum4(vuc, vuc);
+  // CHECK: call <4 x i32> @llvm.s390.vsumb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vui = vec_sum4(vus, vus);
+  // CHECK: call <4 x i32> @llvm.s390.vsumh(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+  vul = vec_sum2(vus, vus);
+  // CHECK: call <2 x i64> @llvm.s390.vsumgh(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+  vul = vec_sum2(vui, vui);
+  // CHECK: call <2 x i64> @llvm.s390.vsumgf(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+  vuc = vec_sum_u128(vui, vui);
+  // CHECK: call <16 x i8> @llvm.s390.vsumqf(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+  vuc = vec_sum_u128(vul, vul);
+  // CHECK: call <16 x i8> @llvm.s390.vsumqg(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
+
+  idx = vec_test_mask(vsc, vuc);
+  // CHECK: call i32 @llvm.s390.vtm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  idx = vec_test_mask(vuc, vuc);
+  // CHECK: call i32 @llvm.s390.vtm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  idx = vec_test_mask(vss, vus);
+  // CHECK: call i32 @llvm.s390.vtm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  idx = vec_test_mask(vus, vus);
+  // CHECK: call i32 @llvm.s390.vtm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  idx = vec_test_mask(vsi, vui);
+  // CHECK: call i32 @llvm.s390.vtm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  idx = vec_test_mask(vui, vui);
+  // CHECK: call i32 @llvm.s390.vtm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  idx = vec_test_mask(vsl, vul);
+  // CHECK: call i32 @llvm.s390.vtm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  idx = vec_test_mask(vul, vul);
+  // CHECK: call i32 @llvm.s390.vtm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  idx = vec_test_mask(vd, vul);
+  // CHECK: call i32 @llvm.s390.vtm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+}
+
+void test_string(void) {
+  vsc = vec_cp_until_zero(vsc);
+  // CHECK: call <16 x i8> @llvm.s390.vistrb(<16 x i8> %{{.*}})
+  vuc = vec_cp_until_zero(vuc);
+  // CHECK: call <16 x i8> @llvm.s390.vistrb(<16 x i8> %{{.*}})
+  vbc = vec_cp_until_zero(vbc);
+  // CHECK: call <16 x i8> @llvm.s390.vistrb(<16 x i8> %{{.*}})
+  vss = vec_cp_until_zero(vss);
+  // CHECK: call <8 x i16> @llvm.s390.vistrh(<8 x i16> %{{.*}})
+  vus = vec_cp_until_zero(vus);
+  // CHECK: call <8 x i16> @llvm.s390.vistrh(<8 x i16> %{{.*}})
+  vbs = vec_cp_until_zero(vbs);
+  // CHECK: call <8 x i16> @llvm.s390.vistrh(<8 x i16> %{{.*}})
+  vsi = vec_cp_until_zero(vsi);
+  // CHECK: call <4 x i32> @llvm.s390.vistrf(<4 x i32> %{{.*}})
+  vui = vec_cp_until_zero(vui);
+  // CHECK: call <4 x i32> @llvm.s390.vistrf(<4 x i32> %{{.*}})
+  vbi = vec_cp_until_zero(vbi);
+  // CHECK: call <4 x i32> @llvm.s390.vistrf(<4 x i32> %{{.*}})
+
+  vsc = vec_cp_until_zero_cc(vsc, &cc);
+  // CHECK: call { <16 x i8>, i32 } @llvm.s390.vistrbs(<16 x i8> %{{.*}})
+  vuc = vec_cp_until_zero_cc(vuc, &cc);
+  // CHECK: call { <16 x i8>, i32 } @llvm.s390.vistrbs(<16 x i8> %{{.*}})
+  vbc = vec_cp_until_zero_cc(vbc, &cc);
+  // CHECK: call { <16 x i8>, i32 } @llvm.s390.vistrbs(<16 x i8> %{{.*}})
+  vss = vec_cp_until_zero_cc(vss, &cc);
+  // CHECK: call { <8 x i16>, i32 } @llvm.s390.vistrhs(<8 x i16> %{{.*}})
+  vus = vec_cp_until_zero_cc(vus, &cc);
+  // CHECK: call { <8 x i16>, i32 } @llvm.s390.vistrhs(<8 x i16> %{{.*}})
+  vbs = vec_cp_until_zero_cc(vbs, &cc);
+  // CHECK: call { <8 x i16>, i32 } @llvm.s390.vistrhs(<8 x i16> %{{.*}})
+  vsi = vec_cp_until_zero_cc(vsi, &cc);
+  // CHECK: call { <4 x i32>, i32 } @llvm.s390.vistrfs(<4 x i32> %{{.*}})
+  vui = vec_cp_until_zero_cc(vui, &cc);
+  // CHECK: call { <4 x i32>, i32 } @llvm.s390.vistrfs(<4 x i32> %{{.*}})
+  vbi = vec_cp_until_zero_cc(vbi, &cc);
+  // CHECK: call { <4 x i32>, i32 } @llvm.s390.vistrfs(<4 x i32> %{{.*}})
+
+  vsc = vec_cmpeq_idx(vsc, vsc);
+  // CHECK: call <16 x i8> @llvm.s390.vfeeb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vuc = vec_cmpeq_idx(vuc, vuc);
+  // CHECK: call <16 x i8> @llvm.s390.vfeeb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vuc = vec_cmpeq_idx(vbc, vbc);
+  // CHECK: call <16 x i8> @llvm.s390.vfeeb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vss = vec_cmpeq_idx(vss, vss);
+  // CHECK: call <8 x i16> @llvm.s390.vfeeh(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+  vus = vec_cmpeq_idx(vus, vus);
+  // CHECK: call <8 x i16> @llvm.s390.vfeeh(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+  vus = vec_cmpeq_idx(vbs, vbs);
+  // CHECK: call <8 x i16> @llvm.s390.vfeeh(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+  vsi = vec_cmpeq_idx(vsi, vsi);
+  // CHECK: call <4 x i32> @llvm.s390.vfeef(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+  vui = vec_cmpeq_idx(vui, vui);
+  // CHECK: call <4 x i32> @llvm.s390.vfeef(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+  vui = vec_cmpeq_idx(vbi, vbi);
+  // CHECK: call <4 x i32> @llvm.s390.vfeef(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+
+  vsc = vec_cmpeq_idx_cc(vsc, vsc, &cc);
+  // CHECK: call { <16 x i8>, i32 } @llvm.s390.vfeebs(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vuc = vec_cmpeq_idx_cc(vuc, vuc, &cc);
+  // CHECK: call { <16 x i8>, i32 } @llvm.s390.vfeebs(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vuc = vec_cmpeq_idx_cc(vbc, vbc, &cc);
+  // CHECK: call { <16 x i8>, i32 } @llvm.s390.vfeebs(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vss = vec_cmpeq_idx_cc(vss, vss, &cc);
+  // CHECK: call { <8 x i16>, i32 } @llvm.s390.vfeehs(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+  vus = vec_cmpeq_idx_cc(vus, vus, &cc);
+  // CHECK: call { <8 x i16>, i32 } @llvm.s390.vfeehs(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+  vus = vec_cmpeq_idx_cc(vbs, vbs, &cc);
+  // CHECK: call { <8 x i16>, i32 } @llvm.s390.vfeehs(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+  vsi = vec_cmpeq_idx_cc(vsi, vsi, &cc);
+  // CHECK: call { <4 x i32>, i32 } @llvm.s390.vfeefs(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+  vui = vec_cmpeq_idx_cc(vui, vui, &cc);
+  // CHECK: call { <4 x i32>, i32 } @llvm.s390.vfeefs(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+  vui = vec_cmpeq_idx_cc(vbi, vbi, &cc);
+  // CHECK: call { <4 x i32>, i32 } @llvm.s390.vfeefs(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+
+  vsc = vec_cmpeq_or_0_idx(vsc, vsc);
+  // CHECK: call <16 x i8> @llvm.s390.vfeezb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vuc = vec_cmpeq_or_0_idx(vuc, vuc);
+  // CHECK: call <16 x i8> @llvm.s390.vfeezb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vuc = vec_cmpeq_or_0_idx(vbc, vbc);
+  // CHECK: call <16 x i8> @llvm.s390.vfeezb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vss = vec_cmpeq_or_0_idx(vss, vss);
+  // CHECK: call <8 x i16> @llvm.s390.vfeezh(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+  vus = vec_cmpeq_or_0_idx(vus, vus);
+  // CHECK: call <8 x i16> @llvm.s390.vfeezh(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+  vus = vec_cmpeq_or_0_idx(vbs, vbs);
+  // CHECK: call <8 x i16> @llvm.s390.vfeezh(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+  vsi = vec_cmpeq_or_0_idx(vsi, vsi);
+  // CHECK: call <4 x i32> @llvm.s390.vfeezf(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+  vui = vec_cmpeq_or_0_idx(vui, vui);
+  // CHECK: call <4 x i32> @llvm.s390.vfeezf(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+  vui = vec_cmpeq_or_0_idx(vbi, vbi);
+  // CHECK: call <4 x i32> @llvm.s390.vfeezf(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+
+  vsc = vec_cmpeq_or_0_idx_cc(vsc, vsc, &cc);
+  // CHECK: call { <16 x i8>, i32 } @llvm.s390.vfeezbs(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vuc = vec_cmpeq_or_0_idx_cc(vuc, vuc, &cc);
+  // CHECK: call { <16 x i8>, i32 } @llvm.s390.vfeezbs(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vuc = vec_cmpeq_or_0_idx_cc(vbc, vbc, &cc);
+  // CHECK: call { <16 x i8>, i32 } @llvm.s390.vfeezbs(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vss = vec_cmpeq_or_0_idx_cc(vss, vss, &cc);
+  // CHECK: call { <8 x i16>, i32 } @llvm.s390.vfeezhs(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+  vus = vec_cmpeq_or_0_idx_cc(vus, vus, &cc);
+  // CHECK: call { <8 x i16>, i32 } @llvm.s390.vfeezhs(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+  vus = vec_cmpeq_or_0_idx_cc(vbs, vbs, &cc);
+  // CHECK: call { <8 x i16>, i32 } @llvm.s390.vfeezhs(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+  vsi = vec_cmpeq_or_0_idx_cc(vsi, vsi, &cc);
+  // CHECK: call { <4 x i32>, i32 } @llvm.s390.vfeezfs(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+  vui = vec_cmpeq_or_0_idx_cc(vui, vui, &cc);
+  // CHECK: call { <4 x i32>, i32 } @llvm.s390.vfeezfs(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+  vui = vec_cmpeq_or_0_idx_cc(vbi, vbi, &cc);
+  // CHECK: call { <4 x i32>, i32 } @llvm.s390.vfeezfs(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+
+  vsc = vec_cmpne_idx(vsc, vsc);
+  // CHECK: call <16 x i8> @llvm.s390.vfeneb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vuc = vec_cmpne_idx(vuc, vuc);
+  // CHECK: call <16 x i8> @llvm.s390.vfeneb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vuc = vec_cmpne_idx(vbc, vbc);
+  // CHECK: call <16 x i8> @llvm.s390.vfeneb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vss = vec_cmpne_idx(vss, vss);
+  // CHECK: call <8 x i16> @llvm.s390.vfeneh(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+  vus = vec_cmpne_idx(vus, vus);
+  // CHECK: call <8 x i16> @llvm.s390.vfeneh(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+  vus = vec_cmpne_idx(vbs, vbs);
+  // CHECK: call <8 x i16> @llvm.s390.vfeneh(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+  vsi = vec_cmpne_idx(vsi, vsi);
+  // CHECK: call <4 x i32> @llvm.s390.vfenef(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+  vui = vec_cmpne_idx(vui, vui);
+  // CHECK: call <4 x i32> @llvm.s390.vfenef(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+  vui = vec_cmpne_idx(vbi, vbi);
+  // CHECK: call <4 x i32> @llvm.s390.vfenef(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+
+  vsc = vec_cmpne_idx_cc(vsc, vsc, &cc);
+  // CHECK: call { <16 x i8>, i32 } @llvm.s390.vfenebs(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vuc = vec_cmpne_idx_cc(vuc, vuc, &cc);
+  // CHECK: call { <16 x i8>, i32 } @llvm.s390.vfenebs(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vuc = vec_cmpne_idx_cc(vbc, vbc, &cc);
+  // CHECK: call { <16 x i8>, i32 } @llvm.s390.vfenebs(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vss = vec_cmpne_idx_cc(vss, vss, &cc);
+  // CHECK: call { <8 x i16>, i32 } @llvm.s390.vfenehs(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+  vus = vec_cmpne_idx_cc(vus, vus, &cc);
+  // CHECK: call { <8 x i16>, i32 } @llvm.s390.vfenehs(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+  vus = vec_cmpne_idx_cc(vbs, vbs, &cc);
+  // CHECK: call { <8 x i16>, i32 } @llvm.s390.vfenehs(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+  vsi = vec_cmpne_idx_cc(vsi, vsi, &cc);
+  // CHECK: call { <4 x i32>, i32 } @llvm.s390.vfenefs(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+  vui = vec_cmpne_idx_cc(vui, vui, &cc);
+  // CHECK: call { <4 x i32>, i32 } @llvm.s390.vfenefs(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+  vui = vec_cmpne_idx_cc(vbi, vbi, &cc);
+  // CHECK: call { <4 x i32>, i32 } @llvm.s390.vfenefs(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+
+  vsc = vec_cmpne_or_0_idx(vsc, vsc);
+  // CHECK: call <16 x i8> @llvm.s390.vfenezb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vuc = vec_cmpne_or_0_idx(vuc, vuc);
+  // CHECK: call <16 x i8> @llvm.s390.vfenezb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vuc = vec_cmpne_or_0_idx(vbc, vbc);
+  // CHECK: call <16 x i8> @llvm.s390.vfenezb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vss = vec_cmpne_or_0_idx(vss, vss);
+  // CHECK: call <8 x i16> @llvm.s390.vfenezh(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+  vus = vec_cmpne_or_0_idx(vus, vus);
+  // CHECK: call <8 x i16> @llvm.s390.vfenezh(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+  vus = vec_cmpne_or_0_idx(vbs, vbs);
+  // CHECK: call <8 x i16> @llvm.s390.vfenezh(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+  vsi = vec_cmpne_or_0_idx(vsi, vsi);
+  // CHECK: call <4 x i32> @llvm.s390.vfenezf(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+  vui = vec_cmpne_or_0_idx(vui, vui);
+  // CHECK: call <4 x i32> @llvm.s390.vfenezf(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+  vui = vec_cmpne_or_0_idx(vbi, vbi);
+  // CHECK: call <4 x i32> @llvm.s390.vfenezf(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+
+  vsc = vec_cmpne_or_0_idx_cc(vsc, vsc, &cc);
+  // CHECK: call { <16 x i8>, i32 } @llvm.s390.vfenezbs(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vuc = vec_cmpne_or_0_idx_cc(vuc, vuc, &cc);
+  // CHECK: call { <16 x i8>, i32 } @llvm.s390.vfenezbs(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vuc = vec_cmpne_or_0_idx_cc(vbc, vbc, &cc);
+  // CHECK: call { <16 x i8>, i32 } @llvm.s390.vfenezbs(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  vss = vec_cmpne_or_0_idx_cc(vss, vss, &cc);
+  // CHECK: call { <8 x i16>, i32 } @llvm.s390.vfenezhs(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+  vus = vec_cmpne_or_0_idx_cc(vus, vus, &cc);
+  // CHECK: call { <8 x i16>, i32 } @llvm.s390.vfenezhs(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+  vus = vec_cmpne_or_0_idx_cc(vbs, vbs, &cc);
+  // CHECK: call { <8 x i16>, i32 } @llvm.s390.vfenezhs(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+  vsi = vec_cmpne_or_0_idx_cc(vsi, vsi, &cc);
+  // CHECK: call { <4 x i32>, i32 } @llvm.s390.vfenezfs(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+  vui = vec_cmpne_or_0_idx_cc(vui, vui, &cc);
+  // CHECK: call { <4 x i32>, i32 } @llvm.s390.vfenezfs(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+  vui = vec_cmpne_or_0_idx_cc(vbi, vbi, &cc);
+  // CHECK: call { <4 x i32>, i32 } @llvm.s390.vfenezfs(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+
+  vbc = vec_cmprg(vuc, vuc, vuc);
+  // CHECK: call <16 x i8> @llvm.s390.vstrcb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i32 4)
+  vbs = vec_cmprg(vus, vus, vus);
+  // CHECK: call <8 x i16> @llvm.s390.vstrch(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, i32 4)
+  vbi = vec_cmprg(vui, vui, vui);
+  // CHECK: call <4 x i32> @llvm.s390.vstrcf(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, i32 4)
+
+  vbc = vec_cmprg_cc(vuc, vuc, vuc, &cc);
+  // CHECK: call { <16 x i8>, i32 } @llvm.s390.vstrcbs(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i32 4)
+  vbs = vec_cmprg_cc(vus, vus, vus, &cc);
+  // CHECK: call { <8 x i16>, i32 } @llvm.s390.vstrchs(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, i32 4)
+  vbi = vec_cmprg_cc(vui, vui, vui, &cc);
+  // CHECK: call { <4 x i32>, i32 } @llvm.s390.vstrcfs(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, i32 4)
+
+  vuc = vec_cmprg_idx(vuc, vuc, vuc);
+  // CHECK: call <16 x i8> @llvm.s390.vstrcb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i32 0)
+  vus = vec_cmprg_idx(vus, vus, vus);
+  // CHECK: call <8 x i16> @llvm.s390.vstrch(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, i32 0)
+  vui = vec_cmprg_idx(vui, vui, vui);
+  // CHECK: call <4 x i32> @llvm.s390.vstrcf(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, i32 0)
+
+  vuc = vec_cmprg_idx_cc(vuc, vuc, vuc, &cc);
+  // CHECK: call { <16 x i8>, i32 } @llvm.s390.vstrcbs(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i32 0)
+  vus = vec_cmprg_idx_cc(vus, vus, vus, &cc);
+  // CHECK: call { <8 x i16>, i32 } @llvm.s390.vstrchs(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, i32 0)
+  vui = vec_cmprg_idx_cc(vui, vui, vui, &cc);
+  // CHECK: call { <4 x i32>, i32 } @llvm.s390.vstrcfs(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, i32 0)
+
+  vuc = vec_cmprg_or_0_idx(vuc, vuc, vuc);
+  // CHECK: call <16 x i8> @llvm.s390.vstrczb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i32 0)
+  vus = vec_cmprg_or_0_idx(vus, vus, vus);
+  // CHECK: call <8 x i16> @llvm.s390.vstrczh(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, i32 0)
+  vui = vec_cmprg_or_0_idx(vui, vui, vui);
+  // CHECK: call <4 x i32> @llvm.s390.vstrczf(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, i32 0)
+
+  vuc = vec_cmprg_or_0_idx_cc(vuc, vuc, vuc, &cc);
+  // CHECK: call { <16 x i8>, i32 } @llvm.s390.vstrczbs(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i32 0)
+  vus = vec_cmprg_or_0_idx_cc(vus, vus, vus, &cc);
+  // CHECK: call { <8 x i16>, i32 } @llvm.s390.vstrczhs(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, i32 0)
+  vui = vec_cmprg_or_0_idx_cc(vui, vui, vui, &cc);
+  // CHECK: call { <4 x i32>, i32 } @llvm.s390.vstrczfs(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, i32 0)
+
+  vbc = vec_cmpnrg(vuc, vuc, vuc);
+  // CHECK: call <16 x i8> @llvm.s390.vstrcb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i32 12)
+  vbs = vec_cmpnrg(vus, vus, vus);
+  // CHECK: call <8 x i16> @llvm.s390.vstrch(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, i32 12)
+  vbi = vec_cmpnrg(vui, vui, vui);
+  // CHECK: call <4 x i32> @llvm.s390.vstrcf(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, i32 12)
+
+  vbc = vec_cmpnrg_cc(vuc, vuc, vuc, &cc);
+  // CHECK: call { <16 x i8>, i32 } @llvm.s390.vstrcbs(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i32 12)
+  vbs = vec_cmpnrg_cc(vus, vus, vus, &cc);
+  // CHECK: call { <8 x i16>, i32 } @llvm.s390.vstrchs(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, i32 12)
+  vbi = vec_cmpnrg_cc(vui, vui, vui, &cc);
+  // CHECK: call { <4 x i32>, i32 } @llvm.s390.vstrcfs(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, i32 12)
+
+  vuc = vec_cmpnrg_idx(vuc, vuc, vuc);
+  // CHECK: call <16 x i8> @llvm.s390.vstrcb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i32 8)
+  vus = vec_cmpnrg_idx(vus, vus, vus);
+  // CHECK: call <8 x i16> @llvm.s390.vstrch(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, i32 8)
+  vui = vec_cmpnrg_idx(vui, vui, vui);
+  // CHECK: call <4 x i32> @llvm.s390.vstrcf(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, i32 8)
+
+  vuc = vec_cmpnrg_idx_cc(vuc, vuc, vuc, &cc);
+  // CHECK: call { <16 x i8>, i32 } @llvm.s390.vstrcbs(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i32 8)
+  vus = vec_cmpnrg_idx_cc(vus, vus, vus, &cc);
+  // CHECK: call { <8 x i16>, i32 } @llvm.s390.vstrchs(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, i32 8)
+  vui = vec_cmpnrg_idx_cc(vui, vui, vui, &cc);
+  // CHECK: call { <4 x i32>, i32 } @llvm.s390.vstrcfs(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, i32 8)
+
+  vuc = vec_cmpnrg_or_0_idx(vuc, vuc, vuc);
+  // CHECK: call <16 x i8> @llvm.s390.vstrczb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i32 8)
+  vus = vec_cmpnrg_or_0_idx(vus, vus, vus);
+  // CHECK: call <8 x i16> @llvm.s390.vstrczh(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, i32 8)
+  vui = vec_cmpnrg_or_0_idx(vui, vui, vui);
+  // CHECK: call <4 x i32> @llvm.s390.vstrczf(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, i32 8)
+
+  vuc = vec_cmpnrg_or_0_idx_cc(vuc, vuc, vuc, &cc);
+  // CHECK: call { <16 x i8>, i32 } @llvm.s390.vstrczbs(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i32 8)
+  vus = vec_cmpnrg_or_0_idx_cc(vus, vus, vus, &cc);
+  // CHECK: call { <8 x i16>, i32 } @llvm.s390.vstrczhs(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, i32 8)
+  vui = vec_cmpnrg_or_0_idx_cc(vui, vui, vui, &cc);
+  // CHECK: call { <4 x i32>, i32 } @llvm.s390.vstrczfs(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, i32 8)
+
+  vbc = vec_find_any_eq(vsc, vsc);
+  // CHECK: call <16 x i8> @llvm.s390.vfaeb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i32 4)
+  vbc = vec_find_any_eq(vuc, vuc);
+  // CHECK: call <16 x i8> @llvm.s390.vfaeb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i32 4)
+  vbc = vec_find_any_eq(vbc, vbc);
+  // CHECK: call <16 x i8> @llvm.s390.vfaeb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i32 4)
+  vbs = vec_find_any_eq(vss, vss);
+  // CHECK: call <8 x i16> @llvm.s390.vfaeh(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, i32 4)
+  vbs = vec_find_any_eq(vus, vus);
+  // CHECK: call <8 x i16> @llvm.s390.vfaeh(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, i32 4)
+  vbs = vec_find_any_eq(vbs, vbs);
+  // CHECK: call <8 x i16> @llvm.s390.vfaeh(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, i32 4)
+  vbi = vec_find_any_eq(vsi, vsi);
+  // CHECK: call <4 x i32> @llvm.s390.vfaef(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, i32 4)
+  vbi = vec_find_any_eq(vui, vui);
+  // CHECK: call <4 x i32> @llvm.s390.vfaef(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, i32 4)
+  vbi = vec_find_any_eq(vbi, vbi);
+  // CHECK: call <4 x i32> @llvm.s390.vfaef(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, i32 4)
+
+  vbc = vec_find_any_eq_cc(vsc, vsc, &cc);
+  // CHECK: call { <16 x i8>, i32 } @llvm.s390.vfaebs(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i32 4)
+  vbc = vec_find_any_eq_cc(vuc, vuc, &cc);
+  // CHECK: call { <16 x i8>, i32 } @llvm.s390.vfaebs(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i32 4)
+  vbc = vec_find_any_eq_cc(vbc, vbc, &cc);
+  // CHECK: call { <16 x i8>, i32 } @llvm.s390.vfaebs(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i32 4)
+  vbs = vec_find_any_eq_cc(vss, vss, &cc);
+  // CHECK: call { <8 x i16>, i32 } @llvm.s390.vfaehs(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, i32 4)
+  vbs = vec_find_any_eq_cc(vus, vus, &cc);
+  // CHECK: call { <8 x i16>, i32 } @llvm.s390.vfaehs(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, i32 4)
+  vbs = vec_find_any_eq_cc(vbs, vbs, &cc);
+  // CHECK: call { <8 x i16>, i32 } @llvm.s390.vfaehs(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, i32 4)
+  vbi = vec_find_any_eq_cc(vsi, vsi, &cc);
+  // CHECK: call { <4 x i32>, i32 } @llvm.s390.vfaefs(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, i32 4)
+  vbi = vec_find_any_eq_cc(vui, vui, &cc);
+  // CHECK: call { <4 x i32>, i32 } @llvm.s390.vfaefs(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, i32 4)
+  vbi = vec_find_any_eq_cc(vbi, vbi, &cc);
+  // CHECK: call { <4 x i32>, i32 } @llvm.s390.vfaefs(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, i32 4)
+
+  vsc = vec_find_any_eq_idx(vsc, vsc);
+  // CHECK: call <16 x i8> @llvm.s390.vfaeb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i32 0)
+  vuc = vec_find_any_eq_idx(vuc, vuc);
+  // CHECK: call <16 x i8> @llvm.s390.vfaeb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i32 0)
+  vuc = vec_find_any_eq_idx(vbc, vbc);
+  // CHECK: call <16 x i8> @llvm.s390.vfaeb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i32 0)
+  vss = vec_find_any_eq_idx(vss, vss);
+  // CHECK: call <8 x i16> @llvm.s390.vfaeh(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, i32 0)
+  vus = vec_find_any_eq_idx(vus, vus);
+  // CHECK: call <8 x i16> @llvm.s390.vfaeh(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, i32 0)
+  vus = vec_find_any_eq_idx(vbs, vbs);
+  // CHECK: call <8 x i16> @llvm.s390.vfaeh(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, i32 0)
+  vsi = vec_find_any_eq_idx(vsi, vsi);
+  // CHECK: call <4 x i32> @llvm.s390.vfaef(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, i32 0)
+  vui = vec_find_any_eq_idx(vui, vui);
+  // CHECK: call <4 x i32> @llvm.s390.vfaef(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, i32 0)
+  vui = vec_find_any_eq_idx(vbi, vbi);
+  // CHECK: call <4 x i32> @llvm.s390.vfaef(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, i32 0)
+
+  vsc = vec_find_any_eq_idx_cc(vsc, vsc, &cc);
+  // CHECK: call { <16 x i8>, i32 } @llvm.s390.vfaebs(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i32 0)
+  vuc = vec_find_any_eq_idx_cc(vuc, vuc, &cc);
+  // CHECK: call { <16 x i8>, i32 } @llvm.s390.vfaebs(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i32 0)
+  vuc = vec_find_any_eq_idx_cc(vbc, vbc, &cc);
+  // CHECK: call { <16 x i8>, i32 } @llvm.s390.vfaebs(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i32 0)
+  vss = vec_find_any_eq_idx_cc(vss, vss, &cc);
+  // CHECK: call { <8 x i16>, i32 } @llvm.s390.vfaehs(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, i32 0)
+  vus = vec_find_any_eq_idx_cc(vus, vus, &cc);
+  // CHECK: call { <8 x i16>, i32 } @llvm.s390.vfaehs(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, i32 0)
+  vus = vec_find_any_eq_idx_cc(vbs, vbs, &cc);
+  // CHECK: call { <8 x i16>, i32 } @llvm.s390.vfaehs(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, i32 0)
+  vsi = vec_find_any_eq_idx_cc(vsi, vsi, &cc);
+  // CHECK: call { <4 x i32>, i32 } @llvm.s390.vfaefs(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, i32 0)
+  vui = vec_find_any_eq_idx_cc(vui, vui, &cc);
+  // CHECK: call { <4 x i32>, i32 } @llvm.s390.vfaefs(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, i32 0)
+  vui = vec_find_any_eq_idx_cc(vbi, vbi, &cc);
+  // CHECK: call { <4 x i32>, i32 } @llvm.s390.vfaefs(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, i32 0)
+
+  vsc = vec_find_any_eq_or_0_idx(vsc, vsc);
+  // CHECK: call <16 x i8> @llvm.s390.vfaezb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i32 0)
+  vuc = vec_find_any_eq_or_0_idx(vuc, vuc);
+  // CHECK: call <16 x i8> @llvm.s390.vfaezb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i32 0)
+  vuc = vec_find_any_eq_or_0_idx(vbc, vbc);
+  // CHECK: call <16 x i8> @llvm.s390.vfaezb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i32 0)
+  vss = vec_find_any_eq_or_0_idx(vss, vss);
+  // CHECK: call <8 x i16> @llvm.s390.vfaezh(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, i32 0)
+  vus = vec_find_any_eq_or_0_idx(vus, vus);
+  // CHECK: call <8 x i16> @llvm.s390.vfaezh(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, i32 0)
+  vus = vec_find_any_eq_or_0_idx(vbs, vbs);
+  // CHECK: call <8 x i16> @llvm.s390.vfaezh(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, i32 0)
+  vsi = vec_find_any_eq_or_0_idx(vsi, vsi);
+  // CHECK: call <4 x i32> @llvm.s390.vfaezf(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, i32 0)
+  vui = vec_find_any_eq_or_0_idx(vui, vui);
+  // CHECK: call <4 x i32> @llvm.s390.vfaezf(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, i32 0)
+  vui = vec_find_any_eq_or_0_idx(vbi, vbi);
+  // CHECK: call <4 x i32> @llvm.s390.vfaezf(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, i32 0)
+
+  vsc = vec_find_any_eq_or_0_idx_cc(vsc, vsc, &cc);
+  // CHECK: call { <16 x i8>, i32 } @llvm.s390.vfaezbs(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i32 0)
+  vuc = vec_find_any_eq_or_0_idx_cc(vuc, vuc, &cc);
+  // CHECK: call { <16 x i8>, i32 } @llvm.s390.vfaezbs(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i32 0)
+  vuc = vec_find_any_eq_or_0_idx_cc(vbc, vbc, &cc);
+  // CHECK: call { <16 x i8>, i32 } @llvm.s390.vfaezbs(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i32 0)
+  vss = vec_find_any_eq_or_0_idx_cc(vss, vss, &cc);
+  // CHECK: call { <8 x i16>, i32 } @llvm.s390.vfaezhs(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, i32 0)
+  vus = vec_find_any_eq_or_0_idx_cc(vus, vus, &cc);
+  // CHECK: call { <8 x i16>, i32 } @llvm.s390.vfaezhs(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, i32 0)
+  vus = vec_find_any_eq_or_0_idx_cc(vbs, vbs, &cc);
+  // CHECK: call { <8 x i16>, i32 } @llvm.s390.vfaezhs(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, i32 0)
+  vsi = vec_find_any_eq_or_0_idx_cc(vsi, vsi, &cc);
+  // CHECK: call { <4 x i32>, i32 } @llvm.s390.vfaezfs(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, i32 0)
+  vui = vec_find_any_eq_or_0_idx_cc(vui, vui, &cc);
+  // CHECK: call { <4 x i32>, i32 } @llvm.s390.vfaezfs(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, i32 0)
+  vui = vec_find_any_eq_or_0_idx_cc(vbi, vbi, &cc);
+  // CHECK: call { <4 x i32>, i32 } @llvm.s390.vfaezfs(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, i32 0)
+
+  vbc = vec_find_any_ne(vsc, vsc);
+  // CHECK: call <16 x i8> @llvm.s390.vfaeb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i32 12)
+  vbc = vec_find_any_ne(vuc, vuc);
+  // CHECK: call <16 x i8> @llvm.s390.vfaeb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i32 12)
+  vbc = vec_find_any_ne(vbc, vbc);
+  // CHECK: call <16 x i8> @llvm.s390.vfaeb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i32 12)
+  vbs = vec_find_any_ne(vss, vss);
+  // CHECK: call <8 x i16> @llvm.s390.vfaeh(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, i32 12)
+  vbs = vec_find_any_ne(vus, vus);
+  // CHECK: call <8 x i16> @llvm.s390.vfaeh(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, i32 12)
+  vbs = vec_find_any_ne(vbs, vbs);
+  // CHECK: call <8 x i16> @llvm.s390.vfaeh(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, i32 12)
+  vbi = vec_find_any_ne(vsi, vsi);
+  // CHECK: call <4 x i32> @llvm.s390.vfaef(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, i32 12)
+  vbi = vec_find_any_ne(vui, vui);
+  // CHECK: call <4 x i32> @llvm.s390.vfaef(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, i32 12)
+  vbi = vec_find_any_ne(vbi, vbi);
+  // CHECK: call <4 x i32> @llvm.s390.vfaef(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, i32 12)
+
+  vbc = vec_find_any_ne_cc(vsc, vsc, &cc);
+  // CHECK: call { <16 x i8>, i32 } @llvm.s390.vfaebs(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i32 12)
+  vbc = vec_find_any_ne_cc(vuc, vuc, &cc);
+  // CHECK: call { <16 x i8>, i32 } @llvm.s390.vfaebs(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i32 12)
+  vbc = vec_find_any_ne_cc(vbc, vbc, &cc);
+  // CHECK: call { <16 x i8>, i32 } @llvm.s390.vfaebs(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i32 12)
+  vbs = vec_find_any_ne_cc(vss, vss, &cc);
+  // CHECK: call { <8 x i16>, i32 } @llvm.s390.vfaehs(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, i32 12)
+  vbs = vec_find_any_ne_cc(vus, vus, &cc);
+  // CHECK: call { <8 x i16>, i32 } @llvm.s390.vfaehs(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, i32 12)
+  vbs = vec_find_any_ne_cc(vbs, vbs, &cc);
+  // CHECK: call { <8 x i16>, i32 } @llvm.s390.vfaehs(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, i32 12)
+  vbi = vec_find_any_ne_cc(vsi, vsi, &cc);
+  // CHECK: call { <4 x i32>, i32 } @llvm.s390.vfaefs(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, i32 12)
+  vbi = vec_find_any_ne_cc(vui, vui, &cc);
+  // CHECK: call { <4 x i32>, i32 } @llvm.s390.vfaefs(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, i32 12)
+  vbi = vec_find_any_ne_cc(vbi, vbi, &cc);
+  // CHECK: call { <4 x i32>, i32 } @llvm.s390.vfaefs(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, i32 12)
+
+  vsc = vec_find_any_ne_idx(vsc, vsc);
+  // CHECK: call <16 x i8> @llvm.s390.vfaeb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i32 8)
+  vuc = vec_find_any_ne_idx(vuc, vuc);
+  // CHECK: call <16 x i8> @llvm.s390.vfaeb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i32 8)
+  vuc = vec_find_any_ne_idx(vbc, vbc);
+  // CHECK: call <16 x i8> @llvm.s390.vfaeb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i32 8)
+  vss = vec_find_any_ne_idx(vss, vss);
+  // CHECK: call <8 x i16> @llvm.s390.vfaeh(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, i32 8)
+  vus = vec_find_any_ne_idx(vus, vus);
+  // CHECK: call <8 x i16> @llvm.s390.vfaeh(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, i32 8)
+  vus = vec_find_any_ne_idx(vbs, vbs);
+  // CHECK: call <8 x i16> @llvm.s390.vfaeh(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, i32 8)
+  vsi = vec_find_any_ne_idx(vsi, vsi);
+  // CHECK: call <4 x i32> @llvm.s390.vfaef(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, i32 8)
+  vui = vec_find_any_ne_idx(vui, vui);
+  // CHECK: call <4 x i32> @llvm.s390.vfaef(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, i32 8)
+  vui = vec_find_any_ne_idx(vbi, vbi);
+  // CHECK: call <4 x i32> @llvm.s390.vfaef(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, i32 8)
+
+  vsc = vec_find_any_ne_idx_cc(vsc, vsc, &cc);
+  // CHECK: call { <16 x i8>, i32 } @llvm.s390.vfaebs(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i32 8)
+  vuc = vec_find_any_ne_idx_cc(vuc, vuc, &cc);
+  // CHECK: call { <16 x i8>, i32 } @llvm.s390.vfaebs(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i32 8)
+  vuc = vec_find_any_ne_idx_cc(vbc, vbc, &cc);
+  // CHECK: call { <16 x i8>, i32 } @llvm.s390.vfaebs(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i32 8)
+  vss = vec_find_any_ne_idx_cc(vss, vss, &cc);
+  // CHECK: call { <8 x i16>, i32 } @llvm.s390.vfaehs(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, i32 8)
+  vus = vec_find_any_ne_idx_cc(vus, vus, &cc);
+  // CHECK: call { <8 x i16>, i32 } @llvm.s390.vfaehs(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, i32 8)
+  vus = vec_find_any_ne_idx_cc(vbs, vbs, &cc);
+  // CHECK: call { <8 x i16>, i32 } @llvm.s390.vfaehs(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, i32 8)
+  vsi = vec_find_any_ne_idx_cc(vsi, vsi, &cc);
+  // CHECK: call { <4 x i32>, i32 } @llvm.s390.vfaefs(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, i32 8)
+  vui = vec_find_any_ne_idx_cc(vui, vui, &cc);
+  // CHECK: call { <4 x i32>, i32 } @llvm.s390.vfaefs(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, i32 8)
+  vui = vec_find_any_ne_idx_cc(vbi, vbi, &cc);
+  // CHECK: call { <4 x i32>, i32 } @llvm.s390.vfaefs(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, i32 8)
+
+  vsc = vec_find_any_ne_or_0_idx(vsc, vsc);
+  // CHECK: call <16 x i8> @llvm.s390.vfaezb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i32 8)
+  vuc = vec_find_any_ne_or_0_idx(vuc, vuc);
+  // CHECK: call <16 x i8> @llvm.s390.vfaezb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i32 8)
+  vuc = vec_find_any_ne_or_0_idx(vbc, vbc);
+  // CHECK: call <16 x i8> @llvm.s390.vfaezb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i32 8)
+  vss = vec_find_any_ne_or_0_idx(vss, vss);
+  // CHECK: call <8 x i16> @llvm.s390.vfaezh(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, i32 8)
+  vus = vec_find_any_ne_or_0_idx(vus, vus);
+  // CHECK: call <8 x i16> @llvm.s390.vfaezh(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, i32 8)
+  vus = vec_find_any_ne_or_0_idx(vbs, vbs);
+  // CHECK: call <8 x i16> @llvm.s390.vfaezh(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, i32 8)
+  vsi = vec_find_any_ne_or_0_idx(vsi, vsi);
+  // CHECK: call <4 x i32> @llvm.s390.vfaezf(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, i32 8)
+  vui = vec_find_any_ne_or_0_idx(vui, vui);
+  // CHECK: call <4 x i32> @llvm.s390.vfaezf(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, i32 8)
+  vui = vec_find_any_ne_or_0_idx(vbi, vbi);
+  // CHECK: call <4 x i32> @llvm.s390.vfaezf(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, i32 8)
+
+  vsc = vec_find_any_ne_or_0_idx_cc(vsc, vsc, &cc);
+  // CHECK: call { <16 x i8>, i32 } @llvm.s390.vfaezbs(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i32 8)
+  vuc = vec_find_any_ne_or_0_idx_cc(vuc, vuc, &cc);
+  // CHECK: call { <16 x i8>, i32 } @llvm.s390.vfaezbs(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i32 8)
+  vuc = vec_find_any_ne_or_0_idx_cc(vbc, vbc, &cc);
+  // CHECK: call { <16 x i8>, i32 } @llvm.s390.vfaezbs(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i32 8)
+  vss = vec_find_any_ne_or_0_idx_cc(vss, vss, &cc);
+  // CHECK: call { <8 x i16>, i32 } @llvm.s390.vfaezhs(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, i32 8)
+  vus = vec_find_any_ne_or_0_idx_cc(vus, vus, &cc);
+  // CHECK: call { <8 x i16>, i32 } @llvm.s390.vfaezhs(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, i32 8)
+  vus = vec_find_any_ne_or_0_idx_cc(vbs, vbs, &cc);
+  // CHECK: call { <8 x i16>, i32 } @llvm.s390.vfaezhs(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, i32 8)
+  vsi = vec_find_any_ne_or_0_idx_cc(vsi, vsi, &cc);
+  // CHECK: call { <4 x i32>, i32 } @llvm.s390.vfaezfs(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, i32 8)
+  vui = vec_find_any_ne_or_0_idx_cc(vui, vui, &cc);
+  // CHECK: call { <4 x i32>, i32 } @llvm.s390.vfaezfs(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, i32 8)
+  vui = vec_find_any_ne_or_0_idx_cc(vbi, vbi, &cc);
+  // CHECK: call { <4 x i32>, i32 } @llvm.s390.vfaezfs(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, i32 8)
+}
+
+void test_float(void) {
+  vd = vec_abs(vd);
+  // CHECK: call <2 x double> @llvm.fabs.v2f64(<2 x double> %{{.*}})
+
+  vd = vec_nabs(vd);
+  // CHECK: [[ABS:%[^ ]+]] = tail call <2 x double> @llvm.fabs.v2f64(<2 x double> %{{.*}})
+  // CHECK-NEXT: fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, [[ABS]]
+
+  vd = vec_madd(vd, vd, vd);
+  // CHECK: call <2 x double> @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}})
+  vd = vec_msub(vd, vd, vd);
+  // CHECK: [[NEG:%[^ ]+]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %{{.*}}
+  // CHECK: call <2 x double> @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> [[NEG]])
+  vd = vec_sqrt(vd);
+  // CHECK: call <2 x double> @llvm.sqrt.v2f64(<2 x double> %{{.*}})
+
+  vd = vec_ld2f(cptrf);
+  // CHECK: [[VAL:%[^ ]+]] = load <2 x float>, <2 x float>* %{{.*}}
+  // CHECK: fpext <2 x float> [[VAL]] to <2 x double>
+  vec_st2f(vd, ptrf);
+  // CHECK: [[VAL:%[^ ]+]] = fptrunc <2 x double> %{{.*}} to <2 x float>
+  // CHECK: store <2 x float> [[VAL]], <2 x float>* %{{.*}}
+
+  vd = vec_ctd(vsl, 0);
+  // CHECK: sitofp <2 x i64> %{{.*}} to <2 x double>
+  vd = vec_ctd(vul, 0);
+  // CHECK: uitofp <2 x i64> %{{.*}} to <2 x double>
+  vd = vec_ctd(vsl, 1);
+  // CHECK: [[VAL:%[^ ]+]] = sitofp <2 x i64> %{{.*}} to <2 x double>
+  // CHECK: fmul <2 x double> [[VAL]], <double 5.000000e-01, double 5.000000e-01>
+  vd = vec_ctd(vul, 1);
+  // CHECK: [[VAL:%[^ ]+]] = uitofp <2 x i64> %{{.*}} to <2 x double>
+  // CHECK: fmul <2 x double> [[VAL]], <double 5.000000e-01, double 5.000000e-01>
+  vd = vec_ctd(vsl, 31);
+  // CHECK: [[VAL:%[^ ]+]] = sitofp <2 x i64> %{{.*}} to <2 x double>
+  // CHECK: fmul <2 x double> [[VAL]], <double 0x3E00000000000000, double 0x3E00000000000000>
+  vd = vec_ctd(vul, 31);
+  // CHECK: [[VAL:%[^ ]+]] = uitofp <2 x i64> %{{.*}} to <2 x double>
+  // CHECK: fmul <2 x double> [[VAL]], <double 0x3E00000000000000, double 0x3E00000000000000>
+
+  vsl = vec_ctsl(vd, 0);
+  // CHECK: fptosi <2 x double> %{{.*}} to <2 x i64>
+  vul = vec_ctul(vd, 0);
+  // CHECK: fptoui <2 x double> %{{.*}} to <2 x i64>
+  vsl = vec_ctsl(vd, 1);
+  // CHECK: [[VAL:%[^ ]+]] = fmul <2 x double> %{{.*}}, <double 2.000000e+00, double 2.000000e+00>
+  // CHECK: fptosi <2 x double> [[VAL]] to <2 x i64>
+  vul = vec_ctul(vd, 1);
+  // CHECK: [[VAL:%[^ ]+]] = fmul <2 x double> %{{.*}}, <double 2.000000e+00, double 2.000000e+00>
+  // CHECK: fptoui <2 x double> [[VAL]] to <2 x i64>
+  vsl = vec_ctsl(vd, 31);
+  // CHECK: [[VAL:%[^ ]+]] = fmul <2 x double> %{{.*}}, <double 0x41E0000000000000, double 0x41E0000000000000>
+  // CHECK: fptosi <2 x double> [[VAL]] to <2 x i64>
+  vul = vec_ctul(vd, 31);
+  // CHECK: [[VAL:%[^ ]+]] = fmul <2 x double> %{{.*}}, <double 0x41E0000000000000, double 0x41E0000000000000>
+  // CHECK: fptoui <2 x double> [[VAL]] to <2 x i64>
+
+  vd = vec_roundp(vd);
+  // CHECK: call <2 x double> @llvm.ceil.v2f64(<2 x double> %{{.*}})
+  vd = vec_ceil(vd);
+  // CHECK: call <2 x double> @llvm.ceil.v2f64(<2 x double> %{{.*}})
+  vd = vec_roundm(vd);
+  // CHECK: call <2 x double> @llvm.floor.v2f64(<2 x double> %{{.*}})
+  vd = vec_floor(vd);
+  // CHECK: call <2 x double> @llvm.floor.v2f64(<2 x double> %{{.*}})
+  vd = vec_roundz(vd);
+  // CHECK: call <2 x double> @llvm.trunc.v2f64(<2 x double> %{{.*}})
+  vd = vec_trunc(vd);
+  // CHECK: call <2 x double> @llvm.trunc.v2f64(<2 x double> %{{.*}})
+  vd = vec_roundc(vd);
+  // CHECK: call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %{{.*}})
+  vd = vec_round(vd);
+  // CHECK: call <2 x double> @llvm.s390.vfidb(<2 x double> %{{.*}}, i32 4, i32 4)
+
+  vbl = vec_fp_test_data_class(vd, 0, &cc);
+  // CHECK: call { <2 x i64>, i32 } @llvm.s390.vftcidb(<2 x double> %{{.*}}, i32 0)
+  vbl = vec_fp_test_data_class(vd, 4095, &cc);
+  // CHECK: call { <2 x i64>, i32 } @llvm.s390.vftcidb(<2 x double> %{{.*}}, i32 4095)
+}
diff --git a/test/CodeGen/builtins-wasm.c b/test/CodeGen/builtins-wasm.c
new file mode 100644
index 0000000..15f2e9d
--- /dev/null
+++ b/test/CodeGen/builtins-wasm.c
@@ -0,0 +1,16 @@
+// RUN: %clang_cc1 -triple wasm32-unknown-unknown -O3 -emit-llvm -o - %s \
+// RUN:   | FileCheck %s -check-prefix=WEBASSEMBLY32
+// RUN: %clang_cc1 -triple wasm64-unknown-unknown -O3 -emit-llvm -o - %s \
+// RUN:   | FileCheck %s -check-prefix=WEBASSEMBLY64
+
+__SIZE_TYPE__ f1(void) {
+  return __builtin_wasm_memory_size();
+// WEBASSEMBLY32: call {{i.*}} @llvm.wasm.memory.size.i32()
+// WEBASSEMBLY64: call {{i.*}} @llvm.wasm.memory.size.i64()
+}
+
+void f2(long delta) {
+  __builtin_wasm_grow_memory(delta);
+// WEBASSEMBLY32: call void @llvm.wasm.grow.memory.i32(i32 %{{.*}})
+// WEBASSEMBLY64: call void @llvm.wasm.grow.memory.i64(i64 %{{.*}})
+}
diff --git a/test/CodeGen/builtins-x86.c b/test/CodeGen/builtins-x86.c
index 8a5b5a2..83b11a0 100644
--- a/test/CodeGen/builtins-x86.c
+++ b/test/CodeGen/builtins-x86.c
@@ -1,5 +1,5 @@
-// RUN: %clang_cc1 -DUSE_64 -triple x86_64-unknown-unknown -emit-llvm -o %t %s
-// RUN: %clang_cc1 -DUSE_ALL -triple x86_64-unknown-unknown -fsyntax-only -o %t %s
+// RUN: %clang_cc1 -DUSE_64 -triple x86_64-unknown-unknown -target-feature +fxsr -target-feature +avx -target-feature +xsaveopt -target-feature +xsaves -target-feature +xsavec -emit-llvm -o %t %s
+// RUN: %clang_cc1 -DUSE_ALL -triple x86_64-unknown-unknown -target-feature +fxsr -target-feature +avx -target-feature +xsaveopt -target-feature +xsaves -target-feature +xsavec -fsyntax-only -o %t %s
 
 #ifdef USE_ALL
 #define USE_3DNOW
@@ -42,7 +42,7 @@
   signed int          tmp_i;
   unsigned int        tmp_Ui;
   signed long long    tmp_LLi;
-//  unsigned long long  tmp_ULLi;
+  unsigned long long  tmp_ULLi;
   float               tmp_f;
   double              tmp_d;
 
@@ -102,6 +102,9 @@
   const V4d* tmp_V4dCp;
   const V8f* tmp_V8fCp;
 
+  tmp_V2LLi = __builtin_ia32_undef128();
+  tmp_V4LLi = __builtin_ia32_undef256();
+
   tmp_i = __builtin_ia32_comieq(tmp_V4f, tmp_V4f);
   tmp_i = __builtin_ia32_comilt(tmp_V4f, tmp_V4f);
   tmp_i = __builtin_ia32_comile(tmp_V4f, tmp_V4f);
@@ -260,6 +263,24 @@
 
   (void) __builtin_ia32_ldmxcsr(tmp_Ui);
   tmp_Ui = __builtin_ia32_stmxcsr();
+  (void)__builtin_ia32_fxsave(tmp_vp);
+  (void)__builtin_ia32_fxsave64(tmp_vp);
+  (void)__builtin_ia32_fxrstor(tmp_vp);
+  (void)__builtin_ia32_fxrstor64(tmp_vp);
+
+  (void)__builtin_ia32_xsave(tmp_vp, tmp_ULLi);
+  (void)__builtin_ia32_xsave64(tmp_vp, tmp_ULLi);
+  (void)__builtin_ia32_xrstor(tmp_vp, tmp_ULLi);
+  (void)__builtin_ia32_xrstor64(tmp_vp, tmp_ULLi);
+  (void)__builtin_ia32_xsaveopt(tmp_vp, tmp_ULLi);
+  (void)__builtin_ia32_xsaveopt64(tmp_vp, tmp_ULLi);
+  (void)__builtin_ia32_xrstors(tmp_vp, tmp_ULLi);
+  (void)__builtin_ia32_xrstors64(tmp_vp, tmp_ULLi);
+  (void)__builtin_ia32_xsavec(tmp_vp, tmp_ULLi);
+  (void)__builtin_ia32_xsavec64(tmp_vp, tmp_ULLi);
+  (void)__builtin_ia32_xsaves(tmp_vp, tmp_ULLi);
+  (void)__builtin_ia32_xsaves64(tmp_vp, tmp_ULLi);
+
   tmp_V4f = __builtin_ia32_cvtpi2ps(tmp_V4f, tmp_V2i);
   tmp_V2i = __builtin_ia32_cvtps2pi(tmp_V4f);
   tmp_i = __builtin_ia32_cvtss2si(tmp_V4f);
@@ -365,12 +386,6 @@
   tmp_V4i = __builtin_ia32_pminsd128(tmp_V4i, tmp_V4i);
   tmp_V4i = __builtin_ia32_pminud128(tmp_V4i, tmp_V4i);
   tmp_V8s = __builtin_ia32_pminuw128(tmp_V8s, tmp_V8s);
-  tmp_V4i = __builtin_ia32_pmovsxbd128(tmp_V16c);
-  tmp_V2LLi = __builtin_ia32_pmovsxbq128(tmp_V16c);
-  tmp_V8s = __builtin_ia32_pmovsxbw128(tmp_V16c);
-  tmp_V2LLi = __builtin_ia32_pmovsxdq128(tmp_V4i);
-  tmp_V4i = __builtin_ia32_pmovsxwd128(tmp_V8s);
-  tmp_V2LLi = __builtin_ia32_pmovsxwq128(tmp_V8s);
   tmp_V4i = __builtin_ia32_pmovzxbd128(tmp_V16c);
   tmp_V2LLi = __builtin_ia32_pmovzxbq128(tmp_V16c);
   tmp_V8s = __builtin_ia32_pmovzxbw128(tmp_V16c);
@@ -450,14 +465,14 @@
   __builtin_ia32_movntdq256(tmp_V4LLip, tmp_V4LLi);
   __builtin_ia32_movntpd256(tmp_dp, tmp_V4d);
   __builtin_ia32_movntps256(tmp_fp, tmp_V8f);
-  tmp_V2d = __builtin_ia32_maskloadpd(tmp_V2dCp, tmp_V2d);
-  tmp_V4f = __builtin_ia32_maskloadps(tmp_V4fCp, tmp_V4f);
-  tmp_V4d = __builtin_ia32_maskloadpd256(tmp_V4dCp, tmp_V4d);
-  tmp_V8f = __builtin_ia32_maskloadps256(tmp_V8fCp, tmp_V8f);
-  __builtin_ia32_maskstorepd(tmp_V2dp, tmp_V2d, tmp_V2d);
-  __builtin_ia32_maskstoreps(tmp_V4fp, tmp_V4f, tmp_V4f);
-  __builtin_ia32_maskstorepd256(tmp_V4dp, tmp_V4d, tmp_V4d);
-  __builtin_ia32_maskstoreps256(tmp_V8fp, tmp_V8f, tmp_V8f);
+  tmp_V2d = __builtin_ia32_maskloadpd(tmp_V2dCp, tmp_V2LLi);
+  tmp_V4f = __builtin_ia32_maskloadps(tmp_V4fCp, tmp_V4i);
+  tmp_V4d = __builtin_ia32_maskloadpd256(tmp_V4dCp, tmp_V4LLi);
+  tmp_V8f = __builtin_ia32_maskloadps256(tmp_V8fCp, tmp_V8i);
+  __builtin_ia32_maskstorepd(tmp_V2dp, tmp_V2LLi, tmp_V2d);
+  __builtin_ia32_maskstoreps(tmp_V4fp, tmp_V4i, tmp_V4f);
+  __builtin_ia32_maskstorepd256(tmp_V4dp, tmp_V4LLi, tmp_V4d);
+  __builtin_ia32_maskstoreps256(tmp_V8fp, tmp_V8i, tmp_V8f);
 
 #ifdef USE_3DNOW
   tmp_V8c = __builtin_ia32_pavgusb(tmp_V8c, tmp_V8c);
diff --git a/test/CodeGen/builtinshufflevector2.c b/test/CodeGen/builtinshufflevector2.c
index 8712c99..0e7b2ec 100644
--- a/test/CodeGen/builtinshufflevector2.c
+++ b/test/CodeGen/builtinshufflevector2.c
@@ -3,7 +3,7 @@
 typedef float float4 __attribute__((ext_vector_type(4)));
 typedef unsigned int uint4 __attribute__((ext_vector_type(4)));
 
-// CHECK-LABEL: define void @clang_shufflevector_v_v(
+// CHECK-LABEL: define {{.*}}void @clang_shufflevector_v_v(
 void clang_shufflevector_v_v( float4* A, float4 x, uint4 mask ) {
 // CHECK: [[MASK:%.*]] = and <4 x i32> {{%.*}}, <i32 3, i32 3, i32 3, i32 3>
 // CHECK: [[I:%.*]] = extractelement <4 x i32> [[MASK]], i{{[0-9]+}} 0
@@ -27,14 +27,14 @@
   *A = __builtin_shufflevector( x, mask );
 }
 
-// CHECK-LABEL: define void @clang_shufflevector_v_v_c(
+// CHECK-LABEL: define {{.*}}void @clang_shufflevector_v_v_c(
 void clang_shufflevector_v_v_c( float4* A, float4 x, float4 y) {
 // CHECK: [[V:%.*]] = shufflevector <4 x float> {{%.*}}, <4 x float> {{%.*}}, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
 // CHECK: store <4 x float> [[V]], <4 x float>* {{%.*}}
   *A = __builtin_shufflevector( x, y, 0, 4, 1, 5 );
 }
 
-// CHECK-LABEL: define void @clang_shufflevector_v_v_undef(
+// CHECK-LABEL: define {{.*}}void @clang_shufflevector_v_v_undef(
 void clang_shufflevector_v_v_undef( float4* A, float4 x, float4 y) {
 // CHECK: [[V:%.*]] = shufflevector <4 x float> {{%.*}}, <4 x float> {{%.*}}, <4 x i32> <i32 0, i32 4, i32 undef, i32 5>
 // CHECK: store <4 x float> [[V]], <4 x float>* {{%.*}}
diff --git a/test/CodeGen/c-strings.c b/test/CodeGen/c-strings.c
index 588a716..4e14d98 100644
--- a/test/CodeGen/c-strings.c
+++ b/test/CodeGen/c-strings.c
@@ -15,6 +15,11 @@
 // MSABI: @f4.x = internal global %struct.s { i8* getelementptr inbounds ([6 x i8], [6 x i8]* @"\01??_C@_05CJBACGMB@hello?$AA@", i32 0, i32 0) }
 // CHECK: @x = global [3 x i8] c"ola", align [[ALIGN]]
 
+// XFAIL: hexagon
+// Hexagon aligns arrays of size 8+ bytes to a 64-bit boundary, which
+// fails the check for "@f3.x = ... align [ALIGN]", since ALIGN is derived
+// from the alignment of a single i8, which is still 1.
+
 #if defined(__s390x__)
 unsigned char align = 2;
 #else
@@ -23,44 +28,44 @@
 
 void bar(const char *);
 
-// CHECK-LABEL: define void @f0()
+// CHECK-LABEL: define {{.*}}void @f0()
 void f0() {
   bar("hello");
-  // ITANIUM: call void @bar({{.*}} @.str
-  // MSABI: call void @bar({{.*}} @"\01??_C@_05CJBACGMB@hello?$AA@"
+  // ITANIUM: call {{.*}}void @bar({{.*}} @.str
+  // MSABI: call {{.*}}void @bar({{.*}} @"\01??_C@_05CJBACGMB@hello?$AA@"
 }
 
-// CHECK-LABEL: define void @f1()
+// CHECK-LABEL: define {{.*}}void @f1()
 void f1() {
   static char *x = "hello";
   bar(x);
   // CHECK: [[T1:%.*]] = load i8*, i8** @f1.x
-  // CHECK: call void @bar(i8* [[T1:%.*]])
+  // CHECK: call {{.*}}void @bar(i8* [[T1:%.*]])
 }
 
-// CHECK-LABEL: define void @f2()
+// CHECK-LABEL: define {{.*}}void @f2()
 void f2() {
   static char x[] = "hello";
   bar(x);
-  // CHECK: call void @bar({{.*}} @f2.x
+  // CHECK: call {{.*}}void @bar({{.*}} @f2.x
 }
 
-// CHECK-LABEL: define void @f3()
+// CHECK-LABEL: define {{.*}}void @f3()
 void f3() {
   static char x[8] = "hello";
   bar(x);
-  // CHECK: call void @bar({{.*}} @f3.x
+  // CHECK: call {{.*}}void @bar({{.*}} @f3.x
 }
 
 void gaz(void *);
 
-// CHECK-LABEL: define void @f4()
+// CHECK-LABEL: define {{.*}}void @f4()
 void f4() {
   static struct s {
     char *name;
   } x = { "hello" };
   gaz(&x);
-  // CHECK: call void @gaz({{.*}} @f4.x
+  // CHECK: call {{.*}}void @gaz({{.*}} @f4.x
 }
 
 char x[3] = "ola";
diff --git a/test/CodeGen/c-unicode.c b/test/CodeGen/c-unicode.c
new file mode 100644
index 0000000..13d4bbf
--- /dev/null
+++ b/test/CodeGen/c-unicode.c
@@ -0,0 +1,8 @@
+// REQUIRES: x86-registered-target
+// RUN: %clang --target=x86_64--linux-gnu -S %s -o - | FileCheck %s -check-prefix=ALLOWED
+// RUN: not %clang --target=x86_64--linux-gnu -std=c89 -S %s -o - 2>&1 | FileCheck %s -check-prefix=DENIED
+int \uaccess = 0;
+// ALLOWED: "곎ss":
+// ALLOWED-NOT: "\uaccess":
+// DENIED: warning: universal character names are only valid in C99 or C++; treating as '\' followed by identifier [-Wunicode]
+// DENIED: error: expected identifier or '('
diff --git a/test/CodeGen/c11atomics-ios.c b/test/CodeGen/c11atomics-ios.c
index a869982..fb731df 100644
--- a/test/CodeGen/c11atomics-ios.c
+++ b/test/CodeGen/c11atomics-ios.c
@@ -103,21 +103,21 @@
 
 // CHECK-NEXT: [[P:%.*]] = load [[S]]*, [[S]]** [[FP]]
 // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]], [[S]]* [[P]], i32 0, i32 0
-// CHECK-NEXT: store i16 1, i16* [[T0]], align 2
+// CHECK-NEXT: store i16 1, i16* [[T0]], align 8
 // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]], [[S]]* [[P]], i32 0, i32 1
 // CHECK-NEXT: store i16 2, i16* [[T0]], align 2
 // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]], [[S]]* [[P]], i32 0, i32 2
-// CHECK-NEXT: store i16 3, i16* [[T0]], align 2
+// CHECK-NEXT: store i16 3, i16* [[T0]], align 4
 // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]], [[S]]* [[P]], i32 0, i32 3
 // CHECK-NEXT: store i16 4, i16* [[T0]], align 2
   __c11_atomic_init(fp, (S){1,2,3,4});
 
 // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]], [[S]]* [[X]], i32 0, i32 0
-// CHECK-NEXT: store i16 1, i16* [[T0]], align 2
+// CHECK-NEXT: store i16 1, i16* [[T0]], align 8
 // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]], [[S]]* [[X]], i32 0, i32 1
 // CHECK-NEXT: store i16 2, i16* [[T0]], align 2
 // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]], [[S]]* [[X]], i32 0, i32 2
-// CHECK-NEXT: store i16 3, i16* [[T0]], align 2
+// CHECK-NEXT: store i16 3, i16* [[T0]], align 4
 // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]], [[S]]* [[X]], i32 0, i32 3
 // CHECK-NEXT: store i16 4, i16* [[T0]], align 2
   _Atomic(S) x = (S){1,2,3,4};
@@ -157,29 +157,29 @@
 // CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[T0]], i8 0, i64 8, i32 8, i1 false)
 // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[APS]], [[APS]]* [[P]], i32 0, i32 0
 // CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[PS]], [[PS]]* [[T0]], i32 0, i32 0
-// CHECK-NEXT: store i16 1, i16* [[T1]], align 2
+// CHECK-NEXT: store i16 1, i16* [[T1]], align 8
 // CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[PS]], [[PS]]* [[T0]], i32 0, i32 1
 // CHECK-NEXT: store i16 2, i16* [[T1]], align 2
 // CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[PS]], [[PS]]* [[T0]], i32 0, i32 2
-// CHECK-NEXT: store i16 3, i16* [[T1]], align 2
+// CHECK-NEXT: store i16 3, i16* [[T1]], align 4
   __c11_atomic_init(fp, (PS){1,2,3});
 
 // CHECK-NEXT: [[T0:%.*]] = bitcast [[APS]]* [[X]] to i8*
 // CHECK-NEXT: call void @llvm.memset.p0i8.i32(i8* [[T0]], i8 0, i32 8, i32 8, i1 false)
 // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[APS]], [[APS]]* [[X]], i32 0, i32 0
 // CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[PS]], [[PS]]* [[T0]], i32 0, i32 0
-// CHECK-NEXT: store i16 1, i16* [[T1]], align 2
+// CHECK-NEXT: store i16 1, i16* [[T1]], align 8
 // CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[PS]], [[PS]]* [[T0]], i32 0, i32 1
 // CHECK-NEXT: store i16 2, i16* [[T1]], align 2
 // CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[PS]], [[PS]]* [[T0]], i32 0, i32 2
-// CHECK-NEXT: store i16 3, i16* [[T1]], align 2
+// CHECK-NEXT: store i16 3, i16* [[T1]], align 4
   _Atomic(PS) x = (PS){1,2,3};
 
 // CHECK-NEXT: [[T0:%.*]] = load [[APS]]*, [[APS]]** [[FP]]
 // CHECK-NEXT: [[T1:%.*]] = bitcast [[APS]]* [[T0]] to i64*
 // CHECK-NEXT: [[T2:%.*]] = load atomic i64, i64* [[T1]] seq_cst, align 8
 // CHECK-NEXT: [[T3:%.*]] = bitcast [[APS]]* [[TMP0]] to i64*
-// CHECK-NEXT: store i64 [[T2]], i64* [[T3]], align 2
+// CHECK-NEXT: store i64 [[T2]], i64* [[T3]], align 8
 // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[APS]], [[APS]]* [[TMP0]], i32 0, i32 0
 // CHECK-NEXT: [[T1:%.*]] = bitcast [[PS]]* [[F]] to i8*
 // CHECK-NEXT: [[T2:%.*]] = bitcast [[PS]]* [[T0]] to i8*
@@ -202,11 +202,120 @@
 // CHECK-NEXT: ret void
 }
 
-void testPromotedStructOps(_Atomic(PS) *p) {
-  PS a = __c11_atomic_load(p, 5);
-  __c11_atomic_store(p, a, 5);
-  PS b = __c11_atomic_exchange(p, a, 5);
+PS test_promoted_load(_Atomic(PS) *addr) {
+  // CHECK-LABEL: @test_promoted_load(%struct.PS* noalias sret %agg.result, { %struct.PS, [2 x i8] }* %addr)
+  // CHECK:   [[ADDR_ARG:%.*]] = alloca { %struct.PS, [2 x i8] }*, align 4
+  // CHECK:   [[ATOMIC_RES:%.*]] = alloca { %struct.PS, [2 x i8] }, align 8
+  // CHECK:   store { %struct.PS, [2 x i8] }* %addr, { %struct.PS, [2 x i8] }** [[ADDR_ARG]], align 4
+  // CHECK:   [[ADDR:%.*]] = load { %struct.PS, [2 x i8] }*, { %struct.PS, [2 x i8] }** [[ADDR_ARG]], align 4
+  // CHECK:   [[ADDR64:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ADDR]] to i64*
+  // CHECK:   [[ATOMIC_RES64:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ATOMIC_RES]] to i64*
+  // CHECK:   [[VAL:%.*]] = load atomic i64, i64* [[ADDR64]] seq_cst, align 8
+  // CHECK:   store i64 [[VAL]], i64* [[ATOMIC_RES64]], align 8
+  // CHECK:   [[ATOMIC_RES_STRUCT:%.*]] = bitcast i64* [[ATOMIC_RES64]] to %struct.PS*
+  // CHECK:   [[AGG_RESULT8:%.*]] = bitcast %struct.PS* %agg.result to i8*
+  // CHECK:   [[ATOMIC_RES8:%.*]] = bitcast %struct.PS* [[ATOMIC_RES_STRUCT]] to i8*
+  // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[AGG_RESULT8]], i8* [[ATOMIC_RES8]], i32 6, i32 2, i1 false)
 
-  _Bool v = __c11_atomic_compare_exchange_strong(p, &b, a, 5, 5);
-  v = __c11_atomic_compare_exchange_weak(p, &b, a, 5, 5);
+  return __c11_atomic_load(addr, 5);
+}
+
+void test_promoted_store(_Atomic(PS) *addr, PS *val) {
+  // CHECK-LABEL: @test_promoted_store({ %struct.PS, [2 x i8] }* %addr, %struct.PS* %val)
+  // CHECK:   [[ADDR_ARG:%.*]] = alloca { %struct.PS, [2 x i8] }*, align 4
+  // CHECK:   [[VAL_ARG:%.*]] = alloca %struct.PS*, align 4
+  // CHECK:   [[NONATOMIC_TMP:%.*]] = alloca %struct.PS, align 2
+  // CHECK:   [[ATOMIC_VAL:%.*]] = alloca { %struct.PS, [2 x i8] }, align 8
+  // CHECK:   store { %struct.PS, [2 x i8] }* %addr, { %struct.PS, [2 x i8] }** [[ADDR_ARG]], align 4
+  // CHECK:   store %struct.PS* %val, %struct.PS** [[VAL_ARG]], align 4
+  // CHECK:   [[ADDR:%.*]] = load { %struct.PS, [2 x i8] }*, { %struct.PS, [2 x i8] }** [[ADDR_ARG]], align 4
+  // CHECK:   [[VAL:%.*]] = load %struct.PS*, %struct.PS** [[VAL_ARG]], align 4
+  // CHECK:   [[NONATOMIC_TMP8:%.*]] = bitcast %struct.PS* [[NONATOMIC_TMP]] to i8*
+  // CHECK:   [[VAL8:%.*]] = bitcast %struct.PS* [[VAL]] to i8*
+  // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[NONATOMIC_TMP8]], i8* [[VAL8]], i32 6, i32 2, i1 false)
+  // CHECK:   [[ADDR64:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ADDR]] to i64*
+  // CHECK:   [[ATOMIC_VAL8:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ATOMIC_VAL]] to i8*
+  // CHECK:   [[NONATOMIC_TMP8:%.*]] = bitcast %struct.PS* [[NONATOMIC_TMP]] to i8*
+  // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[ATOMIC_VAL8]], i8* [[NONATOMIC_TMP8]], i64 6, i32 2, i1 false)
+  // CHECK:   [[ATOMIC_VAL64:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ATOMIC_VAL]] to i64*
+  // CHECK:   [[VAL64:%.*]] = load i64, i64* [[ATOMIC_VAL64]], align 8
+  // CHECK:   store atomic i64 [[VAL64]], i64* [[ADDR64]] seq_cst, align 8
+
+  __c11_atomic_store(addr, *val, 5);
+}
+
+PS test_promoted_exchange(_Atomic(PS) *addr, PS *val) {
+  // CHECK-LABEL: @test_promoted_exchange(%struct.PS* noalias sret %agg.result, { %struct.PS, [2 x i8] }* %addr, %struct.PS* %val)
+  // CHECK:   [[ADDR_ARG:%.*]] = alloca { %struct.PS, [2 x i8] }*, align 4
+  // CHECK:   [[VAL_ARG:%.*]] = alloca %struct.PS*, align 4
+  // CHECK:   [[NONATOMIC_TMP:%.*]] = alloca %struct.PS, align 2
+  // CHECK:   [[ATOMIC_VAL:%.*]] = alloca { %struct.PS, [2 x i8] }, align 8
+  // CHECK:   [[ATOMIC_RES:%.*]] = alloca { %struct.PS, [2 x i8] }, align 8
+  // CHECK:   store { %struct.PS, [2 x i8] }* %addr, { %struct.PS, [2 x i8] }** [[ADDR_ARG]], align 4
+  // CHECK:   store %struct.PS* %val, %struct.PS** [[VAL_ARG]], align 4
+  // CHECK:   [[ADDR:%.*]] = load { %struct.PS, [2 x i8] }*, { %struct.PS, [2 x i8] }** [[ADDR_ARG]], align 4
+  // CHECK:   [[VAL:%.*]] = load %struct.PS*, %struct.PS** [[VAL_ARG]], align 4
+  // CHECK:   [[NONATOMIC_TMP8:%.*]] = bitcast %struct.PS* [[NONATOMIC_TMP]] to i8*
+  // CHECK:   [[VAL8:%.*]] = bitcast %struct.PS* [[VAL]] to i8*
+  // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[NONATOMIC_TMP8]], i8* [[VAL8]], i32 6, i32 2, i1 false)
+  // CHECK:   [[ADDR64:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ADDR]] to i64*
+  // CHECK:   [[ATOMIC_VAL8:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ATOMIC_VAL]] to i8*
+  // CHECK:   [[NONATOMIC_TMP8:%.*]] = bitcast %struct.PS* [[NONATOMIC_TMP]] to i8*
+  // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[ATOMIC_VAL8]], i8* [[NONATOMIC_TMP8]], i64 6, i32 2, i1 false)
+  // CHECK:   [[ATOMIC_VAL64:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ATOMIC_VAL]] to i64*
+  // CHECK:   [[ATOMIC_RES64:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ATOMIC_RES]] to i64*
+  // CHECK:   [[VAL64:%.*]] = load i64, i64* [[ATOMIC_VAL64]], align 8
+  // CHECK:   [[RES:%.*]] = atomicrmw xchg i64* [[ADDR64]], i64 [[VAL64]] seq_cst
+  // CHECK:   store i64 [[RES]], i64* [[ATOMIC_RES64]], align 8
+  // CHECK:   [[ATOMIC_RES_STRUCT:%.*]] = bitcast i64* [[ATOMIC_RES64]] to %struct.PS*
+  // CHECK:   [[AGG_RESULT8:%.*]] = bitcast %struct.PS* %agg.result to i8*
+  // CHECK:   [[ATOMIC_RES8:%.*]] = bitcast %struct.PS* [[ATOMIC_RES_STRUCT]] to i8*
+  // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[AGG_RESULT8]], i8* [[ATOMIC_RES8]], i32 6, i32 2, i1 false)
+  return __c11_atomic_exchange(addr, *val, 5);
+}
+
+_Bool test_promoted_cmpxchg(_Atomic(PS) *addr, PS *desired, PS *new) {
+  // CHECK:   define zeroext i1 @test_promoted_cmpxchg({ %struct.PS, [2 x i8] }* %addr, %struct.PS* %desired, %struct.PS* %new) #0 {
+  // CHECK:   [[ADDR_ARG:%.*]] = alloca { %struct.PS, [2 x i8] }*, align 4
+  // CHECK:   [[DESIRED_ARG:%.*]] = alloca %struct.PS*, align 4
+  // CHECK:   [[NEW_ARG:%.*]] = alloca %struct.PS*, align 4
+  // CHECK:   [[NONATOMIC_TMP:%.*]] = alloca %struct.PS, align 2
+  // CHECK:   [[ATOMIC_DESIRED:%.*]] = alloca { %struct.PS, [2 x i8] }, align 8
+  // CHECK:   [[ATOMIC_NEW:%.*]] = alloca { %struct.PS, [2 x i8] }, align 8
+  // CHECK:   [[RES_ADDR:%.*]] = alloca i8, align 1
+  // CHECK:   store { %struct.PS, [2 x i8] }* %addr, { %struct.PS, [2 x i8] }** [[ADDR_ARG]], align 4
+  // CHECK:   store %struct.PS* %desired, %struct.PS** [[DESIRED_ARG]], align 4
+  // CHECK:   store %struct.PS* %new, %struct.PS** [[NEW_ARG]], align 4
+  // CHECK:   [[ADDR:%.*]] = load { %struct.PS, [2 x i8] }*, { %struct.PS, [2 x i8] }** [[ADDR_ARG]], align 4
+  // CHECK:   [[DESIRED:%.*]] = load %struct.PS*, %struct.PS** [[DESIRED_ARG]], align 4
+  // CHECK:   [[NEW:%.*]] = load %struct.PS*, %struct.PS** [[NEW_ARG]], align 4
+  // CHECK:   [[NONATOMIC_TMP8:%.*]] = bitcast %struct.PS* [[NONATOMIC_TMP]] to i8*
+  // CHECK:   [[NEW8:%.*]] = bitcast %struct.PS* [[NEW]] to i8*
+  // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[NONATOMIC_TMP8]], i8* [[NEW8]], i32 6, i32 2, i1 false)
+  // CHECK:   [[ADDR64:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ADDR]] to i64*
+  // CHECK:   [[ATOMIC_DESIRED8:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ATOMIC_DESIRED:%.*]] to i8*
+  // CHECK:   [[DESIRED8:%.*]] = bitcast %struct.PS* [[DESIRED]] to i8*
+  // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[ATOMIC_DESIRED8]], i8* [[DESIRED8]], i64 6, i32 2, i1 false)
+  // CHECK:   [[ATOMIC_DESIRED64:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ATOMIC_DESIRED:%.*]] to i64*
+  // CHECK:   [[ATOMIC_NEW8:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ATOMIC_NEW]] to i8*
+  // CHECK:   [[NONATOMIC_TMP8:%.*]] = bitcast %struct.PS* [[NONATOMIC_TMP]] to i8*
+  // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[ATOMIC_NEW8]], i8* [[NONATOMIC_TMP8]], i64 6, i32 2, i1 false)
+  // CHECK:   [[ATOMIC_NEW64:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ATOMIC_NEW]] to i64*
+  // CHECK:   [[ATOMIC_DESIRED_VAL64:%.*]] = load i64, i64* [[ATOMIC_DESIRED64]], align 8
+  // CHECK:   [[ATOMIC_NEW_VAL64:%.*]] = load i64, i64* [[ATOMIC_NEW64]], align 8
+  // CHECK:   [[RES:%.*]] = cmpxchg i64* [[ADDR64]], i64 [[ATOMIC_DESIRED_VAL64]], i64 [[ATOMIC_NEW_VAL64]] seq_cst seq_cst
+  // CHECK:   [[RES_VAL64:%.*]] = extractvalue { i64, i1 } [[RES]], 0
+  // CHECK:   [[RES_BOOL:%.*]] = extractvalue { i64, i1 } [[RES]], 1
+  // CHECK:   br i1 [[RES_BOOL]], label {{%.*}}, label {{%.*}}
+
+  // CHECK:   store i64 [[RES_VAL64]], i64* [[ATOMIC_DESIRED64]], align 8
+  // CHECK:   br label {{%.*}}
+
+  // CHECK:   [[RES_BOOL8:%.*]] = zext i1 [[RES_BOOL]] to i8
+  // CHECK:   store i8 [[RES_BOOL8]], i8* [[RES_ADDR]], align 1
+  // CHECK:   [[RES_BOOL8:%.*]] = load i8, i8* [[RES_ADDR]], align 1
+  // CHECK:   [[RETVAL:%.*]] = trunc i8 [[RES_BOOL8]] to i1
+  // CHECK:   ret i1 [[RETVAL]]
+
+  return __c11_atomic_compare_exchange_strong(addr, desired, *new, 5, 5);
 }
diff --git a/test/CodeGen/c11atomics.c b/test/CodeGen/c11atomics.c
index a35eef9..ccb6421 100644
--- a/test/CodeGen/c11atomics.c
+++ b/test/CodeGen/c11atomics.c
@@ -12,8 +12,24 @@
 // they're sufficiently rare that it's not worth making sure that the semantics
 // are correct.
 
-// CHECK: @testStructGlobal = global {{.*}} { i16 1, i16 2, i16 3, i16 4 }
-// CHECK: @testPromotedStructGlobal = global {{.*}} { %{{.*}} { i16 1, i16 2, i16 3 }, [2 x i8] zeroinitializer }
+struct elem;
+
+struct ptr {
+    struct elem *ptr;
+};
+// CHECK-DAG: %struct.ptr = type { %struct.elem* }
+
+struct elem {
+    _Atomic(struct ptr) link;
+};
+// CHECK-DAG: %struct.elem = type { %struct.ptr }
+
+struct ptr object;
+// CHECK-DAG: @object = common global %struct.ptr zeroinitializer
+
+// CHECK-DAG: @testStructGlobal = global {{.*}} { i16 1, i16 2, i16 3, i16 4 }
+// CHECK-DAG: @testPromotedStructGlobal = global {{.*}} { %{{.*}} { i16 1, i16 2, i16 3 }, [2 x i8] zeroinitializer }
+
 
 typedef int __attribute__((vector_size(16))) vector;
 
@@ -238,21 +254,21 @@
 
 // CHECK-NEXT: [[P:%.*]] = load [[S]]*, [[S]]** [[FP]]
 // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]], [[S]]* [[P]], i32 0, i32 0
-// CHECK-NEXT: store i16 1, i16* [[T0]], align 2
+// CHECK-NEXT: store i16 1, i16* [[T0]], align 8
 // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]], [[S]]* [[P]], i32 0, i32 1
 // CHECK-NEXT: store i16 2, i16* [[T0]], align 2
 // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]], [[S]]* [[P]], i32 0, i32 2
-// CHECK-NEXT: store i16 3, i16* [[T0]], align 2
+// CHECK-NEXT: store i16 3, i16* [[T0]], align 4
 // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]], [[S]]* [[P]], i32 0, i32 3
 // CHECK-NEXT: store i16 4, i16* [[T0]], align 2
   __c11_atomic_init(fp, (S){1,2,3,4});
 
 // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]], [[S]]* [[X]], i32 0, i32 0
-// CHECK-NEXT: store i16 1, i16* [[T0]], align 2
+// CHECK-NEXT: store i16 1, i16* [[T0]], align 8
 // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]], [[S]]* [[X]], i32 0, i32 1
 // CHECK-NEXT: store i16 2, i16* [[T0]], align 2
 // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]], [[S]]* [[X]], i32 0, i32 2
-// CHECK-NEXT: store i16 3, i16* [[T0]], align 2
+// CHECK-NEXT: store i16 3, i16* [[T0]], align 4
 // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]], [[S]]* [[X]], i32 0, i32 3
 // CHECK-NEXT: store i16 4, i16* [[T0]], align 2
   _Atomic(S) x = (S){1,2,3,4};
@@ -294,22 +310,22 @@
 // CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[T0]], i8 0, i64 8, i32 8, i1 false)
 // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[APS]], [[APS]]* [[P]], i32 0, i32 0
 // CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[PS]], [[PS]]* [[T0]], i32 0, i32 0
-// CHECK-NEXT: store i16 1, i16* [[T1]], align 2
+// CHECK-NEXT: store i16 1, i16* [[T1]], align 8
 // CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[PS]], [[PS]]* [[T0]], i32 0, i32 1
 // CHECK-NEXT: store i16 2, i16* [[T1]], align 2
 // CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[PS]], [[PS]]* [[T0]], i32 0, i32 2
-// CHECK-NEXT: store i16 3, i16* [[T1]], align 2
+// CHECK-NEXT: store i16 3, i16* [[T1]], align 4
   __c11_atomic_init(fp, (PS){1,2,3});
 
 // CHECK-NEXT: [[T0:%.*]] = bitcast [[APS]]* [[X]] to i8*
 // CHECK-NEXT: call void @llvm.memset.p0i8.i32(i8* [[T0]], i8 0, i32 8, i32 8, i1 false)
 // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[APS]], [[APS]]* [[X]], i32 0, i32 0
 // CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[PS]], [[PS]]* [[T0]], i32 0, i32 0
-// CHECK-NEXT: store i16 1, i16* [[T1]], align 2
+// CHECK-NEXT: store i16 1, i16* [[T1]], align 8
 // CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[PS]], [[PS]]* [[T0]], i32 0, i32 1
 // CHECK-NEXT: store i16 2, i16* [[T1]], align 2
 // CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[PS]], [[PS]]* [[T0]], i32 0, i32 2
-// CHECK-NEXT: store i16 3, i16* [[T1]], align 2
+// CHECK-NEXT: store i16 3, i16* [[T1]], align 4
   _Atomic(PS) x = (PS){1,2,3};
 
 // CHECK-NEXT: [[T0:%.*]] = load [[APS]]*, [[APS]]** [[FP]]
@@ -351,14 +367,110 @@
 // CHECK-NEXT: ret void
 }
 
-// CHECK: define arm_aapcscc void @testPromotedStructOps([[APS:.*]]*
+PS test_promoted_load(_Atomic(PS) *addr) {
+  // CHECK-LABEL: @test_promoted_load(%struct.PS* noalias sret %agg.result, { %struct.PS, [2 x i8] }* %addr)
+  // CHECK:   [[ADDR_ARG:%.*]] = alloca { %struct.PS, [2 x i8] }*, align 4
+  // CHECK:   [[ATOMIC_RES:%.*]] = alloca { %struct.PS, [2 x i8] }, align 8
+  // CHECK:   store { %struct.PS, [2 x i8] }* %addr, { %struct.PS, [2 x i8] }** [[ADDR_ARG]], align 4
+  // CHECK:   [[ADDR:%.*]] = load { %struct.PS, [2 x i8] }*, { %struct.PS, [2 x i8] }** [[ADDR_ARG]], align 4
+  // CHECK:   [[ADDR64:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ADDR]] to i64*
+  // CHECK:   [[ATOMIC_RES64:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ATOMIC_RES]] to i64*
+  // CHECK:   [[ADDR8:%.*]] = bitcast i64* [[ADDR64]] to i8*
+  // CHECK:   [[RES:%.*]] = call arm_aapcscc i64 @__atomic_load_8(i8* [[ADDR8]], i32 5)
+  // CHECK:   store i64 [[RES]], i64* [[ATOMIC_RES64]], align 8
+  // CHECK:   [[ATOMIC_RES_STRUCT:%.*]] = bitcast i64* [[ATOMIC_RES64]] to %struct.PS*
+  // CHECK:   [[AGG_RESULT8:%.*]] = bitcast %struct.PS* %agg.result to i8*
+  // CHECK:   [[ATOMIC_RES8:%.*]] = bitcast %struct.PS* [[ATOMIC_RES_STRUCT]] to i8*
+  // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[AGG_RESULT8]], i8* [[ATOMIC_RES8]], i32 6, i32 2, i1 false)
 
-// FIXME: none of these look right, but we can leave the "test" here
-// to make sure they at least don't crash.
-void testPromotedStructOps(_Atomic(PS) *p) {
-  PS a = __c11_atomic_load(p, 5);
-  __c11_atomic_store(p, a, 5);
-  PS b = __c11_atomic_exchange(p, a, 5);
-  _Bool v = __c11_atomic_compare_exchange_strong(p, &b, a, 5, 5);
-  v = __c11_atomic_compare_exchange_weak(p, &b, a, 5, 5);
+  return __c11_atomic_load(addr, 5);
+}
+
+void test_promoted_store(_Atomic(PS) *addr, PS *val) {
+  // CHECK-LABEL: @test_promoted_store({ %struct.PS, [2 x i8] }* %addr, %struct.PS* %val)
+  // CHECK:   [[ADDR_ARG:%.*]] = alloca { %struct.PS, [2 x i8] }*, align 4
+  // CHECK:   [[VAL_ARG:%.*]] = alloca %struct.PS*, align 4
+  // CHECK:   [[NONATOMIC_TMP:%.*]] = alloca %struct.PS, align 2
+  // CHECK:   [[ATOMIC_VAL:%.*]] = alloca { %struct.PS, [2 x i8] }, align 8
+  // CHECK:   store { %struct.PS, [2 x i8] }* %addr, { %struct.PS, [2 x i8] }** [[ADDR_ARG]], align 4
+  // CHECK:   store %struct.PS* %val, %struct.PS** [[VAL_ARG]], align 4
+  // CHECK:   [[ADDR:%.*]] = load { %struct.PS, [2 x i8] }*, { %struct.PS, [2 x i8] }** [[ADDR_ARG]], align 4
+  // CHECK:   [[VAL:%.*]] = load %struct.PS*, %struct.PS** [[VAL_ARG]], align 4
+  // CHECK:   [[NONATOMIC_TMP8:%.*]] = bitcast %struct.PS* [[NONATOMIC_TMP]] to i8*
+  // CHECK:   [[VAL8:%.*]] = bitcast %struct.PS* [[VAL]] to i8*
+  // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[NONATOMIC_TMP8]], i8* [[VAL8]], i32 6, i32 2, i1 false)
+  // CHECK:   [[ADDR64:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ADDR]] to i64*
+  // CHECK:   [[ATOMIC_VAL8:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ATOMIC_VAL]] to i8*
+  // CHECK:   [[NONATOMIC_TMP8:%.*]] = bitcast %struct.PS* [[NONATOMIC_TMP]] to i8*
+  // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[ATOMIC_VAL8]], i8* [[NONATOMIC_TMP8]], i64 6, i32 2, i1 false)
+  // CHECK:   [[ATOMIC_VAL64:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ATOMIC_VAL]] to i64*
+  // CHECK:   [[ADDR8:%.*]] = bitcast i64* [[ADDR64]] to i8*
+  // CHECK:   [[VAL64:%.*]] = load i64, i64* [[ATOMIC_VAL64]], align 2
+  // CHECK:   call arm_aapcscc void @__atomic_store_8(i8* [[ADDR8]], i64 [[VAL64]], i32 5)
+  __c11_atomic_store(addr, *val, 5);
+}
+
+PS test_promoted_exchange(_Atomic(PS) *addr, PS *val) {
+  // CHECK-LABEL: @test_promoted_exchange(%struct.PS* noalias sret %agg.result, { %struct.PS, [2 x i8] }* %addr, %struct.PS* %val)
+  // CHECK:   [[ADDR_ARG:%.*]] = alloca { %struct.PS, [2 x i8] }*, align 4
+  // CHECK:   [[VAL_ARG:%.*]] = alloca %struct.PS*, align 4
+  // CHECK:   [[NONATOMIC_TMP:%.*]] = alloca %struct.PS, align 2
+  // CHECK:   [[ATOMIC_VAL:%.*]] = alloca { %struct.PS, [2 x i8] }, align 8
+  // CHECK:   [[ATOMIC_RES:%.*]] = alloca { %struct.PS, [2 x i8] }, align 8
+  // CHECK:   store { %struct.PS, [2 x i8] }* %addr, { %struct.PS, [2 x i8] }** [[ADDR_ARG]], align 4
+  // CHECK:   store %struct.PS* %val, %struct.PS** [[VAL_ARG]], align 4
+  // CHECK:   [[ADDR:%.*]] = load { %struct.PS, [2 x i8] }*, { %struct.PS, [2 x i8] }** [[ADDR_ARG]], align 4
+  // CHECK:   [[VAL:%.*]] = load %struct.PS*, %struct.PS** [[VAL_ARG]], align 4
+  // CHECK:   [[NONATOMIC_TMP8:%.*]] = bitcast %struct.PS* [[NONATOMIC_TMP]] to i8*
+  // CHECK:   [[VAL8:%.*]] = bitcast %struct.PS* [[VAL]] to i8*
+  // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[NONATOMIC_TMP8]], i8* [[VAL8]], i32 6, i32 2, i1 false)
+  // CHECK:   [[ADDR64:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ADDR]] to i64*
+  // CHECK:   [[ATOMIC_VAL8:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ATOMIC_VAL]] to i8*
+  // CHECK:   [[NONATOMIC_TMP8:%.*]] = bitcast %struct.PS* [[NONATOMIC_TMP]] to i8*
+  // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[ATOMIC_VAL8]], i8* [[NONATOMIC_TMP8]], i64 6, i32 2, i1 false)
+  // CHECK:   [[ATOMIC_VAL64:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ATOMIC_VAL]] to i64*
+  // CHECK:   [[ATOMIC_RES64:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ATOMIC_RES]] to i64*
+  // CHECK:   [[ADDR8:%.*]] = bitcast i64* [[ADDR64]] to i8*
+  // CHECK:   [[VAL64:%.*]] = load i64, i64* [[ATOMIC_VAL64]], align 2
+  // CHECK:   [[RES:%.*]] = call arm_aapcscc i64 @__atomic_exchange_8(i8* [[ADDR8]], i64 [[VAL64]], i32 5)
+  // CHECK:   store i64 [[RES]], i64* [[ATOMIC_RES64]], align 8
+  // CHECK:   [[ATOMIC_RES_STRUCT:%.*]] = bitcast i64* [[ATOMIC_RES64]] to %struct.PS*
+  // CHECK:   [[AGG_RESULT8:%.*]] = bitcast %struct.PS* %agg.result to i8*
+  // CHECK:   [[ATOMIC_RES8:%.*]] = bitcast %struct.PS* [[ATOMIC_RES_STRUCT]] to i8*
+  // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[AGG_RESULT8]], i8* [[ATOMIC_RES8]], i32 6, i32 2, i1 false)
+  return __c11_atomic_exchange(addr, *val, 5);
+}
+
+_Bool test_promoted_cmpxchg(_Atomic(PS) *addr, PS *desired, PS *new) {
+  // CHECK-LABEL: i1 @test_promoted_cmpxchg({ %struct.PS, [2 x i8] }* %addr, %struct.PS* %desired, %struct.PS* %new) #0 {
+  // CHECK:   [[ADDR_ARG:%.*]] = alloca { %struct.PS, [2 x i8] }*, align 4
+  // CHECK:   [[DESIRED_ARG:%.*]] = alloca %struct.PS*, align 4
+  // CHECK:   [[NEW_ARG:%.*]] = alloca %struct.PS*, align 4
+  // CHECK:   [[NONATOMIC_TMP:%.*]] = alloca %struct.PS, align 2
+  // CHECK:   [[ATOMIC_DESIRED:%.*]] = alloca { %struct.PS, [2 x i8] }, align 8
+  // CHECK:   [[ATOMIC_NEW:%.*]] = alloca { %struct.PS, [2 x i8] }, align 8
+  // CHECK:   store { %struct.PS, [2 x i8] }* %addr, { %struct.PS, [2 x i8] }** [[ADDR_ARG]], align 4
+  // CHECK:   store %struct.PS* %desired, %struct.PS** [[DESIRED_ARG]], align 4
+  // CHECK:   store %struct.PS* %new, %struct.PS** [[NEW_ARG]], align 4
+  // CHECK:   [[ADDR:%.*]] = load { %struct.PS, [2 x i8] }*, { %struct.PS, [2 x i8] }** [[ADDR_ARG]], align 4
+  // CHECK:   [[DESIRED:%.*]] = load %struct.PS*, %struct.PS** [[DESIRED_ARG]], align 4
+  // CHECK:   [[NEW:%.*]] = load %struct.PS*, %struct.PS** [[NEW_ARG]], align 4
+  // CHECK:   [[NONATOMIC_TMP8:%.*]] = bitcast %struct.PS* [[NONATOMIC_TMP]] to i8*
+  // CHECK:   [[NEW8:%.*]] = bitcast %struct.PS* [[NEW]] to i8*
+  // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[NONATOMIC_TMP8]], i8* [[NEW8]], i32 6, i32 2, i1 false)
+  // CHECK:   [[ADDR64:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ADDR]] to i64*
+  // CHECK:   [[ATOMIC_DESIRED8:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ATOMIC_DESIRED]] to i8*
+  // CHECK:   [[DESIRED8:%.*]] = bitcast %struct.PS* [[DESIRED]] to i8*
+  // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[ATOMIC_DESIRED8]], i8* [[DESIRED8]], i64 6, i32 2, i1 false)
+  // CHECK:   [[ATOMIC_DESIRED64:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ATOMIC_DESIRED]] to i64*
+  // CHECK:   [[ATOMIC_NEW8:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ATOMIC_NEW]] to i8*
+  // CHECK:   [[NONATOMIC_TMP8:%.*]] = bitcast %struct.PS* [[NONATOMIC_TMP]] to i8*
+  // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[ATOMIC_NEW8]], i8* [[NONATOMIC_TMP8]], i64 6, i32 2, i1 false)
+  // CHECK:   [[ATOMIC_NEW64:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ATOMIC_NEW]] to i64*
+  // CHECK:   [[ADDR8:%.*]] = bitcast i64* [[ADDR64]] to i8*
+  // CHECK:   [[ATOMIC_DESIRED8:%.*]] = bitcast i64* [[ATOMIC_DESIRED64]] to i8*
+  // CHECK:   [[NEW64:%.*]] = load i64, i64* [[ATOMIC_NEW64]], align 2
+  // CHECK:   [[RES:%.*]] = call arm_aapcscc zeroext i1 @__atomic_compare_exchange_8(i8* [[ADDR8]], i8* [[ATOMIC_DESIRED8]], i64 [[NEW64]], i32 5, i32 5)
+  // CHECK:   ret i1 [[RES]]
+  return __c11_atomic_compare_exchange_strong(addr, desired, *new, 5, 5);
 }
diff --git a/test/CodeGen/call.c b/test/CodeGen/call.c
index 7239111..eec6e52 100644
--- a/test/CodeGen/call.c
+++ b/test/CodeGen/call.c
@@ -18,7 +18,7 @@
 void Interpret() {
   JS_ReportErrorNumber(js_GetErrorMessage, 0);
   
-  // CHECK: call void ({{.*}}, ...) @JS_ReportErrorNumber({{.*}}@js_GetErrorMessage
+  // CHECK: call {{.*}}void ({{.*}}, ...) @JS_ReportErrorNumber({{.*}}@js_GetErrorMessage
 }
 
 
diff --git a/test/CodeGen/captured-statements-nested.c b/test/CodeGen/captured-statements-nested.c
index 6464243..b81705b 100644
--- a/test/CodeGen/captured-statements-nested.c
+++ b/test/CodeGen/captured-statements-nested.c
@@ -30,7 +30,7 @@
         param_arr[size - 1] = 2;
         arr[10][z.a] = 12;
 
-        // CHECK1: define internal void @__captured_stmt{{.*}}([[T]]
+        // CHECK1: define internal {{.*}}void @__captured_stmt{{.*}}([[T]]
         // CHECK1: [[PARAM_ARR_SIZE_REF:%.+]] = getelementptr inbounds [[T]], [[T]]* {{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 5
         // CHECK1: [[PARAM_ARR_SIZE:%.+]] = load [[SIZE_TYPE]], [[SIZE_TYPE]]* [[PARAM_ARR_SIZE_REF]]
         // CHECK1: [[ARR_SIZE1_REF:%.+]] = getelementptr inbounds [[T]], [[T]]* {{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 8
@@ -106,7 +106,7 @@
     }
   }();
 
-  // CHECK2: define internal void @{{.*}}test_nest_block_block_invoke
+  // CHECK2: define internal {{.*}}void @{{.*}}test_nest_block_block_invoke
   //
   // CHECK2: [[Z:%[0-9a-z_]*]] = alloca i{{[0-9]+}},
   // CHECK2: alloca %struct.anon{{.*}}
diff --git a/test/CodeGen/captured-statements.c b/test/CodeGen/captured-statements.c
index b4fbfd4..607ec8e 100644
--- a/test/CodeGen/captured-statements.c
+++ b/test/CodeGen/captured-statements.c
@@ -21,14 +21,14 @@
   // CHECK-1: %struct.anon = type { i32* }
   // CHECK-1: {{.+}} global float 3.0
   //
-  // CHECK-1: test1
+  // CHECK-1: @test1(
   // CHECK-1: alloca %struct.anon
   // CHECK-1: getelementptr inbounds %struct.anon, %struct.anon*
   // CHECK-1: store i32* %i
   // CHECK-1: call void @[[HelperName:__captured_stmt[\.0-9]+]]
 }
 
-// CHECK-1: define internal void @[[HelperName]](%struct.anon
+// CHECK-1: define internal {{.*}}void @[[HelperName]](%struct.anon
 // CHECK-1:   getelementptr inbounds %struct.anon{{.*}}, i32 0, i32 0
 // CHECK-1:   load i32*, i32**
 // CHECK-1:   load i32, i32*
@@ -43,12 +43,12 @@
     for (i = 0; i < x; i++)
       foo();
   }
-  // CHECK-2: test2
+  // CHECK-2: @test2(
   // CHECK-2-NOT: %i
   // CHECK-2: call void @[[HelperName:__captured_stmt[\.0-9]+]]
 }
 
-// CHECK-2: define internal void @[[HelperName]]
+// CHECK-2: define internal {{.*}}void @[[HelperName]]
 // CHECK-2-NOT: }
 // CHECK-2:   %i = alloca i32
 
@@ -60,7 +60,7 @@
   {
     arr[2] = vla_arr[size - 1];
   }
-  // CHECK-3: test3
+  // CHECK-3: @test3(
   // CHECK-3: alloca [5 x i32]
   // CHECK-3: call void @__captured_stmt
 }
@@ -93,7 +93,7 @@
   // CHECK-GLOBALS: call void @__captured_stmt[[HelperName:[\.0-9]+]](%[[Capture]]
 }
 
-// CHECK-GLOBALS: define internal void @__captured_stmt[[HelperName]]
+// CHECK-GLOBALS: define internal {{.*}}void @__captured_stmt[[HelperName]]
 // CHECK-GLOBALS-NOT: ret
 // CHECK-GLOBALS:   load i32, i32* @global
 // CHECK-GLOBALS:   load i32, i32* @
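
The @__captured_stmt helpers these prefixes match are the outlined bodies of Clang's captured-statement construct; a minimal sketch of the pattern the tests rely on, with illustrative names:

void captured_example(int x) {
  int i = 0;
#pragma clang __debug captured
  {
    // This block is outlined into an internal helper (e.g. @__captured_stmt.N)
    // that receives a %struct.anon holding pointers to the captured variables.
    i += x;
  }
}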
diff --git a/test/CodeGen/catch-undef-behavior.c b/test/CodeGen/catch-undef-behavior.c
index 6695419..c2f01ae 100644
--- a/test/CodeGen/catch-undef-behavior.c
+++ b/test/CodeGen/catch-undef-behavior.c
@@ -1,8 +1,7 @@
-// RUN: %clang_cc1 -fsanitize=alignment,null,object-size,shift-base,shift-exponent,return,signed-integer-overflow,vla-bound,float-cast-overflow,integer-divide-by-zero,bool,returns-nonnull-attribute,nonnull-attribute -fsanitize-recover=alignment,null,object-size,shift-base,shift-exponent,signed-integer-overflow,vla-bound,float-cast-overflow,integer-divide-by-zero,bool,returns-nonnull-attribute,nonnull-attribute -emit-llvm %s -o - -triple x86_64-linux-gnu | FileCheck %s --check-prefix=CHECK-COMMON --check-prefix=CHECK-UBSAN
-// RUN: %clang_cc1 -fsanitize-undefined-trap-on-error -fsanitize=alignment,null,object-size,shift-base,shift-exponent,return,signed-integer-overflow,vla-bound,float-cast-overflow,integer-divide-by-zero,bool,returns-nonnull-attribute,nonnull-attribute -fsanitize-recover=alignment,null,object-size,shift-base,shift-exponent,signed-integer-overflow,vla-bound,float-cast-overflow,integer-divide-by-zero,bool,returns-nonnull-attribute,nonnull-attribute -emit-llvm %s -o - -triple x86_64-linux-gnu | FileCheck %s --check-prefix=CHECK-COMMON --check-prefix=CHECK-TRAP
+// RUN: %clang_cc1 -fsanitize=alignment,null,object-size,shift-base,shift-exponent,return,signed-integer-overflow,vla-bound,float-cast-overflow,integer-divide-by-zero,bool,returns-nonnull-attribute,nonnull-attribute -fsanitize-recover=alignment,null,object-size,shift-base,shift-exponent,signed-integer-overflow,vla-bound,float-cast-overflow,integer-divide-by-zero,bool,returns-nonnull-attribute,nonnull-attribute -emit-llvm %s -o - -triple x86_64-linux-gnu | opt -instnamer -S | FileCheck %s --check-prefix=CHECK-COMMON --check-prefix=CHECK-UBSAN
+// RUN: %clang_cc1 -fsanitize-trap=alignment,null,object-size,shift-base,shift-exponent,return,signed-integer-overflow,vla-bound,float-cast-overflow,integer-divide-by-zero,bool,returns-nonnull-attribute,nonnull-attribute -fsanitize-recover=alignment,null,object-size,shift-base,shift-exponent,signed-integer-overflow,vla-bound,float-cast-overflow,integer-divide-by-zero,bool,returns-nonnull-attribute,nonnull-attribute -fsanitize=alignment,null,object-size,shift-base,shift-exponent,return,signed-integer-overflow,vla-bound,float-cast-overflow,integer-divide-by-zero,bool,returns-nonnull-attribute,nonnull-attribute -fsanitize-recover=alignment,null,object-size,shift-base,shift-exponent,signed-integer-overflow,vla-bound,float-cast-overflow,integer-divide-by-zero,bool,returns-nonnull-attribute,nonnull-attribute -emit-llvm %s -o - -triple x86_64-linux-gnu | opt -instnamer -S | FileCheck %s --check-prefix=CHECK-COMMON --check-prefix=CHECK-TRAP
 // RUN: %clang_cc1 -fsanitize=null -fsanitize-recover=null -emit-llvm %s -o - -triple x86_64-linux-gnu | FileCheck %s --check-prefix=CHECK-NULL
 // RUN: %clang_cc1 -fsanitize=signed-integer-overflow -emit-llvm %s -o - -triple x86_64-linux-gnu | FileCheck %s --check-prefix=CHECK-OVERFLOW
-// REQUIRES: asserts
 
 // CHECK-UBSAN: @[[INT:.*]] = private unnamed_addr constant { i16, i16, [6 x i8] } { i16 0, i16 11, [6 x i8] c"'int'\00" }
 
@@ -19,6 +18,17 @@
 // CHECK-UBSAN: @[[LINE_700:.*]] = {{.*}}, i32 700, i32 14 {{.*}} @[[STRUCT_S]], i64 4, i8 3 }
 // CHECK-UBSAN: @[[LINE_800:.*]] = {{.*}}, i32 800, i32 12 {{.*}} @{{.*}} }
 // CHECK-UBSAN: @[[LINE_900:.*]] = {{.*}}, i32 900, i32 11 {{.*}} @{{.*}} }
+// CHECK-UBSAN: @[[LINE_1000:.*]] = {{.*}}, i32 1000, i32 10 {{.*}} @{{.*}} }
+// CHECK-UBSAN: @[[FP16:.*]] = private unnamed_addr constant { i16, i16, [9 x i8] } { i16 1, i16 16, [9 x i8] c"'__fp16'\00" }
+// CHECK-UBSAN: @[[LINE_1100:.*]] = {{.*}}, i32 1100, i32 8 {{.*}} @{{.*}} }
+// CHECK-UBSAN: @[[LINE_1200:.*]] = {{.*}}, i32 1200, i32 10 {{.*}} @{{.*}} }
+// CHECK-UBSAN: @[[LINE_1300:.*]] = {{.*}}, i32 1300, i32 10 {{.*}} @{{.*}} }
+// CHECK-UBSAN: @[[LINE_1400:.*]] = {{.*}}, i32 1400, i32 10 {{.*}} @{{.*}} }
+// Make sure we check the fp16 type_mismatch data so we can easily match the signed char float_cast_overflow
+// CHECK-UBSAN: @[[LINE_1500:.*]] = {{.*}}, i32 1500, i32 10 {{.*}} @[[FP16]], {{.*}} }
+// CHECK-UBSAN: @[[SCHAR:.*]] = private unnamed_addr constant { i16, i16, [14 x i8] } { i16 0, i16 7, [14 x i8] c"'signed char'\00" }
+// CHECK-UBSAN: @[[LINE_1500:.*]] = {{.*}}, i32 1500, i32 10 {{.*}} @[[FP16]], {{.*}} }
+// CHECK-UBSAN: @[[LINE_1600:.*]] = {{.*}}, i32 1600, i32 10 {{.*}} @{{.*}} }
 
 // CHECK-NULL: @[[LINE_100:.*]] = private unnamed_addr global {{.*}}, i32 100, i32 5 {{.*}}
 
@@ -209,10 +219,11 @@
   // CHECK-COMMON: %[[INBOUNDS:.*]] = icmp ule i128 %{{.*}}, -20282409603651670423947251286016
   // CHECK-COMMON-NEXT: br i1 %[[INBOUNDS]]
 
-  // CHECK-UBSAN: call void @__ubsan_handle_float_cast_overflow(
+  // CHECK-UBSAN: call void @__ubsan_handle_float_cast_overflow(i8* bitcast ({{.*}} @[[LINE_1000]] to i8*),
 
   // CHECK-TRAP:      call void @llvm.trap() [[NR_NUW]]
   // CHECK-TRAP-NEXT: unreachable
+#line 1000
   return n;
 }
 
@@ -223,10 +234,11 @@
   // CHECK-COMMON: %[[INBOUNDS:.*]] = and i1 %[[GE]], %[[LE]]
   // CHECK-COMMON-NEXT: br i1 %[[INBOUNDS]]
 
-  // CHECK-UBSAN: call void @__ubsan_handle_float_cast_overflow(
+  // CHECK-UBSAN: call void @__ubsan_handle_float_cast_overflow(i8* bitcast ({{.*}} @[[LINE_1100]] to i8*),
 
   // CHECK-TRAP:      call void @llvm.trap() [[NR_NUW]]
   // CHECK-TRAP-NEXT: unreachable
+#line 1100
   *p = n;
 }
 
@@ -239,10 +251,11 @@
 
   // CHECK-UBSAN: %[[CAST:.*]] = bitcast float %[[F]] to i32
   // CHECK-UBSAN: %[[ARG:.*]] = zext i32 %[[CAST]] to i64
-  // CHECK-UBSAN: call void @__ubsan_handle_float_cast_overflow({{.*}}, i64 %[[ARG]]
+  // CHECK-UBSAN: call void @__ubsan_handle_float_cast_overflow(i8* bitcast ({{.*}} @[[LINE_1200]] to i8*), i64 %[[ARG]]
 
   // CHECK-TRAP:      call void @llvm.trap() [[NR_NUW]]
   // CHECK-TRAP-NEXT: unreachable
+#line 1200
   return f;
 }
 
@@ -255,12 +268,13 @@
   // CHECK-COMMON: %[[INBOUNDS:.*]] = and i1 %[[GE]], %[[LE]]
   // CHECK-COMMON-NEXT: br i1 %[[INBOUNDS]]
 
-  // CHECK-UBSAN: store x86_fp80 %[[F]], x86_fp80* %[[ALLOCA:.*]], !nosanitize
+  // CHECK-UBSAN: store x86_fp80 %[[F]], x86_fp80* %[[ALLOCA:.*]], align 16, !nosanitize
   // CHECK-UBSAN: %[[ARG:.*]] = ptrtoint x86_fp80* %[[ALLOCA]] to i64
-  // CHECK-UBSAN: call void @__ubsan_handle_float_cast_overflow({{.*}}, i64 %[[ARG]]
+  // CHECK-UBSAN: call void @__ubsan_handle_float_cast_overflow(i8* bitcast ({{.*}} @[[LINE_1300]] to i8*), i64 %[[ARG]]
 
   // CHECK-TRAP:      call void @llvm.trap() [[NR_NUW]]
   // CHECK-TRAP-NEXT: unreachable
+#line 1300
   return ld;
 }
 
@@ -271,10 +285,11 @@
   // CHECK-COMMON: %[[INBOUNDS:.*]] = and i1 %[[GE]], %[[LE]]
   // CHECK-COMMON-NEXT: br i1 %[[INBOUNDS]]
 
-  // CHECK-UBSAN: call void @__ubsan_handle_float_cast_overflow(
+  // CHECK-UBSAN: call void @__ubsan_handle_float_cast_overflow(i8* bitcast ({{.*}} @[[LINE_1400]] to i8*),
 
   // CHECK-TRAP:      call void @llvm.trap() [[NR_NUW]]
   // CHECK-TRAP-NEXT: unreachable
+#line 1400
   return f;
 }
 
@@ -285,10 +300,11 @@
   // CHECK-COMMON: %[[INBOUNDS:.*]] = and i1 %[[GE]], %[[LE]]
   // CHECK-COMMON-NEXT: br i1 %[[INBOUNDS]]
 
-  // CHECK-UBSAN: call void @__ubsan_handle_float_cast_overflow(
+  // CHECK-UBSAN: call void @__ubsan_handle_float_cast_overflow(i8* bitcast ({{.*}} @[[LINE_1500]] to i8*),
 
   // CHECK-TRAP:      call void @llvm.trap() [[NR_NUW]]
   // CHECK-TRAP-NEXT: unreachable
+#line 1500
   return *p;
 }
 
@@ -301,10 +317,11 @@
   // CHECK-COMMON: %[[INBOUNDS:.*]] = xor i1 %[[OUTOFBOUNDS]], true
   // CHECK-COMMON-NEXT: br i1 %[[INBOUNDS]]
 
-  // CHECK-UBSAN: call void @__ubsan_handle_float_cast_overflow(
+  // CHECK-UBSAN: call void @__ubsan_handle_float_cast_overflow(i8* bitcast ({{.*}} @[[LINE_1600]] to i8*),
 
   // CHECK-TRAP:      call void @llvm.trap() [[NR_NUW]]
   // CHECK-TRAP-NEXT: unreachable
+#line 1600
   return f;
 }
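
The new @[[LINE_NNNN]] handler-data checks work because each conversion now sits directly after a #line directive. The fp16 case at line 1500, for example, has roughly this shape; this is a hedged sketch reconstructed from the FP16 and SCHAR globals and the `return *p;` statement checked above, with an illustrative function name:

signed char fp16_to_schar(__fp16 *p) {
#line 1500
  return *p;   // both the load's type_mismatch data and the conversion's
               // float_cast_overflow data record line 1500
}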
 
diff --git a/test/CodeGen/cfi-icall-cross-dso.c b/test/CodeGen/cfi-icall-cross-dso.c
new file mode 100644
index 0000000..9337b18
--- /dev/null
+++ b/test/CodeGen/cfi-icall-cross-dso.c
@@ -0,0 +1,49 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux -O1 -fsanitize=cfi-icall -fsanitize-cfi-cross-dso -emit-llvm -o - %s | FileCheck --check-prefix=CHECK --check-prefix=ITANIUM %s
+// RUN: %clang_cc1 -triple x86_64-pc-windows-msvc -O1 -fsanitize=cfi-icall  -fsanitize-cfi-cross-dso -emit-llvm -o - %s | FileCheck --check-prefix=CHECK --check-prefix=MS %s
+
+void caller(void (*f)()) {
+  f();
+}
+
+static void g(void) {}
+void h(void);
+
+typedef void (*Fn)(void);
+Fn g1() {
+  return &g;
+}
+Fn h1() {
+  return &h;
+}
+
+inline void foo() {}
+void bar() { foo(); }
+
+// ITANIUM: call i1 @llvm.bitset.test(i8* %{{.*}}, metadata !"_ZTSFvE"), !nosanitize
+// ITANIUM: call void @__cfi_slowpath(i64 6588678392271548388, i8* %{{.*}}) {{.*}}, !nosanitize
+
+// MS: call i1 @llvm.bitset.test(i8* %{{.*}}, metadata !"?6AX@Z"), !nosanitize
+// MS: call void @__cfi_slowpath(i64 4195979634929632483, i8* %{{.*}}) {{.*}}, !nosanitize
+
+// ITANIUM: define available_externally void @foo()
+// MS: define linkonce_odr void @foo()
+
+// Check that we emit both string and hash based bit set entries for static void g(),
+// and don't emit them for the declaration of h().
+
+// CHECK-NOT: !{!"{{.*}}", void ()* @h, i64 0}
+// CHECK: !{!"{{.*}}", void ()* @g, i64 0}
+// CHECK-NOT: !{!"{{.*}}", void ()* @h, i64 0}
+// CHECK: !{i64 {{.*}}, void ()* @g, i64 0}
+// CHECK-NOT: !{!"{{.*}}", void ()* @h, i64 0}
+
+// ITANIUM-NOT: !{!{{.*}}, void ()* @foo,
+// ITANIUM: !{!"_ZTSFvE", void ()* @bar, i64 0}
+// ITANIUM-NOT: !{!{{.*}}, void ()* @foo,
+// ITANIUM: !{i64 6588678392271548388, void ()* @bar, i64 0}
+// ITANIUM-NOT: !{!{{.*}}, void ()* @foo,
+
+// MS: !{!"?6AX@Z", void ()* @foo, i64 0}
+// MS: !{i64 4195979634929632483, void ()* @foo, i64 0}
+
+// CHECK: !{i32 4, !"Cross-DSO CFI", i32 1}
diff --git a/test/CodeGen/cfi-icall.c b/test/CodeGen/cfi-icall.c
new file mode 100644
index 0000000..d6cebef
--- /dev/null
+++ b/test/CodeGen/cfi-icall.c
@@ -0,0 +1,20 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux -fsanitize=cfi-icall -fsanitize-trap=cfi-icall -emit-llvm -o - %s | FileCheck --check-prefix=ITANIUM %s
+// RUN: %clang_cc1 -triple x86_64-pc-windows-msvc -fsanitize=cfi-icall -fsanitize-trap=cfi-icall -emit-llvm -o - %s | FileCheck --check-prefix=MS %s
+
+// Tests that we assign appropriate identifiers to unprototyped functions.
+
+void f() {
+}
+
+void xf();
+
+void g(int b) {
+  void (*fp)() = b ? f : xf;
+  // ITANIUM: call i1 @llvm.bitset.test(i8* {{.*}}, metadata !"_ZTSFvE")
+  fp();
+}
+
+// ITANIUM-DAG: !{!"_ZTSFvE", void ()* @f, i64 0}
+// ITANIUM-DAG: !{!"_ZTSFvE", void (...)* @xf, i64 0}
+// MS-DAG: !{!"?6AX@Z", void ()* @f, i64 0}
+// MS-DAG: !{!"?6AX@Z", void (...)* @xf, i64 0}
diff --git a/test/CodeGen/cleanup-destslot-simple.c b/test/CodeGen/cleanup-destslot-simple.c
index b8328af..a1c5640 100644
--- a/test/CodeGen/cleanup-destslot-simple.c
+++ b/test/CodeGen/cleanup-destslot-simple.c
@@ -1,8 +1,8 @@
-// RUN: %clang_cc1 -O1 -triple x86_64-none-linux-gnu -emit-llvm -gline-tables-only %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=LIFETIME
+// RUN: %clang_cc1 -O1 -triple x86_64-none-linux-gnu -emit-llvm -debug-info-kind=line-tables-only %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=LIFETIME
 
 // We shouldn't have markers at -O0 or with msan.
-// RUN: %clang_cc1 -O0 -triple x86_64-none-linux-gnu -emit-llvm -gline-tables-only %s -o - | FileCheck %s --check-prefix=CHECK
-// RUN: %clang_cc1 -O1 -triple x86_64-none-linux-gnu -emit-llvm -gline-tables-only %s -o - -fsanitize=memory | FileCheck %s --check-prefix=CHECK
+// RUN: %clang_cc1 -O0 -triple x86_64-none-linux-gnu -emit-llvm -debug-info-kind=line-tables-only %s -o - | FileCheck %s --check-prefix=CHECK
+// RUN: %clang_cc1 -O1 -triple x86_64-none-linux-gnu -emit-llvm -debug-info-kind=line-tables-only %s -o - -fsanitize=memory | FileCheck %s --check-prefix=CHECK
 
 // There is no exception to handle here, lifetime.end is not a destructor,
 // so there is no need to have cleanup dest slot related code
diff --git a/test/CodeGen/complex-convert.c b/test/CodeGen/complex-convert.c
index 0db2588..5d2e9d7 100644
--- a/test/CodeGen/complex-convert.c
+++ b/test/CodeGen/complex-convert.c
@@ -21,7 +21,7 @@
   _Complex unsigned char cuc1;
   _Complex signed long long csll1;
   _Complex unsigned long long cull1;
-  // CHECK-LABEL: define void @foo(
+  // CHECK-LABEL: define {{.*}}void @foo(
   // Match the prototype to pick up the size of sc and sll.
   // CHECK: i[[CHSIZE:[0-9]+]]{{[^,]*}},
   // CHECK: i[[CHSIZE]]{{[^,]*}},
@@ -31,6 +31,8 @@
   // CHECK: alloca i[[CHSIZE]], align [[CHALIGN:[0-9]+]]
   // CHECK: alloca i[[LLSIZE]], align [[LLALIGN:[0-9]+]]
 
+  // CHECK: store i64 %ull,
+
   sc1 = csc;
   // CHECK: %[[VAR1:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[CHSIZE]], i[[CHSIZE]]  }, { i[[CHSIZE]], i[[CHSIZE]]  }* %[[CSC:[A-Za-z0-9.]+]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
   // CHECK-NEXT: %[[VAR2:[A-Za-z0-9.]+]] = load i[[CHSIZE]], i[[CHSIZE]]* %[[VAR1]]
diff --git a/test/CodeGen/complex-math.c b/test/CodeGen/complex-math.c
index 36ef271..96c7ad9 100644
--- a/test/CodeGen/complex-math.c
+++ b/test/CodeGen/complex-math.c
@@ -3,6 +3,7 @@
 // RUN: %clang_cc1 %s -O1 -emit-llvm -triple i686-unknown-unknown -o - | FileCheck %s --check-prefix=X86
 // RUN: %clang_cc1 %s -O1 -emit-llvm -triple powerpc-unknown-unknown -o - | FileCheck %s --check-prefix=PPC
 // RUN: %clang_cc1 %s -O1 -emit-llvm -triple armv7-none-linux-gnueabihf -o - | FileCheck %s --check-prefix=ARM
+// RUN: %clang_cc1 %s -O1 -emit-llvm -triple thumbv7k-apple-watchos2.0 -o - -target-abi aapcs16 | FileCheck %s --check-prefix=ARM7K
 
 float _Complex add_float_rr(float a, float b) {
   // X86-LABEL: @add_float_rr(
@@ -477,5 +478,8 @@
 _Complex double foo(_Complex double a, _Complex double b) {
   // ARM-LABEL: @foo(
   // ARM: call arm_aapcscc { double, double } @__muldc3
+
+  // ARM7K-LABEL: @foo(
+  // ARM7K: call { double, double } @__muldc3
   return a*b;
 }
diff --git a/test/CodeGen/debug-info-257-args.c b/test/CodeGen/debug-info-257-args.c
index c6ffa6e..ce8d093 100644
--- a/test/CodeGen/debug-info-257-args.c
+++ b/test/CodeGen/debug-info-257-args.c
@@ -1,9 +1,9 @@
-// RUN: %clang_cc1 -x c++ -g -emit-llvm -triple x86_64-linux-gnu -o - %s | FileCheck %s
+// RUN: %clang_cc1 -x c++ -debug-info-kind=limited -emit-llvm -triple x86_64-linux-gnu -o - %s | FileCheck %s
 // PR23332
 
-// CHECK: DILocalVariable(tag: DW_TAG_arg_variable, arg: 255
-// CHECK: DILocalVariable(tag: DW_TAG_arg_variable, arg: 256
-// CHECK: DILocalVariable(tag: DW_TAG_arg_variable, arg: 257
+// CHECK: DILocalVariable(arg: 255
+// CHECK: DILocalVariable(arg: 256
+// CHECK: DILocalVariable(arg: 257
 void fn1(int, int, int, int, int, int, int, int, int, int, int, int, int, int,
          int, int, int, int, int, int, int, int, int, int, int, int, int, int,
          int, int, int, int, int, int, int, int, int, int, int, int, int, int,
diff --git a/test/CodeGen/debug-info-args.c b/test/CodeGen/debug-info-args.c
index 47c904b..ce21e7c 100644
--- a/test/CodeGen/debug-info-args.c
+++ b/test/CodeGen/debug-info-args.c
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -triple x86_64-unk-unk -o - -emit-llvm -g %s | FileCheck %s
+// RUN: %clang_cc1 -triple x86_64-unk-unk -o - -emit-llvm -debug-info-kind=limited %s | FileCheck %s
 
 int somefunc(char *x, int y, double z) {
   
diff --git a/test/CodeGen/debug-info-block-decl.c b/test/CodeGen/debug-info-block-decl.c
index 5476d88..e45a2d8 100644
--- a/test/CodeGen/debug-info-block-decl.c
+++ b/test/CodeGen/debug-info-block-decl.c
@@ -1,11 +1,11 @@
-// RUN: %clang_cc1 -triple x86_64-apple-darwin10 -g -fblocks -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple x86_64-apple-darwin10 -debug-info-kind=limited -fblocks -emit-llvm -o - %s | FileCheck %s
 // Assignment and block entry should point to the same line.
 // rdar://problem/14039866
 
 // CHECK: define{{.*}}@main()
 // CHECK: store{{.*}}bitcast{{.*}}, !dbg ![[ASSIGNMENT:[0-9]+]]
 // CHECK: define {{.*}} @__main_block_invoke
-// CHECK: dbg ![[BLOCK_ENTRY:[0-9]+]]
+// CHECK: , !dbg ![[BLOCK_ENTRY:[0-9]+]]
 
 int main()
 {
diff --git a/test/CodeGen/debug-info-block-out-return.c b/test/CodeGen/debug-info-block-out-return.c
index e0e5bd9..428a50c 100644
--- a/test/CodeGen/debug-info-block-out-return.c
+++ b/test/CodeGen/debug-info-block-out-return.c
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -triple x86_64-apple-darwin10 -g -fblocks -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple x86_64-apple-darwin10 -debug-info-kind=limited -fblocks -emit-llvm -o - %s | FileCheck %s
 
 // Check that arg numbering is not affected by LLVM IR argument numbering -
 // since the latter is affected by return-by-out-parameter ABI requirements
@@ -11,8 +11,8 @@
 // out of order or not at all (the latter would occur if they were both assigned
 // the same argument number by mistake).
 
-// CHECK: !DILocalVariable(tag: DW_TAG_arg_variable, name: ".block_descriptor", arg: 1,{{.*}}line: 2,
-// CHECK: !DILocalVariable(tag: DW_TAG_arg_variable, name: "param", arg: 2,{{.*}}line: 2,
+// CHECK: !DILocalVariable(name: ".block_descriptor", arg: 1,{{.*}}line: 2,
+// CHECK: !DILocalVariable(name: "param", arg: 2,{{.*}}line: 2,
 
 // Line directive so we don't have to worry about how many lines precede the
 // test code (as the line number is mangled in with the argument number as shown
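
For context, the two checks pin down only the debug-info argument numbering of a block, independent of how the return value is passed at the IR level; a hedged sketch with illustrative names, placed on line 2 by the directive the comment mentions:

#line 2
int (^blk)(int) = ^int(int param) { return param; };
// Debug info for the invoke function lists ".block_descriptor" as arg 1 and
// "param" as arg 2, both attributed to line 2, even if the real test's block
// returns its result through a hidden out-parameter.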
diff --git a/test/CodeGen/debug-info-block.c b/test/CodeGen/debug-info-block.c
index c4930bf..d23095b 100644
--- a/test/CodeGen/debug-info-block.c
+++ b/test/CodeGen/debug-info-block.c
@@ -1,10 +1,29 @@
-// RUN: %clang_cc1 -fblocks -g -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -fblocks -debug-info-kind=limited -emit-llvm -o - %s | FileCheck %s
 // Verify that the desired debugging type is generated for a structure
-//  member that is a pointer to a block. 
+// member that is a pointer to a block.
 
-// CHECK: !DICompositeType(tag: DW_TAG_structure_type, name: "__block_literal_generic"
+// CHECK: !DICompositeType(tag: DW_TAG_structure_type, scope
+// CHECK-NOT:              line
+// CHECK-SAME:             elements: ![[ELEMS1:.*]])
+// CHECK: ![[ELEMS1]] = {{.*, .*, .*,}} ![[FPEL1:.*]], {{.*}}
+// CHECK: ![[INT:.*]] = !DIBasicType(name: "int"
+// CHECK: ![[FPEL1]] = {{.*}}"__FuncPtr", {{.*}}, baseType: ![[FPTY1:[0-9]+]]
+// CHECK: ![[FPTY1]] = {{.*}}baseType: ![[FNTY1:[0-9]+]]
+// CHECK: ![[FNTY1]] = !DISubroutineType(types: ![[VOIDVOID:[0-9]+]])
+// CHECK: ![[VOIDVOID]] = !{null, null}
 // CHECK: !DICompositeType(tag: DW_TAG_structure_type, name: "__block_descriptor"
-struct inStruct {
-  void (^genericBlockPtr)();
-} is;
+// CHECK-NOT:              line
+// CHECK-SAME:            )
 
+// CHECK: !DICompositeType(tag: DW_TAG_structure_type, scope
+// CHECK-NOT:              line
+// CHECK-SAME:             elements: ![[ELEMS2:.*]])
+// CHECK: ![[ELEMS2]] = {{.*,.*,.*}}, ![[FPEL2:.*]], {{.*}}
+// CHECK: ![[FPEL2]] = {{.*}}"__FuncPtr", {{.*}}, baseType: ![[FPTY2:[0-9]+]]
+// CHECK: ![[FPTY2]] = {{.*}}baseType: ![[FNTY2:[0-9]+]]
+// CHECK: ![[FNTY2]] = !DISubroutineType(types: ![[INTINT:[0-9]+]])
+// CHECK: ![[INTINT]] = !{![[INT]], ![[INT]]}
+struct inStruct {
+  void (^voidBlockPtr)();
+  int (^intBlockPtr)(int);
+} is;
diff --git a/test/CodeGen/debug-info-compilation-dir.c b/test/CodeGen/debug-info-compilation-dir.c
index 4b47299..be2cc35 100644
--- a/test/CodeGen/debug-info-compilation-dir.c
+++ b/test/CodeGen/debug-info-compilation-dir.c
@@ -1,6 +1,6 @@
-// RUN: %clang_cc1 -fdebug-compilation-dir /nonsense -emit-llvm -g %s -o - | FileCheck -check-prefix=CHECK-NONSENSE %s
+// RUN: %clang_cc1 -fdebug-compilation-dir /nonsense -emit-llvm -debug-info-kind=limited %s -o - | FileCheck -check-prefix=CHECK-NONSENSE %s
 // CHECK-NONSENSE: nonsense
 
-// RUN: %clang_cc1 -emit-llvm -g %s -o - | FileCheck -check-prefix=CHECK-DIR %s
+// RUN: %clang_cc1 -emit-llvm -debug-info-kind=limited %s -o - | FileCheck -check-prefix=CHECK-DIR %s
 // CHECK-DIR: CodeGen
 
diff --git a/test/CodeGen/debug-info-crash.c b/test/CodeGen/debug-info-crash.c
index f04548b..9214909 100644
--- a/test/CodeGen/debug-info-crash.c
+++ b/test/CodeGen/debug-info-crash.c
@@ -1,5 +1,5 @@
 // REQUIRES: x86-registered-target
-// RUN: %clang_cc1 -triple i386-apple-darwin10 -fblocks -g -S %s -o -
+// RUN: %clang_cc1 -triple i386-apple-darwin10 -fblocks -debug-info-kind=limited -S %s -o -
 
 // rdar://7590323
 typedef struct dispatch_queue_s *dispatch_queue_t;
diff --git a/test/CodeGen/debug-info-enum.c b/test/CodeGen/debug-info-enum.c
index 4474e40..5454eb5 100644
--- a/test/CodeGen/debug-info-enum.c
+++ b/test/CodeGen/debug-info-enum.c
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -emit-llvm -g %s -o - | FileCheck %s
+// RUN: %clang_cc1 -emit-llvm -debug-info-kind=limited %s -o - | FileCheck %s
 
 // CHECK: !DICompositeType(tag: DW_TAG_enumeration_type, name: "e"
 // CHECK-SAME:             elements: [[TEST3_ENUMS:![0-9]*]]
diff --git a/test/CodeGen/debug-info-gline-tables-only.c b/test/CodeGen/debug-info-gline-tables-only.c
index 067d8e7..e890dbb 100644
--- a/test/CodeGen/debug-info-gline-tables-only.c
+++ b/test/CodeGen/debug-info-gline-tables-only.c
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 %s -gline-tables-only -S -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 %s -debug-info-kind=line-tables-only -S -emit-llvm -o - | FileCheck %s
 // Checks that clang with "-gline-tables-only" doesn't emit debug info
 // for variables and types.
 
@@ -23,9 +23,8 @@
 // CHECK-NOT: DW_TAG_enumeration_type
 enum E { ZERO = 0, ONE = 1 };
 
-// CHECK-NOT: DW_TAG_arg_variable
+// CHECK-NOT: DILocalVariable
 int sum(int p, int q) {
-  // CHECK-NOT: DW_TAG_auto_variable
   int r = p + q;
   struct S s;
   enum E e;
diff --git a/test/CodeGen/debug-info-gline-tables-only2.c b/test/CodeGen/debug-info-gline-tables-only2.c
index be457ab..da17d41 100644
--- a/test/CodeGen/debug-info-gline-tables-only2.c
+++ b/test/CodeGen/debug-info-gline-tables-only2.c
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 %s -gline-tables-only -S -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 %s -debug-info-kind=line-tables-only -S -emit-llvm -o - | FileCheck %s
 // Checks that clang with "-gline-tables-only" emits metadata for
 // compile unit, subprogram and file.
 
diff --git a/test/CodeGen/debug-info-line.c b/test/CodeGen/debug-info-line.c
index bc0d23a..24981b5 100644
--- a/test/CodeGen/debug-info-line.c
+++ b/test/CodeGen/debug-info-line.c
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -w -gline-tables-only -fexceptions -fcxx-exceptions -S -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -w -debug-info-kind=line-tables-only -fexceptions -fcxx-exceptions -S -emit-llvm %s -o - | FileCheck %s
 
 int f1(int a, int b) {
   // CHECK: icmp {{.*}}, !dbg [[DBG_F1:!.*]]
diff --git a/test/CodeGen/debug-info-line2.c b/test/CodeGen/debug-info-line2.c
index 893b021..fbdc6b1 100644
--- a/test/CodeGen/debug-info-line2.c
+++ b/test/CodeGen/debug-info-line2.c
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -triple x86_64-darwin-apple -g -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple x86_64-darwin-apple -debug-info-kind=limited -emit-llvm -o - %s | FileCheck %s
 // Radar 9199234
 
 int bar();
diff --git a/test/CodeGen/debug-info-line3.c b/test/CodeGen/debug-info-line3.c
index 8ba57e2..042571e 100644
--- a/test/CodeGen/debug-info-line3.c
+++ b/test/CodeGen/debug-info-line3.c
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -g -S -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -debug-info-kind=limited -S -emit-llvm %s -o - | FileCheck %s
 
 void func(char c, char* d)
 {
diff --git a/test/CodeGen/debug-info-member.c b/test/CodeGen/debug-info-member.c
index 43d26f8..87d4a74 100644
--- a/test/CodeGen/debug-info-member.c
+++ b/test/CodeGen/debug-info-member.c
@@ -1,3 +1,3 @@
-// RUN: %clang_cc1 -emit-llvm -g < %s | grep DW_TAG_member 
+// RUN: %clang_cc1 -emit-llvm -debug-info-kind=limited < %s | grep DW_TAG_member
 
 struct A { int x; } a;
diff --git a/test/CodeGen/debug-info-packed-struct.c b/test/CodeGen/debug-info-packed-struct.c
new file mode 100644
index 0000000..189bbe4
--- /dev/null
+++ b/test/CodeGen/debug-info-packed-struct.c
@@ -0,0 +1,91 @@
+// RUN: %clang_cc1 -x c -debug-info-kind=limited -emit-llvm -triple x86_64-apple-darwin -o - %s | FileCheck %s
+
+// CHECK: %struct.layout0 = type { i8, %struct.size8, i8 }
+// CHECK: %struct.layout1 = type <{ i8, %struct.size8_anon, i8, [2 x i8] }>
+// CHECK: %struct.layout2 = type <{ i8, %struct.size8_pack1, i8 }>
+// CHECK: %struct.layout3 = type <{ i8, [3 x i8], %struct.size8_pack4, i8, [3 x i8] }>
+
+// ---------------------------------------------------------------------
+// Not packed.
+// ---------------------------------------------------------------------
+struct size8 {
+  int i : 4;
+  long long l : 60;
+};
+struct layout0 {
+  char l0_ofs0;
+  struct size8 l0_ofs8;
+  int l0_ofs16 : 1;
+};
+// CHECK: l0_ofs0
+// CHECK: !DIDerivedType(tag: DW_TAG_member, name: "l0_ofs8",
+// CHECK-SAME:     {{.*}}size: 64, align: 64, offset: 64)
+// CHECK: !DIDerivedType(tag: DW_TAG_member, name: "l0_ofs16",
+// CHECK-SAME:     {{.*}}size: 1, align: 32, offset: 128)
+
+
+// ---------------------------------------------------------------------
+// Implicitly packed.
+// ---------------------------------------------------------------------
+struct size8_anon {
+  int : 4;
+  long long : 60;
+};
+struct layout1 {
+  char l1_ofs0;
+  struct size8_anon l1_ofs1;
+  int l1_ofs9 : 1;
+};
+// CHECK: l1_ofs0
+// CHECK: !DIDerivedType(tag: DW_TAG_member, name: "l1_ofs1",
+// CHECK-SAME:     {{.*}}size: 64, align: 8, offset: 8)
+// CHECK: !DIDerivedType(tag: DW_TAG_member, name: "l1_ofs9",
+// CHECK-SAME:     {{.*}}size: 1, align: 32, offset: 72)
+
+
+// ---------------------------------------------------------------------
+// Explicitly packed.
+// ---------------------------------------------------------------------
+#pragma pack(1)
+struct size8_pack1 {
+  int i : 4;
+  long long l : 60;
+};
+struct layout2 {
+  char l2_ofs0;
+  struct size8_pack1 l2_ofs1;
+  int l2_ofs9 : 1;
+};
+#pragma pack()
+// CHECK: l2_ofs0
+// CHECK: !DIDerivedType(tag: DW_TAG_member, name: "l2_ofs1",
+// CHECK-SAME:     {{.*}}size: 64, align: 8, offset: 8)
+// CHECK: !DIDerivedType(tag: DW_TAG_member, name: "l2_ofs9",
+// CHECK-SAME:     {{.*}}size: 1, align: 32, offset: 72)
+
+
+
+// ---------------------------------------------------------------------
+// Explicitly packed with different alignment.
+// ---------------------------------------------------------------------
+#pragma pack(4)
+struct size8_pack4 {
+  int i : 4;
+  long long l : 60;
+};
+struct layout3 {
+  char l3_ofs0;
+  struct size8_pack4 l3_ofs4;
+  int l3_ofs12 : 1;
+};
+#pragma pack()
+// CHECK: l3_ofs0
+// CHECK: !DIDerivedType(tag: DW_TAG_member, name: "l3_ofs4",
+// CHECK-SAME:     {{.*}}size: 64, align: 32, offset: 32)
+// CHECK: !DIDerivedType(tag: DW_TAG_member, name: "l3_ofs12",
+// CHECK-SAME:     {{.*}}size: 1, align: 32, offset: 96)
+
+struct layout0 l0;
+struct layout1 l1;
+struct layout2 l2;
+struct layout3 l3;
diff --git a/test/CodeGen/debug-info-same-line.c b/test/CodeGen/debug-info-same-line.c
index 7b71f57..a791222 100644
--- a/test/CodeGen/debug-info-same-line.c
+++ b/test/CodeGen/debug-info-same-line.c
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -triple x86_64-apple-darwin -emit-llvm %s -g -o - | FileCheck %s
+// RUN: %clang_cc1 -triple x86_64-apple-darwin -emit-llvm %s -debug-info-kind=limited -o - | FileCheck %s
 // Here two temporary nodes are identical (but should not get uniqued) while
 // building the full debug type.
 typedef struct { long x; } foo; typedef struct {  foo *x; } bar;
diff --git a/test/CodeGen/debug-info-scope-file.c b/test/CodeGen/debug-info-scope-file.c
index 9706319..296ec05 100644
--- a/test/CodeGen/debug-info-scope-file.c
+++ b/test/CodeGen/debug-info-scope-file.c
@@ -1,12 +1,12 @@
-// RUN: %clang_cc1 -g -emit-llvm < %s | FileCheck %s
+// RUN: %clang_cc1 -debug-info-kind=limited -emit-llvm < %s | FileCheck %s
 
 // Check that, just because we emitted a function from a different file doesn't
 // mean we insert a file-change inside the next function.
 
 // CHECK: ret void, !dbg [[F1_LINE:![0-9]*]]
 // CHECK: ret void, !dbg [[F2_LINE:![0-9]*]]
-// CHECK: [[F1:![0-9]*]] = !DISubprogram(name: "f1",{{.*}} isDefinition: true
-// CHECK: [[F2:![0-9]*]] = !DISubprogram(name: "f2",{{.*}} isDefinition: true
+// CHECK: [[F1:![0-9]*]] = distinct !DISubprogram(name: "f1",{{.*}} isDefinition: true
+// CHECK: [[F2:![0-9]*]] = distinct !DISubprogram(name: "f2",{{.*}} isDefinition: true
 // CHECK: [[F1_LINE]] = !DILocation({{.*}}, scope: [[F1]])
 // CHECK: [[F2_LINE]] = !DILocation({{.*}}, scope: [[F2]])
 
diff --git a/test/CodeGen/debug-info-scope.c b/test/CodeGen/debug-info-scope.c
index aa6e5c1..a25f117 100644
--- a/test/CodeGen/debug-info-scope.c
+++ b/test/CodeGen/debug-info-scope.c
@@ -1,25 +1,19 @@
-// RUN: %clang_cc1 -g -emit-llvm < %s | FileCheck %s
-// RUN: %clang_cc1 -gline-tables-only -emit-llvm < %s | FileCheck --check-prefix=GMLT %s
+// RUN: %clang_cc1 -dwarf-version=4 -debug-info-kind=limited -disable-llvm-passes -emit-llvm < %s | FileCheck %s
+// RUN: %clang_cc1 -dwarf-version=4 -debug-info-kind=line-tables-only -disable-llvm-passes -emit-llvm < %s | FileCheck --check-prefix=GMLT %s
 // Two variables with same name in separate scope.
 // Radar 8330217.
 int main() {
 	int j = 0;
 	int k = 0;
-// CHECK: !DILocalVariable(tag: DW_TAG_auto_variable, name: "i"
+// CHECK: !DILocalVariable(name: "i"
 // CHECK-NEXT: !DILexicalBlock(
 
-// FIXME: Looks like we don't actually need both these lexical blocks (disc 2
-// just refers to disc 1, nothing actually uses disc 2).
-// GMLT-NOT: !DILexicalBlock
-// GMLT: !DILexicalBlockFile({{.*}}, discriminator: 2)
-// GMLT-NOT: !DILexicalBlock
-// GMLT: !DILexicalBlockFile({{.*}}, discriminator: 1)
 // Make sure we don't have any more lexical blocks because we don't need them in
 // -gmlt.
 // GMLT-NOT: !DILexicalBlock
 	for (int i = 0; i < 10; i++)
 		j++;
-// CHECK: !DILocalVariable(tag: DW_TAG_auto_variable, name: "i"
+// CHECK: !DILocalVariable(name: "i"
 // CHECK-NEXT: !DILexicalBlock(
 // GMLT-NOT: !DILexicalBlock
 	for (int i = 0; i < 10; i++)
diff --git a/test/CodeGen/debug-info-static.c b/test/CodeGen/debug-info-static.c
index 115beaf..fbe2a00 100644
--- a/test/CodeGen/debug-info-static.c
+++ b/test/CodeGen/debug-info-static.c
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1  -g -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1  -debug-info-kind=limited -emit-llvm -o - %s | FileCheck %s
 
 // CHECK: !DIGlobalVariable({{.*}}variable: i32* @f.xyzzy
 void f(void)
diff --git a/test/CodeGen/debug-info-typedef.c b/test/CodeGen/debug-info-typedef.c
index 790e302..ea3b549 100644
--- a/test/CodeGen/debug-info-typedef.c
+++ b/test/CodeGen/debug-info-typedef.c
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -emit-llvm -g -I%p %s -o - | FileCheck %s
+// RUN: %clang_cc1 -emit-llvm -debug-info-kind=limited -I%p %s -o - | FileCheck %s
 // Test that the location of the typedef points to the header file.
 #line 1 "a.c"
 #line 2 "b.h"
diff --git a/test/CodeGen/debug-info-vector.c b/test/CodeGen/debug-info-vector.c
index 1075643..6b27573 100644
--- a/test/CodeGen/debug-info-vector.c
+++ b/test/CodeGen/debug-info-vector.c
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -emit-llvm -g %s -o - | FileCheck %s
+// RUN: %clang_cc1 -emit-llvm -debug-info-kind=limited %s -o - | FileCheck %s
 typedef int v4si __attribute__((__vector_size__(16)));
 
 v4si a;
diff --git a/test/CodeGen/debug-info-vla.c b/test/CodeGen/debug-info-vla.c
index 175c24c..371d106 100644
--- a/test/CodeGen/debug-info-vla.c
+++ b/test/CodeGen/debug-info-vla.c
@@ -1,10 +1,10 @@
-// RUN: %clang_cc1 -emit-llvm -g -triple x86_64-apple-darwin %s -o - | FileCheck %s
+// RUN: %clang_cc1 -emit-llvm -debug-info-kind=limited -triple x86_64-apple-darwin %s -o - | FileCheck %s
 
 void testVLAwithSize(int s)
 {
 // CHECK: dbg.declare
 // CHECK: dbg.declare({{.*}}, metadata ![[VAR:.*]], metadata ![[EXPR:.*]])
-// CHECK: ![[VAR]] = !DILocalVariable(tag: DW_TAG_auto_variable, name: "vla",{{.*}} line: [[@LINE+2]]
+// CHECK: ![[VAR]] = !DILocalVariable(name: "vla",{{.*}} line: [[@LINE+2]]
 // CHECK: ![[EXPR]] = !DIExpression(DW_OP_deref)
   int vla[s];
   int i;
diff --git a/test/CodeGen/debug-info.c b/test/CodeGen/debug-info.c
index 1a505ee..d122e7f 100644
--- a/test/CodeGen/debug-info.c
+++ b/test/CodeGen/debug-info.c
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -triple x86_64-unk-unk -o - -emit-llvm -g %s | FileCheck %s
+// RUN: %clang_cc1 -triple x86_64-unk-unk -o - -emit-llvm -debug-info-kind=limited %s | FileCheck %s
 
 // PR3023
 void convert(void) {
diff --git a/test/CodeGen/debug-line-1.c b/test/CodeGen/debug-line-1.c
index be1da08..56f447e 100644
--- a/test/CodeGen/debug-line-1.c
+++ b/test/CodeGen/debug-line-1.c
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -triple x86_64-apple-darwin -o - -emit-llvm -g %s | FileCheck %s
+// RUN: %clang_cc1 -triple x86_64-apple-darwin -o - -emit-llvm -debug-info-kind=limited %s | FileCheck %s
 // REQUIRES: asserts
 // PR9796
 
diff --git a/test/CodeGen/debug-prefix-map.c b/test/CodeGen/debug-prefix-map.c
new file mode 100644
index 0000000..dfb57bb
--- /dev/null
+++ b/test/CodeGen/debug-prefix-map.c
@@ -0,0 +1,34 @@
+// RUN: %clang_cc1 -debug-info-kind=standalone -fdebug-prefix-map=%p=/var/empty %s -emit-llvm -o - | FileCheck %s -check-prefix CHECK-NO-MAIN-FILE-NAME
+// RUN: %clang_cc1 -debug-info-kind=standalone -fdebug-prefix-map=%p=/var=empty %s -emit-llvm -o - | FileCheck %s -check-prefix CHECK-EVIL
+// RUN: %clang_cc1 -debug-info-kind=standalone -fdebug-prefix-map=%p=/var/empty %s -emit-llvm -o - -main-file-name debug-prefix-map.c | FileCheck %s
+// RUN: %clang_cc1 -debug-info-kind=standalone -fdebug-prefix-map=%p=/var/empty %s -emit-llvm -o - -fdebug-compilation-dir %p | FileCheck %s -check-prefix CHECK-COMPILATION-DIR
+
+#include "Inputs/stdio.h"
+
+int main(int argc, char **argv) {
+  (void)argc;
+  (void)argv;
+  return 0;
+}
+
+void test_rewrite_includes() {
+  __builtin_va_list argp;
+  vprintf("string", argp);
+}
+
+// CHECK-NO-MAIN-FILE-NAME: !DIFile(filename: "/var/empty{{/|\\5C}}<stdin>"
+// CHECK-NO-MAIN-FILE-NAME: !DIFile(filename: "/var/empty{{[/\\]}}{{.*}}"
+// CHECK-NO-MAIN-FILE-NAME: !DIFile(filename: "/var/empty{{[/\\]}}Inputs/stdio.h"
+// CHECK-NO-MAIN-FILE-NAME-NOT: !DIFile(filename:
+
+// CHECK-EVIL: !DIFile(filename: "/var=empty{{[/\\]}}{{.*}}"
+// CHECK-EVIL: !DIFile(filename: "/var=empty{{[/\\]}}Inputs/stdio.h"
+// CHECK-EVIL-NOT: !DIFile(filename:
+
+// CHECK: !DIFile(filename: "/var/empty{{[/\\]}}{{.*}}"
+// CHECK: !DIFile(filename: "/var/empty{{[/\\]}}Inputs/stdio.h"
+// CHECK-NOT: !DIFile(filename:
+
+// CHECK-COMPILATION-DIR: !DIFile(filename: "/var/empty{{[/\\]}}{{.*}}", directory: "/var/empty")
+// CHECK-COMPILATION-DIR: !DIFile(filename: "/var/empty{{[/\\]}}Inputs/stdio.h", directory: "/var/empty")
+// CHECK-COMPILATION-DIR-NOT: !DIFile(filename:
diff --git a/test/CodeGen/dostmt.c b/test/CodeGen/dostmt.c
index 54973dc..67ef64e 100644
--- a/test/CodeGen/dostmt.c
+++ b/test/CodeGen/dostmt.c
@@ -71,6 +71,6 @@
 void test6() {
   do {
   } while (test6f(), 0);
-  // CHECK: call void @test6f()
+  // CHECK: call {{.*}}void @test6f()
 }
 
diff --git a/test/CodeGen/dwarf-version.c b/test/CodeGen/dwarf-version.c
index cb95f28..eadce21 100644
--- a/test/CodeGen/dwarf-version.c
+++ b/test/CodeGen/dwarf-version.c
@@ -2,10 +2,15 @@
 // RUN: %clang -target x86_64-linux-gnu -gdwarf-3 -S -emit-llvm -o - %s | FileCheck %s --check-prefix=VER3
 // RUN: %clang -target x86_64-linux-gnu -gdwarf-4 -S -emit-llvm -o - %s | FileCheck %s --check-prefix=VER4
 // RUN: %clang -target x86_64-linux-gnu -g -S -emit-llvm -o - %s | FileCheck %s --check-prefix=VER4
+// RUN: %clang -target x86_64-linux-gnu -gdwarf -S -emit-llvm -o - %s | FileCheck %s --check-prefix=VER4
 // RUN: %clang -target x86_64-apple-darwin -g -S -emit-llvm -o - %s | FileCheck %s --check-prefix=VER2
 // RUN: %clang -target powerpc-unknown-openbsd -g -S -emit-llvm -o - %s | FileCheck %s --check-prefix=VER2
 // RUN: %clang -target powerpc-unknown-freebsd -g -S -emit-llvm -o - %s | FileCheck %s --check-prefix=VER2
 // RUN: %clang -target i386-pc-solaris -g -S -emit-llvm -o - %s | FileCheck %s --check-prefix=VER2
+//
+// Test what -gcodeview and -gdwarf do on Windows.
+// RUN: %clang -target i686-pc-windows-msvc -gcodeview -S -emit-llvm -o - %s | FileCheck %s --check-prefix=NODWARF --check-prefix=CODEVIEW
+// RUN: %clang -target i686-pc-windows-msvc -gdwarf -gcodeview -S -emit-llvm -o - %s | FileCheck %s --check-prefix=VER4 --check-prefix=CODEVIEW
 int main (void) {
   return 0;
 }
@@ -13,3 +18,7 @@
 // VER2: !{i32 2, !"Dwarf Version", i32 2}
 // VER3: !{i32 2, !"Dwarf Version", i32 3}
 // VER4: !{i32 2, !"Dwarf Version", i32 4}
+
+// NODWARF-NOT: !"Dwarf Version"
+// CODEVIEW: !{i32 2, !"CodeView", i32 1}
+// NODWARF-NOT: !"Dwarf Version"
diff --git a/test/CodeGen/enable_if.c b/test/CodeGen/enable_if.c
new file mode 100644
index 0000000..f863d80
--- /dev/null
+++ b/test/CodeGen/enable_if.c
@@ -0,0 +1,82 @@
+// RUN: %clang_cc1 -emit-llvm %s -o - -triple=x86_64-pc-linux-gnu | FileCheck %s
+
+// Verifying that we do, in fact, select the correct function in the following
+// cases.
+
+void foo(int m) __attribute__((overloadable, enable_if(m > 0, "")));
+void foo(int m) __attribute__((overloadable));
+
+// CHECK-LABEL: define void @test1
+void test1() {
+  // CHECK: store void (i32)* @_Z3fooi
+  void (*p)(int) = foo;
+  // CHECK: store void (i32)* @_Z3fooi
+  void (*p2)(int) = &foo;
+  // CHECK: store void (i32)* @_Z3fooi
+  p = foo;
+  // CHECK: store void (i32)* @_Z3fooi
+  p = &foo;
+
+  // CHECK: store i8* bitcast (void (i32)* @_Z3fooi to i8*)
+  void *vp1 = (void*)&foo;
+  // CHECK: store i8* bitcast (void (i32)* @_Z3fooi to i8*)
+  void *vp2 = (void*)foo;
+  // CHECK: store i8* bitcast (void (i32)* @_Z3fooi to i8*)
+  vp1 = (void*)&foo;
+  // CHECK: store i8* bitcast (void (i32)* @_Z3fooi to i8*)
+  vp1 = (void*)foo;
+}
+
+void bar(int m) __attribute__((overloadable, enable_if(m > 0, "")));
+void bar(int m) __attribute__((overloadable, enable_if(1, "")));
+// CHECK-LABEL: define void @test2
+void test2() {
+  // CHECK: store void (i32)* @_Z3barUa9enable_ifIXLi1EEEi
+  void (*p)(int) = bar;
+  // CHECK: store void (i32)* @_Z3barUa9enable_ifIXLi1EEEi
+  void (*p2)(int) = &bar;
+  // CHECK: store void (i32)* @_Z3barUa9enable_ifIXLi1EEEi
+  p = bar;
+  // CHECK: store void (i32)* @_Z3barUa9enable_ifIXLi1EEEi
+  p = &bar;
+
+  // CHECK: store i8* bitcast (void (i32)* @_Z3barUa9enable_ifIXLi1EEEi to i8*)
+  void *vp1 = (void*)&bar;
+  // CHECK: store i8* bitcast (void (i32)* @_Z3barUa9enable_ifIXLi1EEEi to i8*)
+  void *vp2 = (void*)bar;
+  // CHECK: store i8* bitcast (void (i32)* @_Z3barUa9enable_ifIXLi1EEEi to i8*)
+  vp1 = (void*)&bar;
+  // CHECK: store i8* bitcast (void (i32)* @_Z3barUa9enable_ifIXLi1EEEi to i8*)
+  vp1 = (void*)bar;
+}
+
+void baz(int m) __attribute__((overloadable, enable_if(1, "")));
+void baz(int m) __attribute__((overloadable));
+// CHECK-LABEL: define void @test3
+void test3() {
+  // CHECK: store void (i32)* @_Z3bazUa9enable_ifIXLi1EEEi
+  void (*p)(int) = baz;
+  // CHECK: store void (i32)* @_Z3bazUa9enable_ifIXLi1EEEi
+  void (*p2)(int) = &baz;
+  // CHECK: store void (i32)* @_Z3bazUa9enable_ifIXLi1EEEi
+  p = baz;
+  // CHECK: store void (i32)* @_Z3bazUa9enable_ifIXLi1EEEi
+  p = &baz;
+}
+
+
+const int TRUEFACTS = 1;
+void qux(int m) __attribute__((overloadable, enable_if(1, ""),
+                               enable_if(TRUEFACTS, "")));
+void qux(int m) __attribute__((overloadable, enable_if(1, "")));
+// CHECK-LABEL: define void @test4
+void test4() {
+  // CHECK: store void (i32)* @_Z3quxUa9enable_ifIXLi1EEXL_Z9TRUEFACTSEEEi
+  void (*p)(int) = qux;
+  // CHECK: store void (i32)* @_Z3quxUa9enable_ifIXLi1EEXL_Z9TRUEFACTSEEEi
+  void (*p2)(int) = &qux;
+  // CHECK: store void (i32)* @_Z3quxUa9enable_ifIXLi1EEXL_Z9TRUEFACTSEEEi
+  p = qux;
+  // CHECK: store void (i32)* @_Z3quxUa9enable_ifIXLi1EEXL_Z9TRUEFACTSEEEi
+  p = &qux;
+}
diff --git a/test/CodeGen/enum2.c b/test/CodeGen/enum2.c
index 3203627..9729ad0 100644
--- a/test/CodeGen/enum2.c
+++ b/test/CodeGen/enum2.c
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -triple i386-unknown-unknown %s -g -emit-llvm -o /dev/null
+// RUN: %clang_cc1 -triple i386-unknown-unknown %s -debug-info-kind=limited -emit-llvm -o /dev/null
 int v;
 enum e { MAX };
 
diff --git a/test/CodeGen/exceptions-seh-finally.c b/test/CodeGen/exceptions-seh-finally.c
index 345d514..f0ed223 100644
--- a/test/CodeGen/exceptions-seh-finally.c
+++ b/test/CodeGen/exceptions-seh-finally.c
@@ -1,4 +1,5 @@
 // RUN: %clang_cc1 %s -triple x86_64-pc-win32 -fms-extensions -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 %s -triple i686-pc-win32 -fms-extensions -emit-llvm -o - | FileCheck %s
 
 void abort(void) __attribute__((noreturn));
 void might_crash(void);
@@ -17,18 +18,17 @@
 // CHECK:     to label %[[invoke_cont:[^ ]*]] unwind label %[[lpad:[^ ]*]]
 //
 // CHECK: [[invoke_cont]]
-// CHECK: %[[fp:[^ ]*]] = call i8* @llvm.frameaddress(i32 0)
-// CHECK: call void @"\01?fin$0@0@basic_finally@@"(i8 0, i8* %[[fp]])
+// CHECK: %[[fp:[^ ]*]] = call i8* @llvm.localaddress()
+// CHECK: call void @"\01?fin$0@0@basic_finally@@"({{i8( zeroext)?}} 0, i8* %[[fp]])
 // CHECK-NEXT: ret void
 //
 // CHECK: [[lpad]]
-// CHECK-NEXT: landingpad
-// CHECK-NEXT: cleanup
-// CHECK: %[[fp:[^ ]*]] = call i8* @llvm.frameaddress(i32 0)
-// CHECK: call void @"\01?fin$0@0@basic_finally@@"(i8 1, i8* %[[fp]])
-// CHECK: resume { i8*, i32 }
+// CHECK-NEXT: %[[pad:[^ ]*]] = cleanuppad
+// CHECK: %[[fp:[^ ]*]] = call i8* @llvm.localaddress()
+// CHECK: call void @"\01?fin$0@0@basic_finally@@"({{i8( zeroext)?}} 1, i8* %[[fp]])
+// CHECK-NEXT: cleanupret from %[[pad]] unwind to caller
 
-// CHECK: define internal void @"\01?fin$0@0@basic_finally@@"(i8 %abnormal_termination, i8* %frame_pointer)
+// CHECK: define internal void @"\01?fin$0@0@basic_finally@@"({{.*}})
 // CHECK: call void @cleanup()
 
 // Mostly check that we don't double emit 'r' which would crash.
@@ -57,11 +57,11 @@
 // CHECK:     to label %[[invoke_cont:[^ ]*]] unwind label %[[lpad:[^ ]*]]
 //
 // CHECK: [[invoke_cont]]
-// CHECK: %[[fp:[^ ]*]] = call i8* @llvm.frameaddress(i32 0)
-// CHECK: call void @"\01?fin$0@0@label_in_finally@@"(i8 0, i8* %[[fp]])
+// CHECK: %[[fp:[^ ]*]] = call i8* @llvm.localaddress()
+// CHECK: call void @"\01?fin$0@0@label_in_finally@@"({{i8( zeroext)?}} 0, i8* %[[fp]])
 // CHECK: ret void
 
-// CHECK: define internal void @"\01?fin$0@0@label_in_finally@@"(i8 %abnormal_termination, i8* %frame_pointer)
+// CHECK: define internal void @"\01?fin$0@0@label_in_finally@@"({{.*}})
 // CHECK: br label %[[l:[^ ]*]]
 //
 // CHECK: [[l]]
@@ -84,19 +84,18 @@
 // CHECK:     to label %[[invoke_cont:[^ ]*]] unwind label %[[lpad:[^ ]*]]
 //
 // CHECK: [[invoke_cont]]
-// CHECK: %[[fp:[^ ]*]] = call i8* @llvm.frameaddress(i32 0)
-// CHECK: call void @"\01?fin$0@0@use_abnormal_termination@@"(i8 0, i8* %[[fp]])
+// CHECK: %[[fp:[^ ]*]] = call i8* @llvm.localaddress()
+// CHECK: call void @"\01?fin$0@0@use_abnormal_termination@@"({{i8( zeroext)?}} 0, i8* %[[fp]])
 // CHECK: ret void
 //
 // CHECK: [[lpad]]
-// CHECK-NEXT: landingpad
-// CHECK-NEXT: cleanup
-// CHECK: %[[fp:[^ ]*]] = call i8* @llvm.frameaddress(i32 0)
-// CHECK: call void @"\01?fin$0@0@use_abnormal_termination@@"(i8 1, i8* %[[fp]])
-// CHECK: resume { i8*, i32 }
+// CHECK-NEXT: %[[pad:[^ ]*]] = cleanuppad
+// CHECK: %[[fp:[^ ]*]] = call i8* @llvm.localaddress()
+// CHECK: call void @"\01?fin$0@0@use_abnormal_termination@@"({{i8( zeroext)?}} 1, i8* %[[fp]])
+// CHECK-NEXT: cleanupret from %[[pad]] unwind to caller
 
-// CHECK: define internal void @"\01?fin$0@0@use_abnormal_termination@@"(i8 %abnormal_termination, i8* %frame_pointer)
-// CHECK: %[[abnormal_zext:[^ ]*]] = zext i8 %abnormal_termination to i32
+// CHECK: define internal void @"\01?fin$0@0@use_abnormal_termination@@"({{i8( zeroext)?}} %[[abnormal:abnormal_termination]], i8* %frame_pointer)
+// CHECK: %[[abnormal_zext:[^ ]*]] = zext i8 %[[abnormal]] to i32
 // CHECK: store i32 %[[abnormal_zext]], i32* @crashed
 // CHECK-NEXT: ret void
 
@@ -109,11 +108,10 @@
 }
 
 // CHECK-LABEL: define void @noreturn_noop_finally()
-// CHECK: %[[fp:[^ ]*]] = call i8* @llvm.frameaddress(i32 0)
-// CHECK: call void @"\01?fin$0@0@noreturn_noop_finally@@"(i8 0, i8* %[[fp]])
+// CHECK: call void @"\01?fin$0@0@noreturn_noop_finally@@"({{.*}})
 // CHECK: ret void
 
-// CHECK: define internal void @"\01?fin$0@0@noreturn_noop_finally@@"(i8 %abnormal_termination, i8* %frame_pointer)
+// CHECK: define internal void @"\01?fin$0@0@noreturn_noop_finally@@"({{.*}})
 // CHECK: call void @abort()
 // CHECK: unreachable
 
@@ -130,18 +128,15 @@
 // CHECK:     to label %[[cont:[^ ]*]] unwind label %[[lpad:[^ ]*]]
 //
 // CHECK: [[cont]]
-// CHECK: %[[fp:[^ ]*]] = call i8* @llvm.frameaddress(i32 0)
-// CHECK: call void @"\01?fin$0@0@noreturn_finally@@"(i8 0, i8* %[[fp]])
+// CHECK: call void @"\01?fin$0@0@noreturn_finally@@"({{.*}})
 // CHECK: ret void
 //
 // CHECK: [[lpad]]
-// CHECK: landingpad
-// CHECK-NEXT: cleanup
-// CHECK: %[[fp:[^ ]*]] = call i8* @llvm.frameaddress(i32 0)
-// CHECK: call void @"\01?fin$0@0@noreturn_finally@@"(i8 1, i8* %[[fp]])
-// CHECK: resume { i8*, i32 }
+// CHECK-NEXT: %[[pad:[^ ]*]] = cleanuppad
+// CHECK: call void @"\01?fin$0@0@noreturn_finally@@"({{.*}})
+// CHECK-NEXT: cleanupret from %[[pad]] unwind to caller
 
-// CHECK: define internal void @"\01?fin$0@0@noreturn_finally@@"(i8 %abnormal_termination, i8* %frame_pointer)
+// CHECK: define internal void @"\01?fin$0@0@noreturn_finally@@"({{.*}})
 // CHECK: call void @abort()
 // CHECK: unreachable
 
@@ -152,11 +147,10 @@
   }
 }
 // CHECK-LABEL: define i32 @finally_with_return()
-// CHECK: %[[fp:[^ ]*]] = call i8* @llvm.frameaddress(i32 0)
-// CHECK-NEXT: call void @"\01?fin$0@0@finally_with_return@@"(i8 0, i8* %[[fp]])
+// CHECK: call void @"\01?fin$0@0@finally_with_return@@"({{.*}})
 // CHECK-NEXT: ret i32 42
 
-// CHECK: define internal void @"\01?fin$0@0@finally_with_return@@"(i8 %abnormal_termination, i8* %frame_pointer)
+// CHECK: define internal void @"\01?fin$0@0@finally_with_return@@"({{.*}})
 // CHECK-NOT: br i1
 // CHECK-NOT: br label
 // CHECK: ret void
@@ -174,27 +168,26 @@
 }
 
 // CHECK-LABEL: define i32 @nested___finally___finally
-// CHECK: %[[fp:[^ ]*]] = call i8* @llvm.frameaddress(i32 0)
-// CHECK: invoke void @"\01?fin$1@0@nested___finally___finally@@"(i8 0, i8* %[[fp]])
+// CHECK: invoke void @"\01?fin$1@0@nested___finally___finally@@"({{.*}})
 // CHECK:          to label %[[outercont:[^ ]*]] unwind label %[[lpad:[^ ]*]]
 //
 // CHECK: [[outercont]]
-// CHECK-NEXT: %[[fp:[^ ]*]] = call i8* @llvm.frameaddress(i32 0)
-// CHECK-NEXT: call void @"\01?fin$0@0@nested___finally___finally@@"(i8 0, i8* %[[fp]])
+// CHECK: call void @"\01?fin$0@0@nested___finally___finally@@"({{.*}})
 // CHECK-NEXT: ret i32 0
 //
 // CHECK: [[lpad]]
-// CHECK-NEXT: landingpad
-// CHECK-NEXT: cleanup
-// CHECK: %[[fp:[^ ]*]] = call i8* @llvm.frameaddress(i32 0)
-// CHECK-NEXT: call void @"\01?fin$0@0@nested___finally___finally@@"(i8 1, i8* %[[fp]])
+// CHECK-NEXT: %[[pad:[^ ]*]] = cleanuppad
+// CHECK: call void @"\01?fin$0@0@nested___finally___finally@@"({{.*}})
+// CHECK-NEXT: cleanupret from %[[pad]] unwind to caller
 
-// CHECK-LABEL: define internal void @"\01?fin$0@0@nested___finally___finally@@"(i8 %abnormal_termination, i8* %frame_pointer)
+// CHECK-LABEL: define internal void @"\01?fin$0@0@nested___finally___finally@@"({{.*}})
 // CHECK: ret void
 
-// CHECK-LABEL: define internal void @"\01?fin$1@0@nested___finally___finally@@"(i8 %abnormal_termination, i8* %frame_pointer)
+// CHECK-LABEL: define internal void @"\01?fin$1@0@nested___finally___finally@@"({{.*}})
 // CHECK: unreachable
 
+// FIXME: Our behavior seems suspiciously different.
+
 int nested___finally___finally_with_eh_edge() {
   __try {
     __try {
@@ -212,31 +205,28 @@
 // CHECK-NEXT: to label %[[invokecont:[^ ]*]] unwind label %[[lpad1:[^ ]*]]
 //
 // [[invokecont]]
-// CHECK: %[[fp:[^ ]*]] = call i8* @llvm.frameaddress(i32 0)
-// CHECK: invoke void @"\01?fin$1@0@nested___finally___finally_with_eh_edge@@"(i8 0, i8* %[[fp]])
-// CHECK:          to label %[[outercont:[^ ]*]] unwind label %[[lpad2:[^ ]*]]
+// CHECK: invoke void @"\01?fin$1@0@nested___finally___finally_with_eh_edge@@"({{.*}})
+// CHECK-NEXT:       to label %[[outercont:[^ ]*]] unwind label %[[lpad2:[^ ]*]]
 //
 // CHECK: [[outercont]]
-// CHECK-NEXT: %[[fp:[^ ]*]] = call i8* @llvm.frameaddress(i32 0)
-// CHECK-NEXT: call void @"\01?fin$0@0@nested___finally___finally_with_eh_edge@@"(i8 0, i8* %[[fp]])
+// CHECK: call void @"\01?fin$0@0@nested___finally___finally_with_eh_edge@@"({{.*}})
 // CHECK-NEXT: ret i32 912
 //
 // CHECK: [[lpad1]]
-// CHECK-NEXT: landingpad
-// CHECK-NEXT: cleanup
-// CHECK: %[[fp:[^ ]*]] = call i8* @llvm.frameaddress(i32 0)
-// CHECK: invoke void @"\01?fin$1@0@nested___finally___finally_with_eh_edge@@"(i8 1, i8* %[[fp]])
-// CHECK:          to label %[[outercont:[^ ]*]] unwind label %[[lpad2]]
+// CHECK-NEXT: %[[innerpad:[^ ]*]] = cleanuppad
+// CHECK: invoke void @"\01?fin$1@0@nested___finally___finally_with_eh_edge@@"({{.*}})
+// CHECK-NEXT:    label %[[innercleanupretbb:[^ ]*]] unwind label %[[lpad2:[^ ]*]]
+//
+// CHECK: [[innercleanupretbb]]
+// CHECK-NEXT: cleanupret from %[[innerpad]] unwind label %[[lpad2]]
 //
 // CHECK: [[lpad2]]
-// CHECK-NEXT: landingpad
-// CHECK-NEXT: cleanup
-// CHECK: %[[fp:[^ ]*]] = call i8* @llvm.frameaddress(i32 0)
-// CHECK: call void @"\01?fin$0@0@nested___finally___finally_with_eh_edge@@"(i8 1, i8* %[[fp]])
-// CHECK: resume
+// CHECK-NEXT: %[[outerpad:[^ ]*]] = cleanuppad
+// CHECK: call void @"\01?fin$0@0@nested___finally___finally_with_eh_edge@@"({{.*}})
+// CHECK-NEXT: cleanupret from %[[outerpad]] unwind to caller
 
-// CHECK-LABEL: define internal void @"\01?fin$0@0@nested___finally___finally_with_eh_edge@@"(i8 %abnormal_termination, i8* %frame_pointer)
+// CHECK-LABEL: define internal void @"\01?fin$0@0@nested___finally___finally_with_eh_edge@@"({{.*}})
 // CHECK: ret void
 
-// CHECK-LABEL: define internal void @"\01?fin$1@0@nested___finally___finally_with_eh_edge@@"(i8 %abnormal_termination, i8* %frame_pointer)
+// CHECK-LABEL: define internal void @"\01?fin$1@0@nested___finally___finally_with_eh_edge@@"({{.*}})
 // CHECK: unreachable
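
All of the "\01?fin$N@0@..." helpers in these checks are outlined __finally bodies; the first function in the test has roughly this shape, sketched from the names that appear in the checks (basic_finally, might_crash, cleanup):

void basic_finally(void) {
  __try {
    might_crash();
  } __finally {
    // Outlined as "\01?fin$0@0@basic_finally@@"(abnormal_termination, frame_pointer).
    // With the new MS EH lowering the exceptional path reaches it through a
    // cleanuppad/cleanupret pair instead of a landingpad/resume pair.
    cleanup();
  }
}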
diff --git a/test/CodeGen/exceptions-seh-leave.c b/test/CodeGen/exceptions-seh-leave.c
index 36b896d..a0b1956 100644
--- a/test/CodeGen/exceptions-seh-leave.c
+++ b/test/CodeGen/exceptions-seh-leave.c
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 %s -triple x86_64-pc-win32 -fms-extensions -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 %s -triple x86_64-pc-win32 -fms-extensions -fnew-ms-eh -emit-llvm -o - | opt -instnamer -S | FileCheck %s
 
 void g(void);
 
@@ -74,7 +74,7 @@
 // CHECK-NEXT: br label %[[tryleave:[^ ]*]]
 // CHECK-NOT: store i32 23
 // CHECK: [[tryleave]]
-// CHECK-NEXT: %[[fp:[^ ]*]] = call i8* @llvm.frameaddress(i32 0)
+// CHECK-NEXT: %[[fp:[^ ]*]] = call i8* @llvm.localaddress()
 // CHECK-NEXT: call void @"\01?fin$0@0@__leave_with___finally_simple@@"(i8 0, i8* %[[fp]])
 
 // __finally block doesn't return, __finally.cont doesn't exist.
@@ -94,7 +94,7 @@
 // CHECK-NEXT: br label %[[tryleave:[^ ]*]]
 // CHECK-NOT: store i32 23
 // CHECK: [[tryleave]]
-// CHECK-NEXT: %[[fp:[^ ]*]] = call i8* @llvm.frameaddress(i32 0)
+// CHECK-NEXT: %[[fp:[^ ]*]] = call i8* @llvm.localaddress()
 // CHECK-NEXT: call void @"\01?fin$0@0@__leave_with___finally_noreturn@@"(i8 0, i8* %[[fp]])
 
 // The "normal" case.
@@ -118,7 +118,7 @@
 // CHECK-NEXT: br label %[[tryleave:[^ ]*]]
 // CHECK-NOT: store i32 23
 // CHECK: [[tryleave]]
-// CHECK-NEXT: %[[fp:[^ ]*]] = call i8* @llvm.frameaddress(i32 0)
+// CHECK-NEXT: %[[fp:[^ ]*]] = call i8* @llvm.localaddress()
 // CHECK-NEXT: call void @"\01?fin$0@0@__leave_with___finally@@"(i8 0, i8* %[[fp]])
 
 
@@ -148,7 +148,7 @@
 // CHECK-NEXT:       to label %[[g1_cont1:.*]] unwind label %[[g1_lpad:.*]]
 
 // CHECK: [[g1_cont1]]
-// CHECK-NEXT: %[[fp:[^ ]*]] = call i8* @llvm.frameaddress(i32 0)
+// CHECK-NEXT: %[[fp:[^ ]*]] = call i8* @llvm.localaddress()
 // CHECK-NEXT: invoke void @"\01?fin$0@0@nested___except___finally@@"(i8 0, i8* %[[fp]])
 // CHECK-NEXT:       to label %[[fin_cont:.*]] unwind label %[[g2_lpad:.*]]
 
@@ -157,13 +157,15 @@
 // CHECK-NEXT: br label %[[trycont:[^ ]*]]
 
 // CHECK: [[g1_lpad]]
-// CHECK-NEXT: landingpad
-// CHECK-NEXT: catch i8* null
-// CHECK: %[[fp:[^ ]*]] = call i8* @llvm.frameaddress(i32 0)
+// CHECK-NEXT: cleanuppad
+// CHECK-NEXT: %[[fp:[^ ]*]] = call i8* @llvm.localaddress()
 // CHECK-NEXT: invoke void @"\01?fin$0@0@nested___except___finally@@"(i8 1, i8* %[[fp]])
 // CHECK-NEXT:       to label %[[g1_resume:.*]] unwind label %[[g2_lpad]]
+// CHECK: cleanupret {{.*}} unwind label %[[g2_lpad]]
 
 // CHECK: [[g2_lpad]]
+// CHECK: catchpad {{.*}} [i8* null]
+// CHECK: catchret
 // CHECK: br label %[[trycont]]
 
 // CHECK: [[trycont]]
@@ -197,29 +199,28 @@
 // CHECK-LABEL: invoke void @g()
 // CHECK-NEXT:       to label %[[g1_cont:.*]] unwind label %[[g1_lpad:.*]]
 
-// CHECK: [[g1_cont]]
-// CHECK: store i32 16, i32* %myres
-// CHECK-NEXT: br label %[[trycont:[^ ]*]]
-
 // CHECK: [[g1_lpad]]
-// CHECK:  br label %[[except:[^ ]*]]
-
+// CHECK: catchpad {{.*}} [i8* null]
+// CHECK: catchret {{.*}} to label %[[except:[^ ]*]]
 // CHECK: [[except]]
-// CHECK-NEXT: invoke void @g()
+// CHECK: invoke void @g()
 // CHECK-NEXT:       to label %[[g2_cont:.*]] unwind label %[[g2_lpad:.*]]
 
+// CHECK: [[g2_lpad]]
+// CHECK: catchpad {{.*}} [i8* null]
+// CHECK: catchret
+// CHECK: br label %[[trycont4:[^ ]*]]
+
+// CHECK: [[trycont4]]
+// CHECK-NEXT: ret i32 1
+
 // CHECK: [[g2_cont]]
 // CHECK-NEXT: br label %[[tryleave:[^ ]*]]
 // CHECK-NOT: store i32 23
 
-// CHECK: [[g2_lpad]]
-// CHECK: br label %[[outerexcept:[^ ]*]]
-
-// CHECK: [[outerexcept]]
-// CHECK-NEXT: br label %[[trycont4:[^ ]*]]
-
-// CHECK: [[trycont4]]
-// CHECK-NEXT: ret i32 1
+// CHECK: [[g1_cont]]
+// CHECK: store i32 16, i32* %myres
+// CHECK-NEXT: br label %[[trycont:[^ ]*]]
 
 // CHECK: [[trycont]]
 // CHECK-NEXT: store i32 51, i32* %myres
@@ -251,36 +252,33 @@
 // CHECK-LABEL: invoke void @g()
 // CHECK-NEXT:       to label %[[g1_cont:.*]] unwind label %[[g1_lpad:.*]]
 
-// CHECK: [[g1_cont]]
-// CHECK-NEXT: br label %[[trycont:[^ ]*]]
-
 // CHECK: [[g1_lpad]]
-// CHECK:  br label %[[except:[^ ]*]]
-
-// CHECK: [[except]]
-// CHECK-NEXT: invoke void @g()
+// CHECK: catchpad
+// CHECK: catchret
+// CHECK: invoke void @g()
 // CHECK-NEXT:       to label %[[g2_cont:.*]] unwind label %[[g2_lpad:.*]]
 
 // CHECK: [[g2_cont]]
-// CHECK-NEXT: br label %[[tryleave:[^ ]*]]
+// CHECK: br label %[[tryleave:[^ ]*]]
 // CHECK-NOT: 23
 
-// CHECK: [[g2_lpad]]
-// CHECK: %[[fp:[^ ]*]] = call i8* @llvm.frameaddress(i32 0)
-// CHECK-NEXT: call void @"\01?fin$0@0@nested___finally___except@@"(i8 1, i8* %[[fp]])
-// CHECK-NEXT: br label %[[ehresume:[^ ]*]]
+// CHECK: [[g1_cont]]
+// CHECK-NEXT: br label %[[trycont:[^ ]*]]
 
 // CHECK: [[trycont]]
 // CHECK: store i32 51, i32* %
 // CHECK-NEXT: br label %[[tryleave]]
 
 // CHECK: [[tryleave]]
-// CHECK: %[[fp:[^ ]*]] = call i8* @llvm.frameaddress(i32 0)
+// CHECK: %[[fp:[^ ]*]] = call i8* @llvm.localaddress()
 // CHECK-NEXT: call void @"\01?fin$0@0@nested___finally___except@@"(i8 0, i8* %[[fp]])
 // CHECK-NEXT: ret i32 1
 
-// CHECK: [[ehresume]]
-// CHECK: resume
+// CHECK: [[g2_lpad]]
+// CHECK: cleanuppad
+// CHECK: %[[fp:[^ ]*]] = call i8* @llvm.localaddress()
+// CHECK-NEXT: call void @"\01?fin$0@0@nested___finally___except@@"(i8 1, i8* %[[fp]])
+// CHECK: cleanupret {{.*}} unwind to caller
 
 // CHECK-LABEL: define internal void @"\01?fin$0@0@nested___finally___except@@"(i8 %abnormal_termination, i8* %frame_pointer)
 // CHECK: ret void
@@ -306,40 +304,34 @@
 // The order of basic blocks in the below doesn't matter.
 // CHECK-LABEL: define i32 @nested___finally___finally()
 
-// CHECK-LABEL: invoke void @g()
+// CHECK: invoke void @g()
 // CHECK-NEXT:       to label %[[g1_cont:.*]] unwind label %[[g1_lpad:.*]]
 
 // CHECK: [[g1_cont]]
 // CHECK: store i32 16, i32* %[[myres:[^ ]*]],
-// CHECK: %[[fp:[^ ]*]] = call i8* @llvm.frameaddress(i32 0)
+// CHECK: %[[fp:[^ ]*]] = call i8* @llvm.localaddress()
 // CHECK-NEXT: invoke void @"\01?fin$1@0@nested___finally___finally@@"(i8 0, i8* %[[fp]])
 // CHECK-NEXT:       to label %[[finally_cont:.*]] unwind label %[[g2_lpad:.*]]
 
 // CHECK: [[finally_cont]]
 // CHECK: store i32 51, i32* %[[myres]]
-// CHECK: %[[fp:[^ ]*]] = call i8* @llvm.frameaddress(i32 0)
+// CHECK: %[[fp:[^ ]*]] = call i8* @llvm.localaddress()
 // CHECK-NEXT: call void @"\01?fin$0@0@nested___finally___finally@@"(i8 0, i8* %[[fp]])
 // CHECK-NEXT: ret i32 1
 
 // CHECK: [[g1_lpad]]
-// CHECK-NEXT: landingpad
-// CHECK-NEXT: cleanup
-// CHECK: %[[fp:[^ ]*]] = call i8* @llvm.frameaddress(i32 0)
+// CHECK-NEXT: %[[padtoken:[^ ]*]] = cleanuppad within none []
+// CHECK-NEXT: %[[fp:[^ ]*]] = call i8* @llvm.localaddress()
 // CHECK-NEXT: invoke void @"\01?fin$1@0@nested___finally___finally@@"(i8 1, i8* %[[fp]])
 // CHECK-NEXT:       to label %[[finally_cont2:.*]] unwind label %[[g2_lpad]]
+// CHECK: [[finally_cont2]]
+// CHECK: cleanupret from %[[padtoken]] unwind label %[[g2_lpad]]
 
 // CHECK: [[g2_lpad]]
-// CHECK-NEXT: landingpad
-// CHECK-NEXT: cleanup
-// CHECK: br label %[[ehcleanup:.*]]
-
-// CHECK: [[finally_cont2]]
-// CHECK: br label %[[ehcleanup]]
-
-// CHECK: [[ehcleanup]]
-// CHECK: %[[fp:[^ ]*]] = call i8* @llvm.frameaddress(i32 0)
+// CHECK-NEXT: %[[padtoken:[^ ]*]] = cleanuppad within none []
+// CHECK-NEXT: %[[fp:[^ ]*]] = call i8* @llvm.localaddress()
 // CHECK-NEXT: call void @"\01?fin$0@0@nested___finally___finally@@"(i8 1, i8* %[[fp]])
-// CHECK: resume
+// CHECK: cleanupret from %[[padtoken]] unwind to caller
 
 // CHECK-LABEL: define internal void @"\01?fin$0@0@nested___finally___finally@@"(i8 %abnormal_termination, i8* %frame_pointer)
 // CHECK: ret void
diff --git a/test/CodeGen/exceptions-seh.c b/test/CodeGen/exceptions-seh.c
index d8135e8..b027bd8 100644
--- a/test/CodeGen/exceptions-seh.c
+++ b/test/CodeGen/exceptions-seh.c
@@ -1,4 +1,11 @@
-// RUN: %clang_cc1 %s -triple x86_64-pc-win32 -fms-extensions -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 %s -triple x86_64-pc-win32 -fms-extensions -fnew-ms-eh -emit-llvm -o - \
+// RUN:         | FileCheck %s --check-prefix=CHECK --check-prefix=X64
+// RUN: %clang_cc1 %s -triple i686-pc-win32 -fms-extensions -fnew-ms-eh -emit-llvm -o - \
+// RUN:         | FileCheck %s --check-prefix=CHECK --check-prefix=X86
+// RUN: %clang_cc1 %s -triple i686-pc-windows-gnu -fms-extensions -fnew-ms-eh -emit-llvm -o - \
+// RUN:         | FileCheck %s --check-prefix=X86-GNU
+// RUN: %clang_cc1 %s -triple x86_64-pc-windows-gnu -fms-extensions -fnew-ms-eh -emit-llvm -o - \
+// RUN:         | FileCheck %s --check-prefix=X64-GNU
 
 void try_body(int numerator, int denominator, int *myres) {
   *myres = numerator / denominator;
@@ -19,21 +26,42 @@
   *res = myres;
   return success;
 }
+
 // CHECK-LABEL: define i32 @safe_div(i32 %numerator, i32 %denominator, i32* %res)
+// X64-SAME:      personality i8* bitcast (i32 (...)* @__C_specific_handler to i8*)
+// X86-SAME:      personality i8* bitcast (i32 (...)* @_except_handler3 to i8*)
 // CHECK: invoke void @try_body(i32 %{{.*}}, i32 %{{.*}}, i32* %{{.*}}) #[[NOINLINE:[0-9]+]]
-// CHECK:       to label %{{.*}} unwind label %[[lpad:[^ ]*]]
+// CHECK:       to label %{{.*}} unwind label %[[catchpad:[^ ]*]]
 //
-// CHECK: [[lpad]]
-// CHECK: landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__C_specific_handler to i8*)
-// CHECK-NEXT: catch i8* null
-// CHECK-NOT: br i1
-// CHECK: br label %[[except:[^ ]*]]
+// CHECK: [[catchpad]]
+// X64: %[[padtoken:[^ ]*]] = catchpad within %{{[^ ]*}} [i8* null]
+// X86: %[[padtoken:[^ ]*]] = catchpad within %{{[^ ]*}} [i8* bitcast (i32 ()* @"\01?filt$0@0@safe_div@@" to i8*)]
+// CHECK-NEXT: catchret from %[[padtoken]] to label %[[except:[^ ]*]]
+//
 // CHECK: [[except]]
-// CHECK-NEXT: store i32 -42, i32* %[[success:[^ ]*]]
+// CHECK: store i32 -42, i32* %[[success:[^ ]*]]
 //
 // CHECK: %[[res:[^ ]*]] = load i32, i32* %[[success]]
 // CHECK: ret i32 %[[res]]
 
+// 32-bit SEH needs this filter to save the exception code.
+//
+// X86-LABEL: define internal i32 @"\01?filt$0@0@safe_div@@"()
+// X86: %[[ebp:[^ ]*]] = call i8* @llvm.frameaddress(i32 1)
+// X86: %[[fp:[^ ]*]] = call i8* @llvm.x86.seh.recoverfp(i8* bitcast (i32 (i32, i32, i32*)* @safe_div to i8*), i8* %[[ebp]])
+// X86: call i8* @llvm.localrecover(i8* bitcast (i32 (i32, i32, i32*)* @safe_div to i8*), i8* %[[fp]], i32 0)
+// X86: load i8*, i8**
+// X86: load i32*, i32**
+// X86: load i32, i32*
+// X86: store i32 %{{.*}}, i32*
+// X86: ret i32 1
+
+// MinGW uses msvcrt, so it can also use _except_handler3.
+// X86-GNU-LABEL: define i32 @safe_div(i32 %numerator, i32 %denominator, i32* %res)
+// X86-GNU-SAME:      personality i8* bitcast (i32 (...)* @_except_handler3 to i8*)
+// X64-GNU-LABEL: define i32 @safe_div(i32 %numerator, i32 %denominator, i32* %res)
+// X64-GNU-SAME:      personality i8* bitcast (i32 (...)* @__C_specific_handler to i8*)
+
 void j(void);
 
 int filter_expr_capture(void) {
@@ -47,19 +75,28 @@
 }
 
 // CHECK-LABEL: define i32 @filter_expr_capture()
-// CHECK: call void (...) @llvm.frameescape(i32* %[[r:[^ ,]*]])
+// X64-SAME: personality i8* bitcast (i32 (...)* @__C_specific_handler to i8*)
+// X86-SAME: personality i8* bitcast (i32 (...)* @_except_handler3 to i8*)
+// X64: call void (...) @llvm.localescape(i32* %[[r:[^ ,]*]])
+// X86: call void (...) @llvm.localescape(i32* %[[r:[^ ,]*]], i32* %[[code:[^ ,]*]])
 // CHECK: store i32 42, i32* %[[r]]
 // CHECK: invoke void @j() #[[NOINLINE]]
 //
-// CHECK: landingpad
-// CHECK-NEXT: catch i8* bitcast (i32 (i8*, i8*)* @"\01?filt$0@0@filter_expr_capture@@" to i8*)
+// CHECK: catchpad within %{{[^ ]*}} [i8* bitcast (i32 ({{.*}})* @"\01?filt$0@0@filter_expr_capture@@" to i8*)]
 // CHECK: store i32 13, i32* %[[r]]
 //
 // CHECK: %[[rv:[^ ]*]] = load i32, i32* %[[r]]
 // CHECK: ret i32 %[[rv]]
 
-// CHECK-LABEL: define internal i32 @"\01?filt$0@0@filter_expr_capture@@"(i8* %exception_pointers, i8* %frame_pointer)
-// CHECK: call i8* @llvm.framerecover(i8* bitcast (i32 ()* @filter_expr_capture to i8*), i8* %frame_pointer, i32 0)
+// X64-LABEL: define internal i32 @"\01?filt$0@0@filter_expr_capture@@"(i8* %exception_pointers, i8* %frame_pointer)
+// X64: %[[fp:[^ ]*]] = call i8* @llvm.x86.seh.recoverfp(i8* bitcast (i32 ()* @filter_expr_capture to i8*), i8* %frame_pointer)
+// X64: call i8* @llvm.localrecover(i8* bitcast (i32 ()* @filter_expr_capture to i8*), i8* %[[fp]], i32 0)
+//
+// X86-LABEL: define internal i32 @"\01?filt$0@0@filter_expr_capture@@"()
+// X86: %[[ebp:[^ ]*]] = call i8* @llvm.frameaddress(i32 1)
+// X86: %[[fp:[^ ]*]] = call i8* @llvm.x86.seh.recoverfp(i8* bitcast (i32 ()* @filter_expr_capture to i8*), i8* %[[ebp]])
+// X86: call i8* @llvm.localrecover(i8* bitcast (i32 ()* @filter_expr_capture to i8*), i8* %[[fp]], i32 0)
+//
 // CHECK: store i32 -1, i32* %{{.*}}
 // CHECK: ret i32 -1
 
@@ -78,31 +115,23 @@
   return r;
 }
 // CHECK-LABEL: define i32 @nested_try()
+// X64-SAME: personality i8* bitcast (i32 (...)* @__C_specific_handler to i8*)
+// X86-SAME: personality i8* bitcast (i32 (...)* @_except_handler3 to i8*)
 // CHECK: store i32 42, i32* %[[r:[^ ,]*]]
 // CHECK: invoke void @j() #[[NOINLINE]]
-// CHECK:       to label %[[cont:[^ ]*]] unwind label %[[lpad:[^ ]*]]
+// CHECK:       to label %[[cont:[^ ]*]] unwind label %[[cswitch_inner:[^ ]*]]
 //
-// CHECK: [[cont]]
-// CHECK: store i32 0, i32* %[[r]]
-// CHECK: br label %[[inner_try_cont:[^ ]*]]
+// CHECK: [[cswitch_inner]]
+// CHECK: %[[cs_inner:[^ ]*]] = catchswitch within none [label %[[cpad_inner:[^ ]*]]] unwind label %[[cswitch_outer:[^ ]*]]
 //
-// CHECK: [[lpad]]
-// CHECK: landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__C_specific_handler to i8*)
-// CHECK: catch i8* bitcast (i32 (i8*, i8*)* @"\01?filt$1@0@nested_try@@" to i8*)
-// CHECK: catch i8* bitcast (i32 (i8*, i8*)* @"\01?filt$0@0@nested_try@@" to i8*)
-// CHECK: store i8* %{{.*}}, i8** %[[ehptr_slot:[^ ]*]]
-// CHECK: store i32 %{{.*}}, i32* %[[sel_slot:[^ ]*]]
+// CHECK: [[cswitch_outer]]
+// CHECK: %[[cs_outer:[^ ]*]] = catchswitch within none [label %[[cpad_outer:[^ ]*]]] unwind to caller
 //
-// CHECK: load i32, i32* %[[sel_slot]]
-// CHECK: call i32 @llvm.eh.typeid.for(i8* bitcast (i32 (i8*, i8*)* @"\01?filt$1@0@nested_try@@" to i8*))
-// CHECK: icmp eq i32
-// CHECK: br i1
+// CHECK: [[cpad_outer]]
+// CHECK: catchpad within %{{[^ ]*}} [i8* bitcast (i32 ({{.*}})* @"\01?filt$0@0@nested_try@@" to i8*)]
+// CHECK-NEXT: catchret {{.*}} to label %[[except_outer:[^ ]*]]
 //
-// CHECK: load i32, i32* %[[sel_slot]]
-// CHECK: call i32 @llvm.eh.typeid.for(i8* bitcast (i32 (i8*, i8*)* @"\01?filt$0@0@nested_try@@" to i8*))
-// CHECK: icmp eq i32
-// CHECK: br i1
-//
+// CHECK: [[except_outer]]
 // CHECK: store i32 456, i32* %[[r]]
 // CHECK: br label %[[outer_try_cont:[^ ]*]]
 //
@@ -110,45 +139,68 @@
 // CHECK: %[[r_load:[^ ]*]] = load i32, i32* %[[r]]
 // CHECK: ret i32 %[[r_load]]
 //
+// CHECK: [[cpad_inner]]
+// CHECK: catchpad within %[[cs_inner]] [i8* bitcast (i32 ({{.*}})* @"\01?filt$1@0@nested_try@@" to i8*)]
+// CHECK-NEXT: catchret {{.*}} to label %[[except_inner:[^ ]*]]
+//
+// CHECK: [[except_inner]]
 // CHECK: store i32 123, i32* %[[r]]
-// CHECK: br label %[[inner_try_cont]]
+// CHECK: br label %[[inner_try_cont:[^ ]*]]
 //
 // CHECK: [[inner_try_cont]]
 // CHECK: br label %[[outer_try_cont]]
+//
+// CHECK: [[cont]]
+// CHECK: store i32 0, i32* %[[r]]
+// CHECK: br label %[[inner_try_cont]]
+//
+// CHECK-LABEL: define internal i32 @"\01?filt$0@0@nested_try@@"({{.*}})
+// X86: call i8* @llvm.x86.seh.recoverfp({{.*}})
+// CHECK: load i32*, i32**
+// CHECK: load i32, i32*
+// CHECK: icmp eq i32 %{{.*}}, 456
+//
+// CHECK-LABEL: define internal i32 @"\01?filt$1@0@nested_try@@"({{.*}})
+// X86: call i8* @llvm.x86.seh.recoverfp({{.*}})
+// CHECK: load i32*, i32**
+// CHECK: load i32, i32*
+// CHECK: icmp eq i32 %{{.*}}, 123
 
-static unsigned g = 0;
-void basic_finally(void) {
-  ++g;
+int basic_finally(int g) {
   __try {
     j();
   } __finally {
-    --g;
+    ++g;
   }
+  return g;
 }
-// CHECK-LABEL: define void @basic_finally()
-// CHECK: load i32, i32* @g
-// CHECK: add i32 %{{.*}}, 1
-// CHECK: store i32 %{{.*}}, i32* @g
+// CHECK-LABEL: define i32 @basic_finally(i32 %g)
+// X64-SAME: personality i8* bitcast (i32 (...)* @__C_specific_handler to i8*)
+// X86-SAME: personality i8* bitcast (i32 (...)* @_except_handler3 to i8*)
+// CHECK: %[[g_addr:[^ ]*]] = alloca i32, align 4
+// CHECK: call void (...) @llvm.localescape(i32* %[[g_addr]])
+// CHECK: store i32 %g, i32* %[[g_addr]]
 //
 // CHECK: invoke void @j()
-// CHECK:       to label %[[cont:[^ ]*]] unwind label %[[lpad:[^ ]*]]
+// CHECK:       to label %[[cont:[^ ]*]] unwind label %[[cleanuppad:[^ ]*]]
 //
 // CHECK: [[cont]]
-// CHECK: %[[fp:[^ ]*]] = call i8* @llvm.frameaddress(i32 0)
-// CHECK: call void @"\01?fin$0@0@basic_finally@@"(i8 0, i8* %[[fp]])
-// CHECK: ret void
+// CHECK: %[[fp:[^ ]*]] = call i8* @llvm.localaddress()
+// CHECK: call void @"\01?fin$0@0@basic_finally@@"({{i8( zeroext)?}} 0, i8* %[[fp]])
+// CHECK: load i32, i32* %[[g_addr]], align 4
+// CHECK: ret i32
 //
-// CHECK: [[lpad]]
-// CHECK: landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__C_specific_handler to i8*)
-// CHECK-NEXT: cleanup
-// CHECK: %[[fp:[^ ]*]] = call i8* @llvm.frameaddress(i32 0)
-// CHECK: call void @"\01?fin$0@0@basic_finally@@"(i8 1, i8* %[[fp]])
-// CHECK: resume
+// CHECK: [[cleanuppad]]
+// CHECK: %[[padtoken:[^ ]*]] = cleanuppad within none []
+// CHECK: %[[fp:[^ ]*]] = call i8* @llvm.localaddress()
+// CHECK: call void @"\01?fin$0@0@basic_finally@@"({{i8( zeroext)?}} 1, i8* %[[fp]])
+// CHECK: cleanupret from %[[padtoken]] unwind to caller
 
-// CHECK: define internal void @"\01?fin$0@0@basic_finally@@"(i8 %abnormal_termination, i8* %frame_pointer)
-// CHECK:   load i32, i32* @g, align 4
-// CHECK:   add i32 %{{.*}}, -1
-// CHECK:   store i32 %{{.*}}, i32* @g, align 4
+// CHECK: define internal void @"\01?fin$0@0@basic_finally@@"({{i8( zeroext)?}} %abnormal_termination, i8* %frame_pointer)
+// CHECK:   call i8* @llvm.localrecover(i8* bitcast (i32 (i32)* @basic_finally to i8*), i8* %frame_pointer, i32 0)
+// CHECK:   load i32, i32* %{{.*}}, align 4
+// CHECK:   add nsw i32 %{{.*}}, 1
+// CHECK:   store i32 %{{.*}}, i32* %{{.*}}, align 4
 // CHECK:   ret void
 
 int returns_int(void);
@@ -161,18 +213,71 @@
 }
 // CHECK-LABEL: define i32 @except_return()
 // CHECK: %[[tmp:[^ ]*]] = invoke i32 @returns_int()
-// CHECK:       to label %[[cont:[^ ]*]] unwind label %[[lpad:[^ ]*]]
+// CHECK:       to label %[[cont:[^ ]*]] unwind label %[[catchpad:[^ ]*]]
 //
-// CHECK: [[cont]]
-// CHECK: store i32 %[[tmp]], i32* %[[rv:[^ ]*]]
+// CHECK: [[catchpad]]
+// CHECK: catchpad
+// CHECK: catchret
+// CHECK: store i32 42, i32* %[[rv:[^ ]*]]
 // CHECK: br label %[[retbb:[^ ]*]]
 //
-// CHECK: [[lpad]]
-// CHECK: store i32 42, i32* %[[rv]]
+// CHECK: [[cont]]
+// CHECK: store i32 %[[tmp]], i32* %[[rv]]
 // CHECK: br label %[[retbb]]
 //
 // CHECK: [[retbb]]
 // CHECK: %[[r:[^ ]*]] = load i32, i32* %[[rv]]
 // CHECK: ret i32 %[[r]]
 
+
+// PR24751: don't assert if a variable is used twice in a __finally block.
+// Also, make sure we don't do redundant work to capture/project it.
+void finally_capture_twice(int x) {
+  __try {
+  } __finally {
+    int y = x;
+    int z = x;
+  }
+}
+//
+// CHECK-LABEL: define void @finally_capture_twice(
+// CHECK:         [[X:%.*]] = alloca i32, align 4
+// CHECK:         call void (...) @llvm.localescape(i32* [[X]])
+// CHECK-NEXT:    store i32 {{.*}}, i32* [[X]], align 4
+// CHECK-NEXT:    [[LOCAL:%.*]] = call i8* @llvm.localaddress()
+// CHECK-NEXT:    call void [[FINALLY:@.*]](i8{{ zeroext | }}0, i8* [[LOCAL]])
+// CHECK:       define internal void [[FINALLY]](
+// CHECK:         [[LOCAL:%.*]] = call i8* @llvm.localrecover(
+// CHECK:         [[X:%.*]] = bitcast i8* [[LOCAL]] to i32*
+// CHECK-NEXT:    [[Y:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[Z:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    store i8*
+// CHECK-NEXT:    store i8
+// CHECK-NEXT:    [[T0:%.*]] = load i32, i32* [[X]], align 4
+// CHECK-NEXT:    store i32 [[T0]], i32* [[Y]], align 4
+// CHECK-NEXT:    [[T0:%.*]] = load i32, i32* [[X]], align 4
+// CHECK-NEXT:    store i32 [[T0]], i32* [[Z]], align 4
+// CHECK-NEXT:    ret void
+
+int exception_code_in_except(void) {
+  __try {
+    try_body(0, 0, 0);
+  } __except(1) {
+    return _exception_code();
+  }
+}
+
+// CHECK-LABEL: define i32 @exception_code_in_except()
+// CHECK: %[[ret_slot:[^ ]*]] = alloca i32
+// CHECK: %[[code_slot:[^ ]*]] = alloca i32
+// CHECK: invoke void @try_body(i32 0, i32 0, i32* null)
+// CHECK: %[[pad:[^ ]*]] = catchpad
+// CHECK: catchret from %[[pad]]
+// X64: %[[code:[^ ]*]] = call i32 @llvm.eh.exceptioncode(token %[[pad]])
+// X64: store i32 %[[code]], i32* %[[code_slot]]
+// CHECK: %[[ret1:[^ ]*]] = load i32, i32* %[[code_slot]]
+// CHECK: store i32 %[[ret1]], i32* %[[ret_slot]]
+// CHECK: %[[ret2:[^ ]*]] = load i32, i32* %[[ret_slot]]
+// CHECK: ret i32 %[[ret2]]
+
 // CHECK: attributes #[[NOINLINE]] = { {{.*noinline.*}} }
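
Note on the __finally expectations above: they all follow the same outlining shape, in which the finally body becomes an internal helper taking an abnormal-termination flag plus a frame pointer, and any local it touches is shared through llvm.localescape/llvm.localrecover. The sketch below is only a hand-written model of that shape; the frame struct and helper name are illustrative stand-ins, not what clang actually emits.

    /* Hand-written model of the outlining the CHECK lines describe.  The
     * 'frame' struct and 'fin_helper' name stand in for the real
     * llvm.localescape/llvm.localrecover machinery and "?fin$0@0@...@@". */
    struct frame { int g; };

    static void fin_helper(char abnormal_termination, struct frame *fp) {
      /* Body of the __finally: runs on both normal and exceptional paths. */
      ++fp->g;
      (void)abnormal_termination;
    }

    int basic_finally_model(int g, void (*body)(void)) {
      struct frame fp = { g };
      body();              /* the __try body; may unwind                 */
      fin_helper(0, &fp);  /* normal path: flag 0                        */
      return fp.g;         /* on unwind the runtime calls fin_helper(1)  */
    }
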
diff --git a/test/CodeGen/exceptions.c b/test/CodeGen/exceptions.c
index ae0af4d..039ec84 100644
--- a/test/CodeGen/exceptions.c
+++ b/test/CodeGen/exceptions.c
@@ -5,8 +5,8 @@
 void test1() {
   extern void test1_helper(void (^)(int));
 
-  // CHECK-LABEL:     define void @test1()
-  // CHECK-ARM-LABEL: define arm_aapcscc void @test1()
+  // CHECK-LABEL:     define void @test1() {{.*}} personality i8* bitcast (i32 (...)* @__gcc_personality_v0 to i8*)
+  // CHECK-ARM-LABEL: define arm_aapcscc void @test1() {{.*}} personality i8* bitcast (i32 (...)* @__gcc_personality_sj0 to i8*)
 
   __block int x = 10;
 
@@ -14,9 +14,9 @@
   // CHECK-ARM: invoke arm_aapcscc void @test1_helper(
   test1_helper(^(int v) { x = v; });
 
-  // CHECK:          landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gcc_personality_v0 to i8*)
+  // CHECK:          landingpad { i8*, i32 }
   // CHECK-NEXT:       cleanup
-  // CHECK-ARM:      landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gcc_personality_sj0 to i8*)
+  // CHECK-ARM:      landingpad { i8*, i32 }
   // CHECK-ARM-NEXT:   cleanup
 }
 
diff --git a/test/CodeGen/exprs.c b/test/CodeGen/exprs.c
index 59afa80..f46b574 100644
--- a/test/CodeGen/exprs.c
+++ b/test/CodeGen/exprs.c
@@ -127,9 +127,10 @@
   return A[X];
 
 // CHECK: [[Xaddr:%[^ ]+]] = alloca i64, align 8
-// CHECK: load {{.*}}, {{.*}}* [[Xaddr]]
-// CHECK-NEXT: getelementptr inbounds [100 x i32], [100 x i32]* %A, i32 0, 
-// CHECK-NEXT: load i32, i32*
+// CHECK: [[A:%.*]] = alloca [100 x i32], align
+// CHECK: [[X:%.*]] = load {{.*}}, {{.*}}* [[Xaddr]]
+// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* [[A]], i64 0, i64 [[X]]
+// CHECK-NEXT: load i32, i32* [[T0]], align 4
 }
 
 int f12() {
diff --git a/test/CodeGen/ext-vector-member-alignment.c b/test/CodeGen/ext-vector-member-alignment.c
index 5f044b8..686051e 100644
--- a/test/CodeGen/ext-vector-member-alignment.c
+++ b/test/CodeGen/ext-vector-member-alignment.c
@@ -14,14 +14,12 @@
   *a = p->position.y;
   *b = p->position[0];
   p->position[2] = c;
-  // FIXME: We should be able to come up with a more aggressive alignment
-  // estimate.
   // CHECK: @func
-  // CHECK: load <4 x float>, <4 x float>* {{%.*}}, align 1
-  // CHECK: store <4 x float> {{%.*}}, <4 x float>* {{%.*}}, align 1
-  // CHECK: load <4 x float>, <4 x float>* {{%.*}}, align 1
-  // CHECK: load <4 x float>, <4 x float>* {{%.*}}, align 1
-  // CHECK: load <4 x float>, <4 x float>* {{%.*}}, align 1
-  // CHECK: store <4 x float> {{%.*}}, <4 x float>* {{%.*}}, align 1
+  // CHECK: load <4 x float>, <4 x float>* {{%.*}}, align 4
+  // CHECK: store <4 x float> {{%.*}}, <4 x float>* {{%.*}}, align 4
+  // CHECK: load <4 x float>, <4 x float>* {{%.*}}, align 4
+  // CHECK: load <4 x float>, <4 x float>* {{%.*}}, align 4
+  // CHECK: load <4 x float>, <4 x float>* {{%.*}}, align 4
+  // CHECK: store <4 x float> {{%.*}}, <4 x float>* {{%.*}}, align 4
   // CHECK: ret void
 }
diff --git a/test/CodeGen/f16c-builtins.c b/test/CodeGen/f16c-builtins.c
index 28430d5..f9cfa0d 100644
--- a/test/CodeGen/f16c-builtins.c
+++ b/test/CodeGen/f16c-builtins.c
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 %s -O3 -triple=x86_64-apple-darwin -target-feature +f16c -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 %s -triple=x86_64-apple-darwin -target-feature +f16c -emit-llvm -o - -Werror | FileCheck %s
 
 // Don't include mm_malloc.h, it's system specific.
 #define __MM_MALLOC_H
@@ -6,21 +6,25 @@
 #include <x86intrin.h>
 
 __m128 test_mm_cvtph_ps(__m128i a) {
+  // CHECK-LABEL: test_mm_cvtph_ps
   // CHECK: @llvm.x86.vcvtph2ps.128
   return _mm_cvtph_ps(a);
 }
 
 __m256 test_mm256_cvtph_ps(__m128i a) {
+  // CHECK-LABEL: test_mm256_cvtph_ps
   // CHECK: @llvm.x86.vcvtph2ps.256
   return _mm256_cvtph_ps(a);
 }
 
 __m128i test_mm_cvtps_ph(__m128 a) {
+  // CHECK-LABEL: test_mm_cvtps_ph
   // CHECK: @llvm.x86.vcvtps2ph.128
   return _mm_cvtps_ph(a, 0);
 }
 
 __m128i test_mm256_cvtps_ph(__m256 a) {
+  // CHECK-LABEL: test_mm256_cvtps_ph
   // CHECK: @llvm.x86.vcvtps2ph.256
   return _mm256_cvtps_ph(a, 0);
 }
diff --git a/test/CodeGen/fast-math.c b/test/CodeGen/fast-math.c
index 4a51358..6f98b84 100644
--- a/test/CodeGen/fast-math.c
+++ b/test/CodeGen/fast-math.c
@@ -2,7 +2,7 @@
 float f0, f1, f2;
 
 void foo(void) {
-  // CHECK-LABEL: define void @foo()
+  // CHECK-LABEL: define {{.*}}void @foo()
 
   // CHECK: fadd fast
   f0 = f1 + f2;
diff --git a/test/CodeGen/finite-math.c b/test/CodeGen/finite-math.c
index 8365b56..90a83fa 100644
--- a/test/CodeGen/finite-math.c
+++ b/test/CodeGen/finite-math.c
@@ -5,7 +5,7 @@
 float f0, f1, f2;
 
 void foo(void) {
-  // CHECK-LABEL: define void @foo()
+  // CHECK-LABEL: define {{.*}}void @foo()
 
   // FINITE: fadd nnan ninf
   // NSZ: fadd nsz
diff --git a/test/CodeGen/fma-builtins.c b/test/CodeGen/fma-builtins.c
index 3424616..922f12b 100644
--- a/test/CodeGen/fma-builtins.c
+++ b/test/CodeGen/fma-builtins.c
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 %s -O3 -triple=x86_64-apple-darwin -target-feature +fma -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 %s -triple=x86_64-apple-darwin -target-feature +fma -emit-llvm -o - | FileCheck %s
 
 // Don't include mm_malloc.h, it's system specific.
 #define __MM_MALLOC_H
diff --git a/test/CodeGen/fma4-builtins.c b/test/CodeGen/fma4-builtins.c
index b805e9a..69cbcd8 100644
--- a/test/CodeGen/fma4-builtins.c
+++ b/test/CodeGen/fma4-builtins.c
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 %s -O3 -triple=x86_64-apple-darwin -target-feature +fma4 -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 %s -triple=x86_64-apple-darwin -target-feature +fma4 -emit-llvm -o - -Werror | FileCheck %s
 
 // Don't include mm_malloc.h, it's system specific.
 #define __MM_MALLOC_H
@@ -6,161 +6,193 @@
 #include <x86intrin.h>
 
 __m128 test_mm_macc_ps(__m128 a, __m128 b, __m128 c) {
+  // CHECK-LABEL: test_mm_macc_ps
   // CHECK: @llvm.x86.fma.vfmadd.ps
   return _mm_macc_ps(a, b, c);
 }
 
 __m128d test_mm_macc_pd(__m128d a, __m128d b, __m128d c) {
+  // CHECK-LABEL: test_mm_macc_pd
   // CHECK: @llvm.x86.fma.vfmadd.pd
   return _mm_macc_pd(a, b, c);
 }
 
 __m128 test_mm_macc_ss(__m128 a, __m128 b, __m128 c) {
+  // CHECK-LABEL: test_mm_macc_ss
   // CHECK: @llvm.x86.fma.vfmadd.ss
   return _mm_macc_ss(a, b, c);
 }
 
 __m128d test_mm_macc_sd(__m128d a, __m128d b, __m128d c) {
+  // CHECK-LABEL: test_mm_macc_sd
   // CHECK: @llvm.x86.fma.vfmadd.sd
   return _mm_macc_sd(a, b, c);
 }
 
 __m128 test_mm_msub_ps(__m128 a, __m128 b, __m128 c) {
+  // CHECK-LABEL: test_mm_msub_ps
   // CHECK: @llvm.x86.fma.vfmsub.ps
   return _mm_msub_ps(a, b, c);
 }
 
 __m128d test_mm_msub_pd(__m128d a, __m128d b, __m128d c) {
+  // CHECK-LABEL: test_mm_msub_pd
   // CHECK: @llvm.x86.fma.vfmsub.pd
   return _mm_msub_pd(a, b, c);
 }
 
 __m128 test_mm_msub_ss(__m128 a, __m128 b, __m128 c) {
+  // CHECK-LABEL: test_mm_msub_ss
   // CHECK: @llvm.x86.fma.vfmsub.ss
   return _mm_msub_ss(a, b, c);
 }
 
 __m128d test_mm_msub_sd(__m128d a, __m128d b, __m128d c) {
+  // CHECK-LABEL: test_mm_msub_sd
   // CHECK: @llvm.x86.fma.vfmsub.sd
   return _mm_msub_sd(a, b, c);
 }
 
 __m128 test_mm_nmacc_ps(__m128 a, __m128 b, __m128 c) {
+  // CHECK-LABEL: test_mm_nmacc_ps
   // CHECK: @llvm.x86.fma.vfnmadd.ps
   return _mm_nmacc_ps(a, b, c);
 }
 
 __m128d test_mm_nmacc_pd(__m128d a, __m128d b, __m128d c) {
+  // CHECK-LABEL: test_mm_nmacc_pd
   // CHECK: @llvm.x86.fma.vfnmadd.pd
   return _mm_nmacc_pd(a, b, c);
 }
 
 __m128 test_mm_nmacc_ss(__m128 a, __m128 b, __m128 c) {
+  // CHECK-LABEL: test_mm_nmacc_ss
   // CHECK: @llvm.x86.fma.vfnmadd.ss
   return _mm_nmacc_ss(a, b, c);
 }
 
 __m128d test_mm_nmacc_sd(__m128d a, __m128d b, __m128d c) {
+  // CHECK-LABEL: test_mm_nmacc_sd
   // CHECK: @llvm.x86.fma.vfnmadd.sd
   return _mm_nmacc_sd(a, b, c);
 }
 
 __m128 test_mm_nmsub_ps(__m128 a, __m128 b, __m128 c) {
+  // CHECK-LABEL: test_mm_nmsub_ps
   // CHECK: @llvm.x86.fma.vfnmsub.ps
   return _mm_nmsub_ps(a, b, c);
 }
 
 __m128d test_mm_nmsub_pd(__m128d a, __m128d b, __m128d c) {
+  // CHECK-LABEL: test_mm_nmsub_pd
   // CHECK: @llvm.x86.fma.vfnmsub.pd
   return _mm_nmsub_pd(a, b, c);
 }
 
 __m128 test_mm_nmsub_ss(__m128 a, __m128 b, __m128 c) {
+  // CHECK-LABEL: test_mm_nmsub_ss
   // CHECK: @llvm.x86.fma.vfnmsub.ss
   return _mm_nmsub_ss(a, b, c);
 }
 
 __m128d test_mm_nmsub_sd(__m128d a, __m128d b, __m128d c) {
+  // CHECK-LABEL: test_mm_nmsub_sd
   // CHECK: @llvm.x86.fma.vfnmsub.sd
   return _mm_nmsub_sd(a, b, c);
 }
 
 __m128 test_mm_maddsub_ps(__m128 a, __m128 b, __m128 c) {
+  // CHECK-LABEL: test_mm_maddsub_ps
   // CHECK: @llvm.x86.fma.vfmaddsub.ps
   return _mm_maddsub_ps(a, b, c);
 }
 
 __m128d test_mm_maddsub_pd(__m128d a, __m128d b, __m128d c) {
+  // CHECK-LABEL: test_mm_maddsub_pd
   // CHECK: @llvm.x86.fma.vfmaddsub.pd
   return _mm_maddsub_pd(a, b, c);
 }
 
 __m128 test_mm_msubadd_ps(__m128 a, __m128 b, __m128 c) {
+  // CHECK-LABEL: test_mm_msubadd_ps
   // CHECK: @llvm.x86.fma.vfmsubadd.ps
   return _mm_msubadd_ps(a, b, c);
 }
 
 __m128d test_mm_msubadd_pd(__m128d a, __m128d b, __m128d c) {
+  // CHECK-LABEL: test_mm_msubadd_pd
   // CHECK: @llvm.x86.fma.vfmsubadd.pd
   return _mm_msubadd_pd(a, b, c);
 }
 
 __m256 test_mm256_macc_ps(__m256 a, __m256 b, __m256 c) {
+  // CHECK-LABEL: test_mm256_macc_ps
   // CHECK: @llvm.x86.fma.vfmadd.ps.256
   return _mm256_macc_ps(a, b, c);
 }
 
 __m256d test_mm256_macc_pd(__m256d a, __m256d b, __m256d c) {
+  // CHECK-LABEL: test_mm256_macc_pd
   // CHECK: @llvm.x86.fma.vfmadd.pd.256
   return _mm256_macc_pd(a, b, c);
 }
 
 __m256 test_mm256_msub_ps(__m256 a, __m256 b, __m256 c) {
+  // CHECK-LABEL: test_mm256_msub_ps
   // CHECK: @llvm.x86.fma.vfmsub.ps.256
   return _mm256_msub_ps(a, b, c);
 }
 
 __m256d test_mm256_msub_pd(__m256d a, __m256d b, __m256d c) {
+  // CHECK-LABEL: test_mm256_msub_pd
   // CHECK: @llvm.x86.fma.vfmsub.pd.256
   return _mm256_msub_pd(a, b, c);
 }
 
 __m256 test_mm256_nmacc_ps(__m256 a, __m256 b, __m256 c) {
+  // CHECK-LABEL: test_mm256_nmacc_ps
   // CHECK: @llvm.x86.fma.vfnmadd.ps.256
   return _mm256_nmacc_ps(a, b, c);
 }
 
 __m256d test_mm256_nmacc_pd(__m256d a, __m256d b, __m256d c) {
+  // CHECK-LABEL: test_mm256_nmacc_pd
   // CHECK: @llvm.x86.fma.vfnmadd.pd.256
   return _mm256_nmacc_pd(a, b, c);
 }
 
 __m256 test_mm256_nmsub_ps(__m256 a, __m256 b, __m256 c) {
+  // CHECK-LABEL: test_mm256_nmsub_ps
   // CHECK: @llvm.x86.fma.vfnmsub.ps.256
   return _mm256_nmsub_ps(a, b, c);
 }
 
 __m256d test_mm256_nmsub_pd(__m256d a, __m256d b, __m256d c) {
+  // CHECK-LABEL: test_mm256_nmsub_pd
   // CHECK: @llvm.x86.fma.vfnmsub.pd.256
   return _mm256_nmsub_pd(a, b, c);
 }
 
 __m256 test_mm256_maddsub_ps(__m256 a, __m256 b, __m256 c) {
+  // CHECK-LABEL: test_mm256_maddsub_ps
   // CHECK: @llvm.x86.fma.vfmaddsub.ps.256
   return _mm256_maddsub_ps(a, b, c);
 }
 
 __m256d test_mm256_maddsub_pd(__m256d a, __m256d b, __m256d c) {
+  // CHECK-LABEL: test_mm256_maddsub_pd
   // CHECK: @llvm.x86.fma.vfmaddsub.pd.256
   return _mm256_maddsub_pd(a, b, c);
 }
 
 __m256 test_mm256_msubadd_ps(__m256 a, __m256 b, __m256 c) {
+  // CHECK-LABEL: test_mm256_msubadd_ps
   // CHECK: @llvm.x86.fma.vfmsubadd.ps.256
   return _mm256_msubadd_ps(a, b, c);
 }
 
 __m256d test_mm256_msubadd_pd(__m256d a, __m256d b, __m256d c) {
+  // CHECK-LABEL: test_mm256_msubadd_pd
   // CHECK: @llvm.x86.fma.vfmsubadd.pd.256
   return _mm256_msubadd_pd(a, b, c);
 }
diff --git a/test/CodeGen/fp-contract-pragma.cpp b/test/CodeGen/fp-contract-pragma.cpp
index b4e24b9..1c5921a 100644
--- a/test/CodeGen/fp-contract-pragma.cpp
+++ b/test/CodeGen/fp-contract-pragma.cpp
@@ -1,6 +1,6 @@
 // RUN: %clang_cc1 -O3 -triple %itanium_abi_triple -emit-llvm -o - %s | FileCheck %s
 
-// Is FP_CONTRACT is honored in a simple case?
+// Is FP_CONTRACT honored in a simple case?
 float fp_contract_1(float a, float b, float c) {
 // CHECK: _Z13fp_contract_1fff
 // CHECK: tail call float @llvm.fmuladd
@@ -19,7 +19,7 @@
   return a * b + c;  
 }
 
-// Does FP_CONTRACT survive template instatiation?
+// Does FP_CONTRACT survive template instantiation?
 class Foo {};
 Foo operator+(Foo, Foo);
 
@@ -62,3 +62,15 @@
   return a * b + c;
 }
 
+// If the multiply has multiple uses, don't produce fmuladd.
+// This used to assert (PR25719):
+// https://llvm.org/bugs/show_bug.cgi?id=25719
+
+float fp_contract_7(float a, float b, float c) {
+// CHECK: _Z13fp_contract_7fff
+// CHECK:  %[[M:.+]] = fmul float %b, 2.000000e+00
+// CHECK-NEXT: fsub float %[[M]], %c
+  #pragma STDC FP_CONTRACT ON
+  return (a = 2 * b) - c;
+}
+
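
The new fp_contract_7 case pins down the shape where the product also feeds a store, so it has a second use and must not be folded into llvm.fmuladd. A minimal sketch contrasting the two shapes, assuming the pragma may equally be placed at file scope:

    /* Illustrative comparison only.  With FP_CONTRACT ON, the first return is
     * eligible to become a single llvm.fmuladd; in the second, the product is
     * also stored to 'a', so it keeps a separate fmul (the PR25719 case). */
    #pragma STDC FP_CONTRACT ON

    float contractible(float a, float b, float c) {
      return a * b + c;
    }

    float not_contractible(float a, float b, float c) {
      return (a = 2 * b) - c;
    }
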
diff --git a/test/CodeGen/fsgsbase-builtins.c b/test/CodeGen/fsgsbase-builtins.c
index 14c51a9..5e9ba8c 100644
--- a/test/CodeGen/fsgsbase-builtins.c
+++ b/test/CodeGen/fsgsbase-builtins.c
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 %s -O3 -triple=x86_64-apple-darwin -target-feature +fsgsbase -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 %s -triple=x86_64-apple-darwin -target-feature +fsgsbase -emit-llvm -o - | FileCheck %s
 
 // Don't include mm_malloc.h, it's system specific.
 #define __MM_MALLOC_H
diff --git a/test/CodeGen/function-attributes.c b/test/CodeGen/function-attributes.c
index 177ad84..8f682a7 100644
--- a/test/CodeGen/function-attributes.c
+++ b/test/CodeGen/function-attributes.c
@@ -1,5 +1,5 @@
-// RUN: %clang_cc1 -triple i386-unknown-unknown -emit-llvm -Os -o - %s | FileCheck %s
-// RUN: %clang_cc1 -triple i386-unknown-unknown -emit-llvm -Os -std=c99 -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple i386-unknown-unknown -emit-llvm -disable-llvm-optzns -Os -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple i386-unknown-unknown -emit-llvm -disable-llvm-optzns -Os -std=c99 -o - %s | FileCheck %s
 // CHECK: define signext i8 @f0(i32 %x) [[NUW:#[0-9]+]]
 // CHECK: define zeroext i8 @f1(i32 %x) [[NUW]]
 // CHECK: define void @f2(i8 signext %x) [[NUW]]
@@ -56,31 +56,11 @@
   return arg ? 0 : f10_t();
 }
 
-// CHECK: define void @f13() [[NUW]]
+// CHECK: define void @f13() [[NUW_OS_RN:#[0-9]+]]
 void f13(void) __attribute__((pure)) __attribute__((const));
 void f13(void){}
 
 
-// Ensure that these get inlined: rdar://6853279
-// CHECK-LABEL: define void @f14
-// CHECK-NOT: @ai_
-// CHECK: call void @f14_end
-static __inline__ __attribute__((always_inline))
-int ai_1() {  return 4; }
-
-static __inline__ __attribute__((always_inline))
-struct {
-  int a, b, c, d, e;
-} ai_2() { while (1) {} }
-
-void f14(int a) {
-  extern void f14_end(void);
-  if (a)
-    ai_2();
-  ai_1();
-  f14_end();
-}
-
 // <rdar://problem/7102668> [irgen] clang isn't setting the optsize bit on functions
 // CHECK-LABEL: define void @f15
 // CHECK: [[NUW]]
@@ -128,10 +108,11 @@
   _setjmp(0);
 }
 
-// CHECK: attributes [[NUW]] = { nounwind optsize readnone{{.*}} }
-// CHECK: attributes [[AI]] = { alwaysinline nounwind optsize readnone{{.*}} }
-// CHECK: attributes [[ALIGN]] = { nounwind optsize readnone alignstack=16{{.*}} }
+// CHECK: attributes [[NUW]] = { nounwind optsize{{.*}} }
+// CHECK: attributes [[AI]] = { alwaysinline nounwind optsize{{.*}} }
+// CHECK: attributes [[NUW_OS_RN]] = { nounwind optsize readnone{{.*}} }
+// CHECK: attributes [[ALIGN]] = { nounwind optsize alignstack=16{{.*}} }
 // CHECK: attributes [[RT]] = { nounwind optsize returns_twice{{.*}} }
-// CHECK: attributes [[NR]] = { noreturn nounwind optsize }
+// CHECK: attributes [[NR]] = { noreturn optsize }
 // CHECK: attributes [[NUW_RN]] = { nounwind optsize readnone }
-// CHECK: attributes [[RT_CALL]] = { nounwind optsize returns_twice }
+// CHECK: attributes [[RT_CALL]] = { optsize returns_twice }
diff --git a/test/CodeGen/global-blocks-lines.c b/test/CodeGen/global-blocks-lines.c
index 36e4618..a46c26e 100644
--- a/test/CodeGen/global-blocks-lines.c
+++ b/test/CodeGen/global-blocks-lines.c
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -fblocks -g -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -fblocks -debug-info-kind=limited -emit-llvm %s -o - | FileCheck %s
 // Make sure we do not generate line info for debugging-related frame setup.
 // CHECK: define {{.*}}block_invoke
 // CHECK-NOT: store {{.*}}%struct.__block_descriptor*{{.*}}dbg
diff --git a/test/CodeGen/hidden-alias-to-internal-function.c b/test/CodeGen/hidden-alias-to-internal-function.c
index e939228..93f17e5 100644
--- a/test/CodeGen/hidden-alias-to-internal-function.c
+++ b/test/CodeGen/hidden-alias-to-internal-function.c
@@ -4,5 +4,5 @@
 void bar(void) __attribute__((alias("foo")))
 __attribute__((visibility("hidden")));
 
-// CHECK: @bar = hidden alias void ()* @foo
+// CHECK: @bar = hidden alias void (), void ()* @foo
 // CHECK: define internal void @foo()
diff --git a/test/CodeGen/inline.c b/test/CodeGen/inline.c
index a45bccc..fe7efe3 100644
--- a/test/CodeGen/inline.c
+++ b/test/CodeGen/inline.c
@@ -53,7 +53,8 @@
 // CHECK3-LABEL: define linkonce_odr i32 @_Z2eiv()
 
 // RUN: echo "MS C Mode tests:"
-// RUN: %clang_cc1 %s -triple i386-unknown-unknown -O1 -disable-llvm-optzns -emit-llvm -o - -std=c99 -fms-compatibility | FileCheck %s --check-prefix=CHECK4
+// RUN: %clang_cc1 %s -triple i386-pc-win32 -O1 -disable-llvm-optzns -emit-llvm -o - -std=c99 | FileCheck %s --check-prefix=CHECK4
+// CHECK4-NOT: define weak_odr void @_Exit(
 // CHECK4-LABEL: define weak_odr i32 @ei()
 // CHECK4-LABEL: define i32 @bar()
 // CHECK4-NOT: unreferenced1
@@ -62,6 +63,9 @@
 // CHECK4-LABEL: define linkonce_odr i32 @foo()
 // CHECK4-LABEL: define available_externally void @gnu_ei_inline()
 
+__attribute__((noreturn)) void __cdecl _exit(int _Code);
+__inline void __cdecl _Exit(int status) { _exit(status); }
+
 extern __inline int ei() { return 123; }
 
 __inline int foo() {
diff --git a/test/CodeGen/integer-overflow.c b/test/CodeGen/integer-overflow.c
index de3b53f..6a7c3e5 100644
--- a/test/CodeGen/integer-overflow.c
+++ b/test/CodeGen/integer-overflow.c
@@ -72,4 +72,11 @@
   // TRAPV: add i8 {{.*}}, 1
   // CATCH_UB: add i8 {{.*}}, 1
   ++PR9350;
+
+  // PR24256: don't instrument __builtin_frame_address.
+  __builtin_frame_address(0 + 0);
+  // DEFAULT:  call i8* @llvm.frameaddress(i32 0)
+  // WRAPV:    call i8* @llvm.frameaddress(i32 0)
+  // TRAPV:    call i8* @llvm.frameaddress(i32 0)
+  // CATCH_UB: call i8* @llvm.frameaddress(i32 0)
 }
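
The PR24256 case presumably comes from the overflow instrumentation being applied to the constant arithmetic inside the intrinsic's argument: llvm.frameaddress requires an immediate operand, so the expectation is that `0 + 0` folds to the literal 0 with no check emitted, which is what the DEFAULT/WRAPV/TRAPV/CATCH_UB lines above all require.
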
diff --git a/test/CodeGen/le32-arguments.c b/test/CodeGen/le32-arguments.c
index d26640e..e81d843 100644
--- a/test/CodeGen/le32-arguments.c
+++ b/test/CodeGen/le32-arguments.c
@@ -10,7 +10,7 @@
   int bb;
 } s1;
 // Structs should be passed byval and not split up
-// CHECK-LABEL: define void @f1(%struct.s1* byval %i)
+// CHECK-LABEL: define void @f1(%struct.s1* byval align 4 %i)
 void f1(s1 i) {}
 
 typedef struct {
@@ -48,7 +48,7 @@
   char b;
 };
 // Unions should be passed as byval structs
-// CHECK-LABEL: define void @f7(%union.simple_union* byval %s)
+// CHECK-LABEL: define void @f7(%union.simple_union* byval align 4 %s)
 void f7(union simple_union s) {}
 
 typedef struct {
@@ -57,5 +57,5 @@
   int b8 : 8;
 } bitfield1;
 // Bitfields should be passed as byval structs
-// CHECK-LABEL: define void @f8(%struct.bitfield1* byval %bf1)
+// CHECK-LABEL: define void @f8(%struct.bitfield1* byval align 4 %bf1)
 void f8(bitfield1 bf1) {}
diff --git a/test/CodeGen/le32-regparm.c b/test/CodeGen/le32-regparm.c
index c8f7069..ecb1030 100644
--- a/test/CodeGen/le32-regparm.c
+++ b/test/CodeGen/le32-regparm.c
@@ -1,4 +1,5 @@
 // RUN: %clang_cc1 -triple le32-unknown-nacl %s -fsyntax-only -verify
+// RUN: %clang_cc1 -triple aarch64 %s -fsyntax-only -verify
 
 void __attribute__((regparm(2))) fc_f1(int i, int j, int k) {} // expected-error{{'regparm' is not valid on this platform}}
 
diff --git a/test/CodeGen/lifetime-debuginfo-1.c b/test/CodeGen/lifetime-debuginfo-1.c
index 674346a..e2e45cb 100644
--- a/test/CodeGen/lifetime-debuginfo-1.c
+++ b/test/CodeGen/lifetime-debuginfo-1.c
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -O1 -triple x86_64-none-linux-gnu -emit-llvm -gline-tables-only %s -o - | FileCheck %s
+// RUN: %clang_cc1 -O1 -triple x86_64-none-linux-gnu -emit-llvm -debug-info-kind=line-tables-only %s -o - | FileCheck %s
 
 // Inserting lifetime markers should not affect debuginfo
 
diff --git a/test/CodeGen/lifetime-debuginfo-2.c b/test/CodeGen/lifetime-debuginfo-2.c
index 03afbd8..1d2fb59 100644
--- a/test/CodeGen/lifetime-debuginfo-2.c
+++ b/test/CodeGen/lifetime-debuginfo-2.c
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -O1 -triple x86_64-none-linux-gnu -emit-llvm -gline-tables-only %s -o - | FileCheck %s
+// RUN: %clang_cc1 -O1 -triple x86_64-none-linux-gnu -emit-llvm -debug-info-kind=line-tables-only %s -o - | FileCheck %s
 
 // Inserting lifetime markers should not affect debuginfo: lifetime.end is not
 // a destructor, but instrumentation for the compiler. Ensure the debug info for
diff --git a/test/CodeGen/lineno-dbginfo.c b/test/CodeGen/lineno-dbginfo.c
index ac61c83..5fe64ec 100644
--- a/test/CodeGen/lineno-dbginfo.c
+++ b/test/CodeGen/lineno-dbginfo.c
@@ -1,5 +1,5 @@
 // RUN: echo "#include <stddef.h>" > %t.h
-// RUN: %clang_cc1 -S -g -include %t.h %s -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 -S -debug-info-kind=limited -include %t.h %s -emit-llvm -o - | FileCheck %s
 
 // CHECK: !DIGlobalVariable(name: "outer",
 // CHECK-NOT:               linkageName:
diff --git a/test/CodeGen/linetable-endscope.c b/test/CodeGen/linetable-endscope.c
index 961eaec..6eefbea 100644
--- a/test/CodeGen/linetable-endscope.c
+++ b/test/CodeGen/linetable-endscope.c
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -emit-llvm -g -triple x86_64-apple-darwin10 %s -o - | FileCheck %s
+// RUN: %clang_cc1 -emit-llvm -debug-info-kind=limited -triple x86_64-apple-darwin10 %s -o - | FileCheck %s
 
 // Check the line numbers for the ret instruction. We expect it to be
 // at the closing of the lexical scope in this case. See the comments in
diff --git a/test/CodeGen/link-bitcode-file.c b/test/CodeGen/link-bitcode-file.c
index 92b1a88..7810fe1 100644
--- a/test/CodeGen/link-bitcode-file.c
+++ b/test/CodeGen/link-bitcode-file.c
@@ -1,6 +1,12 @@
 // RUN: %clang_cc1 -triple i386-pc-linux-gnu -DBITCODE -emit-llvm-bc -o %t.bc %s
-// RUN: %clang_cc1 -triple i386-pc-linux-gnu -mlink-bitcode-file %t.bc -O3 -emit-llvm -o - %s | FileCheck -check-prefix=CHECK-NO-BC %s
-// RUN: not %clang_cc1 -triple i386-pc-linux-gnu -DBITCODE -mlink-bitcode-file %t.bc -O3 -emit-llvm -o - %s 2>&1 | FileCheck -check-prefix=CHECK-BC %s
+// RUN: %clang_cc1 -triple i386-pc-linux-gnu -DBITCODE2 -emit-llvm-bc -o %t-2.bc %s
+// RUN: %clang_cc1 -triple i386-pc-linux-gnu -mlink-bitcode-file %t.bc \
+// RUN:     -O3 -emit-llvm -o - %s | FileCheck -check-prefix=CHECK-NO-BC %s
+// RUN: %clang_cc1 -triple i386-pc-linux-gnu -O3 -emit-llvm -o - \
+// RUN:     -mlink-bitcode-file %t.bc -mlink-bitcode-file %t-2.bc %s \
+// RUN:     | FileCheck -check-prefix=CHECK-NO-BC -check-prefix=CHECK-NO-BC2 %s
+// RUN: not %clang_cc1 -triple i386-pc-linux-gnu -DBITCODE -O3 -emit-llvm -o - \
+// RUN:     -mlink-bitcode-file %t.bc %s 2>&1 | FileCheck -check-prefix=CHECK-BC %s
 // Make sure we deal with failure to load the file.
 // RUN: not %clang_cc1 -triple i386-pc-linux-gnu -mlink-bitcode-file no-such-file.bc \
 // RUN:    -emit-llvm -o - %s 2>&1 | FileCheck -check-prefix=CHECK-NO-FILE %s
@@ -9,11 +15,15 @@
 
 #ifdef BITCODE
 
+extern int f2(void);
 // CHECK-BC: fatal error: cannot link module {{.*}}'f': symbol multiply defined
 int f(void) {
+  f2();
   return 42;
 }
 
+#elif BITCODE2
+int f2(void) { return 43; }
 #else
 
 // CHECK-NO-BC-LABEL: define i32 @g
@@ -23,6 +33,7 @@
 }
 
 // CHECK-NO-BC-LABEL: define i32 @f
+// CHECK-NO-BC2-LABEL: define i32 @f2
 
 #endif
 
diff --git a/test/CodeGen/linkage-redecl.c b/test/CodeGen/linkage-redecl.c
index 58993f3..4767a94 100644
--- a/test/CodeGen/linkage-redecl.c
+++ b/test/CodeGen/linkage-redecl.c
@@ -16,4 +16,4 @@
 }
 
 extern void f(int x) { } // still has internal linkage
-// CHECK-LABEL: define internal void @f
+// CHECK-LABEL: define internal {{.*}}void @f
diff --git a/test/CodeGen/long_double_fp128.cpp b/test/CodeGen/long_double_fp128.cpp
new file mode 100644
index 0000000..713a633
--- /dev/null
+++ b/test/CodeGen/long_double_fp128.cpp
@@ -0,0 +1,26 @@
+// RUN: %clang_cc1 -triple x86_64-linux-android -emit-llvm -o - %s \
+// RUN:    | FileCheck %s --check-prefix=A64
+// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm -o - %s \
+// RUN:    | FileCheck %s --check-prefix=G64
+// RUN: %clang_cc1 -triple powerpc64-linux-gnu -emit-llvm -o - %s \
+// RUN:    | FileCheck %s --check-prefix=P64
+// RUN: %clang_cc1 -triple i686-linux-android -emit-llvm -o - %s \
+// RUN:    | FileCheck %s --check-prefix=A32
+// RUN: %clang_cc1 -triple i686-linux-gnu -emit-llvm -o - %s \
+// RUN:    | FileCheck %s --check-prefix=G32
+// RUN: %clang_cc1 -triple powerpc-linux-gnu -emit-llvm -o - %s \
+// RUN:    | FileCheck %s --check-prefix=P32
+// RUN: %clang_cc1 -triple x86_64-nacl -emit-llvm -o - %s \
+// RUN:    | FileCheck %s --check-prefix=N64
+
+// Check mangled name of long double.
+// Android's gcc and llvm use fp128 for long double.
+// NaCl uses double format for long double, but still has separate overloads.
+void test(long, float, double, long double, long double _Complex) { }
+// A64:  define void @_Z4testlfdgCg(i64, float, double, fp128, { fp128, fp128 }*
+// G64:  define void @_Z4testlfdeCe(i64, float, double, x86_fp80, { x86_fp80, x86_fp80 }*
+// P64:  define void @_Z4testlfdgCg(i64, float, double, ppc_fp128, ppc_fp128 {{.*}}, ppc_fp128
+// A32:  define void @_Z4testlfdeCe(i32, float, double, double, { double, double }*
+// G32:  define void @_Z4testlfdeCe(i32, float, double, x86_fp80, { x86_fp80, x86_fp80 }*
+// P32:  define void @_Z4testlfdgCg(i32, float, double, ppc_fp128, { ppc_fp128, ppc_fp128 }*
+// N64: define void @_Z4testlfdeCe(i32, float, double, double, double {{.*}}, double
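
Reading one of these expectations against the Itanium mangling grammar makes them easier to audit: in `_Z4testlfdgCg`, `_Z4test` names the function and each parameter contributes one code, `l` for long, `f` for float, `d` for double, `g` for __float128 (the fp128 form of long double used on Android x86_64 and PowerPC here), and `Cg` for the _Complex variant of that type; targets that keep the x87 or plain-double format mangle long double as `e` instead, which is why the other lines expect `_Z4testlfdeCe`.
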
diff --git a/test/CodeGen/lzcnt-builtins.c b/test/CodeGen/lzcnt-builtins.c
index a083de9..2f83086 100644
--- a/test/CodeGen/lzcnt-builtins.c
+++ b/test/CodeGen/lzcnt-builtins.c
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 %s -O3 -triple=x86_64-apple-darwin -target-feature +lzcnt -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 %s -triple=x86_64-apple-darwin -target-feature +lzcnt -emit-llvm -o - | FileCheck %s
 
 // Don't include mm_malloc.h, it's system specific.
 #define __MM_MALLOC_H
diff --git a/test/CodeGen/mangle-ms.c b/test/CodeGen/mangle-ms.c
new file mode 100644
index 0000000..0ad43d5
--- /dev/null
+++ b/test/CodeGen/mangle-ms.c
@@ -0,0 +1,4 @@
+// RUN: %clang_cc1 -emit-llvm %s -o - -triple=x86_64-pc-win32 | FileCheck %s
+
+// CHECK: define void @"\01?f@@$$J0YAXP6AX@Z@Z"
+__attribute__((overloadable)) void f(void (*x)()) {}
diff --git a/test/CodeGen/mingw-long-double.c b/test/CodeGen/mingw-long-double.c
new file mode 100644
index 0000000..1c7c31f
--- /dev/null
+++ b/test/CodeGen/mingw-long-double.c
@@ -0,0 +1,41 @@
+// RUN: %clang_cc1 -triple i686-windows-gnu -emit-llvm -o - %s \
+// RUN:    | FileCheck %s --check-prefix=GNU32
+// RUN: %clang_cc1 -triple x86_64-windows-gnu -emit-llvm -o - %s \
+// RUN:    | FileCheck %s --check-prefix=GNU64
+// RUN: %clang_cc1 -triple x86_64-windows-msvc -emit-llvm -o - %s \
+// RUN:    | FileCheck %s --check-prefix=MSC64
+
+struct {
+  char c;
+  long double ldb;
+} agggregate_LD = {};
+// GNU32: %struct.anon = type { i8, x86_fp80 }
+// GNU32: @agggregate_LD = global %struct.anon zeroinitializer, align 4
+// GNU64: %struct.anon = type { i8, x86_fp80 }
+// GNU64: @agggregate_LD = global %struct.anon zeroinitializer, align 16
+// MSC64: %struct.anon = type { i8, double }
+// MSC64: @agggregate_LD = global %struct.anon zeroinitializer, align 8
+
+long double dataLD = 1.0L;
+// GNU32: @dataLD = global x86_fp80 0xK3FFF8000000000000000, align 4
+// GNU64: @dataLD = global x86_fp80 0xK3FFF8000000000000000, align 16
+// MSC64: @dataLD = global double 1.000000e+00, align 8
+
+long double _Complex dataLDC = {1.0L, 1.0L};
+// GNU32: @dataLDC = global { x86_fp80, x86_fp80 } { x86_fp80 0xK3FFF8000000000000000, x86_fp80 0xK3FFF8000000000000000 }, align 4
+// GNU64: @dataLDC = global { x86_fp80, x86_fp80 } { x86_fp80 0xK3FFF8000000000000000, x86_fp80 0xK3FFF8000000000000000 }, align 16
+// MSC64: @dataLDC = global { double, double } { double 1.000000e+00, double 1.000000e+00 }, align 8
+
+long double TestLD(long double x) {
+  return x * x;
+}
+// GNU32: define x86_fp80 @TestLD(x86_fp80 %x)
+// GNU64: define void @TestLD(x86_fp80* noalias sret %agg.result, x86_fp80*)
+// MSC64: define double @TestLD(double %x)
+
+long double _Complex TestLDC(long double _Complex x) {
+  return x * x;
+}
+// GNU32: define void @TestLDC({ x86_fp80, x86_fp80 }* noalias sret %agg.result, { x86_fp80, x86_fp80 }* byval align 4 %x)
+// GNU64: define void @TestLDC({ x86_fp80, x86_fp80 }* noalias sret %agg.result, { x86_fp80, x86_fp80 }* %x)
+// MSC64: define void @TestLDC({ double, double }* noalias sret %agg.result, { double, double }* %x)
diff --git a/test/CodeGen/mips-inline-asm-abi.c b/test/CodeGen/mips-inline-asm-abi.c
new file mode 100644
index 0000000..20c4f8d
--- /dev/null
+++ b/test/CodeGen/mips-inline-asm-abi.c
@@ -0,0 +1,12 @@
+// REQUIRES: mips-registered-target
+// RUN: %clang_cc1 -triple mips-linux-gnu -emit-obj -o - %s | \
+// RUN:   llvm-readobj -h - | FileCheck %s
+
+// CHECK: EF_MIPS_ABI_O32
+
+__asm__(
+"bar:\n"
+"  nop\n"
+);
+
+void foo() {}
diff --git a/test/CodeGen/mips-interrupt-attr.c b/test/CodeGen/mips-interrupt-attr.c
new file mode 100644
index 0000000..df70b12
--- /dev/null
+++ b/test/CodeGen/mips-interrupt-attr.c
@@ -0,0 +1,64 @@
+// RUN: %clang_cc1 -triple mipsel-unknown-linux -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK
+
+void __attribute__ ((interrupt("vector=sw0")))
+isr_sw0 (void)
+{
+  // CHECK: define void @isr_sw0() [[SW0:#[0-9]+]]
+}
+
+void __attribute__ ((interrupt("vector=sw1")))
+isr_sw1 (void)
+{
+  // CHECK: define void @isr_sw1() [[SW1:#[0-9]+]]
+}
+
+void __attribute__ ((interrupt("vector=hw0")))
+isr_hw0 (void)
+{
+  // CHECK: define void @isr_hw0() [[HW0:#[0-9]+]]
+}
+
+void __attribute__ ((interrupt("vector=hw1")))
+isr_hw1 (void)
+{
+  // CHECK: define void @isr_hw1() [[HW1:#[0-9]+]]
+}
+
+void __attribute__ ((interrupt("vector=hw2")))
+isr_hw2 (void)
+{
+  // CHECK: define void @isr_hw2() [[HW2:#[0-9]+]]
+}
+
+void __attribute__ ((interrupt("vector=hw3")))
+isr_hw3 (void)
+{
+  // CHECK: define void @isr_hw3() [[HW3:#[0-9]+]]
+}
+
+void __attribute__ ((interrupt("vector=hw4")))
+isr_hw4 (void)
+{
+  // CHECK: define void @isr_hw4() [[HW4:#[0-9]+]]
+}
+
+void __attribute__ ((interrupt("vector=hw5")))
+isr_hw5 (void)
+{
+  // CHECK: define void @isr_hw5() [[HW5:#[0-9]+]]
+}
+
+void __attribute__ ((interrupt))
+isr_eic (void)
+{
+  // CHECK: define void @isr_eic() [[EIC:#[0-9]+]]
+}
+// CHECK: attributes [[SW0]] = { {{.*}} "interrupt"="sw0" {{.*}} }
+// CHECK: attributes [[SW1]] = { {{.*}} "interrupt"="sw1" {{.*}} }
+// CHECK: attributes [[HW0]] = { {{.*}} "interrupt"="hw0" {{.*}} }
+// CHECK: attributes [[HW1]] = { {{.*}} "interrupt"="hw1" {{.*}} }
+// CHECK: attributes [[HW2]] = { {{.*}} "interrupt"="hw2" {{.*}} }
+// CHECK: attributes [[HW3]] = { {{.*}} "interrupt"="hw3" {{.*}} }
+// CHECK: attributes [[HW4]] = { {{.*}} "interrupt"="hw4" {{.*}} }
+// CHECK: attributes [[HW5]] = { {{.*}} "interrupt"="hw5" {{.*}} }
+// CHECK: attributes [[EIC]] = { {{.*}} "interrupt"="eic" {{.*}} }
diff --git a/test/CodeGen/mips-unsupported-nan.c b/test/CodeGen/mips-unsupported-nan.c
index 14a36fc..2fd5042 100644
--- a/test/CodeGen/mips-unsupported-nan.c
+++ b/test/CodeGen/mips-unsupported-nan.c
@@ -1,24 +1,44 @@
-// RUN: %clang -target mipsel-unknown-linux -mnan=2008 -march=mips2 -emit-llvm -S %s -o - 2>&1 | FileCheck -check-prefix=CHECK-MIPS2 -check-prefix=CHECK-NANLEGACY %s
-// RUN: %clang -target mips64el-unknown-linux -mnan=2008 -march=mips3 -emit-llvm -S %s -o - 2>&1 | FileCheck -check-prefix=CHECK-MIPS3 -check-prefix=CHECK-NANLEGACY %s
-// RUN: %clang -target mips64el-unknown-linux -mnan=2008 -march=mips4 -emit-llvm -S %s -o - 2>&1 | FileCheck -check-prefix=CHECK-MIPS4 -check-prefix=CHECK-NANLEGACY %s
-// RUN: %clang -target mipsel-unknown-linux -mnan=2008 -march=mips32 -emit-llvm -S %s -o - 2>&1 | FileCheck -check-prefix=CHECK-MIPS32 -check-prefix=CHECK-NANLEGACY %s
-// RUN: %clang -target mipsel-unknown-linux -mnan=2008 -march=mips32r2 -emit-llvm -S %s -o - 2>&1 | FileCheck -check-prefix=CHECK-MIPS32R2 -check-prefix=CHECK-NANLEGACY %s
-// RUN: %clang -target mipsel-unknown-linux -mnan=2008 -march=mips32r3 -emit-llvm -S %s -o - 2>&1 | FileCheck -check-prefix=CHECK-NOT-MIPS32R3 -check-prefix=CHECK-NAN2008 %s
-// RUN: %clang -target mipsel-unknown-linux -mnan=legacy -march=mips32r6 -emit-llvm -S %s -o - 2>&1 | FileCheck -check-prefix=CHECK-MIPS32R6 -check-prefix=CHECK-NAN2008 %s
-// RUN: %clang -target mips64el-unknown-linux -mnan=2008 -march=mips64 -emit-llvm -S %s -o - 2>&1 | FileCheck -check-prefix=CHECK-MIPS64 -check-prefix=CHECK-NANLEGACY %s
-// RUN: %clang -target mips64el-unknown-linux -mnan=2008 -march=mips64r2 -emit-llvm -S %s -o - 2>&1 | FileCheck -check-prefix=CHECK-MIPS64R2 -check-prefix=CHECK-NANLEGACY %s
-// RUN: %clang -target mips64el-unknown-linux -mnan=legacy -march=mips64r6 -emit-llvm -S %s -o - 2>&1 | FileCheck -check-prefix=CHECK-MIPS64R6 -check-prefix=CHECK-NAN2008 %s
+// RUN: %clang -target mipsel-unknown-linux -mnan=2008 -march=mips2 -emit-llvm -S %s -o - 2>%t | FileCheck -check-prefix=CHECK-NANLEGACY %s
+// RUN: FileCheck -check-prefix=CHECK-MIPS2 %s < %t
+//
+// RUN: %clang -target mips64el-unknown-linux -mnan=2008 -march=mips3 -emit-llvm -S %s -o - 2>%t | FileCheck -check-prefix=CHECK-NANLEGACY %s
+// RUN: FileCheck -check-prefix=CHECK-MIPS3 %s < %t
+//
+// RUN: %clang -target mips64el-unknown-linux -mnan=2008 -march=mips4 -emit-llvm -S %s -o - 2>%t | FileCheck -check-prefix=CHECK-NANLEGACY %s
+// RUN: FileCheck -check-prefix=CHECK-MIPS4 %s < %t
+//
+// RUN: %clang -target mipsel-unknown-linux -mnan=2008 -march=mips32 -emit-llvm -S %s -o - 2>%t | FileCheck -check-prefix=CHECK-NANLEGACY %s
+// RUN: FileCheck -check-prefix=CHECK-MIPS32 %s < %t
+//
+// RUN: %clang -target mipsel-unknown-linux -mnan=2008 -march=mips32r2 -emit-llvm -S %s -o - 2>%t | FileCheck -check-prefix=CHECK-NAN2008 %s
+// RUN: FileCheck -allow-empty -check-prefix=NO-WARNINGS %s < %t
+//
+// RUN: %clang -target mipsel-unknown-linux -mnan=2008 -march=mips32r3 -emit-llvm -S %s -o - 2>%t | FileCheck -check-prefix=CHECK-NAN2008 %s
+// RUN: FileCheck -allow-empty -check-prefix=NO-WARNINGS %s < %t
+//
+// RUN: %clang -target mipsel-unknown-linux -mnan=legacy -march=mips32r6 -emit-llvm -S %s -o - 2>%t | FileCheck -check-prefix=CHECK-NAN2008 %s
+// RUN: FileCheck -check-prefix=CHECK-MIPS32R6 %s < %t
+//
+// RUN: %clang -target mips64el-unknown-linux -mnan=2008 -march=mips64 -emit-llvm -S %s -o - 2>%t | FileCheck -check-prefix=CHECK-NANLEGACY %s
+// RUN: FileCheck -check-prefix=CHECK-MIPS64 %s < %t
+//
+// RUN: %clang -target mips64el-unknown-linux -mnan=2008 -march=mips64r2 -emit-llvm -S %s -o - 2>%t | FileCheck -check-prefix=CHECK-NAN2008 %s
+// RUN: FileCheck -allow-empty -check-prefix=NO-WARNINGS %s < %t
+//
+// RUN: %clang -target mips64el-unknown-linux -mnan=legacy -march=mips64r6 -emit-llvm -S %s -o - 2>%t | FileCheck -check-prefix=CHECK-NAN2008 %s
+// RUN: FileCheck -check-prefix=CHECK-MIPS64R6 %s < %t
+
+// NO-WARNINGS-NOT: warning: ignoring '-mnan=legacy' option
+// NO-WARNINGS-NOT: warning: ignoring '-mnan=2008' option
 
 // CHECK-MIPS2: warning: ignoring '-mnan=2008' option because the 'mips2' architecture does not support it
 // CHECK-MIPS3: warning: ignoring '-mnan=2008' option because the 'mips3' architecture does not support it
 // CHECK-MIPS4: warning: ignoring '-mnan=2008' option because the 'mips4' architecture does not support it
 // CHECK-MIPS32: warning: ignoring '-mnan=2008' option because the 'mips32' architecture does not support it
-// CHECK-MIPS32R2: warning: ignoring '-mnan=2008' option because the 'mips32r2' architecture does not support it
-// CHECK-MIPS32R3: warning: ignoring '-mnan=2008' option because the 'mips32r3' architecture does not support it
 // CHECK-MIPS32R6: warning: ignoring '-mnan=legacy' option because the 'mips32r6' architecture does not support it
 // CHECK-MIPS64: warning: ignoring '-mnan=2008' option because the 'mips64' architecture does not support it
-// CHECK-MIPS64R2: warning: ignoring '-mnan=2008' option because the 'mips64r2' architecture does not support it
 // CHECK-MIPS64R6: warning: ignoring '-mnan=legacy' option because the 'mips64r6' architecture does not support it
+
 // CHECK-NANLEGACY: float 0x7FF4000000000000
 // CHECK-NAN2008: float 0x7FF8000000000000
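
Both constants are the same quiet float NaN printed in LLVM's 64-bit hex notation; they differ only in which fraction bit is set. Under the IEEE 754-2008 encoding the quiet bit is the most significant fraction bit, giving 0x7FF8000000000000, while the legacy MIPS encoding inverts that convention, so the quiet NaN is emitted with that bit clear and the next one set, giving 0x7FF4000000000000.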
 
diff --git a/test/CodeGen/mips-varargs.c b/test/CodeGen/mips-varargs.c
index 891769c..0d656dc 100644
--- a/test/CodeGen/mips-varargs.c
+++ b/test/CodeGen/mips-varargs.c
@@ -1,9 +1,9 @@
-// RUN: %clang_cc1 -triple mips-unknown-linux -o - -O1 -emit-llvm %s | FileCheck %s -check-prefix=ALL -check-prefix=O32
-// RUN: %clang_cc1 -triple mipsel-unknown-linux -o - -O1 -emit-llvm %s | FileCheck %s -check-prefix=ALL -check-prefix=O32
-// RUN: %clang_cc1 -triple mips64-unknown-linux -o - -O1 -emit-llvm  -target-abi n32 %s | FileCheck %s -check-prefix=ALL -check-prefix=N32 -check-prefix=NEW
-// RUN: %clang_cc1 -triple mips64-unknown-linux -o - -O1 -emit-llvm  -target-abi n32 %s | FileCheck %s -check-prefix=ALL -check-prefix=N32 -check-prefix=NEW
-// RUN: %clang_cc1 -triple mips64-unknown-linux -o - -O1 -emit-llvm %s | FileCheck %s -check-prefix=ALL -check-prefix=N64 -check-prefix=NEW
-// RUN: %clang_cc1 -triple mips64el-unknown-linux -o - -O1 -emit-llvm %s | FileCheck %s -check-prefix=ALL -check-prefix=N64 -check-prefix=NEW
+// RUN: %clang_cc1 -triple mips-unknown-linux -o - -emit-llvm %s | FileCheck %s -check-prefix=ALL -check-prefix=O32
+// RUN: %clang_cc1 -triple mipsel-unknown-linux -o - -emit-llvm %s | FileCheck %s -check-prefix=ALL -check-prefix=O32
+// RUN: %clang_cc1 -triple mips64-unknown-linux -o - -emit-llvm  -target-abi n32 %s | FileCheck %s -check-prefix=ALL -check-prefix=N32 -check-prefix=NEW
+// RUN: %clang_cc1 -triple mips64-unknown-linux -o - -emit-llvm  -target-abi n32 %s | FileCheck %s -check-prefix=ALL -check-prefix=N32 -check-prefix=NEW
+// RUN: %clang_cc1 -triple mips64-unknown-linux -o - -emit-llvm %s | FileCheck %s -check-prefix=ALL -check-prefix=N64 -check-prefix=NEW
+// RUN: %clang_cc1 -triple mips64el-unknown-linux -o - -emit-llvm %s | FileCheck %s -check-prefix=ALL -check-prefix=N64 -check-prefix=NEW
 
 #include <stdarg.h>
 
@@ -24,75 +24,31 @@
 // O32:   %va = alloca i8*, align [[PTRALIGN:4]]
 // N32:   %va = alloca i8*, align [[PTRALIGN:4]]
 // N64:   %va = alloca i8*, align [[PTRALIGN:8]]
+// ALL:   [[V:%.*]] = alloca i32, align 4
+// NEW:   [[PROMOTION_TEMP:%.*]] = alloca i32, align 4
+//
+// ALL:   [[VA:%.+]] = bitcast i8** %va to i8*
+// ALL:   call void @llvm.va_start(i8* [[VA]])
+// ALL:   [[AP_CUR:%.+]] = load i8*, i8** %va, align [[PTRALIGN]]
+// O32:   [[AP_NEXT:%.+]] = getelementptr inbounds i8, i8* [[AP_CUR]], [[INTPTR_T:i32]] [[CHUNKSIZE:4]]
+// NEW:   [[AP_NEXT:%.+]] = getelementptr inbounds i8, i8* [[AP_CUR]], [[INTPTR_T:i32|i64]] [[CHUNKSIZE:8]]
+//
+// ALL:   store i8* [[AP_NEXT]], i8** %va, align [[PTRALIGN]]
+//
+// O32:   [[AP_CAST:%.+]] = bitcast i8* [[AP_CUR]] to [[CHUNK_T:i32]]*
+// O32:   [[ARG:%.+]] = load i32, i32* [[AP_CAST]], align [[CHUNKALIGN:4]]
+//
+// N32:   [[AP_CAST:%.+]] = bitcast i8* [[AP_CUR]] to [[CHUNK_T:i64]]*
+// N32:   [[TMP:%.+]] = load i64, i64* [[AP_CAST]], align [[CHUNKALIGN:8]]
+// N64:   [[AP_CAST:%.+]] = bitcast i8* [[AP_CUR]] to [[CHUNK_T:i64]]*
+// N64:   [[TMP:%.+]] = load i64, i64* [[AP_CAST]], align [[CHUNKALIGN:8]]
+// NEW:   [[TMP2:%.+]] = trunc i64 [[TMP]] to i32
+// NEW:   store i32 [[TMP2]], i32* [[PROMOTION_TEMP]], align 4
+// NEW:   [[ARG:%.+]] = load i32, i32* [[PROMOTION_TEMP]], align 4
+// ALL:   store i32 [[ARG]], i32* [[V]], align 4
 //
 // ALL:   [[VA1:%.+]] = bitcast i8** %va to i8*
-// ALL:   call void @llvm.va_start(i8* [[VA1]])
-//
-// O32:   [[TMP0:%.+]] = bitcast i8** %va to i32**
-// O32:   [[AP_CUR:%.+]] = load i32*, i32** [[TMP0]], align [[PTRALIGN]]
-// NEW:   [[TMP0:%.+]] = bitcast i8** %va to i64**
-// NEW:   [[AP_CUR:%.+]] = load i64*, i64** [[TMP0]], align [[PTRALIGN]]
-//
-// O32:   [[AP_NEXT:%.+]] = getelementptr i32, i32* [[AP_CUR]], i32 1
-// NEW:   [[AP_NEXT:%.+]] = getelementptr i64, i64* [[AP_CUR]], {{i32|i64}} 1
-//
-// O32:   store i32* [[AP_NEXT]], i32** [[TMP0]], align [[PTRALIGN]]
-// NEW:   store i64* [[AP_NEXT]], i64** [[TMP0]], align [[PTRALIGN]]
-//
-// O32:   [[ARG1:%.+]] = load i32, i32* [[AP_CUR]], align 4
-// NEW:   [[TMP2:%.+]] = load i64, i64* [[AP_CUR]], align 8
-// NEW:   [[ARG1:%.+]] = trunc i64 [[TMP2]] to i32
-//
 // ALL:   call void @llvm.va_end(i8* [[VA1]])
-// ALL:   ret i32 [[ARG1]]
-// ALL: }
-
-int test_i32_2args(char *fmt, ...) {
-  va_list va;
-
-  va_start(va, fmt);
-  int v1 = va_arg(va, int);
-  int v2 = va_arg(va, int);
-  va_end(va);
-
-  return v1 + v2;
-}
-
-// ALL-LABEL: define i32 @test_i32_2args(i8*{{.*}} %fmt, ...)
-//
-// ALL:   %va = alloca i8*, align [[PTRALIGN]]
-// ALL:   [[VA1:%.+]] = bitcast i8** %va to i8*
-// ALL:   call void @llvm.va_start(i8* [[VA1]])
-//
-// O32:   [[TMP0:%.+]] = bitcast i8** %va to i32**
-// O32:   [[AP_CUR:%.+]] = load i32*, i32** [[TMP0]], align [[PTRALIGN]]
-// NEW:   [[TMP0:%.+]] = bitcast i8** %va to i64**
-// NEW:   [[AP_CUR:%.+]] = load i64*, i64** [[TMP0]], align [[PTRALIGN]]
-//
-// O32:   [[AP_NEXT1:%.+]] = getelementptr i32, i32* [[AP_CUR]], i32 1
-// NEW:   [[AP_NEXT1:%.+]] = getelementptr i64, i64* [[AP_CUR]], [[INTPTR_T:i32|i64]] 1
-//
-// O32:   store i32* [[AP_NEXT1]], i32** [[TMP0]], align [[PTRALIGN]]
-// FIXME: N32 optimised this store out. Why only for this ABI?
-// N64:   store i64* [[AP_NEXT1]], i64** [[TMP0]], align [[PTRALIGN]]
-//
-// O32:   [[ARG1:%.+]] = load i32, i32* [[AP_CUR]], align 4
-// NEW:   [[TMP3:%.+]] = load i64, i64* [[AP_CUR]], align 8
-// NEW:   [[ARG1:%.+]] = trunc i64 [[TMP3]] to i32
-//
-// O32:   [[AP_NEXT2:%.+]] = getelementptr i32, i32* [[AP_CUR]], i32 2
-// NEW:   [[AP_NEXT2:%.+]] = getelementptr i64, i64* [[AP_CUR]], [[INTPTR_T]] 2
-//
-// O32:   store i32* [[AP_NEXT2]], i32** [[TMP0]], align [[PTRALIGN]]
-// NEW:   store i64* [[AP_NEXT2]], i64** [[TMP0]], align [[PTRALIGN]]
-//
-// O32:   [[ARG2:%.+]] = load i32, i32* [[AP_NEXT1]], align 4
-// NEW:   [[TMP4:%.+]] = load i64, i64* [[AP_NEXT1]], align 8
-// NEW:   [[ARG2:%.+]] = trunc i64 [[TMP4]] to i32
-//
-// ALL:   call void @llvm.va_end(i8* [[VA1]])
-// ALL:   [[ADD:%.+]] = add nsw i32 [[ARG2]], [[ARG1]]
-// ALL:   ret i32 [[ADD]]
 // ALL: }
 
 long long test_i64(char *fmt, ...) {
@@ -108,32 +64,25 @@
 // ALL-LABEL: define i64 @test_i64(i8*{{.*}} %fmt, ...)
 //
 // ALL:   %va = alloca i8*, align [[PTRALIGN]]
-// ALL:   [[VA1:%.+]] = bitcast i8** %va to i8*
-// ALL:   call void @llvm.va_start(i8* [[VA1]])
-//
-// O32:   [[TMP0:%.+]] = bitcast i8** %va to i32*
-// O32:   [[AP_CUR:%.+]] = load [[INTPTR_T:i32]], i32* [[TMP0]], align [[PTRALIGN]]
-// NEW:   [[TMP0:%.+]] = bitcast i8** %va to i64**
-// NEW:   [[AP_CUR:%.+]] = load i64*, i64** [[TMP0]], align [[PTRALIGN]]
+// ALL:   [[VA:%.+]] = bitcast i8** %va to i8*
+// ALL:   call void @llvm.va_start(i8* [[VA]])
+// ALL:   [[AP_CUR:%.+]] = load i8*, i8** %va, align [[PTRALIGN]]
 //
 // i64 is 8-byte aligned, while this is within O32's stack alignment there's no
 // guarantee that the offset is still 8-byte aligned after earlier reads.
-// O32:   [[PTR1:%.+]] = add i32 [[AP_CUR]], 7
-// O32:   [[PTR2:%.+]] = and i32 [[PTR1]], -8
-// O32:   [[PTR3:%.+]] = inttoptr [[INTPTR_T]] [[PTR2]] to i64*
-// O32:   [[PTR4:%.+]] = inttoptr [[INTPTR_T]] [[PTR2]] to i8*
+// O32:   [[TMP1:%.+]] = ptrtoint i8* [[AP_CUR]] to i32
+// O32:   [[TMP2:%.+]] = add i32 [[TMP1]], 7
+// O32:   [[TMP3:%.+]] = and i32 [[TMP2]], -8
+// O32:   [[AP_CUR:%.+]] = inttoptr i32 [[TMP3]] to i8*
 //
-// O32:   [[AP_NEXT:%.+]] = getelementptr i8, i8* [[PTR4]], [[INTPTR_T]] 8
-// NEW:   [[AP_NEXT:%.+]] = getelementptr i64, i64* [[AP_CUR]], [[INTPTR_T:i32|i64]] 1
+// ALL:   [[AP_NEXT:%.+]] = getelementptr inbounds i8, i8* [[AP_CUR]], [[INTPTR_T]] 8
+// ALL:   store i8* [[AP_NEXT]], i8** %va, align [[PTRALIGN]]
 //
-// O32:   store i8* [[AP_NEXT]], i8** %va, align [[PTRALIGN]]
-// NEW:   store i64* [[AP_NEXT]], i64** [[TMP0]], align [[PTRALIGN]]
+// ALL:   [[AP_CAST:%.*]] = bitcast i8* [[AP_CUR]] to i64*
+// ALL:   [[ARG:%.+]] = load i64, i64* [[AP_CAST]], align 8
 //
-// O32:   [[ARG1:%.+]] = load i64, i64* [[PTR3]], align 8
-// NEW:   [[ARG1:%.+]] = load i64, i64* [[AP_CUR]], align 8
-//
+// ALL:   [[VA1:%.+]] = bitcast i8** %va to i8*
 // ALL:   call void @llvm.va_end(i8* [[VA1]])
-// ALL:   ret i64 [[ARG1]]
 // ALL: }
 
 char *test_ptr(char *fmt, ...) {
@@ -148,41 +97,30 @@
 
 // ALL-LABEL: define i8* @test_ptr(i8*{{.*}} %fmt, ...)
 //
-// O32:   %va = alloca i8*, align [[PTRALIGN:4]]
-// N32:   %va = alloca i8*, align [[PTRALIGN:4]]
-// N64:   %va = alloca i8*, align [[PTRALIGN:8]]
+// ALL:   %va = alloca i8*, align [[PTRALIGN]]
+// ALL:   [[V:%.*]] = alloca i8*, align [[PTRALIGN]]
+// N32:   [[AP_CAST:%.+]] = alloca i8*, align 4
+// ALL:   [[VA:%.+]] = bitcast i8** %va to i8*
+// ALL:   call void @llvm.va_start(i8* [[VA]])
+// ALL:   [[AP_CUR:%.+]] = load i8*, i8** %va, align [[PTRALIGN]]
+// ALL:   [[AP_NEXT:%.+]] = getelementptr inbounds i8, i8* [[AP_CUR]], [[INTPTR_T]] [[CHUNKSIZE]]
+// ALL:   store i8* [[AP_NEXT]], i8** %va, align [[PTRALIGN]]
+//
+// When the chunk size matches the pointer size, this is easy.
+// O32:   [[AP_CAST:%.+]] = bitcast i8* [[AP_CUR]] to i8**
+// N64:   [[AP_CAST:%.+]] = bitcast i8* [[AP_CUR]] to i8**
+// Otherwise we need a promotion temporary.
+// N32:   [[TMP1:%.+]] = bitcast i8* [[AP_CUR]] to i64*
+// N32:   [[TMP2:%.+]] = load i64, i64* [[TMP1]], align 8
+// N32:   [[TMP3:%.+]] = trunc i64 [[TMP2]] to i32
+// N32:   [[PTR:%.+]] = inttoptr i32 [[TMP3]] to i8*
+// N32:   store i8* [[PTR]], i8** [[AP_CAST]], align 4
+//
+// ALL:   [[ARG:%.+]] = load i8*, i8** [[AP_CAST]], align [[PTRALIGN]]
+// ALL:   store i8* [[ARG]], i8** [[V]], align [[PTRALIGN]]
 //
 // ALL:   [[VA1:%.+]] = bitcast i8** %va to i8*
-// ALL:   call void @llvm.va_start(i8* [[VA1]])
-//
-// O32:   [[TMP0:%.+]] = bitcast i8** %va to i8***
-// O32:   [[AP_CUR:%.+]] = load i8**, i8*** [[TMP0]], align [[PTRALIGN]]
-// N32 differs because the vararg is not a N32 pointer. It's been promoted to 64-bit.
-// N32:   [[TMP0:%.+]] = bitcast i8** %va to i64**
-// N32:   [[AP_CUR:%.+]] = load i64*, i64** [[TMP0]], align [[PTRALIGN]]
-// N64:   [[TMP0:%.+]] = bitcast i8** %va to i8***
-// N64:   [[AP_CUR:%.+]] = load i8**, i8*** [[TMP0]], align [[PTRALIGN]]
-//
-// O32:   [[AP_NEXT:%.+]] = getelementptr i8*, i8** [[AP_CUR]], i32 1
-// N32 differs because the vararg is not a N32 pointer. It's been promoted to 64-bit.
-// N32:   [[AP_NEXT:%.+]] = getelementptr i64, i64* [[AP_CUR]], {{i32|i64}} 1
-// N64:   [[AP_NEXT:%.+]] = getelementptr i8*, i8** [[AP_CUR]], {{i32|i64}} 1
-//
-// O32:   store i8** [[AP_NEXT]], i8*** [[TMP0]], align [[PTRALIGN]]
-// N32 differs because the vararg is not a N32 pointer. It's been promoted to 64-bit.
-// N32:   store i64* [[AP_NEXT]], i64** [[TMP0]], align [[PTRALIGN]]
-// N64:   store i8** [[AP_NEXT]], i8*** [[TMP0]], align [[PTRALIGN]]
-//
-// O32:   [[ARG1:%.+]] = load i8*, i8** [[AP_CUR]], align 4
-// N32 differs because the vararg is not a N32 pointer. It's been promoted to
-// 64-bit so we must truncate the excess and bitcast to a N32 pointer.
-// N32:   [[TMP2:%.+]] = load i64, i64* [[AP_CUR]], align 8
-// N32:   [[TMP3:%.+]] = trunc i64 [[TMP2]] to i32
-// N32:   [[ARG1:%.+]] = inttoptr i32 [[TMP3]] to i8*
-// N64:   [[ARG1:%.+]] = load i8*, i8** [[AP_CUR]], align 8
-//
 // ALL:   call void @llvm.va_end(i8* [[VA1]])
-// ALL:   ret i8* [[ARG1]]
 // ALL: }
 
 int test_v4i32(char *fmt, ...) {
@@ -198,31 +136,33 @@
 // ALL-LABEL: define i32 @test_v4i32(i8*{{.*}} %fmt, ...)
 //
 // ALL:   %va = alloca i8*, align [[PTRALIGN]]
+// ALL:   [[V]] = alloca <4 x i32>, align 16
 // ALL:   [[VA1:%.+]] = bitcast i8** %va to i8*
 // ALL:   call void @llvm.va_start(i8* [[VA1]])
-//
-// O32:   [[TMP0:%.+]] = bitcast i8** %va to i32*
-// N32:   [[TMP0:%.+]] = bitcast i8** %va to i32*
-// N64:   [[TMP0:%.+]] = bitcast i8** %va to i64*
-//
-// O32:   [[PTR0:%.+]] = load [[INTPTR_T:i32]], i32* [[TMP0]], align [[PTRALIGN]]
-// N32:   [[PTR0:%.+]] = load [[INTPTR_T:i32]], i32* [[TMP0]], align [[PTRALIGN]]
-// N64:   [[PTR0:%.+]] = load [[INTPTR_T:i64]], i64* [[TMP0]], align [[PTRALIGN]]
+// ALL:   [[AP_CUR:%.+]] = load i8*, i8** %va, align [[PTRALIGN]]
 //
 // Vectors are 16-byte aligned, however the O32 ABI has a maximum alignment of
 // 8-bytes since the base of the stack is 8-byte aligned.
-// O32:   [[PTR1:%.+]] = add i32 [[PTR0]], 7
-// O32:   [[PTR2:%.+]] = and i32 [[PTR1]], -8
+// O32:   [[TMP1:%.+]] = ptrtoint i8* [[AP_CUR]] to i32
+// O32:   [[TMP2:%.+]] = add i32 [[TMP1]], 7
+// O32:   [[TMP3:%.+]] = and i32 [[TMP2]], -8
+// O32:   [[AP_CUR:%.+]] = inttoptr i32 [[TMP3]] to i8*
 //
-// NEW:   [[PTR1:%.+]] = add [[INTPTR_T]] [[PTR0]], 15
-// NEW:   [[PTR2:%.+]] = and [[INTPTR_T]] [[PTR1]], -16
+// NEW:   [[TMP1:%.+]] = ptrtoint i8* [[AP_CUR]] to [[INTPTR_T]]
+// NEW:   [[TMP2:%.+]] = add [[INTPTR_T]] [[TMP1]], 15
+// NEW:   [[TMP3:%.+]] = and [[INTPTR_T]] [[TMP2]], -16
+// NEW:   [[AP_CUR:%.+]] = inttoptr [[INTPTR_T]] [[TMP3]] to i8*
 //
-// ALL:   [[PTR3:%.+]] = inttoptr [[INTPTR_T]] [[PTR2]] to <4 x i32>*
-// ALL:   [[PTR4:%.+]] = inttoptr [[INTPTR_T]] [[PTR2]] to i8*
-// ALL:   [[AP_NEXT:%.+]] = getelementptr i8, i8* [[PTR4]], [[INTPTR_T]] 16
+// ALL:   [[AP_NEXT:%.+]] = getelementptr inbounds i8, i8* [[AP_CUR]], [[INTPTR_T]] 16
 // ALL:   store i8* [[AP_NEXT]], i8** %va, align [[PTRALIGN]]
-// ALL:   [[PTR5:%.+]] = load <4 x i32>, <4 x i32>* [[PTR3]], align 16
+//
+// ALL:   [[AP_CAST:%.+]] = bitcast i8* [[AP_CUR]] to <4 x i32>*
+// O32:   [[ARG:%.+]] = load <4 x i32>, <4 x i32>* [[AP_CAST]], align 8
+// N64:   [[ARG:%.+]] = load <4 x i32>, <4 x i32>* [[AP_CAST]], align 16
+// ALL:   store <4 x i32> [[ARG]], <4 x i32>* [[V]], align 16
+//
+// ALL:   [[VA1:%.+]] = bitcast i8** %va to i8*
 // ALL:   call void @llvm.va_end(i8* [[VA1]])
-// ALL:   [[VECEXT:%.+]] = extractelement <4 x i32> [[PTR5]], i32 0
+// ALL:   [[VECEXT:%.+]] = extractelement <4 x i32> {{.*}}, i32 0
 // ALL:   ret i32 [[VECEXT]]
 // ALL: }
diff --git a/test/CodeGen/mmx-builtins.c b/test/CodeGen/mmx-builtins.c
index 346676c..44d1ea4 100644
--- a/test/CodeGen/mmx-builtins.c
+++ b/test/CodeGen/mmx-builtins.c
@@ -1,453 +1,608 @@
-// REQUIRES: x86-registered-target
-// RUN: %clang_cc1 %s -O3 -triple=x86_64-apple-darwin -target-feature +ssse3 -S -o - | FileCheck %s
+// RUN: %clang_cc1 %s -triple=x86_64-apple-darwin -target-feature +ssse3 -emit-llvm -o - -Werror | FileCheck %s
+// RUN: %clang_cc1 %s -triple=x86_64-apple-darwin -target-feature +ssse3 -fno-signed-char -emit-llvm -o - -Werror | FileCheck %s
 
-// FIXME: Disable inclusion of mm_malloc.h, our current implementation is broken
-// on win32 since we don't generally know how to find errno.h.
+// Don't include mm_malloc.h, it's system specific.
 #define __MM_MALLOC_H
 
-#include <tmmintrin.h>
+#include <x86intrin.h>
 
-__m64 test1(__m64 a, __m64 b) {
-  // CHECK: phaddw
-  return _mm_hadd_pi16(a, b);
-}
-
-__m64 test2(__m64 a, __m64 b) {
-  // CHECK: phaddd
-  return _mm_hadd_pi32(a, b);
-}
-
-__m64 test3(__m64 a, __m64 b) {
-  // CHECK: phaddsw
-  return _mm_hadds_pi16(a, b);
-}
-
-__m64 test4(__m64 a, __m64 b) {
-  // CHECK: phsubw
-  return _mm_hsub_pi16(a, b);
-}
-
-__m64 test5(__m64 a, __m64 b) {
-  // CHECK: phsubd
-  return _mm_hsub_pi32(a, b);
-}
-
-__m64 test6(__m64 a, __m64 b) {
-  // CHECK: phsubsw
-  return _mm_hsubs_pi16(a, b);
-}
-
-__m64 test7(__m64 a, __m64 b) {
-  // CHECK: pmaddubsw
-  return _mm_maddubs_pi16(a, b);
-}
-
-__m64 test8(__m64 a, __m64 b) {
-  // CHECK: pmulhrsw
-  return _mm_mulhrs_pi16(a, b);
-}
-
-__m64 test9(__m64 a, __m64 b) {
-  // CHECK: pshufb
-  return _mm_shuffle_pi8(a, b);
-}
-
-__m64 test10(__m64 a, __m64 b) {
-  // CHECK: psignb
-  return _mm_sign_pi8(a, b);
-}
-
-__m64 test11(__m64 a, __m64 b) {
-  // CHECK: psignw
-  return _mm_sign_pi16(a, b);
-}
-
-__m64 test12(__m64 a, __m64 b) {
-  // CHECK: psignd
-  return _mm_sign_pi32(a, b);
-}
-
-__m64 test13(__m64 a) {
-  // CHECK: pabsb
+__m64 test_mm_abs_pi8(__m64 a) {
+  // CHECK-LABEL: test_mm_abs_pi8
+  // CHECK: call x86_mmx @llvm.x86.ssse3.pabs.b
   return _mm_abs_pi8(a);
 }
 
-__m64 test14(__m64 a) {
-  // CHECK: pabsw
+__m64 test_mm_abs_pi16(__m64 a) {
+  // CHECK-LABEL: test_mm_abs_pi16
+  // CHECK: call x86_mmx @llvm.x86.ssse3.pabs.w
   return _mm_abs_pi16(a);
 }
 
-__m64 test15(__m64 a) {
-  // CHECK: pabsd
+__m64 test_mm_abs_pi32(__m64 a) {
+  // CHECK-LABEL: test_mm_abs_pi32
+  // CHECK: call x86_mmx @llvm.x86.ssse3.pabs.d
   return _mm_abs_pi32(a);
 }
 
-__m64 test16(__m64 a, __m64 b) {
-  // CHECK: palignr
-  return _mm_alignr_pi8(a, b, 2);
-}
-
-__m64 test17(__m128d a) {
-  // CHECK: cvtpd2pi
-  return _mm_cvtpd_pi32(a);
-}
-
-__m64 test18(__m128d a) {
-  // CHECK: cvttpd2pi
-  return _mm_cvttpd_pi32(a);
-}
-
-__m128d test19(__m64 a) {
-  // CHECK: cvtpi2pd
-  return _mm_cvtpi32_pd(a);
-}
-
-__m64 test20(__m64 a, __m64 b) {
-  // CHECK: pmuludq
-  return _mm_mul_su32(a, b);
-}
-
-__m64 test21(__m64 a) {
-  // CHECK: pshufw
-  return _mm_shuffle_pi16(a, 3);
-}
-
-__m64 test22(__m64 a, __m64 b) {
-  // CHECK: pmulhuw
-  return _mm_mulhi_pu16(a, b);
-}
-
-void test23(__m64 d, __m64 n, char *p) {
-  // CHECK: maskmovq
-  _mm_maskmove_si64(d, n, p);
-}
-
-int test24(__m64 a) {
-  // CHECK: pmovmskb
-  return _mm_movemask_pi8(a);
-}
-
-void test25(__m64 *p, __m64 a) {
-  // CHECK: movntq
-  _mm_stream_pi(p, a);
-}
-
-__m64 test26(__m64 a, __m64 b) {
-  // CHECK: pavgb
-  return _mm_avg_pu8(a, b);
-}
-
-__m64 test27(__m64 a, __m64 b) {
-  // CHECK: pavgw
-  return _mm_avg_pu16(a, b);
-}
-
-__m64 test28(__m64 a, __m64 b) {
-  // CHECK: pmaxub
-  return _mm_max_pu8(a, b);
-}
-
-__m64 test29(__m64 a, __m64 b) {
-  // CHECK: pmaxsw
-  return _mm_max_pi16(a, b);
-}
-
-__m64 test30(__m64 a, __m64 b) {
-  // CHECK: pminub
-  return _mm_min_pu8(a, b);
-}
-
-__m64 test31(__m64 a, __m64 b) {
-  // CHECK: pminsw
-  return _mm_min_pi16(a, b);
-}
-
-__m64 test32(__m64 a, __m64 b) {
-  // CHECK: psadbw
-  return _mm_sad_pu8(a, b);
-}
-
-__m64 test33(__m64 a, __m64 b) {
-  // CHECK: paddb
+__m64 test_mm_add_pi8(__m64 a, __m64 b) {
+  // CHECK-LABEL: test_mm_add_pi8
+  // CHECK: call x86_mmx @llvm.x86.mmx.padd.b
   return _mm_add_pi8(a, b);
 }
 
-__m64 test34(__m64 a, __m64 b) {
-  // CHECK: paddw
+__m64 test_mm_add_pi16(__m64 a, __m64 b) {
+  // CHECK-LABEL: test_mm_add_pi16
+  // CHECK: call x86_mmx @llvm.x86.mmx.padd.w
   return _mm_add_pi16(a, b);
 }
 
-__m64 test35(__m64 a, __m64 b) {
-  // CHECK: paddd
+__m64 test_mm_add_pi32(__m64 a, __m64 b) {
+  // CHECK-LABEL: test_mm_add_pi32
+  // CHECK: call x86_mmx @llvm.x86.mmx.padd.d
   return _mm_add_pi32(a, b);
 }
 
-__m64 test36(__m64 a, __m64 b) {
-  // CHECK: paddq
+__m64 test_mm_add_si64(__m64 a, __m64 b) {
+  // CHECK-LABEL: test_mm_add_si64
+  // CHECK: call x86_mmx @llvm.x86.mmx.padd.q
   return __builtin_ia32_paddq(a, b);
 }
 
-__m64 test37(__m64 a, __m64 b) {
-  // CHECK: paddsb
+__m64 test_mm_adds_pi8(__m64 a, __m64 b) {
+  // CHECK-LABEL: test_mm_adds_pi8
+  // CHECK: call x86_mmx @llvm.x86.mmx.padds.b
   return _mm_adds_pi8(a, b);
 }
 
-__m64 test38(__m64 a, __m64 b) {
-  // CHECK: paddsw
+__m64 test_mm_adds_pi16(__m64 a, __m64 b) {
+  // CHECK-LABEL: test_mm_adds_pi16
+  // CHECK: call x86_mmx @llvm.x86.mmx.padds.w
   return _mm_adds_pi16(a, b);
 }
 
-__m64 test39(__m64 a, __m64 b) {
-  // CHECK: paddusb
+__m64 test_mm_adds_pu8(__m64 a, __m64 b) {
+  // CHECK-LABEL: test_mm_adds_pu8
+  // CHECK: call x86_mmx @llvm.x86.mmx.paddus.b
   return _mm_adds_pu8(a, b);
 }
 
-__m64 test40(__m64 a, __m64 b) {
-  // CHECK: paddusw
+__m64 test_mm_adds_pu16(__m64 a, __m64 b) {
+  // CHECK-LABEL: test_mm_adds_pu16
+  // CHECK: call x86_mmx @llvm.x86.mmx.paddus.w
   return _mm_adds_pu16(a, b);
 }
 
-__m64 test41(__m64 a, __m64 b) {
-  // CHECK: psubb
-  return _mm_sub_pi8(a, b);
+__m64 test_mm_alignr_pi8(__m64 a, __m64 b) {
+  // CHECK-LABEL: test_mm_alignr_pi8
+  // CHECK: call x86_mmx @llvm.x86.mmx.palignr.b
+  return _mm_alignr_pi8(a, b, 2);
 }
 
-__m64 test42(__m64 a, __m64 b) {
-  // CHECK: psubw
-  return _mm_sub_pi16(a, b);
-}
-
-__m64 test43(__m64 a, __m64 b) {
-  // CHECK: psubd
-  return _mm_sub_pi32(a, b);
-}
-
-__m64 test44(__m64 a, __m64 b) {
-  // CHECK: psubq
-  return __builtin_ia32_psubq(a, b);
-}
-
-__m64 test45(__m64 a, __m64 b) {
-  // CHECK: psubsb
-  return _mm_subs_pi8(a, b);
-}
-
-__m64 test46(__m64 a, __m64 b) {
-  // CHECK: psubsw
-  return _mm_subs_pi16(a, b);
-}
-
-__m64 test47(__m64 a, __m64 b) {
-  // CHECK: psubusb
-  return _mm_subs_pu8(a, b);
-}
-
-__m64 test48(__m64 a, __m64 b) {
-  // CHECK: psubusw
-  return _mm_subs_pu16(a, b);
-}
-
-__m64 test49(__m64 a, __m64 b) {
-  // CHECK: pmaddwd
-  return _mm_madd_pi16(a, b);
-}
-
-__m64 test50(__m64 a, __m64 b) {
-  // CHECK: pmulhw
-  return _mm_mulhi_pi16(a, b);
-}
-
-__m64 test51(__m64 a, __m64 b) {
-  // CHECK: pmullw
-  return _mm_mullo_pi16(a, b);
-}
-
-__m64 test52(__m64 a, __m64 b) {
-  // CHECK: pmullw
-  return _mm_mullo_pi16(a, b);
-}
-
-__m64 test53(__m64 a, __m64 b) {
-  // CHECK: pand
+__m64 test_mm_and_si64(__m64 a, __m64 b) {
+  // CHECK-LABEL: test_mm_and_si64
+  // CHECK: call x86_mmx @llvm.x86.mmx.pand
   return _mm_and_si64(a, b);
 }
 
-__m64 test54(__m64 a, __m64 b) {
-  // CHECK: pandn
+__m64 test_mm_andnot_si64(__m64 a, __m64 b) {
+  // CHECK-LABEL: test_mm_andnot_si64
+  // CHECK: call x86_mmx @llvm.x86.mmx.pandn
   return _mm_andnot_si64(a, b);
 }
 
-__m64 test55(__m64 a, __m64 b) {
-  // CHECK: por
-  return _mm_or_si64(a, b);
-}
-
-__m64 test56(__m64 a, __m64 b) {
-  // CHECK: pxor
-  return _mm_xor_si64(a, b);
-}
-
-__m64 test57(__m64 a, __m64 b) {
-  // CHECK: pavgb
+__m64 test_mm_avg_pu8(__m64 a, __m64 b) {
+  // CHECK-LABEL: test_mm_avg_pu8
+  // CHECK: call x86_mmx @llvm.x86.mmx.pavg.b
   return _mm_avg_pu8(a, b);
 }
 
-__m64 test58(__m64 a, __m64 b) {
-  // CHECK: pavgw
+__m64 test_mm_avg_pu16(__m64 a, __m64 b) {
+  // CHECK-LABEL: test_mm_avg_pu16
+  // CHECK: call x86_mmx @llvm.x86.mmx.pavg.w
   return _mm_avg_pu16(a, b);
 }
 
-__m64 test59(__m64 a, __m64 b) {
-  // CHECK: psllw
-  return _mm_sll_pi16(a, b);
-}
-
-__m64 test60(__m64 a, __m64 b) {
-  // CHECK: pslld
-  return _mm_sll_pi32(a, b);
-}
-
-__m64 test61(__m64 a, __m64 b) {
-  // CHECK: psllq
-  return _mm_sll_si64(a, b);
-}
-
-__m64 test62(__m64 a, __m64 b) {
-  // CHECK: psrlw
-  return _mm_srl_pi16(a, b);
-}
-
-__m64 test63(__m64 a, __m64 b) {
-  // CHECK: psrld
-  return _mm_srl_pi32(a, b);
-}
-
-__m64 test64(__m64 a, __m64 b) {
-  // CHECK: psrlq
-  return _mm_srl_si64(a, b);
-}
-
-__m64 test65(__m64 a, __m64 b) {
-  // CHECK: psraw
-  return _mm_sra_pi16(a, b);
-}
-
-__m64 test66(__m64 a, __m64 b) {
-  // CHECK: psrad
-  return _mm_sra_pi32(a, b);
-}
-
-__m64 test67(__m64 a) {
-  // CHECK: psllw
-  return _mm_slli_pi16(a, 3);
-}
-
-__m64 test68(__m64 a) {
-  // CHECK: pslld
-  return _mm_slli_pi32(a, 3);
-}
-
-__m64 test69(__m64 a) {
-  // CHECK: psllq
-  return _mm_slli_si64(a, 3);
-}
-
-__m64 test70(__m64 a) {
-  // CHECK: psrlw
-  return _mm_srli_pi16(a, 3);
-}
-
-__m64 test71(__m64 a) {
-  // CHECK: psrld
-  return _mm_srli_pi32(a, 3);
-}
-
-__m64 test72(__m64 a) {
-  // CHECK: psrlq
-  return _mm_srli_si64(a, 3);
-}
-
-__m64 test73(__m64 a) {
-  // CHECK: psraw
-  return _mm_srai_pi16(a, 3);
-}
-
-__m64 test74(__m64 a) {
-  // CHECK: psrad
-  return _mm_srai_pi32(a, 3);
-}
-
-__m64 test75(__m64 a, __m64 b) {
-  // CHECK: packsswb
-  return _mm_packs_pi16(a, b);
-}
-
-__m64 test76(__m64 a, __m64 b) {
-  // CHECK: packssdw
-  return _mm_packs_pi32(a, b);
-}
-
-__m64 test77(__m64 a, __m64 b) {
-  // CHECK: packuswb
-  return _mm_packs_pu16(a, b);
-}
-
-__m64 test78(__m64 a, __m64 b) {
-  // CHECK: punpckhbw
-  return _mm_unpackhi_pi8(a, b);
-}
-
-__m64 test79(__m64 a, __m64 b) {
-  // CHECK: punpckhwd
-  return _mm_unpackhi_pi16(a, b);
-}
-
-__m64 test80(__m64 a, __m64 b) {
-  // CHECK: punpckhdq
-  return _mm_unpackhi_pi32(a, b);
-}
-
-__m64 test81(__m64 a, __m64 b) {
-  // CHECK: punpcklbw
-  return _mm_unpacklo_pi8(a, b);
-}
-
-__m64 test82(__m64 a, __m64 b) {
-  // CHECK: punpcklwd
-  return _mm_unpacklo_pi16(a, b);
-}
-
-__m64 test83(__m64 a, __m64 b) {
-  // CHECK: punpckldq
-  return _mm_unpacklo_pi32(a, b);
-}
-
-__m64 test84(__m64 a, __m64 b) {
-  // CHECK: pcmpeqb
+__m64 test_mm_cmpeq_pi8(__m64 a, __m64 b) {
+  // CHECK-LABEL: test_mm_cmpeq_pi8
+  // CHECK: call x86_mmx @llvm.x86.mmx.pcmpeq.b
   return _mm_cmpeq_pi8(a, b);
 }
 
-__m64 test85(__m64 a, __m64 b) {
-  // CHECK: pcmpeqw
+__m64 test_mm_cmpeq_pi16(__m64 a, __m64 b) {
+  // CHECK-LABEL: test_mm_cmpeq_pi16
+  // CHECK: call x86_mmx @llvm.x86.mmx.pcmpeq.w
   return _mm_cmpeq_pi16(a, b);
 }
 
-__m64 test86(__m64 a, __m64 b) {
-  // CHECK: pcmpeqd
+__m64 test_mm_cmpeq_pi32(__m64 a, __m64 b) {
+  // CHECK-LABEL: test_mm_cmpeq_pi32
+  // CHECK: call x86_mmx @llvm.x86.mmx.pcmpeq.d
   return _mm_cmpeq_pi32(a, b);
 }
 
-__m64 test87(__m64 a, __m64 b) {
-  // CHECK: pcmpgtb
+__m64 test_mm_cmpgt_pi8(__m64 a, __m64 b) {
+  // CHECK-LABEL: test_mm_cmpgt_pi8
+  // CHECK: call x86_mmx @llvm.x86.mmx.pcmpgt.b
   return _mm_cmpgt_pi8(a, b);
 }
 
-__m64 test88(__m64 a, __m64 b) {
-  // CHECK: pcmpgtw
+__m64 test_mm_cmpgt_pi16(__m64 a, __m64 b) {
+  // CHECK-LABEL: test_mm_cmpgt_pi16
+  // CHECK: call x86_mmx @llvm.x86.mmx.pcmpgt.w
   return _mm_cmpgt_pi16(a, b);
 }
 
-__m64 test89(__m64 a, __m64 b) {
-  // CHECK: pcmpgtd
+__m64 test_mm_cmpgt_pi32(__m64 a, __m64 b) {
+  // CHECK-LABEL: test_mm_cmpgt_pi32
+  // CHECK: call x86_mmx @llvm.x86.mmx.pcmpgt.d
   return _mm_cmpgt_pi32(a, b);
 }
+
+__m128 test_mm_cvt_pi2ps(__m128 a, __m64 b) {
+  // CHECK-LABEL: test_mm_cvt_pi2ps
+  // CHECK: <4 x float> @llvm.x86.sse.cvtpi2ps
+  return _mm_cvt_pi2ps(a, b);
+}
+
+__m64 test_mm_cvt_ps2pi(__m128 a) {
+  // CHECK-LABEL: test_mm_cvt_ps2pi
+  // CHECK: call x86_mmx @llvm.x86.sse.cvtps2pi
+  return _mm_cvt_ps2pi(a);
+}
+
+__m64 test_mm_cvtpd_pi32(__m128d a) {
+  // CHECK-LABEL: test_mm_cvtpd_pi32
+  // CHECK: call x86_mmx @llvm.x86.sse.cvtpd2pi
+  return _mm_cvtpd_pi32(a);
+}
+
+__m128 test_mm_cvtpi16_ps(__m64 a) {
+  // CHECK-LABEL: test_mm_cvtpi16_ps
+  // CHECK: call <4 x float> @llvm.x86.sse.cvtpi2ps
+  return _mm_cvtpi16_ps(a);
+}
+
+__m128d test_mm_cvtpi32_pd(__m64 a) {
+  // CHECK-LABEL: test_mm_cvtpi32_pd
+  // CHECK: call <2 x double> @llvm.x86.sse.cvtpi2pd
+  return _mm_cvtpi32_pd(a);
+}
+
+__m128 test_mm_cvtpi32_ps(__m128 a, __m64 b) {
+  // CHECK-LABEL: test_mm_cvtpi32_ps
+  // CHECK: call <4 x float> @llvm.x86.sse.cvtpi2ps
+  return _mm_cvtpi32_ps(a, b);
+}
+
+__m128 test_mm_cvtpi32x2_ps(__m64 a, __m64 b) {
+  // CHECK-LABEL: test_mm_cvtpi32x2_ps
+  // CHECK: call <4 x float> @llvm.x86.sse.cvtpi2ps
+  // CHECK: call <4 x float> @llvm.x86.sse.cvtpi2ps
+  return _mm_cvtpi32x2_ps(a, b);
+}
+
+__m64 test_mm_cvtps_pi16(__m128 a) {
+  // CHECK-LABEL: test_mm_cvtps_pi16
+  // CHECK: call x86_mmx @llvm.x86.sse.cvtps2pi
+  return _mm_cvtps_pi16(a);
+}
+
+__m64 test_mm_cvtps_pi32(__m128 a) {
+  // CHECK-LABEL: test_mm_cvtps_pi32
+  // CHECK: call x86_mmx @llvm.x86.sse.cvtps2pi
+  return _mm_cvtps_pi32(a);
+}
+
+__m64 test_mm_cvtsi32_si64(int a) {
+  // CHECK-LABEL: test_mm_cvtsi32_si64
+  // CHECK: insertelement <2 x i32>
+  return _mm_cvtsi32_si64(a);
+}
+
+int test_mm_cvtsi64_si32(__m64 a) {
+  // CHECK-LABEL: test_mm_cvtsi64_si32
+  // CHECK: extractelement <2 x i32>
+  return _mm_cvtsi64_si32(a);
+}
+
+__m64 test_mm_cvttpd_pi32(__m128d a) {
+  // CHECK-LABEL: test_mm_cvttpd_pi32
+  // CHECK: call x86_mmx @llvm.x86.sse.cvttpd2pi
+  return _mm_cvttpd_pi32(a);
+}
+
+__m64 test_mm_cvttps_pi32(__m128 a) {
+  // CHECK-LABEL: test_mm_cvttps_pi32
+  // CHECK: call x86_mmx @llvm.x86.sse.cvttps2pi
+  return _mm_cvttps_pi32(a);
+}
+
+__m64 test_m_from_int(int a) {
+  // CHECK-LABEL: test_m_from_int
+  // CHECK: insertelement <2 x i32>
+  return _m_from_int(a);
+}
+
+__m64 test_m_from_int64(long long a) {
+  // CHECK-LABEL: test_m_from_int64
+  // CHECK: bitcast
+  return _m_from_int64(a);
+}
+
+__m64 test_mm_hadd_pi16(__m64 a, __m64 b) {
+  // CHECK-LABEL: test_mm_hadd_pi16
+  // CHECK: call x86_mmx @llvm.x86.ssse3.phadd.w
+  return _mm_hadd_pi16(a, b);
+}
+
+__m64 test_mm_hadd_pi32(__m64 a, __m64 b) {
+  // CHECK-LABEL: test_mm_hadd_pi32
+  // CHECK: call x86_mmx @llvm.x86.ssse3.phadd.d
+  return _mm_hadd_pi32(a, b);
+}
+
+__m64 test_mm_hadds_pi16(__m64 a, __m64 b) {
+  // CHECK-LABEL: test_mm_hadds_pi16
+  // CHECK: call x86_mmx @llvm.x86.ssse3.phadd.sw
+  return _mm_hadds_pi16(a, b);
+}
+
+__m64 test_mm_hsub_pi16(__m64 a, __m64 b) {
+  // CHECK-LABEL: test_mm_hsub_pi16
+  // CHECK: call x86_mmx @llvm.x86.ssse3.phsub.w
+  return _mm_hsub_pi16(a, b);
+}
+
+__m64 test_mm_hsub_pi32(__m64 a, __m64 b) {
+  // CHECK-LABEL: test_mm_hsub_pi32
+  // CHECK: call x86_mmx @llvm.x86.ssse3.phsub.d
+  return _mm_hsub_pi32(a, b);
+}
+
+__m64 test_mm_hsubs_pi16(__m64 a, __m64 b) {
+  // CHECK-LABEL: test_mm_hsubs_pi16
+  // CHECK: call x86_mmx @llvm.x86.ssse3.phsub.sw
+  return _mm_hsubs_pi16(a, b);
+}
+
+__m64 test_mm_madd_pi16(__m64 a, __m64 b) {
+  // CHECK-LABEL: test_mm_madd_pi16
+  // CHECK: call x86_mmx @llvm.x86.mmx.pmadd.wd
+  return _mm_madd_pi16(a, b);
+}
+
+__m64 test_mm_maddubs_pi16(__m64 a, __m64 b) {
+  // CHECK-LABEL: test_mm_maddubs_pi16
+  // CHECK: call x86_mmx @llvm.x86.ssse3.pmadd.ub.sw
+  return _mm_maddubs_pi16(a, b);
+}
+
+void test_mm_maskmove_si64(__m64 d, __m64 n, char *p) {
+  // CHECK-LABEL: test_mm_maskmove_si64
+  // CHECK: call void @llvm.x86.mmx.maskmovq
+  _mm_maskmove_si64(d, n, p);
+}
+
+__m64 test_mm_max_pi16(__m64 a, __m64 b) {
+  // CHECK-LABEL: test_mm_max_pi16
+  // CHECK: call x86_mmx @llvm.x86.mmx.pmaxs.w
+  return _mm_max_pi16(a, b);
+}
+
+__m64 test_mm_max_pu8(__m64 a, __m64 b) {
+  // CHECK-LABEL: test_mm_max_pu8
+  // CHECK: call x86_mmx @llvm.x86.mmx.pmaxu.b
+  return _mm_max_pu8(a, b);
+}
+
+__m64 test_mm_min_pi16(__m64 a, __m64 b) {
+  // CHECK-LABEL: test_mm_min_pi16
+  // CHECK: call x86_mmx @llvm.x86.mmx.pmins.w
+  return _mm_min_pi16(a, b);
+}
+
+__m64 test_mm_min_pu8(__m64 a, __m64 b) {
+  // CHECK-LABEL: test_mm_min_pu8
+  // CHECK: call x86_mmx @llvm.x86.mmx.pminu.b
+  return _mm_min_pu8(a, b);
+}
+
+int test_mm_movemask_pi8(__m64 a) {
+  // CHECK-LABEL: test_mm_movemask_pi8
+  // CHECK: call i32 @llvm.x86.mmx.pmovmskb
+  return _mm_movemask_pi8(a);
+}
+
+__m64 test_mm_mul_su32(__m64 a, __m64 b) {
+  // CHECK-LABEL: test_mm_mul_su32
+  // CHECK: call x86_mmx @llvm.x86.mmx.pmulu.dq
+  return _mm_mul_su32(a, b);
+}
+
+__m64 test_mm_mulhi_pi16(__m64 a, __m64 b) {
+  // CHECK-LABEL: test_mm_mulhi_pi16
+  // CHECK: call x86_mmx @llvm.x86.mmx.pmulh.w
+  return _mm_mulhi_pi16(a, b);
+}
+
+__m64 test_mm_mulhi_pu16(__m64 a, __m64 b) {
+  // CHECK-LABEL: test_mm_mulhi_pu16
+  // CHECK: call x86_mmx @llvm.x86.mmx.pmulhu.w
+  return _mm_mulhi_pu16(a, b);
+}
+
+__m64 test_mm_mulhrs_pi16(__m64 a, __m64 b) {
+  // CHECK-LABEL: test_mm_mulhrs_pi16
+  // CHECK: call x86_mmx @llvm.x86.ssse3.pmul.hr.sw
+  return _mm_mulhrs_pi16(a, b);
+}
+
+__m64 test_mm_mullo_pi16(__m64 a, __m64 b) {
+  // CHECK-LABEL: test_mm_mullo_pi16
+  // CHECK: call x86_mmx @llvm.x86.mmx.pmull.w
+  return _mm_mullo_pi16(a, b);
+}
+
+__m64 test_mm_or_si64(__m64 a, __m64 b) {
+  // CHECK-LABEL: test_mm_or_si64
+  // CHECK: call x86_mmx @llvm.x86.mmx.por
+  return _mm_or_si64(a, b);
+}
+
+__m64 test_mm_packs_pi16(__m64 a, __m64 b) {
+  // CHECK-LABEL: test_mm_packs_pi16
+  // CHECK: call x86_mmx @llvm.x86.mmx.packsswb
+  return _mm_packs_pi16(a, b);
+}
+
+__m64 test_mm_packs_pi32(__m64 a, __m64 b) {
+  // CHECK-LABEL: test_mm_packs_pi32
+  // CHECK: call x86_mmx @llvm.x86.mmx.packssdw
+  return _mm_packs_pi32(a, b);
+}
+
+__m64 test_mm_packs_pu16(__m64 a, __m64 b) {
+  // CHECK-LABEL: test_mm_packs_pu16
+  // CHECK: call x86_mmx @llvm.x86.mmx.packuswb
+  return _mm_packs_pu16(a, b);
+}
+
+__m64 test_mm_sad_pu8(__m64 a, __m64 b) {
+  // CHECK-LABEL: test_mm_sad_pu8
+  // CHECK: call x86_mmx @llvm.x86.mmx.psad.bw
+  return _mm_sad_pu8(a, b);
+}
+
+__m64 test_mm_shuffle_pi8(__m64 a, __m64 b) {
+  // CHECK-LABEL: test_mm_shuffle_pi8
+  // CHECK: call x86_mmx @llvm.x86.ssse3.pshuf.b
+  return _mm_shuffle_pi8(a, b);
+}
+
+__m64 test_mm_shuffle_pi16(__m64 a) {
+  // CHECK-LABEL: test_mm_shuffle_pi16
+  // CHECK: call x86_mmx @llvm.x86.sse.pshuf.w
+  return _mm_shuffle_pi16(a, 3);
+}
+
+__m64 test_mm_sign_pi8(__m64 a, __m64 b) {
+  // CHECK-LABEL: test_mm_sign_pi8
+  // CHECK: call x86_mmx @llvm.x86.ssse3.psign.b
+  return _mm_sign_pi8(a, b);
+}
+
+__m64 test_mm_sign_pi16(__m64 a, __m64 b) {
+  // CHECK-LABEL: test_mm_sign_pi16
+  // CHECK: call x86_mmx @llvm.x86.ssse3.psign.w
+  return _mm_sign_pi16(a, b);
+}
+
+__m64 test_mm_sign_pi32(__m64 a, __m64 b) {
+  // CHECK-LABEL: test_mm_sign_pi32
+  // CHECK: call x86_mmx @llvm.x86.ssse3.psign.d
+  return _mm_sign_pi32(a, b);
+}
+
+__m64 test_mm_sll_pi16(__m64 a, __m64 b) {
+  // CHECK-LABEL: test_mm_sll_pi16
+  // CHECK: call x86_mmx @llvm.x86.mmx.psll.w
+  return _mm_sll_pi16(a, b);
+}
+
+__m64 test_mm_sll_pi32(__m64 a, __m64 b) {
+  // CHECK-LABEL: test_mm_sll_pi32
+  // CHECK: call x86_mmx @llvm.x86.mmx.psll.d
+  return _mm_sll_pi32(a, b);
+}
+
+__m64 test_mm_sll_si64(__m64 a, __m64 b) {
+  // CHECK-LABEL: test_mm_sll_si64
+  // CHECK: call x86_mmx @llvm.x86.mmx.psll.q
+  return _mm_sll_si64(a, b);
+}
+
+__m64 test_mm_slli_pi16(__m64 a) {
+  // CHECK-LABEL: test_mm_slli_pi16
+  // CHECK: call x86_mmx @llvm.x86.mmx.pslli.w
+  return _mm_slli_pi16(a, 3);
+}
+
+__m64 test_mm_slli_pi32(__m64 a) {
+  // CHECK-LABEL: test_mm_slli_pi32
+  // CHECK: call x86_mmx @llvm.x86.mmx.pslli.d
+  return _mm_slli_pi32(a, 3);
+}
+
+__m64 test_mm_slli_si64(__m64 a) {
+  // CHECK-LABEL: test_mm_slli_si64
+  // CHECK: call x86_mmx @llvm.x86.mmx.pslli.q
+  return _mm_slli_si64(a, 3);
+}
+
+__m64 test_mm_sra_pi16(__m64 a, __m64 b) {
+  // CHECK-LABEL: test_mm_sra_pi16
+  // CHECK: call x86_mmx @llvm.x86.mmx.psra.w
+  return _mm_sra_pi16(a, b);
+}
+
+__m64 test_mm_sra_pi32(__m64 a, __m64 b) {
+  // CHECK-LABEL: test_mm_sra_pi32
+  // CHECK: call x86_mmx @llvm.x86.mmx.psra.d
+  return _mm_sra_pi32(a, b);
+}
+
+__m64 test_mm_srai_pi16(__m64 a) {
+  // CHECK-LABEL: test_mm_srai_pi16
+  // CHECK: call x86_mmx @llvm.x86.mmx.psrai.w
+  return _mm_srai_pi16(a, 3);
+}
+
+__m64 test_mm_srai_pi32(__m64 a) {
+  // CHECK-LABEL: test_mm_srai_pi32
+  // CHECK: call x86_mmx @llvm.x86.mmx.psrai.d
+  return _mm_srai_pi32(a, 3);
+}
+
+__m64 test_mm_srl_pi16(__m64 a, __m64 b) {
+  // CHECK-LABEL: test_mm_srl_pi16
+  // CHECK: call x86_mmx @llvm.x86.mmx.psrl.w
+  return _mm_srl_pi16(a, b);
+}
+
+__m64 test_mm_srl_pi32(__m64 a, __m64 b) {
+  // CHECK-LABEL: test_mm_srl_pi32
+  // CHECK: call x86_mmx @llvm.x86.mmx.psrl.d
+  return _mm_srl_pi32(a, b);
+}
+
+__m64 test_mm_srl_si64(__m64 a, __m64 b) {
+  // CHECK-LABEL: test_mm_srl_si64
+  // CHECK: call x86_mmx @llvm.x86.mmx.psrl.q
+  return _mm_srl_si64(a, b);
+}
+
+__m64 test_mm_srli_pi16(__m64 a) {
+  // CHECK-LABEL: test_mm_srli_pi16
+  // CHECK: call x86_mmx @llvm.x86.mmx.psrli.w
+  return _mm_srli_pi16(a, 3);
+}
+
+__m64 test_mm_srli_pi32(__m64 a) {
+  // CHECK-LABEL: test_mm_srli_pi32
+  // CHECK: call x86_mmx @llvm.x86.mmx.psrli.d
+  return _mm_srli_pi32(a, 3);
+}
+
+__m64 test_mm_srli_si64(__m64 a) {
+  // CHECK-LABEL: test_mm_srli_si64
+  // CHECK: call x86_mmx @llvm.x86.mmx.psrli.q
+  return _mm_srli_si64(a, 3);
+}
+
+void test_mm_stream_pi(__m64 *p, __m64 a) {
+  // CHECK-LABEL: test_mm_stream_pi
+  // CHECK: call void @llvm.x86.mmx.movnt.dq
+  _mm_stream_pi(p, a);
+}
+
+__m64 test_mm_sub_pi8(__m64 a, __m64 b) {
+  // CHECK-LABEL: test_mm_sub_pi8
+  // CHECK: call x86_mmx @llvm.x86.mmx.psub.b
+  return _mm_sub_pi8(a, b);
+}
+
+__m64 test_mm_sub_pi16(__m64 a, __m64 b) {
+  // CHECK-LABEL: test_mm_sub_pi16
+  // CHECK: call x86_mmx @llvm.x86.mmx.psub.w
+  return _mm_sub_pi16(a, b);
+}
+
+__m64 test_mm_sub_pi32(__m64 a, __m64 b) {
+  // CHECK-LABEL: test_mm_sub_pi32
+  // CHECK: call x86_mmx @llvm.x86.mmx.psub.d
+  return _mm_sub_pi32(a, b);
+}
+
+__m64 test_mm_sub_si64(__m64 a, __m64 b) {
+  // CHECK-LABEL: test_mm_sub_si64
+  // CHECK: call x86_mmx @llvm.x86.mmx.psub.q
+  return __builtin_ia32_psubq(a, b);
+}
+
+__m64 test_mm_subs_pi8(__m64 a, __m64 b) {
+  // CHECK-LABEL: test_mm_subs_pi8
+  // CHECK: call x86_mmx @llvm.x86.mmx.psubs.b
+  return _mm_subs_pi8(a, b);
+}
+
+__m64 test_mm_subs_pi16(__m64 a, __m64 b) {
+  // CHECK-LABEL: test_mm_subs_pi16
+  // CHECK: call x86_mmx @llvm.x86.mmx.psubs.w
+  return _mm_subs_pi16(a, b);
+}
+
+__m64 test_mm_subs_pu8(__m64 a, __m64 b) {
+  // CHECK-LABEL: test_mm_subs_pu8
+  // CHECK: call x86_mmx @llvm.x86.mmx.psubus.b
+  return _mm_subs_pu8(a, b);
+}
+
+__m64 test_mm_subs_pu16(__m64 a, __m64 b) {
+  // CHECK-LABEL: test_mm_subs_pu16
+  // CHECK: call x86_mmx @llvm.x86.mmx.psubus.w
+  return _mm_subs_pu16(a, b);
+}
+
+int test_m_to_int(__m64 a) {
+  // CHECK-LABEL: test_m_to_int
+  // CHECK: extractelement <2 x i32>
+  return _m_to_int(a);
+}
+
+long long test_m_to_int64(__m64 a) {
+  // CHECK-LABEL: test_m_to_int64
+  // CHECK: bitcast
+  return _m_to_int64(a);
+}
+
+__m64 test_mm_unpackhi_pi8(__m64 a, __m64 b) {
+  // CHECK-LABEL: test_mm_unpackhi_pi8
+  // CHECK: call x86_mmx @llvm.x86.mmx.punpckhbw
+  return _mm_unpackhi_pi8(a, b);
+}
+
+__m64 test_mm_unpackhi_pi16(__m64 a, __m64 b) {
+  // CHECK-LABEL: test_mm_unpackhi_pi16
+  // CHECK: call x86_mmx @llvm.x86.mmx.punpckhwd
+  return _mm_unpackhi_pi16(a, b);
+}
+
+__m64 test_mm_unpackhi_pi32(__m64 a, __m64 b) {
+  // CHECK-LABEL: test_mm_unpackhi_pi32
+  // CHECK: call x86_mmx @llvm.x86.mmx.punpckhdq
+  return _mm_unpackhi_pi32(a, b);
+}
+
+__m64 test_mm_unpacklo_pi8(__m64 a, __m64 b) {
+  // CHECK-LABEL: test_mm_unpacklo_pi8
+  // CHECK: call x86_mmx @llvm.x86.mmx.punpcklbw
+  return _mm_unpacklo_pi8(a, b);
+}
+
+__m64 test_mm_unpacklo_pi16(__m64 a, __m64 b) {
+  // CHECK-LABEL: test_mm_unpacklo_pi16
+  // CHECK: call x86_mmx @llvm.x86.mmx.punpcklwd
+  return _mm_unpacklo_pi16(a, b);
+}
+
+__m64 test_mm_unpacklo_pi32(__m64 a, __m64 b) {
+  // CHECK-LABEL: test_mm_unpacklo_pi32
+  // CHECK: call x86_mmx @llvm.x86.mmx.punpckldq
+  return _mm_unpacklo_pi32(a, b);
+}
+
+__m64 test_mm_xor_si64(__m64 a, __m64 b) {
+  // CHECK-LABEL: test_mm_xor_si64
+  // CHECK: call x86_mmx @llvm.x86.mmx.pxor
+  return _mm_xor_si64(a, b);
+}
diff --git a/test/CodeGen/ms-declspecs.c b/test/CodeGen/ms-declspecs.c
index c32733e..91f5aa2 100644
--- a/test/CodeGen/ms-declspecs.c
+++ b/test/CodeGen/ms-declspecs.c
@@ -33,7 +33,12 @@
 __declspec(noreturn) void f20_t(void);
 void f20(void) { f20_t(); }
 
+__declspec(noalias) void noalias_callee(int *x);
+// CHECK: call void @noalias_callee({{.*}}) [[NA:#[0-9]+]]
+void noalias_caller(int *x) { noalias_callee(x); }
+
 // CHECK: attributes [[NAKED]] = { naked noinline nounwind{{.*}} }
 // CHECK: attributes [[NUW]] = { nounwind{{.*}} }
 // CHECK: attributes [[NI]] = { noinline nounwind{{.*}} }
 // CHECK: attributes [[NR]] = { noreturn }
+// CHECK: attributes [[NA]] = { argmemonly nounwind{{.*}} }
diff --git a/test/CodeGen/ms-inline-asm-align.c b/test/CodeGen/ms-inline-asm-align.c
new file mode 100644
index 0000000..de896b8
--- /dev/null
+++ b/test/CodeGen/ms-inline-asm-align.c
@@ -0,0 +1,30 @@
+// REQUIRES: x86-registered-target
+// RUN: %clang_cc1 %s -triple i386-apple-darwin10 -fasm-blocks -emit-llvm -o - | FileCheck %s --check-prefix=DARWIN
+// RUN: %clang_cc1 %s -triple i686-pc-win32 -fasm-blocks -emit-llvm -o - | FileCheck %s --check-prefix=WINDOWS
+
+// On Windows, .align is in bytes, and on Darwin, .align is in log2 form. The
+// Intel inline assembly parser should rewrite to the appropriate form depending
+// on the platform.
+
+void align_test() {
+  __asm align 8
+  __asm align 16;
+  __asm align 128;
+  __asm ALIGN 256;
+}
+
+// DARWIN-LABEL: define void @align_test()
+// DARWIN: call void asm sideeffect inteldialect
+// DARWIN-SAME: .align 3
+// DARWIN-SAME: .align 4
+// DARWIN-SAME: .align 7
+// DARWIN-SAME: .align 8
+// DARWIN-SAME: "~{dirflag},~{fpsr},~{flags}"()
+
+// WINDOWS-LABEL: define void @align_test()
+// WINDOWS: call void asm sideeffect inteldialect
+// WINDOWS-SAME: .align 8
+// WINDOWS-SAME: .align 16
+// WINDOWS-SAME: .align 128
+// WINDOWS-SAME: .align 256
+// WINDOWS-SAME: "~{dirflag},~{fpsr},~{flags}"()
diff --git a/test/CodeGen/ms-inline-asm.c b/test/CodeGen/ms-inline-asm.c
index d98b498..f530c37 100644
--- a/test/CodeGen/ms-inline-asm.c
+++ b/test/CodeGen/ms-inline-asm.c
@@ -470,6 +470,18 @@
   int b;
 } A;
 
+typedef struct {
+  int b1;
+  A   b2;
+} B;
+
+typedef struct {
+  int c1;
+  A   c2;
+  int c3;
+  B   c4;
+} C;
+
 void t39() {
 // CHECK-LABEL: define void @t39
   __asm mov eax, [eax].A.b
@@ -478,6 +490,14 @@
 // CHECK: mov eax, [eax] .4
   __asm mov eax, fs:[0] A.b
 // CHECK: mov eax, fs:[$$0] .4
+  __asm mov eax, [eax].B.b2.a
+// CHECK: mov eax, [eax].4
+  __asm mov eax, [eax] B.b2.b
+// CHECK: mov eax, [eax] .8
+  __asm mov eax, fs:[0] C.c2.b
+// CHECK: mov eax, fs:[$$0] .8
+  __asm mov eax, [eax]C.c4.b2.b
+// CHECK: mov eax, [eax].24
 // CHECK: "~{eax},~{dirflag},~{fpsr},~{flags}"()
 }
 
@@ -508,6 +528,14 @@
 // CHECK: "*m,*m,*m,*m,*m,*m,~{dirflag},~{fpsr},~{flags}"(i16* {{.*}}, i16* {{.*}}, i16* {{.*}}, i16* {{.*}}, i16* {{.*}}, i16* {{.*}})
 }
 
+void t42() {
+// CHECK-LABEL: define void @t42
+  int flags;
+  __asm mov flags, eax
+// CHECK: mov dword ptr $0, eax
+// CHECK: "=*m,~{dirflag},~{fpsr},~{flags}"(i32* %flags)
+}
+
 void call_clobber() {
   __asm call t41
   // CHECK-LABEL: define void @call_clobber
@@ -525,8 +553,8 @@
     label:
     jmp label
   }
-  // CHECK-LABEL: define void @label1
-  // CHECK: call void asm sideeffect inteldialect "{{.*}}__MSASMLABEL_.1__label:\0A\09jmp {{.*}}__MSASMLABEL_.1__label", "~{dirflag},~{fpsr},~{flags}"()
+  // CHECK-LABEL: define void @label1()
+  // CHECK: call void asm sideeffect inteldialect "{{.*}}__MSASMLABEL_.1__label:\0A\09jmp {{.*}}__MSASMLABEL_.1__label", "~{dirflag},~{fpsr},~{flags}"() [[ATTR1:#[0-9]+]]
 }
 
 void label2() {
@@ -555,3 +583,24 @@
   // CHECK-LABEL: define void @label4
   // CHECK: call void asm sideeffect inteldialect "{{.*}}__MSASMLABEL_.4__label:\0A\09mov eax, {{.*}}__MSASMLABEL_.4__label", "~{eax},~{dirflag},~{fpsr},~{flags}"()
 }
+
+typedef union _LARGE_INTEGER {
+  struct {
+    unsigned int LowPart;
+    unsigned int  HighPart;
+  };
+  struct {
+    unsigned int LowPart;
+    unsigned int  HighPart;
+  } u;
+  unsigned long long QuadPart;
+} LARGE_INTEGER, *PLARGE_INTEGER;
+
+int test_indirect_field(LARGE_INTEGER LargeInteger) {
+    __asm mov     eax, LargeInteger.LowPart
+}
+// CHECK-LABEL: define i32 @test_indirect_field(
+// CHECK: call i32 asm sideeffect inteldialect "mov eax, dword ptr $1",
+
+// MS ASM containing labels must not be duplicated (PR23715).
+// CHECK: attributes [[ATTR1]] = { {{.*}}noduplicate{{.*}} }
diff --git a/test/CodeGen/ms-mm-align.c b/test/CodeGen/ms-mm-align.c
new file mode 100644
index 0000000..ae8e980
--- /dev/null
+++ b/test/CodeGen/ms-mm-align.c
@@ -0,0 +1,16 @@
+// RUN: %clang_cc1 -ffreestanding -fms-extensions -fms-compatibility -fms-compatibility-version=17.00 \
+// RUN:         -triple i686--windows -emit-llvm %s -o - \
+// RUN:         | FileCheck %s -check-prefix CHECK
+
+// Intrin.h needs size_t, but -ffreestanding prevents us from getting it from
+// stddef.h.  Work around it with this typedef.
+typedef __SIZE_TYPE__ size_t;
+#include <Intrin.h>
+
+void capture_ptr(int* i);
+void test_mm_align16(int p) {
+  _MM_ALIGN16 int i;
+  capture_ptr(&i);
+}
+
+// CHECK: alloca i32, align 16
diff --git a/test/CodeGen/ms_abi.c b/test/CodeGen/ms_abi.c
index 7c5c26f..2cca249 100644
--- a/test/CodeGen/ms_abi.c
+++ b/test/CodeGen/ms_abi.c
@@ -1,20 +1,145 @@
 // RUN: %clang_cc1 -triple x86_64-unknown-freebsd10.0 -emit-llvm < %s | FileCheck -check-prefix=FREEBSD %s
 // RUN: %clang_cc1 -triple x86_64-pc-win32 -emit-llvm < %s | FileCheck -check-prefix=WIN64 %s
 
+struct foo {
+  int x;
+  float y;
+  char z;
+};
+// FREEBSD: %[[STRUCT_FOO:.*]] = type { i32, float, i8 }
+// WIN64: %[[STRUCT_FOO:.*]] = type { i32, float, i8 }
+
 void __attribute__((ms_abi)) f1(void);
 void __attribute__((sysv_abi)) f2(void);
 void f3(void) {
-// FREEBSD: define void @f3()
-// WIN64: define void @f3()
+  // FREEBSD-LABEL: define void @f3()
+  // WIN64-LABEL: define void @f3()
   f1();
-// FREEBSD: call x86_64_win64cc void @f1()
-// WIN64: call void @f1()
+  // FREEBSD: call x86_64_win64cc void @f1()
+  // WIN64: call void @f1()
   f2();
-// FREEBSD: call void @f2()
-// WIN64: call x86_64_sysvcc void @f2()
+  // FREEBSD: call void @f2()
+  // WIN64: call x86_64_sysvcc void @f2()
 }
 // FREEBSD: declare x86_64_win64cc void @f1()
 // FREEBSD: declare void @f2()
 // WIN64: declare void @f1()
 // WIN64: declare x86_64_sysvcc void @f2()
 
+// Win64 ABI varargs
+void __attribute__((ms_abi)) f4(int a, ...) {
+  // FREEBSD-LABEL: define x86_64_win64cc void @f4
+  // WIN64-LABEL: define void @f4
+  __builtin_ms_va_list ap;
+  __builtin_ms_va_start(ap, a);
+  // FREEBSD: %[[AP:.*]] = alloca i8*
+  // FREEBSD: call void @llvm.va_start
+  // WIN64: %[[AP:.*]] = alloca i8*
+  // WIN64: call void @llvm.va_start
+  int b = __builtin_va_arg(ap, int);
+  // FREEBSD: %[[AP_CUR:.*]] = load i8*, i8** %[[AP]]
+  // FREEBSD-NEXT: %[[AP_NEXT:.*]] = getelementptr inbounds i8, i8* %[[AP_CUR]], i64 8
+  // FREEBSD-NEXT: store i8* %[[AP_NEXT]], i8** %[[AP]]
+  // FREEBSD-NEXT: bitcast i8* %[[AP_CUR]] to i32*
+  // WIN64: %[[AP_CUR:.*]] = load i8*, i8** %[[AP]]
+  // WIN64-NEXT: %[[AP_NEXT:.*]] = getelementptr inbounds i8, i8* %[[AP_CUR]], i64 8
+  // WIN64-NEXT: store i8* %[[AP_NEXT]], i8** %[[AP]]
+  // WIN64-NEXT: bitcast i8* %[[AP_CUR]] to i32*
+  double _Complex c = __builtin_va_arg(ap, double _Complex);
+  // FREEBSD: %[[AP_CUR2:.*]] = load i8*, i8** %[[AP]]
+  // FREEBSD-NEXT: %[[AP_NEXT2:.*]] = getelementptr inbounds i8, i8* %[[AP_CUR2]], i64 16
+  // FREEBSD-NEXT: store i8* %[[AP_NEXT2]], i8** %[[AP]]
+  // FREEBSD-NEXT: bitcast i8* %[[AP_CUR2]] to { double, double }*
+  // WIN64: %[[AP_CUR2:.*]] = load i8*, i8** %[[AP]]
+  // WIN64-NEXT: %[[AP_NEXT2:.*]] = getelementptr inbounds i8, i8* %[[AP_CUR2]], i64 16
+  // WIN64-NEXT: store i8* %[[AP_NEXT2]], i8** %[[AP]]
+  // WIN64-NEXT: bitcast i8* %[[AP_CUR2]] to { double, double }*
+  struct foo d = __builtin_va_arg(ap, struct foo);
+  // FREEBSD: %[[AP_CUR3:.*]] = load i8*, i8** %[[AP]]
+  // FREEBSD-NEXT: %[[AP_NEXT3:.*]] = getelementptr inbounds i8, i8* %[[AP_CUR3]], i64 16
+  // FREEBSD-NEXT: store i8* %[[AP_NEXT3]], i8** %[[AP]]
+  // FREEBSD-NEXT: bitcast i8* %[[AP_CUR3]] to %[[STRUCT_FOO]]*
+  // WIN64: %[[AP_CUR3:.*]] = load i8*, i8** %[[AP]]
+  // WIN64-NEXT: %[[AP_NEXT3:.*]] = getelementptr inbounds i8, i8* %[[AP_CUR3]], i64 16
+  // WIN64-NEXT: store i8* %[[AP_NEXT3]], i8** %[[AP]]
+  // WIN64-NEXT: bitcast i8* %[[AP_CUR3]] to %[[STRUCT_FOO]]*
+  __builtin_ms_va_list ap2;
+  __builtin_ms_va_copy(ap2, ap);
+  // FREEBSD: %[[AP_VAL:.*]] = load i8*, i8** %[[AP]]
+  // FREEBSD-NEXT: store i8* %[[AP_VAL]], i8** %[[AP2:.*]]
+  // WIN64: %[[AP_VAL:.*]] = load i8*, i8** %[[AP]]
+  // WIN64-NEXT: store i8* %[[AP_VAL]], i8** %[[AP2:.*]]
+  __builtin_ms_va_end(ap);
+  // FREEBSD: call void @llvm.va_end
+  // WIN64: call void @llvm.va_end
+}
+
+// Let's verify that normal va_lists work right on Win64, too.
+void f5(int a, ...) {
+  // WIN64-LABEL: define void @f5
+  __builtin_va_list ap;
+  __builtin_va_start(ap, a);
+  // WIN64: %[[AP:.*]] = alloca i8*
+  // WIN64: call void @llvm.va_start
+  int b = __builtin_va_arg(ap, int);
+  // WIN64: %[[AP_CUR:.*]] = load i8*, i8** %[[AP]]
+  // WIN64-NEXT: %[[AP_NEXT:.*]] = getelementptr inbounds i8, i8* %[[AP_CUR]], i64 8
+  // WIN64-NEXT: store i8* %[[AP_NEXT]], i8** %[[AP]]
+  // WIN64-NEXT: bitcast i8* %[[AP_CUR]] to i32*
+  double _Complex c = __builtin_va_arg(ap, double _Complex);
+  // WIN64: %[[AP_CUR2:.*]] = load i8*, i8** %[[AP]]
+  // WIN64-NEXT: %[[AP_NEXT2:.*]] = getelementptr inbounds i8, i8* %[[AP_CUR2]], i64 16
+  // WIN64-NEXT: store i8* %[[AP_NEXT2]], i8** %[[AP]]
+  // WIN64-NEXT: bitcast i8* %[[AP_CUR2]] to { double, double }*
+  struct foo d = __builtin_va_arg(ap, struct foo);
+  // WIN64: %[[AP_CUR3:.*]] = load i8*, i8** %[[AP]]
+  // WIN64-NEXT: %[[AP_NEXT3:.*]] = getelementptr inbounds i8, i8* %[[AP_CUR3]], i64 16
+  // WIN64-NEXT: store i8* %[[AP_NEXT3]], i8** %[[AP]]
+  // WIN64-NEXT: bitcast i8* %[[AP_CUR3]] to %[[STRUCT_FOO]]*
+  __builtin_va_list ap2;
+  __builtin_va_copy(ap2, ap);
+  // WIN64: call void @llvm.va_copy
+  __builtin_va_end(ap);
+  // WIN64: call void @llvm.va_end
+}
+
+// Verify that using a Win64 va_list from a System V function works.
+void __attribute__((sysv_abi)) f6(__builtin_ms_va_list ap) {
+  // FREEBSD-LABEL: define void @f6
+  // FREEBSD: store i8* %ap, i8** %[[AP:.*]]
+  // WIN64-LABEL: define x86_64_sysvcc void @f6
+  // WIN64: store i8* %ap, i8** %[[AP:.*]]
+  int b = __builtin_va_arg(ap, int);
+  // FREEBSD: %[[AP_CUR:.*]] = load i8*, i8** %[[AP]]
+  // FREEBSD-NEXT: %[[AP_NEXT:.*]] = getelementptr inbounds i8, i8* %[[AP_CUR]], i64 8
+  // FREEBSD-NEXT: store i8* %[[AP_NEXT]], i8** %[[AP]]
+  // FREEBSD-NEXT: bitcast i8* %[[AP_CUR]] to i32*
+  // WIN64: %[[AP_CUR:.*]] = load i8*, i8** %[[AP]]
+  // WIN64-NEXT: %[[AP_NEXT:.*]] = getelementptr inbounds i8, i8* %[[AP_CUR]], i64 8
+  // WIN64-NEXT: store i8* %[[AP_NEXT]], i8** %[[AP]]
+  // WIN64-NEXT: bitcast i8* %[[AP_CUR]] to i32*
+  double _Complex c = __builtin_va_arg(ap, double _Complex);
+  // FREEBSD: %[[AP_CUR2:.*]] = load i8*, i8** %[[AP]]
+  // FREEBSD-NEXT: %[[AP_NEXT2:.*]] = getelementptr inbounds i8, i8* %[[AP_CUR2]], i64 16
+  // FREEBSD-NEXT: store i8* %[[AP_NEXT2]], i8** %[[AP]]
+  // FREEBSD-NEXT: bitcast i8* %[[AP_CUR2]] to { double, double }*
+  // WIN64: %[[AP_CUR2:.*]] = load i8*, i8** %[[AP]]
+  // WIN64-NEXT: %[[AP_NEXT2:.*]] = getelementptr inbounds i8, i8* %[[AP_CUR2]], i64 16
+  // WIN64-NEXT: store i8* %[[AP_NEXT2]], i8** %[[AP]]
+  // WIN64-NEXT: bitcast i8* %[[AP_CUR2]] to { double, double }*
+  struct foo d = __builtin_va_arg(ap, struct foo);
+  // FREEBSD: %[[AP_CUR3:.*]] = load i8*, i8** %[[AP]]
+  // FREEBSD-NEXT: %[[AP_NEXT3:.*]] = getelementptr inbounds i8, i8* %[[AP_CUR3]], i64 16
+  // FREEBSD-NEXT: store i8* %[[AP_NEXT3]], i8** %[[AP]]
+  // FREEBSD-NEXT: bitcast i8* %[[AP_CUR3]] to %[[STRUCT_FOO]]*
+  // WIN64: %[[AP_CUR3:.*]] = load i8*, i8** %[[AP]]
+  // WIN64-NEXT: %[[AP_NEXT3:.*]] = getelementptr inbounds i8, i8* %[[AP_CUR3]], i64 16
+  // WIN64-NEXT: store i8* %[[AP_NEXT3]], i8** %[[AP]]
+  // WIN64-NEXT: bitcast i8* %[[AP_CUR3]] to %[[STRUCT_FOO]]*
+  __builtin_ms_va_list ap2;
+  __builtin_ms_va_copy(ap2, ap);
+  // FREEBSD: %[[AP_VAL:.*]] = load i8*, i8** %[[AP]]
+  // FREEBSD-NEXT: store i8* %[[AP_VAL]], i8** %[[AP2:.*]]
+  // WIN64: %[[AP_VAL:.*]] = load i8*, i8** %[[AP]]
+  // WIN64-NEXT: store i8* %[[AP_VAL]], i8** %[[AP2:.*]]
+}
diff --git a/test/CodeGen/ms_struct-pack.c b/test/CodeGen/ms_struct-pack.c
index 6486f29..6382f3b 100644
--- a/test/CodeGen/ms_struct-pack.c
+++ b/test/CodeGen/ms_struct-pack.c
@@ -133,12 +133,12 @@
   unsigned long e : 1;
 } __attribute__((__ms_struct__));
 
-// CHECK:      Type: struct test0
-// CHECK-NEXT: Record:
-// CHECK-NEXT: Layout:
-// CHECK-NEXT:   Size:64
-// CHECK-NEXT:   DataSize:64
-// CHECK-NEXT:   Alignment:16
-// CHECK-NEXT:   FieldOffsets: [0, 8, 16, 32, 42]>
+// CHECK:             0 | struct test0
+// CHECK-NEXT:    0:0-7 |   unsigned long a
+// CHECK-NEXT:    1:0-7 |   unsigned long b
+// CHECK-NEXT:    2:0-7 |   unsigned long c
+// CHECK-NEXT:    4:0-9 |   unsigned long d
+// CHECK-NEXT:    5:2-2 |   unsigned long e
+// CHECK-NEXT:          | [sizeof=8, align=2]
 
 static int test0[(sizeof(struct test0) == 8) ? 1 : -1];
diff --git a/test/CodeGen/ms_this.cpp b/test/CodeGen/ms_this.cpp
new file mode 100644
index 0000000..8647a5b
--- /dev/null
+++ b/test/CodeGen/ms_this.cpp
@@ -0,0 +1,57 @@
+// REQUIRES: x86-registered-target
+
+// RUN: %clang_cc1 -triple x86_64-pc-win32 -fasm-blocks -emit-llvm %s -o - | FileCheck %s
+class t1 {
+public:
+  double a;
+  void runc();
+};
+
+class t2 {
+public:
+  double a;
+  void runc();
+};
+
+// CHECK: define void @"\01?runc@t2@@
+void t2::runc() {
+  double num = 0;
+  __asm {
+      mov rax,[this]
+      // CHECK: [[THIS_ADDR_T2:%.+]] = alloca %class.t2*
+      // CHECK: [[THIS1_T2:%.+]] = load %class.t2*, %class.t2** [[THIS_ADDR_T2]],
+      // CHECK: call void asm sideeffect inteldialect "mov rax,qword ptr $1{{.*}}%class.t2* [[THIS1_T2]]
+      mov rbx,[rax]
+      mov num, rbx
+	   };
+}
+
+// CHECK: define void @"\01?runc@t1@@
+void t1::runc() {
+  double num = 0;
+  __asm {
+       mov rax,[this]
+       // CHECK: [[THIS_ADDR_T1:%.+]] = alloca %class.t1*
+       // CHECK: [[THIS1_T1:%.+]] = load %class.t1*, %class.t1** [[THIS_ADDR_T1]],
+       // CHECK: call void asm sideeffect inteldialect "mov rax,qword ptr $1{{.*}}%class.t1* [[THIS1_T1]]
+        mov rbx,[rax]
+        mov num, rbx
+	   };
+}
+
+struct s {
+  int a;
+  // CHECK: define linkonce_odr void @"\01?func@s@@
+  void func() {
+    __asm mov rax, [this]
+    // CHECK: [[THIS_ADDR_S:%.+]] = alloca %struct.s*
+    // CHECK: [[THIS1_S:%.+]] = load %struct.s*, %struct.s** [[THIS_ADDR_S]],
+    // CHECK: call void asm sideeffect inteldialect "mov rax, qword ptr $0{{.*}}%struct.s* [[THIS1_S]]
+  }
+} f3;
+
+int main() {
+  f3.func();
+  f3.a=1;
+  return 0;
+}
diff --git a/test/CodeGen/mult-alt-x86.c b/test/CodeGen/mult-alt-x86.c
index 4e2a69d..c74c284 100644
--- a/test/CodeGen/mult-alt-x86.c
+++ b/test/CodeGen/mult-alt-x86.c
@@ -110,9 +110,9 @@
 }
 
 // CHECK: @single_Y
-void single_Y0()
+void single_Y()
 {
-  // Y constraint currently broken.
+  // 'Y' constraint currently broken.
   //asm("foo %1,%0" : "=Y0" (mout0) : "Y0" (min1));
   //asm("foo %1,%0" : "=Yz" (mout0) : "Yz" (min1));
   //asm("foo %1,%0" : "=Yt" (mout0) : "Yt" (min1));
@@ -144,8 +144,12 @@
 // CHECK: @single_L
 void single_L()
 {
-  // CHECK: asm "foo $1,$0", "=*m,L[[CLOBBERS]](i32* @mout0, i32 1)
-  asm("foo %1,%0" : "=m" (mout0) : "L" (1));
+  // CHECK: asm "foo $1,$0", "=*m,L[[CLOBBERS]](i32* @mout0, i32 255)
+  asm("foo %1,%0" : "=m" (mout0) : "L" (0xff));
+  // CHECK: asm "foo $1,$0", "=*m,L[[CLOBBERS]](i32* @mout0, i32 65535)
+  asm("foo %1,%0" : "=m" (mout0) : "L" (0xffff));
+  // CHECK: asm "foo $1,$0", "=*m,L[[CLOBBERS]](i32* @mout0, i32 -1)
+  asm("foo %1,%0" : "=m" (mout0) : "L" (0xffffffff));
 }
 
 // CHECK: @single_M
diff --git a/test/CodeGen/named_reg_global.c b/test/CodeGen/named_reg_global.c
index 8f9a9c6..1da6257 100644
--- a/test/CodeGen/named_reg_global.c
+++ b/test/CodeGen/named_reg_global.c
@@ -1,16 +1,26 @@
-// RUN: %clang_cc1 -triple x86_64-linux-gnu -S -emit-llvm %s -o - | FileCheck %s
-// RUN: %clang_cc1 -triple arm64-linux-gnu -S -emit-llvm %s -o - | FileCheck %s
-// RUN: %clang_cc1 -triple armv7-linux-gnu -S -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -triple x86_64-linux-gnu -S -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-X86-64
+// RUN: %clang_cc1 -triple arm64-linux-gnu -S -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-ARM
+// RUN: %clang_cc1 -triple armv7-linux-gnu -S -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-ARM
 
 // CHECK-NOT: @sp = common global
+
+#if defined(__x86_64__)
+register unsigned long current_stack_pointer asm("rsp");
+#else
 register unsigned long current_stack_pointer asm("sp");
+#endif
+
 struct p4_Thread {
   struct {
     int len;
   } word;
 };
 // Testing pointer types as well
+#if defined(__x86_64__)
+register struct p4_Thread *p4TH asm("rsp");
+#else
 register struct p4_Thread *p4TH asm("sp");
+#endif
 
 // CHECK: define{{.*}} i[[bits:[0-9]+]] @get_stack_pointer_addr()
 // CHECK: [[ret:%[0-9]+]] = call i[[bits]] @llvm.read_register.i[[bits]](metadata !0)
@@ -43,5 +53,7 @@
 // CHECK: %[[regw:[0-9]+]] = ptrtoint %struct.p4_Thread* %{{.*}} to i[[bits]]
 // CHECK: call void @llvm.write_register.i[[bits]](metadata !0, i[[bits]] %[[regw]])
 
-// CHECK: !llvm.named.register.sp = !{!0}
-// CHECK: !0 = !{!"sp"}
+// CHECK-X86-64: !llvm.named.register.rsp = !{!0}
+// CHECK-X86-64: !0 = !{!"rsp"}
+// CHECK-ARM: !llvm.named.register.sp = !{!0}
+// CHECK-ARM: !0 = !{!"sp"}
diff --git a/test/CodeGen/nomathbuiltin.c b/test/CodeGen/nomathbuiltin.c
index f80cd91..35e7c56 100644
--- a/test/CodeGen/nomathbuiltin.c
+++ b/test/CodeGen/nomathbuiltin.c
@@ -7,6 +7,6 @@
 
 double foo(double a, double b) {
   return pow(a, b);
-// CHECK: call double @pow
+// CHECK: call {{.*}}double @pow
 }
 
diff --git a/test/CodeGen/nvptx-abi.c b/test/CodeGen/nvptx-abi.c
index 58ad6a1..7973bf0 100644
--- a/test/CodeGen/nvptx-abi.c
+++ b/test/CodeGen/nvptx-abi.c
@@ -21,14 +21,14 @@
 
 void foo(float4_t x) {
 // CHECK-LABEL: @foo
-// CHECK: %struct.float4_s* byval %x
+// CHECK: %struct.float4_s* byval align 4 %x
 }
 
 void fooN(float4_t x, float4_t y, float4_t z) {
 // CHECK-LABEL: @fooN
-// CHECK: %struct.float4_s* byval %x
-// CHECK: %struct.float4_s* byval %y
-// CHECK: %struct.float4_s* byval %z
+// CHECK: %struct.float4_s* byval align 4 %x
+// CHECK: %struct.float4_s* byval align 4 %y
+// CHECK: %struct.float4_s* byval align 4 %z
 }
 
 typedef struct nested_s {
@@ -39,5 +39,5 @@
 
 void baz(nested_t x) {
 // CHECK-LABEL: @baz
-// CHECK: %struct.nested_s* byval %x)
+// CHECK: %struct.nested_s* byval align 8 %x)
 }
diff --git a/test/CodeGen/nvptx-inlineasm-ptx.c b/test/CodeGen/nvptx-inlineasm-ptx.c
index 0a19123..e5345d9 100644
--- a/test/CodeGen/nvptx-inlineasm-ptx.c
+++ b/test/CodeGen/nvptx-inlineasm-ptx.c
@@ -8,8 +8,8 @@
   unsigned short us;
   int            i;
   unsigned int   ui;
-  long           l;
-  unsigned long  ul;
+  long long      ll;
+  unsigned long long ull;
   float          f;
   double         d;
 
@@ -29,9 +29,9 @@
   asm volatile ("mov.b32 %0, %1;" : "=r"(ui) : "r"(ui));
 
   // CHECK: i64 asm sideeffect "mov.b64 $0, $1;", "=l,l"
-  asm volatile ("mov.b64 %0, %1;" : "=l"(l) : "l"(l));
+  asm volatile ("mov.b64 %0, %1;" : "=l"(ll) : "l"(ll));
   // CHECK: i64 asm sideeffect "mov.b64 $0, $1;", "=l,l"
-  asm volatile ("mov.b64 %0, %1;" : "=l"(ul) : "l"(ul));
+  asm volatile ("mov.b64 %0, %1;" : "=l"(ull) : "l"(ull));
 
   // CHECK: float asm sideeffect "mov.b32 $0, $1;", "=f,f"
   asm volatile ("mov.b32 %0, %1;" : "=f"(f) : "f"(f));
diff --git a/test/CodeGen/object-size.c b/test/CodeGen/object-size.c
index 3fa038a..610e541 100644
--- a/test/CodeGen/object-size.c
+++ b/test/CodeGen/object-size.c
@@ -15,7 +15,7 @@
 
 // CHECK-LABEL: define void @test1
 void test1() {
-  // CHECK:     = call i8* @__strcpy_chk(i8* getelementptr inbounds ([63 x i8], [63 x i8]* @gbuf, i32 0, i64 4), i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str, i32 0, i32 0), i64 59)
+  // CHECK:     = call i8* @__strcpy_chk(i8* getelementptr inbounds ([63 x i8], [63 x i8]* @gbuf, i64 0, i64 4), i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str, i32 0, i32 0), i64 59)
   strcpy(&gbuf[4], "Hi there");
 }
 
@@ -33,7 +33,7 @@
 
 // CHECK-LABEL: define void @test4
 void test4() {
-  // CHECK:     = call i8* @__strcpy_chk(i8* getelementptr inbounds ([63 x i8], [63 x i8]* @gbuf, i32 0, i64 -1), i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str, i32 0, i32 0), i64 0)
+  // CHECK:     = call i8* @__strcpy_chk(i8* getelementptr inbounds ([63 x i8], [63 x i8]* @gbuf, i64 0, i64 -1), i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str, i32 0, i32 0), i64 0)
   strcpy((char*)(void*)&gbuf[-1], "Hi there");
 }
 
@@ -127,7 +127,7 @@
   strcpy(gp += 1, "Hi there");
 }
 
-// CHECK: @test17
+// CHECK-LABEL: @test17
 void test17() {
   // CHECK: store i32 -1
   gi = __builtin_object_size(gp++, 0);
@@ -139,10 +139,381 @@
   gi = __builtin_object_size(gp++, 3);
 }
 
-// CHECK: @test18
+// CHECK-LABEL: @test18
 unsigned test18(int cond) {
   int a[4], b[4];
   // CHECK: phi i32*
   // CHECK: call i64 @llvm.objectsize.i64
   return __builtin_object_size(cond ? a : b, 0);
 }
+
+// CHECK-LABEL: @test19
+void test19() {
+  struct {
+    int a, b;
+  } foo;
+
+  // CHECK: store i32 8
+  gi = __builtin_object_size(&foo.a, 0);
+  // CHECK: store i32 4
+  gi = __builtin_object_size(&foo.a, 1);
+  // CHECK: store i32 8
+  gi = __builtin_object_size(&foo.a, 2);
+  // CHECK: store i32 4
+  gi = __builtin_object_size(&foo.a, 3);
+
+  // CHECK: store i32 4
+  gi = __builtin_object_size(&foo.b, 0);
+  // CHECK: store i32 4
+  gi = __builtin_object_size(&foo.b, 1);
+  // CHECK: store i32 4
+  gi = __builtin_object_size(&foo.b, 2);
+  // CHECK: store i32 4
+  gi = __builtin_object_size(&foo.b, 3);
+}
+
+// CHECK-LABEL: @test20
+void test20() {
+  struct { int t[10]; } t[10];
+
+  // CHECK: store i32 380
+  gi = __builtin_object_size(&t[0].t[5], 0);
+  // CHECK: store i32 20
+  gi = __builtin_object_size(&t[0].t[5], 1);
+  // CHECK: store i32 380
+  gi = __builtin_object_size(&t[0].t[5], 2);
+  // CHECK: store i32 20
+  gi = __builtin_object_size(&t[0].t[5], 3);
+}
+
+// CHECK-LABEL: @test21
+void test21() {
+  struct { int t; } t;
+
+  // CHECK: store i32 0
+  gi = __builtin_object_size(&t + 1, 0);
+  // CHECK: store i32 0
+  gi = __builtin_object_size(&t + 1, 1);
+  // CHECK: store i32 0
+  gi = __builtin_object_size(&t + 1, 2);
+  // CHECK: store i32 0
+  gi = __builtin_object_size(&t + 1, 3);
+
+  // CHECK: store i32 0
+  gi = __builtin_object_size(&t.t + 1, 0);
+  // CHECK: store i32 0
+  gi = __builtin_object_size(&t.t + 1, 1);
+  // CHECK: store i32 0
+  gi = __builtin_object_size(&t.t + 1, 2);
+  // CHECK: store i32 0
+  gi = __builtin_object_size(&t.t + 1, 3);
+}
+
+// CHECK-LABEL: @test22
+void test22() {
+  struct { int t[10]; } t[10];
+
+  // CHECK: store i32 0
+  gi = __builtin_object_size(&t[10], 0);
+  // CHECK: store i32 0
+  gi = __builtin_object_size(&t[10], 1);
+  // CHECK: store i32 0
+  gi = __builtin_object_size(&t[10], 2);
+  // CHECK: store i32 0
+  gi = __builtin_object_size(&t[10], 3);
+
+  // CHECK: store i32 0
+  gi = __builtin_object_size(&t[9].t[10], 0);
+  // CHECK: store i32 0
+  gi = __builtin_object_size(&t[9].t[10], 1);
+  // CHECK: store i32 0
+  gi = __builtin_object_size(&t[9].t[10], 2);
+  // CHECK: store i32 0
+  gi = __builtin_object_size(&t[9].t[10], 3);
+
+  // CHECK: store i32 0
+  gi = __builtin_object_size((char*)&t[0] + sizeof(t), 0);
+  // CHECK: store i32 0
+  gi = __builtin_object_size((char*)&t[0] + sizeof(t), 1);
+  // CHECK: store i32 0
+  gi = __builtin_object_size((char*)&t[0] + sizeof(t), 2);
+  // CHECK: store i32 0
+  gi = __builtin_object_size((char*)&t[0] + sizeof(t), 3);
+
+  // CHECK: store i32 0
+  gi = __builtin_object_size((char*)&t[9].t[0] + 10*sizeof(t[0].t), 0);
+  // CHECK: store i32 0
+  gi = __builtin_object_size((char*)&t[9].t[0] + 10*sizeof(t[0].t), 1);
+  // CHECK: store i32 0
+  gi = __builtin_object_size((char*)&t[9].t[0] + 10*sizeof(t[0].t), 2);
+  // CHECK: store i32 0
+  gi = __builtin_object_size((char*)&t[9].t[0] + 10*sizeof(t[0].t), 3);
+}
+
+struct Test23Ty { int a; int t[10]; };
+
+// CHECK-LABEL: @test23
+void test23(struct Test23Ty *p) {
+  // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 false)
+  gi = __builtin_object_size(p, 0);
+  // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 false)
+  gi = __builtin_object_size(p, 1);
+  // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 true)
+  gi = __builtin_object_size(p, 2);
+  // Note: this is currently fixed at 0 because LLVM doesn't have sufficient
+  // data to correctly handle type=3
+  // CHECK: store i32 0
+  gi = __builtin_object_size(p, 3);
+
+  // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 false)
+  gi = __builtin_object_size(&p->a, 0);
+  // CHECK: store i32 4
+  gi = __builtin_object_size(&p->a, 1);
+  // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 true)
+  gi = __builtin_object_size(&p->a, 2);
+  // CHECK: store i32 4
+  gi = __builtin_object_size(&p->a, 3);
+
+  // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 false)
+  gi = __builtin_object_size(&p->t[5], 0);
+  // CHECK: store i32 20
+  gi = __builtin_object_size(&p->t[5], 1);
+  // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 true)
+  gi = __builtin_object_size(&p->t[5], 2);
+  // CHECK: store i32 20
+  gi = __builtin_object_size(&p->t[5], 3);
+}
+
+// PR24493 -- ICE if __builtin_object_size called with NULL and (Type & 1) != 0
+// CHECK-LABEL: @test24
+void test24() {
+  // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* {{.*}}, i1 false)
+  gi = __builtin_object_size((void*)0, 0);
+  // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* {{.*}}, i1 false)
+  gi = __builtin_object_size((void*)0, 1);
+  // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* {{.*}}, i1 true)
+  gi = __builtin_object_size((void*)0, 2);
+  // Note: Currently fixed at zero because LLVM can't handle type=3 correctly.
+  // Hopefully will be lowered properly in the future.
+  // CHECK: store i32 0
+  gi = __builtin_object_size((void*)0, 3);
+}
+
+// CHECK-LABEL: @test25
+void test25() {
+  // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* {{.*}}, i1 false)
+  gi = __builtin_object_size((void*)0x1000, 0);
+  // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* {{.*}}, i1 false)
+  gi = __builtin_object_size((void*)0x1000, 1);
+  // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* {{.*}}, i1 true)
+  gi = __builtin_object_size((void*)0x1000, 2);
+  // Note: Currently fixed at zero because LLVM can't handle type=3 correctly.
+  // Hopefully will be lowered properly in the future.
+  // CHECK: store i32 0
+  gi = __builtin_object_size((void*)0x1000, 3);
+
+  // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* {{.*}}, i1 false)
+  gi = __builtin_object_size((void*)0 + 0x1000, 0);
+  // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* {{.*}}, i1 false)
+  gi = __builtin_object_size((void*)0 + 0x1000, 1);
+  // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* {{.*}}, i1 true)
+  gi = __builtin_object_size((void*)0 + 0x1000, 2);
+  // Note: Currently fixed at zero because LLVM can't handle type=3 correctly.
+  // Hopefully will be lowered properly in the future.
+  // CHECK: store i32 0
+  gi = __builtin_object_size((void*)0 + 0x1000, 3);
+}
+
+// CHECK-LABEL: @test26
+void test26() {
+  struct { int v[10]; } t[10];
+
+  // CHECK: store i32 316
+  gi = __builtin_object_size(&t[1].v[11], 0);
+  // CHECK: store i32 312
+  gi = __builtin_object_size(&t[1].v[12], 1);
+  // CHECK: store i32 308
+  gi = __builtin_object_size(&t[1].v[13], 2);
+  // CHECK: store i32 0
+  gi = __builtin_object_size(&t[1].v[14], 3);
+}
+
+struct Test27IncompleteTy;
+
+// CHECK-LABEL: @test27
+void test27(struct Test27IncompleteTy *t) {
+  // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 false)
+  gi = __builtin_object_size(t, 0);
+  // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 false)
+  gi = __builtin_object_size(t, 1);
+  // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 true)
+  gi = __builtin_object_size(t, 2);
+  // Note: this is currently fixed at 0 because LLVM doesn't have sufficient
+  // data to correctly handle type=3
+  // CHECK: store i32 0
+  gi = __builtin_object_size(t, 3);
+
+  // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* {{.*}}, i1 false)
+  gi = __builtin_object_size(&test27, 0);
+  // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* {{.*}}, i1 false)
+  gi = __builtin_object_size(&test27, 1);
+  // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* {{.*}}, i1 true)
+  gi = __builtin_object_size(&test27, 2);
+  // Note: this is currently fixed at 0 because LLVM doesn't have sufficient
+  // data to correctly handle type=3
+  // CHECK: store i32 0
+  gi = __builtin_object_size(&test27, 3);
+}
+
+// The intent of this test is to ensure that __builtin_object_size treats `&foo`
+// and `(T*)&foo` identically, when used as the pointer argument.
+// CHECK-LABEL: @test28
+void test28() {
+  struct { int v[10]; } t[10];
+
+#define addCasts(s) ((char*)((short*)(s)))
+  // CHECK: store i32 360
+  gi = __builtin_object_size(addCasts(&t[1]), 0);
+  // CHECK: store i32 360
+  gi = __builtin_object_size(addCasts(&t[1]), 1);
+  // CHECK: store i32 360
+  gi = __builtin_object_size(addCasts(&t[1]), 2);
+  // CHECK: store i32 360
+  gi = __builtin_object_size(addCasts(&t[1]), 3);
+
+  // CHECK: store i32 356
+  gi = __builtin_object_size(addCasts(&t[1].v[1]), 0);
+  // CHECK: store i32 36
+  gi = __builtin_object_size(addCasts(&t[1].v[1]), 1);
+  // CHECK: store i32 356
+  gi = __builtin_object_size(addCasts(&t[1].v[1]), 2);
+  // CHECK: store i32 36
+  gi = __builtin_object_size(addCasts(&t[1].v[1]), 3);
+#undef addCasts
+}
+
+struct DynStructVar {
+  char fst[16];
+  char snd[];
+};
+
+struct DynStruct0 {
+  char fst[16];
+  char snd[0];
+};
+
+struct DynStruct1 {
+  char fst[16];
+  char snd[1];
+};
+
+struct StaticStruct {
+  char fst[16];
+  char snd[2];
+};
+
+// CHECK-LABEL: @test29
+void test29(struct DynStructVar *dv, struct DynStruct0 *d0,
+            struct DynStruct1 *d1, struct StaticStruct *ss) {
+  // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 false)
+  gi = __builtin_object_size(dv->snd, 0);
+  // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 false)
+  gi = __builtin_object_size(dv->snd, 1);
+  // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 true)
+  gi = __builtin_object_size(dv->snd, 2);
+  // CHECK: store i32 0
+  gi = __builtin_object_size(dv->snd, 3);
+
+  // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 false)
+  gi = __builtin_object_size(d0->snd, 0);
+  // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 false)
+  gi = __builtin_object_size(d0->snd, 1);
+  // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 true)
+  gi = __builtin_object_size(d0->snd, 2);
+  // CHECK: store i32 0
+  gi = __builtin_object_size(d0->snd, 3);
+
+  // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 false)
+  gi = __builtin_object_size(d1->snd, 0);
+  // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 false)
+  gi = __builtin_object_size(d1->snd, 1);
+  // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 true)
+  gi = __builtin_object_size(d1->snd, 2);
+  // CHECK: store i32 1
+  gi = __builtin_object_size(d1->snd, 3);
+
+  // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 false)
+  gi = __builtin_object_size(ss->snd, 0);
+  // CHECK: store i32 2
+  gi = __builtin_object_size(ss->snd, 1);
+  // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 true)
+  gi = __builtin_object_size(ss->snd, 2);
+  // CHECK: store i32 2
+  gi = __builtin_object_size(ss->snd, 3);
+}
+
+// CHECK-LABEL: @test30
+void test30() {
+  struct { struct DynStruct1 fst, snd; } *nested;
+
+  // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 false)
+  gi = __builtin_object_size(nested->fst.snd, 0);
+  // CHECK: store i32 1
+  gi = __builtin_object_size(nested->fst.snd, 1);
+  // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 true)
+  gi = __builtin_object_size(nested->fst.snd, 2);
+  // CHECK: store i32 1
+  gi = __builtin_object_size(nested->fst.snd, 3);
+
+  // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 false)
+  gi = __builtin_object_size(nested->snd.snd, 0);
+  // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 false)
+  gi = __builtin_object_size(nested->snd.snd, 1);
+  // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 true)
+  gi = __builtin_object_size(nested->snd.snd, 2);
+  // CHECK: store i32 1
+  gi = __builtin_object_size(nested->snd.snd, 3);
+
+  union { struct DynStruct1 d1; char c[1]; } *u;
+  // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 false)
+  gi = __builtin_object_size(u->c, 0);
+  // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 false)
+  gi = __builtin_object_size(u->c, 1);
+  // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 true)
+  gi = __builtin_object_size(u->c, 2);
+  // CHECK: store i32 1
+  gi = __builtin_object_size(u->c, 3);
+
+  // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 false)
+  gi = __builtin_object_size(u->d1.snd, 0);
+  // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 false)
+  gi = __builtin_object_size(u->d1.snd, 1);
+  // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 true)
+  gi = __builtin_object_size(u->d1.snd, 2);
+  // CHECK: store i32 1
+  gi = __builtin_object_size(u->d1.snd, 3);
+}
+
+// CHECK-LABEL: @test31
+void test31() {
+  // Miscellaneous 'writing off the end' detection tests
+  struct DynStructVar *dsv;
+  struct DynStruct0 *ds0;
+  struct DynStruct1 *ds1;
+  struct StaticStruct *ss;
+
+  // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 false)
+  gi = __builtin_object_size(ds1[9].snd, 1);
+
+  // CHECK: store i32 2
+  gi = __builtin_object_size(&ss[9].snd[0], 1);
+
+  // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 false)
+  gi = __builtin_object_size(&ds1[9].snd[0], 1);
+
+  // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 false)
+  gi = __builtin_object_size(&ds0[9].snd[0], 1);
+
+  // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 false)
+  gi = __builtin_object_size(&dsv[9].snd[0], 1);
+}
diff --git a/test/CodeGen/object-size.cpp b/test/CodeGen/object-size.cpp
new file mode 100644
index 0000000..81b44a5
--- /dev/null
+++ b/test/CodeGen/object-size.cpp
@@ -0,0 +1,64 @@
+// RUN: %clang_cc1 -triple x86_64-apple-darwin -emit-llvm -o - %s | FileCheck %s
+
+// C++-specific tests for __builtin_object_size
+
+int gi;
+
+// CHECK-LABEL: define void @_Z5test1v()
+void test1() {
+  // Guaranteeing that our cast removal logic doesn't break more interesting
+  // cases.
+  struct A { int a; };
+  struct B { int b; };
+  struct C: public A, public B {};
+
+  C c;
+
+  // CHECK: store i32 8
+  gi = __builtin_object_size(&c, 0);
+  // CHECK: store i32 8
+  gi = __builtin_object_size((A*)&c, 0);
+  // CHECK: store i32 4
+  gi = __builtin_object_size((B*)&c, 0);
+
+  // CHECK: store i32 8
+  gi = __builtin_object_size((char*)&c, 0);
+  // CHECK: store i32 8
+  gi = __builtin_object_size((char*)(A*)&c, 0);
+  // CHECK: store i32 4
+  gi = __builtin_object_size((char*)(B*)&c, 0);
+}
+
+// CHECK-LABEL: define void @_Z5test2v()
+void test2() {
+  struct A { char buf[16]; };
+  struct B : A {};
+  struct C { int i; B bs[1]; } *c;
+
+  // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 false)
+  gi = __builtin_object_size(&c->bs[0], 0);
+  // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 false)
+  gi = __builtin_object_size(&c->bs[0], 1);
+  // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 true)
+  gi = __builtin_object_size(&c->bs[0], 2);
+  // CHECK: store i32 16
+  gi = __builtin_object_size(&c->bs[0], 3);
+
+  // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 false)
+  gi = __builtin_object_size((A*)&c->bs[0], 0);
+  // CHECK: store i32 16
+  gi = __builtin_object_size((A*)&c->bs[0], 1);
+  // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 true)
+  gi = __builtin_object_size((A*)&c->bs[0], 2);
+  // CHECK: store i32 16
+  gi = __builtin_object_size((A*)&c->bs[0], 3);
+
+  // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 false)
+  gi = __builtin_object_size(&c->bs[0].buf[0], 0);
+  // CHECK: store i32 16
+  gi = __builtin_object_size(&c->bs[0].buf[0], 1);
+  // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 true)
+  gi = __builtin_object_size(&c->bs[0].buf[0], 2);
+  // CHECK: store i32 16
+  gi = __builtin_object_size(&c->bs[0].buf[0], 3);
+}
diff --git a/test/CodeGen/openmp_default_simd_align.c b/test/CodeGen/openmp_default_simd_align.c
new file mode 100644
index 0000000..dcab5ab
--- /dev/null
+++ b/test/CodeGen/openmp_default_simd_align.c
@@ -0,0 +1,11 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-unknown -O1 -emit-llvm -o - %s | FileCheck %s
+
+enum e0 { E0 };
+struct s0 {
+  enum e0         a:31;
+};
+
+int f0() {
+  return __builtin_omp_required_simd_align(struct s0);
+  // CHECK: ret i32 16
+}
diff --git a/test/CodeGen/overloadable.c b/test/CodeGen/overloadable.c
index 8b40e4d..4946c6d 100644
--- a/test/CodeGen/overloadable.c
+++ b/test/CodeGen/overloadable.c
@@ -1,5 +1,6 @@
 // RUN: %clang_cc1 -triple %itanium_abi_triple -emit-llvm %s -o - | FileCheck %s
 // CHECK: _Z1fPA10_1X
+// CHECK: _Z1fPFvE
 
 int __attribute__((overloadable)) f(int x) { return x; }
 float __attribute__((overloadable)) f(float x) { return x; }
@@ -13,6 +14,8 @@
 
 void __attribute__((overloadable)) f(int x, int y, ...) { }
 
+void __attribute__((overloadable)) f(void (*x)()) {}
+
 int main() {
   int iv = 17;
   float fv = 3.0f;
diff --git a/test/CodeGen/override-layout.c b/test/CodeGen/override-layout.c
index 57de8b5..9907fec 100644
--- a/test/CodeGen/override-layout.c
+++ b/test/CodeGen/override-layout.c
@@ -1,7 +1,6 @@
-// RUN: %clang_cc1 -w -fdump-record-layouts %s > %t.layouts
-// RUN: %clang_cc1 -w -fdump-record-layouts-simple %s > %t.before
+// RUN: %clang_cc1 -w -fdump-record-layouts-simple %s > %t.layouts
 // RUN: %clang_cc1 -w -DPACKED= -DALIGNED16= -fdump-record-layouts-simple -foverride-record-layout=%t.layouts %s > %t.after
-// RUN: diff %t.before %t.after
+// RUN: diff %t.layouts %t.after
 // RUN: FileCheck %s < %t.after
 
 // If not explicitly disabled, set PACKED to the packed attribute.
diff --git a/test/CodeGen/packed-arrays.c b/test/CodeGen/packed-arrays.c
index 993d88e..bb742c6 100644
--- a/test/CodeGen/packed-arrays.c
+++ b/test/CodeGen/packed-arrays.c
@@ -64,10 +64,12 @@
   return *(a->x + 1);
 }
 
+// Note that 'y' still causes struct s1 to be four-byte aligned.
+
 // Note that we are incompatible with GCC on this example.
 // 
 // CHECK-LABEL: define i32 @f1_a
-// CHECK:   load i32, i32* %{{.*}}, align 1
+// CHECK:   load i32, i32* %{{.*}}, align 4
 // CHECK: }
 // CHECK-LABEL: define i32 @f1_b
 // CHECK:   load i32, i32* %{{.*}}, align 4
@@ -79,7 +81,7 @@
 // CHECK:   load i32, i32* %{{.*}}, align 4
 // CHECK: }
 // CHECK-LABEL: define i32 @f1_d
-// CHECK:   load i32, i32* %{{.*}}, align 1
+// CHECK:   load i32, i32* %{{.*}}, align 4
 // CHECK: }
 int f1_a(struct s1 *a) {
   return a->x[1];
diff --git a/test/CodeGen/packed-nest-unpacked.c b/test/CodeGen/packed-nest-unpacked.c
index 1dcd2ec..e2bbd41 100644
--- a/test/CodeGen/packed-nest-unpacked.c
+++ b/test/CodeGen/packed-nest-unpacked.c
@@ -60,6 +60,35 @@
 
 unsigned test7() {
   // CHECK: @test7
-  // CHECK: load i32, i32* getelementptr inbounds (%struct.YBitfield, %struct.YBitfield* @gbitfield, i32 0, i32 1, i32 0), align 4
+  // CHECK: load i32, i32* getelementptr inbounds (%struct.YBitfield, %struct.YBitfield* @gbitfield, i32 0, i32 1, i32 0), align 1
   return gbitfield.y.b2;
 }
+
+void test8(unsigned x) {
+  // CHECK: @test8
+  // CHECK: load i32, i32* getelementptr inbounds (%struct.YBitfield, %struct.YBitfield* @gbitfield, i32 0, i32 1, i32 0), align 1
+  // CHECK: store i32 {{.*}}, i32* getelementptr inbounds (%struct.YBitfield, %struct.YBitfield* @gbitfield, i32 0, i32 1, i32 0), align 1
+  gbitfield.y.b2 = x;
+}
+
+struct TBitfield
+{
+  long a;
+  char b;
+  unsigned c:15;
+};
+struct TBitfield tbitfield;
+
+unsigned test9() {
+  // CHECK: @test9
+  // CHECK: load i16, i16* getelementptr inbounds (%struct.TBitfield, %struct.TBitfield* @tbitfield, i32 0, i32 2), align 1
+  return tbitfield.c;
+}
+
+void test10(unsigned x) {
+  // CHECK: @test10
+  // CHECK: load i16, i16* getelementptr inbounds (%struct.TBitfield, %struct.TBitfield* @tbitfield, i32 0, i32 2), align 1
+  // CHECK: store i16 {{.*}}, i16* getelementptr inbounds (%struct.TBitfield, %struct.TBitfield* @tbitfield, i32 0, i32 2), align 1
+  tbitfield.c = x;
+}
+
diff --git a/test/CodeGen/packed-structure.c b/test/CodeGen/packed-structure.c
index 8de31d6..7d1183d 100644
--- a/test/CodeGen/packed-structure.c
+++ b/test/CodeGen/packed-structure.c
@@ -25,7 +25,7 @@
 // with align 1 (in 2363.1 at least).
 //
 // CHECK-FUNCTIONS-LABEL: define i32 @s0_load_y
-// CHECK-FUNCTIONS: [[s0_load_y:%.*]] = load i32, i32* {{.*}}, align 1
+// CHECK-FUNCTIONS: [[s0_load_y:%.*]] = load i32, i32* {{.*}}, align 4
 // CHECK-FUNCTIONS: ret i32 [[s0_load_y]]
 int s0_load_y(struct s0 *a) { return a->y; }
 // CHECK-FUNCTIONS-LABEL: define void @s0_copy
@@ -95,6 +95,6 @@
 // CHECK-FUNCTIONS-LABEL: define i32 @test3(
 int test3(struct s3 *ptr) {
   // CHECK-FUNCTIONS:      [[PTR:%.*]] = getelementptr inbounds {{%.*}}, {{%.*}}* {{%.*}}, i32 0, i32 1
-  // CHECK-FUNCTIONS-NEXT: load i32, i32* [[PTR]], align 1
+  // CHECK-FUNCTIONS-NEXT: load i32, i32* [[PTR]], align 2
   return ptr->anInt;
 }
diff --git a/test/CodeGen/palignr.c b/test/CodeGen/palignr.c
index 1712df5..5a77597 100644
--- a/test/CodeGen/palignr.c
+++ b/test/CodeGen/palignr.c
@@ -4,13 +4,13 @@
 #define _mm_alignr_epi8(a, b, n) (__builtin_ia32_palignr128((a), (b), (n)))
 typedef __attribute__((vector_size(16))) int int4;
 
-// CHECK: palignr
+// CHECK: palignr $15, %xmm1, %xmm0
 int4 align1(int4 a, int4 b) { return _mm_alignr_epi8(a, b, 15); }
 // CHECK: ret
 // CHECK: ret
 // CHECK-NOT: palignr
 int4 align2(int4 a, int4 b) { return _mm_alignr_epi8(a, b, 16); }
-// CHECK: psrldq
+// CHECK: psrldq $1, %xmm0
 int4 align3(int4 a, int4 b) { return _mm_alignr_epi8(a, b, 17); }
 // CHECK: xor
 int4 align4(int4 a, int4 b) { return _mm_alignr_epi8(a, b, 32); }
diff --git a/test/CodeGen/pass-object-size.c b/test/CodeGen/pass-object-size.c
new file mode 100644
index 0000000..1ad3f85
--- /dev/null
+++ b/test/CodeGen/pass-object-size.c
@@ -0,0 +1,353 @@
+// RUN: %clang_cc1 -triple x86_64-apple-darwin -emit-llvm -O0 %s -o - 2>&1 | FileCheck %s
+
+typedef unsigned long size_t;
+
+struct Foo {
+  int t[10];
+};
+
+#define PS(N) __attribute__((pass_object_size(N)))
+
+int gi = 0;
+
+// CHECK-LABEL: define i32 @ObjectSize0(i8* %{{.*}}, i64)
+int ObjectSize0(void *const p PS(0)) {
+  // CHECK-NOT: @llvm.objectsize
+  return __builtin_object_size(p, 0);
+}
+
+// CHECK-LABEL: define i32 @ObjectSize1(i8* %{{.*}}, i64)
+int ObjectSize1(void *const p PS(1)) {
+  // CHECK-NOT: @llvm.objectsize
+  return __builtin_object_size(p, 1);
+}
+
+// CHECK-LABEL: define i32 @ObjectSize2(i8* %{{.*}}, i64)
+int ObjectSize2(void *const p PS(2)) {
+  // CHECK-NOT: @llvm.objectsize
+  return __builtin_object_size(p, 2);
+}
+
+// CHECK-LABEL: define i32 @ObjectSize3(i8* %{{.*}}, i64)
+int ObjectSize3(void *const p PS(3)) {
+  // CHECK-NOT: @llvm.objectsize
+  return __builtin_object_size(p, 3);
+}
+
+// CHECK-LABEL: define void @test1
+void test1() {
+  struct Foo t[10];
+
+  // CHECK: call i32 @ObjectSize0(i8* %{{.*}}, i64 360)
+  gi = ObjectSize0(&t[1]);
+  // CHECK: call i32 @ObjectSize1(i8* %{{.*}}, i64 360)
+  gi = ObjectSize1(&t[1]);
+  // CHECK: call i32 @ObjectSize2(i8* %{{.*}}, i64 360)
+  gi = ObjectSize2(&t[1]);
+  // CHECK: call i32 @ObjectSize3(i8* %{{.*}}, i64 360)
+  gi = ObjectSize3(&t[1]);
+
+  // CHECK: call i32 @ObjectSize0(i8* %{{.*}}, i64 356)
+  gi = ObjectSize0(&t[1].t[1]);
+  // CHECK: call i32 @ObjectSize1(i8* %{{.*}}, i64 36)
+  gi = ObjectSize1(&t[1].t[1]);
+  // CHECK: call i32 @ObjectSize2(i8* %{{.*}}, i64 356)
+  gi = ObjectSize2(&t[1].t[1]);
+  // CHECK: call i32 @ObjectSize3(i8* %{{.*}}, i64 36)
+  gi = ObjectSize3(&t[1].t[1]);
+}
+
+// CHECK-LABEL: define void @test2
+void test2(struct Foo *t) {
+  // CHECK: call i32 @ObjectSize1(i8* %{{.*}}, i64 36)
+  gi = ObjectSize1(&t->t[1]);
+  // CHECK: call i32 @ObjectSize3(i8* %{{.*}}, i64 36)
+  gi = ObjectSize3(&t->t[1]);
+}
+
+// CHECK-LABEL: define i32 @_Z27NoViableOverloadObjectSize0Pv
+int NoViableOverloadObjectSize0(void *const p) __attribute__((overloadable)) {
+  // CHECK: @llvm.objectsize
+  return __builtin_object_size(p, 0);
+}
+
+// CHECK-LABEL: define i32 @_Z27NoViableOverloadObjectSize1Pv
+int NoViableOverloadObjectSize1(void *const p) __attribute__((overloadable)) {
+  // CHECK: @llvm.objectsize
+  return __builtin_object_size(p, 1);
+}
+
+// CHECK-LABEL: define i32 @_Z27NoViableOverloadObjectSize2Pv
+int NoViableOverloadObjectSize2(void *const p) __attribute__((overloadable)) {
+  // CHECK: @llvm.objectsize
+  return __builtin_object_size(p, 2);
+}
+
+// CHECK-LABEL: define i32 @_Z27NoViableOverloadObjectSize3Pv
+int NoViableOverloadObjectSize3(void *const p) __attribute__((overloadable)) {
+  // CHECK-NOT: @llvm.objectsize
+  return __builtin_object_size(p, 3);
+}
+
+// CHECK-LABEL: define i32 @_Z27NoViableOverloadObjectSize0Pv
+// CHECK-NOT: @llvm.objectsize
+int NoViableOverloadObjectSize0(void *const p PS(0))
+    __attribute__((overloadable)) {
+  return __builtin_object_size(p, 0);
+}
+
+int NoViableOverloadObjectSize1(void *const p PS(1))
+    __attribute__((overloadable)) {
+  return __builtin_object_size(p, 1);
+}
+
+int NoViableOverloadObjectSize2(void *const p PS(2))
+    __attribute__((overloadable)) {
+  return __builtin_object_size(p, 2);
+}
+
+int NoViableOverloadObjectSize3(void *const p PS(3))
+    __attribute__((overloadable)) {
+  return __builtin_object_size(p, 3);
+}
+
+const static int SHOULDNT_BE_CALLED = -100;
+int NoViableOverloadObjectSize0(void *const p PS(0))
+    __attribute__((overloadable, enable_if(p == 0, "never selected"))) {
+  return SHOULDNT_BE_CALLED;
+}
+
+int NoViableOverloadObjectSize1(void *const p PS(1))
+    __attribute__((overloadable, enable_if(p == 0, "never selected"))) {
+  return SHOULDNT_BE_CALLED;
+}
+
+int NoViableOverloadObjectSize2(void *const p PS(2))
+    __attribute__((overloadable, enable_if(p == 0, "never selected"))) {
+  return SHOULDNT_BE_CALLED;
+}
+
+int NoViableOverloadObjectSize3(void *const p PS(3))
+    __attribute__((overloadable, enable_if(p == 0, "never selected"))) {
+  return SHOULDNT_BE_CALLED;
+}
+
+// CHECK-LABEL: define void @test3
+void test3() {
+  struct Foo t[10];
+
+  // CHECK: call i32 @_Z27NoViableOverloadObjectSize0PvU17pass_object_size0(i8* %{{.*}}, i64 360)
+  gi = NoViableOverloadObjectSize0(&t[1]);
+  // CHECK: call i32 @_Z27NoViableOverloadObjectSize1PvU17pass_object_size1(i8* %{{.*}}, i64 360)
+  gi = NoViableOverloadObjectSize1(&t[1]);
+  // CHECK: call i32 @_Z27NoViableOverloadObjectSize2PvU17pass_object_size2(i8* %{{.*}}, i64 360)
+  gi = NoViableOverloadObjectSize2(&t[1]);
+  // CHECK: call i32 @_Z27NoViableOverloadObjectSize3PvU17pass_object_size3(i8* %{{.*}}, i64 360)
+  gi = NoViableOverloadObjectSize3(&t[1]);
+
+  // CHECK: call i32 @_Z27NoViableOverloadObjectSize0PvU17pass_object_size0(i8* %{{.*}}, i64 356)
+  gi = NoViableOverloadObjectSize0(&t[1].t[1]);
+  // CHECK: call i32 @_Z27NoViableOverloadObjectSize1PvU17pass_object_size1(i8* %{{.*}}, i64 36)
+  gi = NoViableOverloadObjectSize1(&t[1].t[1]);
+  // CHECK: call i32 @_Z27NoViableOverloadObjectSize2PvU17pass_object_size2(i8* %{{.*}}, i64 356)
+  gi = NoViableOverloadObjectSize2(&t[1].t[1]);
+  // CHECK: call i32 @_Z27NoViableOverloadObjectSize3PvU17pass_object_size3(i8* %{{.*}}, i64 36)
+  gi = NoViableOverloadObjectSize3(&t[1].t[1]);
+}
+
+// CHECK-LABEL: define void @test4
+void test4(struct Foo *t) {
+  // CHECK: call i32 @_Z27NoViableOverloadObjectSize0PvU17pass_object_size0(i8* %{{.*}}, i64 %{{.*}})
+  gi = NoViableOverloadObjectSize0(&t[1]);
+  // CHECK: call i32 @_Z27NoViableOverloadObjectSize1PvU17pass_object_size1(i8* %{{.*}}, i64 %{{.*}})
+  gi = NoViableOverloadObjectSize1(&t[1]);
+  // CHECK: call i32 @_Z27NoViableOverloadObjectSize2PvU17pass_object_size2(i8* %{{.*}}, i64 %{{.*}})
+  gi = NoViableOverloadObjectSize2(&t[1]);
+  // CHECK: call i32 @_Z27NoViableOverloadObjectSize3PvU17pass_object_size3(i8* %{{.*}}, i64 0)
+  gi = NoViableOverloadObjectSize3(&t[1]);
+
+  // CHECK: call i32 @_Z27NoViableOverloadObjectSize0PvU17pass_object_size0(i8* %{{.*}}, i64 %{{.*}})
+  gi = NoViableOverloadObjectSize0(&t[1].t[1]);
+  // CHECK: call i32 @_Z27NoViableOverloadObjectSize1PvU17pass_object_size1(i8* %{{.*}}, i64 36)
+  gi = NoViableOverloadObjectSize1(&t[1].t[1]);
+  // CHECK: call i32 @_Z27NoViableOverloadObjectSize2PvU17pass_object_size2(i8* %{{.*}}, i64 %{{.*}})
+  gi = NoViableOverloadObjectSize2(&t[1].t[1]);
+  // CHECK: call i32 @_Z27NoViableOverloadObjectSize3PvU17pass_object_size3(i8* %{{.*}}, i64 36)
+  gi = NoViableOverloadObjectSize3(&t[1].t[1]);
+}
+
+void test5() {
+  struct Foo t[10];
+
+  int (*f)(void *) = &NoViableOverloadObjectSize0;
+  gi = f(&t[1]);
+}
+
+// CHECK-LABEL: define i32 @IndirectObjectSize0
+int IndirectObjectSize0(void *const p PS(0)) {
+  // CHECK: call i32 @ObjectSize0(i8* %{{.*}}, i64 %{{.*}})
+  // CHECK-NOT: @llvm.objectsize
+  return ObjectSize0(p);
+}
+
+// CHECK-LABEL: define i32 @IndirectObjectSize1
+int IndirectObjectSize1(void *const p PS(1)) {
+  // CHECK: call i32 @ObjectSize1(i8* %{{.*}}, i64 %{{.*}})
+  // CHECK-NOT: @llvm.objectsize
+  return ObjectSize1(p);
+}
+
+// CHECK-LABEL: define i32 @IndirectObjectSize2
+int IndirectObjectSize2(void *const p PS(2)) {
+  // CHECK: call i32 @ObjectSize2(i8* %{{.*}}, i64 %{{.*}})
+  // CHECK-NOT: @llvm.objectsize
+  return ObjectSize2(p);
+}
+
+// CHECK-LABEL: define i32 @IndirectObjectSize3
+int IndirectObjectSize3(void *const p PS(3)) {
+  // CHECK: call i32 @ObjectSize3(i8* %{{.*}}, i64 %{{.*}})
+  // CHECK-NOT: @llvm.objectsize
+  return ObjectSize3(p);
+}
+
+int Overload0(void *, size_t, void *, size_t);
+int OverloadNoSize(void *, void *);
+
+int OverloadedObjectSize(void *const p PS(0),
+                         void *const c PS(0))
+    __attribute__((overloadable)) __asm__("Overload0");
+
+int OverloadedObjectSize(void *const p, void *const c)
+    __attribute__((overloadable)) __asm__("OverloadNoSize");
+
+// CHECK-LABEL: define void @test6
+void test6() {
+  int known[10], *opaque;
+
+  // CHECK: call i32 @"\01Overload0"
+  gi = OverloadedObjectSize(&known[0], &known[0]);
+
+  // CHECK: call i32 @"\01Overload0"
+  gi = OverloadedObjectSize(&known[0], opaque);
+
+  // CHECK: call i32 @"\01Overload0"
+  gi = OverloadedObjectSize(opaque, &known[0]);
+
+  // CHECK: call i32 @"\01Overload0"
+  gi = OverloadedObjectSize(opaque, opaque);
+}
+
+int Identity(void *p, size_t i) { return i; }
+
+// CHECK-NOT: define void @AsmObjectSize
+int AsmObjectSize0(void *const p PS(0)) __asm__("Identity");
+
+int AsmObjectSize1(void *const p PS(1)) __asm__("Identity");
+
+int AsmObjectSize2(void *const p PS(2)) __asm__("Identity");
+
+int AsmObjectSize3(void *const p PS(3)) __asm__("Identity");
+
+// CHECK-LABEL: define void @test7
+void test7() {
+  struct Foo t[10];
+
+  // CHECK: call i32 @"\01Identity"(i8* %{{.*}}, i64 360)
+  gi = AsmObjectSize0(&t[1]);
+  // CHECK: call i32 @"\01Identity"(i8* %{{.*}}, i64 360)
+  gi = AsmObjectSize1(&t[1]);
+  // CHECK: call i32 @"\01Identity"(i8* %{{.*}}, i64 360)
+  gi = AsmObjectSize2(&t[1]);
+  // CHECK: call i32 @"\01Identity"(i8* %{{.*}}, i64 360)
+  gi = AsmObjectSize3(&t[1]);
+
+  // CHECK: call i32 @"\01Identity"(i8* %{{.*}}, i64 356)
+  gi = AsmObjectSize0(&t[1].t[1]);
+  // CHECK: call i32 @"\01Identity"(i8* %{{.*}}, i64 36)
+  gi = AsmObjectSize1(&t[1].t[1]);
+  // CHECK: call i32 @"\01Identity"(i8* %{{.*}}, i64 356)
+  gi = AsmObjectSize2(&t[1].t[1]);
+  // CHECK: call i32 @"\01Identity"(i8* %{{.*}}, i64 36)
+  gi = AsmObjectSize3(&t[1].t[1]);
+}
+
+// CHECK-LABEL: define void @test8
+void test8(struct Foo *t) {
+  // CHECK: call i32 @"\01Identity"(i8* %{{.*}}, i64 36)
+  gi = AsmObjectSize1(&t[1].t[1]);
+  // CHECK: call i32 @"\01Identity"(i8* %{{.*}}, i64 36)
+  gi = AsmObjectSize3(&t[1].t[1]);
+}
+
+void DifferingObjectSize0(void *const p __attribute__((pass_object_size(0))));
+void DifferingObjectSize1(void *const p __attribute__((pass_object_size(1))));
+void DifferingObjectSize2(void *const p __attribute__((pass_object_size(2))));
+void DifferingObjectSize3(void *const p __attribute__((pass_object_size(3))));
+
+// CHECK-LABEL: define void @test9
+void test9(void *const p __attribute__((pass_object_size(0)))) {
+  // CHECK: @llvm.objectsize
+  DifferingObjectSize2(p);
+
+  // CHECK-NOT: @llvm.objectsize
+  DifferingObjectSize0(p);
+  DifferingObjectSize1(p);
+
+  // CHECK: call void @DifferingObjectSize3(i8* %{{.*}}, i64 0)
+  DifferingObjectSize3(p);
+}
+
+// CHECK-LABEL: define void @test10
+void test10(void *const p __attribute__((pass_object_size(1)))) {
+  // CHECK: @llvm.objectsize
+  DifferingObjectSize2(p);
+  // CHECK: @llvm.objectsize
+  DifferingObjectSize0(p);
+
+  // CHECK-NOT: @llvm.objectsize
+  DifferingObjectSize1(p);
+
+  // CHECK: call void @DifferingObjectSize3(i8* %{{.*}}, i64 0)
+  DifferingObjectSize3(p);
+}
+
+// CHECK-LABEL: define void @test11
+void test11(void *const p __attribute__((pass_object_size(2)))) {
+  // CHECK: @llvm.objectsize
+  DifferingObjectSize0(p);
+  // CHECK: @llvm.objectsize
+  DifferingObjectSize1(p);
+
+  // CHECK-NOT: @llvm.objectsize
+  DifferingObjectSize2(p);
+
+  // CHECK: call void @DifferingObjectSize3(i8* %{{.*}}, i64 0)
+  DifferingObjectSize3(p);
+}
+
+// CHECK-LABEL: define void @test12
+void test12(void *const p __attribute__((pass_object_size(3)))) {
+  // CHECK: @llvm.objectsize
+  DifferingObjectSize0(p);
+  // CHECK: @llvm.objectsize
+  DifferingObjectSize1(p);
+
+  // CHECK-NOT: @llvm.objectsize
+  DifferingObjectSize2(p);
+  DifferingObjectSize3(p);
+}
+
+// CHECK-LABEL: define void @test13
+void test13() {
+  // Ensuring that we don't lower objectsize if the expression has side-effects
+  char c[10];
+  char *p = c;
+
+  // CHECK: @llvm.objectsize
+  ObjectSize0(p);
+
+  // CHECK-NOT: @llvm.objectsize
+  ObjectSize0(++p);
+  ObjectSize0(p++);
+}
diff --git a/test/CodeGen/pclmul-builtins.c b/test/CodeGen/pclmul-builtins.c
index cb0af28..ebca899 100644
--- a/test/CodeGen/pclmul-builtins.c
+++ b/test/CodeGen/pclmul-builtins.c
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 %s -O3 -triple=x86_64-apple-darwin -target-feature +pclmul -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 %s -triple=x86_64-apple-darwin -target-feature +pclmul -emit-llvm -o - | FileCheck %s
 
 // Don't include mm_malloc.h, it's system specific.
 #define __MM_MALLOC_H
diff --git a/test/CodeGen/popcnt-builtins.c b/test/CodeGen/popcnt-builtins.c
index f072b29..5ae40c7 100644
--- a/test/CodeGen/popcnt-builtins.c
+++ b/test/CodeGen/popcnt-builtins.c
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 %s -O3 -triple=x86_64-apple-darwin -target-feature +popcnt -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 %s -triple=x86_64-apple-darwin -target-feature +popcnt -emit-llvm -o - | FileCheck %s
 
 // Don't include mm_malloc.h, it's system specific.
 #define __MM_MALLOC_H
@@ -6,11 +6,21 @@
 #include <x86intrin.h>
 
 unsigned int test_mm_popcnt_u32(unsigned int __X) {
-  // CHECK: @llvm.ctpop.i32
+  // CHECK: call i32 @llvm.ctpop.i32
   return _mm_popcnt_u32(__X);
 }
 
+unsigned int test_popcnt_32(int __X) {
+  // CHECK: call i32 @llvm.ctpop.i32
+  return _popcnt32(__X);
+}
+
 unsigned long long test_mm_popcnt_u64(unsigned long long __X) {
-  // CHECK: @llvm.ctpop.i64
+  // CHECK: call i64 @llvm.ctpop.i64
   return _mm_popcnt_u64(__X);
 }
+
+unsigned long long test_popcnt_64(long long __X) {
+  // CHECK: call i64 @llvm.ctpop.i64
+  return _popcnt64(__X);
+}
diff --git a/test/CodeGen/ppc-sfvarargs.c b/test/CodeGen/ppc-sfvarargs.c
new file mode 100644
index 0000000..924d9c5
--- /dev/null
+++ b/test/CodeGen/ppc-sfvarargs.c
@@ -0,0 +1,17 @@
+// RUN: %clang -O0 --target=powerpc-unknown-linux-gnu -EB -msoft-float -S -emit-llvm %s -o - | FileCheck %s
+
+#include <stdarg.h>
+void test(char *fmt, ...) {
+  va_list ap;
+  va_start(ap, fmt);
+  va_arg(ap, double);
+  va_end(ap);
+}
+
+void foo() {
+  double a;
+  test("test",a);
+}
+// CHECK: %{{[0-9]+}} = add i8 %{{[0-9]+|numUsedRegs}}, 1
+// CHECK: %{{[0-9]+}} = and i8 %{{[0-9]+}}, -2
+// CHECK: %{{[0-9]+}} = mul i8 %{{[0-9]+}}, 4
diff --git a/test/CodeGen/ppc-varargs-struct.c b/test/CodeGen/ppc-varargs-struct.c
index 1c983c0..1ad57c2 100644
--- a/test/CodeGen/ppc-varargs-struct.c
+++ b/test/CodeGen/ppc-varargs-struct.c
@@ -19,89 +19,73 @@
 // CHECK: bitcast %struct.x* %t to i8*
 // CHECK: bitcast %struct.x* %{{[0-9]+}} to i8*
 // CHECK: call void @llvm.memcpy
-// CHECK-PPC:  [[ARRAYDECAY:%[a-z0-9]+]] = getelementptr inbounds [1 x %struct.__va_list_tag], [1 x %struct.__va_list_tag]* %ap, i32 0, i32 0
-// CHECK-PPC-NEXT:  [[GPRPTR:%[a-z0-9]+]] = bitcast %struct.__va_list_tag* [[ARRAYDECAY]] to i8*
-// CHECK-PPC-NEXT:  [[ZERO:%[0-9]+]] = ptrtoint i8* [[GPRPTR]] to i32
-// CHECK-PPC-NEXT:  [[ONE:%[0-9]+]] = add i32 [[ZERO]], 1
-// CHECK-PPC-NEXT:  [[TWO:%[0-9]+]] = inttoptr i32 [[ONE]] to i8*
-// CHECK-PPC-NEXT:  [[THREE:%[0-9]+]] = add i32 [[ONE]], 3
-// CHECK-PPC-NEXT:  [[FOUR:%[0-9]+]] = inttoptr i32 [[THREE]] to i8**
-// CHECK-PPC-NEXT:  [[FIVE:%[0-9]+]] = add i32 [[THREE]], 4
-// CHECK-PPC-NEXT:  [[SIX:%[0-9]+]] = inttoptr i32 [[FIVE]] to i8**
-// CHECK-PPC-NEXT:  [[GPR:%[a-z0-9]+]] = load i8, i8* [[GPRPTR]]
-// CHECK-PPC-NEXT:  [[FPR:%[a-z0-9]+]] = load i8, i8* [[TWO]] 
-// CHECK-PPC-NEXT:  [[OVERFLOW_AREA:%[a-z_0-9]+]] = load i8*, i8** [[FOUR]]
-// CHECK-PPC-NEXT:  [[SEVEN:%[0-9]+]] = ptrtoint i8* [[OVERFLOW_AREA]] to i32
-// CHECK-PPC-NEXT:  [[REGSAVE_AREA:%[a-z_0-9]+]] = load i8*, i8** [[SIX]]
-// CHECK-PPC-NEXT:  [[EIGHT:%[0-9]+]] = ptrtoint i8* [[REGSAVE_AREA]] to i32
-// CHECK-PPC-NEXT:  [[COND:%[a-z0-9]+]] = icmp ult i8 [[GPR]], 8
-// CHECK-PPC-NEXT:  [[NINE:%[0-9]+]] = mul i8 [[GPR]], 4
-// CHECK-PPC-NEXT:  [[TEN:%[0-9]+]] = sext i8 [[NINE]] to i32
-// CHECK-PPC-NEXT:  [[ELEVEN:%[0-9]+]] = add i32 [[EIGHT]], [[TEN]]
-// CHECK-PPC-NEXT:  br i1 [[COND]], label [[USING_REGS:%[a-z_0-9]+]], label [[USING_OVERFLOW:%[a-z_0-9]+]]
+
+// CHECK-PPC:  [[ARRAYDECAY:%.+]] = getelementptr inbounds [1 x %struct.__va_list_tag], [1 x %struct.__va_list_tag]* %ap, i32 0, i32 0
+// CHECK-PPC-NEXT:  [[GPRPTR:%.+]] = getelementptr inbounds %struct.__va_list_tag, %struct.__va_list_tag* [[ARRAYDECAY]], i32 0, i32 0
+// CHECK-PPC-NEXT:  [[GPR:%.+]] = load i8, i8* [[GPRPTR]], align 4
+// CHECK-PPC-NEXT:  [[COND:%.+]] = icmp ult i8 [[GPR]], 8
+// CHECK-PPC-NEXT:  br i1 [[COND]], label %[[USING_REGS:[a-z_0-9]+]], label %[[USING_OVERFLOW:[a-z_0-9]+]]
 //
-// CHECK-PPC1:[[USING_REGS]]
-// CHECK-PPC:  [[TWELVE:%[0-9]+]] = inttoptr i32 [[ELEVEN]] to %struct.x*
-// CHECK-PPC-NEXT:  [[THIRTEEN:%[0-9]+]] = add i8 [[GPR]], 1
-// CHECK-PPC-NEXT:  store i8 [[THIRTEEN]], i8* [[GPRPTR]]
-// CHECK-PPC-NEXT:  br label [[CONT:%[a-z0-9]+]]
+// CHECK-PPC:[[USING_REGS]]
+// CHECK-PPC-NEXT:  [[REGSAVE_AREA_P:%.+]] = getelementptr inbounds %struct.__va_list_tag, %struct.__va_list_tag* [[ARRAYDECAY]], i32 0, i32 4
+// CHECK-PPC-NEXT:  [[REGSAVE_AREA:%.+]] = load i8*, i8** [[REGSAVE_AREA_P]], align 4
+// CHECK-PPC-NEXT:  [[OFFSET:%.+]] = mul i8 [[GPR]], 4
+// CHECK-PPC-NEXT:  [[RAW_REGADDR:%.+]] = getelementptr inbounds i8, i8* [[REGSAVE_AREA]], i8 [[OFFSET]]
+// CHECK-PPC-NEXT:  [[REGADDR:%.+]] = bitcast i8* [[RAW_REGADDR]] to %struct.x**
+// CHECK-PPC-NEXT:  [[USED_GPR:%[0-9]+]] = add i8 [[GPR]], 1
+// CHECK-PPC-NEXT:  store i8 [[USED_GPR]], i8* [[GPRPTR]], align 4
+// CHECK-PPC-NEXT:  br label %[[CONT:[a-z0-9]+]]
 //
-// CHECK-PPC1:[[USING_OVERFLOW]]
-// CHECK-PPC:  [[FOURTEEN:%[0-9]+]] = inttoptr i32 [[SEVEN]] to %struct.x*
-// CHECK-PPC-NEXT:  [[FIFTEEN:%[0-9]+]] = add i32 [[SEVEN]], 4
-// CHECK-PPC-NEXT:  [[SIXTEEN:%[0-9]+]] = inttoptr i32 [[FIFTEEN]] to i8*
-// CHECK-PPC-NEXT:  store i8* [[SIXTEEN]], i8** [[FOUR]]
-// CHECK-PPC-NEXT:  br label [[CONT]]
+// CHECK-PPC:[[USING_OVERFLOW]]
+// CHECK-PPC-NEXT:  [[OVERFLOW_AREA_P:%[0-9]+]] = getelementptr inbounds %struct.__va_list_tag, %struct.__va_list_tag* [[ARRAYDECAY]], i32 0, i32 3
+// CHECK-PPC-NEXT:  [[OVERFLOW_AREA:%.+]] = load i8*, i8** [[OVERFLOW_AREA_P]], align 4
+// CHECK-PPC-NEXT:  %{{[0-9]+}} =  ptrtoint i8* %argp.cur to i32
+// CHECK-PPC-NEXT:  %{{[0-9]+}} = add i32 %{{[0-9]+}}, 7
+// CHECK-PPC-NEXT:  %{{[0-9]+}} = and i32 %{{[0-9]+}}, -8
+// CHECK-PPC-NEXT:  %argp.cur.aligned = inttoptr i32 %{{[0-9]+}} to i8*
+// CHECK-PPC-NEXT:  [[MEMADDR:%.+]] = bitcast i8* %argp.cur.aligned to %struct.x**
+// CHECK-PPC-NEXT:  [[NEW_OVERFLOW_AREA:%[0-9]+]] = getelementptr inbounds i8, i8* %argp.cur.aligned, i32 4
+// CHECK-PPC-NEXT:  store i8* [[NEW_OVERFLOW_AREA:%[0-9]+]], i8** [[OVERFLOW_AREA_P]], align 4
+// CHECK-PPC-NEXT:  br label %[[CONT]]
 //
-// CHECK-PPC1:[[CONT]]
-// CHECK-PPC:  [[VAARG_ADDR:%[a-z.0-9]+]] = phi %struct.x* [ [[TWELVE]], [[USING_REGS]] ], [ [[FOURTEEN]], [[USING_OVERFLOW]] ]
-// CHECK-PPC-NEXT:  [[AGGRPTR:%[a-z0-9]+]] = bitcast %struct.x* [[VAARG_ADDR]] to i8**
-// CHECK-PPC-NEXT:  [[AGGR:%[a-z0-9]+]] = load i8*, i8** [[AGGRPTR]]
-// CHECK-PPC-NEXT:  [[SEVENTEEN:%[0-9]+]] = bitcast %struct.x* %t to i8*
-// CHECK-PPC-NEXT:  call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[SEVENTEEN]], i8* [[AGGR]], i32 16, i32 8, i1 false)
+// CHECK-PPC:[[CONT]]
+// CHECK-PPC-NEXT:  [[VAARG_ADDR:%[a-z.0-9]+]] = phi %struct.x** [ [[REGADDR]], %[[USING_REGS]] ], [ [[MEMADDR]], %[[USING_OVERFLOW]] ]
+// CHECK-PPC-NEXT:  [[AGGR:%[a-z0-9]+]] = load %struct.x*, %struct.x** [[VAARG_ADDR]]
+// CHECK-PPC-NEXT:  [[DEST:%[0-9]+]] = bitcast %struct.x* %t to i8*
+// CHECK-PPC-NEXT:  [[SRC:%.+]] = bitcast %struct.x* [[AGGR]] to i8*
+// CHECK-PPC-NEXT:  call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[DEST]], i8* [[SRC]], i32 16, i32 8, i1 false)
 
   int v = va_arg (ap, int);
-// CHECK: ptrtoint i8* %{{[a-z.0-9]*}} to i64
-// CHECK: add i64 %{{[0-9]+}}, 4
-// CHECK: inttoptr i64 %{{[0-9]+}} to i8*
+
+// CHECK: getelementptr inbounds i8, i8* %{{[a-z.0-9]*}}, i64 4
 // CHECK: bitcast i8* %{{[0-9]+}} to i32*
-// CHECK-PPC:  [[ARRAYDECAY1:%[a-z0-9]+]] = getelementptr inbounds [1 x %struct.__va_list_tag], [1 x %struct.__va_list_tag]* %ap, i32 0, i32 0
-// CHECK-PPC-NEXT:  [[GPRPTR1:%[a-z0-9]+]] = bitcast %struct.__va_list_tag* [[ARRAYDECAY1]] to i8*
-// CHECK-PPC-NEXT:  [[EIGHTEEN:%[0-9]+]] = ptrtoint i8* [[GPRPTR1]] to i32
-// CHECK-PPC-NEXT:  [[NINETEEN:%[0-9]+]] = add i32 [[EIGHTEEN]], 1
-// CHECK-PPC-NEXT:  [[TWENTY:%[0-9]+]] = inttoptr i32 [[NINETEEN]] to i8*
-// CHECK-PPC-NEXT:  [[TWENTYONE:%[0-9]+]] = add i32 [[NINETEEN]], 3
-// CHECK-PPC-NEXT:  [[TWENTYTWO:%[0-9]+]] = inttoptr i32 [[TWENTYONE]] to i8**
-// CHECK-PPC-NEXT:  [[TWENTYTHREE:%[0-9]+]] = add i32 [[TWENTYONE]], 4
-// CHECK-PPC-NEXT:  [[TWENTYFOUR:%[0-9]+]] = inttoptr i32 [[TWENTYTHREE]] to i8**
-// CHECK-PPC-NEXT:  [[GPR1:%[a-z0-9]+]] = load i8, i8* [[GPRPTR1]]
-// CHECK-PPC-NEXT:  [[FPR1:%[a-z0-9]+]] = load i8, i8* [[TWENTY]]
-// CHECK-PPC-NEXT:  [[OVERFLOW_AREA1:%[a-z_0-9]+]] = load i8*, i8** [[TWENTYTWO]]
-// CHECK-PPC-NEXT:  [[TWENTYFIVE:%[0-9]+]] = ptrtoint i8* [[OVERFLOW_AREA1]] to i32
-// CHECK-PPC-NEXT:  [[REGSAVE_AREA1:%[a-z_0-9]+]] = load i8*, i8** [[TWENTYFOUR]]
-// CHECK-PPC-NEXT:  [[TWENTYSIX:%[0-9]+]] = ptrtoint i8* [[REGSAVE_AREA1]] to i32
-// CHECK-PPC-NEXT:  [[COND1:%[a-z0-9]+]] = icmp ult i8 [[GPR1]], 8
-// CHECK-PPC-NEXT:  [[TWENTYSEVEN:%[0-9]+]] = mul i8 [[GPR1]], 4
-// CHECK-PPC-NEXT:  [[TWENTYEIGHT:%[0-9]+]] = sext i8 [[TWENTYSEVEN]] to i32
-// CHECK-PPC-NEXT:  [[TWENTYNINE:%[0-9]+]] = add i32 [[TWENTYSIX]], [[TWENTYEIGHT]]
-// CHECK-PPC-NEXT:  br i1 [[COND1]], label [[USING_REGS1:%[.a-z_0-9]+]], label [[USING_OVERFLOW1:%[.a-z_0-9]+]]
+// CHECK-PPC:       [[ARRAYDECAY:%[a-z0-9]+]] = getelementptr inbounds [1 x %struct.__va_list_tag], [1 x %struct.__va_list_tag]* %ap, i32 0, i32 0
+// CHECK-PPC-NEXT:  [[GPRPTR:%.+]] = getelementptr inbounds %struct.__va_list_tag, %struct.__va_list_tag* [[ARRAYDECAY]], i32 0, i32 0
+// CHECK-PPC-NEXT:  [[GPR:%.+]] = load i8, i8* [[GPRPTR]], align 4
+// CHECK-PPC-NEXT:  [[COND:%.+]] = icmp ult i8 [[GPR]], 8
+// CHECK-PPC-NEXT:  br i1 [[COND]], label %[[USING_REGS:.+]], label %[[USING_OVERFLOW:.+]]{{$}}
 //
-// CHECK-PPC1:[[USING_REGS1]]:
-// CHECK-PPC:  [[THIRTY:%[0-9]+]] = inttoptr i32 [[TWENTYNINE]] to i32*
-// CHECK-PPC-NEXT:  [[THIRTYONE:%[0-9]+]] = add i8 [[GPR1]], 1
-// CHECK-PPC-NEXT:  store i8 [[THIRTYONE]], i8* [[GPRPTR1]]
-// CHECK-PPC-NEXT:  br label [[CONT1:%[a-z0-9]+]]
+// CHECK-PPC:[[USING_REGS]]
+// CHECK-PPC-NEXT:  [[REGSAVE_AREA_P:%.+]] = getelementptr inbounds %struct.__va_list_tag, %struct.__va_list_tag* [[ARRAYDECAY]], i32 0, i32 4
+// CHECK-PPC-NEXT:  [[REGSAVE_AREA:%.+]] = load i8*, i8** [[REGSAVE_AREA_P]], align 4
+// CHECK-PPC-NEXT:  [[OFFSET:%.+]] = mul i8 [[GPR]], 4
+// CHECK-PPC-NEXT:  [[RAW_REGADDR:%.+]] = getelementptr inbounds i8, i8* [[REGSAVE_AREA]], i8 [[OFFSET]]
+// CHECK-PPC-NEXT:  [[REGADDR:%.+]] = bitcast i8* [[RAW_REGADDR]] to i32*
+// CHECK-PPC-NEXT:  [[USED_GPR:%[0-9]+]] = add i8 [[GPR]], 1
+// CHECK-PPC-NEXT:  store i8 [[USED_GPR]], i8* [[GPRPTR]], align 4
+// CHECK-PPC-NEXT:  br label %[[CONT:[a-z0-9]+]]
 //
-// CHECK-PPC1:[[USING_OVERFLOW1]]:
-// CHECK-PPC:  [[THIRTYTWO:%[0-9]+]] = inttoptr i32 [[TWENTYFIVE]] to i32*
-// CHECK-PPC-NEXT:  [[THIRTYTHREE:%[0-9]+]] = add i32 [[TWENTYFIVE]], 4
-// CHECK-PPC-NEXT:  [[THIRTYFOUR:%[0-9]+]] = inttoptr i32 [[THIRTYTHREE]] to i8*
-// CHECK-PPC-NEXT:  store i8* [[THIRTYFOUR]], i8** [[TWENTYTWO]]
-// CHECK-PPC-NEXT:  br label [[CONT1]]
+// CHECK-PPC:[[USING_OVERFLOW]]
+// CHECK-PPC-NEXT:  [[OVERFLOW_AREA_P:%[0-9]+]] = getelementptr inbounds %struct.__va_list_tag, %struct.__va_list_tag* [[ARRAYDECAY]], i32 0, i32 3
+// CHECK-PPC-NEXT:  [[OVERFLOW_AREA:%.+]] = load i8*, i8** [[OVERFLOW_AREA_P]], align 4
+// CHECK-PPC-NEXT:  [[MEMADDR:%.+]] = bitcast i8* [[OVERFLOW_AREA]] to i32*
+// CHECK-PPC-NEXT:  [[NEW_OVERFLOW_AREA:%[0-9]+]] = getelementptr inbounds i8, i8* [[OVERFLOW_AREA]], i32 4
+// CHECK-PPC-NEXT:  store i8* [[NEW_OVERFLOW_AREA]], i8** [[OVERFLOW_AREA_P]]
+// CHECK-PPC-NEXT:  br label %[[CONT]]
 //
-// CHECK-PPC1:[[CONT1]]:
-// CHECK-PPC:  [[VAARG_ADDR1:%[a-z.0-9]+]] = phi i32* [ [[THIRTY]], [[USING_REGS1]] ], [ [[THIRTYTWO]], [[USING_OVERFLOW1]] ]
-// CHECK-PPC-NEXT:  [[THIRTYFIVE:%[0-9]+]] = load i32, i32* [[VAARG_ADDR1]]
+// CHECK-PPC:[[CONT]]
+// CHECK-PPC-NEXT:  [[VAARG_ADDR:%[a-z.0-9]+]] = phi i32* [ [[REGADDR]], %[[USING_REGS]] ], [ [[MEMADDR]], %[[USING_OVERFLOW]] ]
+// CHECK-PPC-NEXT:  [[THIRTYFIVE:%[0-9]+]] = load i32, i32* [[VAARG_ADDR]]
 // CHECK-PPC-NEXT:  store i32 [[THIRTYFIVE]], i32* %v, align 4
 
 #ifdef __powerpc64__
diff --git a/test/CodeGen/ppc64-align-struct.c b/test/CodeGen/ppc64-align-struct.c
index 8c4437a..6a04d0c 100644
--- a/test/CodeGen/ppc64-align-struct.c
+++ b/test/CodeGen/ppc64-align-struct.c
@@ -41,18 +41,22 @@
 }
 
 // This case requires run-time realignment of the incoming struct
-// CHECK: define void @test7(i32 signext %x, %struct.test7* byval align 16)
+// CHECK-LABEL: define void @test7(i32 signext %x, %struct.test7* byval align 16)
 // CHECK: %y = alloca %struct.test7, align 32
 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64
 void test7 (int x, struct test7 y)
 {
 }
 
-// CHECK: define void @test1va(%struct.test1* noalias sret %agg.result, i32 signext %x, ...)
+// CHECK-LABEL: define void @test1va(%struct.test1* noalias sret %agg.result, i32 signext %x, ...)
+// CHECK: %y = alloca %struct.test1, align 4
 // CHECK: %[[CUR:[^ ]+]] = load i8*, i8** %ap
-// CHECK: %[[NEXT:[^ ]+]] = getelementptr i8, i8* %[[CUR]], i64 8
+// CHECK: %[[NEXT:[^ ]+]] = getelementptr inbounds i8, i8* %[[CUR]], i64 8
 // CHECK: store i8* %[[NEXT]], i8** %ap
-// CHECK: bitcast i8* %[[CUR]] to %struct.test1*
+// CHECK: [[T0:%.*]] = bitcast i8* %[[CUR]] to %struct.test1*
+// CHECK: [[DEST:%.*]] = bitcast %struct.test1* %y to i8*
+// CHECK: [[SRC:%.*]] = bitcast %struct.test1* [[T0]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[DEST]], i8* [[SRC]], i64 8, i32 4, i1 false)
 struct test1 test1va (int x, ...)
 {
   struct test1 y;
@@ -63,15 +67,19 @@
   return y;
 }
 
-// CHECK: define void @test2va(%struct.test2* noalias sret %agg.result, i32 signext %x, ...)
+// CHECK-LABEL: define void @test2va(%struct.test2* noalias sret %agg.result, i32 signext %x, ...)
+// CHECK: %y = alloca %struct.test2, align 16
 // CHECK: %[[CUR:[^ ]+]] = load i8*, i8** %ap
 // CHECK: %[[TMP0:[^ ]+]] = ptrtoint i8* %[[CUR]] to i64
 // CHECK: %[[TMP1:[^ ]+]] = add i64 %[[TMP0]], 15
 // CHECK: %[[TMP2:[^ ]+]] = and i64 %[[TMP1]], -16
 // CHECK: %[[ALIGN:[^ ]+]] = inttoptr i64 %[[TMP2]] to i8*
-// CHECK: %[[NEXT:[^ ]+]] = getelementptr i8, i8* %[[ALIGN]], i64 16
+// CHECK: %[[NEXT:[^ ]+]] = getelementptr inbounds i8, i8* %[[ALIGN]], i64 16
 // CHECK: store i8* %[[NEXT]], i8** %ap
-// CHECK: bitcast i8* %[[ALIGN]] to %struct.test2*
+// CHECK: [[T0:%.*]] = bitcast i8* %[[ALIGN]] to %struct.test2*
+// CHECK: [[DEST:%.*]] = bitcast %struct.test2* %y to i8*
+// CHECK: [[SRC:%.*]] = bitcast %struct.test2* [[T0]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[DEST]], i8* [[SRC]], i64 16, i32 16, i1 false)
 struct test2 test2va (int x, ...)
 {
   struct test2 y;
@@ -82,15 +90,19 @@
   return y;
 }
 
-// CHECK: define void @test3va(%struct.test3* noalias sret %agg.result, i32 signext %x, ...)
+// CHECK-LABEL: define void @test3va(%struct.test3* noalias sret %agg.result, i32 signext %x, ...)
+// CHECK: %y = alloca %struct.test3, align 32
 // CHECK: %[[CUR:[^ ]+]] = load i8*, i8** %ap
 // CHECK: %[[TMP0:[^ ]+]] = ptrtoint i8* %[[CUR]] to i64
 // CHECK: %[[TMP1:[^ ]+]] = add i64 %[[TMP0]], 15
 // CHECK: %[[TMP2:[^ ]+]] = and i64 %[[TMP1]], -16
 // CHECK: %[[ALIGN:[^ ]+]] = inttoptr i64 %[[TMP2]] to i8*
-// CHECK: %[[NEXT:[^ ]+]] = getelementptr i8, i8* %[[ALIGN]], i64 32
+// CHECK: %[[NEXT:[^ ]+]] = getelementptr inbounds i8, i8* %[[ALIGN]], i64 32
 // CHECK: store i8* %[[NEXT]], i8** %ap
-// CHECK: bitcast i8* %[[ALIGN]] to %struct.test3*
+// CHECK: [[T0:%.*]] = bitcast i8* %[[ALIGN]] to %struct.test3*
+// CHECK: [[DEST:%.*]] = bitcast %struct.test3* %y to i8*
+// CHECK: [[SRC:%.*]] = bitcast %struct.test3* [[T0]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[DEST]], i8* [[SRC]], i64 32, i32 16, i1 false)
 struct test3 test3va (int x, ...)
 {
   struct test3 y;
@@ -101,11 +113,15 @@
   return y;
 }
 
-// CHECK: define void @test4va(%struct.test4* noalias sret %agg.result, i32 signext %x, ...)
+// CHECK-LABEL: define void @test4va(%struct.test4* noalias sret %agg.result, i32 signext %x, ...)
+// CHECK: %y = alloca %struct.test4, align 4
 // CHECK: %[[CUR:[^ ]+]] = load i8*, i8** %ap
-// CHECK: %[[NEXT:[^ ]+]] = getelementptr i8, i8* %[[CUR]], i64 16
+// CHECK: %[[NEXT:[^ ]+]] = getelementptr inbounds i8, i8* %[[CUR]], i64 16
 // CHECK: store i8* %[[NEXT]], i8** %ap
-// CHECK: bitcast i8* %[[CUR]] to %struct.test4*
+// CHECK: [[T0:%.*]] = bitcast i8* %[[CUR]] to %struct.test4*
+// CHECK: [[DEST:%.*]] = bitcast %struct.test4* %y to i8*
+// CHECK: [[SRC:%.*]] = bitcast %struct.test4* [[T0]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[DEST]], i8* [[SRC]], i64 12, i32 4, i1 false)
 struct test4 test4va (int x, ...)
 {
   struct test4 y;
@@ -116,11 +132,15 @@
   return y;
 }
 
-// CHECK: define void @testva_longdouble(%struct.test_longdouble* noalias sret %agg.result, i32 signext %x, ...)
+// CHECK-LABEL: define void @testva_longdouble(%struct.test_longdouble* noalias sret %agg.result, i32 signext %x, ...)
+// CHECK: %y = alloca %struct.test_longdouble, align 16
 // CHECK: %[[CUR:[^ ]+]] = load i8*, i8** %ap
-// CHECK: %[[NEXT:[^ ]+]] = getelementptr i8, i8* %[[CUR]], i64 16
+// CHECK: %[[NEXT:[^ ]+]] = getelementptr inbounds i8, i8* %[[CUR]], i64 16
 // CHECK: store i8* %[[NEXT]], i8** %ap
-// CHECK: bitcast i8* %[[CUR]] to %struct.test_longdouble*
+// CHECK: [[T0:%.*]] = bitcast i8* %[[CUR]] to %struct.test_longdouble*
+// CHECK: [[DEST:%.*]] = bitcast %struct.test_longdouble* %y to i8*
+// CHECK: [[SRC:%.*]] = bitcast %struct.test_longdouble* [[T0]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[DEST]], i8* [[SRC]], i64 16, i32 8, i1 false)
 struct test_longdouble { long double x; };
 struct test_longdouble testva_longdouble (int x, ...)
 {
@@ -132,15 +152,19 @@
   return y;
 }
 
-// CHECK: define void @testva_vector(%struct.test_vector* noalias sret %agg.result, i32 signext %x, ...)
+// CHECK-LABEL: define void @testva_vector(%struct.test_vector* noalias sret %agg.result, i32 signext %x, ...)
+// CHECK: %y = alloca %struct.test_vector, align 16
 // CHECK: %[[CUR:[^ ]+]] = load i8*, i8** %ap
 // CHECK: %[[TMP0:[^ ]+]] = ptrtoint i8* %[[CUR]] to i64
 // CHECK: %[[TMP1:[^ ]+]] = add i64 %[[TMP0]], 15
 // CHECK: %[[TMP2:[^ ]+]] = and i64 %[[TMP1]], -16
 // CHECK: %[[ALIGN:[^ ]+]] = inttoptr i64 %[[TMP2]] to i8*
-// CHECK: %[[NEXT:[^ ]+]] = getelementptr i8, i8* %[[ALIGN]], i64 16
+// CHECK: %[[NEXT:[^ ]+]] = getelementptr inbounds i8, i8* %[[ALIGN]], i64 16
 // CHECK: store i8* %[[NEXT]], i8** %ap
-// CHECK: bitcast i8* %[[ALIGN]] to %struct.test_vector*
+// CHECK: [[T0:%.*]] = bitcast i8* %[[ALIGN]] to %struct.test_vector*
+// CHECK: [[DEST:%.*]] = bitcast %struct.test_vector* %y to i8*
+// CHECK: [[SRC:%.*]] = bitcast %struct.test_vector* [[T0]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[DEST]], i8* [[SRC]], i64 16, i32 16, i1 false)
 struct test_vector { vector int x; };
 struct test_vector testva_vector (int x, ...)
 {
diff --git a/test/CodeGen/ppc64-complex-parms.c b/test/CodeGen/ppc64-complex-parms.c
index f5583a0..3f2a0c2 100644
--- a/test/CodeGen/ppc64-complex-parms.c
+++ b/test/CodeGen/ppc64-complex-parms.c
@@ -62,10 +62,10 @@
 // CHECK: %[[VAR3:[A-Za-z0-9.]+]] = getelementptr inbounds { float, float }, { float, float }* %[[VAR1]], i32 0, i32 1
 // CHECK: store float 2.000000e+00, float* %[[VAR2]]
 // CHECK: store float -2.500000e+00, float* %[[VAR3]]
-// CHECK: %[[VAR4:[A-Za-z0-9.]+]] = getelementptr { float, float }, { float, float }* %[[VAR1]], i32 0, i32 0
-// CHECK: %[[VAR5:[A-Za-z0-9.]+]] = load float, float* %[[VAR4]], align 1
-// CHECK: %[[VAR6:[A-Za-z0-9.]+]] = getelementptr { float, float }, { float, float }* %[[VAR1]], i32 0, i32 1
-// CHECK: %[[VAR7:[A-Za-z0-9.]+]] = load float, float* %[[VAR6]], align 1
+// CHECK: %[[VAR4:[A-Za-z0-9.]+]] = getelementptr inbounds { float, float }, { float, float }* %[[VAR1]], i32 0, i32 0
+// CHECK: %[[VAR5:[A-Za-z0-9.]+]] = load float, float* %[[VAR4]], align 4
+// CHECK: %[[VAR6:[A-Za-z0-9.]+]] = getelementptr inbounds { float, float }, { float, float }* %[[VAR1]], i32 0, i32 1
+// CHECK: %[[VAR7:[A-Za-z0-9.]+]] = load float, float* %[[VAR6]], align 4
 // CHECK: %{{[A-Za-z0-9.]+}} = call float @foo_float(float %[[VAR5]], float %[[VAR7]])
 
 void bar_double(void) {
@@ -78,10 +78,10 @@
 // CHECK: %[[VAR13:[A-Za-z0-9.]+]] = getelementptr inbounds { double, double }, { double, double }* %[[VAR11]], i32 0, i32 1
 // CHECK: store double 2.000000e+00, double* %[[VAR12]]
 // CHECK: store double -2.500000e+00, double* %[[VAR13]]
-// CHECK: %[[VAR14:[A-Za-z0-9.]+]] = getelementptr { double, double }, { double, double }* %[[VAR11]], i32 0, i32 0
-// CHECK: %[[VAR15:[A-Za-z0-9.]+]] = load double, double* %[[VAR14]], align 1
-// CHECK: %[[VAR16:[A-Za-z0-9.]+]] = getelementptr { double, double }, { double, double }* %[[VAR11]], i32 0, i32 1
-// CHECK: %[[VAR17:[A-Za-z0-9.]+]] = load double, double* %[[VAR16]], align 1
+// CHECK: %[[VAR14:[A-Za-z0-9.]+]] = getelementptr inbounds { double, double }, { double, double }* %[[VAR11]], i32 0, i32 0
+// CHECK: %[[VAR15:[A-Za-z0-9.]+]] = load double, double* %[[VAR14]], align 8
+// CHECK: %[[VAR16:[A-Za-z0-9.]+]] = getelementptr inbounds { double, double }, { double, double }* %[[VAR11]], i32 0, i32 1
+// CHECK: %[[VAR17:[A-Za-z0-9.]+]] = load double, double* %[[VAR16]], align 8
 // CHECK: %{{[A-Za-z0-9.]+}} = call double @foo_double(double %[[VAR15]], double %[[VAR17]])
 
 void bar_long_double(void) {
@@ -94,10 +94,10 @@
 // CHECK: %[[VAR23:[A-Za-z0-9.]+]] = getelementptr inbounds { ppc_fp128, ppc_fp128 }, { ppc_fp128, ppc_fp128 }* %[[VAR21]], i32 0, i32 1
 // CHECK: store ppc_fp128 0xM40000000000000000000000000000000, ppc_fp128* %[[VAR22]]
 // CHECK: store ppc_fp128 0xMC0040000000000000000000000000000, ppc_fp128* %[[VAR23]]
-// CHECK: %[[VAR24:[A-Za-z0-9.]+]] = getelementptr { ppc_fp128, ppc_fp128 }, { ppc_fp128, ppc_fp128 }* %[[VAR21]], i32 0, i32 0
-// CHECK: %[[VAR25:[A-Za-z0-9.]+]] = load ppc_fp128, ppc_fp128* %[[VAR24]], align 1
-// CHECK: %[[VAR26:[A-Za-z0-9.]+]] = getelementptr { ppc_fp128, ppc_fp128 }, { ppc_fp128, ppc_fp128 }* %[[VAR21]], i32 0, i32 1
-// CHECK: %[[VAR27:[A-Za-z0-9.]+]] = load ppc_fp128, ppc_fp128* %[[VAR26]], align 1
+// CHECK: %[[VAR24:[A-Za-z0-9.]+]] = getelementptr inbounds { ppc_fp128, ppc_fp128 }, { ppc_fp128, ppc_fp128 }* %[[VAR21]], i32 0, i32 0
+// CHECK: %[[VAR25:[A-Za-z0-9.]+]] = load ppc_fp128, ppc_fp128* %[[VAR24]], align 16
+// CHECK: %[[VAR26:[A-Za-z0-9.]+]] = getelementptr inbounds { ppc_fp128, ppc_fp128 }, { ppc_fp128, ppc_fp128 }* %[[VAR21]], i32 0, i32 1
+// CHECK: %[[VAR27:[A-Za-z0-9.]+]] = load ppc_fp128, ppc_fp128* %[[VAR26]], align 16
 // CHECK: %{{[A-Za-z0-9.]+}} = call ppc_fp128 @foo_long_double(ppc_fp128 %[[VAR25]], ppc_fp128 %[[VAR27]])
 
 void bar_int(void) {
@@ -110,10 +110,10 @@
 // CHECK: %[[VAR33:[A-Za-z0-9.]+]] = getelementptr inbounds { i32, i32 }, { i32, i32 }* %[[VAR31]], i32 0, i32 1
 // CHECK: store i32 2, i32* %[[VAR32]]
 // CHECK: store i32 -3, i32* %[[VAR33]]
-// CHECK: %[[VAR34:[A-Za-z0-9.]+]] = getelementptr { i32, i32 }, { i32, i32 }* %[[VAR31]], i32 0, i32 0
-// CHECK: %[[VAR35:[A-Za-z0-9.]+]] = load i32, i32* %[[VAR34]], align 1
-// CHECK: %[[VAR36:[A-Za-z0-9.]+]] = getelementptr { i32, i32 }, { i32, i32 }* %[[VAR31]], i32 0, i32 1
-// CHECK: %[[VAR37:[A-Za-z0-9.]+]] = load i32, i32* %[[VAR36]], align 1
+// CHECK: %[[VAR34:[A-Za-z0-9.]+]] = getelementptr inbounds { i32, i32 }, { i32, i32 }* %[[VAR31]], i32 0, i32 0
+// CHECK: %[[VAR35:[A-Za-z0-9.]+]] = load i32, i32* %[[VAR34]], align 4
+// CHECK: %[[VAR36:[A-Za-z0-9.]+]] = getelementptr inbounds { i32, i32 }, { i32, i32 }* %[[VAR31]], i32 0, i32 1
+// CHECK: %[[VAR37:[A-Za-z0-9.]+]] = load i32, i32* %[[VAR36]], align 4
 // CHECK: %{{[A-Za-z0-9.]+}} = call signext i32 @foo_int(i32 %[[VAR35]], i32 %[[VAR37]])
 
 void bar_short(void) {
@@ -126,10 +126,10 @@
 // CHECK: %[[VAR43:[A-Za-z0-9.]+]] = getelementptr inbounds { i16, i16 }, { i16, i16 }* %[[VAR41]], i32 0, i32 1
 // CHECK: store i16 2, i16* %[[VAR42]]
 // CHECK: store i16 -3, i16* %[[VAR43]]
-// CHECK: %[[VAR44:[A-Za-z0-9.]+]] = getelementptr { i16, i16 }, { i16, i16 }* %[[VAR41]], i32 0, i32 0
-// CHECK: %[[VAR45:[A-Za-z0-9.]+]] = load i16, i16* %[[VAR44]], align 1
-// CHECK: %[[VAR46:[A-Za-z0-9.]+]] = getelementptr { i16, i16 }, { i16, i16 }* %[[VAR41]], i32 0, i32 1
-// CHECK: %[[VAR47:[A-Za-z0-9.]+]] = load i16, i16* %[[VAR46]], align 1
+// CHECK: %[[VAR44:[A-Za-z0-9.]+]] = getelementptr inbounds { i16, i16 }, { i16, i16 }* %[[VAR41]], i32 0, i32 0
+// CHECK: %[[VAR45:[A-Za-z0-9.]+]] = load i16, i16* %[[VAR44]], align 2
+// CHECK: %[[VAR46:[A-Za-z0-9.]+]] = getelementptr inbounds { i16, i16 }, { i16, i16 }* %[[VAR41]], i32 0, i32 1
+// CHECK: %[[VAR47:[A-Za-z0-9.]+]] = load i16, i16* %[[VAR46]], align 2
 // CHECK: %{{[A-Za-z0-9.]+}} = call signext i16 @foo_short(i16 %[[VAR45]], i16 %[[VAR47]])
 
 void bar_char(void) {
@@ -142,9 +142,9 @@
 // CHECK: %[[VAR53:[A-Za-z0-9.]+]] = getelementptr inbounds { i8, i8 }, { i8, i8 }* %[[VAR51]], i32 0, i32 1
 // CHECK: store i8 2, i8* %[[VAR52]]
 // CHECK: store i8 -3, i8* %[[VAR53]]
-// CHECK: %[[VAR54:[A-Za-z0-9.]+]] = getelementptr { i8, i8 }, { i8, i8 }* %[[VAR51]], i32 0, i32 0
+// CHECK: %[[VAR54:[A-Za-z0-9.]+]] = getelementptr inbounds { i8, i8 }, { i8, i8 }* %[[VAR51]], i32 0, i32 0
 // CHECK: %[[VAR55:[A-Za-z0-9.]+]] = load i8, i8* %[[VAR54]], align 1
-// CHECK: %[[VAR56:[A-Za-z0-9.]+]] = getelementptr { i8, i8 }, { i8, i8 }* %[[VAR51]], i32 0, i32 1
+// CHECK: %[[VAR56:[A-Za-z0-9.]+]] = getelementptr inbounds { i8, i8 }, { i8, i8 }* %[[VAR51]], i32 0, i32 1
 // CHECK: %[[VAR57:[A-Za-z0-9.]+]] = load i8, i8* %[[VAR56]], align 1
 // CHECK: %{{[A-Za-z0-9.]+}} = call signext i8 @foo_char(i8 %[[VAR55]], i8 %[[VAR57]])
 
@@ -158,10 +158,10 @@
 // CHECK: %[[VAR63:[A-Za-z0-9.]+]] = getelementptr inbounds { i64, i64 }, { i64, i64 }* %[[VAR61]], i32 0, i32 1
 // CHECK: store i64 2, i64* %[[VAR62]]
 // CHECK: store i64 -3, i64* %[[VAR63]]
-// CHECK: %[[VAR64:[A-Za-z0-9.]+]] = getelementptr { i64, i64 }, { i64, i64 }* %[[VAR61]], i32 0, i32 0
-// CHECK: %[[VAR65:[A-Za-z0-9.]+]] = load i64, i64* %[[VAR64]], align 1
-// CHECK: %[[VAR66:[A-Za-z0-9.]+]] = getelementptr { i64, i64 }, { i64, i64 }* %[[VAR61]], i32 0, i32 1
-// CHECK: %[[VAR67:[A-Za-z0-9.]+]] = load i64, i64* %[[VAR66]], align 1
+// CHECK: %[[VAR64:[A-Za-z0-9.]+]] = getelementptr inbounds { i64, i64 }, { i64, i64 }* %[[VAR61]], i32 0, i32 0
+// CHECK: %[[VAR65:[A-Za-z0-9.]+]] = load i64, i64* %[[VAR64]], align 8
+// CHECK: %[[VAR66:[A-Za-z0-9.]+]] = getelementptr inbounds { i64, i64 }, { i64, i64 }* %[[VAR61]], i32 0, i32 1
+// CHECK: %[[VAR67:[A-Za-z0-9.]+]] = load i64, i64* %[[VAR66]], align 8
 // CHECK: %{{[A-Za-z0-9.]+}} = call i64 @foo_long(i64 %[[VAR65]], i64 %[[VAR67]])
 
 void bar_long_long(void) {
@@ -174,10 +174,10 @@
 // CHECK: %[[VAR73:[A-Za-z0-9.]+]] = getelementptr inbounds { i64, i64 }, { i64, i64 }* %[[VAR71]], i32 0, i32 1
 // CHECK: store i64 2, i64* %[[VAR72]]
 // CHECK: store i64 -3, i64* %[[VAR73]]
-// CHECK: %[[VAR74:[A-Za-z0-9.]+]] = getelementptr { i64, i64 }, { i64, i64 }* %[[VAR71]], i32 0, i32 0
-// CHECK: %[[VAR75:[A-Za-z0-9.]+]] = load i64, i64* %[[VAR74]], align 1
-// CHECK: %[[VAR76:[A-Za-z0-9.]+]] = getelementptr { i64, i64 }, { i64, i64 }* %[[VAR71]], i32 0, i32 1
-// CHECK: %[[VAR77:[A-Za-z0-9.]+]] = load i64, i64* %[[VAR76]], align 1
+// CHECK: %[[VAR74:[A-Za-z0-9.]+]] = getelementptr inbounds { i64, i64 }, { i64, i64 }* %[[VAR71]], i32 0, i32 0
+// CHECK: %[[VAR75:[A-Za-z0-9.]+]] = load i64, i64* %[[VAR74]], align 8
+// CHECK: %[[VAR76:[A-Za-z0-9.]+]] = getelementptr inbounds { i64, i64 }, { i64, i64 }* %[[VAR71]], i32 0, i32 1
+// CHECK: %[[VAR77:[A-Za-z0-9.]+]] = load i64, i64* %[[VAR76]], align 8
 // CHECK: %{{[A-Za-z0-9.]+}} = call i64 @foo_long_long(i64 %[[VAR75]], i64 %[[VAR77]])
 
 // CHECK: attributes [[NUW]] = { nounwind{{.*}} }
diff --git a/test/CodeGen/ppc64-struct-onefloat.c b/test/CodeGen/ppc64-struct-onefloat.c
index d0ccfbe..efc6fe9 100644
--- a/test/CodeGen/ppc64-struct-onefloat.c
+++ b/test/CodeGen/ppc64-struct-onefloat.c
@@ -13,16 +13,16 @@
 // CHECK:  %b = alloca %struct.s2, align 8
 // CHECK:  %d = alloca %struct.s4, align 4
 // CHECK:  %e = alloca %struct.s5, align 8
-// CHECK:  %{{[a-zA-Z0-9.]+}} = getelementptr %struct.s1, %struct.s1* %a, i32 0, i32 0
-// CHECK:  store float %a.coerce, float* %{{[a-zA-Z0-9.]+}}, align 1
-// CHECK:  %{{[a-zA-Z0-9.]+}} = getelementptr %struct.s2, %struct.s2* %b, i32 0, i32 0
-// CHECK:  store double %b.coerce, double* %{{[a-zA-Z0-9.]+}}, align 1
-// CHECK:  %{{[a-zA-Z0-9.]+}} = getelementptr %struct.s4, %struct.s4* %d, i32 0, i32 0
-// CHECK:  %{{[a-zA-Z0-9.]+}} = getelementptr %struct.s1, %struct.s1* %{{[a-zA-Z0-9.]+}}, i32 0, i32 0
-// CHECK:  store float %d.coerce, float* %{{[a-zA-Z0-9.]+}}, align 1
-// CHECK:  %{{[a-zA-Z0-9.]+}} = getelementptr %struct.s5, %struct.s5* %e, i32 0, i32 0
-// CHECK:  %{{[a-zA-Z0-9.]+}} = getelementptr %struct.s2, %struct.s2* %{{[a-zA-Z0-9.]+}}, i32 0, i32 0
-// CHECK:  store double %e.coerce, double* %{{[a-zA-Z0-9.]+}}, align 1
+// CHECK:  %{{[a-zA-Z0-9.]+}} = getelementptr inbounds %struct.s1, %struct.s1* %a, i32 0, i32 0
+// CHECK:  store float %a.coerce, float* %{{[a-zA-Z0-9.]+}}, align 4
+// CHECK:  %{{[a-zA-Z0-9.]+}} = getelementptr inbounds %struct.s2, %struct.s2* %b, i32 0, i32 0
+// CHECK:  store double %b.coerce, double* %{{[a-zA-Z0-9.]+}}, align 8
+// CHECK:  %{{[a-zA-Z0-9.]+}} = getelementptr inbounds %struct.s4, %struct.s4* %d, i32 0, i32 0
+// CHECK:  %{{[a-zA-Z0-9.]+}} = getelementptr inbounds %struct.s1, %struct.s1* %{{[a-zA-Z0-9.]+}}, i32 0, i32 0
+// CHECK:  store float %d.coerce, float* %{{[a-zA-Z0-9.]+}}, align 4
+// CHECK:  %{{[a-zA-Z0-9.]+}} = getelementptr inbounds %struct.s5, %struct.s5* %e, i32 0, i32 0
+// CHECK:  %{{[a-zA-Z0-9.]+}} = getelementptr inbounds %struct.s2, %struct.s2* %{{[a-zA-Z0-9.]+}}, i32 0, i32 0
+// CHECK:  store double %e.coerce, double* %{{[a-zA-Z0-9.]+}}, align 8
 // CHECK:  ret void
 
 void foo(void) 
@@ -35,15 +35,15 @@
 }
 
 // CHECK-LABEL: define void @foo
-// CHECK:  %{{[a-zA-Z0-9.]+}} = getelementptr %struct.s1, %struct.s1* %p1, i32 0, i32 0
-// CHECK:  %{{[0-9]+}} = load float, float* %{{[a-zA-Z0-9.]+}}, align 1
-// CHECK:  %{{[a-zA-Z0-9.]+}} = getelementptr %struct.s2, %struct.s2* %p2, i32 0, i32 0
-// CHECK:  %{{[0-9]+}} = load double, double* %{{[a-zA-Z0-9.]+}}, align 1
-// CHECK:  %{{[a-zA-Z0-9.]+}} = getelementptr %struct.s4, %struct.s4* %p4, i32 0, i32 0
-// CHECK:  %{{[a-zA-Z0-9.]+}} = getelementptr %struct.s1, %struct.s1* %{{[a-zA-Z0-9.]+}}, i32 0, i32 0
-// CHECK:  %{{[0-9]+}} = load float, float* %{{[a-zA-Z0-9.]+}}, align 1
-// CHECK:  %{{[a-zA-Z0-9.]+}} = getelementptr %struct.s5, %struct.s5* %p5, i32 0, i32 0
-// CHECK:  %{{[a-zA-Z0-9.]+}} = getelementptr %struct.s2, %struct.s2* %{{[a-zA-Z0-9.]+}}, i32 0, i32 0
-// CHECK:  %{{[0-9]+}} = load double, double* %{{[a-zA-Z0-9.]+}}, align 1
+// CHECK:  %{{[a-zA-Z0-9.]+}} = getelementptr inbounds %struct.s1, %struct.s1* %p1, i32 0, i32 0
+// CHECK:  %{{[0-9]+}} = load float, float* %{{[a-zA-Z0-9.]+}}, align 4
+// CHECK:  %{{[a-zA-Z0-9.]+}} = getelementptr inbounds %struct.s2, %struct.s2* %p2, i32 0, i32 0
+// CHECK:  %{{[0-9]+}} = load double, double* %{{[a-zA-Z0-9.]+}}, align 8
+// CHECK:  %{{[a-zA-Z0-9.]+}} = getelementptr inbounds %struct.s4, %struct.s4* %p4, i32 0, i32 0
+// CHECK:  %{{[a-zA-Z0-9.]+}} = getelementptr inbounds %struct.s1, %struct.s1* %{{[a-zA-Z0-9.]+}}, i32 0, i32 0
+// CHECK:  %{{[0-9]+}} = load float, float* %{{[a-zA-Z0-9.]+}}, align 4
+// CHECK:  %{{[a-zA-Z0-9.]+}} = getelementptr inbounds %struct.s5, %struct.s5* %p5, i32 0, i32 0
+// CHECK:  %{{[a-zA-Z0-9.]+}} = getelementptr inbounds %struct.s2, %struct.s2* %{{[a-zA-Z0-9.]+}}, i32 0, i32 0
+// CHECK:  %{{[0-9]+}} = load double, double* %{{[a-zA-Z0-9.]+}}, align 8
 // CHECK:  call void @bar(float inreg %{{[0-9]+}}, double inreg %{{[0-9]+}}, float inreg %{{[0-9]+}}, double inreg %{{[0-9]+}})
 // CHECK:  ret void
diff --git a/test/CodeGen/ppc64-varargs-complex.c b/test/CodeGen/ppc64-varargs-complex.c
index f790629..5820680 100644
--- a/test/CodeGen/ppc64-varargs-complex.c
+++ b/test/CodeGen/ppc64-varargs-complex.c
@@ -9,15 +9,14 @@
 
   _Complex int i   = va_arg(ap, _Complex int);
   // CHECK: %[[VAR40:[A-Za-z0-9.]+]] = load i8*, i8** %[[VAR100:[A-Za-z0-9.]+]]
-  // CHECK-NEXT: %[[VAR41:[A-Za-z0-9.]+]] = getelementptr i8, i8* %[[VAR40]], i64 16
+  // CHECK-NEXT: %[[VAR41:[A-Za-z0-9.]+]] = getelementptr inbounds i8, i8* %[[VAR40]], i64 16
   // CHECK-NEXT: store i8* %[[VAR41]], i8** %[[VAR100]]
-  // CHECK-NEXT: %[[VAR1:[A-Za-z0-9.]+]] = ptrtoint i8* %[[VAR40]] to i64
-  // CHECK-NEXT: %[[VAR2:[A-Za-z0-9.]+]] = add i64 %[[VAR1]], 4
-  // CHECK-NEXT: %[[VAR3:[A-Za-z0-9.]+]] = add i64 %[[VAR1]], 12
-  // CHECK-NEXT: %[[VAR4:[A-Za-z0-9.]+]] = inttoptr i64 %[[VAR2]] to i32*
-  // CHECK-NEXT: %[[VAR5:[A-Za-z0-9.]+]] = inttoptr i64 %[[VAR3]] to i32*
-  // CHECK-NEXT: %[[VAR6:[A-Za-z0-9.]+]] = load i32, i32* %[[VAR4]]
-  // CHECK-NEXT: %[[VAR7:[A-Za-z0-9.]+]] = load i32, i32* %[[VAR5]]
+  // CHECK-NEXT: %[[VAR1:[A-Za-z0-9.]+]] = getelementptr inbounds i8, i8* %[[VAR40]], i64 4
+  // CHECK-NEXT: %[[VAR2:[A-Za-z0-9.]+]] = getelementptr inbounds i8, i8* %[[VAR40]], i64 12
+  // CHECK-NEXT: %[[VAR4:[A-Za-z0-9.]+]] = bitcast i8* %[[VAR1]] to i32*
+  // CHECK-NEXT: %[[VAR5:[A-Za-z0-9.]+]] = bitcast i8* %[[VAR2]] to i32*
+  // CHECK-NEXT: %[[VAR6:[A-Za-z0-9.]+]] = load i32, i32* %[[VAR4]], align 4
+  // CHECK-NEXT: %[[VAR7:[A-Za-z0-9.]+]] = load i32, i32* %[[VAR5]], align 4
   // CHECK-NEXT: %[[VAR8:[A-Za-z0-9.]+]] = getelementptr inbounds { i32, i32 }, { i32, i32 }* %[[VAR0:[A-Za-z0-9.]+]], i32 0, i32 0
   // CHECK-NEXT: %[[VAR9:[A-Za-z0-9.]+]] = getelementptr inbounds { i32, i32 }, { i32, i32 }* %[[VAR0]], i32 0, i32 1
   // CHECK-NEXT: store i32 %[[VAR6]], i32* %[[VAR8]]
@@ -25,15 +24,14 @@
 
   _Complex short s = va_arg(ap, _Complex short);
   // CHECK: %[[VAR50:[A-Za-z0-9.]+]] = load i8*, i8** %[[VAR100:[A-Za-z0-9.]+]]
-  // CHECK-NEXT: %[[VAR51:[A-Za-z0-9.]+]] = getelementptr i8, i8* %[[VAR50]], i64 16
+  // CHECK-NEXT: %[[VAR51:[A-Za-z0-9.]+]] = getelementptr inbounds i8, i8* %[[VAR50]], i64 16
   // CHECK-NEXT: store i8* %[[VAR51]], i8** %[[VAR100]]
-  // CHECK: %[[VAR11:[A-Za-z0-9.]+]] = ptrtoint i8* %{{[A-Za-z0-9.]+}} to i64
-  // CHECK-NEXT: %[[VAR12:[A-Za-z0-9.]+]] = add i64 %[[VAR11]], 6
-  // CHECK-NEXT: %[[VAR13:[A-Za-z0-9.]+]] = add i64 %[[VAR11]], 14
-  // CHECK-NEXT: %[[VAR14:[A-Za-z0-9.]+]] = inttoptr i64 %[[VAR12]] to i16*
-  // CHECK-NEXT: %[[VAR15:[A-Za-z0-9.]+]] = inttoptr i64 %[[VAR13]] to i16*
-  // CHECK-NEXT: %[[VAR16:[A-Za-z0-9.]+]] = load i16, i16* %[[VAR14]]
-  // CHECK-NEXT: %[[VAR17:[A-Za-z0-9.]+]] = load i16, i16* %[[VAR15]]
+  // CHECK-NEXT: %[[VAR12:[A-Za-z0-9.]+]] = getelementptr inbounds i8, i8* %[[VAR50]], i64 6
+  // CHECK-NEXT: %[[VAR13:[A-Za-z0-9.]+]] = getelementptr inbounds i8, i8* %[[VAR50]], i64 14
+  // CHECK-NEXT: %[[VAR14:[A-Za-z0-9.]+]] = bitcast i8* %[[VAR12]] to i16*
+  // CHECK-NEXT: %[[VAR15:[A-Za-z0-9.]+]] = bitcast i8* %[[VAR13]] to i16*
+  // CHECK-NEXT: %[[VAR16:[A-Za-z0-9.]+]] = load i16, i16* %[[VAR14]], align 2
+  // CHECK-NEXT: %[[VAR17:[A-Za-z0-9.]+]] = load i16, i16* %[[VAR15]], align 2
   // CHECK-NEXT: %[[VAR18:[A-Za-z0-9.]+]] = getelementptr inbounds { i16, i16 }, { i16, i16 }* %[[VAR10:[A-Za-z0-9.]+]], i32 0, i32 0
   // CHECK-NEXT: %[[VAR19:[A-Za-z0-9.]+]] = getelementptr inbounds { i16, i16 }, { i16, i16 }* %[[VAR10]], i32 0, i32 1
   // CHECK-NEXT: store i16 %[[VAR16]], i16* %[[VAR18]]
@@ -41,15 +39,12 @@
 
   _Complex char c  = va_arg(ap, _Complex char);
   // CHECK: %[[VAR60:[A-Za-z0-9.]+]] = load i8*, i8** %[[VAR100:[A-Za-z0-9.]+]]
-  // CHECK-NEXT: %[[VAR61:[A-Za-z0-9.]+]] = getelementptr i8, i8* %[[VAR60]], i64 16
+  // CHECK-NEXT: %[[VAR61:[A-Za-z0-9.]+]] = getelementptr inbounds i8, i8* %[[VAR60]], i64 16
   // CHECK-NEXT: store i8* %[[VAR61]], i8** %[[VAR100]]
-  // CHECK: %[[VAR21:[A-Za-z0-9.]+]] = ptrtoint i8* %{{[A-Za-z0-9.]+}} to i64
-  // CHECK-NEXT: %[[VAR22:[A-Za-z0-9.]+]] = add i64 %[[VAR21]], 7
-  // CHECK-NEXT: %[[VAR23:[A-Za-z0-9.]+]] = add i64 %[[VAR21]], 15
-  // CHECK-NEXT: %[[VAR24:[A-Za-z0-9.]+]] = inttoptr i64 %[[VAR22]] to i8*
-  // CHECK-NEXT: %[[VAR25:[A-Za-z0-9.]+]] = inttoptr i64 %[[VAR23]] to i8*
-  // CHECK-NEXT: %[[VAR26:[A-Za-z0-9.]+]] = load i8, i8* %[[VAR24]]
-  // CHECK-NEXT: %[[VAR27:[A-Za-z0-9.]+]] = load i8, i8* %[[VAR25]]
+  // CHECK-NEXT: %[[VAR24:[A-Za-z0-9.]+]] = getelementptr inbounds i8, i8* %[[VAR60]], i64 7
+  // CHECK-NEXT: %[[VAR25:[A-Za-z0-9.]+]] = getelementptr inbounds i8, i8* %[[VAR60]], i64 15
+  // CHECK-NEXT: %[[VAR26:[A-Za-z0-9.]+]] = load i8, i8* %[[VAR24]], align 1
+  // CHECK-NEXT: %[[VAR27:[A-Za-z0-9.]+]] = load i8, i8* %[[VAR25]], align 1
   // CHECK-NEXT: %[[VAR28:[A-Za-z0-9.]+]] = getelementptr inbounds { i8, i8 }, { i8, i8 }* %[[VAR20:[A-Za-z0-9.]+]], i32 0, i32 0
   // CHECK-NEXT: %[[VAR29:[A-Za-z0-9.]+]] = getelementptr inbounds { i8, i8 }, { i8, i8 }* %[[VAR20]], i32 0, i32 1
   // CHECK-NEXT: store i8 %[[VAR26]], i8* %[[VAR28]]
@@ -57,15 +52,14 @@
 
   _Complex float f = va_arg(ap, _Complex float);
   // CHECK: %[[VAR70:[A-Za-z0-9.]+]] = load i8*, i8** %[[VAR100:[A-Za-z0-9.]+]]
-  // CHECK-NEXT: %[[VAR71:[A-Za-z0-9.]+]] = getelementptr i8, i8* %[[VAR70]], i64 16
+  // CHECK-NEXT: %[[VAR71:[A-Za-z0-9.]+]] = getelementptr inbounds i8, i8* %[[VAR70]], i64 16
   // CHECK-NEXT: store i8* %[[VAR71]], i8** %[[VAR100]]
-  // CHECK: %[[VAR31:[A-Za-z0-9.]+]] = ptrtoint i8* %{{[A-Za-z0-9.]+}} to i64
-  // CHECK-NEXT: %[[VAR32:[A-Za-z0-9.]+]] = add i64 %[[VAR31]], 4
-  // CHECK-NEXT: %[[VAR33:[A-Za-z0-9.]+]] = add i64 %[[VAR31]], 12
-  // CHECK-NEXT: %[[VAR34:[A-Za-z0-9.]+]] = inttoptr i64 %[[VAR32]] to float*
-  // CHECK-NEXT: %[[VAR35:[A-Za-z0-9.]+]] = inttoptr i64 %[[VAR33]] to float*
-  // CHECK-NEXT: %[[VAR36:[A-Za-z0-9.]+]] = load float, float* %[[VAR34]]
-  // CHECK-NEXT: %[[VAR37:[A-Za-z0-9.]+]] = load float, float* %[[VAR35]]
+  // CHECK-NEXT: %[[VAR32:[A-Za-z0-9.]+]] = getelementptr inbounds i8, i8* %[[VAR70]], i64 4
+  // CHECK-NEXT: %[[VAR33:[A-Za-z0-9.]+]] = getelementptr inbounds i8, i8* %[[VAR70]], i64 12
+  // CHECK-NEXT: %[[VAR34:[A-Za-z0-9.]+]] = bitcast i8* %[[VAR32]] to float*
+  // CHECK-NEXT: %[[VAR35:[A-Za-z0-9.]+]] = bitcast i8* %[[VAR33]] to float*
+  // CHECK-NEXT: %[[VAR36:[A-Za-z0-9.]+]] = load float, float* %[[VAR34]], align 4
+  // CHECK-NEXT: %[[VAR37:[A-Za-z0-9.]+]] = load float, float* %[[VAR35]], align 4
   // CHECK-NEXT: %[[VAR38:[A-Za-z0-9.]+]] = getelementptr inbounds { float, float }, { float, float }* %[[VAR30:[A-Za-z0-9.]+]], i32 0, i32 0
   // CHECK-NEXT: %[[VAR39:[A-Za-z0-9.]+]] = getelementptr inbounds { float, float }, { float, float }* %[[VAR30]], i32 0, i32 1
   // CHECK-NEXT: store float %[[VAR36]], float* %[[VAR38]]
diff --git a/test/CodeGen/ppc64le-aggregates.c b/test/CodeGen/ppc64le-aggregates.c
index 76798c1..3ad4b06 100644
--- a/test/CodeGen/ppc64le-aggregates.c
+++ b/test/CodeGen/ppc64le-aggregates.c
@@ -54,49 +54,49 @@
 struct f2a2b func_f2a2b(struct f2a2b x) { return x; }
 
 // CHECK-LABEL: @call_f1
-// CHECK: %[[TMP:[^ ]+]] = load float, float* getelementptr inbounds (%struct.f1, %struct.f1* @global_f1, i32 0, i32 0, i32 0), align 1
+// CHECK: %[[TMP:[^ ]+]] = load float, float* getelementptr inbounds (%struct.f1, %struct.f1* @global_f1, i32 0, i32 0, i32 0), align 4
 // CHECK: call [1 x float] @func_f1(float inreg %[[TMP]])
 struct f1 global_f1;
 void call_f1(void) { global_f1 = func_f1(global_f1); }
 
 // CHECK-LABEL: @call_f2
-// CHECK: %[[TMP:[^ ]+]] = load [2 x float], [2 x float]* getelementptr inbounds (%struct.f2, %struct.f2* @global_f2, i32 0, i32 0), align 1
+// CHECK: %[[TMP:[^ ]+]] = load [2 x float], [2 x float]* getelementptr inbounds (%struct.f2, %struct.f2* @global_f2, i32 0, i32 0), align 4
 // CHECK: call [2 x float] @func_f2([2 x float] %[[TMP]])
 struct f2 global_f2;
 void call_f2(void) { global_f2 = func_f2(global_f2); }
 
 // CHECK-LABEL: @call_f3
-// CHECK: %[[TMP:[^ ]+]] = load [3 x float], [3 x float]* getelementptr inbounds (%struct.f3, %struct.f3* @global_f3, i32 0, i32 0), align 1
+// CHECK: %[[TMP:[^ ]+]] = load [3 x float], [3 x float]* getelementptr inbounds (%struct.f3, %struct.f3* @global_f3, i32 0, i32 0), align 4
 // CHECK: call [3 x float] @func_f3([3 x float] %[[TMP]])
 struct f3 global_f3;
 void call_f3(void) { global_f3 = func_f3(global_f3); }
 
 // CHECK-LABEL: @call_f4
-// CHECK: %[[TMP:[^ ]+]] = load [4 x float], [4 x float]* getelementptr inbounds (%struct.f4, %struct.f4* @global_f4, i32 0, i32 0), align 1
+// CHECK: %[[TMP:[^ ]+]] = load [4 x float], [4 x float]* getelementptr inbounds (%struct.f4, %struct.f4* @global_f4, i32 0, i32 0), align 4
 // CHECK: call [4 x float] @func_f4([4 x float] %[[TMP]])
 struct f4 global_f4;
 void call_f4(void) { global_f4 = func_f4(global_f4); }
 
 // CHECK-LABEL: @call_f5
-// CHECK: %[[TMP:[^ ]+]] = load [5 x float], [5 x float]* getelementptr inbounds (%struct.f5, %struct.f5* @global_f5, i32 0, i32 0), align 1
+// CHECK: %[[TMP:[^ ]+]] = load [5 x float], [5 x float]* getelementptr inbounds (%struct.f5, %struct.f5* @global_f5, i32 0, i32 0), align 4
 // CHECK: call [5 x float] @func_f5([5 x float] %[[TMP]])
 struct f5 global_f5;
 void call_f5(void) { global_f5 = func_f5(global_f5); }
 
 // CHECK-LABEL: @call_f6
-// CHECK: %[[TMP:[^ ]+]] = load [6 x float], [6 x float]* getelementptr inbounds (%struct.f6, %struct.f6* @global_f6, i32 0, i32 0), align 1
+// CHECK: %[[TMP:[^ ]+]] = load [6 x float], [6 x float]* getelementptr inbounds (%struct.f6, %struct.f6* @global_f6, i32 0, i32 0), align 4
 // CHECK: call [6 x float] @func_f6([6 x float] %[[TMP]])
 struct f6 global_f6;
 void call_f6(void) { global_f6 = func_f6(global_f6); }
 
 // CHECK-LABEL: @call_f7
-// CHECK: %[[TMP:[^ ]+]] = load [7 x float], [7 x float]* getelementptr inbounds (%struct.f7, %struct.f7* @global_f7, i32 0, i32 0), align 1
+// CHECK: %[[TMP:[^ ]+]] = load [7 x float], [7 x float]* getelementptr inbounds (%struct.f7, %struct.f7* @global_f7, i32 0, i32 0), align 4
 // CHECK: call [7 x float] @func_f7([7 x float] %[[TMP]])
 struct f7 global_f7;
 void call_f7(void) { global_f7 = func_f7(global_f7); }
 
 // CHECK-LABEL: @call_f8
-// CHECK: %[[TMP:[^ ]+]] = load [8 x float], [8 x float]* getelementptr inbounds (%struct.f8, %struct.f8* @global_f8, i32 0, i32 0), align 1
+// CHECK: %[[TMP:[^ ]+]] = load [8 x float], [8 x float]* getelementptr inbounds (%struct.f8, %struct.f8* @global_f8, i32 0, i32 0), align 4
 // CHECK: call [8 x float] @func_f8([8 x float] %[[TMP]])
 struct f8 global_f8;
 void call_f8(void) { global_f8 = func_f8(global_f8); }
@@ -104,7 +104,7 @@
 // CHECK-LABEL: @call_f9
 // CHECK: %[[TMP1:[^ ]+]] = alloca [5 x i64]
 // CHECK: %[[TMP2:[^ ]+]] = bitcast [5 x i64]* %[[TMP1]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %[[TMP2]], i8* bitcast (%struct.f9* @global_f9 to i8*), i64 36, i32 1, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %[[TMP2]], i8* bitcast (%struct.f9* @global_f9 to i8*), i64 36, i32 4, i1 false)
 // CHECK: %[[TMP3:[^ ]+]] = load [5 x i64], [5 x i64]* %[[TMP1]]
 // CHECK: call void @func_f9(%struct.f9* sret %{{[^ ]+}}, [5 x i64] %[[TMP3]])
 struct f9 global_f9;
diff --git a/test/CodeGen/ppc64le-varargs-complex.c b/test/CodeGen/ppc64le-varargs-complex.c
index 68dfa0b..399371b 100644
--- a/test/CodeGen/ppc64le-varargs-complex.c
+++ b/test/CodeGen/ppc64le-varargs-complex.c
@@ -9,14 +9,13 @@
 
   _Complex int i   = va_arg(ap, _Complex int);
   // CHECK: %[[VAR40:[A-Za-z0-9.]+]] = load i8*, i8** %[[VAR100:[A-Za-z0-9.]+]]
-  // CHECK-NEXT: %[[VAR41:[A-Za-z0-9.]+]] = getelementptr i8, i8* %[[VAR40]], i64 16
+  // CHECK-NEXT: %[[VAR41:[A-Za-z0-9.]+]] = getelementptr inbounds i8, i8* %[[VAR40]], i64 16
   // CHECK-NEXT: store i8* %[[VAR41]], i8** %[[VAR100]]
-  // CHECK-NEXT: %[[VAR1:[A-Za-z0-9.]+]] = ptrtoint i8* %[[VAR40]] to i64
-  // CHECK-NEXT: %[[VAR3:[A-Za-z0-9.]+]] = add i64 %[[VAR1]], 8
-  // CHECK-NEXT: %[[VAR4:[A-Za-z0-9.]+]] = inttoptr i64 %[[VAR1]] to i32*
-  // CHECK-NEXT: %[[VAR5:[A-Za-z0-9.]+]] = inttoptr i64 %[[VAR3]] to i32*
-  // CHECK-NEXT: %[[VAR6:[A-Za-z0-9.]+]] = load i32, i32* %[[VAR4]]
-  // CHECK-NEXT: %[[VAR7:[A-Za-z0-9.]+]] = load i32, i32* %[[VAR5]]
+  // CHECK-NEXT: %[[VAR3:[A-Za-z0-9.]+]] = getelementptr inbounds i8, i8* %[[VAR40]], i64 8
+  // CHECK-NEXT: %[[VAR4:[A-Za-z0-9.]+]] = bitcast i8* %[[VAR40]] to i32*
+  // CHECK-NEXT: %[[VAR5:[A-Za-z0-9.]+]] = bitcast i8* %[[VAR3]] to i32*
+  // CHECK-NEXT: %[[VAR6:[A-Za-z0-9.]+]] = load i32, i32* %[[VAR4]], align 8
+  // CHECK-NEXT: %[[VAR7:[A-Za-z0-9.]+]] = load i32, i32* %[[VAR5]], align 8
   // CHECK-NEXT: %[[VAR8:[A-Za-z0-9.]+]] = getelementptr inbounds { i32, i32 }, { i32, i32 }* %[[VAR0:[A-Za-z0-9.]+]], i32 0, i32 0
   // CHECK-NEXT: %[[VAR9:[A-Za-z0-9.]+]] = getelementptr inbounds { i32, i32 }, { i32, i32 }* %[[VAR0]], i32 0, i32 1
   // CHECK-NEXT: store i32 %[[VAR6]], i32* %[[VAR8]]
@@ -24,14 +23,13 @@
 
   _Complex short s = va_arg(ap, _Complex short);
   // CHECK: %[[VAR50:[A-Za-z0-9.]+]] = load i8*, i8** %[[VAR100:[A-Za-z0-9.]+]]
-  // CHECK-NEXT: %[[VAR51:[A-Za-z0-9.]+]] = getelementptr i8, i8* %[[VAR50]], i64 16
+  // CHECK-NEXT: %[[VAR51:[A-Za-z0-9.]+]] = getelementptr inbounds i8, i8* %[[VAR50]], i64 16
   // CHECK-NEXT: store i8* %[[VAR51]], i8** %[[VAR100]]
-  // CHECK: %[[VAR11:[A-Za-z0-9.]+]] = ptrtoint i8* %{{[A-Za-z0-9.]+}} to i64
-  // CHECK-NEXT: %[[VAR13:[A-Za-z0-9.]+]] = add i64 %[[VAR11]], 8
-  // CHECK-NEXT: %[[VAR14:[A-Za-z0-9.]+]] = inttoptr i64 %[[VAR11]] to i16*
-  // CHECK-NEXT: %[[VAR15:[A-Za-z0-9.]+]] = inttoptr i64 %[[VAR13]] to i16*
-  // CHECK-NEXT: %[[VAR16:[A-Za-z0-9.]+]] = load i16, i16* %[[VAR14]]
-  // CHECK-NEXT: %[[VAR17:[A-Za-z0-9.]+]] = load i16, i16* %[[VAR15]]
+  // CHECK-NEXT: %[[VAR13:[A-Za-z0-9.]+]] = getelementptr inbounds i8, i8* %[[VAR50]], i64 8
+  // CHECK-NEXT: %[[VAR14:[A-Za-z0-9.]+]] = bitcast i8* %[[VAR50]] to i16*
+  // CHECK-NEXT: %[[VAR15:[A-Za-z0-9.]+]] = bitcast i8* %[[VAR13]] to i16*
+  // CHECK-NEXT: %[[VAR16:[A-Za-z0-9.]+]] = load i16, i16* %[[VAR14]], align 8
+  // CHECK-NEXT: %[[VAR17:[A-Za-z0-9.]+]] = load i16, i16* %[[VAR15]], align 8
   // CHECK-NEXT: %[[VAR18:[A-Za-z0-9.]+]] = getelementptr inbounds { i16, i16 }, { i16, i16 }* %[[VAR10:[A-Za-z0-9.]+]], i32 0, i32 0
   // CHECK-NEXT: %[[VAR19:[A-Za-z0-9.]+]] = getelementptr inbounds { i16, i16 }, { i16, i16 }* %[[VAR10]], i32 0, i32 1
   // CHECK-NEXT: store i16 %[[VAR16]], i16* %[[VAR18]]
@@ -39,14 +37,11 @@
 
   _Complex char c  = va_arg(ap, _Complex char);
   // CHECK: %[[VAR60:[A-Za-z0-9.]+]] = load i8*, i8** %[[VAR100:[A-Za-z0-9.]+]]
-  // CHECK-NEXT: %[[VAR61:[A-Za-z0-9.]+]] = getelementptr i8, i8* %[[VAR60]], i64 16
+  // CHECK-NEXT: %[[VAR61:[A-Za-z0-9.]+]] = getelementptr inbounds i8, i8* %[[VAR60]], i64 16
   // CHECK-NEXT: store i8* %[[VAR61]], i8** %[[VAR100]]
-  // CHECK: %[[VAR21:[A-Za-z0-9.]+]] = ptrtoint i8* %{{[A-Za-z0-9.]+}} to i64
-  // CHECK-NEXT: %[[VAR23:[A-Za-z0-9.]+]] = add i64 %[[VAR21]], 8
-  // CHECK-NEXT: %[[VAR24:[A-Za-z0-9.]+]] = inttoptr i64 %[[VAR21]] to i8*
-  // CHECK-NEXT: %[[VAR25:[A-Za-z0-9.]+]] = inttoptr i64 %[[VAR23]] to i8*
-  // CHECK-NEXT: %[[VAR26:[A-Za-z0-9.]+]] = load i8, i8* %[[VAR24]]
-  // CHECK-NEXT: %[[VAR27:[A-Za-z0-9.]+]] = load i8, i8* %[[VAR25]]
+  // CHECK-NEXT: %[[VAR25:[A-Za-z0-9.]+]] = getelementptr inbounds i8, i8* %[[VAR60]], i64 8
+  // CHECK-NEXT: %[[VAR26:[A-Za-z0-9.]+]] = load i8, i8* %[[VAR60]], align 8
+  // CHECK-NEXT: %[[VAR27:[A-Za-z0-9.]+]] = load i8, i8* %[[VAR25]], align 8
   // CHECK-NEXT: %[[VAR28:[A-Za-z0-9.]+]] = getelementptr inbounds { i8, i8 }, { i8, i8 }* %[[VAR20:[A-Za-z0-9.]+]], i32 0, i32 0
   // CHECK-NEXT: %[[VAR29:[A-Za-z0-9.]+]] = getelementptr inbounds { i8, i8 }, { i8, i8 }* %[[VAR20]], i32 0, i32 1
   // CHECK-NEXT: store i8 %[[VAR26]], i8* %[[VAR28]]
@@ -54,14 +49,13 @@
 
   _Complex float f = va_arg(ap, _Complex float);
   // CHECK: %[[VAR70:[A-Za-z0-9.]+]] = load i8*, i8** %[[VAR100:[A-Za-z0-9.]+]]
-  // CHECK-NEXT: %[[VAR71:[A-Za-z0-9.]+]] = getelementptr i8, i8* %[[VAR70]], i64 16
+  // CHECK-NEXT: %[[VAR71:[A-Za-z0-9.]+]] = getelementptr inbounds i8, i8* %[[VAR70]], i64 16
   // CHECK-NEXT: store i8* %[[VAR71]], i8** %[[VAR100]]
-  // CHECK: %[[VAR31:[A-Za-z0-9.]+]] = ptrtoint i8* %{{[A-Za-z0-9.]+}} to i64
-  // CHECK-NEXT: %[[VAR33:[A-Za-z0-9.]+]] = add i64 %[[VAR31]], 8
-  // CHECK-NEXT: %[[VAR34:[A-Za-z0-9.]+]] = inttoptr i64 %[[VAR31]] to float*
-  // CHECK-NEXT: %[[VAR35:[A-Za-z0-9.]+]] = inttoptr i64 %[[VAR33]] to float*
-  // CHECK-NEXT: %[[VAR36:[A-Za-z0-9.]+]] = load float, float* %[[VAR34]]
-  // CHECK-NEXT: %[[VAR37:[A-Za-z0-9.]+]] = load float, float* %[[VAR35]]
+  // CHECK-NEXT: %[[VAR33:[A-Za-z0-9.]+]] = getelementptr inbounds i8, i8* %[[VAR70]], i64 8
+  // CHECK-NEXT: %[[VAR34:[A-Za-z0-9.]+]] = bitcast i8* %[[VAR70]] to float*
+  // CHECK-NEXT: %[[VAR35:[A-Za-z0-9.]+]] = bitcast i8* %[[VAR33]] to float*
+  // CHECK-NEXT: %[[VAR36:[A-Za-z0-9.]+]] = load float, float* %[[VAR34]], align 8
+  // CHECK-NEXT: %[[VAR37:[A-Za-z0-9.]+]] = load float, float* %[[VAR35]], align 8
   // CHECK-NEXT: %[[VAR38:[A-Za-z0-9.]+]] = getelementptr inbounds { float, float }, { float, float }* %[[VAR30:[A-Za-z0-9.]+]], i32 0, i32 0
   // CHECK-NEXT: %[[VAR39:[A-Za-z0-9.]+]] = getelementptr inbounds { float, float }, { float, float }* %[[VAR30]], i32 0, i32 1
   // CHECK-NEXT: store float %[[VAR36]], float* %[[VAR38]]
diff --git a/test/CodeGen/pr9614.c b/test/CodeGen/pr9614.c
index 53abef1..63cb5af 100644
--- a/test/CodeGen/pr9614.c
+++ b/test/CodeGen/pr9614.c
@@ -4,26 +4,42 @@
 inline void foo (void) {
   return foo_alias ();
 }
-extern void bar_alias (void) __asm ("bar");
-inline __attribute__ ((__always_inline__)) void bar (void) {
-  return bar_alias ();
+extern int abs_alias (int) __asm ("abs");
+inline __attribute__ ((__always_inline__)) int abs (int x) {
+  return abs_alias(x);
 }
 extern char *strrchr_foo (const char *__s, int __c)  __asm ("strrchr");
 extern inline __attribute__ ((__always_inline__)) __attribute__ ((__gnu_inline__)) char * strrchr_foo (const char *__s, int __c)  {
   return __builtin_strrchr (__s, __c);
 }
+
+extern inline void __attribute__((always_inline, __gnu_inline__))
+prefetch(void) {
+  __builtin_prefetch(0, 0, 1);
+}
+
+extern inline __attribute__((__always_inline__, __gnu_inline__)) void *memchr(void *__s, int __c, __SIZE_TYPE__ __n) {
+  return __builtin_memchr(__s, __c, __n);
+}
+
 void f(void) {
   foo();
-  bar();
+  abs(0);
   strrchr_foo("", '.');
+  prefetch();
+  memchr("", '.', 0);
 }
 
 // CHECK-LABEL: define void @f()
 // CHECK: call void @foo()
-// CHECK-NEXT: call void @bar()
-// CHECK-NEXT: call i8* @strrchr(
-// CHECK-NEXT: ret void
+// CHECK: call i32 @abs(i32 0)
+// CHECK: call i8* @strrchr(
+// CHECK: call void @llvm.prefetch(
+// CHECK: call i8* @memchr(
+// CHECK: ret void
 
 // CHECK: declare void @foo()
-// CHECK: declare void @bar()
+// CHECK: declare i32 @abs(i32
 // CHECK: declare i8* @strrchr(i8*, i32)
+// CHECK: declare i8* @memchr(
+// CHECK: declare void @llvm.prefetch(
diff --git a/test/CodeGen/pragma-comment.c b/test/CodeGen/pragma-comment.c
index fbae9d5..6da2068 100644
--- a/test/CodeGen/pragma-comment.c
+++ b/test/CodeGen/pragma-comment.c
@@ -30,3 +30,4 @@
 // PS4: !{!"\01msvcrt.lib"}
 // PS4: !{!"\01kernel32"}
 // PS4: !{!"\01USER32.LIB"}
+// PS4: !{!"\01\22with space\22"}
diff --git a/test/CodeGen/pragma-weak.c b/test/CodeGen/pragma-weak.c
index aba98e1..36abca5 100644
--- a/test/CodeGen/pragma-weak.c
+++ b/test/CodeGen/pragma-weak.c
@@ -5,18 +5,18 @@
 // CHECK: @correct_linkage = weak global
 
 
-// CHECK-DAG: @both = alias void ()* @__both
-// CHECK-DAG: @both2 = alias void ()* @__both2
-// CHECK-DAG: @weakvar_alias = weak alias i32* @__weakvar_alias
-// CHECK-DAG: @foo = weak alias void ()* @__foo
-// CHECK-DAG: @foo2 = weak alias void ()* @__foo2
-// CHECK-DAG: @stutter = weak alias void ()* @__stutter
-// CHECK-DAG: @stutter2 = weak alias void ()* @__stutter2
-// CHECK-DAG: @declfirst = weak alias void ()* @__declfirst
-// CHECK-DAG: @declfirstattr = weak alias void ()* @__declfirstattr
-// CHECK-DAG: @mix2 = weak alias void ()* @__mix2
-// CHECK-DAG: @a1 = weak alias void ()* @__a1
-// CHECK-DAG: @xxx = weak alias void ()* @__xxx
+// CHECK-DAG: @both = alias void (), void ()* @__both
+// CHECK-DAG: @both2 = alias void (), void ()* @__both2
+// CHECK-DAG: @weakvar_alias = weak alias i32, i32* @__weakvar_alias
+// CHECK-DAG: @foo = weak alias void (), void ()* @__foo
+// CHECK-DAG: @foo2 = weak alias void (), void ()* @__foo2
+// CHECK-DAG: @stutter = weak alias void (), void ()* @__stutter
+// CHECK-DAG: @stutter2 = weak alias void (), void ()* @__stutter2
+// CHECK-DAG: @declfirst = weak alias void (), void ()* @__declfirst
+// CHECK-DAG: @declfirstattr = weak alias void (), void ()* @__declfirstattr
+// CHECK-DAG: @mix2 = weak alias void (), void ()* @__mix2
+// CHECK-DAG: @a1 = weak alias void (), void ()* @__a1
+// CHECK-DAG: @xxx = weak alias void (), void ()* @__xxx
 
 
 
@@ -53,12 +53,14 @@
 #pragma weak unused // expected-warning {{weak identifier 'unused' never declared}}
 #pragma weak unused_alias = __unused_alias  // expected-warning {{weak identifier '__unused_alias' never declared}}
 
-#pragma weak td // expected-warning {{weak identifier 'td' never declared}}
+#pragma weak td // expected-warning {{'weak' attribute only applies to variables and functions}}
 typedef int td;
 
-#pragma weak td2 = __td2 // expected-warning {{weak identifier '__td2' never declared}}
+#pragma weak td2 = __td2 // expected-warning {{'weak' attribute only applies to variables and functions}}
 typedef int __td2;
 
+typedef int __td3;
+#pragma weak td3 = __td3 // expected-warning {{'weak' attribute only applies to variables and functions}}
 
 ///// test weird cases
 
diff --git a/test/CodeGen/prefetchw-builtins.c b/test/CodeGen/prefetchw-builtins.c
index 9c5fdc7..8a50325 100644
--- a/test/CodeGen/prefetchw-builtins.c
+++ b/test/CodeGen/prefetchw-builtins.c
@@ -5,8 +5,14 @@
 
 #include <x86intrin.h>
 
-void prefetch_w(void *p) {
+void test_m_prefetch(void *p) {
+  return _m_prefetch(p);
+// CHECK-LABEL: define void @test_m_prefetch
+// CHECK: call void @llvm.prefetch({{.*}}, i32 0, i32 3, i32 1)
+}
+
+void test_m_prefetch_w(void *p) {
   return _m_prefetchw(p);
-// CHECK: @prefetch_w
+// CHECK-LABEL: define void @test_m_prefetch_w
 // CHECK: call void @llvm.prefetch({{.*}}, i32 1, i32 3, i32 1)
 }
diff --git a/test/CodeGen/redefine_extname.c b/test/CodeGen/redefine_extname.c
index a91e5b8..d56527a 100644
--- a/test/CodeGen/redefine_extname.c
+++ b/test/CodeGen/redefine_extname.c
@@ -13,3 +13,20 @@
 // CHECK:   call i32 @real()
 // Check that this also works with variables names
 // CHECK:   load i32, i32* @alias
+
+// This is a case when redefinition is deferred *and* we have a local of the
+// same name. PR23923.
+#pragma redefine_extname foo bar
+int f() {
+  int foo = 0;
+  return foo;
+}
+extern int foo() { return 1; }
+// CHECK: define i32 @bar()
+
+// Check that pragma redefine_extname applies to external declarations only.
+#pragma redefine_extname foo_static bar_static
+static int foo_static() { return 1; }
+int baz() { return foo_static(); }
+// CHECK-NOT: call i32 @bar_static()
+
diff --git a/test/CodeGen/rtm-builtins.c b/test/CodeGen/rtm-builtins.c
index 5660d8e..5cf3237 100644
--- a/test/CodeGen/rtm-builtins.c
+++ b/test/CodeGen/rtm-builtins.c
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 %s -O3 -triple=x86_64-apple-darwin -target-feature +rtm -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 %s -triple=x86_64-apple-darwin -target-feature +rtm -emit-llvm -o - | FileCheck %s
 
 // Don't include mm_malloc.h, it's system specific.
 #define __MM_MALLOC_H
diff --git a/test/CodeGen/sanitize-address-field-padding.cpp b/test/CodeGen/sanitize-address-field-padding.cpp
index d4eea1b..045a434 100644
--- a/test/CodeGen/sanitize-address-field-padding.cpp
+++ b/test/CodeGen/sanitize-address-field-padding.cpp
@@ -5,6 +5,8 @@
 // RUN: %clang_cc1 -triple x86_64-unknown-unknown -fsanitize=address -fsanitize-address-field-padding=1 -fsanitize-blacklist=%t.type.blacklist -Rsanitize-address -emit-llvm -o - %s -O1 -mconstructor-aliases 2>&1 | FileCheck %s --check-prefix=WITH_CTOR_ALIASES
 // RUN: %clang_cc1 -triple x86_64-unknown-unknown -fsanitize=address -fsanitize-address-field-padding=1 -fsanitize-blacklist=%t.file.blacklist -Rsanitize-address -emit-llvm -o - %s 2>&1 | FileCheck %s --check-prefix=FILE_BLACKLIST
 // RUN: %clang_cc1 -fsanitize=address -emit-llvm -o - %s 2>&1 | FileCheck %s --check-prefix=NO_PADDING
+// Try to emulate the -save-temps option and make sure -disable-llvm-passes will not run the sanitizer instrumentation.
+// RUN: %clang_cc1 -fsanitize=address -emit-llvm -disable-llvm-passes -o - %s | %clang_cc1 -fsanitize=address -emit-llvm -o - -x ir | FileCheck %s --check-prefix=NO_PADDING
 //
 
 // The reasons to ignore a particular class are not set in stone and will change.
diff --git a/test/CodeGen/sanitize-blocks.c b/test/CodeGen/sanitize-blocks.c
new file mode 100644
index 0000000..103c33c
--- /dev/null
+++ b/test/CodeGen/sanitize-blocks.c
@@ -0,0 +1,13 @@
+// RUN: %clang_cc1 -fsanitize-trap=returns-nonnull-attribute -fsanitize=returns-nonnull-attribute -emit-llvm %s -o - -triple x86_64-apple-darwin10 -fblocks | FileCheck %s
+
+// Awkward interactions of sanitizers with blocks.
+
+// rdar://22071955
+const char *TheString = "Hello, world!";
+const char *(^getString)(void) = ^{
+  return TheString;
+};
+
+// CHECK-LABEL: define internal i8* @getString_block_invoke
+
+// TODO: Actually support returns_nonnull on blocks.
diff --git a/test/CodeGen/sanitize-trap.c b/test/CodeGen/sanitize-trap.c
new file mode 100644
index 0000000..45f9fed
--- /dev/null
+++ b/test/CodeGen/sanitize-trap.c
@@ -0,0 +1,30 @@
+// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -emit-llvm -o - %s -fsanitize=signed-integer-overflow,integer-divide-by-zero -fsanitize-trap=integer-divide-by-zero | FileCheck %s
+
+int f(int x, int y) {
+  // CHECK: %[[B1:.*]] = icmp ne i32 %[[D:.*]], 0
+  // CHECK: %[[B2:.*]] = icmp ne i32 %[[N:.*]], -2147483648
+  // CHECK: %[[B3:.*]] = icmp ne i32 %[[D]], -1
+  // CHECK: %[[B4:.*]] = or i1 %[[B2]], %[[B3]]
+  // CHECK: br i1 %[[B1]], label %[[L1:[0-9a-z_.]*]], label %[[L2:[0-9a-z_.]*]]
+
+  // {{^|:}} is used to match both the Debug form of the captured label
+  // cont:
+  // and the Release form
+  // ; <label>:14
+  // But avoids false matches inside other numbers such as [114 x i8].
+  // CHECK: {{^|:}}[[L2]]
+  // CHECK-NEXT: call void @llvm.trap()
+  // CHECK-NEXT: unreachable
+
+  // CHECK: {{^|:}}[[L1]]
+  // CHECK-NEXT: br i1 %[[B4]], label %[[L3:[0-9a-z_.]*]], label %[[L4:[0-9a-z_.]*]]
+
+  // CHECK: {{^|:}}[[L4]]
+  // CHECK-NEXT: zext
+  // CHECK-NEXT: zext
+  // CHECK-NEXT: __ubsan_handle_divrem_overflow
+
+  // CHECK: {{^|:}}[[L3]]
+  // CHECK-NEXT: sdiv i32 %[[N]], %[[D]]
+  return x / y;
+}
diff --git a/test/CodeGen/sha-builtins.c b/test/CodeGen/sha-builtins.c
index 2b11ded..9c14a1e 100644
--- a/test/CodeGen/sha-builtins.c
+++ b/test/CodeGen/sha-builtins.c
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 %s -O3 -triple=x86_64-unknown-unknown -target-feature +sha -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 %s -triple=x86_64-unknown-unknown -target-feature +sha -emit-llvm -o - | FileCheck %s
 
 // Don't include mm_malloc.h, it's system specific.
 #define __MM_MALLOC_H
diff --git a/test/CodeGen/sparc-arguments.c b/test/CodeGen/sparc-arguments.c
new file mode 100644
index 0000000..c86b40b
--- /dev/null
+++ b/test/CodeGen/sparc-arguments.c
@@ -0,0 +1,27 @@
+// RUN: %clang_cc1 -triple sparc-unknown-unknown -emit-llvm -o - %s | FileCheck %s
+
+// Ensure that we pass proper alignment to llvm in the call
+// instruction. The proper alignment for the type is sometimes known
+// only by clang, and is not manifest in the LLVM-type. So, it must be
+// explicitly passed through. (Besides the case of the user specifying
+// alignment, as here, this situation also occurrs for non-POD C++
+// structs with tail-padding: clang emits these as packed llvm-structs
+// for ABI reasons.)
+
+struct s1 {
+  int x;
+} __attribute__((aligned(8)));
+
+struct s1 x1;
+
+
+// Ensure the align 8 is passed through:
+// CHECK-LABEL: define void @f1()
+// CHECK: call void @f1_helper(%struct.s1* byval align 8 @x1)
+// Also ensure the declaration of f1_helper includes it
+// CHECK: declare void @f1_helper(%struct.s1* byval align 8)
+
+void f1_helper(struct s1);
+void f1() {
+  f1_helper(x1);
+}
diff --git a/test/CodeGen/sparcv9-abi.c b/test/CodeGen/sparcv9-abi.c
index bf44719..5984fa5 100644
--- a/test/CodeGen/sparcv9-abi.c
+++ b/test/CodeGen/sparcv9-abi.c
@@ -132,9 +132,9 @@
   while ((c = *f++)) switch (c) {
 
 // CHECK: %[[CUR:[^ ]+]] = load i8*, i8** %ap
-// CHECK-DAG: %[[NXT:[^ ]+]] = getelementptr i8, i8* %[[CUR]], i32 8
+// CHECK-DAG: %[[NXT:[^ ]+]] = getelementptr inbounds i8, i8* %[[CUR]], i64 8
 // CHECK-DAG: store i8* %[[NXT]], i8** %ap
-// CHECK-DAG: %[[EXT:[^ ]+]] = getelementptr i8, i8* %[[CUR]], i32 4
+// CHECK-DAG: %[[EXT:[^ ]+]] = getelementptr inbounds i8, i8* %[[CUR]], i64 4
 // CHECK-DAG: %[[ADR:[^ ]+]] = bitcast i8* %[[EXT]] to i32*
 // CHECK-DAG: load i32, i32* %[[ADR]]
 // CHECK: br
@@ -143,7 +143,7 @@
     break;
 
 // CHECK: %[[CUR:[^ ]+]] = load i8*, i8** %ap
-// CHECK-DAG: %[[NXT:[^ ]+]] = getelementptr i8, i8* %[[CUR]], i32 8
+// CHECK-DAG: %[[NXT:[^ ]+]] = getelementptr inbounds i8, i8* %[[CUR]], i64 8
 // CHECK-DAG: store i8* %[[NXT]], i8** %ap
 // CHECK-DAG: %[[ADR:[^ ]+]] = bitcast i8* %[[CUR]] to i64*
 // CHECK-DAG: load i64, i64* %[[ADR]]
@@ -153,7 +153,7 @@
     break;
 
 // CHECK: %[[CUR:[^ ]+]] = load i8*, i8** %ap
-// CHECK-DAG: %[[NXT:[^ ]+]] = getelementptr i8, i8* %[[CUR]], i32 8
+// CHECK-DAG: %[[NXT:[^ ]+]] = getelementptr inbounds i8, i8* %[[CUR]], i64 8
 // CHECK-DAG: store i8* %[[NXT]], i8** %ap
 // CHECK-DAG: %[[ADR:[^ ]+]] = bitcast i8* %[[CUR]] to %struct.tiny*
 // CHECK: br
@@ -162,7 +162,7 @@
     break;
 
 // CHECK: %[[CUR:[^ ]+]] = load i8*, i8** %ap
-// CHECK-DAG: %[[NXT:[^ ]+]] = getelementptr i8, i8* %[[CUR]], i32 16
+// CHECK-DAG: %[[NXT:[^ ]+]] = getelementptr inbounds i8, i8* %[[CUR]], i64 16
 // CHECK-DAG: store i8* %[[NXT]], i8** %ap
 // CHECK-DAG: %[[ADR:[^ ]+]] = bitcast i8* %[[CUR]] to %struct.small*
 // CHECK: br
@@ -171,7 +171,7 @@
     break;
 
 // CHECK: %[[CUR:[^ ]+]] = load i8*, i8** %ap
-// CHECK-DAG: %[[NXT:[^ ]+]] = getelementptr i8, i8* %[[CUR]], i32 8
+// CHECK-DAG: %[[NXT:[^ ]+]] = getelementptr inbounds i8, i8* %[[CUR]], i64 8
 // CHECK-DAG: store i8* %[[NXT]], i8** %ap
 // CHECK-DAG: %[[IND:[^ ]+]] = bitcast i8* %[[CUR]] to %struct.medium**
 // CHECK-DAG: %[[ADR:[^ ]+]] = load %struct.medium*, %struct.medium** %[[IND]]
diff --git a/test/CodeGen/sse-builtins-dbg.c b/test/CodeGen/sse-builtins-dbg.c
index 8190744..2567894 100644
--- a/test/CodeGen/sse-builtins-dbg.c
+++ b/test/CodeGen/sse-builtins-dbg.c
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -ffreestanding -triple x86_64-apple-macosx10.8.0 -target-feature +sse4.1 -g -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -ffreestanding -triple x86_64-apple-macosx10.8.0 -target-feature +sse4.1 -debug-info-kind=limited -emit-llvm %s -o - | FileCheck %s
 
 // Test that intrinsic calls inlined from _mm_* wrappers have debug metadata.
 
diff --git a/test/CodeGen/sse-builtins.c b/test/CodeGen/sse-builtins.c
index 6d66cca..0f964e8 100644
--- a/test/CodeGen/sse-builtins.c
+++ b/test/CodeGen/sse-builtins.c
@@ -135,54 +135,6 @@
   return _mm_loadl_epi64(y);
 }
 
-__m128i test_mm_minpos_epu16(__m128i x) {
-  // CHECK: define {{.*}} @test_mm_minpos_epu16
-  // CHECK: @llvm.x86.sse41.phminposuw
-  return _mm_minpos_epu16(x);
-}
-
-__m128i test_mm_mpsadbw_epu8(__m128i x, __m128i y) {
-  // CHECK: define {{.*}} @test_mm_mpsadbw_epu8
-  // CHECK: @llvm.x86.sse41.mpsadbw
-  return _mm_mpsadbw_epu8(x, y, 1);
-}
-
-__m128 test_mm_dp_ps(__m128 x, __m128 y) {
-  // CHECK: define {{.*}} @test_mm_dp_ps
-  // CHECK: @llvm.x86.sse41.dpps
-  return _mm_dp_ps(x, y, 2);
-}
-
-__m128d test_mm_dp_pd(__m128d x, __m128d y) {
-  // CHECK: define {{.*}} @test_mm_dp_pd
-  // CHECK: @llvm.x86.sse41.dppd
-  return _mm_dp_pd(x, y, 2);
-}
-
-__m128 test_mm_round_ps(__m128 x) {
-  // CHECK: define {{.*}} @test_mm_round_ps
-  // CHECK: @llvm.x86.sse41.round.ps
-  return _mm_round_ps(x, 2);
-}
-
-__m128 test_mm_round_ss(__m128 x, __m128 y) {
-  // CHECK: define {{.*}} @test_mm_round_ss
-  // CHECK: @llvm.x86.sse41.round.ss
-  return _mm_round_ss(x, y, 2);
-}
-
-__m128d test_mm_round_pd(__m128d x) {
-  // CHECK: define {{.*}} @test_mm_round_pd
-  // CHECK: @llvm.x86.sse41.round.pd
-  return _mm_round_pd(x, 2);
-}
-
-__m128d test_mm_round_sd(__m128d x, __m128d y) {
-  // CHECK: define {{.*}} @test_mm_round_sd
-  // CHECK: @llvm.x86.sse41.round.sd
-  return _mm_round_sd(x, y, 2);
-}
-
 void test_storel_epi64(__m128i x, void* y) {
   // CHECK-LABEL: define void @test_storel_epi64
   // CHECK: store {{.*}} i64* {{.*}}, align 1{{$}}
@@ -214,48 +166,6 @@
   _mm_extract_epi16(__a, 8);
 }
 
-int test_extract_ps(__m128i __a) {
-  // CHECK-LABEL: @test_extract_ps
-  // CHECK: extractelement <4 x float> %{{.*}}, i32 0
-  return _mm_extract_ps(__a, 4);
-}
-
-int test_extract_epi8(__m128i __a) {
-  // CHECK-LABEL: @test_extract_epi8
-  // CHECK: extractelement <16 x i8> %{{.*}}, i32 0
-  return _mm_extract_epi8(__a, 16);
-}
-
-int test_extract_epi32(__m128i __a) {
-  // CHECK-LABEL: @test_extract_epi32
-  // CHECK: extractelement <4 x i32> %{{.*}}, i32 0
-  return _mm_extract_epi32(__a, 4);
-}
-
-void test_insert_epi32(__m128i __a, int b) {
-  // CHECK-LABEL: @test_insert_epi32
-  // CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i32 0
-   _mm_insert_epi32(__a, b, 4);
-}
-
-__m128d test_blend_pd(__m128d V1, __m128d V2) {
-  // CHECK-LABEL: @test_blend_pd
-  // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> <i32 2, i32 1>
-  return _mm_blend_pd(V1, V2, 1);
-}
-
-__m128 test_blend_ps(__m128 V1, __m128 V2) {
-  // CHECK-LABEL: @test_blend_ps
-  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
-  return _mm_blend_ps(V1, V2, 5);
-}
-
-__m128i test_blend_epi16(__m128i V1, __m128i V2) {
-  // CHECK-LABEL: @test_blend_epi16
-  // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 7>
-  return _mm_blend_epi16(V1, V2, 42);
-}
-
 __m128 test_mm_cmpeq_ss(__m128 __a, __m128 __b) {
   // CHECK-LABEL: @test_mm_cmpeq_ss
   // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 0)
@@ -568,12 +478,44 @@
   return _mm_bsrli_si128(a, 5);
 }
 
-__m128i test_mm_alignr_epi8(__m128i a, __m128i b) {
-  // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17>
-  return _mm_alignr_epi8(a, b, 2);
+__m128 test_mm_undefined_ps() {
+  // CHECK-LABEL: @test_mm_undefined_ps
+  // CHECK: ret <4 x float> undef
+  return _mm_undefined_ps();
 }
 
-__m128i test2_mm_alignr_epi8(__m128i a, __m128i b) {
-  // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> zeroinitializer, <16 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16>
-  return _mm_alignr_epi8(a, b, 17);
+__m128d test_mm_undefined_pd() {
+  // CHECK-LABEL: @test_mm_undefined_pd
+  // CHECK: ret <2 x double> undef
+  return _mm_undefined_pd();
+}
+
+__m128i test_mm_undefined_si128() {
+  // CHECK-LABEL: @test_mm_undefined_si128
+  // CHECK: ret <2 x i64> undef
+  return _mm_undefined_si128();
+}
+
+__m64 test_mm_add_si64(__m64 __a, __m64 __b) {
+  // CHECK-LABEL: @test_mm_add_si64
+  // CHECK: @llvm.x86.mmx.padd.q(x86_mmx %{{.*}}, x86_mmx %{{.*}})
+  return _mm_add_si64(__a, __b);
+}
+
+__m64 test_mm_sub_si64(__m64 __a, __m64 __b) {
+  // CHECK-LABEL: @test_mm_sub_si64
+  // CHECK: @llvm.x86.mmx.psub.q(x86_mmx %{{.*}}, x86_mmx %{{.*}})
+  return _mm_sub_si64(__a, __b);
+}
+
+__m64 test_mm_mul_su32(__m64 __a, __m64 __b) {
+  // CHECK-LABEL: @test_mm_mul_su32
+  // CHECK: @llvm.x86.mmx.pmulu.dq(x86_mmx %{{.*}}, x86_mmx %{{.*}})
+  return _mm_mul_su32(__a, __b);
+}
+
+void test_mm_pause() {
+  // CHECK-LABEL: @test_mm_pause
+  // CHECK: @llvm.x86.sse2.pause()
+  return _mm_pause();
 }
diff --git a/test/CodeGen/sse.c b/test/CodeGen/sse.c
index 17cce69..1e8c5db 100644
--- a/test/CodeGen/sse.c
+++ b/test/CodeGen/sse.c
@@ -1,4 +1,8 @@
-// RUN: %clang_cc1 -O3 -ffreestanding -triple x86_64-apple-macosx10.8.0 -target-feature +sse4.1 -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -O3 -triple x86_64-apple-macosx10.8.0 -target-feature +sse4.1 -emit-llvm %s -o - | FileCheck %s
+// FIXME: This test currently depends on optimization - it should be rewritten to avoid it.
+
+// Don't include mm_malloc.h, it's system specific.
+#define __MM_MALLOC_H
 
 #include <emmintrin.h>
 
diff --git a/test/CodeGen/sse2-builtins.c b/test/CodeGen/sse2-builtins.c
new file mode 100644
index 0000000..4ceb93a
--- /dev/null
+++ b/test/CodeGen/sse2-builtins.c
@@ -0,0 +1,1105 @@
+// RUN: %clang_cc1 %s -triple=x86_64-apple-darwin -target-feature +sse2 -emit-llvm -o - -Werror | FileCheck %s
+// RUN: %clang_cc1 %s -triple=x86_64-apple-darwin -target-feature +sse2 -fno-signed-char -emit-llvm -o - -Werror | FileCheck %s
+
+// Don't include mm_malloc.h, it's system specific.
+#define __MM_MALLOC_H
+
+#include <x86intrin.h>
+
+__m128i test_mm_add_epi8(__m128i A, __m128i B) {
+  // CHECK-LABEL: test_mm_add_epi8
+  // CHECK: add <16 x i8>
+  return _mm_add_epi8(A, B);
+}
+
+__m128i test_mm_add_epi16(__m128i A, __m128i B) {
+  // CHECK-LABEL: test_mm_add_epi16
+  // CHECK: add <8 x i16>
+  return _mm_add_epi16(A, B);
+}
+
+__m128i test_mm_add_epi32(__m128i A, __m128i B) {
+  // CHECK-LABEL: test_mm_add_epi32
+  // CHECK: add <4 x i32>
+  return _mm_add_epi32(A, B);
+}
+
+__m128i test_mm_add_epi64(__m128i A, __m128i B) {
+  // CHECK-LABEL: test_mm_add_epi64
+  // CHECK: add <2 x i64>
+  return _mm_add_epi64(A, B);
+}
+
+__m128d test_mm_add_pd(__m128d A, __m128d B) {
+  // CHECK-LABEL: test_mm_add_pd
+  // CHECK: fadd <2 x double>
+  return _mm_add_pd(A, B);
+}
+
+__m128d test_mm_add_sd(__m128d A, __m128d B) {
+  // CHECK-LABEL: test_mm_add_sd
+  // CHECK: fadd double
+  return _mm_add_sd(A, B);
+}
+
+__m128i test_mm_adds_epi8(__m128i A, __m128i B) {
+  // CHECK-LABEL: test_mm_adds_epi8
+  // CHECK: call <16 x i8> @llvm.x86.sse2.padds.b
+  return _mm_adds_epi8(A, B);
+}
+
+__m128i test_mm_adds_epi16(__m128i A, __m128i B) {
+  // CHECK-LABEL: test_mm_adds_epi16
+  // CHECK: call <8 x i16> @llvm.x86.sse2.padds.w
+  return _mm_adds_epi16(A, B);
+}
+
+__m128i test_mm_adds_epu8(__m128i A, __m128i B) {
+  // CHECK-LABEL: test_mm_adds_epu8
+  // CHECK: call <16 x i8> @llvm.x86.sse2.paddus.b
+  return _mm_adds_epu8(A, B);
+}
+
+__m128i test_mm_adds_epu16(__m128i A, __m128i B) {
+  // CHECK-LABEL: test_mm_adds_epu16
+  // CHECK: call <8 x i16> @llvm.x86.sse2.paddus.w
+  return _mm_adds_epu16(A, B);
+}
+
+__m128d test_mm_and_pd(__m128d A, __m128d B) {
+  // CHECK-LABEL: test_mm_and_pd
+  // CHECK: and <4 x i32>
+  return _mm_and_pd(A, B);
+}
+
+__m128i test_mm_and_si128(__m128i A, __m128i B) {
+  // CHECK-LABEL: test_mm_and_si128
+  // CHECK: and <2 x i64>
+  return _mm_and_si128(A, B);
+}
+
+__m128i test_mm_avg_epu8(__m128i A, __m128i B) {
+  // CHECK-LABEL: test_mm_avg_epu8
+  // CHECK: call <16 x i8> @llvm.x86.sse2.pavg.b
+  return _mm_avg_epu8(A, B);
+}
+
+__m128i test_mm_avg_epu16(__m128i A, __m128i B) {
+  // CHECK-LABEL: test_mm_avg_epu16
+  // CHECK: call <8 x i16> @llvm.x86.sse2.pavg.w
+  return _mm_avg_epu16(A, B);
+}
+
+__m128i test_mm_bslli_si128(__m128i A) {
+  // CHECK-LABEL: test_mm_bslli_si128
+  // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i32> <i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26>
+  return _mm_bslli_si128(A, 5);
+}
+
+__m128i test_mm_bsrli_si128(__m128i A) {
+  // CHECK-LABEL: test_mm_bsrli_si128
+  // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20>
+  return _mm_bsrli_si128(A, 5);
+}
+
+void test_mm_clflush(void* A) {
+  // CHECK-LABEL: test_mm_clflush
+  // CHECK: call void @llvm.x86.sse2.clflush(i8* %{{.*}})
+  _mm_clflush(A);
+}
+
+__m128i test_mm_cmpeq_epi8(__m128i A, __m128i B) {
+  // CHECK-LABEL: test_mm_cmpeq_epi8
+  // CHECK: icmp eq <16 x i8>
+  return _mm_cmpeq_epi8(A, B);
+}
+
+__m128i test_mm_cmpeq_epi16(__m128i A, __m128i B) {
+  // CHECK-LABEL: test_mm_cmpeq_epi16
+  // CHECK: icmp eq <8 x i16>
+  return _mm_cmpeq_epi16(A, B);
+}
+
+__m128i test_mm_cmpeq_epi32(__m128i A, __m128i B) {
+  // CHECK-LABEL: test_mm_cmpeq_epi32
+  // CHECK: icmp eq <4 x i32>
+  return _mm_cmpeq_epi32(A, B);
+}
+
+__m128d test_mm_cmpeq_pd(__m128d A, __m128d B) {
+  // CHECK-LABEL: test_mm_cmpeq_pd
+  // CHECK: call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 0)
+  return _mm_cmpeq_pd(A, B);
+}
+
+__m128d test_mm_cmpeq_sd(__m128d A, __m128d B) {
+  // CHECK-LABEL: test_mm_cmpeq_sd
+  // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 0)
+  return _mm_cmpeq_sd(A, B);
+}
+
+__m128d test_mm_cmpge_pd(__m128d A, __m128d B) {
+  // CHECK-LABEL: test_mm_cmpge_pd
+  // CHECK: call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 2)
+  return _mm_cmpge_pd(A, B);
+}
+
+__m128d test_mm_cmpge_sd(__m128d A, __m128d B) {
+  // CHECK-LABEL: test_mm_cmpge_sd
+  // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 2)
+  return _mm_cmpge_sd(A, B);
+}
+
+__m128i test_mm_cmpgt_epi8(__m128i A, __m128i B) {
+  // CHECK-LABEL: test_mm_cmpgt_epi8
+  // CHECK: icmp sgt <16 x i8>
+  return _mm_cmpgt_epi8(A, B);
+}
+
+__m128i test_mm_cmpgt_epi16(__m128i A, __m128i B) {
+  // CHECK-LABEL: test_mm_cmpgt_epi16
+  // CHECK: icmp sgt <8 x i16>
+  return _mm_cmpgt_epi16(A, B);
+}
+
+__m128i test_mm_cmpgt_epi32(__m128i A, __m128i B) {
+  // CHECK-LABEL: test_mm_cmpgt_epi32
+  // CHECK: icmp sgt <4 x i32>
+  return _mm_cmpgt_epi32(A, B);
+}
+
+__m128d test_mm_cmpgt_pd(__m128d A, __m128d B) {
+  // CHECK-LABEL: test_mm_cmpgt_pd
+  // CHECK: call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 1)
+  return _mm_cmpgt_pd(A, B);
+}
+
+__m128d test_mm_cmpgt_sd(__m128d A, __m128d B) {
+  // CHECK-LABEL: test_mm_cmpgt_sd
+  // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 1)
+  return _mm_cmpgt_sd(A, B);
+}
+
+__m128d test_mm_cmple_pd(__m128d A, __m128d B) {
+  // CHECK-LABEL: test_mm_cmple_pd
+  // CHECK: call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 2)
+  return _mm_cmple_pd(A, B);
+}
+
+__m128d test_mm_cmple_sd(__m128d A, __m128d B) {
+  // CHECK-LABEL: test_mm_cmple_sd
+  // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 2)
+  return _mm_cmple_sd(A, B);
+}
+
+__m128i test_mm_cmplt_epi8(__m128i A, __m128i B) {
+  // CHECK-LABEL: test_mm_cmplt_epi8
+  // CHECK: icmp sgt <16 x i8>
+  return _mm_cmplt_epi8(A, B);
+}
+
+__m128i test_mm_cmplt_epi16(__m128i A, __m128i B) {
+  // CHECK-LABEL: test_mm_cmplt_epi16
+  // CHECK: icmp sgt <8 x i16>
+  return _mm_cmplt_epi16(A, B);
+}
+
+__m128i test_mm_cmplt_epi32(__m128i A, __m128i B) {
+  // CHECK-LABEL: test_mm_cmplt_epi32
+  // CHECK: icmp sgt <4 x i32>
+  return _mm_cmplt_epi32(A, B);
+}
+
+__m128d test_mm_cmplt_pd(__m128d A, __m128d B) {
+  // CHECK-LABEL: test_mm_cmplt_pd
+  // CHECK: call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 1)
+  return _mm_cmplt_pd(A, B);
+}
+
+__m128d test_mm_cmplt_sd(__m128d A, __m128d B) {
+  // CHECK-LABEL: test_mm_cmplt_sd
+  // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 1)
+  return _mm_cmplt_sd(A, B);
+}
+
+__m128d test_mm_cmpneq_pd(__m128d A, __m128d B) {
+  // CHECK-LABEL: test_mm_cmpneq_pd
+  // CHECK: call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 4)
+  return _mm_cmpneq_pd(A, B);
+}
+
+__m128d test_mm_cmpneq_sd(__m128d A, __m128d B) {
+  // CHECK-LABEL: test_mm_cmpneq_sd
+  // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 4)
+  return _mm_cmpneq_sd(A, B);
+}
+
+__m128d test_mm_cmpnge_pd(__m128d A, __m128d B) {
+  // CHECK-LABEL: test_mm_cmpnge_pd
+  // CHECK: call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 6)
+  return _mm_cmpnge_pd(A, B);
+}
+
+__m128d test_mm_cmpnge_sd(__m128d A, __m128d B) {
+  // CHECK-LABEL: test_mm_cmpnge_sd
+  // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 6)
+  return _mm_cmpnge_sd(A, B);
+}
+
+__m128d test_mm_cmpngt_pd(__m128d A, __m128d B) {
+  // CHECK-LABEL: test_mm_cmpngt_pd
+  // CHECK: call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 5)
+  return _mm_cmpngt_pd(A, B);
+}
+
+__m128d test_mm_cmpngt_sd(__m128d A, __m128d B) {
+  // CHECK-LABEL: test_mm_cmpngt_sd
+  // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 5)
+  return _mm_cmpngt_sd(A, B);
+}
+
+__m128d test_mm_cmpnle_pd(__m128d A, __m128d B) {
+  // CHECK-LABEL: test_mm_cmpnle_pd
+  // CHECK: call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 6)
+  return _mm_cmpnle_pd(A, B);
+}
+
+__m128d test_mm_cmpnle_sd(__m128d A, __m128d B) {
+  // CHECK-LABEL: test_mm_cmpnle_sd
+  // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 6)
+  return _mm_cmpnle_sd(A, B);
+}
+
+__m128d test_mm_cmpnlt_pd(__m128d A, __m128d B) {
+  // CHECK-LABEL: test_mm_cmpnlt_pd
+  // CHECK: call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 5)
+  return _mm_cmpnlt_pd(A, B);
+}
+
+__m128d test_mm_cmpnlt_sd(__m128d A, __m128d B) {
+  // CHECK-LABEL: test_mm_cmpnlt_sd
+  // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 5)
+  return _mm_cmpnlt_sd(A, B);
+}
+
+__m128d test_mm_cmpord_pd(__m128d A, __m128d B) {
+  // CHECK-LABEL: test_mm_cmpord_pd
+  // CHECK: call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 7)
+  return _mm_cmpord_pd(A, B);
+}
+
+__m128d test_mm_cmpord_sd(__m128d A, __m128d B) {
+  // CHECK-LABEL: test_mm_cmpord_sd
+  // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 7)
+  return _mm_cmpord_sd(A, B);
+}
+
+__m128d test_mm_cmpunord_pd(__m128d A, __m128d B) {
+  // CHECK-LABEL: test_mm_cmpunord_pd
+  // CHECK: call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 3)
+  return _mm_cmpunord_pd(A, B);
+}
+
+__m128d test_mm_cmpunord_sd(__m128d A, __m128d B) {
+  // CHECK-LABEL: test_mm_cmpunord_sd
+  // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 3)
+  return _mm_cmpunord_sd(A, B);
+}
+
+int test_mm_comieq_sd(__m128d A, __m128d B) {
+  // CHECK-LABEL: test_mm_comieq_sd
+  // CHECK: call i32 @llvm.x86.sse2.comieq.sd
+  return _mm_comieq_sd(A, B);
+}
+
+int test_mm_comige_sd(__m128d A, __m128d B) {
+  // CHECK-LABEL: test_mm_comige_sd
+  // CHECK: call i32 @llvm.x86.sse2.comige.sd
+  return _mm_comige_sd(A, B);
+}
+
+int test_mm_comigt_sd(__m128d A, __m128d B) {
+  // CHECK-LABEL: test_mm_comigt_sd
+  // CHECK: call i32 @llvm.x86.sse2.comigt.sd
+  return _mm_comigt_sd(A, B);
+}
+
+int test_mm_comile_sd(__m128d A, __m128d B) {
+  // CHECK-LABEL: test_mm_comile_sd
+  // CHECK: call i32 @llvm.x86.sse2.comile.sd
+  return _mm_comile_sd(A, B);
+}
+
+int test_mm_comilt_sd(__m128d A, __m128d B) {
+  // CHECK-LABEL: test_mm_comilt_sd
+  // CHECK: call i32 @llvm.x86.sse2.comilt.sd
+  return _mm_comilt_sd(A, B);
+}
+
+int test_mm_comineq_sd(__m128d A, __m128d B) {
+  // CHECK-LABEL: test_mm_comineq_sd
+  // CHECK: call i32 @llvm.x86.sse2.comineq.sd
+  return _mm_comineq_sd(A, B);
+}
+
+__m128d test_mm_cvtepi32_pd(__m128i A) {
+  // CHECK-LABEL: test_mm_cvtepi32_pd
+  // CHECK: call <2 x double> @llvm.x86.sse2.cvtdq2pd
+  return _mm_cvtepi32_pd(A);
+}
+
+__m128 test_mm_cvtepi32_ps(__m128i A) {
+  // CHECK-LABEL: test_mm_cvtepi32_ps
+  // CHECK: call <4 x float> @llvm.x86.sse2.cvtdq2ps
+  return _mm_cvtepi32_ps(A);
+}
+
+__m128i test_mm_cvtpd_epi32(__m128d A) {
+  // CHECK-LABEL: test_mm_cvtpd_epi32
+  // CHECK: call <4 x i32> @llvm.x86.sse2.cvtpd2dq
+  return _mm_cvtpd_epi32(A);
+}
+
+__m128 test_mm_cvtpd_ps(__m128d A) {
+  // CHECK-LABEL: test_mm_cvtpd_ps
+  // CHECK: call <4 x float> @llvm.x86.sse2.cvtpd2ps
+  return _mm_cvtpd_ps(A);
+}
+
+__m128i test_mm_cvtps_epi32(__m128 A) {
+  // CHECK-LABEL: test_mm_cvtps_epi32
+  // CHECK: call <4 x i32> @llvm.x86.sse2.cvtps2dq
+  return _mm_cvtps_epi32(A);
+}
+
+__m128d test_mm_cvtps_pd(__m128 A) {
+  // CHECK-LABEL: test_mm_cvtps_pd
+  // CHECK: call <2 x double> @llvm.x86.sse2.cvtps2pd
+  return _mm_cvtps_pd(A);
+}
+
+double test_mm_cvtsd_f64(__m128d A) {
+  // CHECK-LABEL: test_mm_cvtsd_f64
+  // CHECK: extractelement <2 x double> %{{.*}}, i32 0
+  return _mm_cvtsd_f64(A);
+}
+
+int test_mm_cvtsd_si32(__m128d A) {
+  // CHECK-LABEL: test_mm_cvtsd_si32
+  // CHECK: call i32 @llvm.x86.sse2.cvtsd2si
+  return _mm_cvtsd_si32(A);
+}
+
+long long test_mm_cvtsd_si64(__m128d A) {
+  // CHECK-LABEL: test_mm_cvtsd_si64
+  // CHECK: call i64 @llvm.x86.sse2.cvtsd2si64
+  return _mm_cvtsd_si64(A);
+}
+
+__m128 test_mm_cvtsd_ss(__m128 A, __m128d B) {
+  // CHECK-LABEL: test_mm_cvtsd_ss
+  // CHECK: fptrunc double %{{.*}} to float
+  return _mm_cvtsd_ss(A, B);
+}
+
+int test_mm_cvtsi128_si32(__m128i A) {
+  // CHECK-LABEL: test_mm_cvtsi128_si32
+  // CHECK: extractelement <4 x i32> %{{.*}}, i32 0
+  return _mm_cvtsi128_si32(A);
+}
+
+long long test_mm_cvtsi128_si64(__m128i A) {
+  // CHECK-LABEL: test_mm_cvtsi128_si64
+  // CHECK: extractelement <2 x i64> %{{.*}}, i32 0
+  return _mm_cvtsi128_si64(A);
+}
+
+__m128d test_mm_cvtsi32_sd(__m128d A, int B) {
+  // CHECK-LABEL: test_mm_cvtsi32_sd
+  // CHECK: sitofp i32 %{{.*}} to double
+  // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0
+  return _mm_cvtsi32_sd(A, B);
+}
+
+__m128i test_mm_cvtsi32_si128(int A) {
+  // CHECK-LABEL: test_mm_cvtsi32_si128
+  // CHECK: insertelement <4 x i32> undef, i32 %{{.*}}, i32 0
+  return _mm_cvtsi32_si128(A);
+}
+
+__m128d test_mm_cvtsi64_sd(__m128d A, long long B) {
+  // CHECK-LABEL: test_mm_cvtsi64_sd
+  // CHECK: sitofp i64 %{{.*}} to double
+  // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0
+  return _mm_cvtsi64_sd(A, B);
+}
+
+__m128i test_mm_cvtsi64_si128(long long A) {
+  // CHECK-LABEL: test_mm_cvtsi64_si128
+  // CHECK: insertelement <2 x i64> undef, i64 %{{.*}}, i32 0
+  return _mm_cvtsi64_si128(A);
+}
+
+__m128d test_mm_cvtss_sd(__m128d A, __m128 B) {
+  // CHECK-LABEL: test_mm_cvtss_sd
+  // CHECK: extractelement <4 x float> %{{.*}}, i32 0
+  // CHECK: fpext float %{{.*}} to double
+  // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0
+  return _mm_cvtss_sd(A, B);
+}
+
+__m128i test_mm_cvttpd_epi32(__m128d A) {
+  // CHECK-LABEL: test_mm_cvttpd_epi32
+  // CHECK: call <4 x i32> @llvm.x86.sse2.cvttpd2dq
+  return _mm_cvttpd_epi32(A);
+}
+
+__m128i test_mm_cvttps_epi32(__m128 A) {
+  // CHECK-LABEL: test_mm_cvttps_epi32
+  // CHECK: call <4 x i32> @llvm.x86.sse2.cvttps2dq
+  return _mm_cvttps_epi32(A);
+}
+
+int test_mm_cvttsd_si32(__m128d A) {
+  // CHECK-LABEL: test_mm_cvttsd_si32
+  // CHECK: extractelement <2 x double> %{{.*}}, i32 0
+  // CHECK: fptosi double %{{.*}} to i32
+  return _mm_cvttsd_si32(A);
+}
+
+long long test_mm_cvttsd_si64(__m128d A) {
+  // CHECK-LABEL: test_mm_cvttsd_si64
+  // CHECK: extractelement <2 x double> %{{.*}}, i32 0
+  // CHECK: fptosi double %{{.*}} to i64
+  return _mm_cvttsd_si64(A);
+}
+
+__m128d test_mm_div_pd(__m128d A, __m128d B) {
+  // CHECK-LABEL: test_mm_div_pd
+  // CHECK: fdiv <2 x double>
+  return _mm_div_pd(A, B);
+}
+
+__m128d test_mm_div_sd(__m128d A, __m128d B) {
+  // CHECK-LABEL: test_mm_div_sd
+  // CHECK: fdiv double
+  return _mm_div_sd(A, B);
+}
+
+// Lowering to pextrw requires optimization.
+int test_mm_extract_epi16(__m128i A) {
+  // CHECK-LABEL: test_mm_extract_epi16
+  // CHECK: [[x:%.*]] = and i32 %{{.*}}, 7
+  // CHECK: extractelement <8 x i16> %{{.*}}, i32 [[x]]
+  return _mm_extract_epi16(A, 8);
+}
+
+__m128i test_mm_insert_epi16(__m128i A, short B) {
+  // CHECK-LABEL: test_mm_insert_epi16
+  // CHECK: [[x:%.*]] = and i32 %{{.*}}, 7
+  // CHECK: insertelement <8 x i16> %{{.*}}, i32 [[x]]
+  return _mm_insert_epi16(A, B, 8);
+}
+
+void test_mm_lfence() {
+  // CHECK-LABEL: test_mm_lfence
+  // CHECK: call void @llvm.x86.sse2.lfence()
+  _mm_lfence();
+}
+
+__m128d test_mm_load_pd(double const* A) {
+  // CHECK-LABEL: test_mm_load_pd
+  // CHECK: load <2 x double>, <2 x double>* %{{.*}}, align 16
+  return _mm_load_pd(A);
+}
+
+__m128d test_mm_load_sd(double const* A) {
+  // CHECK-LABEL: test_mm_load_sd
+  // CHECK: load double, double* %{{.*}}, align 1
+  return _mm_load_sd(A);
+}
+
+__m128i test_mm_load_si128(__m128i const* A) {
+  // CHECK-LABEL: test_mm_load_si128
+  // CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 16
+  return _mm_load_si128(A);
+}
+
+__m128d test_mm_load1_pd(double const* A) {
+  // CHECK-LABEL: test_mm_load1_pd
+  // CHECK: load double, double* %{{.*}}, align 8
+  // CHECK: insertelement <2 x double> undef, double %{{.*}}, i32 0
+  // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1
+  return _mm_load1_pd(A);
+}
+
+__m128d test_mm_loadh_pd(__m128d x, void* y) {
+  // CHECK-LABEL: test_mm_loadh_pd
+  // CHECK: load double, double* %{{.*}}, align 1{{$}}
+  return _mm_loadh_pd(x, y);
+}
+
+__m128d test_mm_loadr_pd(double const* A) {
+  // CHECK-LABEL: test_mm_loadr_pd
+  // CHECK: load <2 x double>, <2 x double>* %{{.*}}, align 16
+  // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> <i32 1, i32 0>
+  return _mm_loadr_pd(A);
+}
+
+__m128d test_mm_loadu_pd(double const* A) {
+  // CHECK-LABEL: test_mm_loadu_pd
+  // CHECK: load <2 x double>, <2 x double>* %{{.*}}, align 1
+  return _mm_loadu_pd(A);
+}
+
+__m128i test_mm_loadu_si128(__m128i const* A) {
+  // CHECK-LABEL: test_mm_loadu_si128
+  // CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 1
+  return _mm_loadu_si128(A);
+}
+
+__m128i test_mm_madd_epi16(__m128i A, __m128i B) {
+  // CHECK-LABEL: test_mm_madd_epi16
+  // CHECK: call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+  return _mm_madd_epi16(A, B);
+}
+
+void test_mm_maskmoveu_si128(__m128i A, __m128i B, char* C) {
+  // CHECK-LABEL: test_mm_maskmoveu_si128
+  // CHECK: call void @llvm.x86.sse2.maskmov.dqu(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i8* %{{.*}})
+  _mm_maskmoveu_si128(A, B, C);
+}
+
+__m128i test_mm_max_epi16(__m128i A, __m128i B) {
+  // CHECK-LABEL: test_mm_max_epi16
+  // CHECK: call <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+  return _mm_max_epi16(A, B);
+}
+
+__m128i test_mm_max_epu8(__m128i A, __m128i B) {
+  // CHECK-LABEL: test_mm_max_epu8
+  // CHECK: call <16 x i8> @llvm.x86.sse2.pmaxu.b(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  return _mm_max_epu8(A, B);
+}
+
+__m128d test_mm_max_pd(__m128d A, __m128d B) {
+  // CHECK-LABEL: test_mm_max_pd
+  // CHECK: call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
+  return _mm_max_pd(A, B);
+}
+
+__m128d test_mm_max_sd(__m128d A, __m128d B) {
+  // CHECK-LABEL: test_mm_max_sd
+  // CHECK: call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
+  return _mm_max_sd(A, B);
+}
+
+void test_mm_mfence() {
+  // CHECK-LABEL: test_mm_mfence
+  // CHECK: call void @llvm.x86.sse2.mfence()
+  _mm_mfence();
+}
+
+__m128i test_mm_min_epi16(__m128i A, __m128i B) {
+  // CHECK-LABEL: test_mm_min_epi16
+  // CHECK: call <8 x i16> @llvm.x86.sse2.pmins.w(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+  return _mm_min_epi16(A, B);
+}
+
+__m128i test_mm_min_epu8(__m128i A, __m128i B) {
+  // CHECK-LABEL: test_mm_min_epu8
+  // CHECK: call <16 x i8> @llvm.x86.sse2.pminu.b(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  return _mm_min_epu8(A, B);
+}
+
+__m128d test_mm_min_pd(__m128d A, __m128d B) {
+  // CHECK-LABEL: test_mm_min_pd
+  // CHECK: call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
+  return _mm_min_pd(A, B);
+}
+
+__m128d test_mm_min_sd(__m128d A, __m128d B) {
+  // CHECK-LABEL: test_mm_min_sd
+  // CHECK: call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
+  return _mm_min_sd(A, B);
+}
+
+int test_mm_movemask_epi8(__m128i A) {
+  // CHECK-LABEL: test_mm_movemask_epi8
+  // CHECK: call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %{{.*}})
+  return _mm_movemask_epi8(A);
+}
+
+int test_mm_movemask_pd(__m128d A) {
+  // CHECK-LABEL: test_mm_movemask_pd
+  // CHECK: call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> %{{.*}})
+  return _mm_movemask_pd(A);
+}
+
+__m128i test_mm_mul_epu32(__m128i A, __m128i B) {
+  // CHECK-LABEL: test_mm_mul_epu32
+  // CHECK: call <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+  return _mm_mul_epu32(A, B);
+}
+
+__m128d test_mm_mul_pd(__m128d A, __m128d B) {
+  // CHECK-LABEL: test_mm_mul_pd
+  // CHECK: fmul <2 x double> %{{.*}}, %{{.*}}
+  return _mm_mul_pd(A, B);
+}
+
+__m128d test_mm_mul_sd(__m128d A, __m128d B) {
+  // CHECK-LABEL: test_mm_mul_sd
+  // CHECK: fmul double %{{.*}}, %{{.*}}
+  return _mm_mul_sd(A, B);
+}
+
+__m128i test_mm_mulhi_epi16(__m128i A, __m128i B) {
+  // CHECK-LABEL: test_mm_mulhi_epi16
+  // CHECK: call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+  return _mm_mulhi_epi16(A, B);
+}
+
+__m128i test_mm_mulhi_epu16(__m128i A, __m128i B) {
+  // CHECK-LABEL: test_mm_mulhi_epu16
+  // CHECK: call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+  return _mm_mulhi_epu16(A, B);
+}
+
+__m128i test_mm_mullo_epi16(__m128i A, __m128i B) {
+  // CHECK-LABEL: test_mm_mullo_epi16
+  // CHECK: mul <8 x i16> %{{.*}}, %{{.*}}
+  return _mm_mullo_epi16(A, B);
+}
+
+__m128d test_mm_or_pd(__m128d A, __m128d B) {
+  // CHECK-LABEL: test_mm_or_pd
+  // CHECK: or <4 x i32> %{{.*}}, %{{.*}}
+  return _mm_or_pd(A, B);
+}
+
+__m128i test_mm_or_si128(__m128i A, __m128i B) {
+  // CHECK-LABEL: test_mm_or_si128
+  // CHECK: or <2 x i64> %{{.*}}, %{{.*}}
+  return _mm_or_si128(A, B);
+}
+
+__m128i test_mm_packs_epi16(__m128i A, __m128i B) {
+  // CHECK-LABEL: test_mm_packs_epi16
+  // CHECK: call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+  return _mm_packs_epi16(A, B);
+}
+
+__m128i test_mm_packs_epi32(__m128i A, __m128i B) {
+  // CHECK-LABEL: test_mm_packs_epi32
+  // CHECK: call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+  return _mm_packs_epi32(A, B);
+}
+
+__m128i test_mm_packus_epi16(__m128i A, __m128i B) {
+  // CHECK-LABEL: test_mm_packus_epi16
+  // CHECK: call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+  return _mm_packus_epi16(A, B);
+}
+
+void test_mm_pause() {
+  // CHECK-LABEL: test_mm_pause
+  // CHECK: call void @llvm.x86.sse2.pause()
+  return _mm_pause();
+}
+
+__m128i test_mm_sad_epu8(__m128i A, __m128i B) {
+  // CHECK-LABEL: test_mm_sad_epu8
+  // CHECK: call <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  return _mm_sad_epu8(A, B);
+}
+
+__m128d test_mm_setzero_pd() {
+  // CHECK-LABEL: test_mm_setzero_pd
+  // CHECK: store <2 x double> zeroinitializer
+  return _mm_setzero_pd();
+}
+
+__m128i test_mm_setzero_si128() {
+  // CHECK-LABEL: test_mm_setzero_si128
+  // CHECK: store <2 x i64> zeroinitializer
+  return _mm_setzero_si128();
+}
+
+__m128i test_mm_shuffle_epi32(__m128i A) {
+  // CHECK-LABEL: test_mm_shuffle_epi32
+  // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> zeroinitializer
+  return _mm_shuffle_epi32(A, 0);
+}
+
+__m128d test_mm_shuffle_pd(__m128d A, __m128d B) {
+  // CHECK-LABEL: test_mm_shuffle_pd
+  // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> <i32 1, i32 2>
+  return _mm_shuffle_pd(A, B, 1);
+}
+
+__m128i test_mm_shufflehi_epi16(__m128i A) {
+  // CHECK-LABEL: test_mm_shufflehi_epi16
+  // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 4, i32 4>
+  return _mm_shufflehi_epi16(A, 0);
+}
+
+__m128i test_mm_shufflelo_epi16(__m128i A) {
+  // CHECK-LABEL: test_mm_shufflelo_epi16
+  // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 5, i32 6, i32 7>
+  return _mm_shufflelo_epi16(A, 0);
+}
+
+__m128i test_mm_sll_epi16(__m128i A, __m128i B) {
+  // CHECK-LABEL: test_mm_sll_epi16
+  // CHECK: call <8 x i16> @llvm.x86.sse2.psll.w
+  return _mm_sll_epi16(A, B);
+}
+
+__m128i test_mm_sll_epi32(__m128i A, __m128i B) {
+  // CHECK-LABEL: test_mm_sll_epi32
+  // CHECK: call <4 x i32> @llvm.x86.sse2.psll.d
+  return _mm_sll_epi32(A, B);
+}
+
+__m128i test_mm_sll_epi64(__m128i A, __m128i B) {
+  // CHECK-LABEL: test_mm_sll_epi64
+  // CHECK: call <2 x i64> @llvm.x86.sse2.psll.q
+  return _mm_sll_epi64(A, B);
+}
+
+__m128i test_mm_slli_epi16(__m128i A) {
+  // CHECK-LABEL: test_mm_slli_epi16
+  // CHECK: call <8 x i16> @llvm.x86.sse2.pslli.w
+  return _mm_slli_epi16(A, 1);
+}
+
+__m128i test_mm_slli_epi32(__m128i A) {
+  // CHECK-LABEL: test_mm_slli_epi32
+  // CHECK: call <4 x i32> @llvm.x86.sse2.pslli.d
+  return _mm_slli_epi32(A, 1);
+}
+
+__m128i test_mm_slli_epi64(__m128i A) {
+  // CHECK-LABEL: test_mm_slli_epi64
+  // CHECK: call <2 x i64> @llvm.x86.sse2.pslli.q
+  return _mm_slli_epi64(A, 1);
+}
+
+__m128i test_mm_slli_si128(__m128i A) {
+  // CHECK-LABEL: test_mm_slli_si128
+  // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i32> <i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26>
+  return _mm_slli_si128(A, 5);
+}
+
+__m128d test_mm_sqrt_pd(__m128d A) {
+  // CHECK-LABEL: test_mm_sqrt_pd
+  // CHECK: call <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double> %{{.*}})
+  return _mm_sqrt_pd(A);
+}
+
+__m128d test_mm_sqrt_sd(__m128d A, __m128d B) {
+  // CHECK-LABEL: test_mm_sqrt_sd
+  // CHECK: call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %{{.*}})
+  return _mm_sqrt_sd(A, B);
+}
+
+__m128i test_mm_sra_epi16(__m128i A, __m128i B) {
+  // CHECK-LABEL: test_mm_sra_epi16
+  // CHECK: call <8 x i16> @llvm.x86.sse2.psra.w
+  return _mm_sra_epi16(A, B);
+}
+
+__m128i test_mm_sra_epi32(__m128i A, __m128i B) {
+  // CHECK-LABEL: test_mm_sra_epi32
+  // CHECK: call <4 x i32> @llvm.x86.sse2.psra.d
+  return _mm_sra_epi32(A, B);
+}
+
+__m128i test_mm_srai_epi16(__m128i A) {
+  // CHECK-LABEL: test_mm_srai_epi16
+  // CHECK: call <8 x i16> @llvm.x86.sse2.psrai.w
+  return _mm_srai_epi16(A, 1);
+}
+
+__m128i test_mm_srai_epi32(__m128i A) {
+  // CHECK-LABEL: test_mm_srai_epi32
+  // CHECK: call <4 x i32> @llvm.x86.sse2.psrai.d
+  return _mm_srai_epi32(A, 1);
+}
+
+__m128i test_mm_srl_epi16(__m128i A, __m128i B) {
+  // CHECK-LABEL: test_mm_srl_epi16
+  // CHECK: call <8 x i16> @llvm.x86.sse2.psrl.w
+  return _mm_srl_epi16(A, B);
+}
+
+__m128i test_mm_srl_epi32(__m128i A, __m128i B) {
+  // CHECK-LABEL: test_mm_srl_epi32
+  // CHECK: call <4 x i32> @llvm.x86.sse2.psrl.d
+  return _mm_srl_epi32(A, B);
+}
+
+__m128i test_mm_srl_epi64(__m128i A, __m128i B) {
+  // CHECK-LABEL: test_mm_srl_epi64
+  // CHECK: call <2 x i64> @llvm.x86.sse2.psrl.q
+  return _mm_srl_epi64(A, B);
+}
+
+__m128i test_mm_srli_epi16(__m128i A) {
+  // CHECK-LABEL: test_mm_srli_epi16
+  // CHECK: call <8 x i16> @llvm.x86.sse2.psrli.w
+  return _mm_srli_epi16(A, 1);
+}
+
+__m128i test_mm_srli_epi32(__m128i A) {
+  // CHECK-LABEL: test_mm_srli_epi32
+  // CHECK: call <4 x i32> @llvm.x86.sse2.psrli.d
+  return _mm_srli_epi32(A, 1);
+}
+
+__m128i test_mm_srli_epi64(__m128i A) {
+  // CHECK-LABEL: test_mm_srli_epi64
+  // CHECK: call <2 x i64> @llvm.x86.sse2.psrli.q
+  return _mm_srli_epi64(A, 1);
+}
+
+__m128i test_mm_srli_si128(__m128i A) {
+  // CHECK-LABEL: test_mm_srli_si128
+  // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20>
+  return _mm_srli_si128(A, 5);
+}
+
+void test_mm_store_pd(double* A, __m128d B) {
+  // CHECK-LABEL: test_mm_store_pd
+  // CHECK: store <2 x double> %{{.*}}, <2 x double>* %{{.*}}, align 16
+  _mm_store_pd(A, B);
+}
+
+void test_mm_store_sd(double* A, __m128d B) {
+  // CHECK-LABEL: test_mm_store_sd
+  // CHECK: store double %{{.*}}, double* %{{.*}}, align 1{{$}}
+  _mm_store_sd(A, B);
+}
+
+void test_mm_store_si128(__m128i* A, __m128i B) {
+  // CHECK-LABEL: test_mm_store_si128
+  // CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16
+  _mm_store_si128(A, B);
+}
+
+void test_mm_storeh_pd(double* A, __m128d B) {
+  // CHECK-LABEL: test_mm_storeh_pd
+  // CHECK: store double %{{.*}}, double* %{{.*}}, align 1
+  _mm_storeh_pd(A, B);
+}
+
+void test_mm_storel_pd(double* A, __m128d B) {
+  // CHECK-LABEL: test_mm_storel_pd
+  // CHECK: store double %{{.*}}, double* %{{.*}}, align 1
+  _mm_storel_pd(A, B);
+}
+
+void test_mm_storeu_pd(double* A, __m128d B) {
+  // CHECK-LABEL: test_mm_storeu_pd
+  // CHECK: store <2 x double> %{{.*}}, <2 x double>* %{{.*}}, align 1
+  _mm_storeu_pd(A, B);
+}
+
+void test_mm_storeu_si128(__m128i* A, __m128i B) {
+  // CHECK-LABEL: test_mm_storeu_si128
+  // CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 1
+  _mm_storeu_si128(A, B);
+}
+
+void test_mm_stream_pd(double *A, __m128d B) {
+  // CHECK-LABEL: test_mm_stream_pd
+  // CHECK: store <2 x double> %{{.*}}, <2 x double>* %{{.*}}, align 16, !nontemporal
+  _mm_stream_pd(A, B);
+}
+
+void test_mm_stream_si32(int *A, int B) {
+  // CHECK-LABEL: test_mm_stream_si32
+  // CHECK: store i32 %{{.*}}, i32* %{{.*}}, align 1, !nontemporal
+  _mm_stream_si32(A, B);
+}
+
+void test_mm_stream_si64(long long *A, long long B) {
+  // CHECK-LABEL: test_mm_stream_si64
+  // CHECK: store i64 %{{.*}}, i64* %{{.*}}, align 1, !nontemporal
+  _mm_stream_si64(A, B);
+}
+
+void test_mm_stream_si128(__m128i *A, __m128i B) {
+  // CHECK-LABEL: test_mm_stream_si128
+  // CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16, !nontemporal
+  _mm_stream_si128(A, B);
+}
+
+__m128i test_mm_sub_epi8(__m128i A, __m128i B) {
+  // CHECK-LABEL: test_mm_sub_epi8
+  // CHECK: sub <16 x i8>
+  return _mm_sub_epi8(A, B);
+}
+
+__m128i test_mm_sub_epi16(__m128i A, __m128i B) {
+  // CHECK-LABEL: test_mm_sub_epi16
+  // CHECK: sub <8 x i16>
+  return _mm_sub_epi16(A, B);
+}
+
+__m128i test_mm_sub_epi32(__m128i A, __m128i B) {
+  // CHECK-LABEL: test_mm_sub_epi32
+  // CHECK: sub <4 x i32>
+  return _mm_sub_epi32(A, B);
+}
+
+__m128i test_mm_sub_epi64(__m128i A, __m128i B) {
+  // CHECK-LABEL: test_mm_sub_epi64
+  // CHECK: sub <2 x i64>
+  return _mm_sub_epi64(A, B);
+}
+
+__m128d test_mm_sub_pd(__m128d A, __m128d B) {
+  // CHECK-LABEL: test_mm_sub_pd
+  // CHECK: fsub <2 x double>
+  return _mm_sub_pd(A, B);
+}
+
+__m128d test_mm_sub_sd(__m128d A, __m128d B) {
+  // CHECK-LABEL: test_mm_sub_sd
+  // CHECK: fsub double
+  return _mm_sub_sd(A, B);
+}
+
+__m128i test_mm_subs_epi8(__m128i A, __m128i B) {
+  // CHECK-LABEL: test_mm_subs_epi8
+  // CHECK: call <16 x i8> @llvm.x86.sse2.psubs.b
+  return _mm_subs_epi8(A, B);
+}
+
+__m128i test_mm_subs_epi16(__m128i A, __m128i B) {
+  // CHECK-LABEL: test_mm_subs_epi16
+  // CHECK: call <8 x i16> @llvm.x86.sse2.psubs.w
+  return _mm_subs_epi16(A, B);
+}
+
+__m128i test_mm_subs_epu8(__m128i A, __m128i B) {
+  // CHECK-LABEL: test_mm_subs_epu8
+  // CHECK: call <16 x i8> @llvm.x86.sse2.psubus.b
+  return _mm_subs_epu8(A, B);
+}
+
+__m128i test_mm_subs_epu16(__m128i A, __m128i B) {
+  // CHECK-LABEL: test_mm_subs_epu16
+  // CHECK: call <8 x i16> @llvm.x86.sse2.psubus.w
+  return _mm_subs_epu16(A, B);
+}
+
+int test_mm_ucomieq_sd(__m128d A, __m128d B) {
+  // CHECK-LABEL: test_mm_ucomieq_sd
+  // CHECK: call i32 @llvm.x86.sse2.ucomieq.sd
+  return _mm_ucomieq_sd(A, B);
+}
+
+int test_mm_ucomige_sd(__m128d A, __m128d B) {
+  // CHECK-LABEL: test_mm_ucomige_sd
+  // CHECK: call i32 @llvm.x86.sse2.ucomige.sd
+  return _mm_ucomige_sd(A, B);
+}
+
+int test_mm_ucomigt_sd(__m128d A, __m128d B) {
+  // CHECK-LABEL: test_mm_ucomigt_sd
+  // CHECK: call i32 @llvm.x86.sse2.ucomigt.sd
+  return _mm_ucomigt_sd(A, B);
+}
+
+int test_mm_ucomile_sd(__m128d A, __m128d B) {
+  // CHECK-LABEL: test_mm_ucomile_sd
+  // CHECK: call i32 @llvm.x86.sse2.ucomile.sd
+  return _mm_ucomile_sd(A, B);
+}
+
+int test_mm_ucomilt_sd(__m128d A, __m128d B) {
+  // CHECK-LABEL: test_mm_ucomilt_sd
+  // CHECK: call i32 @llvm.x86.sse2.ucomilt.sd
+  return _mm_ucomilt_sd(A, B);
+}
+
+int test_mm_ucomineq_sd(__m128d A, __m128d B) {
+  // CHECK-LABEL: test_mm_ucomineq_sd
+  // CHECK: call i32 @llvm.x86.sse2.ucomineq.sd
+  return _mm_ucomineq_sd(A, B);
+}
+
+__m128i test_mm_unpackhi_epi8(__m128i A, __m128i B) {
+  // CHECK-LABEL: test_mm_unpackhi_epi8
+  // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
+  return _mm_unpackhi_epi8(A, B);
+}
+
+__m128i test_mm_unpackhi_epi16(__m128i A, __m128i B) {
+  // CHECK-LABEL: test_mm_unpackhi_epi16
+  // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
+  return _mm_unpackhi_epi16(A, B);
+}
+
+__m128i test_mm_unpackhi_epi32(__m128i A, __m128i B) {
+  // CHECK-LABEL: test_mm_unpackhi_epi32
+  // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
+  return _mm_unpackhi_epi32(A, B);
+}
+
+__m128i test_mm_unpackhi_epi64(__m128i A, __m128i B) {
+  // CHECK-LABEL: test_mm_unpackhi_epi64
+  // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i32> <i32 1, i32 3>
+  return _mm_unpackhi_epi64(A, B);
+}
+
+__m128d test_mm_unpackhi_pd(__m128d A, __m128d B) {
+  // CHECK-LABEL: test_mm_unpackhi_pd
+  // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> <i32 1, i32 3>
+  return _mm_unpackhi_pd(A, B);
+}
+
+__m128i test_mm_unpacklo_epi8(__m128i A, __m128i B) {
+  // CHECK-LABEL: test_mm_unpacklo_epi8
+  // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
+  return _mm_unpacklo_epi8(A, B);
+}
+
+__m128i test_mm_unpacklo_epi16(__m128i A, __m128i B) {
+  // CHECK-LABEL: test_mm_unpacklo_epi16
+  // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
+  return _mm_unpacklo_epi16(A, B);
+}
+
+__m128i test_mm_unpacklo_epi32(__m128i A, __m128i B) {
+  // CHECK-LABEL: test_mm_unpacklo_epi32
+  // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
+  return _mm_unpacklo_epi32(A, B);
+}
+
+__m128i test_mm_unpacklo_epi64(__m128i A, __m128i B) {
+  // CHECK-LABEL: test_mm_unpacklo_epi64
+  // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i32> <i32 0, i32 2>
+  return _mm_unpacklo_epi64(A, B);
+}
+
+__m128d test_mm_unpacklo_pd(__m128d A, __m128d B) {
+  // CHECK-LABEL: test_mm_unpacklo_pd
+  // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> <i32 0, i32 2>
+  return _mm_unpacklo_pd(A, B);
+}
+
+__m128d test_mm_xor_pd(__m128d A, __m128d B) {
+  // CHECK-LABEL: test_mm_xor_pd
+  // CHECK: xor <4 x i32> %{{.*}}, %{{.*}}
+  return _mm_xor_pd(A, B);
+}
+
+__m128i test_mm_xor_si128(__m128i A, __m128i B) {
+  // CHECK-LABEL: test_mm_xor_si128
+  // CHECK: xor <2 x i64> %{{.*}}, %{{.*}}
+  return _mm_xor_si128(A, B);
+}
diff --git a/test/CodeGen/sse3-builtins.c b/test/CodeGen/sse3-builtins.c
new file mode 100644
index 0000000..71a34e9
--- /dev/null
+++ b/test/CodeGen/sse3-builtins.c
@@ -0,0 +1,72 @@
+// RUN: %clang_cc1 %s -triple=x86_64-apple-darwin -target-feature +sse3 -emit-llvm -o - -Werror | FileCheck %s
+
+// Don't include mm_malloc.h, it's system specific.
+#define __MM_MALLOC_H
+
+#include <x86intrin.h>
+
+__m128d test_mm_addsub_pd(__m128d A, __m128d B) {
+  // CHECK-LABEL: test_mm_addsub_pd
+  // CHECK: call <2 x double> @llvm.x86.sse3.addsub.pd
+  return _mm_addsub_pd(A, B);
+}
+
+__m128 test_mm_addsub_ps(__m128 A, __m128 B) {
+  // CHECK-LABEL: test_mm_addsub_ps
+  // CHECK: call <4 x float> @llvm.x86.sse3.addsub.ps
+  return _mm_addsub_ps(A, B);
+}
+
+__m128d test_mm_hadd_pd(__m128d A, __m128d B) {
+  // CHECK-LABEL: test_mm_hadd_pd
+  // CHECK: call <2 x double> @llvm.x86.sse3.hadd.pd
+  return _mm_hadd_pd(A, B);
+}
+
+__m128 test_mm_hadd_ps(__m128 A, __m128 B) {
+  // CHECK-LABEL: test_mm_hadd_ps
+  // CHECK: call <4 x float> @llvm.x86.sse3.hadd.ps
+  return _mm_hadd_ps(A, B);
+}
+
+__m128d test_mm_hsub_pd(__m128d A, __m128d B) {
+  // CHECK-LABEL: test_mm_hsub_pd
+  // CHECK: call <2 x double> @llvm.x86.sse3.hsub.pd
+  return _mm_hsub_pd(A, B);
+}
+
+__m128 test_mm_hsub_ps(__m128 A, __m128 B) {
+  // CHECK-LABEL: test_mm_hsub_ps
+  // CHECK: call <4 x float> @llvm.x86.sse3.hsub.ps
+  return _mm_hsub_ps(A, B);
+}
+
+__m128i test_mm_lddqu_si128(__m128i const* P) {
+  // CHECK-LABEL: test_mm_lddqu_si128
+  // CHECK: call <16 x i8> @llvm.x86.sse3.ldu.dq
+  return _mm_lddqu_si128(P);
+}
+
+__m128d test_mm_loaddup_pd(double const* P) {
+  // CHECK-LABEL: test_mm_loaddup_pd
+  // CHECK: load double*
+  return _mm_loaddup_pd(P);
+}
+
+__m128d test_mm_movedup_pd(__m128d A) {
+  // CHECK-LABEL: test_mm_movedup_pd
+  // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> zeroinitializer
+  return _mm_movedup_pd(A);
+}
+
+__m128 test_mm_movehdup_ps(__m128 A) {
+  // CHECK-LABEL: test_mm_movehdup_ps
+  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
+  return _mm_movehdup_ps(A);
+}
+
+__m128 test_mm_moveldup_ps(__m128 A) {
+  // CHECK-LABEL: test_mm_moveldup_ps
+  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
+  return _mm_moveldup_ps(A);
+}
diff --git a/test/CodeGen/sse41-builtins.c b/test/CodeGen/sse41-builtins.c
new file mode 100644
index 0000000..9cd5c45
--- /dev/null
+++ b/test/CodeGen/sse41-builtins.c
@@ -0,0 +1,372 @@
+// RUN: %clang_cc1 %s -triple=x86_64-apple-darwin -target-feature +sse4.1 -emit-llvm -o - -Werror | FileCheck %s
+// RUN: %clang_cc1 %s -triple=x86_64-apple-darwin -target-feature +sse4.1 -fno-signed-char -emit-llvm -o - -Werror | FileCheck %s
+
+// Don't include mm_malloc.h, it's system specific.
+#define __MM_MALLOC_H
+
+#include <x86intrin.h>
+
+__m128i test_mm_blend_epi16(__m128i V1, __m128i V2) {
+  // CHECK-LABEL: test_mm_blend_epi16
+  // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 7>
+  return _mm_blend_epi16(V1, V2, 42);
+}
+
+__m128d test_mm_blend_pd(__m128d V1, __m128d V2) {
+  // CHECK-LABEL: test_mm_blend_pd
+  // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> <i32 0, i32 3>
+  return _mm_blend_pd(V1, V2, 2);
+}
+
+__m128 test_mm_blend_ps(__m128 V1, __m128 V2) {
+  // CHECK-LABEL: test_mm_blend_ps
+  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 0, i32 5, i32 6, i32 3>
+  return _mm_blend_ps(V1, V2, 6);
+}
+
+__m128i test_mm_blendv_epi8(__m128i V1, __m128i V2, __m128i V3) {
+  // CHECK-LABEL: test_mm_blendv_epi8
+  // CHECK: call <16 x i8> @llvm.x86.sse41.pblendvb
+  return _mm_blendv_epi8(V1, V2, V3);
+}
+
+__m128d test_mm_blendv_pd(__m128d V1, __m128d V2, __m128d V3) {
+  // CHECK-LABEL: test_mm_blendv_pd
+  // CHECK: call <2 x double> @llvm.x86.sse41.blendvpd
+  return _mm_blendv_pd(V1, V2, V3);
+}
+
+__m128 test_mm_blendv_ps(__m128 V1, __m128 V2, __m128 V3) {
+  // CHECK-LABEL: test_mm_blendv_ps
+  // CHECK: call <4 x float> @llvm.x86.sse41.blendvps
+  return _mm_blendv_ps(V1, V2, V3);
+}
+
+__m128d test_mm_ceil_pd(__m128d x) {
+  // CHECK-LABEL: test_mm_ceil_pd
+  // CHECK: call <2 x double> @llvm.x86.sse41.round.pd
+  return _mm_ceil_pd(x);
+}
+
+__m128 test_mm_ceil_ps(__m128 x) {
+  // CHECK-LABEL: test_mm_ceil_ps
+  // CHECK: call <4 x float> @llvm.x86.sse41.round.ps
+  return _mm_ceil_ps(x);
+}
+
+__m128d test_mm_ceil_sd(__m128d x, __m128d y) {
+  // CHECK-LABEL: test_mm_ceil_sd
+  // CHECK: call <2 x double> @llvm.x86.sse41.round.sd
+  return _mm_ceil_sd(x, y);
+}
+
+__m128 test_mm_ceil_ss(__m128 x, __m128 y) {
+  // CHECK-LABEL: test_mm_ceil_ss
+  // CHECK: call <4 x float> @llvm.x86.sse41.round.ss
+  return _mm_ceil_ss(x, y);
+}
+
+__m128i test_mm_cmpeq_epi64(__m128i A, __m128i B) {
+  // CHECK-LABEL: test_mm_cmpeq_epi64
+  // CHECK: icmp eq <2 x i64>
+  return _mm_cmpeq_epi64(A, B);
+}
+
+__m128i test_mm_cvtepi8_epi16(__m128i a) {
+  // CHECK-LABEL: test_mm_cvtepi8_epi16
+  // CHECK: sext <8 x i8> {{.*}} to <8 x i16>
+  return _mm_cvtepi8_epi16(a);
+}
+
+__m128i test_mm_cvtepi8_epi32(__m128i a) {
+  // CHECK-LABEL: test_mm_cvtepi8_epi32
+  // CHECK: sext <4 x i8> {{.*}} to <4 x i32>
+  return _mm_cvtepi8_epi32(a);
+}
+
+__m128i test_mm_cvtepi8_epi64(__m128i a) {
+  // CHECK-LABEL: test_mm_cvtepi8_epi64
+  // CHECK: sext <2 x i8> {{.*}} to <2 x i64>
+  return _mm_cvtepi8_epi64(a);
+}
+
+__m128i test_mm_cvtepi16_epi32(__m128i a) {
+  // CHECK-LABEL: test_mm_cvtepi16_epi32
+  // CHECK: sext <4 x i16> {{.*}} to <4 x i32>
+  return _mm_cvtepi16_epi32(a);
+}
+
+__m128i test_mm_cvtepi16_epi64(__m128i a) {
+  // CHECK-LABEL: test_mm_cvtepi16_epi64
+  // CHECK: sext <2 x i16> {{.*}} to <2 x i64>
+  return _mm_cvtepi16_epi64(a);
+}
+
+__m128i test_mm_cvtepi32_epi64(__m128i a) {
+  // CHECK-LABEL: test_mm_cvtepi32_epi64
+  // CHECK: sext <2 x i32> {{.*}} to <2 x i64>
+  return _mm_cvtepi32_epi64(a);
+}
+
+__m128i test_mm_cvtepu8_epi16(__m128i a) {
+  // CHECK-LABEL: test_mm_cvtepu8_epi16
+  // CHECK: call <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8> {{.*}})
+  return _mm_cvtepu8_epi16(a);
+}
+
+__m128i test_mm_cvtepu8_epi32(__m128i a) {
+  // CHECK-LABEL: test_mm_cvtepu8_epi32
+  // CHECK: call <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8> {{.*}})
+  return _mm_cvtepu8_epi32(a);
+}
+
+__m128i test_mm_cvtepu8_epi64(__m128i a) {
+  // CHECK-LABEL: test_mm_cvtepu8_epi64
+  // CHECK: call <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8> {{.*}})
+  return _mm_cvtepu8_epi64(a);
+}
+
+__m128i test_mm_cvtepu16_epi32(__m128i a) {
+  // CHECK-LABEL: test_mm_cvtepu16_epi32
+  // CHECK: call <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16> {{.*}})
+  return _mm_cvtepu16_epi32(a);
+}
+
+__m128i test_mm_cvtepu16_epi64(__m128i a) {
+  // CHECK-LABEL: test_mm_cvtepu16_epi64
+  // CHECK: call <2 x i64> @llvm.x86.sse41.pmovzxwq(<8 x i16> {{.*}})
+  return _mm_cvtepu16_epi64(a);
+}
+
+__m128i test_mm_cvtepu32_epi64(__m128i a) {
+  // CHECK-LABEL: test_mm_cvtepu32_epi64
+  // CHECK: call <2 x i64> @llvm.x86.sse41.pmovzxdq(<4 x i32> {{.*}})
+  return _mm_cvtepu32_epi64(a);
+}
+
+__m128d test_mm_dp_pd(__m128d x, __m128d y) {
+  // CHECK-LABEL: test_mm_dp_pd
+  // CHECK: call <2 x double> @llvm.x86.sse41.dppd
+  return _mm_dp_pd(x, y, 2);
+}
+
+__m128 test_mm_dp_ps(__m128 x, __m128 y) {
+  // CHECK-LABEL: test_mm_dp_ps
+  // CHECK: call <4 x float> @llvm.x86.sse41.dpps
+  return _mm_dp_ps(x, y, 2);
+}
+
+int test_mm_extract_epi8(__m128i x) {
+  // CHECK-LABEL: test_mm_extract_epi8
+  // CHECK: extractelement <16 x i8> %{{.*}}, i32 0
+  return _mm_extract_epi8(x, 16);
+}
+
+int test_mm_extract_epi32(__m128i x) {
+  // CHECK-LABEL: test_mm_extract_epi32
+  // CHECK: extractelement <4 x i32> %{{.*}}, i32 1
+  return _mm_extract_epi32(x, 1);
+}
+
+long long test_mm_extract_epi64(__m128i x) {
+  // CHECK-LABEL: test_mm_extract_epi64
+  // CHECK: extractelement <2 x i64> %{{.*}}, i32 1
+  return _mm_extract_epi64(x, 1);
+}
+
+// TODO
+//int test_mm_extract_ps(__m128i x) {
+//  return _mm_extract_ps(_mm_add_ps(x,x), 1);
+//}
+
+__m128d test_mm_floor_pd(__m128d x) {
+  // CHECK-LABEL: test_mm_floor_pd
+  // CHECK: call <2 x double> @llvm.x86.sse41.round.pd
+  return _mm_floor_pd(x);
+}
+
+__m128 test_mm_floor_ps(__m128 x) {
+  // CHECK-LABEL: test_mm_floor_ps
+  // CHECK: call <4 x float> @llvm.x86.sse41.round.ps
+  return _mm_floor_ps(x);
+}
+
+__m128d test_mm_floor_sd(__m128d x, __m128d y) {
+  // CHECK-LABEL: test_mm_floor_sd
+  // CHECK: call <2 x double> @llvm.x86.sse41.round.sd
+  return _mm_floor_sd(x, y);
+}
+
+__m128 test_mm_floor_ss(__m128 x, __m128 y) {
+  // CHECK-LABEL: test_mm_floor_ss
+  // CHECK: call <4 x float> @llvm.x86.sse41.round.ss
+  return _mm_floor_ss(x, y);
+}
+
+__m128i test_mm_insert_epi8(__m128i x, char b) {
+  // CHECK-LABEL: test_mm_insert_epi8
+  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 0
+  return _mm_insert_epi8(x, b, 16);
+}
+
+__m128i test_mm_insert_epi32(__m128i x, int b) {
+  // CHECK-LABEL: test_mm_insert_epi32
+  // CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i32 0
+  return _mm_insert_epi32(x, b, 4);
+}
+
+__m128i test_mm_insert_epi64(__m128i x, long long b) {
+  // CHECK-LABEL: test_mm_insert_epi64
+  // CHECK: insertelement <2 x i64> %{{.*}}, i64 %{{.*}}, i32 0
+  return _mm_insert_epi64(x, b, 2);
+}
+
+__m128 test_mm_insert_ps(__m128 x, __m128 y) {
+  // CHECK-LABEL: test_mm_insert_ps
+  // CHECK: call <4 x float> @llvm.x86.sse41.insertps
+  return _mm_insert_ps(x, y, 5);
+}
+
+__m128i test_mm_max_epi8(__m128i x, __m128i y) {
+  // CHECK-LABEL: test_mm_max_epi8
+  // CHECK: call <16 x i8> @llvm.x86.sse41.pmaxsb
+  return _mm_max_epi8(x, y);
+}
+
+__m128i test_mm_max_epu16(__m128i x, __m128i y) {
+  // CHECK-LABEL: test_mm_max_epu16
+  // CHECK: call <8 x i16> @llvm.x86.sse41.pmaxuw
+  return _mm_max_epu16(x, y);
+}
+
+__m128i test_mm_max_epi32(__m128i x, __m128i y) {
+  // CHECK-LABEL: test_mm_max_epi32
+  // CHECK: call <4 x i32> @llvm.x86.sse41.pmaxsd
+  return _mm_max_epi32(x, y);
+}
+
+__m128i test_mm_max_epu32(__m128i x, __m128i y) {
+  // CHECK-LABEL: test_mm_max_epu32
+  // CHECK: call <4 x i32> @llvm.x86.sse41.pmaxud
+  return _mm_max_epu32(x, y);
+}
+
+__m128i test_mm_min_epi8(__m128i x, __m128i y) {
+  // CHECK-LABEL: test_mm_min_epi8
+  // CHECK: call <16 x i8> @llvm.x86.sse41.pminsb
+  return _mm_min_epi8(x, y);
+}
+
+__m128i test_mm_min_epu16(__m128i x, __m128i y) {
+  // CHECK-LABEL: test_mm_min_epu16
+  // CHECK: call <8 x i16> @llvm.x86.sse41.pminuw
+  return _mm_min_epu16(x, y);
+}
+
+__m128i test_mm_min_epi32(__m128i x, __m128i y) {
+  // CHECK-LABEL: test_mm_min_epi32
+  // CHECK: call <4 x i32> @llvm.x86.sse41.pminsd
+  return _mm_min_epi32(x, y);
+}
+
+__m128i test_mm_min_epu32(__m128i x, __m128i y) {
+  // CHECK-LABEL: test_mm_min_epu32
+  // CHECK: call <4 x i32> @llvm.x86.sse41.pminud
+  return _mm_min_epu32(x, y);
+}
+
+__m128i test_mm_minpos_epu16(__m128i x) {
+  // CHECK-LABEL: test_mm_minpos_epu16
+  // CHECK: call <8 x i16> @llvm.x86.sse41.phminposuw
+  return _mm_minpos_epu16(x);
+}
+
+__m128i test_mm_mpsadbw_epu8(__m128i x, __m128i y) {
+  // CHECK-LABEL: test_mm_mpsadbw_epu8
+  // CHECK: call <8 x i16> @llvm.x86.sse41.mpsadbw
+  return _mm_mpsadbw_epu8(x, y, 1);
+}
+
+__m128i test_mm_mul_epi32(__m128i x, __m128i y) {
+  // CHECK-LABEL: test_mm_mul_epi32
+  // CHECK: call <2 x i64> @llvm.x86.sse41.pmuldq
+  return _mm_mul_epi32(x, y);
+}
+
+__m128i test_mm_mullo_epi32(__m128i x, __m128i y) {
+  // CHECK-LABEL: test_mm_mullo_epi32
+  // CHECK: mul <4 x i32>
+  return _mm_mullo_epi32(x, y);
+}
+
+__m128i test_mm_packus_epi32(__m128i x, __m128i y) {
+  // CHECK-LABEL: test_mm_packus_epi32
+  // CHECK: call <8 x i16> @llvm.x86.sse41.packusdw
+  return _mm_packus_epi32(x, y);
+}
+
+__m128d test_mm_round_pd(__m128d x) {
+  // CHECK-LABEL: test_mm_round_pd
+  // CHECK: call <2 x double> @llvm.x86.sse41.round.pd
+  return _mm_round_pd(x, 2);
+}
+
+__m128 test_mm_round_ps(__m128 x) {
+  // CHECK-LABEL: test_mm_round_ps
+  // CHECK: call <4 x float> @llvm.x86.sse41.round.ps
+  return _mm_round_ps(x, 2);
+}
+
+__m128d test_mm_round_sd(__m128d x, __m128d y) {
+  // CHECK-LABEL: test_mm_round_sd
+  // CHECK: call <2 x double> @llvm.x86.sse41.round.sd
+  return _mm_round_sd(x, y, 2);
+}
+
+__m128 test_mm_round_ss(__m128 x, __m128 y) {
+  // CHECK-LABEL: test_mm_round_ss
+  // CHECK: call <4 x float> @llvm.x86.sse41.round.ss
+  return _mm_round_ss(x, y, 2);
+}
+
+__m128i test_mm_stream_load_si128(__m128i const *a) {
+  // CHECK-LABEL: test_mm_stream_load_si128
+  // CHECK: call <2 x i64> @llvm.x86.sse41.movntdqa
+  return _mm_stream_load_si128(a);
+}
+
+int test_mm_test_all_ones(__m128i x) {
+  // CHECK-LABEL: test_mm_test_all_ones
+  // CHECK: call i32 @llvm.x86.sse41.ptestc
+  return _mm_test_all_ones(x);
+}
+
+int test_mm_test_all_zeros(__m128i x, __m128i y) {
+  // CHECK-LABEL: test_mm_test_all_zeros
+  // CHECK: call i32 @llvm.x86.sse41.ptestz
+  return _mm_test_all_zeros(x, y);
+}
+
+int test_mm_test_mix_ones_zeros(__m128i x, __m128i y) {
+  // CHECK-LABEL: test_mm_test_mix_ones_zeros
+  // CHECK: call i32 @llvm.x86.sse41.ptestnzc
+  return _mm_test_mix_ones_zeros(x, y);
+}
+
+int test_mm_testc_si128(__m128i x, __m128i y) {
+  // CHECK-LABEL: test_mm_testc_si128
+  // CHECK: call i32 @llvm.x86.sse41.ptestc
+  return _mm_testc_si128(x, y);
+}
+
+int test_mm_testnzc_si128(__m128i x, __m128i y) {
+  // CHECK-LABEL: test_mm_testnzc_si128
+  // CHECK: call i32 @llvm.x86.sse41.ptestnzc
+  return _mm_testnzc_si128(x, y);
+}
+
+int test_mm_testz_si128(__m128i x, __m128i y) {
+  // CHECK-LABEL: test_mm_testz_si128
+  // CHECK: call i32 @llvm.x86.sse41.ptestz
+  return _mm_testz_si128(x, y);
+}
diff --git a/test/CodeGen/sse42-builtins.c b/test/CodeGen/sse42-builtins.c
new file mode 100644
index 0000000..e3215dd
--- /dev/null
+++ b/test/CodeGen/sse42-builtins.c
@@ -0,0 +1,139 @@
+// RUN: %clang_cc1 %s -triple=x86_64-apple-darwin -target-feature +sse4.2 -emit-llvm -o - -Werror | FileCheck %s
+// RUN: %clang_cc1 %s -triple=x86_64-apple-darwin -target-feature +sse4.2 -fno-signed-char -emit-llvm -o - -Werror | FileCheck %s
+
+// Don't include mm_malloc.h, it's system specific.
+#define __MM_MALLOC_H
+
+#include <x86intrin.h>
+
+__m128i test_mm_cmpgt_epi8(__m128i A, __m128i B) {
+  // CHECK-LABEL: test_mm_cmpgt_epi8
+  // CHECK: icmp sgt <16 x i8>
+  return _mm_cmpgt_epi8(A, B);
+}
+
+__m128i test_mm_cmpgt_epi16(__m128i A, __m128i B) {
+  // CHECK-LABEL: test_mm_cmpgt_epi16
+  // CHECK: icmp sgt <8 x i16>
+  return _mm_cmpgt_epi16(A, B);
+}
+
+__m128i test_mm_cmpgt_epi32(__m128i A, __m128i B) {
+  // CHECK-LABEL: test_mm_cmpgt_epi32
+  // CHECK: icmp sgt <4 x i32>
+  return _mm_cmpgt_epi32(A, B);
+}
+
+__m128i test_mm_cmpgt_epi64(__m128i A, __m128i B) {
+  // CHECK-LABEL: test_mm_cmpgt_epi64
+  // CHECK: icmp sgt <2 x i64>
+  return _mm_cmpgt_epi64(A, B);
+}
+
+int test_mm_cmpestra(__m128i A, int LA, __m128i B, int LB) {
+  // CHECK-LABEL: test_mm_cmpestra
+  // CHECK: @llvm.x86.sse42.pcmpestria128
+  return _mm_cmpestra(A, LA, B, LB, 7);
+}
+
+int test_mm_cmpestrc(__m128i A, int LA, __m128i B, int LB) {
+  // CHECK-LABEL: test_mm_cmpestrc
+  // CHECK: @llvm.x86.sse42.pcmpestric128
+  return _mm_cmpestrc(A, LA, B, LB, 7);
+}
+
+int test_mm_cmpestri(__m128i A, int LA, __m128i B, int LB) {
+  // CHECK-LABEL: test_mm_cmpestri
+  // CHECK: @llvm.x86.sse42.pcmpestri128
+  return _mm_cmpestri(A, LA, B, LB, 7);
+}
+
+__m128i test_mm_cmpestrm(__m128i A, int LA, __m128i B, int LB) {
+  // CHECK-LABEL: test_mm_cmpestrm
+  // CHECK: @llvm.x86.sse42.pcmpestrm128
+  return _mm_cmpestrm(A, LA, B, LB, 7);
+}
+
+int test_mm_cmpestro(__m128i A, int LA, __m128i B, int LB) {
+  // CHECK-LABEL: test_mm_cmpestro
+  // CHECK: @llvm.x86.sse42.pcmpestrio128
+  return _mm_cmpestro(A, LA, B, LB, 7);
+}
+
+int test_mm_cmpestrs(__m128i A, int LA, __m128i B, int LB) {
+  // CHECK-LABEL: test_mm_cmpestrs
+  // CHECK: @llvm.x86.sse42.pcmpestris128
+  return _mm_cmpestrs(A, LA, B, LB, 7);
+}
+
+int test_mm_cmpestrz(__m128i A, int LA, __m128i B, int LB) {
+  // CHECK-LABEL: test_mm_cmpestrz
+  // CHECK: @llvm.x86.sse42.pcmpestriz128
+  return _mm_cmpestrz(A, LA, B, LB, 7);
+}
+
+int test_mm_cmpistra(__m128i A, __m128i B) {
+  // CHECK-LABEL: test_mm_cmpistra
+  // CHECK: @llvm.x86.sse42.pcmpistria128
+  return _mm_cmpistra(A, B, 7);
+}
+
+int test_mm_cmpistrc(__m128i A, __m128i B) {
+  // CHECK-LABEL: test_mm_cmpistrc
+  // CHECK: @llvm.x86.sse42.pcmpistric128
+  return _mm_cmpistrc(A, B, 7);
+}
+
+int test_mm_cmpistri(__m128i A, __m128i B) {
+  // CHECK-LABEL: test_mm_cmpistri
+  // CHECK: @llvm.x86.sse42.pcmpistri128
+  return _mm_cmpistri(A, B, 7);
+}
+
+__m128i test_mm_cmpistrm(__m128i A, __m128i B) {
+  // CHECK-LABEL: test_mm_cmpistrm
+  // CHECK: @llvm.x86.sse42.pcmpistrm128
+  return _mm_cmpistrm(A, B, 7);
+}
+
+int test_mm_cmpistro(__m128i A, __m128i B) {
+  // CHECK-LABEL: test_mm_cmpistro
+  // CHECK: @llvm.x86.sse42.pcmpistrio128
+  return _mm_cmpistro(A, B, 7);
+}
+
+int test_mm_cmpistrs(__m128i A, __m128i B) {
+  // CHECK-LABEL: test_mm_cmpistrs
+  // CHECK: @llvm.x86.sse42.pcmpistris128
+  return _mm_cmpistrs(A, B, 7);
+}
+
+int test_mm_cmpistrz(__m128i A, __m128i B) {
+  // CHECK-LABEL: test_mm_cmpistrz
+  // CHECK: @llvm.x86.sse42.pcmpistriz128
+  return _mm_cmpistrz(A, B, 7);
+}
+
+unsigned int test_mm_crc32_u8(unsigned int CRC, unsigned char V) {
+  // CHECK-LABEL: test_mm_crc32_u8
+  // CHECK: call i32 @llvm.x86.sse42.crc32.32.8
+  return _mm_crc32_u8(CRC, V);
+}
+
+unsigned int test_mm_crc32_u16(unsigned int CRC, unsigned short V) {
+  // CHECK-LABEL: test_mm_crc32_u16
+  // CHECK: call i32 @llvm.x86.sse42.crc32.32.16
+  return _mm_crc32_u16(CRC, V);
+}
+
+unsigned int test_mm_crc32_u32(unsigned int CRC, unsigned int V) {
+  // CHECK-LABEL: test_mm_crc32_u32
+  // CHECK: call i32 @llvm.x86.sse42.crc32.32.32
+  return _mm_crc32_u32(CRC, V);
+}
+
+unsigned int test_mm_crc32_u64(unsigned long long CRC, unsigned long long V) {
+  // CHECK-LABEL: test_mm_crc32_u64
+  // CHECK: call i64 @llvm.x86.sse42.crc32.64.64
+  return _mm_crc32_u64(CRC, V);
+}
diff --git a/test/CodeGen/sse4a-builtins.c b/test/CodeGen/sse4a-builtins.c
index e1d7e8f..9a408b8 100644
--- a/test/CodeGen/sse4a-builtins.c
+++ b/test/CodeGen/sse4a-builtins.c
@@ -1,39 +1,42 @@
-// RUN: %clang_cc1 -ffreestanding -triple i386-apple-darwin9 -target-cpu pentium4 -target-feature +sse4a -g -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 %s -triple=x86_64-apple-darwin -target-feature +sse4a -emit-llvm -o - -Werror | FileCheck %s
 
-#include <ammintrin.h>
+// Don't include mm_malloc.h, it's system specific.
+#define __MM_MALLOC_H
 
-__m128i test_extracti_si64(__m128i x) {
+#include <x86intrin.h>
+
+__m128i test_mm_extracti_si64(__m128i x) {
+  // CHECK-LABEL: test_mm_extracti_si64
+  // CHECK: call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %{{[^,]+}}, i8 3, i8 2)
   return _mm_extracti_si64(x, 3, 2);
-// CHECK: @test_extracti_si64
-// CHECK: @llvm.x86.sse4a.extrqi(<2 x i64> %{{[^,]+}}, i8 3, i8 2)
 }
 
-__m128i test_extract_si64(__m128i x, __m128i y) {
+__m128i test_mm_extract_si64(__m128i x, __m128i y) {
+  // CHECK-LABEL: test_mm_extract_si64
+  // CHECK: call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %{{[^,]+}}, <16 x i8> %{{[^,]+}})
   return _mm_extract_si64(x, y);
-// CHECK: @test_extract_si64
-// CHECK: @llvm.x86.sse4a.extrq(<2 x i64> %{{[^,]+}}, <16 x i8> %{{[^,]+}})
 }
 
-__m128i test_inserti_si64(__m128i x, __m128i y) {
+__m128i test_mm_inserti_si64(__m128i x, __m128i y) {
+  // CHECK-LABEL: test_mm_inserti_si64
+  // CHECK: call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %{{[^,]+}}, <2 x i64> %{{[^,]+}}, i8 5, i8 6)
   return _mm_inserti_si64(x, y, 5, 6);
-// CHECK: @test_inserti_si64
-// CHECK: @llvm.x86.sse4a.insertqi(<2 x i64> %{{[^,]+}}, <2 x i64> %{{[^,]+}}, i8 5, i8 6)
 }
 
-__m128i test_insert_si64(__m128i x, __m128i y) {
+__m128i test_mm_insert_si64(__m128i x, __m128i y) {
+  // CHECK-LABEL: test_mm_insert_si64
+  // CHECK: call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> %{{[^,]+}}, <2 x i64> %{{[^,]+}})
   return _mm_insert_si64(x, y);
-// CHECK: @test_insert_si64
-// CHECK: @llvm.x86.sse4a.insertq(<2 x i64> %{{[^,]+}}, <2 x i64> %{{[^,]+}})
 }
 
-void test_stream_sd(double *p, __m128d a) {
+void test_mm_stream_sd(double *p, __m128d a) {
+  // CHECK-LABEL: test_mm_stream_sd
+  // CHECK: call void @llvm.x86.sse4a.movnt.sd(i8* %{{[^,]+}}, <2 x double> %{{[^,]+}})
   _mm_stream_sd(p, a);
-// CHECK: @test_stream_sd
-// CHECK: @llvm.x86.sse4a.movnt.sd(i8* %{{[^,]+}}, <2 x double> %{{[^,]+}})
 }
 
-void test_stream_ss(float *p, __m128 a) {
+void test_mm_stream_ss(float *p, __m128 a) {
+  // CHECK-LABEL: test_mm_stream_ss
+  // CHECK: call void @llvm.x86.sse4a.movnt.ss(i8* %{{[^,]+}}, <4 x float> %{{[^,]+}})
   _mm_stream_ss(p, a);
-// CHECK: @test_stream_ss
-// CHECK: @llvm.x86.sse4a.movnt.ss(i8* %{{[^,]+}}, <4 x float> %{{[^,]+}})
 }
diff --git a/test/CodeGen/ssse3-builtins.c b/test/CodeGen/ssse3-builtins.c
new file mode 100644
index 0000000..d4b27a1
--- /dev/null
+++ b/test/CodeGen/ssse3-builtins.c
@@ -0,0 +1,108 @@
+// RUN: %clang_cc1 %s -triple=x86_64-apple-darwin -target-feature +ssse3 -emit-llvm -o - -Werror | FileCheck %s
+
+// Don't include mm_malloc.h, it's system specific.
+#define __MM_MALLOC_H
+
+#include <x86intrin.h>
+
+__m128i test_mm_abs_epi8(__m128i a) {
+  // CHECK-LABEL: test_mm_abs_epi8
+  // CHECK: call <16 x i8> @llvm.x86.ssse3.pabs.b.128
+  return _mm_abs_epi8(a);
+}
+
+__m128i test_mm_abs_epi16(__m128i a) {
+  // CHECK-LABEL: test_mm_abs_epi16
+  // CHECK: call <8 x i16> @llvm.x86.ssse3.pabs.w.128
+  return _mm_abs_epi16(a);
+}
+
+__m128i test_mm_abs_epi32(__m128i a) {
+  // CHECK-LABEL: test_mm_abs_epi32
+  // CHECK: call <4 x i32> @llvm.x86.ssse3.pabs.d.128
+  return _mm_abs_epi32(a);
+}
+
+__m128i test_mm_alignr_epi8(__m128i a, __m128i b) {
+  // CHECK-LABEL: test_mm_alignr_epi8
+  // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17>
+  return _mm_alignr_epi8(a, b, 2);
+}
+
+__m128i test2_mm_alignr_epi8(__m128i a, __m128i b) {
+  // CHECK-LABEL: test2_mm_alignr_epi8
+  // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> zeroinitializer, <16 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16>
+  return _mm_alignr_epi8(a, b, 17);
+}
+
+__m128i test_mm_hadd_epi16(__m128i a, __m128i b) {
+  // CHECK-LABEL: test_mm_hadd_epi16
+  // CHECK: call <8 x i16> @llvm.x86.ssse3.phadd.w.128
+  return _mm_hadd_epi16(a, b);
+}
+
+__m128i test_mm_hadd_epi32(__m128i a, __m128i b) {
+  // CHECK-LABEL: test_mm_hadd_epi32
+  // CHECK: call <4 x i32> @llvm.x86.ssse3.phadd.d.128
+  return _mm_hadd_epi32(a, b);
+}
+
+__m128i test_mm_hadds_epi16(__m128i a, __m128i b) {
+  // CHECK-LABEL: test_mm_hadds_epi16
+  // CHECK: call <8 x i16> @llvm.x86.ssse3.phadd.sw.128
+  return _mm_hadds_epi16(a, b);
+}
+
+__m128i test_mm_hsub_epi16(__m128i a, __m128i b) {
+  // CHECK-LABEL: test_mm_hsub_epi16
+  // CHECK: call <8 x i16> @llvm.x86.ssse3.phsub.w.128
+  return _mm_hsub_epi16(a, b);
+}
+
+__m128i test_mm_hsub_epi32(__m128i a, __m128i b) {
+  // CHECK-LABEL: test_mm_hsub_epi32
+  // CHECK: call <4 x i32> @llvm.x86.ssse3.phsub.d.128
+  return _mm_hsub_epi32(a, b);
+}
+
+__m128i test_mm_hsubs_epi16(__m128i a, __m128i b) {
+  // CHECK-LABEL: test_mm_hsubs_epi16
+  // CHECK: call <8 x i16> @llvm.x86.ssse3.phsub.sw.128
+  return _mm_hsubs_epi16(a, b);
+}
+
+__m128i test_mm_maddubs_epi16(__m128i a, __m128i b) {
+  // CHECK-LABEL: test_mm_maddubs_epi16
+  // CHECK: call <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128
+  return _mm_maddubs_epi16(a, b);
+}
+
+__m128i test_mm_mulhrs_epi16(__m128i a, __m128i b) {
+  // CHECK-LABEL: test_mm_mulhrs_epi16
+  // CHECK: call <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128
+  return _mm_mulhrs_epi16(a, b);
+}
+
+__m128i test_mm_shuffle_epi8(__m128i a, __m128i b) {
+  // CHECK-LABEL: test_mm_shuffle_epi8
+  // CHECK: call <16 x i8> @llvm.x86.ssse3.pshuf.b.128
+  return _mm_shuffle_epi8(a, b);
+}
+
+__m128i test_mm_sign_epi8(__m128i a, __m128i b) {
+  // CHECK-LABEL: test_mm_sign_epi8
+  // CHECK: call <16 x i8> @llvm.x86.ssse3.psign.b.128
+  return _mm_sign_epi8(a, b);
+}
+
+__m128i test_mm_sign_epi16(__m128i a, __m128i b) {
+  // CHECK-LABEL: test_mm_sign_epi16
+  // CHECK: call <8 x i16> @llvm.x86.ssse3.psign.w.128
+  return _mm_sign_epi16(a, b);
+}
+
+__m128i test_mm_sign_epi32(__m128i a, __m128i b) {
+  // CHECK-LABEL: test_mm_sign_epi32
+  // CHECK: call <4 x i32> @llvm.x86.ssse3.psign.d.128
+  return _mm_sign_epi32(a, b);
+}
diff --git a/test/CodeGen/stack-protector.c b/test/CodeGen/stack-protector.c
index 8039b60..ecfbc90 100644
--- a/test/CodeGen/stack-protector.c
+++ b/test/CodeGen/stack-protector.c
@@ -1,13 +1,13 @@
 // RUN: %clang_cc1 -emit-llvm -o - %s -stack-protector 0 | FileCheck -check-prefix=NOSSP %s
-// NOSSP: define void @test1(i8* %msg) #0 {
+// NOSSP: define {{.*}}void @test1(i8* %msg) #0 {
 // RUN: %clang_cc1 -emit-llvm -o - %s -stack-protector 1 | FileCheck -check-prefix=WITHSSP %s
-// WITHSSP: define void @test1(i8* %msg) #0 {
+// WITHSSP: define {{.*}}void @test1(i8* %msg) #0 {
 // RUN: %clang_cc1 -emit-llvm -o - %s -stack-protector 2 | FileCheck -check-prefix=SSPSTRONG %s
-// SSPSTRONG: define void @test1(i8* %msg) #0 {
+// SSPSTRONG: define {{.*}}void @test1(i8* %msg) #0 {
 // RUN: %clang_cc1 -emit-llvm -o - %s -stack-protector 3 | FileCheck -check-prefix=SSPREQ %s
-// SSPREQ: define void @test1(i8* %msg) #0 {
+// SSPREQ: define {{.*}}void @test1(i8* %msg) #0 {
 // RUN: %clang_cc1 -emit-llvm -o - %s -fsanitize=safe-stack | FileCheck -check-prefix=SAFESTACK %s
-// SAFESTACK: define void @test1(i8* %msg) #0 {
+// SAFESTACK: define {{.*}}void @test1(i8* %msg) #0 {
 
 typedef __SIZE_TYPE__ size_t;
 
diff --git a/test/CodeGen/stackrealign.c b/test/CodeGen/stackrealign.c
new file mode 100644
index 0000000..39b0939
--- /dev/null
+++ b/test/CodeGen/stackrealign.c
@@ -0,0 +1,8 @@
+// RUN: %clang_cc1 %s -emit-llvm -o - -mstackrealign | FileCheck %s -check-prefix=REALIGN
+// RUN: %clang_cc1 %s -emit-llvm -o - | FileCheck %s -check-prefix=NO-REALIGN
+
+// REALIGN: attributes #{{[0-9]+}} = {{{.*}} "stackrealign"
+// NO-REALIGN-NOT: attributes #{{[0-9]+}} = {{{.*}} "stackrealign"
+
+void test1() {
+}
diff --git a/test/CodeGen/static-order.c b/test/CodeGen/static-order.c
index 58aabbe..20277d7 100644
--- a/test/CodeGen/static-order.c
+++ b/test/CodeGen/static-order.c
@@ -1,7 +1,7 @@
 // RUN: %clang_cc1 -emit-llvm -o - %s | FileCheck %s
 // CHECK: ModuleID
 // CHECK-NOT: zeroinitializer
-// CHECK-LABEL: define i8* @f
+// CHECK-LABEL: define {{.*}}i8* @f
 
 struct s {
     int a;
diff --git a/test/CodeGen/string-literal-short-wstring.c b/test/CodeGen/string-literal-short-wstring.c
index 89aa6f7..01de6a4 100644
--- a/test/CodeGen/string-literal-short-wstring.c
+++ b/test/CodeGen/string-literal-short-wstring.c
@@ -2,6 +2,10 @@
 // RUN: %clang_cc1 -x c++ -triple %ms_abi_triple -emit-llvm -fshort-wchar %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=MSABI
 // Runs in c++ mode so that wchar_t is available.
 
+// XFAIL: hexagon
+// Hexagon aligns arrays of size 8+ bytes to a 64-bit boundary, which fails
+// the first check line with "align 1".
+
 int main() {
   // This should convert to utf8.
   // CHECK: private unnamed_addr constant [10 x i8] c"\E1\84\A0\C8\A0\F4\82\80\B0\00", align 1
diff --git a/test/CodeGen/string-literal.c b/test/CodeGen/string-literal.c
index 8bc97f1..c460187 100644
--- a/test/CodeGen/string-literal.c
+++ b/test/CodeGen/string-literal.c
@@ -102,6 +102,30 @@
 ??=
 def)";
 
+  // CHECK-CXX11: private unnamed_addr constant [13 x i8] c"def\5C\0A??=\0Aabc\00", align 1
+  const char *s = u8R\
+"(def\
+??=
+abc)";
+
+  // CHECK-CXX11: private unnamed_addr constant [13 x i16] [i16 97, i16 98, i16 99, i16 92, i16 10, i16 63, i16 63, i16 61, i16 10, i16 100, i16 101, i16 102, i16 0], align 2
+  const char16_t *t = uR\
+"(abc\
+??=
+def)";
+
+  // CHECK-CXX11: private unnamed_addr constant [13 x i32] [i32 97, i32 98, i32 99, i32 92, i32 10, i32 63, i32 63, i32 61, i32 10, i32 100, i32 101, i32 102, i32 0], align 4
+  const char32_t *u = UR\
+"(abc\
+??=
+def)";
+
+  // CHECK-CXX11: private unnamed_addr constant [13 x i32] [i32 100, i32 101, i32 102, i32 92, i32 10, i32 63, i32 63, i32 61, i32 10, i32 97, i32 98, i32 99, i32 0], align 4
+  const wchar_t *v = LR\
+"(def\
+??=
+abc)";
+
 #endif
 #endif
 }
diff --git a/test/CodeGen/systemz-inline-asm.c b/test/CodeGen/systemz-inline-asm.c
index 92ed4bb..7d5a6b2 100644
--- a/test/CodeGen/systemz-inline-asm.c
+++ b/test/CodeGen/systemz-inline-asm.c
@@ -6,31 +6,31 @@
 void test_store_m(unsigned int i) {
   asm("st %1, %0" : "=m" (gi) : "r" (i));
 // CHECK-LABEL: define void @test_store_m(i32 zeroext %i)
-// CHECK: call void asm "st $1, $0", "=*m,r"(i32* @gi, i32 %i)
+// CHECK: call void asm "st $1, $0", "=*m,r"(i32* nonnull @gi, i32 %i)
 }
 
 void test_store_Q(unsigned int i) {
   asm("st %1, %0" : "=Q" (gi) : "r" (i));
 // CHECK-LABEL: define void @test_store_Q(i32 zeroext %i)
-// CHECK: call void asm "st $1, $0", "=*Q,r"(i32* @gi, i32 %i)
+// CHECK: call void asm "st $1, $0", "=*Q,r"(i32* nonnull @gi, i32 %i)
 }
 
 void test_store_R(unsigned int i) {
   asm("st %1, %0" : "=R" (gi) : "r" (i));
 // CHECK-LABEL: define void @test_store_R(i32 zeroext %i)
-// CHECK: call void asm "st $1, $0", "=*R,r"(i32* @gi, i32 %i)
+// CHECK: call void asm "st $1, $0", "=*R,r"(i32* nonnull @gi, i32 %i)
 }
 
 void test_store_S(unsigned int i) {
   asm("st %1, %0" : "=S" (gi) : "r" (i));
 // CHECK-LABEL: define void @test_store_S(i32 zeroext %i)
-// CHECK: call void asm "st $1, $0", "=*S,r"(i32* @gi, i32 %i)
+// CHECK: call void asm "st $1, $0", "=*S,r"(i32* nonnull @gi, i32 %i)
 }
 
 void test_store_T(unsigned int i) {
   asm("st %1, %0" : "=T" (gi) : "r" (i));
 // CHECK-LABEL: define void @test_store_T(i32 zeroext %i)
-// CHECK: call void asm "st $1, $0", "=*T,r"(i32* @gi, i32 %i)
+// CHECK: call void asm "st $1, $0", "=*T,r"(i32* nonnull @gi, i32 %i)
 }
 
 int test_load_m() {
@@ -38,7 +38,7 @@
   asm("l %0, %1" : "=r" (i) : "m" (gi));
   return i;
 // CHECK-LABEL: define signext i32 @test_load_m()
-// CHECK: call i32 asm "l $0, $1", "=r,*m"(i32* @gi)
+// CHECK: call i32 asm "l $0, $1", "=r,*m"(i32* nonnull @gi)
 }
 
 int test_load_Q() {
@@ -46,7 +46,7 @@
   asm("l %0, %1" : "=r" (i) : "Q" (gi));
   return i;
 // CHECK-LABEL: define signext i32 @test_load_Q()
-// CHECK: call i32 asm "l $0, $1", "=r,*Q"(i32* @gi)
+// CHECK: call i32 asm "l $0, $1", "=r,*Q"(i32* nonnull @gi)
 }
 
 int test_load_R() {
@@ -54,7 +54,7 @@
   asm("l %0, %1" : "=r" (i) : "R" (gi));
   return i;
 // CHECK-LABEL: define signext i32 @test_load_R()
-// CHECK: call i32 asm "l $0, $1", "=r,*R"(i32* @gi)
+// CHECK: call i32 asm "l $0, $1", "=r,*R"(i32* nonnull @gi)
 }
 
 int test_load_S() {
@@ -62,7 +62,7 @@
   asm("l %0, %1" : "=r" (i) : "S" (gi));
   return i;
 // CHECK-LABEL: define signext i32 @test_load_S()
-// CHECK: call i32 asm "l $0, $1", "=r,*S"(i32* @gi)
+// CHECK: call i32 asm "l $0, $1", "=r,*S"(i32* nonnull @gi)
 }
 
 int test_load_T() {
@@ -70,7 +70,7 @@
   asm("l %0, %1" : "=r" (i) : "T" (gi));
   return i;
 // CHECK-LABEL: define signext i32 @test_load_T()
-// CHECK: call i32 asm "l $0, $1", "=r,*T"(i32* @gi)
+// CHECK: call i32 asm "l $0, $1", "=r,*T"(i32* nonnull @gi)
 }
 
 void test_mI(unsigned char *c) {
diff --git a/test/CodeGen/target-builtin-error-2.c b/test/CodeGen/target-builtin-error-2.c
new file mode 100644
index 0000000..949f2cc
--- /dev/null
+++ b/test/CodeGen/target-builtin-error-2.c
@@ -0,0 +1,13 @@
+// RUN: %clang_cc1 %s -triple=x86_64-linux-gnu -S -verify -o -
+#define __MM_MALLOC_H
+
+#include <x86intrin.h>
+
+// Because code generation happens at the function level, this needs to error
+// out since the subtarget feature won't be available.
+__m256d wombat(__m128i a) {
+  if (__builtin_cpu_supports("avx"))
+    return __builtin_ia32_cvtdq2pd256((__v4si)a); // expected-error {{'__builtin_ia32_cvtdq2pd256' needs target feature avx}}
+  else
+    return (__m256d){0, 0, 0, 0};
+}
diff --git a/test/CodeGen/target-builtin-error.c b/test/CodeGen/target-builtin-error.c
new file mode 100644
index 0000000..ee41277
--- /dev/null
+++ b/test/CodeGen/target-builtin-error.c
@@ -0,0 +1,8 @@
+// RUN: %clang_cc1 %s -triple=x86_64-linux-gnu -S -verify -o -
+#define __MM_MALLOC_H
+
+#include <x86intrin.h>
+
+__m128d foo(__m128d a, __m128d b) {
+  return __builtin_ia32_addsubps(b, a); // expected-error {{'__builtin_ia32_addsubps' needs target feature sse3}}
+}
diff --git a/test/CodeGen/target-builtin-noerror.c b/test/CodeGen/target-builtin-noerror.c
new file mode 100644
index 0000000..7d86b96
--- /dev/null
+++ b/test/CodeGen/target-builtin-noerror.c
@@ -0,0 +1,44 @@
+// REQUIRES: x86-registered-target
+// RUN: %clang_cc1 %s -triple=x86_64-linux-gnu -S -o -
+#define __MM_MALLOC_H
+
+#include <x86intrin.h>
+
+// No warnings.
+extern __m256i a;
+int __attribute__((target("avx"))) bar(__m256i a) {
+  return _mm256_extract_epi32(a, 3);
+}
+
+int baz() {
+  return bar(a);
+}
+
+int __attribute__((target("avx"))) qq_avx(__m256i a) {
+  return _mm256_extract_epi32(a, 3);
+}
+
+int qq_noavx() {
+  return 0;
+}
+
+extern __m256i a;
+int qq() {
+  if (__builtin_cpu_supports("avx"))
+    return qq_avx(a);
+  else
+    return qq_noavx();
+}
+
+// Test that fma and fma4 are each valid for an fma intrinsic, both separately and combined.
+__m128 __attribute__((target("fma"))) fma_1(__m128 a, __m128 b, __m128 c) {
+  return __builtin_ia32_vfmaddps(a, b, c);
+}
+
+__m128 __attribute__((target("fma4"))) fma_2(__m128 a, __m128 b, __m128 c) {
+  return __builtin_ia32_vfmaddps(a, b, c);
+}
+
+__m128 __attribute__((target("fma,fma4"))) fma_3(__m128 a, __m128 b, __m128 c) {
+  return __builtin_ia32_vfmaddps(a, b, c);
+}
diff --git a/test/CodeGen/target-data.c b/test/CodeGen/target-data.c
index 3c3ea04..08265f9 100644
--- a/test/CodeGen/target-data.c
+++ b/test/CodeGen/target-data.c
@@ -78,6 +78,14 @@
 // RUN: FileCheck %s -check-prefix=LE32-NACL
 // LE32-NACL: target datalayout = "e-p:32:32-i64:64"
 
+// RUN: %clang_cc1 -triple wasm32-unknown-unknown -o - -emit-llvm %s | \
+// RUN: FileCheck %s -check-prefix=WEBASSEMBLY32
+// WEBASSEMBLY32: target datalayout = "e-p:32:32-i64:64-n32:64-S128"
+
+// RUN: %clang_cc1 -triple wasm64-unknown-unknown -o - -emit-llvm %s | \
+// RUN: FileCheck %s -check-prefix=WEBASSEMBLY64
+// WEBASSEMBLY64: target datalayout = "e-p:64:64-i64:64-n32:64-S128"
+
 // RUN: %clang_cc1 -triple powerpc-unknown -o - -emit-llvm %s | \
 // RUN: FileCheck %s -check-prefix=PPC
 // PPC: target datalayout = "E-m:e-p:32:32-i64:64-n32"
@@ -149,7 +157,7 @@
 
 // RUN: %clang_cc1 -triple hexagon-unknown -o - -emit-llvm %s | \
 // RUN: FileCheck %s -check-prefix=HEXAGON
-// HEXAGON: target datalayout = "e-m:e-p:32:32-i1:32-i64:64-a:0-n32"
+// HEXAGON: target datalayout = "e-m:e-p:32:32:32-i64:64:64-i32:32:32-i16:16:16-i1:8:8-f64:64:64-f32:32:32-v64:64:64-v32:32:32-a:0-n16:32"
 
 // RUN: %clang_cc1 -triple s390x-unknown -o - -emit-llvm %s | \
 // RUN: FileCheck %s -check-prefix=SYSTEMZ
diff --git a/test/CodeGen/target-features-error-2.c b/test/CodeGen/target-features-error-2.c
new file mode 100644
index 0000000..c23d152
--- /dev/null
+++ b/test/CodeGen/target-features-error-2.c
@@ -0,0 +1,7 @@
+// RUN: %clang_cc1 %s -triple=x86_64-linux-gnu -S -verify -o -
+#define __MM_MALLOC_H
+#include <x86intrin.h>
+
+int baz(__m256i a) {
+  return _mm256_extract_epi32(a, 3); // expected-error {{always_inline function '_mm256_extract_epi32' requires target feature 'sse4.2', but would be inlined into function 'baz' that is compiled without support for 'sse4.2'}}
+}
diff --git a/test/CodeGen/target-features-error.c b/test/CodeGen/target-features-error.c
new file mode 100644
index 0000000..518f6e6
--- /dev/null
+++ b/test/CodeGen/target-features-error.c
@@ -0,0 +1,8 @@
+// RUN: %clang_cc1 %s -triple=x86_64-linux-gnu -S -verify -o -
+int __attribute__((target("avx"), always_inline)) foo(int a) {
+  return a + 4;
+}
+int bar() {
+  return foo(4); // expected-error {{always_inline function 'foo' requires target feature 'sse4.2', but would be inlined into function 'bar' that is compiled without support for 'sse4.2'}}
+}
+
diff --git a/test/CodeGen/target-features-no-error.c b/test/CodeGen/target-features-no-error.c
new file mode 100644
index 0000000..b6283b6
--- /dev/null
+++ b/test/CodeGen/target-features-no-error.c
@@ -0,0 +1,9 @@
+// RUN: %clang_cc1 %s -triple=x86_64-linux-gnu -emit-llvm -o - -target-feature -sse2
+
+// Verify that negative features don't cause additional requirements on the inline function.
+int __attribute__((target("sse"), always_inline)) foo(int a) {
+  return a + 4;
+}
+int bar() {
+  return foo(4); // expected-no-diagnostics
+}
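Read together, target-features-error.c and target-features-no-error.c bracket the always_inline rule: inlining is rejected when the caller lacks a feature the callee requires, while negative features on the command line do not add new requirements. A third variant, sketched here purely as an illustration (not part of the patch), gives the caller the same target feature, which also avoids the diagnostic:

int __attribute__((target("avx"), always_inline)) foo(int a) {
  return a + 4;
}

/* No diagnostic expected: the caller's feature set covers everything the
   always_inline callee requires, so the inline is legal. */
int __attribute__((target("avx"))) bar(void) {
  return foo(4);
}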
diff --git a/test/CodeGen/tbaa-class.cpp b/test/CodeGen/tbaa-class.cpp
index a8005d6..f611ae5 100644
--- a/test/CodeGen/tbaa-class.cpp
+++ b/test/CodeGen/tbaa-class.cpp
@@ -51,10 +51,10 @@
 };
 
 uint32_t g(uint32_t *s, StructA *A, uint64_t count) {
-// CHECK: define i32 @{{.*}}(
+// CHECK-LABEL: define i32 @_Z1g
 // CHECK: store i32 1, i32* %{{.*}}, align 4, !tbaa [[TAG_i32:!.*]]
 // CHECK: store i32 4, i32* %{{.*}}, align 4, !tbaa [[TAG_i32]]
-// PATH: define i32 @{{.*}}(
+// PATH-LABEL: define i32 @_Z1g
 // PATH: store i32 1, i32* %{{.*}}, align 4, !tbaa [[TAG_i32:!.*]]
 // PATH: store i32 4, i32* %{{.*}}, align 4, !tbaa [[TAG_A_f32:!.*]]
   *s = 1;
@@ -63,22 +63,22 @@
 }
 
 uint32_t g2(uint32_t *s, StructA *A, uint64_t count) {
-// CHECK: define i32 @{{.*}}(
+// CHECK-LABEL: define i32 @_Z2g2
 // CHECK: store i32 1, i32* %{{.*}}, align 4, !tbaa [[TAG_i32]]
-// CHECK: store i16 4, i16* %{{.*}}, align 2, !tbaa [[TAG_i16:!.*]]
-// PATH: define i32 @{{.*}}(
+// CHECK: store i16 4, i16* %{{.*}}, align 4, !tbaa [[TAG_i16:!.*]]
+// PATH-LABEL: define i32 @_Z2g2
 // PATH: store i32 1, i32* %{{.*}}, align 4, !tbaa [[TAG_i32]]
-// PATH: store i16 4, i16* %{{.*}}, align 2, !tbaa [[TAG_A_f16:!.*]]
+// PATH: store i16 4, i16* %{{.*}}, align 4, !tbaa [[TAG_A_f16:!.*]]
   *s = 1;
   A->f16 = 4;
   return *s;
 }
 
 uint32_t g3(StructA *A, StructB *B, uint64_t count) {
-// CHECK: define i32 @{{.*}}(
+// CHECK-LABEL: define i32 @_Z2g3
 // CHECK: store i32 1, i32* %{{.*}}, align 4, !tbaa [[TAG_i32]]
 // CHECK: store i32 4, i32* %{{.*}}, align 4, !tbaa [[TAG_i32]]
-// PATH: define i32 @{{.*}}(
+// PATH-LABEL: define i32 @_Z2g3
 // PATH: store i32 1, i32* %{{.*}}, align 4, !tbaa [[TAG_A_f32]]
 // PATH: store i32 4, i32* %{{.*}}, align 4, !tbaa [[TAG_B_a_f32:!.*]]
   A->f32 = 1;
@@ -87,22 +87,22 @@
 }
 
 uint32_t g4(StructA *A, StructB *B, uint64_t count) {
-// CHECK: define i32 @{{.*}}(
+// CHECK-LABEL: define i32 @_Z2g4
 // CHECK: store i32 1, i32* %{{.*}}, align 4, !tbaa [[TAG_i32]]
-// CHECK: store i16 4, i16* %{{.*}}, align 2, !tbaa [[TAG_i16]]
-// PATH: define i32 @{{.*}}(
+// CHECK: store i16 4, i16* %{{.*}}, align 4, !tbaa [[TAG_i16]]
+// PATH-LABEL: define i32 @_Z2g4
 // PATH: store i32 1, i32* %{{.*}}, align 4, !tbaa [[TAG_A_f32]]
-// PATH: store i16 4, i16* %{{.*}}, align 2, !tbaa [[TAG_B_a_f16:!.*]]
+// PATH: store i16 4, i16* %{{.*}}, align 4, !tbaa [[TAG_B_a_f16:!.*]]
   A->f32 = 1;
   B->a.f16 = 4;
   return A->f32;
 }
 
 uint32_t g5(StructA *A, StructB *B, uint64_t count) {
-// CHECK: define i32 @{{.*}}(
+// CHECK-LABEL: define i32 @_Z2g5
 // CHECK: store i32 1, i32* %{{.*}}, align 4, !tbaa [[TAG_i32]]
 // CHECK: store i32 4, i32* %{{.*}}, align 4, !tbaa [[TAG_i32]]
-// PATH: define i32 @{{.*}}(
+// PATH-LABEL: define i32 @_Z2g5
 // PATH: store i32 1, i32* %{{.*}}, align 4, !tbaa [[TAG_A_f32]]
 // PATH: store i32 4, i32* %{{.*}}, align 4, !tbaa [[TAG_B_f32:!.*]]
   A->f32 = 1;
@@ -111,10 +111,10 @@
 }
 
 uint32_t g6(StructA *A, StructB *B, uint64_t count) {
-// CHECK: define i32 @{{.*}}(
+// CHECK-LABEL: define i32 @_Z2g6
 // CHECK: store i32 1, i32* %{{.*}}, align 4, !tbaa [[TAG_i32]]
 // CHECK: store i32 4, i32* %{{.*}}, align 4, !tbaa [[TAG_i32]]
-// PATH: define i32 @{{.*}}(
+// PATH-LABEL: define i32 @_Z2g6
 // PATH: store i32 1, i32* %{{.*}}, align 4, !tbaa [[TAG_A_f32]]
 // PATH: store i32 4, i32* %{{.*}}, align 4, !tbaa [[TAG_B_a_f32_2:!.*]]
   A->f32 = 1;
@@ -123,10 +123,10 @@
 }
 
 uint32_t g7(StructA *A, StructS *S, uint64_t count) {
-// CHECK: define i32 @{{.*}}(
+// CHECK-LABEL: define i32 @_Z2g7
 // CHECK: store i32 1, i32* %{{.*}}, align 4, !tbaa [[TAG_i32]]
 // CHECK: store i32 4, i32* %{{.*}}, align 4, !tbaa [[TAG_i32]]
-// PATH: define i32 @{{.*}}(
+// PATH-LABEL: define i32 @_Z2g7
 // PATH: store i32 1, i32* %{{.*}}, align 4, !tbaa [[TAG_A_f32]]
 // PATH: store i32 4, i32* %{{.*}}, align 4, !tbaa [[TAG_S_f32:!.*]]
   A->f32 = 1;
@@ -135,22 +135,22 @@
 }
 
 uint32_t g8(StructA *A, StructS *S, uint64_t count) {
-// CHECK: define i32 @{{.*}}(
+// CHECK-LABEL: define i32 @_Z2g8
 // CHECK: store i32 1, i32* %{{.*}}, align 4, !tbaa [[TAG_i32]]
-// CHECK: store i16 4, i16* %{{.*}}, align 2, !tbaa [[TAG_i16]]
-// PATH: define i32 @{{.*}}(
+// CHECK: store i16 4, i16* %{{.*}}, align 4, !tbaa [[TAG_i16]]
+// PATH-LABEL: define i32 @_Z2g8
 // PATH: store i32 1, i32* %{{.*}}, align 4, !tbaa [[TAG_A_f32]]
-// PATH: store i16 4, i16* %{{.*}}, align 2, !tbaa [[TAG_S_f16:!.*]]
+// PATH: store i16 4, i16* %{{.*}}, align 4, !tbaa [[TAG_S_f16:!.*]]
   A->f32 = 1;
   S->f16 = 4;
   return A->f32;
 }
 
 uint32_t g9(StructS *S, StructS2 *S2, uint64_t count) {
-// CHECK: define i32 @{{.*}}(
+// CHECK-LABEL: define i32 @_Z2g9
 // CHECK: store i32 1, i32* %{{.*}}, align 4, !tbaa [[TAG_i32]]
 // CHECK: store i32 4, i32* %{{.*}}, align 4, !tbaa [[TAG_i32]]
-// PATH: define i32 @{{.*}}(
+// PATH-LABEL: define i32 @_Z2g9
 // PATH: store i32 1, i32* %{{.*}}, align 4, !tbaa [[TAG_S_f32]]
 // PATH: store i32 4, i32* %{{.*}}, align 4, !tbaa [[TAG_S_f32:!.*]]
   S->f32 = 1;
@@ -159,10 +159,10 @@
 }
 
 uint32_t g10(StructS *S, StructS2 *S2, uint64_t count) {
-// CHECK: define i32 @{{.*}}(
+// CHECK-LABEL: define i32 @_Z3g10
 // CHECK: store i32 1, i32* %{{.*}}, align 4, !tbaa [[TAG_i32]]
 // CHECK: store i32 4, i32* %{{.*}}, align 4, !tbaa [[TAG_i32]]
-// PATH: define i32 @{{.*}}(
+// PATH-LABEL: define i32 @_Z3g10
 // PATH: store i32 1, i32* %{{.*}}, align 4, !tbaa [[TAG_S_f32]]
 // PATH: store i32 4, i32* %{{.*}}, align 4, !tbaa [[TAG_S2_f32_2:!.*]]
   S->f32 = 1;
@@ -171,10 +171,10 @@
 }
 
 uint32_t g11(StructC *C, StructD *D, uint64_t count) {
-// CHECK: define i32 @{{.*}}(
+// CHECK-LABEL: define i32 @_Z3g11
 // CHECK: store i32 1, i32* %{{.*}}, align 4, !tbaa [[TAG_i32]]
 // CHECK: store i32 4, i32* %{{.*}}, align 4, !tbaa [[TAG_i32]]
-// PATH: define i32 @{{.*}}(
+// PATH-LABEL: define i32 @_Z3g11
 // PATH: store i32 1, i32* %{{.*}}, align 4, !tbaa [[TAG_C_b_a_f32:!.*]]
 // PATH: store i32 4, i32* %{{.*}}, align 4, !tbaa [[TAG_D_b_a_f32:!.*]]
   C->b.a.f32 = 1;
@@ -183,11 +183,11 @@
 }
 
 uint32_t g12(StructC *C, StructD *D, uint64_t count) {
-// CHECK: define i32 @{{.*}}(
+// CHECK-LABEL: define i32 @_Z3g12
 // CHECK: store i32 1, i32* %{{.*}}, align 4, !tbaa [[TAG_i32]]
 // CHECK: store i32 4, i32* %{{.*}}, align 4, !tbaa [[TAG_i32]]
 // TODO: differentiate the two accesses.
-// PATH: define i32 @{{.*}}(
+// PATH-LABEL: define i32 @_Z3g12
 // PATH: store i32 1, i32* %{{.*}}, align 4, !tbaa [[TAG_B_a_f32]]
 // PATH: store i32 4, i32* %{{.*}}, align 4, !tbaa [[TAG_B_a_f32]]
   StructB *b1 = &(C->b);
diff --git a/test/CodeGen/tbaa.cpp b/test/CodeGen/tbaa.cpp
index 2bff5d0..c43ca58 100644
--- a/test/CodeGen/tbaa.cpp
+++ b/test/CodeGen/tbaa.cpp
@@ -45,10 +45,10 @@
 } StructS2;
 
 uint32_t g(uint32_t *s, StructA *A, uint64_t count) {
-// CHECK: define i32 @{{.*}}(
+// CHECK-LABEL: define i32 @_Z1g
 // CHECK: store i32 1, i32* %{{.*}}, align 4, !tbaa [[TAG_i32:!.*]]
 // CHECK: store i32 4, i32* %{{.*}}, align 4, !tbaa [[TAG_i32]]
-// PATH: define i32 @{{.*}}(
+// PATH-LABEL: define i32 @_Z1g
 // PATH: store i32 1, i32* %{{.*}}, align 4, !tbaa [[TAG_i32:!.*]]
 // PATH: store i32 4, i32* %{{.*}}, align 4, !tbaa [[TAG_A_f32:!.*]]
   *s = 1;
@@ -57,22 +57,22 @@
 }
 
 uint32_t g2(uint32_t *s, StructA *A, uint64_t count) {
-// CHECK: define i32 @{{.*}}(
+// CHECK-LABEL: define i32 @_Z2g2
 // CHECK: store i32 1, i32* %{{.*}}, align 4, !tbaa [[TAG_i32]]
-// CHECK: store i16 4, i16* %{{.*}}, align 2, !tbaa [[TAG_i16:!.*]]
-// PATH: define i32 @{{.*}}(
+// CHECK: store i16 4, i16* %{{.*}}, align 4, !tbaa [[TAG_i16:!.*]]
+// PATH-LABEL: define i32 @_Z2g2
 // PATH: store i32 1, i32* %{{.*}}, align 4, !tbaa [[TAG_i32]]
-// PATH: store i16 4, i16* %{{.*}}, align 2, !tbaa [[TAG_A_f16:!.*]]
+// PATH: store i16 4, i16* %{{.*}}, align 4, !tbaa [[TAG_A_f16:!.*]]
   *s = 1;
   A->f16 = 4;
   return *s;
 }
 
 uint32_t g3(StructA *A, StructB *B, uint64_t count) {
-// CHECK: define i32 @{{.*}}(
+// CHECK-LABEL: define i32 @_Z2g3
 // CHECK: store i32 1, i32* %{{.*}}, align 4, !tbaa [[TAG_i32]]
 // CHECK: store i32 4, i32* %{{.*}}, align 4, !tbaa [[TAG_i32]]
-// PATH: define i32 @{{.*}}(
+// PATH-LABEL: define i32 @_Z2g3
 // PATH: store i32 1, i32* %{{.*}}, align 4, !tbaa [[TAG_A_f32]]
 // PATH: store i32 4, i32* %{{.*}}, align 4, !tbaa [[TAG_B_a_f32:!.*]]
   A->f32 = 1;
@@ -81,22 +81,22 @@
 }
 
 uint32_t g4(StructA *A, StructB *B, uint64_t count) {
-// CHECK: define i32 @{{.*}}(
+// CHECK-LABEL: define i32 @_Z2g4
 // CHECK: store i32 1, i32* %{{.*}}, align 4, !tbaa [[TAG_i32]]
-// CHECK: store i16 4, i16* %{{.*}}, align 2, !tbaa [[TAG_i16]]
-// PATH: define i32 @{{.*}}(
+// CHECK: store i16 4, i16* %{{.*}}, align 4, !tbaa [[TAG_i16]]
+// PATH-LABEL: define i32 @_Z2g4
 // PATH: store i32 1, i32* %{{.*}}, align 4, !tbaa [[TAG_A_f32]]
-// PATH: store i16 4, i16* %{{.*}}, align 2, !tbaa [[TAG_B_a_f16:!.*]]
+// PATH: store i16 4, i16* %{{.*}}, align 4, !tbaa [[TAG_B_a_f16:!.*]]
   A->f32 = 1;
   B->a.f16 = 4;
   return A->f32;
 }
 
 uint32_t g5(StructA *A, StructB *B, uint64_t count) {
-// CHECK: define i32 @{{.*}}(
+// CHECK-LABEL: define i32 @_Z2g5
 // CHECK: store i32 1, i32* %{{.*}}, align 4, !tbaa [[TAG_i32]]
 // CHECK: store i32 4, i32* %{{.*}}, align 4, !tbaa [[TAG_i32]]
-// PATH: define i32 @{{.*}}(
+// PATH-LABEL: define i32 @_Z2g5
 // PATH: store i32 1, i32* %{{.*}}, align 4, !tbaa [[TAG_A_f32]]
 // PATH: store i32 4, i32* %{{.*}}, align 4, !tbaa [[TAG_B_f32:!.*]]
   A->f32 = 1;
@@ -105,10 +105,10 @@
 }
 
 uint32_t g6(StructA *A, StructB *B, uint64_t count) {
-// CHECK: define i32 @{{.*}}(
+// CHECK-LABEL: define i32 @_Z2g6
 // CHECK: store i32 1, i32* %{{.*}}, align 4, !tbaa [[TAG_i32]]
 // CHECK: store i32 4, i32* %{{.*}}, align 4, !tbaa [[TAG_i32]]
-// PATH: define i32 @{{.*}}(
+// PATH-LABEL: define i32 @_Z2g6
 // PATH: store i32 1, i32* %{{.*}}, align 4, !tbaa [[TAG_A_f32]]
 // PATH: store i32 4, i32* %{{.*}}, align 4, !tbaa [[TAG_B_a_f32_2:!.*]]
   A->f32 = 1;
@@ -117,10 +117,10 @@
 }
 
 uint32_t g7(StructA *A, StructS *S, uint64_t count) {
-// CHECK: define i32 @{{.*}}(
+// CHECK-LABEL: define i32 @_Z2g7
 // CHECK: store i32 1, i32* %{{.*}}, align 4, !tbaa [[TAG_i32]]
 // CHECK: store i32 4, i32* %{{.*}}, align 4, !tbaa [[TAG_i32]]
-// PATH: define i32 @{{.*}}(
+// PATH-LABEL: define i32 @_Z2g7
 // PATH: store i32 1, i32* %{{.*}}, align 4, !tbaa [[TAG_A_f32]]
 // PATH: store i32 4, i32* %{{.*}}, align 4, !tbaa [[TAG_S_f32:!.*]]
   A->f32 = 1;
@@ -129,22 +129,22 @@
 }
 
 uint32_t g8(StructA *A, StructS *S, uint64_t count) {
-// CHECK: define i32 @{{.*}}(
+// CHECK-LABEL: define i32 @_Z2g8
 // CHECK: store i32 1, i32* %{{.*}}, align 4, !tbaa [[TAG_i32]]
-// CHECK: store i16 4, i16* %{{.*}}, align 2, !tbaa [[TAG_i16]]
-// PATH: define i32 @{{.*}}(
+// CHECK: store i16 4, i16* %{{.*}}, align 4, !tbaa [[TAG_i16]]
+// PATH-LABEL: define i32 @_Z2g8
 // PATH: store i32 1, i32* %{{.*}}, align 4, !tbaa [[TAG_A_f32]]
-// PATH: store i16 4, i16* %{{.*}}, align 2, !tbaa [[TAG_S_f16:!.*]]
+// PATH: store i16 4, i16* %{{.*}}, align 4, !tbaa [[TAG_S_f16:!.*]]
   A->f32 = 1;
   S->f16 = 4;
   return A->f32;
 }
 
 uint32_t g9(StructS *S, StructS2 *S2, uint64_t count) {
-// CHECK: define i32 @{{.*}}(
+// CHECK-LABEL: define i32 @_Z2g9
 // CHECK: store i32 1, i32* %{{.*}}, align 4, !tbaa [[TAG_i32]]
 // CHECK: store i32 4, i32* %{{.*}}, align 4, !tbaa [[TAG_i32]]
-// PATH: define i32 @{{.*}}(
+// PATH-LABEL: define i32 @_Z2g9
 // PATH: store i32 1, i32* %{{.*}}, align 4, !tbaa [[TAG_S_f32]]
 // PATH: store i32 4, i32* %{{.*}}, align 4, !tbaa [[TAG_S2_f32:!.*]]
   S->f32 = 1;
@@ -153,22 +153,22 @@
 }
 
 uint32_t g10(StructS *S, StructS2 *S2, uint64_t count) {
-// CHECK: define i32 @{{.*}}(
+// CHECK-LABEL: define i32 @_Z3g10
 // CHECK: store i32 1, i32* %{{.*}}, align 4, !tbaa [[TAG_i32]]
-// CHECK: store i16 4, i16* %{{.*}}, align 2, !tbaa [[TAG_i16]]
-// PATH: define i32 @{{.*}}(
+// CHECK: store i16 4, i16* %{{.*}}, align 4, !tbaa [[TAG_i16]]
+// PATH-LABEL: define i32 @_Z3g10
 // PATH: store i32 1, i32* %{{.*}}, align 4, !tbaa [[TAG_S_f32]]
-// PATH: store i16 4, i16* %{{.*}}, align 2, !tbaa [[TAG_S2_f16:!.*]]
+// PATH: store i16 4, i16* %{{.*}}, align 4, !tbaa [[TAG_S2_f16:!.*]]
   S->f32 = 1;
   S2->f16 = 4;
   return S->f32;
 }
 
 uint32_t g11(StructC *C, StructD *D, uint64_t count) {
-// CHECK: define i32 @{{.*}}(
+// CHECK-LABEL: define i32 @_Z3g11
 // CHECK: store i32 1, i32* %{{.*}}, align 4, !tbaa [[TAG_i32]]
 // CHECK: store i32 4, i32* %{{.*}}, align 4, !tbaa [[TAG_i32]]
-// PATH: define i32 @{{.*}}(
+// PATH-LABEL: define i32 @_Z3g11
 // PATH: store i32 1, i32* %{{.*}}, align 4, !tbaa [[TAG_C_b_a_f32:!.*]]
 // PATH: store i32 4, i32* %{{.*}}, align 4, !tbaa [[TAG_D_b_a_f32:!.*]]
   C->b.a.f32 = 1;
@@ -177,11 +177,11 @@
 }
 
 uint32_t g12(StructC *C, StructD *D, uint64_t count) {
-// CHECK: define i32 @{{.*}}(
+// CHECK-LABEL: define i32 @_Z3g12
 // CHECK: store i32 1, i32* %{{.*}}, align 4, !tbaa [[TAG_i32]]
 // CHECK: store i32 4, i32* %{{.*}}, align 4, !tbaa [[TAG_i32]]
 // TODO: differentiate the two accesses.
-// PATH: define i32 @{{.*}}(
+// PATH-LABEL: define i32 @_Z3g12
 // PATH: store i32 1, i32* %{{.*}}, align 4, !tbaa [[TAG_B_a_f32]]
 // PATH: store i32 4, i32* %{{.*}}, align 4, !tbaa [[TAG_B_a_f32]]
   StructB *b1 = &(C->b);
@@ -202,9 +202,9 @@
 } ATTR;
 char g13(struct five *a, struct five *b) {
   return a->b;
-// CHECK: define signext i8 @{{.*}}(
+// CHECK-LABEL: define signext i8 @_Z3g13
 // CHECK: load i8, i8* %{{.*}}, align 1, !tbaa [[TAG_char:!.*]]
-// PATH: define signext i8 @{{.*}}(
+// PATH-LABEL: define signext i8 @_Z3g13
 // PATH: load i8, i8* %{{.*}}, align 1, !tbaa [[TAG_five_b:!.*]]
 }
 
@@ -215,9 +215,9 @@
   char c;
 };
 char g14(struct six *a, struct six *b) {
-// CHECK: define signext i8 @{{.*}}(
+// CHECK-LABEL: define signext i8 @_Z3g14
 // CHECK: load i8, i8* %{{.*}}, align 1, !tbaa [[TAG_char]]
-// PATH: define signext i8 @{{.*}}(
+// PATH-LABEL: define signext i8 @_Z3g14
 // PATH: load i8, i8* %{{.*}}, align 1, !tbaa [[TAG_six_b:!.*]]
   return a->b;
 }
@@ -225,10 +225,10 @@
 // Types that differ only by name may alias.
 typedef StructS StructS3;
 uint32_t g15(StructS *S, StructS3 *S3, uint64_t count) {
-// CHECK: define i32 @{{.*}}(
+// CHECK-LABEL: define i32 @_Z3g15
 // CHECK: store i32 1, i32* %{{.*}}, align 4, !tbaa [[TAG_i32]]
 // CHECK: store i32 4, i32* %{{.*}}, align 4, !tbaa [[TAG_i32]]
-// PATH: define i32 @{{.*}}(
+// PATH-LABEL: define i32 @_Z3g15
 // PATH: store i32 1, i32* %{{.*}}, align 4, !tbaa [[TAG_S_f32]]
 // PATH: store i32 4, i32* %{{.*}}, align 4, !tbaa [[TAG_S_f32]]
   S->f32 = 1;
diff --git a/test/CodeGen/tbm-builtins.c b/test/CodeGen/tbm-builtins.c
index e3a7021..29e147a 100644
--- a/test/CodeGen/tbm-builtins.c
+++ b/test/CodeGen/tbm-builtins.c
@@ -1,4 +1,7 @@
 // RUN: %clang_cc1 %s -O3 -triple=x86_64-unknown-unknown -target-feature +tbm -emit-llvm -o - | FileCheck %s
+// FIXME: The code generation checks for add/sub and/or depend on the optimizer.
+// The REQUIRES keyword will be removed when the FIXME is complete.
+// REQUIRES: x86-registered-target
 
 // Don't include mm_malloc.h, it's system specific.
 #define __MM_MALLOC_H
diff --git a/test/CodeGen/thinlto_backend.c b/test/CodeGen/thinlto_backend.c
new file mode 100644
index 0000000..a2737fb
--- /dev/null
+++ b/test/CodeGen/thinlto_backend.c
@@ -0,0 +1,14 @@
+// RUN: %clang -O2 %s -flto=thin -c -o %t.o
+// RUN: llvm-lto -thinlto -o %t %t.o
+
+// Ensure clang -cc1 gives the expected error for an incorrect input type
+// RUN: not %clang_cc1 -O2 -o %t1.o %s -c -fthinlto-index=%t.thinlto.bc 2>&1 | FileCheck %s -check-prefix=CHECK-WARNING
+// CHECK-WARNING: error: invalid argument '-fthinlto-index={{.*}}' only allowed with '-x ir'
+
+// Ensure we get expected error for missing index file
+// RUN: %clang -O2 -o %t1.o -x ir %t.o -c -fthinlto-index=bad.thinlto.bc 2>&1 | FileCheck %s -check-prefix=CHECK-ERROR
+// CHECK-ERROR: Error loading index file 'bad.thinlto.bc'
+
+// Ensure Function Importing pass added
+// RUN: %clang -O2 -o %t1.o -x ir %t.o -c -fthinlto-index=%t.thinlto.bc -mllvm -debug-pass=Structure 2>&1 | FileCheck %s -check-prefix=CHECK-PASS
+// CHECK-PASS: Function Importing
diff --git a/test/CodeGen/tls-model.c b/test/CodeGen/tls-model.c
index b5bae77..41c8de0 100644
--- a/test/CodeGen/tls-model.c
+++ b/test/CodeGen/tls-model.c
@@ -3,7 +3,12 @@
 // RUN: %clang_cc1 %s -triple x86_64-pc-linux-gnu -ftls-model=local-dynamic -emit-llvm -o - | FileCheck %s -check-prefix=CHECK-LD
 // RUN: %clang_cc1 %s -triple x86_64-pc-linux-gnu -ftls-model=initial-exec -emit-llvm -o - | FileCheck %s -check-prefix=CHECK-IE
 // RUN: %clang_cc1 %s -triple x86_64-pc-linux-gnu -ftls-model=local-exec -emit-llvm -o - | FileCheck %s -check-prefix=CHECK-LE
+//
+// RUN: %clang_cc1 %s -triple x86_64-pc-linux-gnu -femulated-tls -emit-llvm -o - 2>&1 | \
+// RUN:     FileCheck %s -check-prefix=CHECK-GD
 
+int z1 = 0;
+int z2;
 int __thread x;
 int f() {
   static int __thread y;
@@ -11,18 +16,29 @@
 }
 int __thread __attribute__((tls_model("initial-exec"))) z;
 
+// Note that unlike normal C uninitialized global variables,
+// uninitialized TLS variables do NOT have COMMON linkage.
+
+// CHECK-GD: @z1 = global i32 0
 // CHECK-GD: @f.y = internal thread_local global i32 0
+// CHECK-GD: @z2 = common global i32 0
 // CHECK-GD: @x = thread_local global i32 0
 // CHECK-GD: @z = thread_local(initialexec) global i32 0
 
+// CHECK-LD: @z1 = global i32 0
 // CHECK-LD: @f.y = internal thread_local(localdynamic) global i32 0
+// CHECK-LD: @z2 = common global i32 0
 // CHECK-LD: @x = thread_local(localdynamic) global i32 0
 // CHECK-LD: @z = thread_local(initialexec) global i32 0
 
+// CHECK-IE: @z1 = global i32 0
 // CHECK-IE: @f.y = internal thread_local(initialexec) global i32 0
+// CHECK-IE: @z2 = common global i32 0
 // CHECK-IE: @x = thread_local(initialexec) global i32 0
 // CHECK-IE: @z = thread_local(initialexec) global i32 0
 
+// CHECK-LE: @z1 = global i32 0
 // CHECK-LE: @f.y = internal thread_local(localexec) global i32 0
+// CHECK-LE: @z2 = common global i32 0
 // CHECK-LE: @x = thread_local(localexec) global i32 0
 // CHECK-LE: @z = thread_local(initialexec) global i32 0
diff --git a/test/CodeGen/ubsan-blacklist.c b/test/CodeGen/ubsan-blacklist.c
index 6c67f02..f6e3882 100644
--- a/test/CodeGen/ubsan-blacklist.c
+++ b/test/CodeGen/ubsan-blacklist.c
@@ -14,9 +14,9 @@
 // FUNC: @hash
 // FILE: @hash
 unsigned hash() {
-// DEFAULT: call void @__ubsan
-// FUNC-NOT: call void @__ubsan
-// FILE-NOT: call void @__ubsan
+// DEFAULT: call {{.*}}void @__ubsan
+// FUNC-NOT: call {{.*}}void @__ubsan
+// FILE-NOT: call {{.*}}void @__ubsan
   return i * 37;
 }
 
@@ -24,8 +24,8 @@
 // FUNC: @add
 // FILE: @add
 unsigned add() {
-// DEFAULT: call void @__ubsan
-// FUNC: call void @__ubsan
-// FILE-NOT: call void @__ubsan
+// DEFAULT: call {{.*}}void @__ubsan
+// FUNC: call {{.*}}void @__ubsan
+// FILE-NOT: call {{.*}}void @__ubsan
   return i + 1;
 }
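The FUNC and FILE prefixes above correspond to runs that pass a function-level and a file-level blacklist through -fsanitize-blacklist; the files themselves are produced by RUN lines earlier in this test, outside the hunk shown. As a rough sketch of Clang's special-case-list syntax, assuming the test suppresses the function `hash` in one list and the whole source file in the other (the exact entries and paths are assumptions here, not quoted from the patch):

# function-based blacklist: skip UBSan instrumentation for hash() only
fun:hash

# file-based blacklist: skip UBSan instrumentation for the whole source file
src:*ubsan-blacklist.c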
diff --git a/test/CodeGen/ubsan-conditional.c b/test/CodeGen/ubsan-conditional.c
new file mode 100644
index 0000000..7f63b39
--- /dev/null
+++ b/test/CodeGen/ubsan-conditional.c
@@ -0,0 +1,10 @@
+// RUN: %clang_cc1 %s -emit-llvm -fsanitize=float-divide-by-zero -o - | FileCheck %s
+
+_Bool b;
+// CHECK: @f(
+double f() {
+  // CHECK: %[[B:.*]] = load {{.*}} @b
+  // CHECK: %[[COND:.*]] = trunc {{.*}} %[[B]] to i1
+  // CHECK: br i1 %[[COND]]
+  return b ? 0.0 / 0.0 : 0.0;
+}
diff --git a/test/CodeGen/ubsan-type-blacklist.cpp b/test/CodeGen/ubsan-type-blacklist.cpp
index b3137e7..26b7aa8 100644
--- a/test/CodeGen/ubsan-type-blacklist.cpp
+++ b/test/CodeGen/ubsan-type-blacklist.cpp
@@ -14,7 +14,7 @@
 // DEFAULT: @_Z7checkmev
 // TYPE: @_Z7checkmev
 void checkme() {
-// DEFAULT: call void @__ubsan_handle_dynamic_type_cache_miss({{.*}} (%class.Bar* @bar to
+// DEFAULT: call void @__ubsan_handle_dynamic_type_cache_miss({{.*}} ({{.*}}* @bar to
 // TYPE-NOT: @__ubsan_handle_dynamic_type_cache_miss
   Foo* foo = static_cast<Foo*>(&bar); // down-casting
 // DEFAULT: ret void
diff --git a/test/CodeGen/vector-alignment.c b/test/CodeGen/vector-alignment.c
index 92d1ae7..d1fd771 100644
--- a/test/CodeGen/vector-alignment.c
+++ b/test/CodeGen/vector-alignment.c
@@ -1,38 +1,61 @@
-// RUN: %clang_cc1 -w -triple x86_64-apple-darwin10 -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -w -triple x86_64-apple-darwin10 \
+// RUN:  -emit-llvm -o - %s | FileCheck %s --check-prefix=ALL --check-prefix=SSE
+// RUN: %clang_cc1 -w -triple   i386-apple-darwin10 \
+// RUN:  -emit-llvm -o - %s | FileCheck %s --check-prefix=ALL --check-prefix=SSE
+// RUN: %clang_cc1 -w -triple x86_64-apple-darwin10 -target-feature +avx \
+// RUN:  -emit-llvm -o - %s | FileCheck %s --check-prefix=ALL --check-prefix=AVX
+// RUN: %clang_cc1 -w -triple   i386-apple-darwin10 -target-feature +avx \
+// RUN:  -emit-llvm -o - %s | FileCheck %s --check-prefix=ALL --check-prefix=AVX
+// RUN: %clang_cc1 -w -triple x86_64-apple-darwin10 -target-feature +avx512f \
+// RUN:  -emit-llvm -o - %s | FileCheck %s --check-prefix=ALL --check-prefix=AVX512
+// RUN: %clang_cc1 -w -triple   i386-apple-darwin10 -target-feature +avx512f \
+// RUN:  -emit-llvm -o - %s | FileCheck %s --check-prefix=ALL --check-prefix=AVX512
 // rdar://11759609
 
 // At or below target max alignment with no aligned attribute should align based
 // on the size of vector.
 double __attribute__((vector_size(16))) v1;
-// CHECK: @v1 {{.*}}, align 16
+// SSE: @v1 {{.*}}, align 16
+// AVX: @v1 {{.*}}, align 16
+// AVX512: @v1 {{.*}}, align 16
 double __attribute__((vector_size(32))) v2;
-// CHECK: @v2 {{.*}}, align 32
+// SSE: @v2 {{.*}}, align 16
+// AVX: @v2 {{.*}}, align 32
+// AVX512: @v2 {{.*}}, align 32
 
 // Alignment above target max alignment with no aligned attribute should align
 // based on the target max.
 double __attribute__((vector_size(64))) v3;
-// CHECK: @v3 {{.*}}, align 32
+// SSE: @v3 {{.*}}, align 16
+// AVX: @v3 {{.*}}, align 32
+// AVX512: @v3 {{.*}}, align 64
 double __attribute__((vector_size(1024))) v4;
-// CHECK: @v4 {{.*}}, align 32
+// SSE: @v4 {{.*}}, align 16
+// AVX: @v4 {{.*}}, align 32
+// AVX512: @v4 {{.*}}, align 64
 
 // Aligned attribute should always override.
 double __attribute__((vector_size(16), aligned(16))) v5;
-// CHECK: @v5 {{.*}}, align 16
+// ALL: @v5 {{.*}}, align 16
 double __attribute__((vector_size(16), aligned(64))) v6;
-// CHECK: @v6 {{.*}}, align 64
+// ALL: @v6 {{.*}}, align 64
 double __attribute__((vector_size(32), aligned(16))) v7;
-// CHECK: @v7 {{.*}}, align 16
+// ALL: @v7 {{.*}}, align 16
 double __attribute__((vector_size(32), aligned(64))) v8;
-// CHECK: @v8 {{.*}}, align 64
+// ALL: @v8 {{.*}}, align 64
 
 // Check non-power of 2 widths.
 double __attribute__((vector_size(24))) v9;
-// CHECK: @v9 {{.*}}, align 32
+// SSE: @v9 {{.*}}, align 16
+// AVX: @v9 {{.*}}, align 32
+// AVX512: @v9 {{.*}}, align 32
 double __attribute__((vector_size(40))) v10;
-// CHECK: @v10 {{.*}}, align 32
+// SSE: @v10 {{.*}}, align 16
+// AVX: @v10 {{.*}}, align 32
+// AVX512: @v10 {{.*}}, align 64
 
 // Check non-power of 2 widths with aligned attribute.
 double __attribute__((vector_size(24), aligned(64))) v11;
-// CHECK: @v11 {{.*}}, align 64
+// ALL: @v11 {{.*}}, align 64
 double __attribute__((vector_size(80), aligned(16))) v12;
-// CHECK: @v12 {{.*}}, align 16
+// ALL: @v12 {{.*}}, align 16
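One informal way to read the expectations above (inferred from the CHECK lines rather than stated by the patch): with no aligned attribute, the natural alignment is the vector size rounded up to the next power of two, clamped to the target's maximum vector alignment, e.g. for v10:

  vector_size(40) -> next power of two = 64
  SSE     : min(64, 16) = 16   (v10: align 16)
  AVX     : min(64, 32) = 32   (v10: align 32)
  AVX-512 : min(64, 64) = 64   (v10: align 64)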
diff --git a/test/CodeGen/vector.c b/test/CodeGen/vector.c
index 6c14b7f..8e820f2 100644
--- a/test/CodeGen/vector.c
+++ b/test/CodeGen/vector.c
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -triple i386-apple-darwin9 -O1 -target-cpu pentium4 -target-feature +sse4.1 -g -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -triple i386-apple-darwin9 -O1 -target-cpu pentium4 -target-feature +sse4.1 -debug-info-kind=limited -emit-llvm %s -o - | FileCheck %s
 typedef short __v4hi __attribute__ ((__vector_size__ (8)));
 
 void test1() {
diff --git a/test/CodeGen/vectorcall.c b/test/CodeGen/vectorcall.c
index 17927c7..9ee35b1 100644
--- a/test/CodeGen/vectorcall.c
+++ b/test/CodeGen/vectorcall.c
@@ -32,13 +32,13 @@
 // registers.
 void __vectorcall hfa2(struct HFA4 a, struct HFA4 b, double c) {}
 // CHECK: define x86_vectorcallcc void @"\01hfa2@@72"(double %a.0, double %a.1, double %a.2, double %a.3, %struct.HFA4* inreg %b, double %c)
-// X64: define x86_vectorcallcc void @"\01hfa2@@72"(double %a.0, double %a.1, double %a.2, double %a.3, %struct.HFA4* align 8 %b, double %c)
+// X64: define x86_vectorcallcc void @"\01hfa2@@72"(double %a.0, double %a.1, double %a.2, double %a.3, %struct.HFA4* %b, double %c)
 
 // Ensure that we pass builtin types directly while counting them against the
 // SSE register usage.
 void __vectorcall hfa3(double a, double b, double c, double d, double e, struct HFA2 f) {}
 // CHECK: define x86_vectorcallcc void @"\01hfa3@@56"(double %a, double %b, double %c, double %d, double %e, %struct.HFA2* inreg %f)
-// X64: define x86_vectorcallcc void @"\01hfa3@@56"(double %a, double %b, double %c, double %d, double %e, %struct.HFA2* align 8 %f)
+// X64: define x86_vectorcallcc void @"\01hfa3@@56"(double %a, double %b, double %c, double %d, double %e, %struct.HFA2* %f)
 
 // Aggregates with more than four elements are not HFAs and are passed byval.
 // Because they are not classified as homogeneous, they don't get special
@@ -63,11 +63,11 @@
 
 void __vectorcall hva2(struct HVA4 a, struct HVA4 b, v4f32 c) {}
 // CHECK: define x86_vectorcallcc void @"\01hva2@@144"(<4 x float> %a.0, <4 x float> %a.1, <4 x float> %a.2, <4 x float> %a.3, %struct.HVA4* inreg %b, <4 x float> %c)
-// X64: define x86_vectorcallcc void @"\01hva2@@144"(<4 x float> %a.0, <4 x float> %a.1, <4 x float> %a.2, <4 x float> %a.3, %struct.HVA4* align 16 %b, <4 x float> %c)
+// X64: define x86_vectorcallcc void @"\01hva2@@144"(<4 x float> %a.0, <4 x float> %a.1, <4 x float> %a.2, <4 x float> %a.3, %struct.HVA4* %b, <4 x float> %c)
 
 void __vectorcall hva3(v4f32 a, v4f32 b, v4f32 c, v4f32 d, v4f32 e, struct HVA2 f) {}
 // CHECK: define x86_vectorcallcc void @"\01hva3@@112"(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x float> %d, <4 x float> %e, %struct.HVA2* inreg %f)
-// X64: define x86_vectorcallcc void @"\01hva3@@112"(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x float> %d, <4 x float> %e, %struct.HVA2* align 16 %f)
+// X64: define x86_vectorcallcc void @"\01hva3@@112"(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x float> %d, <4 x float> %e, %struct.HVA2* %f)
 
 typedef float __attribute__((ext_vector_type(3))) v3f32;
 struct OddSizeHVA { v3f32 x, y; };
diff --git a/test/CodeGen/vld_dup.c b/test/CodeGen/vld_dup.c
index 9590412..d910c82 100644
--- a/test/CodeGen/vld_dup.c
+++ b/test/CodeGen/vld_dup.c
@@ -14,7 +14,7 @@
     int64_t v7[4];
 
     v1 = vld3_dup_s32(v0);
-// CHECK: [[T168:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld3lane.v2i32(i8* {{.*}}, <2 x i32> undef, <2 x i32> undef, <2 x i32> undef, i32 {{[0-9]+}}, i32 {{[0-9]+}})
+// CHECK: [[T168:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld3lane.v2i32.p0i8(i8* {{.*}}, <2 x i32> undef, <2 x i32> undef, <2 x i32> undef, i32 {{[0-9]+}}, i32 {{[0-9]+}})
 // CHECK-NEXT: [[T169:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[T168]], 0
 // CHECK-NEXT: [[T170:%.*]] = shufflevector <2 x i32> [[T169]], <2 x i32> [[T169]], <2 x i32> zeroinitializer
 // CHECK-NEXT: [[T171:%.*]] = insertvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[T168]], <2 x i32> [[T170]], 0
@@ -26,7 +26,7 @@
 // CHECK-NEXT: [[T177:%.*]] = insertvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[T174]], <2 x i32> [[T176]], 2
 
     v3 = vld4_dup_s32(v2);
-// CHECK: [[T178:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld4lane.v2i32(i8* {{.*}}, <2 x i32> undef, <2 x i32> undef, <2 x i32> undef, <2 x i32> undef, i32 {{[0-9]+}}, i32 {{[0-9]+}})
+// CHECK: [[T178:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld4lane.v2i32.p0i8(i8* {{.*}}, <2 x i32> undef, <2 x i32> undef, <2 x i32> undef, <2 x i32> undef, i32 {{[0-9]+}}, i32 {{[0-9]+}})
 // CHECK-NEXT: [[T179:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[T178]], 0
 // CHECK-NEXT: [[T180:%.*]] = shufflevector <2 x i32> [[T179]], <2 x i32> [[T179]], <2 x i32> zeroinitializer
 // CHECK-NEXT: [[T181:%.*]] = insertvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[T178]], <2 x i32> [[T180]], 0
@@ -41,10 +41,10 @@
 // CHECK-NEXT: [[T190:%.*]] = insertvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[T187]], <2 x i32> [[T189]], 3
 
     v4 = vld3_dup_s64(v6);
-// CHECK: {{%.*}} = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm.neon.vld3.v1i64(i8* {{.*}}, i32 {{[0-9]+}})
+// CHECK: {{%.*}} = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm.neon.vld3.v1i64.p0i8(i8* {{.*}}, i32 {{[0-9]+}})
 
     v5 = vld4_dup_s64(v7);
-// CHECK: {{%.*}} = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm.neon.vld4.v1i64(i8* {{.*}}, i32 {{[0-9]+}})
+// CHECK: {{%.*}} = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm.neon.vld4.v1i64.p0i8(i8* {{.*}}, i32 {{[0-9]+}})
 
     return 0;
 }
diff --git a/test/CodeGen/volatile-1.c b/test/CodeGen/volatile-1.c
index 71cd5f8..f63274b 100644
--- a/test/CodeGen/volatile-1.c
+++ b/test/CodeGen/volatile-1.c
@@ -22,7 +22,7 @@
 // that do implicit lvalue-to-rvalue conversion are substantially
 // reduced.
 
-// CHECK-LABEL: define void @test()
+// CHECK-LABEL: define {{.*}}void @test()
 void test() {
   // CHECK: load volatile [[INT]], [[INT]]* @i
   i;
@@ -303,11 +303,11 @@
 }
 
 extern volatile enum X x;
-// CHECK-LABEL: define void @test1()
+// CHECK-LABEL: define {{.*}}void @test1()
 void test1() {
   extern void test1_helper(void);
   test1_helper();
-  // CHECK: call void @test1_helper()
+  // CHECK: call {{.*}}void @test1_helper()
   // CHECK-NEXT: ret void
   x;
   (void) x;
diff --git a/test/CodeGen/wasm-arguments.c b/test/CodeGen/wasm-arguments.c
new file mode 100644
index 0000000..723632b
--- /dev/null
+++ b/test/CodeGen/wasm-arguments.c
@@ -0,0 +1,93 @@
+// RUN: %clang_cc1 -triple wasm32-unknown-unknown %s -emit-llvm -o - \
+// RUN:   | FileCheck %s -check-prefix=WEBASSEMBLY32
+// RUN: %clang_cc1 -triple wasm64-unknown-unknown %s -emit-llvm -o - \
+// RUN:   | FileCheck %s -check-prefix=WEBASSEMBLY64
+
+// Basic argument/attribute tests for WebAssembly
+
+// WEBASSEMBLY32: define void @f0(i32 %i, i32 %j, i64 %k, double %l, fp128 %m)
+// WEBASSEMBLY64: define void @f0(i32 %i, i64 %j, i64 %k, double %l, fp128 %m)
+void f0(int i, long j, long long k, double l, long double m) {}
+
+typedef struct {
+  int aa;
+  int bb;
+} s1;
+// Structs should be passed byval and not split up.
+// WEBASSEMBLY32: define void @f1(%struct.s1* byval align 4 %i)
+// WEBASSEMBLY64: define void @f1(%struct.s1* byval align 4 %i)
+void f1(s1 i) {}
+
+typedef struct {
+  int cc;
+} s2;
+// Single-element structs should be returned as the one element.
+// WEBASSEMBLY32: define i32 @f2()
+// WEBASSEMBLY64: define i32 @f2()
+s2 f2() {
+  s2 foo;
+  return foo;
+}
+
+typedef struct {
+  int cc;
+  int dd;
+} s3;
+// Structs should be returned sret and not simplified by the frontend.
+// WEBASSEMBLY32: define void @f3(%struct.s3* noalias sret %agg.result)
+// WEBASSEMBLY64: define void @f3(%struct.s3* noalias sret %agg.result)
+s3 f3() {
+  s3 foo;
+  return foo;
+}
+
+// WEBASSEMBLY32: define void @f4(i64 %i)
+// WEBASSEMBLY64: define void @f4(i64 %i)
+void f4(long long i) {}
+
+// i8/i16 should be signext, i32 and higher should not.
+// WEBASSEMBLY32: define void @f5(i8 signext %a, i16 signext %b)
+// WEBASSEMBLY64: define void @f5(i8 signext %a, i16 signext %b)
+void f5(char a, short b) {}
+
+// WEBASSEMBLY32: define void @f6(i8 zeroext %a, i16 zeroext %b)
+// WEBASSEMBLY64: define void @f6(i8 zeroext %a, i16 zeroext %b)
+void f6(unsigned char a, unsigned short b) {}
+
+
+enum my_enum {
+  ENUM1,
+  ENUM2,
+  ENUM3,
+};
+// Enums should be treated as the underlying i32.
+// WEBASSEMBLY32: define void @f7(i32 %a)
+// WEBASSEMBLY64: define void @f7(i32 %a)
+void f7(enum my_enum a) {}
+
+enum my_big_enum {
+  ENUM4 = 0xFFFFFFFFFFFFFFFF,
+};
+// Big enums should be treated as the underlying i64.
+// WEBASSEMBLY32: define void @f8(i64 %a)
+// WEBASSEMBLY64: define void @f8(i64 %a)
+void f8(enum my_big_enum a) {}
+
+union simple_union {
+  int a;
+  char b;
+};
+// Unions should be passed as byval structs.
+// WEBASSEMBLY32: define void @f9(%union.simple_union* byval align 4 %s)
+// WEBASSEMBLY64: define void @f9(%union.simple_union* byval align 4 %s)
+void f9(union simple_union s) {}
+
+typedef struct {
+  int b4 : 4;
+  int b3 : 3;
+  int b8 : 8;
+} bitfield1;
+// Bitfields should be passed as byval structs.
+// WEBASSEMBLY32: define void @f10(%struct.bitfield1* byval align 4 %bf1)
+// WEBASSEMBLY64: define void @f10(%struct.bitfield1* byval align 4 %bf1)
+void f10(bitfield1 bf1) {}
diff --git a/test/CodeGen/wasm-regparm.c b/test/CodeGen/wasm-regparm.c
new file mode 100644
index 0000000..66037d9
--- /dev/null
+++ b/test/CodeGen/wasm-regparm.c
@@ -0,0 +1,4 @@
+// RUN: %clang_cc1 -triple wasm32-unknown-unknown %s -fsyntax-only -verify
+// RUN: %clang_cc1 -triple wasm64-unknown-unknown %s -fsyntax-only -verify
+
+void __attribute__((regparm(2))) fc_f1(int i, int j, int k) {} // expected-error{{'regparm' is not valid on this platform}}
diff --git a/test/CodeGen/x86-soft-float.c b/test/CodeGen/x86-soft-float.c
new file mode 100644
index 0000000..3f75628
--- /dev/null
+++ b/test/CodeGen/x86-soft-float.c
@@ -0,0 +1,6 @@
+// RUN: %clang_cc1 -triple i386-unknown-unknown -mregparm 3 -emit-llvm %s -o - | FileCheck %s -check-prefix=HARD
+// RUN: %clang_cc1 -triple i386-unknown-unknown -mregparm 3 -mfloat-abi soft -emit-llvm %s -o - | FileCheck %s -check-prefix=SOFT
+
+// HARD: define void @f1(float %a)
+// SOFT: define void @f1(float inreg %a)
+void f1(float a) {}
diff --git a/test/CodeGen/x86_32-arguments-iamcu.c b/test/CodeGen/x86_32-arguments-iamcu.c
new file mode 100644
index 0000000..12fafdf
--- /dev/null
+++ b/test/CodeGen/x86_32-arguments-iamcu.c
@@ -0,0 +1,62 @@
+// RUN: %clang_cc1 -w -triple i386-pc-elfiamcu -mfloat-abi soft -emit-llvm -o - %s | FileCheck %s
+
+// CHECK-LABEL: define void @ints(i32 inreg %a, i32 inreg %b, i32 inreg %c, i32 %d)
+void ints(int a, int b, int c, int d) {}
+
+// CHECK-LABEL: define void @floats(float inreg %a, float inreg %b, float inreg %c, float %d)
+void floats(float a, float b, float c, float d) {}
+
+// CHECK-LABEL: define void @mixed(i32 inreg %a, float inreg %b, i32 inreg %c, float %d)
+void mixed(int a, float b, int c, float d) {}
+
+// CHECK-LABEL: define void @doubles(double inreg %d1, double %d2)
+void doubles(double d1, double d2) {}
+
+// CHECK-LABEL: define void @mixedDoubles(i32 inreg %a, double inreg %d1)
+void mixedDoubles(int a, double d1) {}
+
+typedef struct st4_t {
+  int a;
+} st4_t;
+
+typedef struct st5_t {
+  int a;
+  char b;
+} st5_t;
+
+typedef struct st12_t {
+  int a;
+  int b;
+  int c;
+} st12_t;
+
+// CHECK-LABEL: define void @smallStructs(i32 inreg %st1.coerce, i32 inreg %st2.coerce, i32 inreg %st3.coerce)
+void smallStructs(st4_t st1, st4_t st2, st4_t st3) {}
+
+// CHECK-LABEL: define void @paddedStruct(i32 inreg %i1, i32 inreg %st.coerce0, i32 inreg %st.coerce1, i32 %st4.0)
+void paddedStruct(int i1, st5_t st, st4_t st4) {}
+
+// CHECK-LABEL: define void @largeStruct(i32 %st.0, i32 %st.1, i32 %st.2)
+void largeStruct(st12_t st) {}
+
+// CHECK-LABEL: define void @largeStructMiddle(i32 inreg %i1, i32 %st.0, i32 %st.1, i32 %st.2, i32 inreg %i2, i32 inreg %i3)
+void largeStructMiddle(int i1, st12_t st, int i2, int i3) {}
+
+// CHECK-LABEL: define i32 @retSmallStruct(i32 inreg %r.coerce)
+st4_t retSmallStruct(st4_t r) { return r; }
+
+// CHECK-LABEL: define i64 @retPaddedStruct(i32 inreg %r.coerce0, i32 inreg %r.coerce1)
+st5_t retPaddedStruct(st5_t r) { return r; }
+
+// CHECK-LABEL: define void @retLargeStruct(%struct.st12_t* inreg noalias sret %agg.result, i32 inreg %i1, i32 %r.0, i32 %r.1, i32 %r.2)
+st12_t retLargeStruct(int i1, st12_t r) { return r; }
+
+// FIXME: We really shouldn't be marking this inreg. Right now the
+// inreg gets ignored by the CG for varargs functions, but that's
+// insane.
+// CHECK-LABEL: define i32 @varArgs(i32 inreg %i1, ...)
+int varArgs(int i1, ...) { return i1; }
+
+// CHECK-LABEL: define double @longDoubleArg(double inreg %ld1)
+long double longDoubleArg(long double ld1) { return ld1; }
+
diff --git a/test/CodeGen/x86_32-xsave.c b/test/CodeGen/x86_32-xsave.c
new file mode 100644
index 0000000..af458af
--- /dev/null
+++ b/test/CodeGen/x86_32-xsave.c
@@ -0,0 +1,72 @@
+// RUN: %clang_cc1 %s -DTEST_XSAVE -O0 -triple=i686-unknown-unknown -target-feature +xsave -emit-llvm -o - -Werror | FileCheck %s --check-prefix=XSAVE
+// RUN: %clang_cc1 %s -DTEST_XSAVE -O0 -triple=i686-unknown-unknown -target-feature +xsave -fno-signed-char -emit-llvm -o - -Werror | FileCheck %s --check-prefix=XSAVE
+
+// RUN: %clang_cc1 %s -DTEST_XSAVEOPT -O0 -triple=i686-unknown-unknown -target-feature +xsave -target-feature +xsaveopt -emit-llvm -o - -Werror | FileCheck %s --check-prefix=XSAVEOPT
+// RUN: %clang_cc1 %s -DTEST_XSAVEOPT -O0 -triple=i686-unknown-unknown -target-feature +xsave -target-feature +xsaveopt -fno-signed-char -emit-llvm -o - -Werror | FileCheck %s --check-prefix=XSAVEOPT
+
+// RUN: %clang_cc1 %s -DTEST_XSAVEC -O0 -triple=i686-unknown-unknown -target-feature +xsave -target-feature +xsavec -emit-llvm -o - -Werror | FileCheck %s --check-prefix=XSAVEC
+// RUN: %clang_cc1 %s -DTEST_XSAVEC -O0 -triple=i686-unknown-unknown -target-feature +xsave -target-feature +xsavec -fno-signed-char -emit-llvm -o - -Werror | FileCheck %s --check-prefix=XSAVEC
+
+// RUN: %clang_cc1 %s -DTEST_XSAVES -O0 -triple=i686-unknown-unknown -target-feature +xsave -target-feature +xsaves -emit-llvm -o - -Werror | FileCheck %s --check-prefix=XSAVES
+// RUN: %clang_cc1 %s -DTEST_XSAVES -O0 -triple=i686-unknown-unknown -target-feature +xsave -target-feature +xsaves -fno-signed-char -emit-llvm -o - -Werror | FileCheck %s --check-prefix=XSAVES
+
+void test() {
+  unsigned long long tmp_ULLi;
+  void*              tmp_vp;
+
+#ifdef TEST_XSAVE
+// XSAVE: [[tmp_vp_1:%[0-9a-zA-z]+]] = load i8*, i8** %tmp_vp, align 4
+// XSAVE: [[tmp_ULLi_1:%[0-9a-zA-z]+]] = load i64, i64* %tmp_ULLi, align 8
+// XSAVE: [[high64_1:%[0-9a-zA-z]+]] = lshr i64 [[tmp_ULLi_1]], 32
+// XSAVE: [[high32_1:%[0-9a-zA-z]+]] = trunc i64 [[high64_1]] to i32
+// XSAVE: [[low32_1:%[0-9a-zA-z]+]] = trunc i64 [[tmp_ULLi_1]] to i32
+// XSAVE: call void @llvm.x86.xsave(i8* [[tmp_vp_1]], i32 [[high32_1]], i32 [[low32_1]])
+  (void)__builtin_ia32_xsave(tmp_vp, tmp_ULLi);
+
+// XSAVE: [[tmp_vp_3:%[0-9a-zA-z]+]] = load i8*, i8** %tmp_vp, align 4
+// XSAVE: [[tmp_ULLi_3:%[0-9a-zA-z]+]] = load i64, i64* %tmp_ULLi, align 8
+// XSAVE: [[high64_3:%[0-9a-zA-z]+]] = lshr i64 [[tmp_ULLi_3]], 32
+// XSAVE: [[high32_3:%[0-9a-zA-z]+]] = trunc i64 [[high64_3]] to i32
+// XSAVE: [[low32_3:%[0-9a-zA-z]+]] = trunc i64 [[tmp_ULLi_3]] to i32
+// XSAVE: call void @llvm.x86.xrstor(i8* [[tmp_vp_3]], i32 [[high32_3]], i32 [[low32_3]])
+  (void)__builtin_ia32_xrstor(tmp_vp, tmp_ULLi);
+#endif
+
+#ifdef TEST_XSAVEOPT
+// XSAVEOPT: [[tmp_vp_1:%[0-9a-zA-z]+]] = load i8*, i8** %tmp_vp, align 4
+// XSAVEOPT: [[tmp_ULLi_1:%[0-9a-zA-z]+]] = load i64, i64* %tmp_ULLi, align 8
+// XSAVEOPT: [[high64_1:%[0-9a-zA-z]+]] = lshr i64 [[tmp_ULLi_1]], 32
+// XSAVEOPT: [[high32_1:%[0-9a-zA-z]+]] = trunc i64 [[high64_1]] to i32
+// XSAVEOPT: [[low32_1:%[0-9a-zA-z]+]] = trunc i64 [[tmp_ULLi_1]] to i32
+// XSAVEOPT: call void @llvm.x86.xsaveopt(i8* [[tmp_vp_1]], i32 [[high32_1]], i32 [[low32_1]])
+  (void)__builtin_ia32_xsaveopt(tmp_vp, tmp_ULLi);
+#endif
+
+#ifdef TEST_XSAVEC
+// XSAVEC: [[tmp_vp_1:%[0-9a-zA-z]+]] = load i8*, i8** %tmp_vp, align 4
+// XSAVEC: [[tmp_ULLi_1:%[0-9a-zA-z]+]] = load i64, i64* %tmp_ULLi, align 8
+// XSAVEC: [[high64_1:%[0-9a-zA-z]+]] = lshr i64 [[tmp_ULLi_1]], 32
+// XSAVEC: [[high32_1:%[0-9a-zA-z]+]] = trunc i64 [[high64_1]] to i32
+// XSAVEC: [[low32_1:%[0-9a-zA-z]+]] = trunc i64 [[tmp_ULLi_1]] to i32
+// XSAVEC: call void @llvm.x86.xsavec(i8* [[tmp_vp_1]], i32 [[high32_1]], i32 [[low32_1]])
+  (void)__builtin_ia32_xsavec(tmp_vp, tmp_ULLi);
+#endif
+
+#ifdef TEST_XSAVES
+// XSAVES: [[tmp_vp_1:%[0-9a-zA-z]+]] = load i8*, i8** %tmp_vp, align 4
+// XSAVES: [[tmp_ULLi_1:%[0-9a-zA-z]+]] = load i64, i64* %tmp_ULLi, align 8
+// XSAVES: [[high64_1:%[0-9a-zA-z]+]] = lshr i64 [[tmp_ULLi_1]], 32
+// XSAVES: [[high32_1:%[0-9a-zA-z]+]] = trunc i64 [[high64_1]] to i32
+// XSAVES: [[low32_1:%[0-9a-zA-z]+]] = trunc i64 [[tmp_ULLi_1]] to i32
+// XSAVES: call void @llvm.x86.xsaves(i8* [[tmp_vp_1]], i32 [[high32_1]], i32 [[low32_1]])
+  (void)__builtin_ia32_xsaves(tmp_vp, tmp_ULLi);
+
+// XSAVES: [[tmp_vp_3:%[0-9a-zA-z]+]] = load i8*, i8** %tmp_vp, align 4
+// XSAVES: [[tmp_ULLi_3:%[0-9a-zA-z]+]] = load i64, i64* %tmp_ULLi, align 8
+// XSAVES: [[high64_3:%[0-9a-zA-z]+]] = lshr i64 [[tmp_ULLi_3]], 32
+// XSAVES: [[high32_3:%[0-9a-zA-z]+]] = trunc i64 [[high64_3]] to i32
+// XSAVES: [[low32_3:%[0-9a-zA-z]+]] = trunc i64 [[tmp_ULLi_3]] to i32
+// XSAVES: call void @llvm.x86.xrstors(i8* [[tmp_vp_3]], i32 [[high32_3]], i32 [[low32_3]])
+  (void)__builtin_ia32_xrstors(tmp_vp, tmp_ULLi);
+#endif
+}
diff --git a/test/CodeGen/x86_64-arguments.c b/test/CodeGen/x86_64-arguments.c
index c412e3c..e3b853d 100644
--- a/test/CodeGen/x86_64-arguments.c
+++ b/test/CodeGen/x86_64-arguments.c
@@ -1,7 +1,9 @@
 // RUN: %clang_cc1 -triple x86_64-unknown-unknown -emit-llvm -o - %s | \
-// RUN:   FileCheck %s -check-prefix=CHECK -check-prefix=SSE
+// RUN:   FileCheck %s -check-prefix=CHECK -check-prefix=SSE -check-prefix=NO-AVX512
 // RUN: %clang_cc1 -triple x86_64-unknown-unknown -emit-llvm -o - %s -target-feature +avx | \
-// RUN:   FileCheck %s -check-prefix=CHECK -check-prefix=AVX
+// RUN:   FileCheck %s -check-prefix=CHECK -check-prefix=AVX -check-prefix=NO-AVX512
+// RUN: %clang_cc1 -triple x86_64-unknown-unknown -emit-llvm -o - %s -target-feature +avx512f | \
+// RUN:   FileCheck %s -check-prefix=CHECK -check-prefix=AVX -check-prefix=AVX512
 #include <stdarg.h>
 
 // CHECK-LABEL: define signext i8 @f0()
@@ -334,7 +336,8 @@
 
 // CHECK-LABEL: define i32 @f44
 // CHECK: ptrtoint
-// CHECK-NEXT: and {{.*}}, -32
+// CHECK-NEXT: add i64 %{{[0-9]+}}, 31
+// CHECK-NEXT: and i64 %{{[0-9]+}}, -32
 // CHECK-NEXT: inttoptr
 typedef int T44 __attribute((vector_size(32)));
 struct s44 { T44 x; int y; };
@@ -458,3 +461,77 @@
 }
 // AVX: @test54_helper(<8 x float> {{%[a-zA-Z0-9]+}}, <8 x float> {{%[a-zA-Z0-9]+}}, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double {{%[a-zA-Z0-9]+}}, double {{%[a-zA-Z0-9]+}})
 // AVX: @test54_helper(<8 x float> {{%[a-zA-Z0-9]+}}, <8 x float> {{%[a-zA-Z0-9]+}}, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, { double, double }* byval align 8 {{%[a-zA-Z0-9]+}})
+
+typedef float __m512 __attribute__ ((__vector_size__ (64)));
+typedef struct {
+  __m512 m;
+} s512;
+
+s512 x55;
+__m512 x56;
+
+// Even on AVX512, aggregates of size larger than four eightbytes have class
+// MEMORY (AVX512 draft 0.3 3.2.3p2 Rule 1).
+//
+// CHECK: declare void @f55(%struct.s512* byval align 64)
+void f55(s512 x);
+
+// However, __m512 has type SSE/SSEUP on AVX512.
+//
+// AVX512: declare void @f56(<16 x float>)
+// NO-AVX512: declare void @f56(<16 x float>* byval align 64)
+void f56(__m512 x);
+void f57() { f55(x55); f56(x56); }
+
+// Like for __m128 on AVX, check that the struct below is passed
+// in the same way regardless of AVX512 being used.
+//
+// CHECK: declare void @f58(%struct.t256* byval align 32)
+typedef struct t256 {
+  __m256 m;
+  __m256 n;
+} two256;
+
+extern void f58(two256 s);
+void f59(two256 s) {
+  f58(s);
+}
+
+// CHECK: declare void @f60(%struct.sat256* byval align 32)
+typedef struct at256 {
+  __m256 array[2];
+} Atwo256;
+typedef struct sat256 {
+  Atwo256 x;
+} SAtwo256;
+
+extern void f60(SAtwo256 s);
+void f61(SAtwo256 s) {
+  f60(s);
+}
+
+// AVX512: @f62_helper(i32 0, <16 x float> {{%[a-zA-Z0-9]+}}, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double {{%[a-zA-Z0-9]+}}, double {{%[a-zA-Z0-9]+}})
+void f62_helper(int, ...);
+__m512 x62;
+void f62() {
+  f62_helper(0, x62, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0i);
+}
+
+// Like for __m256 on AVX, we always pass __m512 in memory, and don't
+// need to use the register save area.
+//
+// AVX512-LABEL: define void @f63
+// AVX512-NOT: br i1
+// AVX512: ret void
+void f63(__m512 *m, __builtin_va_list argList) {
+  *m = __builtin_va_arg(argList, __m512);
+}
+
+// AVX512: @f64_helper(<16 x float> {{%[a-zA-Z0-9]+}}, <16 x float> {{%[a-zA-Z0-9]+}}, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double {{%[a-zA-Z0-9]+}}, double {{%[a-zA-Z0-9]+}})
+// AVX512: @f64_helper(<16 x float> {{%[a-zA-Z0-9]+}}, <16 x float> {{%[a-zA-Z0-9]+}}, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, { double, double }* byval align 8 {{%[a-zA-Z0-9]+}})
+void f64_helper(__m512, ...);
+__m512 x64;
+void f64() {
+  f64_helper(x64, x64, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0i);
+  f64_helper(x64, x64, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0i);
+}
diff --git a/test/CodeGen/x86_64-longdouble.c b/test/CodeGen/x86_64-longdouble.c
new file mode 100644
index 0000000..8baf4d1
--- /dev/null
+++ b/test/CodeGen/x86_64-longdouble.c
@@ -0,0 +1,135 @@
+// RUN: %clang_cc1 -triple x86_64-linux-android -emit-llvm -O -o - %s \
+// RUN:    | FileCheck %s --check-prefix=ANDROID --check-prefix=CHECK
+// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm -O -o - %s \
+// RUN:    | FileCheck %s --check-prefix=GNU --check-prefix=CHECK
+// RUN: %clang_cc1 -triple x86_64 -emit-llvm -O -o - %s \
+// RUN:    | FileCheck %s --check-prefix=GNU --check-prefix=CHECK
+// NaCl is an example of a target for which long double is the same as double.
+// RUN: %clang_cc1 -triple x86_64-nacl -emit-llvm -O -o - %s \
+// RUN:    | FileCheck %s --check-prefix=NACL --check-prefix=CHECK
+
+// Android uses fp128 for long double but other x86_64 targets use x86_fp80.
+
+long double dataLD = 1.0L;
+// ANDROID: @dataLD = global fp128 0xL00000000000000003FFF000000000000, align 16
+// GNU: @dataLD = global x86_fp80 0xK3FFF8000000000000000, align 16
+
+long double _Complex dataLDC = {1.0L, 1.0L};
+// ANDROID: @dataLDC = global { fp128, fp128 } { fp128 0xL00000000000000003FFF000000000000, fp128 0xL00000000000000003FFF000000000000 }, align 16
+// GNU: @dataLDC = global { x86_fp80, x86_fp80 } { x86_fp80 0xK3FFF8000000000000000, x86_fp80 0xK3FFF8000000000000000 }, align 16
+
+long double TestLD(long double x) {
+  return x * x;
+// ANDROID: define fp128 @TestLD(fp128 %x)
+// GNU: define x86_fp80 @TestLD(x86_fp80 %x)
+// NACL: define double @TestLD(double %x)
+}
+
+long double _Complex TestLDC(long double _Complex x) {
+  return x * x;
+// ANDROID: define void @TestLDC({ fp128, fp128 }* {{.*}}, { fp128, fp128 }* {{.*}} %x)
+// GNU: define { x86_fp80, x86_fp80 } @TestLDC({ x86_fp80, x86_fp80 }* {{.*}} %x)
+// NACL: define { double, double } @TestLDC(double %x{{.*}}, double %x{{.*}})
+}
+
+typedef __builtin_va_list va_list;
+
+int TestGetVarInt(va_list ap) {
+  return __builtin_va_arg(ap, int);
+// Since int can be passed in memory or register there are two branches.
+// CHECK:   define i32 @TestGetVarInt(
+// CHECK:   br label
+// CHECK:   br label
+// CHECK:   = phi
+// CHECK:   ret i32
+}
+
+double TestGetVarDouble(va_list ap) {
+  return __builtin_va_arg(ap, double);
+// Since double can be passed in memory or register there are two branches.
+// CHECK:   define double @TestGetVarDouble(
+// CHECK:   br label
+// CHECK:   br label
+// CHECK:   = phi
+// CHECK:   ret double
+}
+
+long double TestGetVarLD(va_list ap) {
+  return __builtin_va_arg(ap, long double);
+// fp128 and double can be passed in memory or in register, but x86_fp80 is in
+// memory.
+// ANDROID: define fp128 @TestGetVarLD(
+// GNU:     define x86_fp80 @TestGetVarLD(
+// NACL:     define double @TestGetVarLD(
+// ANDROID: br label
+// ANDROID: br label
+// NACL: br
+// ANDROID: = phi
+// GNU-NOT: br
+// GNU-NOT: = phi
+// NACL: = phi
+// ANDROID: ret fp128
+// GNU:     ret x86_fp80
+}
+
+long double _Complex TestGetVarLDC(va_list ap) {
+  return __builtin_va_arg(ap, long double _Complex);
+// Pairs of fp128 or x86_fp80 are passed as a struct in memory.
+// ANDROID:   define void @TestGetVarLDC({ fp128, fp128 }* {{.*}}, %struct.__va_list_tag*
+// GNU:       define { x86_fp80, x86_fp80 } @TestGetVarLDC(
+// Pairs of double can go in SSE registers or memory.
+// NACL:       define { double, double } @TestGetVarLDC(
+// ANDROID-NOT: br
+// GNU-NOT: br
+// NACL: br
+// ANDROID-NOT: phi
+// GNU-NOT: phi
+// NACL: phi
+// ANDROID:   ret void
+// GNU:       ret { x86_fp80, x86_fp80 }
+// NACL:       ret { double, double }
+}
+
+void TestVarArg(const char *s, ...);
+
+void TestPassVarInt(int x) {
+  TestVarArg("A", x);
+// CHECK: define void @TestPassVarInt(i32 %x)
+// CHECK: call {{.*}} @TestVarArg(i8* {{.*}}, i32 %x)
+}
+
+void TestPassVarFloat(float x) {
+  TestVarArg("A", x);
+// CHECK: define void @TestPassVarFloat(float %x)
+// CHECK: call {{.*}} @TestVarArg(i8* {{.*}}, double %
+}
+
+void TestPassVarDouble(double x) {
+  TestVarArg("A", x);
+// CHECK: define void @TestPassVarDouble(double %x)
+// CHECK: call {{.*}} @TestVarArg(i8* {{.*}}, double %x
+}
+
+void TestPassVarLD(long double x) {
+  TestVarArg("A", x);
+// ANDROID: define void @TestPassVarLD(fp128 %x)
+// ANDROID: call {{.*}} @TestVarArg(i8* {{.*}}, fp128 %x
+// GNU: define void @TestPassVarLD(x86_fp80 %x)
+// GNU: call {{.*}} @TestVarArg(i8* {{.*}}, x86_fp80 %x
+// NACL: define void @TestPassVarLD(double %x)
+// NACL: call {{.*}} @TestVarArg(i8* {{.*}}, double %x
+}
+
+void TestPassVarLDC(long double _Complex x) {
+  TestVarArg("A", x);
+// ANDROID:      define void @TestPassVarLDC({ fp128, fp128 }* {{.*}} %x)
+// ANDROID:      store fp128 %{{.*}}, fp128* %
+// ANDROID-NEXT: store fp128 %{{.*}}, fp128* %
+// ANDROID-NEXT: call {{.*}} @TestVarArg(i8* {{.*}}, { fp128, fp128 }* {{.*}} %
+// GNU:          define void @TestPassVarLDC({ x86_fp80, x86_fp80 }* {{.*}} %x)
+// GNU:          store x86_fp80 %{{.*}}, x86_fp80* %
+// GNU-NEXT:     store x86_fp80 %{{.*}}, x86_fp80* %
+// GNU-NEXT:   call {{.*}} @TestVarArg(i8* {{.*}}, { x86_fp80, x86_fp80 }* {{.*}} %
+// NACL:      define void @TestPassVarLDC(double %x{{.*}}, double %x{{.*}})
+// NACL: call {{.*}} @TestVarArg(i8* {{.*}}, double %x{{.*}}, double %x{{.*}})
+}
diff --git a/test/CodeGen/x86_64-profiling-keep-fp.c b/test/CodeGen/x86_64-profiling-keep-fp.c
new file mode 100644
index 0000000..ca679fa
--- /dev/null
+++ b/test/CodeGen/x86_64-profiling-keep-fp.c
@@ -0,0 +1,14 @@
+// REQUIRES: x86-registered-target
+// RUN: %clang_cc1 -triple x86_64-unknown-unknown -O3 -pg -S -o - %s | \
+// RUN:   FileCheck %s
+// RUN: %clang_cc1 -triple x86_64-unknown-unknown -O3 -momit-leaf-frame-pointer -pg -S -o - %s | \
+// RUN:   FileCheck %s
+
+// Test that the frame pointer is kept when compiling with
+// profiling.
+
+//CHECK: pushq %rbp
+int main(void)
+{
+  return 0;
+}
diff --git a/test/CodeGen/x86_64-xsave.c b/test/CodeGen/x86_64-xsave.c
new file mode 100644
index 0000000..ed536be
--- /dev/null
+++ b/test/CodeGen/x86_64-xsave.c
@@ -0,0 +1,120 @@
+// RUN: %clang_cc1 %s -DTEST_XSAVE -O0 -triple=x86_64-unknown-unknown -target-feature +xsave -emit-llvm -o - -Werror | FileCheck %s --check-prefix=XSAVE
+// RUN: %clang_cc1 %s -DTEST_XSAVE -O0 -triple=x86_64-unknown-unknown -target-feature +xsave -fno-signed-char -emit-llvm -o - -Werror | FileCheck %s --check-prefix=XSAVE
+
+// RUN: %clang_cc1 %s -DTEST_XSAVEOPT -O0 -triple=x86_64-unknown-unknown -target-feature +xsave -target-feature +xsaveopt -emit-llvm -o - -Werror | FileCheck %s --check-prefix=XSAVEOPT
+// RUN: %clang_cc1 %s -DTEST_XSAVEOPT -O0 -triple=x86_64-unknown-unknown -target-feature +xsave -target-feature +xsaveopt -fno-signed-char -emit-llvm -o - -Werror | FileCheck %s --check-prefix=XSAVEOPT
+
+// RUN: %clang_cc1 %s -DTEST_XSAVEC -O0 -triple=x86_64-unknown-unknown -target-feature +xsave -target-feature +xsavec -emit-llvm -o - -Werror | FileCheck %s --check-prefix=XSAVEC
+// RUN: %clang_cc1 %s -DTEST_XSAVEC -O0 -triple=x86_64-unknown-unknown -target-feature +xsave -target-feature +xsavec -fno-signed-char -emit-llvm -o - -Werror | FileCheck %s --check-prefix=XSAVEC
+
+// RUN: %clang_cc1 %s -DTEST_XSAVES -O0 -triple=x86_64-unknown-unknown -target-feature +xsave -target-feature +xsaves -emit-llvm -o - -Werror | FileCheck %s --check-prefix=XSAVES
+// RUN: %clang_cc1 %s -DTEST_XSAVES -O0 -triple=x86_64-unknown-unknown -target-feature +xsave -target-feature +xsaves -fno-signed-char -emit-llvm -o - -Werror | FileCheck %s --check-prefix=XSAVES
+
+void test() {
+  unsigned long long tmp_ULLi;
+  void*              tmp_vp;
+
+#ifdef TEST_XSAVE
+// XSAVE: [[tmp_vp_1:%[0-9a-zA-z]+]] = load i8*, i8** %tmp_vp, align 8
+// XSAVE: [[tmp_ULLi_1:%[0-9a-zA-z]+]] = load i64, i64* %tmp_ULLi, align 8
+// XSAVE: [[high64_1:%[0-9a-zA-z]+]] = lshr i64 [[tmp_ULLi_1]], 32
+// XSAVE: [[high32_1:%[0-9a-zA-z]+]] = trunc i64 [[high64_1]] to i32
+// XSAVE: [[low32_1:%[0-9a-zA-z]+]] = trunc i64 [[tmp_ULLi_1]] to i32
+// XSAVE: call void @llvm.x86.xsave(i8* [[tmp_vp_1]], i32 [[high32_1]], i32 [[low32_1]])
+  (void)__builtin_ia32_xsave(tmp_vp, tmp_ULLi);
+
+// XSAVE: [[tmp_vp_2:%[0-9a-zA-z]+]] = load i8*, i8** %tmp_vp, align 8
+// XSAVE: [[tmp_ULLi_2:%[0-9a-zA-z]+]] = load i64, i64* %tmp_ULLi, align 8
+// XSAVE: [[high64_2:%[0-9a-zA-z]+]] = lshr i64 [[tmp_ULLi_2]], 32
+// XSAVE: [[high32_2:%[0-9a-zA-z]+]] = trunc i64 [[high64_2]] to i32
+// XSAVE: [[low32_2:%[0-9a-zA-z]+]] = trunc i64 [[tmp_ULLi_2]] to i32
+// XSAVE: call void @llvm.x86.xsave64(i8* [[tmp_vp_2]], i32 [[high32_2]], i32 [[low32_2]])
+  (void)__builtin_ia32_xsave64(tmp_vp, tmp_ULLi);
+
+// XSAVE: [[tmp_vp_3:%[0-9a-zA-z]+]] = load i8*, i8** %tmp_vp, align 8
+// XSAVE: [[tmp_ULLi_3:%[0-9a-zA-z]+]] = load i64, i64* %tmp_ULLi, align 8
+// XSAVE: [[high64_3:%[0-9a-zA-z]+]] = lshr i64 [[tmp_ULLi_3]], 32
+// XSAVE: [[high32_3:%[0-9a-zA-z]+]] = trunc i64 [[high64_3]] to i32
+// XSAVE: [[low32_3:%[0-9a-zA-z]+]] = trunc i64 [[tmp_ULLi_3]] to i32
+// XSAVE: call void @llvm.x86.xrstor(i8* [[tmp_vp_3]], i32 [[high32_3]], i32 [[low32_3]])
+  (void)__builtin_ia32_xrstor(tmp_vp, tmp_ULLi);
+
+// XSAVE: [[tmp_vp_4:%[0-9a-zA-z]+]] = load i8*, i8** %tmp_vp, align 8
+// XSAVE: [[tmp_ULLi_4:%[0-9a-zA-z]+]] = load i64, i64* %tmp_ULLi, align 8
+// XSAVE: [[high64_4:%[0-9a-zA-z]+]] = lshr i64 [[tmp_ULLi_4]], 32
+// XSAVE: [[high32_4:%[0-9a-zA-z]+]] = trunc i64 [[high64_4]] to i32
+// XSAVE: [[low32_4:%[0-9a-zA-z]+]] = trunc i64 [[tmp_ULLi_4]] to i32
+// XSAVE: call void @llvm.x86.xrstor64(i8* [[tmp_vp_4]], i32 [[high32_4]], i32 [[low32_4]])
+  (void)__builtin_ia32_xrstor64(tmp_vp, tmp_ULLi);
+#endif
+
+#ifdef TEST_XSAVEOPT
+// XSAVEOPT: [[tmp_vp_1:%[0-9a-zA-z]+]] = load i8*, i8** %tmp_vp, align 8
+// XSAVEOPT: [[tmp_ULLi_1:%[0-9a-zA-z]+]] = load i64, i64* %tmp_ULLi, align 8
+// XSAVEOPT: [[high64_1:%[0-9a-zA-z]+]] = lshr i64 [[tmp_ULLi_1]], 32
+// XSAVEOPT: [[high32_1:%[0-9a-zA-z]+]] = trunc i64 [[high64_1]] to i32
+// XSAVEOPT: [[low32_1:%[0-9a-zA-z]+]] = trunc i64 [[tmp_ULLi_1]] to i32
+// XSAVEOPT: call void @llvm.x86.xsaveopt(i8* [[tmp_vp_1]], i32 [[high32_1]], i32 [[low32_1]])
+  (void)__builtin_ia32_xsaveopt(tmp_vp, tmp_ULLi);
+
+// XSAVEOPT: [[tmp_vp_2:%[0-9a-zA-z]+]] = load i8*, i8** %tmp_vp, align 8
+// XSAVEOPT: [[tmp_ULLi_2:%[0-9a-zA-z]+]] = load i64, i64* %tmp_ULLi, align 8
+// XSAVEOPT: [[high64_2:%[0-9a-zA-z]+]] = lshr i64 [[tmp_ULLi_2]], 32
+// XSAVEOPT: [[high32_2:%[0-9a-zA-z]+]] = trunc i64 [[high64_2]] to i32
+// XSAVEOPT: [[low32_2:%[0-9a-zA-z]+]] = trunc i64 [[tmp_ULLi_2]] to i32
+// XSAVEOPT: call void @llvm.x86.xsaveopt64(i8* [[tmp_vp_2]], i32 [[high32_2]], i32 [[low32_2]])
+  (void)__builtin_ia32_xsaveopt64(tmp_vp, tmp_ULLi);
+#endif
+
+#ifdef TEST_XSAVEC
+// XSAVEC: [[tmp_vp_1:%[0-9a-zA-z]+]] = load i8*, i8** %tmp_vp, align 8
+// XSAVEC: [[tmp_ULLi_1:%[0-9a-zA-z]+]] = load i64, i64* %tmp_ULLi, align 8
+// XSAVEC: [[high64_1:%[0-9a-zA-z]+]] = lshr i64 [[tmp_ULLi_1]], 32
+// XSAVEC: [[high32_1:%[0-9a-zA-z]+]] = trunc i64 [[high64_1]] to i32
+// XSAVEC: [[low32_1:%[0-9a-zA-z]+]] = trunc i64 [[tmp_ULLi_1]] to i32
+// XSAVEC: call void @llvm.x86.xsavec(i8* [[tmp_vp_1]], i32 [[high32_1]], i32 [[low32_1]])
+  (void)__builtin_ia32_xsavec(tmp_vp, tmp_ULLi);
+
+// XSAVEC: [[tmp_vp_2:%[0-9a-zA-z]+]] = load i8*, i8** %tmp_vp, align 8
+// XSAVEC: [[tmp_ULLi_2:%[0-9a-zA-z]+]] = load i64, i64* %tmp_ULLi, align 8
+// XSAVEC: [[high64_2:%[0-9a-zA-z]+]] = lshr i64 [[tmp_ULLi_2]], 32
+// XSAVEC: [[high32_2:%[0-9a-zA-z]+]] = trunc i64 [[high64_2]] to i32
+// XSAVEC: [[low32_2:%[0-9a-zA-z]+]] = trunc i64 [[tmp_ULLi_2]] to i32
+// XSAVEC: call void @llvm.x86.xsavec64(i8* [[tmp_vp_2]], i32 [[high32_2]], i32 [[low32_2]])
+  (void)__builtin_ia32_xsavec64(tmp_vp, tmp_ULLi);
+#endif
+
+#ifdef TEST_XSAVES
+// XSAVES: [[tmp_vp_1:%[0-9a-zA-z]+]] = load i8*, i8** %tmp_vp, align 8
+// XSAVES: [[tmp_ULLi_1:%[0-9a-zA-z]+]] = load i64, i64* %tmp_ULLi, align 8
+// XSAVES: [[high64_1:%[0-9a-zA-z]+]] = lshr i64 [[tmp_ULLi_1]], 32
+// XSAVES: [[high32_1:%[0-9a-zA-z]+]] = trunc i64 [[high64_1]] to i32
+// XSAVES: [[low32_1:%[0-9a-zA-z]+]] = trunc i64 [[tmp_ULLi_1]] to i32
+// XSAVES: call void @llvm.x86.xsaves(i8* [[tmp_vp_1]], i32 [[high32_1]], i32 [[low32_1]])
+  (void)__builtin_ia32_xsaves(tmp_vp, tmp_ULLi);
+
+// XSAVES: [[tmp_vp_2:%[0-9a-zA-z]+]] = load i8*, i8** %tmp_vp, align 8
+// XSAVES: [[tmp_ULLi_2:%[0-9a-zA-z]+]] = load i64, i64* %tmp_ULLi, align 8
+// XSAVES: [[high64_2:%[0-9a-zA-z]+]] = lshr i64 [[tmp_ULLi_2]], 32
+// XSAVES: [[high32_2:%[0-9a-zA-z]+]] = trunc i64 [[high64_2]] to i32
+// XSAVES: [[low32_2:%[0-9a-zA-z]+]] = trunc i64 [[tmp_ULLi_2]] to i32
+// XSAVES: call void @llvm.x86.xsaves64(i8* [[tmp_vp_2]], i32 [[high32_2]], i32 [[low32_2]])
+  (void)__builtin_ia32_xsaves64(tmp_vp, tmp_ULLi);
+
+// XSAVES: [[tmp_vp_3:%[0-9a-zA-z]+]] = load i8*, i8** %tmp_vp, align 8
+// XSAVES: [[tmp_ULLi_3:%[0-9a-zA-z]+]] = load i64, i64* %tmp_ULLi, align 8
+// XSAVES: [[high64_3:%[0-9a-zA-z]+]] = lshr i64 [[tmp_ULLi_3]], 32
+// XSAVES: [[high32_3:%[0-9a-zA-z]+]] = trunc i64 [[high64_3]] to i32
+// XSAVES: [[low32_3:%[0-9a-zA-z]+]] = trunc i64 [[tmp_ULLi_3]] to i32
+// XSAVES: call void @llvm.x86.xrstors(i8* [[tmp_vp_3]], i32 [[high32_3]], i32 [[low32_3]])
+  (void)__builtin_ia32_xrstors(tmp_vp, tmp_ULLi);
+
+// XSAVES: [[tmp_vp_4:%[0-9a-zA-z]+]] = load i8*, i8** %tmp_vp, align 8
+// XSAVES: [[tmp_ULLi_4:%[0-9a-zA-z]+]] = load i64, i64* %tmp_ULLi, align 8
+// XSAVES: [[high64_4:%[0-9a-zA-z]+]] = lshr i64 [[tmp_ULLi_4]], 32
+// XSAVES: [[high32_4:%[0-9a-zA-z]+]] = trunc i64 [[high64_4]] to i32
+// XSAVES: [[low32_4:%[0-9a-zA-z]+]] = trunc i64 [[tmp_ULLi_4]] to i32
+// XSAVES: call void @llvm.x86.xrstors64(i8* [[tmp_vp_4]], i32 [[high32_4]], i32 [[low32_4]])
+  (void)__builtin_ia32_xrstors64(tmp_vp, tmp_ULLi);
+#endif
+}
diff --git a/test/CodeGen/xcore-abi.c b/test/CodeGen/xcore-abi.c
index 23fb441..2bac78d 100644
--- a/test/CodeGen/xcore-abi.c
+++ b/test/CodeGen/xcore-abi.c
@@ -33,7 +33,7 @@
   f(v1);
   // CHECK: [[I:%[a-z0-9]+]] = load i8*, i8** [[AP]]
   // CHECK: [[P:%[a-z0-9]+]] = bitcast i8* [[I]] to i8**
-  // CHECK: [[IN:%[a-z0-9]+]] = getelementptr i8, i8* [[I]], i32 4
+  // CHECK: [[IN:%[a-z0-9]+]] = getelementptr inbounds i8, i8* [[I]], i32 4
   // CHECK: store i8* [[IN]], i8** [[AP]]
   // CHECK: [[V1:%[a-z0-9]+]] = load i8*, i8** [[P]]
   // CHECK: store i8* [[V1]], i8** [[V:%[a-z0-9]+]], align 4
@@ -43,7 +43,7 @@
   char v2 = va_arg (ap, char); // expected-warning{{second argument to 'va_arg' is of promotable type 'char'}}
   f(&v2);
   // CHECK: [[I:%[a-z0-9]+]] = load i8*, i8** [[AP]]
-  // CHECK: [[IN:%[a-z0-9]+]] = getelementptr i8, i8* [[I]], i32 4
+  // CHECK: [[IN:%[a-z0-9]+]] = getelementptr inbounds i8, i8* [[I]], i32 4
   // CHECK: store i8* [[IN]], i8** [[AP]]
   // CHECK: [[V1:%[a-z0-9]+]] = load i8, i8* [[I]]
   // CHECK: store i8 [[V1]], i8* [[V:%[a-z0-9]+]], align 1
@@ -53,7 +53,7 @@
   f(&v3);
   // CHECK: [[I:%[a-z0-9]+]] = load i8*, i8** [[AP]]
   // CHECK: [[P:%[a-z0-9]+]] = bitcast i8* [[I]] to i32*
-  // CHECK: [[IN:%[a-z0-9]+]] = getelementptr i8, i8* [[I]], i32 4
+  // CHECK: [[IN:%[a-z0-9]+]] = getelementptr inbounds i8, i8* [[I]], i32 4
   // CHECK: store i8* [[IN]], i8** [[AP]]
   // CHECK: [[V1:%[a-z0-9]+]] = load i32, i32* [[P]]
   // CHECK: store i32 [[V1]], i32* [[V:%[a-z0-9]+]], align 4
@@ -64,7 +64,7 @@
   f(&v4);
   // CHECK: [[I:%[a-z0-9]+]] = load i8*, i8** [[AP]]
   // CHECK: [[P:%[a-z0-9]+]] = bitcast i8* [[I]] to i64*
-  // CHECK: [[IN:%[a-z0-9]+]] = getelementptr i8, i8* [[I]], i32 8
+  // CHECK: [[IN:%[a-z0-9]+]] = getelementptr inbounds i8, i8* [[I]], i32 8
   // CHECK: store i8* [[IN]], i8** [[AP]]
   // CHECK: [[V1:%[a-z0-9]+]] = load i64, i64* [[P]]
   // CHECK: store i64 [[V1]], i64* [[V:%[a-z0-9]+]], align 4
@@ -76,7 +76,7 @@
   // CHECK: [[I:%[a-z0-9]+]] = load i8*, i8** [[AP]]
   // CHECK: [[I2:%[a-z0-9]+]] = bitcast i8* [[I]] to %struct.x**
   // CHECK: [[P:%[a-z0-9]+]] = load %struct.x*, %struct.x** [[I2]]
-  // CHECK: [[IN:%[a-z0-9]+]] = getelementptr i8, i8* [[I]], i32 4
+  // CHECK: [[IN:%[a-z0-9]+]] = getelementptr inbounds i8, i8* [[I]], i32 4
   // CHECK: store i8* [[IN]], i8** [[AP]]
   // CHECK: [[V1:%[a-z0-9]+]] = bitcast %struct.x* [[V:%[a-z0-9]+]] to i8*
   // CHECK: [[P1:%[a-z0-9]+]] = bitcast %struct.x* [[P]] to i8*
@@ -89,7 +89,7 @@
   // CHECK: [[I:%[a-z0-9]+]] = load i8*, i8** [[AP]]
   // CHECK: [[I2:%[a-z0-9]+]] = bitcast i8* [[I]] to [4 x i32]**
   // CHECK: [[P:%[a-z0-9]+]] = load [4 x i32]*, [4 x i32]** [[I2]]
-  // CHECK: [[IN:%[a-z0-9]+]] = getelementptr i8, i8* [[I]], i32 4
+  // CHECK: [[IN:%[a-z0-9]+]] = getelementptr inbounds i8, i8* [[I]], i32 4
   // CHECK: store i8* [[IN]], i8** [[AP]]
   // CHECK: [[V1:%[a-z0-9]+]] = bitcast [4 x i32]* [[V0:%[a-z0-9]+]] to i8*
   // CHECK: [[P1:%[a-z0-9]+]] = bitcast [4 x i32]* [[P]] to i8*
@@ -104,7 +104,7 @@
   f(&v7);
   // CHECK: [[I:%[a-z0-9]+]] = load i8*, i8** [[AP]]
   // CHECK: [[P:%[a-z0-9]+]] = bitcast i8* [[I]] to double*
-  // CHECK: [[IN:%[a-z0-9]+]] = getelementptr i8, i8* [[I]], i32 8
+  // CHECK: [[IN:%[a-z0-9]+]] = getelementptr inbounds i8, i8* [[I]], i32 8
   // CHECK: store i8* [[IN]], i8** [[AP]]
   // CHECK: [[V1:%[a-z0-9]+]] = load double, double* [[P]]
   // CHECK: store double [[V1]], double* [[V:%[a-z0-9]+]], align 4
diff --git a/test/CodeGen/xop-builtins.c b/test/CodeGen/xop-builtins.c
index 436deaa..5f0f20d 100644
--- a/test/CodeGen/xop-builtins.c
+++ b/test/CodeGen/xop-builtins.c
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 %s -O3 -triple=x86_64-apple-darwin -target-feature +xop -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 %s -triple=x86_64-apple-darwin -target-feature +xop -emit-llvm -o - -Werror | FileCheck %s
 
 // Don't include mm_malloc.h, it's system specific.
 #define __MM_MALLOC_H
@@ -6,321 +6,385 @@
 #include <x86intrin.h>
 
 __m128i test_mm_maccs_epi16(__m128i a, __m128i b, __m128i c) {
+  // CHECK-LABEL: test_mm_maccs_epi16
   // CHECK: @llvm.x86.xop.vpmacssww
   return _mm_maccs_epi16(a, b, c);
 }
 
 __m128i test_mm_macc_epi16(__m128i a, __m128i b, __m128i c) {
+  // CHECK-LABEL: test_mm_macc_epi16
   // CHECK: @llvm.x86.xop.vpmacsww
   return _mm_macc_epi16(a, b, c);
 }
 
 __m128i test_mm_maccsd_epi16(__m128i a, __m128i b, __m128i c) {
+  // CHECK-LABEL: test_mm_maccsd_epi16
   // CHECK: @llvm.x86.xop.vpmacsswd
   return _mm_maccsd_epi16(a, b, c);
 }
 
 __m128i test_mm_maccd_epi16(__m128i a, __m128i b, __m128i c) {
+  // CHECK-LABEL: test_mm_maccd_epi16
   // CHECK: @llvm.x86.xop.vpmacswd
   return _mm_maccd_epi16(a, b, c);
 }
 
 __m128i test_mm_maccs_epi32(__m128i a, __m128i b, __m128i c) {
+  // CHECK-LABEL: test_mm_maccs_epi32
   // CHECK: @llvm.x86.xop.vpmacssdd
   return _mm_maccs_epi32(a, b, c);
 }
 
 __m128i test_mm_macc_epi32(__m128i a, __m128i b, __m128i c) {
+  // CHECK-LABEL: test_mm_macc_epi32
   // CHECK: @llvm.x86.xop.vpmacsdd
   return _mm_macc_epi32(a, b, c);
 }
 
 __m128i test_mm_maccslo_epi32(__m128i a, __m128i b, __m128i c) {
+  // CHECK-LABEL: test_mm_maccslo_epi32
   // CHECK: @llvm.x86.xop.vpmacssdql
   return _mm_maccslo_epi32(a, b, c);
 }
 
 __m128i test_mm_macclo_epi32(__m128i a, __m128i b, __m128i c) {
+  // CHECK-LABEL: test_mm_macclo_epi32
   // CHECK: @llvm.x86.xop.vpmacsdql
   return _mm_macclo_epi32(a, b, c);
 }
 
 __m128i test_mm_maccshi_epi32(__m128i a, __m128i b, __m128i c) {
+  // CHECK-LABEL: test_mm_maccshi_epi32
   // CHECK: @llvm.x86.xop.vpmacssdqh
   return _mm_maccshi_epi32(a, b, c);
 }
 
 __m128i test_mm_macchi_epi32(__m128i a, __m128i b, __m128i c) {
+  // CHECK-LABEL: test_mm_macchi_epi32
   // CHECK: @llvm.x86.xop.vpmacsdqh
   return _mm_macchi_epi32(a, b, c);
 }
 
 __m128i test_mm_maddsd_epi16(__m128i a, __m128i b, __m128i c) {
+  // CHECK-LABEL: test_mm_maddsd_epi16
   // CHECK: @llvm.x86.xop.vpmadcsswd
   return _mm_maddsd_epi16(a, b, c);
 }
 
 __m128i test_mm_maddd_epi16(__m128i a, __m128i b, __m128i c) {
+  // CHECK-LABEL: test_mm_maddd_epi16
   // CHECK: @llvm.x86.xop.vpmadcswd
   return _mm_maddd_epi16(a, b, c);
 }
 
 __m128i test_mm_haddw_epi8(__m128i a) {
+  // CHECK-LABEL: test_mm_haddw_epi8
   // CHECK: @llvm.x86.xop.vphaddbw
   return _mm_haddw_epi8(a);
 }
 
 __m128i test_mm_haddd_epi8(__m128i a) {
+  // CHECK-LABEL: test_mm_haddd_epi8
   // CHECK: @llvm.x86.xop.vphaddbd
   return _mm_haddd_epi8(a);
 }
 
 __m128i test_mm_haddq_epi8(__m128i a) {
+  // CHECK-LABEL: test_mm_haddq_epi8
   // CHECK: @llvm.x86.xop.vphaddbq
   return _mm_haddq_epi8(a);
 }
 
 __m128i test_mm_haddd_epi16(__m128i a) {
+  // CHECK-LABEL: test_mm_haddd_epi16
   // CHECK: @llvm.x86.xop.vphaddwd
   return _mm_haddd_epi16(a);
 }
 
 __m128i test_mm_haddq_epi16(__m128i a) {
+  // CHECK-LABEL: test_mm_haddq_epi16
   // CHECK: @llvm.x86.xop.vphaddwq
   return _mm_haddq_epi16(a);
 }
 
 __m128i test_mm_haddq_epi32(__m128i a) {
+  // CHECK-LABEL: test_mm_haddq_epi32
   // CHECK: @llvm.x86.xop.vphadddq
   return _mm_haddq_epi32(a);
 }
 
 __m128i test_mm_haddw_epu8(__m128i a) {
+  // CHECK-LABEL: test_mm_haddw_epu8
   // CHECK: @llvm.x86.xop.vphaddubw
   return _mm_haddw_epu8(a);
 }
 
 __m128i test_mm_haddd_epu8(__m128i a) {
+  // CHECK-LABEL: test_mm_haddd_epu8
   // CHECK: @llvm.x86.xop.vphaddubd
   return _mm_haddd_epu8(a);
 }
 
 __m128i test_mm_haddq_epu8(__m128i a) {
+  // CHECK-LABEL: test_mm_haddq_epu8
   // CHECK: @llvm.x86.xop.vphaddubq
   return _mm_haddq_epu8(a);
 }
 
 __m128i test_mm_haddd_epu16(__m128i a) {
+  // CHECK-LABEL: test_mm_haddd_epu16
   // CHECK: @llvm.x86.xop.vphadduwd
   return _mm_haddd_epu16(a);
 }
 
 __m128i test_mm_haddq_epu16(__m128i a) {
+  // CHECK-LABEL: test_mm_haddq_epu16
   // CHECK: @llvm.x86.xop.vphadduwq
   return _mm_haddq_epu16(a);
 }
 
 __m128i test_mm_haddq_epu32(__m128i a) {
+  // CHECK-LABEL: test_mm_haddq_epu32
   // CHECK: @llvm.x86.xop.vphaddudq
   return _mm_haddq_epu32(a);
 }
 
 __m128i test_mm_hsubw_epi8(__m128i a) {
+  // CHECK-LABEL: test_mm_hsubw_epi8
   // CHECK: @llvm.x86.xop.vphsubbw
   return _mm_hsubw_epi8(a);
 }
 
 __m128i test_mm_hsubd_epi16(__m128i a) {
+  // CHECK-LABEL: test_mm_hsubd_epi16
   // CHECK: @llvm.x86.xop.vphsubwd
   return _mm_hsubd_epi16(a);
 }
 
 __m128i test_mm_hsubq_epi32(__m128i a) {
+  // CHECK-LABEL: test_mm_hsubq_epi32
   // CHECK: @llvm.x86.xop.vphsubdq
   return _mm_hsubq_epi32(a);
 }
 
 __m128i test_mm_cmov_si128(__m128i a, __m128i b, __m128i c) {
+  // CHECK-LABEL: test_mm_cmov_si128
   // CHECK: @llvm.x86.xop.vpcmov
   return _mm_cmov_si128(a, b, c);
 }
 
 __m256i test_mm256_cmov_si256(__m256i a, __m256i b, __m256i c) {
+  // CHECK-LABEL: test_mm256_cmov_si256
   // CHECK: @llvm.x86.xop.vpcmov.256
   return _mm256_cmov_si256(a, b, c);
 }
 
 __m128i test_mm_perm_epi8(__m128i a, __m128i b, __m128i c) {
+  // CHECK-LABEL: test_mm_perm_epi8
   // CHECK: @llvm.x86.xop.vpperm
   return _mm_perm_epi8(a, b, c);
 }
 
 __m128i test_mm_rot_epi8(__m128i a, __m128i b) {
+  // CHECK-LABEL: test_mm_rot_epi8
   // CHECK: @llvm.x86.xop.vprotb
   return _mm_rot_epi8(a, b);
 }
 
 __m128i test_mm_rot_epi16(__m128i a, __m128i b) {
+  // CHECK-LABEL: test_mm_rot_epi16
   // CHECK: @llvm.x86.xop.vprotw
   return _mm_rot_epi16(a, b);
 }
 
 __m128i test_mm_rot_epi32(__m128i a, __m128i b) {
+  // CHECK-LABEL: test_mm_rot_epi32
   // CHECK: @llvm.x86.xop.vprotd
   return _mm_rot_epi32(a, b);
 }
 
 __m128i test_mm_rot_epi64(__m128i a, __m128i b) {
+  // CHECK-LABEL: test_mm_rot_epi64
   // CHECK: @llvm.x86.xop.vprotq
   return _mm_rot_epi64(a, b);
 }
 
 __m128i test_mm_roti_epi8(__m128i a) {
+  // CHECK-LABEL: test_mm_roti_epi8
   // CHECK: @llvm.x86.xop.vprotbi
   return _mm_roti_epi8(a, 1);
 }
 
 __m128i test_mm_roti_epi16(__m128i a) {
+  // CHECK-LABEL: test_mm_roti_epi16
   // CHECK: @llvm.x86.xop.vprotwi
   return _mm_roti_epi16(a, 50);
 }
 
 __m128i test_mm_roti_epi32(__m128i a) {
+  // CHECK-LABEL: test_mm_roti_epi32
   // CHECK: @llvm.x86.xop.vprotdi
   return _mm_roti_epi32(a, -30);
 }
 
 __m128i test_mm_roti_epi64(__m128i a) {
+  // CHECK-LABEL: test_mm_roti_epi64
   // CHECK: @llvm.x86.xop.vprotqi
   return _mm_roti_epi64(a, 100);
 }
 
 __m128i test_mm_shl_epi8(__m128i a, __m128i b) {
+  // CHECK-LABEL: test_mm_shl_epi8
   // CHECK: @llvm.x86.xop.vpshlb
   return _mm_shl_epi8(a, b);
 }
 
 __m128i test_mm_shl_epi16(__m128i a, __m128i b) {
+  // CHECK-LABEL: test_mm_shl_epi16
   // CHECK: @llvm.x86.xop.vpshlw
   return _mm_shl_epi16(a, b);
 }
 
 __m128i test_mm_shl_epi32(__m128i a, __m128i b) {
+  // CHECK-LABEL: test_mm_shl_epi32
   // CHECK: @llvm.x86.xop.vpshld
   return _mm_shl_epi32(a, b);
 }
 
 __m128i test_mm_shl_epi64(__m128i a, __m128i b) {
+  // CHECK-LABEL: test_mm_shl_epi64
   // CHECK: @llvm.x86.xop.vpshlq
   return _mm_shl_epi64(a, b);
 }
 
 __m128i test_mm_sha_epi8(__m128i a, __m128i b) {
+  // CHECK-LABEL: test_mm_sha_epi8
   // CHECK: @llvm.x86.xop.vpshab
   return _mm_sha_epi8(a, b);
 }
 
 __m128i test_mm_sha_epi16(__m128i a, __m128i b) {
+  // CHECK-LABEL: test_mm_sha_epi16
   // CHECK: @llvm.x86.xop.vpshaw
   return _mm_sha_epi16(a, b);
 }
 
 __m128i test_mm_sha_epi32(__m128i a, __m128i b) {
+  // CHECK-LABEL: test_mm_sha_epi32
   // CHECK: @llvm.x86.xop.vpshad
   return _mm_sha_epi32(a, b);
 }
 
 __m128i test_mm_sha_epi64(__m128i a, __m128i b) {
+  // CHECK-LABEL: test_mm_sha_epi64
   // CHECK: @llvm.x86.xop.vpshaq
   return _mm_sha_epi64(a, b);
 }
 
 __m128i test_mm_com_epu8(__m128i a, __m128i b) {
+  // CHECK-LABEL: test_mm_com_epu8
   // CHECK: @llvm.x86.xop.vpcomub
   return _mm_com_epu8(a, b, 0);
 }
 
 __m128i test_mm_com_epu16(__m128i a, __m128i b) {
+  // CHECK-LABEL: test_mm_com_epu16
   // CHECK: @llvm.x86.xop.vpcomuw
   return _mm_com_epu16(a, b, 0);
 }
 
 __m128i test_mm_com_epu32(__m128i a, __m128i b) {
+  // CHECK-LABEL: test_mm_com_epu32
   // CHECK: @llvm.x86.xop.vpcomud
   return _mm_com_epu32(a, b, 0);
 }
 
 __m128i test_mm_com_epu64(__m128i a, __m128i b) {
+  // CHECK-LABEL: test_mm_com_epu64
   // CHECK: @llvm.x86.xop.vpcomuq
   return _mm_com_epu64(a, b, 0);
 }
 
 __m128i test_mm_com_epi8(__m128i a, __m128i b) {
+  // CHECK-LABEL: test_mm_com_epi8
   // CHECK: @llvm.x86.xop.vpcomb
   return _mm_com_epi8(a, b, 0);
 }
 
 __m128i test_mm_com_epi16(__m128i a, __m128i b) {
+  // CHECK-LABEL: test_mm_com_epi16
   // CHECK: @llvm.x86.xop.vpcomw
   return _mm_com_epi16(a, b, 0);
 }
 
 __m128i test_mm_com_epi32(__m128i a, __m128i b) {
+  // CHECK-LABEL: test_mm_com_epi32
   // CHECK: @llvm.x86.xop.vpcomd
   return _mm_com_epi32(a, b, 0);
 }
 
 __m128i test_mm_com_epi64(__m128i a, __m128i b) {
+  // CHECK-LABEL: test_mm_com_epi64
   // CHECK: @llvm.x86.xop.vpcomq
   return _mm_com_epi64(a, b, 0);
 }
 
 __m128d test_mm_permute2_pd(__m128d a, __m128d b, __m128i c) {
+  // CHECK-LABEL: test_mm_permute2_pd
   // CHECK: @llvm.x86.xop.vpermil2pd
   return _mm_permute2_pd(a, b, c, 0);
 }
 
 __m256d test_mm256_permute2_pd(__m256d a, __m256d b, __m256i c) {
+  // CHECK-LABEL: test_mm256_permute2_pd
   // CHECK: @llvm.x86.xop.vpermil2pd.256
   return _mm256_permute2_pd(a, b, c, 0);
 }
 
 __m128 test_mm_permute2_ps(__m128 a, __m128 b, __m128i c) {
+  // CHECK-LABEL: test_mm_permute2_ps
   // CHECK: @llvm.x86.xop.vpermil2ps
   return _mm_permute2_ps(a, b, c, 0);
 }
 
 __m256 test_mm256_permute2_ps(__m256 a, __m256 b, __m256i c) {
+  // CHECK-LABEL: test_mm256_permute2_ps
   // CHECK: @llvm.x86.xop.vpermil2ps.256
   return _mm256_permute2_ps(a, b, c, 0);
 }
 
 __m128 test_mm_frcz_ss(__m128 a) {
+  // CHECK-LABEL: test_mm_frcz_ss
   // CHECK: @llvm.x86.xop.vfrcz.ss
   return _mm_frcz_ss(a);
 }
 
 __m128d test_mm_frcz_sd(__m128d a) {
+  // CHECK-LABEL: test_mm_frcz_sd
   // CHECK: @llvm.x86.xop.vfrcz.sd
   return _mm_frcz_sd(a);
 }
 
 __m128 test_mm_frcz_ps(__m128 a) {
+  // CHECK-LABEL: test_mm_frcz_ps
   // CHECK: @llvm.x86.xop.vfrcz.ps
   return _mm_frcz_ps(a);
 }
 
 __m128d test_mm_frcz_pd(__m128d a) {
+  // CHECK-LABEL: test_mm_frcz_pd
   // CHECK: @llvm.x86.xop.vfrcz.pd
   return _mm_frcz_pd(a);
 }
 
 __m256 test_mm256_frcz_ps(__m256 a) {
+  // CHECK-LABEL: test_mm256_frcz_ps
   // CHECK: @llvm.x86.xop.vfrcz.ps.256
   return _mm256_frcz_ps(a);
 }
 
 __m256d test_mm256_frcz_pd(__m256d a) {
+  // CHECK-LABEL: test_mm256_frcz_pd
   // CHECK: @llvm.x86.xop.vfrcz.pd.256
   return _mm256_frcz_pd(a);
 }
diff --git a/test/CodeGen/zvector.c b/test/CodeGen/zvector.c
new file mode 100644
index 0000000..ebe7e41
--- /dev/null
+++ b/test/CodeGen/zvector.c
@@ -0,0 +1,2798 @@
+// RUN: %clang_cc1 -triple s390x-linux-gnu -target-cpu z13 -fzvector \
+// RUN:  -O -emit-llvm -o - -W -Wall -Werror %s | FileCheck %s
+
+volatile vector signed char sc, sc2;
+volatile vector unsigned char uc, uc2;
+volatile vector bool char bc, bc2;
+
+volatile vector signed short ss, ss2;
+volatile vector unsigned short us, us2;
+volatile vector bool short bs, bs2;
+
+volatile vector signed int si, si2;
+volatile vector unsigned int ui, ui2;
+volatile vector bool int bi, bi2;
+
+volatile vector signed long long sl, sl2;
+volatile vector unsigned long long ul, ul2;
+volatile vector bool long long bl, bl2;
+
+volatile vector double fd, fd2;
+
+volatile int cnt;
+
+void test_assign (void)
+{
+// CHECK-LABEL: test_assign
+
+// CHECK: [[VAL:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @sc2
+// CHECK: store volatile <16 x i8> [[VAL]], <16 x i8>* @sc
+  sc = sc2;
+// CHECK: [[VAL:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @uc2
+// CHECK: store volatile <16 x i8> [[VAL]], <16 x i8>* @uc
+  uc = uc2;
+
+// CHECK: [[VAL:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @ss2
+// CHECK: store volatile <8 x i16> [[VAL]], <8 x i16>* @ss
+  ss = ss2;
+// CHECK: [[VAL:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @us2
+// CHECK: store volatile <8 x i16> [[VAL]], <8 x i16>* @us
+  us = us2;
+
+// CHECK: [[VAL:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @si2
+// CHECK: store volatile <4 x i32> [[VAL]], <4 x i32>* @si
+  si = si2;
+// CHECK: [[VAL:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @ui2
+// CHECK: store volatile <4 x i32> [[VAL]], <4 x i32>* @ui
+  ui = ui2;
+
+// CHECK: [[VAL:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @sl2
+// CHECK: store volatile <2 x i64> [[VAL]], <2 x i64>* @sl
+  sl = sl2;
+// CHECK: [[VAL:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @ul2
+// CHECK: store volatile <2 x i64> [[VAL]], <2 x i64>* @ul
+  ul = ul2;
+
+// CHECK: [[VAL:%[^ ]+]] = load volatile <2 x double>, <2 x double>* @fd2
+// CHECK: store volatile <2 x double> [[VAL]], <2 x double>* @fd
+  fd = fd2;
+}
+
+void test_pos (void)
+{
+// CHECK-LABEL: test_pos
+
+// CHECK: [[VAL:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @sc2
+// CHECK: store volatile <16 x i8> [[VAL]], <16 x i8>* @sc
+  sc = +sc2;
+// CHECK: [[VAL:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @uc2
+// CHECK: store volatile <16 x i8> [[VAL]], <16 x i8>* @uc
+  uc = +uc2;
+
+// CHECK: [[VAL:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @ss2
+// CHECK: store volatile <8 x i16> [[VAL]], <8 x i16>* @ss
+  ss = +ss2;
+// CHECK: [[VAL:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @us2
+// CHECK: store volatile <8 x i16> [[VAL]], <8 x i16>* @us
+  us = +us2;
+
+// CHECK: [[VAL:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @si2
+// CHECK: store volatile <4 x i32> [[VAL]], <4 x i32>* @si
+  si = +si2;
+// CHECK: [[VAL:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @ui2
+// CHECK: store volatile <4 x i32> [[VAL]], <4 x i32>* @ui
+  ui = +ui2;
+
+// CHECK: [[VAL:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @sl2
+// CHECK: store volatile <2 x i64> [[VAL]], <2 x i64>* @sl
+  sl = +sl2;
+// CHECK: [[VAL:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @ul2
+// CHECK: store volatile <2 x i64> [[VAL]], <2 x i64>* @ul
+  ul = +ul2;
+
+// CHECK: [[VAL:%[^ ]+]] = load volatile <2 x double>, <2 x double>* @fd2
+// CHECK: store volatile <2 x double> [[VAL]], <2 x double>* @fd
+  fd = +fd2;
+}
+
+void test_neg (void)
+{
+// CHECK-LABEL: test_neg
+
+// CHECK: [[VAL:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @sc2
+// CHECK: %{{.*}} = sub <16 x i8> zeroinitializer, [[VAL]]
+  sc = -sc2;
+// CHECK: [[VAL:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @ss2
+// CHECK: %{{.*}} = sub <8 x i16> zeroinitializer, [[VAL]]
+  ss = -ss2;
+// CHECK: [[VAL:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @si2
+// CHECK: %{{.*}} = sub <4 x i32> zeroinitializer, [[VAL]]
+  si = -si2;
+// CHECK: [[VAL:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @sl2
+// CHECK: %{{.*}} = sub <2 x i64> zeroinitializer, [[VAL]]
+  sl = -sl2;
+// CHECK: [[VAL:%[^ ]+]] = load volatile <2 x double>, <2 x double>* @fd2
+// CHECK: %{{.*}} = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, [[VAL]]
+  fd = -fd2;
+}
+
+void test_preinc (void)
+{
+// CHECK-LABEL: test_preinc
+
+// CHECK: [[VAL:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @sc2
+// CHECK: %{{.*}} = add <16 x i8> [[VAL]], <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+  ++sc2;
+// CHECK: [[VAL:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @uc2
+// CHECK: %{{.*}} = add <16 x i8> [[VAL]], <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+  ++uc2;
+
+// CHECK: [[VAL:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @ss2
+// CHECK: %{{.*}} = add <8 x i16> [[VAL]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+  ++ss2;
+// CHECK: [[VAL:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @us2
+// CHECK: %{{.*}} = add <8 x i16> [[VAL]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+  ++us2;
+
+// CHECK: [[VAL:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @si2
+// CHECK: %{{.*}} = add <4 x i32> [[VAL]], <i32 1, i32 1, i32 1, i32 1>
+  ++si2;
+// CHECK: [[VAL:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @ui2
+// CHECK: %{{.*}} = add <4 x i32> [[VAL]], <i32 1, i32 1, i32 1, i32 1>
+  ++ui2;
+
+// CHECK: [[VAL:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @sl2
+// CHECK: %{{.*}} = add <2 x i64> [[VAL]], <i64 1, i64 1>
+  ++sl2;
+// CHECK: [[VAL:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @ul2
+// CHECK: %{{.*}} = add <2 x i64> [[VAL]], <i64 1, i64 1>
+  ++ul2;
+
+// CHECK: [[VAL:%[^ ]+]] = load volatile <2 x double>, <2 x double>* @fd2
+// CHECK: %{{.*}} = fadd <2 x double> [[VAL]], <double 1.000000e+00, double 1.000000e+00>
+  ++fd2;
+}
+
+void test_postinc (void)
+{
+// CHECK-LABEL: test_postinc
+
+// CHECK: [[VAL:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @sc2
+// CHECK: %{{.*}} = add <16 x i8> [[VAL]], <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+  sc2++;
+// CHECK: [[VAL:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @uc2
+// CHECK: %{{.*}} = add <16 x i8> [[VAL]], <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+  uc2++;
+
+// CHECK: [[VAL:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @ss2
+// CHECK: %{{.*}} = add <8 x i16> [[VAL]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+  ss2++;
+// CHECK: [[VAL:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @us2
+// CHECK: %{{.*}} = add <8 x i16> [[VAL]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+  us2++;
+
+// CHECK: [[VAL:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @si2
+// CHECK: %{{.*}} = add <4 x i32> [[VAL]], <i32 1, i32 1, i32 1, i32 1>
+  si2++;
+// CHECK: [[VAL:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @ui2
+// CHECK: %{{.*}} = add <4 x i32> [[VAL]], <i32 1, i32 1, i32 1, i32 1>
+  ui2++;
+
+// CHECK: [[VAL:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @sl2
+// CHECK: %{{.*}} = add <2 x i64> [[VAL]], <i64 1, i64 1>
+  sl2++;
+// CHECK: [[VAL:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @ul2
+// CHECK: %{{.*}} = add <2 x i64> [[VAL]], <i64 1, i64 1>
+  ul2++;
+
+// CHECK: [[VAL:%[^ ]+]] = load volatile <2 x double>, <2 x double>* @fd2
+// CHECK: %{{.*}} = fadd <2 x double> [[VAL]], <double 1.000000e+00, double 1.000000e+00>
+  fd2++;
+}
+
+void test_predec (void)
+{
+// CHECK-LABEL: test_predec
+
+// CHECK: [[VAL:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @sc2
+// CHECK: %{{.*}} = add <16 x i8> [[VAL]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+  --sc2;
+// CHECK: [[VAL:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @uc2
+// CHECK: %{{.*}} = add <16 x i8> [[VAL]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+  --uc2;
+
+// CHECK: [[VAL:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @ss2
+// CHECK: %{{.*}} = add <8 x i16> [[VAL]], <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+  --ss2;
+// CHECK: [[VAL:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @us2
+// CHECK: %{{.*}} = add <8 x i16> [[VAL]], <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+  --us2;
+
+// CHECK: [[VAL:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @si2
+// CHECK: %{{.*}} = add <4 x i32> [[VAL]], <i32 -1, i32 -1, i32 -1, i32 -1>
+  --si2;
+// CHECK: [[VAL:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @ui2
+// CHECK: %{{.*}} = add <4 x i32> [[VAL]], <i32 -1, i32 -1, i32 -1, i32 -1>
+  --ui2;
+
+// CHECK: [[VAL:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @sl2
+// CHECK: %{{.*}} = add <2 x i64> [[VAL]], <i64 -1, i64 -1>
+  --sl2;
+// CHECK: [[VAL:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @ul2
+// CHECK: %{{.*}} = add <2 x i64> [[VAL]], <i64 -1, i64 -1>
+  --ul2;
+
+// CHECK: [[VAL:%[^ ]+]] = load volatile <2 x double>, <2 x double>* @fd2
+// CHECK: %{{.*}} = fadd <2 x double> [[VAL]], <double -1.000000e+00, double -1.000000e+00>
+  --fd2;
+}
+
+void test_postdec (void)
+{
+// CHECK-LABEL: test_postdec
+
+// CHECK: [[VAL:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @sc2
+// CHECK: %{{.*}} = add <16 x i8> [[VAL]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+  sc2--;
+// CHECK: [[VAL:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @uc2
+// CHECK: %{{.*}} = add <16 x i8> [[VAL]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+  uc2--;
+
+// CHECK: [[VAL:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @ss2
+// CHECK: %{{.*}} = add <8 x i16> [[VAL]], <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+  ss2--;
+// CHECK: [[VAL:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @us2
+// CHECK: %{{.*}} = add <8 x i16> [[VAL]], <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+  us2--;
+
+// CHECK: [[VAL:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @si2
+// CHECK: %{{.*}} = add <4 x i32> [[VAL]], <i32 -1, i32 -1, i32 -1, i32 -1>
+  si2--;
+// CHECK: [[VAL:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @ui2
+// CHECK: %{{.*}} = add <4 x i32> [[VAL]], <i32 -1, i32 -1, i32 -1, i32 -1>
+  ui2--;
+
+// CHECK: [[VAL:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @sl2
+// CHECK: %{{.*}} = add <2 x i64> [[VAL]], <i64 -1, i64 -1>
+  sl2--;
+// CHECK: [[VAL:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @ul2
+// CHECK: %{{.*}} = add <2 x i64> [[VAL]], <i64 -1, i64 -1>
+  ul2--;
+
+// CHECK: [[VAL:%[^ ]+]] = load volatile <2 x double>, <2 x double>* @fd2
+// CHECK: %{{.*}} = fadd <2 x double> [[VAL]], <double -1.000000e+00, double -1.000000e+00>
+  fd2--;
+}
+
+void test_add (void)
+{
+// CHECK-LABEL: test_add
+
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @sc
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @sc2
+// CHECK: %{{.*}} = add <16 x i8> [[VAL2]], [[VAL1]]
+  sc = sc + sc2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @sc
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @bc2
+// CHECK: %{{.*}} = add <16 x i8> [[VAL2]], [[VAL1]]
+  sc = sc + bc2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @bc
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @sc2
+// CHECK: %{{.*}} = add <16 x i8> [[VAL2]], [[VAL1]]
+  sc = bc + sc2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @uc
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @uc2
+// CHECK: %{{.*}} = add <16 x i8> [[VAL2]], [[VAL1]]
+  uc = uc + uc2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @uc
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @bc2
+// CHECK: %{{.*}} = add <16 x i8> [[VAL2]], [[VAL1]]
+  uc = uc + bc2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @bc
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @uc2
+// CHECK: %{{.*}} = add <16 x i8> [[VAL2]], [[VAL1]]
+  uc = bc + uc2;
+
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @ss
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @ss2
+// CHECK: %{{.*}} = add <8 x i16> [[VAL2]], [[VAL1]]
+  ss = ss + ss2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @ss
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @bs2
+// CHECK: %{{.*}} = add <8 x i16> [[VAL2]], [[VAL1]]
+  ss = ss + bs2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @bs
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @ss2
+// CHECK: %{{.*}} = add <8 x i16> [[VAL2]], [[VAL1]]
+  ss = bs + ss2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @us
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @us2
+// CHECK: %{{.*}} = add <8 x i16> [[VAL2]], [[VAL1]]
+  us = us + us2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @us
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @bs2
+// CHECK: %{{.*}} = add <8 x i16> [[VAL2]], [[VAL1]]
+  us = us + bs2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @bs
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @us2
+// CHECK: %{{.*}} = add <8 x i16> [[VAL2]], [[VAL1]]
+  us = bs + us2;
+
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @si
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @si2
+// CHECK: %{{.*}} = add <4 x i32> [[VAL2]], [[VAL1]]
+  si = si + si2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @si
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @bi2
+// CHECK: %{{.*}} = add <4 x i32> [[VAL2]], [[VAL1]]
+  si = si + bi2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @bi
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @si2
+// CHECK: %{{.*}} = add <4 x i32> [[VAL2]], [[VAL1]]
+  si = bi + si2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @ui
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @ui2
+// CHECK: %{{.*}} = add <4 x i32> [[VAL2]], [[VAL1]]
+  ui = ui + ui2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @ui
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @bi2
+// CHECK: %{{.*}} = add <4 x i32> [[VAL2]], [[VAL1]]
+  ui = ui + bi2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @bi
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @ui2
+// CHECK: %{{.*}} = add <4 x i32> [[VAL2]], [[VAL1]]
+  ui = bi + ui2;
+
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @sl
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @sl2
+// CHECK: %{{.*}} = add <2 x i64> [[VAL2]], [[VAL1]]
+  sl = sl + sl2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @sl
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @bl2
+// CHECK: %{{.*}} = add <2 x i64> [[VAL2]], [[VAL1]]
+  sl = sl + bl2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @bl
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @sl2
+// CHECK: %{{.*}} = add <2 x i64> [[VAL2]], [[VAL1]]
+  sl = bl + sl2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @ul
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @ul2
+// CHECK: %{{.*}} = add <2 x i64> [[VAL2]], [[VAL1]]
+  ul = ul + ul2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @ul
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @bl2
+// CHECK: %{{.*}} = add <2 x i64> [[VAL2]], [[VAL1]]
+  ul = ul + bl2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @bl
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @ul2
+// CHECK: %{{.*}} = add <2 x i64> [[VAL2]], [[VAL1]]
+  ul = bl + ul2;
+
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <2 x double>, <2 x double>* @fd
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <2 x double>, <2 x double>* @fd2
+// CHECK: %{{.*}} = fadd <2 x double> [[VAL1]], [[VAL2]]
+  fd = fd + fd2;
+}
+
+void test_add_assign (void)
+{
+// CHECK-LABEL: test_add_assign
+
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @sc2
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @sc
+// CHECK: %{{.*}} = add <16 x i8> [[VAL1]], [[VAL2]]
+  sc += sc2;
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @bc2
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @sc
+// CHECK: %{{.*}} = add <16 x i8> [[VAL1]], [[VAL2]]
+  sc += bc2;
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @uc2
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @uc
+// CHECK: %{{.*}} = add <16 x i8> [[VAL1]], [[VAL2]]
+  uc += uc2;
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @bc2
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @uc
+// CHECK: %{{.*}} = add <16 x i8> [[VAL1]], [[VAL2]]
+  uc += bc2;
+
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @ss2
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @ss
+// CHECK: %{{.*}} = add <8 x i16> [[VAL1]], [[VAL2]]
+  ss += ss2;
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @bs2
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @ss
+// CHECK: %{{.*}} = add <8 x i16> [[VAL1]], [[VAL2]]
+  ss += bs2;
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @us2
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @us
+// CHECK: %{{.*}} = add <8 x i16> [[VAL1]], [[VAL2]]
+  us += us2;
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @bs2
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @us
+// CHECK: %{{.*}} = add <8 x i16> [[VAL1]], [[VAL2]]
+  us += bs2;
+
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @si2
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @si
+// CHECK: %{{.*}} = add <4 x i32> [[VAL1]], [[VAL2]]
+  si += si2;
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @bi2
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @si
+// CHECK: %{{.*}} = add <4 x i32> [[VAL1]], [[VAL2]]
+  si += bi2;
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @ui2
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @ui
+// CHECK: %{{.*}} = add <4 x i32> [[VAL1]], [[VAL2]]
+  ui += ui2;
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @bi2
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @ui
+// CHECK: %{{.*}} = add <4 x i32> [[VAL1]], [[VAL2]]
+  ui += bi2;
+
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @sl2
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @sl
+// CHECK: %{{.*}} = add <2 x i64> [[VAL1]], [[VAL2]]
+  sl += sl2;
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @bl2
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @sl
+// CHECK: %{{.*}} = add <2 x i64> [[VAL1]], [[VAL2]]
+  sl += bl2;
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @ul2
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @ul
+// CHECK: %{{.*}} = add <2 x i64> [[VAL1]], [[VAL2]]
+  ul += ul2;
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @bl2
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @ul
+// CHECK: %{{.*}} = add <2 x i64> [[VAL1]], [[VAL2]]
+  ul += bl2;
+
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <2 x double>, <2 x double>* @fd2
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <2 x double>, <2 x double>* @fd
+// CHECK: %{{.*}} = fadd <2 x double> [[VAL2]], [[VAL1]]
+  fd += fd2;
+}
+
+void test_sub (void)
+{
+// CHECK-LABEL: test_sub
+
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @sc
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @sc2
+// CHECK: %{{.*}} = sub <16 x i8> [[VAL1]], [[VAL2]]
+  sc = sc - sc2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @sc
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @bc2
+// CHECK: %{{.*}} = sub <16 x i8> [[VAL1]], [[VAL2]]
+  sc = sc - bc2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @bc
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @sc2
+// CHECK: %{{.*}} = sub <16 x i8> [[VAL1]], [[VAL2]]
+  sc = bc - sc2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @uc
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @uc2
+// CHECK: %{{.*}} = sub <16 x i8> [[VAL1]], [[VAL2]]
+  uc = uc - uc2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @uc
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @bc2
+// CHECK: %{{.*}} = sub <16 x i8> [[VAL1]], [[VAL2]]
+  uc = uc - bc2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @bc
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @uc2
+// CHECK: %{{.*}} = sub <16 x i8> [[VAL1]], [[VAL2]]
+  uc = bc - uc2;
+
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @ss
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @ss2
+// CHECK: %{{.*}} = sub <8 x i16> [[VAL1]], [[VAL2]]
+  ss = ss - ss2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @ss
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @bs2
+// CHECK: %{{.*}} = sub <8 x i16> [[VAL1]], [[VAL2]]
+  ss = ss - bs2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @bs
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @ss2
+// CHECK: %{{.*}} = sub <8 x i16> [[VAL1]], [[VAL2]]
+  ss = bs - ss2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @us
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @us2
+// CHECK: %{{.*}} = sub <8 x i16> [[VAL1]], [[VAL2]]
+  us = us - us2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @us
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @bs2
+// CHECK: %{{.*}} = sub <8 x i16> [[VAL1]], [[VAL2]]
+  us = us - bs2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @bs
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @us2
+// CHECK: %{{.*}} = sub <8 x i16> [[VAL1]], [[VAL2]]
+  us = bs - us2;
+
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @si
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @si2
+// CHECK: %{{.*}} = sub <4 x i32> [[VAL1]], [[VAL2]]
+  si = si - si2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @si
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @bi2
+// CHECK: %{{.*}} = sub <4 x i32> [[VAL1]], [[VAL2]]
+  si = si - bi2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @bi
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @si2
+// CHECK: %{{.*}} = sub <4 x i32> [[VAL1]], [[VAL2]]
+  si = bi - si2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @ui
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @ui2
+// CHECK: %{{.*}} = sub <4 x i32> [[VAL1]], [[VAL2]]
+  ui = ui - ui2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @ui
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @bi2
+// CHECK: %{{.*}} = sub <4 x i32> [[VAL1]], [[VAL2]]
+  ui = ui - bi2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @bi
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @ui2
+// CHECK: %{{.*}} = sub <4 x i32> [[VAL1]], [[VAL2]]
+  ui = bi - ui2;
+
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @sl
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @sl2
+// CHECK: %{{.*}} = sub <2 x i64> [[VAL1]], [[VAL2]]
+  sl = sl - sl2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @sl
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @bl2
+// CHECK: %{{.*}} = sub <2 x i64> [[VAL1]], [[VAL2]]
+  sl = sl - bl2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @bl
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @sl2
+// CHECK: %{{.*}} = sub <2 x i64> [[VAL1]], [[VAL2]]
+  sl = bl - sl2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @ul
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @ul2
+// CHECK: %{{.*}} = sub <2 x i64> [[VAL1]], [[VAL2]]
+  ul = ul - ul2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @ul
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @bl2
+// CHECK: %{{.*}} = sub <2 x i64> [[VAL1]], [[VAL2]]
+  ul = ul - bl2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @bl
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @ul2
+// CHECK: %{{.*}} = sub <2 x i64> [[VAL1]], [[VAL2]]
+  ul = bl - ul2;
+
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <2 x double>, <2 x double>* @fd
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <2 x double>, <2 x double>* @fd2
+// CHECK: %{{.*}} = fsub <2 x double> [[VAL1]], [[VAL2]]
+  fd = fd - fd2;
+}
+
+void test_sub_assign (void)
+{
+// CHECK-LABEL: test_sub_assign
+
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @sc2
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @sc
+// CHECK: %{{.*}} = sub <16 x i8> [[VAL1]], [[VAL2]]
+  sc -= sc2;
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @bc2
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @sc
+// CHECK: %{{.*}} = sub <16 x i8> [[VAL1]], [[VAL2]]
+  sc -= bc2;
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @uc2
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @uc
+// CHECK: %{{.*}} = sub <16 x i8> [[VAL1]], [[VAL2]]
+  uc -= uc2;
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @bc2
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @uc
+// CHECK: %{{.*}} = sub <16 x i8> [[VAL1]], [[VAL2]]
+  uc -= bc2;
+
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @ss2
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @ss
+// CHECK: %{{.*}} = sub <8 x i16> [[VAL1]], [[VAL2]]
+  ss -= ss2;
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @bs2
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @ss
+// CHECK: %{{.*}} = sub <8 x i16> [[VAL1]], [[VAL2]]
+  ss -= bs2;
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @us2
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @us
+// CHECK: %{{.*}} = sub <8 x i16> [[VAL1]], [[VAL2]]
+  us -= us2;
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @bs2
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @us
+// CHECK: %{{.*}} = sub <8 x i16> [[VAL1]], [[VAL2]]
+  us -= bs2;
+
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @si2
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @si
+// CHECK: %{{.*}} = sub <4 x i32> [[VAL1]], [[VAL2]]
+  si -= si2;
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @bi2
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @si
+// CHECK: %{{.*}} = sub <4 x i32> [[VAL1]], [[VAL2]]
+  si -= bi2;
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @ui2
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @ui
+// CHECK: %{{.*}} = sub <4 x i32> [[VAL1]], [[VAL2]]
+  ui -= ui2;
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @bi2
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @ui
+// CHECK: %{{.*}} = sub <4 x i32> [[VAL1]], [[VAL2]]
+  ui -= bi2;
+
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @sl2
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @sl
+// CHECK: %{{.*}} = sub <2 x i64> [[VAL1]], [[VAL2]]
+  sl -= sl2;
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @bl2
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @sl
+// CHECK: %{{.*}} = sub <2 x i64> [[VAL1]], [[VAL2]]
+  sl -= bl2;
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @ul2
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @ul
+// CHECK: %{{.*}} = sub <2 x i64> [[VAL1]], [[VAL2]]
+  ul -= ul2;
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @bl2
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @ul
+// CHECK: %{{.*}} = sub <2 x i64> [[VAL1]], [[VAL2]]
+  ul -= bl2;
+
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <2 x double>, <2 x double>* @fd2
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <2 x double>, <2 x double>* @fd
+// CHECK: %{{.*}} = fsub <2 x double> [[VAL1]], [[VAL2]]
+  fd -= fd2;
+}
+
+void test_mul (void)
+{
+// CHECK-LABEL: test_mul
+
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @sc
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @sc2
+// CHECK: %{{.*}} = mul <16 x i8> [[VAL2]], [[VAL1]]
+  sc = sc * sc2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @uc
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @uc2
+// CHECK: %{{.*}} = mul <16 x i8> [[VAL2]], [[VAL1]]
+  uc = uc * uc2;
+
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @ss
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @ss2
+// CHECK: %{{.*}} = mul <8 x i16> [[VAL2]], [[VAL1]]
+  ss = ss * ss2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @us
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @us2
+// CHECK: %{{.*}} = mul <8 x i16> [[VAL2]], [[VAL1]]
+  us = us * us2;
+
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @si
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @si2
+// CHECK: %{{.*}} = mul <4 x i32> [[VAL2]], [[VAL1]]
+  si = si * si2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @ui
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @ui2
+// CHECK: %{{.*}} = mul <4 x i32> [[VAL2]], [[VAL1]]
+  ui = ui * ui2;
+
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @sl
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @sl2
+// CHECK: %{{.*}} = mul <2 x i64> [[VAL2]], [[VAL1]]
+  sl = sl * sl2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @ul
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @ul2
+// CHECK: %{{.*}} = mul <2 x i64> [[VAL2]], [[VAL1]]
+  ul = ul * ul2;
+
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <2 x double>, <2 x double>* @fd
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <2 x double>, <2 x double>* @fd2
+// CHECK: %{{.*}} = fmul <2 x double> [[VAL1]], [[VAL2]]
+  fd = fd * fd2;
+}
+
+void test_mul_assign (void)
+{
+// CHECK-LABEL: test_mul_assign
+
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @sc2
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @sc
+// CHECK: %{{.*}} = mul <16 x i8> [[VAL1]], [[VAL2]]
+  sc *= sc2;
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @uc2
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @uc
+// CHECK: %{{.*}} = mul <16 x i8> [[VAL1]], [[VAL2]]
+  uc *= uc2;
+
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @ss2
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @ss
+// CHECK: %{{.*}} = mul <8 x i16> [[VAL1]], [[VAL2]]
+  ss *= ss2;
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @us2
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @us
+// CHECK: %{{.*}} = mul <8 x i16> [[VAL1]], [[VAL2]]
+  us *= us2;
+
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @si2
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @si
+// CHECK: %{{.*}} = mul <4 x i32> [[VAL1]], [[VAL2]]
+  si *= si2;
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @ui2
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @ui
+// CHECK: %{{.*}} = mul <4 x i32> [[VAL1]], [[VAL2]]
+  ui *= ui2;
+
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @sl2
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @sl
+// CHECK: %{{.*}} = mul <2 x i64> [[VAL1]], [[VAL2]]
+  sl *= sl2;
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @ul2
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @ul
+// CHECK: %{{.*}} = mul <2 x i64> [[VAL1]], [[VAL2]]
+  ul *= ul2;
+
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <2 x double>, <2 x double>* @fd2
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <2 x double>, <2 x double>* @fd
+// CHECK: %{{.*}} = fmul <2 x double> [[VAL2]], [[VAL1]]
+  fd *= fd2;
+}
+
+void test_div (void)
+{
+// CHECK-LABEL: test_div
+
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @sc
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @sc2
+// CHECK: %{{.*}} = sdiv <16 x i8> [[VAL1]], [[VAL2]]
+  sc = sc / sc2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @uc
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @uc2
+// CHECK: %{{.*}} = udiv <16 x i8> [[VAL1]], [[VAL2]]
+  uc = uc / uc2;
+
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @ss
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @ss2
+// CHECK: %{{.*}} = sdiv <8 x i16> [[VAL1]], [[VAL2]]
+  ss = ss / ss2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @us
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @us2
+// CHECK: %{{.*}} = udiv <8 x i16> [[VAL1]], [[VAL2]]
+  us = us / us2;
+
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @si
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @si2
+// CHECK: %{{.*}} = sdiv <4 x i32> [[VAL1]], [[VAL2]]
+  si = si / si2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @ui
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @ui2
+// CHECK: %{{.*}} = udiv <4 x i32> [[VAL1]], [[VAL2]]
+  ui = ui / ui2;
+
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @sl
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @sl2
+// CHECK: %{{.*}} = sdiv <2 x i64> [[VAL1]], [[VAL2]]
+  sl = sl / sl2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @ul
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @ul2
+// CHECK: %{{.*}} = udiv <2 x i64> [[VAL1]], [[VAL2]]
+  ul = ul / ul2;
+
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <2 x double>, <2 x double>* @fd
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <2 x double>, <2 x double>* @fd2
+// CHECK: %{{.*}} = fdiv <2 x double> [[VAL1]], [[VAL2]]
+  fd = fd / fd2;
+}
+
+void test_div_assign (void)
+{
+// CHECK-LABEL: test_div_assign
+
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @sc2
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @sc
+// CHECK: %{{.*}} = sdiv <16 x i8> [[VAL1]], [[VAL2]]
+  sc /= sc2;
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @uc2
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @uc
+// CHECK: %{{.*}} = udiv <16 x i8> [[VAL1]], [[VAL2]]
+  uc /= uc2;
+
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @ss2
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @ss
+// CHECK: %{{.*}} = sdiv <8 x i16> [[VAL1]], [[VAL2]]
+  ss /= ss2;
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @us2
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @us
+// CHECK: %{{.*}} = udiv <8 x i16> [[VAL1]], [[VAL2]]
+  us /= us2;
+
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @si2
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @si
+// CHECK: %{{.*}} = sdiv <4 x i32> [[VAL1]], [[VAL2]]
+  si /= si2;
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @ui2
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @ui
+// CHECK: %{{.*}} = udiv <4 x i32> [[VAL1]], [[VAL2]]
+  ui /= ui2;
+
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @sl2
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @sl
+// CHECK: %{{.*}} = sdiv <2 x i64> [[VAL1]], [[VAL2]]
+  sl /= sl2;
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @ul2
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @ul
+// CHECK: %{{.*}} = udiv <2 x i64> [[VAL1]], [[VAL2]]
+  ul /= ul2;
+
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <2 x double>, <2 x double>* @fd2
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <2 x double>, <2 x double>* @fd
+// CHECK: %{{.*}} = fdiv <2 x double> [[VAL1]], [[VAL2]]
+  fd /= fd2;
+}
+
+void test_rem (void)
+{
+// CHECK-LABEL: test_rem
+
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @sc
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @sc2
+// CHECK: %{{.*}} = srem <16 x i8> [[VAL1]], [[VAL2]]
+  sc = sc % sc2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @uc
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @uc2
+// CHECK: %{{.*}} = urem <16 x i8> [[VAL1]], [[VAL2]]
+  uc = uc % uc2;
+
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @ss
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @ss2
+// CHECK: %{{.*}} = srem <8 x i16> [[VAL1]], [[VAL2]]
+  ss = ss % ss2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @us
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @us2
+// CHECK: %{{.*}} = urem <8 x i16> [[VAL1]], [[VAL2]]
+  us = us % us2;
+
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @si
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @si2
+// CHECK: %{{.*}} = srem <4 x i32> [[VAL1]], [[VAL2]]
+  si = si % si2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @ui
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @ui2
+// CHECK: %{{.*}} = urem <4 x i32> [[VAL1]], [[VAL2]]
+  ui = ui % ui2;
+
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @sl
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @sl2
+// CHECK: %{{.*}} = srem <2 x i64> [[VAL1]], [[VAL2]]
+  sl = sl % sl2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @ul
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @ul2
+// CHECK: %{{.*}} = urem <2 x i64> [[VAL1]], [[VAL2]]
+  ul = ul % ul2;
+}
+
+void test_rem_assign (void)
+{
+// CHECK-LABEL: test_rem_assign
+
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @sc2
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @sc
+// CHECK: %{{.*}} = srem <16 x i8> [[VAL1]], [[VAL2]]
+  sc %= sc2;
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @uc2
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @uc
+// CHECK: %{{.*}} = urem <16 x i8> [[VAL1]], [[VAL2]]
+  uc %= uc2;
+
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @ss2
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @ss
+// CHECK: %{{.*}} = srem <8 x i16> [[VAL1]], [[VAL2]]
+  ss %= ss2;
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @us2
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @us
+// CHECK: %{{.*}} = urem <8 x i16> [[VAL1]], [[VAL2]]
+  us %= us2;
+
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @si2
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @si
+// CHECK: %{{.*}} = srem <4 x i32> [[VAL1]], [[VAL2]]
+  si %= si2;
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @ui2
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @ui
+// CHECK: %{{.*}} = urem <4 x i32> [[VAL1]], [[VAL2]]
+  ui %= ui2;
+
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @sl2
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @sl
+// CHECK: %{{.*}} = srem <2 x i64> [[VAL1]], [[VAL2]]
+  sl %= sl2;
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @ul2
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @ul
+// CHECK: %{{.*}} = urem <2 x i64> [[VAL1]], [[VAL2]]
+  ul %= ul2;
+}
+
+void test_not (void)
+{
+// CHECK-LABEL: test_not
+
+// CHECK: [[VAL:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @sc2
+// CHECK: %{{.*}} = xor <16 x i8> [[VAL]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+  sc = ~sc2;
+// CHECK: [[VAL:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @uc2
+// CHECK: %{{.*}} = xor <16 x i8> [[VAL]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+  uc = ~uc2;
+// CHECK: [[VAL:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @bc2
+// CHECK: %{{.*}} = xor <16 x i8> [[VAL]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+  bc = ~bc2;
+
+// CHECK: [[VAL:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @ss2
+// CHECK: %{{.*}} = xor <8 x i16> [[VAL]], <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+  ss = ~ss2;
+// CHECK: [[VAL:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @us2
+// CHECK: %{{.*}} = xor <8 x i16> [[VAL]], <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+  us = ~us2;
+// CHECK: [[VAL:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @bs2
+// CHECK: %{{.*}} = xor <8 x i16> [[VAL]], <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+  bs = ~bs2;
+
+// CHECK: [[VAL:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @si2
+// CHECK: %{{.*}} = xor <4 x i32> [[VAL]], <i32 -1, i32 -1, i32 -1, i32 -1>
+  si = ~si2;
+// CHECK: [[VAL:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @ui2
+// CHECK: %{{.*}} = xor <4 x i32> [[VAL]], <i32 -1, i32 -1, i32 -1, i32 -1>
+  ui = ~ui2;
+// CHECK: [[VAL:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @bi2
+// CHECK: %{{.*}} = xor <4 x i32> [[VAL]], <i32 -1, i32 -1, i32 -1, i32 -1>
+  bi = ~bi2;
+
+// CHECK: [[VAL:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @sl2
+// CHECK: %{{.*}} = xor <2 x i64> [[VAL]], <i64 -1, i64 -1>
+  sl = ~sl2;
+// CHECK: [[VAL:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @ul2
+// CHECK: %{{.*}} = xor <2 x i64> [[VAL]], <i64 -1, i64 -1>
+  ul = ~ul2;
+// CHECK: [[VAL:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @bl2
+// CHECK: %{{.*}} = xor <2 x i64> [[VAL]], <i64 -1, i64 -1>
+  bl = ~bl2;
+}
+
+void test_and (void)
+{
+// CHECK-LABEL: test_and
+
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @sc
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @sc2
+// CHECK: %{{.*}} = and <16 x i8> [[VAL2]], [[VAL1]]
+  sc = sc & sc2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @sc
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @bc2
+// CHECK: %{{.*}} = and <16 x i8> [[VAL2]], [[VAL1]]
+  sc = sc & bc2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @bc
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @sc2
+// CHECK: %{{.*}} = and <16 x i8> [[VAL2]], [[VAL1]]
+  sc = bc & sc2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @uc
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @uc2
+// CHECK: %{{.*}} = and <16 x i8> [[VAL2]], [[VAL1]]
+  uc = uc & uc2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @uc
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @bc2
+// CHECK: %{{.*}} = and <16 x i8> [[VAL2]], [[VAL1]]
+  uc = uc & bc2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @bc
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @uc2
+// CHECK: %{{.*}} = and <16 x i8> [[VAL2]], [[VAL1]]
+  uc = bc & uc2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @bc
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @bc2
+// CHECK: %{{.*}} = and <16 x i8> [[VAL2]], [[VAL1]]
+  bc = bc & bc2;
+
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @ss
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @ss2
+// CHECK: %{{.*}} = and <8 x i16> [[VAL2]], [[VAL1]]
+  ss = ss & ss2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @ss
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @bs2
+// CHECK: %{{.*}} = and <8 x i16> [[VAL2]], [[VAL1]]
+  ss = ss & bs2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @bs
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @ss2
+// CHECK: %{{.*}} = and <8 x i16> [[VAL2]], [[VAL1]]
+  ss = bs & ss2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @us
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @us2
+// CHECK: %{{.*}} = and <8 x i16> [[VAL2]], [[VAL1]]
+  us = us & us2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @us
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @bs2
+// CHECK: %{{.*}} = and <8 x i16> [[VAL2]], [[VAL1]]
+  us = us & bs2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @bs
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @us2
+// CHECK: %{{.*}} = and <8 x i16> [[VAL2]], [[VAL1]]
+  us = bs & us2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @bs
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @bs2
+// CHECK: %{{.*}} = and <8 x i16> [[VAL2]], [[VAL1]]
+  bs = bs & bs2;
+
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @si
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @si2
+// CHECK: %{{.*}} = and <4 x i32> [[VAL2]], [[VAL1]]
+  si = si & si2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @si
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @bi2
+// CHECK: %{{.*}} = and <4 x i32> [[VAL2]], [[VAL1]]
+  si = si & bi2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @bi
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @si2
+// CHECK: %{{.*}} = and <4 x i32> [[VAL2]], [[VAL1]]
+  si = bi & si2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @ui
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @ui2
+// CHECK: %{{.*}} = and <4 x i32> [[VAL2]], [[VAL1]]
+  ui = ui & ui2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @ui
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @bi2
+// CHECK: %{{.*}} = and <4 x i32> [[VAL2]], [[VAL1]]
+  ui = ui & bi2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @bi
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @ui2
+// CHECK: %{{.*}} = and <4 x i32> [[VAL2]], [[VAL1]]
+  ui = bi & ui2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @bi
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @bi2
+// CHECK: %{{.*}} = and <4 x i32> [[VAL2]], [[VAL1]]
+  bi = bi & bi2;
+
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @sl
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @sl2
+// CHECK: %{{.*}} = and <2 x i64> [[VAL2]], [[VAL1]]
+  sl = sl & sl2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @sl
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @bl2
+// CHECK: %{{.*}} = and <2 x i64> [[VAL2]], [[VAL1]]
+  sl = sl & bl2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @bl
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @sl2
+// CHECK: %{{.*}} = and <2 x i64> [[VAL2]], [[VAL1]]
+  sl = bl & sl2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @ul
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @ul2
+// CHECK: %{{.*}} = and <2 x i64> [[VAL2]], [[VAL1]]
+  ul = ul & ul2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @ul
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @bl2
+// CHECK: %{{.*}} = and <2 x i64> [[VAL2]], [[VAL1]]
+  ul = ul & bl2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @bl
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @ul2
+// CHECK: %{{.*}} = and <2 x i64> [[VAL2]], [[VAL1]]
+  ul = bl & ul2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @bl
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @bl2
+// CHECK: %{{.*}} = and <2 x i64> [[VAL2]], [[VAL1]]
+  bl = bl & bl2;
+}
+
+void test_and_assign (void)
+{
+// CHECK-LABEL: test_and_assign
+
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @sc2
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @sc
+// CHECK: %{{.*}} = and <16 x i8> [[VAL1]], [[VAL2]]
+  sc &= sc2;
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @bc2
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @sc
+// CHECK: %{{.*}} = and <16 x i8> [[VAL1]], [[VAL2]]
+  sc &= bc2;
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @uc2
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @uc
+// CHECK: %{{.*}} = and <16 x i8> [[VAL1]], [[VAL2]]
+  uc &= uc2;
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @bc2
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @uc
+// CHECK: %{{.*}} = and <16 x i8> [[VAL1]], [[VAL2]]
+  uc &= bc2;
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @bc2
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @bc
+// CHECK: %{{.*}} = and <16 x i8> [[VAL1]], [[VAL2]]
+  bc &= bc2;
+
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @ss2
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @ss
+// CHECK: %{{.*}} = and <8 x i16> [[VAL1]], [[VAL2]]
+  ss &= ss2;
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @bs2
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @ss
+// CHECK: %{{.*}} = and <8 x i16> [[VAL1]], [[VAL2]]
+  ss &= bs2;
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @us2
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @us
+// CHECK: %{{.*}} = and <8 x i16> [[VAL1]], [[VAL2]]
+  us &= us2;
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @bs2
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @us
+// CHECK: %{{.*}} = and <8 x i16> [[VAL1]], [[VAL2]]
+  us &= bs2;
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @bs2
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @bs
+// CHECK: %{{.*}} = and <8 x i16> [[VAL1]], [[VAL2]]
+  bs &= bs2;
+
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @si2
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @si
+// CHECK: %{{.*}} = and <4 x i32> [[VAL1]], [[VAL2]]
+  si &= si2;
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @bi2
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @si
+// CHECK: %{{.*}} = and <4 x i32> [[VAL1]], [[VAL2]]
+  si &= bi2;
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @ui2
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @ui
+// CHECK: %{{.*}} = and <4 x i32> [[VAL1]], [[VAL2]]
+  ui &= ui2;
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @bi2
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @ui
+// CHECK: %{{.*}} = and <4 x i32> [[VAL1]], [[VAL2]]
+  ui &= bi2;
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @bi2
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @bi
+// CHECK: %{{.*}} = and <4 x i32> [[VAL1]], [[VAL2]]
+  bi &= bi2;
+
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @sl2
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @sl
+// CHECK: %{{.*}} = and <2 x i64> [[VAL1]], [[VAL2]]
+  sl &= sl2;
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @bl2
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @sl
+// CHECK: %{{.*}} = and <2 x i64> [[VAL1]], [[VAL2]]
+  sl &= bl2;
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @ul2
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @ul
+// CHECK: %{{.*}} = and <2 x i64> [[VAL1]], [[VAL2]]
+  ul &= ul2;
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @bl2
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @ul
+// CHECK: %{{.*}} = and <2 x i64> [[VAL1]], [[VAL2]]
+  ul &= bl2;
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @bl2
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @bl
+// CHECK: %{{.*}} = and <2 x i64> [[VAL1]], [[VAL2]]
+  bl &= bl2;
+}
+
+void test_or (void)
+{
+// CHECK-LABEL: test_or
+
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @sc
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @sc2
+// CHECK: %{{.*}} = or <16 x i8> [[VAL2]], [[VAL1]]
+  sc = sc | sc2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @sc
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @bc2
+// CHECK: %{{.*}} = or <16 x i8> [[VAL2]], [[VAL1]]
+  sc = sc | bc2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @bc
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @sc2
+// CHECK: %{{.*}} = or <16 x i8> [[VAL2]], [[VAL1]]
+  sc = bc | sc2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @uc
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @uc2
+// CHECK: %{{.*}} = or <16 x i8> [[VAL2]], [[VAL1]]
+  uc = uc | uc2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @uc
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @bc2
+// CHECK: %{{.*}} = or <16 x i8> [[VAL2]], [[VAL1]]
+  uc = uc | bc2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @bc
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @uc2
+// CHECK: %{{.*}} = or <16 x i8> [[VAL2]], [[VAL1]]
+  uc = bc | uc2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @bc
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @bc2
+// CHECK: %{{.*}} = or <16 x i8> [[VAL2]], [[VAL1]]
+  bc = bc | bc2;
+
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @ss
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @ss2
+// CHECK: %{{.*}} = or <8 x i16> [[VAL2]], [[VAL1]]
+  ss = ss | ss2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @ss
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @bs2
+// CHECK: %{{.*}} = or <8 x i16> [[VAL2]], [[VAL1]]
+  ss = ss | bs2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @bs
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @ss2
+// CHECK: %{{.*}} = or <8 x i16> [[VAL2]], [[VAL1]]
+  ss = bs | ss2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @us
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @us2
+// CHECK: %{{.*}} = or <8 x i16> [[VAL2]], [[VAL1]]
+  us = us | us2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @us
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @bs2
+// CHECK: %{{.*}} = or <8 x i16> [[VAL2]], [[VAL1]]
+  us = us | bs2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @bs
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @us2
+// CHECK: %{{.*}} = or <8 x i16> [[VAL2]], [[VAL1]]
+  us = bs | us2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @bs
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @bs2
+// CHECK: %{{.*}} = or <8 x i16> [[VAL2]], [[VAL1]]
+  bs = bs | bs2;
+
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @si
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @si2
+// CHECK: %{{.*}} = or <4 x i32> [[VAL2]], [[VAL1]]
+  si = si | si2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @si
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @bi2
+// CHECK: %{{.*}} = or <4 x i32> [[VAL2]], [[VAL1]]
+  si = si | bi2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @bi
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @si2
+// CHECK: %{{.*}} = or <4 x i32> [[VAL2]], [[VAL1]]
+  si = bi | si2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @ui
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @ui2
+// CHECK: %{{.*}} = or <4 x i32> [[VAL2]], [[VAL1]]
+  ui = ui | ui2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @ui
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @bi2
+// CHECK: %{{.*}} = or <4 x i32> [[VAL2]], [[VAL1]]
+  ui = ui | bi2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @bi
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @ui2
+// CHECK: %{{.*}} = or <4 x i32> [[VAL2]], [[VAL1]]
+  ui = bi | ui2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @bi
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @bi2
+// CHECK: %{{.*}} = or <4 x i32> [[VAL2]], [[VAL1]]
+  bi = bi | bi2;
+
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @sl
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @sl2
+// CHECK: %{{.*}} = or <2 x i64> [[VAL2]], [[VAL1]]
+  sl = sl | sl2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @sl
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @bl2
+// CHECK: %{{.*}} = or <2 x i64> [[VAL2]], [[VAL1]]
+  sl = sl | bl2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @bl
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @sl2
+// CHECK: %{{.*}} = or <2 x i64> [[VAL2]], [[VAL1]]
+  sl = bl | sl2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @ul
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @ul2
+// CHECK: %{{.*}} = or <2 x i64> [[VAL2]], [[VAL1]]
+  ul = ul | ul2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @ul
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @bl2
+// CHECK: %{{.*}} = or <2 x i64> [[VAL2]], [[VAL1]]
+  ul = ul | bl2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @bl
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @ul2
+// CHECK: %{{.*}} = or <2 x i64> [[VAL2]], [[VAL1]]
+  ul = bl | ul2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @bl
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @bl2
+// CHECK: %{{.*}} = or <2 x i64> [[VAL2]], [[VAL1]]
+  bl = bl | bl2;
+}
+
+void test_or_assign (void)
+{
+// CHECK-LABEL: test_or_assign
+
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @sc2
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @sc
+// CHECK: %{{.*}} = or <16 x i8> [[VAL1]], [[VAL2]]
+  sc |= sc2;
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @bc2
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @sc
+// CHECK: %{{.*}} = or <16 x i8> [[VAL1]], [[VAL2]]
+  sc |= bc2;
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @uc2
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @uc
+// CHECK: %{{.*}} = or <16 x i8> [[VAL1]], [[VAL2]]
+  uc |= uc2;
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @bc2
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @uc
+// CHECK: %{{.*}} = or <16 x i8> [[VAL1]], [[VAL2]]
+  uc |= bc2;
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @bc2
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @bc
+// CHECK: %{{.*}} = or <16 x i8> [[VAL1]], [[VAL2]]
+  bc |= bc2;
+
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @ss2
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @ss
+// CHECK: %{{.*}} = or <8 x i16> [[VAL1]], [[VAL2]]
+  ss |= ss2;
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @bs2
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @ss
+// CHECK: %{{.*}} = or <8 x i16> [[VAL1]], [[VAL2]]
+  ss |= bs2;
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @us2
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @us
+// CHECK: %{{.*}} = or <8 x i16> [[VAL1]], [[VAL2]]
+  us |= us2;
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @bs2
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @us
+// CHECK: %{{.*}} = or <8 x i16> [[VAL1]], [[VAL2]]
+  us |= bs2;
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @bs2
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @bs
+// CHECK: %{{.*}} = or <8 x i16> [[VAL1]], [[VAL2]]
+  bs |= bs2;
+
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @si2
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @si
+// CHECK: %{{.*}} = or <4 x i32> [[VAL1]], [[VAL2]]
+  si |= si2;
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @bi2
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @si
+// CHECK: %{{.*}} = or <4 x i32> [[VAL1]], [[VAL2]]
+  si |= bi2;
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @ui2
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @ui
+// CHECK: %{{.*}} = or <4 x i32> [[VAL1]], [[VAL2]]
+  ui |= ui2;
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @bi2
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @ui
+// CHECK: %{{.*}} = or <4 x i32> [[VAL1]], [[VAL2]]
+  ui |= bi2;
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @bi2
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @bi
+// CHECK: %{{.*}} = or <4 x i32> [[VAL1]], [[VAL2]]
+  bi |= bi2;
+
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @sl2
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @sl
+// CHECK: %{{.*}} = or <2 x i64> [[VAL1]], [[VAL2]]
+  sl |= sl2;
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @bl2
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @sl
+// CHECK: %{{.*}} = or <2 x i64> [[VAL1]], [[VAL2]]
+  sl |= bl2;
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @ul2
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @ul
+// CHECK: %{{.*}} = or <2 x i64> [[VAL1]], [[VAL2]]
+  ul |= ul2;
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @bl2
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @ul
+// CHECK: %{{.*}} = or <2 x i64> [[VAL1]], [[VAL2]]
+  ul |= bl2;
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @bl2
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @bl
+// CHECK: %{{.*}} = or <2 x i64> [[VAL1]], [[VAL2]]
+  bl |= bl2;
+}
+
+void test_xor (void)
+{
+// CHECK-LABEL: test_xor
+
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @sc
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @sc2
+// CHECK: %{{.*}} = xor <16 x i8> [[VAL1]], [[VAL2]]
+  sc = sc ^ sc2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @sc
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @bc2
+// CHECK: %{{.*}} = xor <16 x i8> [[VAL1]], [[VAL2]]
+  sc = sc ^ bc2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @bc
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @sc2
+// CHECK: %{{.*}} = xor <16 x i8> [[VAL1]], [[VAL2]]
+  sc = bc ^ sc2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @uc
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @uc2
+// CHECK: %{{.*}} = xor <16 x i8> [[VAL1]], [[VAL2]]
+  uc = uc ^ uc2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @uc
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @bc2
+// CHECK: %{{.*}} = xor <16 x i8> [[VAL1]], [[VAL2]]
+  uc = uc ^ bc2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @bc
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @uc2
+// CHECK: %{{.*}} = xor <16 x i8> [[VAL1]], [[VAL2]]
+  uc = bc ^ uc2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @bc
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @bc2
+// CHECK: %{{.*}} = xor <16 x i8> [[VAL1]], [[VAL2]]
+  bc = bc ^ bc2;
+
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @ss
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @ss2
+// CHECK: %{{.*}} = xor <8 x i16> [[VAL1]], [[VAL2]]
+  ss = ss ^ ss2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @ss
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @bs2
+// CHECK: %{{.*}} = xor <8 x i16> [[VAL1]], [[VAL2]]
+  ss = ss ^ bs2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @bs
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @ss2
+// CHECK: %{{.*}} = xor <8 x i16> [[VAL1]], [[VAL2]]
+  ss = bs ^ ss2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @us
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @us2
+// CHECK: %{{.*}} = xor <8 x i16> [[VAL1]], [[VAL2]]
+  us = us ^ us2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @us
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @bs2
+// CHECK: %{{.*}} = xor <8 x i16> [[VAL1]], [[VAL2]]
+  us = us ^ bs2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @bs
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @us2
+// CHECK: %{{.*}} = xor <8 x i16> [[VAL1]], [[VAL2]]
+  us = bs ^ us2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @bs
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @bs2
+// CHECK: %{{.*}} = xor <8 x i16> [[VAL1]], [[VAL2]]
+  bs = bs ^ bs2;
+
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @si
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @si2
+// CHECK: %{{.*}} = xor <4 x i32> [[VAL1]], [[VAL2]]
+  si = si ^ si2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @si
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @bi2
+// CHECK: %{{.*}} = xor <4 x i32> [[VAL1]], [[VAL2]]
+  si = si ^ bi2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @bi
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @si2
+// CHECK: %{{.*}} = xor <4 x i32> [[VAL1]], [[VAL2]]
+  si = bi ^ si2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @ui
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @ui2
+// CHECK: %{{.*}} = xor <4 x i32> [[VAL1]], [[VAL2]]
+  ui = ui ^ ui2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @ui
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @bi2
+// CHECK: %{{.*}} = xor <4 x i32> [[VAL1]], [[VAL2]]
+  ui = ui ^ bi2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @bi
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @ui2
+// CHECK: %{{.*}} = xor <4 x i32> [[VAL1]], [[VAL2]]
+  ui = bi ^ ui2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @bi
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @bi2
+// CHECK: %{{.*}} = xor <4 x i32> [[VAL1]], [[VAL2]]
+  bi = bi ^ bi2;
+
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @sl
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @sl2
+// CHECK: %{{.*}} = xor <2 x i64> [[VAL1]], [[VAL2]]
+  sl = sl ^ sl2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @sl
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @bl2
+// CHECK: %{{.*}} = xor <2 x i64> [[VAL1]], [[VAL2]]
+  sl = sl ^ bl2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @bl
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @sl2
+// CHECK: %{{.*}} = xor <2 x i64> [[VAL1]], [[VAL2]]
+  sl = bl ^ sl2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @ul
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @ul2
+// CHECK: %{{.*}} = xor <2 x i64> [[VAL1]], [[VAL2]]
+  ul = ul ^ ul2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @ul
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @bl2
+// CHECK: %{{.*}} = xor <2 x i64> [[VAL1]], [[VAL2]]
+  ul = ul ^ bl2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @bl
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @ul2
+// CHECK: %{{.*}} = xor <2 x i64> [[VAL1]], [[VAL2]]
+  ul = bl ^ ul2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @bl
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @bl2
+// CHECK: %{{.*}} = xor <2 x i64> [[VAL1]], [[VAL2]]
+  bl = bl ^ bl2;
+}
+
+void test_xor_assign (void)
+{
+// CHECK-LABEL: test_xor_assign
+
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @sc2
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @sc
+// CHECK: %{{.*}} = xor <16 x i8> [[VAL2]], [[VAL1]]
+  sc ^= sc2;
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @bc2
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @sc
+// CHECK: %{{.*}} = xor <16 x i8> [[VAL2]], [[VAL1]]
+  sc ^= bc2;
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @uc2
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @uc
+// CHECK: %{{.*}} = xor <16 x i8> [[VAL2]], [[VAL1]]
+  uc ^= uc2;
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @bc2
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @uc
+// CHECK: %{{.*}} = xor <16 x i8> [[VAL2]], [[VAL1]]
+  uc ^= bc2;
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @bc2
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @bc
+// CHECK: %{{.*}} = xor <16 x i8> [[VAL2]], [[VAL1]]
+  bc ^= bc2;
+
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @ss2
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @ss
+// CHECK: %{{.*}} = xor <8 x i16> [[VAL2]], [[VAL1]]
+  ss ^= ss2;
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @bs2
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @ss
+// CHECK: %{{.*}} = xor <8 x i16> [[VAL2]], [[VAL1]]
+  ss ^= bs2;
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @us2
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @us
+// CHECK: %{{.*}} = xor <8 x i16> [[VAL2]], [[VAL1]]
+  us ^= us2;
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @bs2
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @us
+// CHECK: %{{.*}} = xor <8 x i16> [[VAL2]], [[VAL1]]
+  us ^= bs2;
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @bs2
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @bs
+// CHECK: %{{.*}} = xor <8 x i16> [[VAL2]], [[VAL1]]
+  bs ^= bs2;
+
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @si2
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @si
+// CHECK: %{{.*}} = xor <4 x i32> [[VAL2]], [[VAL1]]
+  si ^= si2;
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @bi2
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @si
+// CHECK: %{{.*}} = xor <4 x i32> [[VAL2]], [[VAL1]]
+  si ^= bi2;
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @ui2
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @ui
+// CHECK: %{{.*}} = xor <4 x i32> [[VAL2]], [[VAL1]]
+  ui ^= ui2;
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @bi2
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @ui
+// CHECK: %{{.*}} = xor <4 x i32> [[VAL2]], [[VAL1]]
+  ui ^= bi2;
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @bi2
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @bi
+// CHECK: %{{.*}} = xor <4 x i32> [[VAL2]], [[VAL1]]
+  bi ^= bi2;
+
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @sl2
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @sl
+// CHECK: %{{.*}} = xor <2 x i64> [[VAL2]], [[VAL1]]
+  sl ^= sl2;
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @bl2
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @sl
+// CHECK: %{{.*}} = xor <2 x i64> [[VAL2]], [[VAL1]]
+  sl ^= bl2;
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @ul2
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @ul
+// CHECK: %{{.*}} = xor <2 x i64> [[VAL2]], [[VAL1]]
+  ul ^= ul2;
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @bl2
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @ul
+// CHECK: %{{.*}} = xor <2 x i64> [[VAL2]], [[VAL1]]
+  ul ^= bl2;
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @bl2
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @bl
+// CHECK: %{{.*}} = xor <2 x i64> [[VAL2]], [[VAL1]]
+  bl ^= bl2;
+}
+
+void test_sl (void)
+{
+// CHECK-LABEL: test_sl
+
+// CHECK: [[VAL:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @sc
+// CHECK: [[CNT:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @sc2
+// CHECK: %{{.*}} = shl <16 x i8> [[VAL]], [[CNT]]
+  sc = sc << sc2;
+// CHECK: [[VAL:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @sc
+// CHECK: [[CNT:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @uc2
+// CHECK: %{{.*}} = shl <16 x i8> [[VAL]], [[CNT]]
+  sc = sc << uc2;
+// CHECK: [[VAL:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @sc
+// CHECK: [[T1:%[^ ]+]] = load volatile i32, i32* @cnt
+// CHECK: [[T2:%[^ ]+]] = insertelement <16 x i32> undef, i32 [[T1]], i32 0
+// CHECK: [[T3:%[^ ]+]] = shufflevector <16 x i32> [[T2]], <16 x i32> undef, <16 x i32> zeroinitializer
+// CHECK: [[CNT:%[^ ]+]] = trunc <16 x i32> [[T3]] to <16 x i8>
+// CHECK: %{{.*}} = shl <16 x i8> [[VAL]], [[CNT]]
+  sc = sc << cnt;
+// CHECK: [[VAL:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @sc
+// CHECK: %{{.*}} = shl <16 x i8> [[VAL]], <i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5>
+  sc = sc << 5;
+// CHECK: [[VAL:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @uc
+// CHECK: [[CNT:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @sc2
+// CHECK: %{{.*}} = shl <16 x i8> [[VAL]], [[CNT]]
+  uc = uc << sc2;
+// CHECK: [[VAL:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @uc
+// CHECK: [[CNT:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @uc2
+// CHECK: %{{.*}} = shl <16 x i8> [[VAL]], [[CNT]]
+  uc = uc << uc2;
+// CHECK: [[VAL:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @uc
+// CHECK: [[T1:%[^ ]+]] = load volatile i32, i32* @cnt
+// CHECK: [[T2:%[^ ]+]] = insertelement <16 x i32> undef, i32 [[T1]], i32 0
+// CHECK: [[T3:%[^ ]+]] = shufflevector <16 x i32> [[T2]], <16 x i32> undef, <16 x i32> zeroinitializer
+// CHECK: [[CNT:%[^ ]+]] = trunc <16 x i32> [[T3]] to <16 x i8>
+// CHECK: %{{.*}} = shl <16 x i8> [[VAL]], [[CNT]]
+  uc = uc << cnt;
+// CHECK: [[VAL:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @uc
+// CHECK: %{{.*}} = shl <16 x i8> [[VAL]], <i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5>
+  uc = uc << 5;
+
+// CHECK: [[VAL:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @ss
+// CHECK: [[CNT:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @ss2
+// CHECK: %{{.*}} = shl <8 x i16> [[VAL]], [[CNT]]
+  ss = ss << ss2;
+// CHECK: [[VAL:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @ss
+// CHECK: [[CNT:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @us2
+// CHECK: %{{.*}} = shl <8 x i16> [[VAL]], [[CNT]]
+  ss = ss << us2;
+// CHECK: [[VAL:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @ss
+// CHECK: [[T1:%[^ ]+]] = load volatile i32, i32* @cnt
+// CHECK: [[T2:%[^ ]+]] = insertelement <8 x i32> undef, i32 [[T1]], i32 0
+// CHECK: [[T3:%[^ ]+]] = shufflevector <8 x i32> [[T2]], <8 x i32> undef, <8 x i32> zeroinitializer
+// CHECK: [[CNT:%[^ ]+]] = trunc <8 x i32> [[T3]] to <8 x i16>
+// CHECK: %{{.*}} = shl <8 x i16> [[VAL]], [[CNT]]
+  ss = ss << cnt;
+// CHECK: [[VAL:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @ss
+// CHECK: %{{.*}} = shl <8 x i16> [[VAL]], <i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5>
+  ss = ss << 5;
+// CHECK: [[VAL:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @us
+// CHECK: [[CNT:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @ss2
+// CHECK: %{{.*}} = shl <8 x i16> [[VAL]], [[CNT]]
+  us = us << ss2;
+// CHECK: [[VAL:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @us
+// CHECK: [[CNT:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @us2
+// CHECK: %{{.*}} = shl <8 x i16> [[VAL]], [[CNT]]
+  us = us << us2;
+// CHECK: [[VAL:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @us
+// CHECK: [[T1:%[^ ]+]] = load volatile i32, i32* @cnt
+// CHECK: [[T2:%[^ ]+]] = insertelement <8 x i32> undef, i32 [[T1]], i32 0
+// CHECK: [[T3:%[^ ]+]] = shufflevector <8 x i32> [[T2]], <8 x i32> undef, <8 x i32> zeroinitializer
+// CHECK: [[CNT:%[^ ]+]] = trunc <8 x i32> [[T3]] to <8 x i16>
+// CHECK: %{{.*}} = shl <8 x i16> [[VAL]], [[CNT]]
+  us = us << cnt;
+// CHECK: [[VAL:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @us
+// CHECK: %{{.*}} = shl <8 x i16> [[VAL]], <i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5>
+  us = us << 5;
+
+// CHECK: [[VAL:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @si
+// CHECK: [[CNT:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @si2
+// CHECK: %{{.*}} = shl <4 x i32> [[VAL]], [[CNT]]
+  si = si << si2;
+// CHECK: [[VAL:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @si
+// CHECK: [[CNT:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @ui2
+// CHECK: %{{.*}} = shl <4 x i32> [[VAL]], [[CNT]]
+  si = si << ui2;
+// CHECK: [[VAL:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @si
+// CHECK: [[T1:%[^ ]+]] = load volatile i32, i32* @cnt
+// CHECK: [[T3:%[^ ]+]] = insertelement <4 x i32> undef, i32 [[T1]], i32 0
+// CHECK: [[CNT:%[^ ]+]] = shufflevector <4 x i32> [[T3]], <4 x i32> undef, <4 x i32> zeroinitializer
+// CHECK: %{{.*}} = shl <4 x i32> [[VAL]], [[CNT]]
+  si = si << cnt;
+// CHECK: [[VAL:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @si
+// CHECK: %{{.*}} = shl <4 x i32> [[VAL]], <i32 5, i32 5, i32 5, i32 5>
+  si = si << 5;
+// CHECK: [[VAL:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @ui
+// CHECK: [[CNT:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @si2
+// CHECK: %{{.*}} = shl <4 x i32> [[VAL]], [[CNT]]
+  ui = ui << si2;
+// CHECK: [[VAL:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @ui
+// CHECK: [[CNT:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @ui2
+// CHECK: %{{.*}} = shl <4 x i32> [[VAL]], [[CNT]]
+  ui = ui << ui2;
+// CHECK: [[VAL:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @ui
+// CHECK: [[T1:%[^ ]+]] = load volatile i32, i32* @cnt
+// CHECK: [[T3:%[^ ]+]] = insertelement <4 x i32> undef, i32 [[T1]], i32 0
+// CHECK: [[CNT:%[^ ]+]] = shufflevector <4 x i32> [[T3]], <4 x i32> undef, <4 x i32> zeroinitializer
+// CHECK: %{{.*}} = shl <4 x i32> [[VAL]], [[CNT]]
+  ui = ui << cnt;
+// CHECK: [[VAL:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @ui
+// CHECK: %{{.*}} = shl <4 x i32> [[VAL]], <i32 5, i32 5, i32 5, i32 5>
+  ui = ui << 5;
+
+// CHECK: [[VAL:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @sl
+// CHECK: [[CNT:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @sl2
+// CHECK: %{{.*}} = shl <2 x i64> [[VAL]], [[CNT]]
+  sl = sl << sl2;
+// CHECK: [[VAL:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @sl
+// CHECK: [[CNT:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @ul2
+// CHECK: %{{.*}} = shl <2 x i64> [[VAL]], [[CNT]]
+  sl = sl << ul2;
+// CHECK: [[VAL:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @sl
+// CHECK: [[T1:%[^ ]+]] = load volatile i32, i32* @cnt
+// CHECK: [[T2:%[^ ]+]] = insertelement <2 x i32> undef, i32 [[T1]], i32 0
+// CHECK: [[T3:%[^ ]+]] = shufflevector <2 x i32> [[T2]], <2 x i32> undef, <2 x i32> zeroinitializer
+// CHECK: [[CNT:%[^ ]+]] = zext <2 x i32> [[T3]] to <2 x i64>
+// CHECK: %{{.*}} = shl <2 x i64> [[VAL]], [[CNT]]
+  sl = sl << cnt;
+// CHECK: [[VAL:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @sl
+// CHECK: %{{.*}} = shl <2 x i64> [[VAL]], <i64 5, i64 5>
+  sl = sl << 5;
+// CHECK: [[VAL:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @ul
+// CHECK: [[CNT:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @sl2
+// CHECK: %{{.*}} = shl <2 x i64> [[VAL]], [[CNT]]
+  ul = ul << sl2;
+// CHECK: [[VAL:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @ul
+// CHECK: [[CNT:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @ul2
+// CHECK: %{{.*}} = shl <2 x i64> [[VAL]], [[CNT]]
+  ul = ul << ul2;
+// CHECK: [[VAL:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @ul
+// CHECK: [[T1:%[^ ]+]] = load volatile i32, i32* @cnt
+// CHECK: [[T2:%[^ ]+]] = insertelement <2 x i32> undef, i32 [[T1]], i32 0
+// CHECK: [[T3:%[^ ]+]] = shufflevector <2 x i32> [[T2]], <2 x i32> undef, <2 x i32> zeroinitializer
+// CHECK: [[CNT:%[^ ]+]] = zext <2 x i32> [[T3]] to <2 x i64>
+// CHECK: %{{.*}} = shl <2 x i64> [[VAL]], [[CNT]]
+  ul = ul << cnt;
+// CHECK: [[VAL:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @ul
+// CHECK: %{{.*}} = shl <2 x i64> [[VAL]], <i64 5, i64 5>
+  ul = ul << 5;
+}
+
+void test_sl_assign (void)
+{
+// CHECK-LABEL: test_sl_assign
+
+// CHECK: [[CNT:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @sc2
+// CHECK: [[VAL:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @sc
+// CHECK: %{{.*}} = shl <16 x i8> [[VAL]], [[CNT]]
+  sc <<= sc2;
+// CHECK: [[CNT:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @uc2
+// CHECK: [[VAL:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @sc
+// CHECK: %{{.*}} = shl <16 x i8> [[VAL]], [[CNT]]
+  sc <<= uc2;
+// CHECK: [[T1:%[^ ]+]] = load volatile i32, i32* @cnt
+// CHECK: [[T2:%[^ ]+]] = insertelement <16 x i32> undef, i32 [[T1]], i32 0
+// CHECK: [[T3:%[^ ]+]] = shufflevector <16 x i32> [[T2]], <16 x i32> undef, <16 x i32> zeroinitializer
+// CHECK: [[VAL:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @sc
+// CHECK: [[CNT:%[^ ]+]] = trunc <16 x i32> [[T3]] to <16 x i8>
+// CHECK: %{{.*}} = shl <16 x i8> [[VAL]], [[CNT]]
+  sc <<= cnt;
+// CHECK: [[VAL:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @sc
+// CHECK: %{{.*}} = shl <16 x i8> [[VAL]], <i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5>
+  sc <<= 5;
+// CHECK: [[CNT:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @sc2
+// CHECK: [[VAL:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @uc
+// CHECK: %{{.*}} = shl <16 x i8> [[VAL]], [[CNT]]
+  uc <<= sc2;
+// CHECK: [[CNT:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @uc2
+// CHECK: [[VAL:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @uc
+// CHECK: %{{.*}} = shl <16 x i8> [[VAL]], [[CNT]]
+  uc <<= uc2;
+// CHECK: [[T1:%[^ ]+]] = load volatile i32, i32* @cnt
+// CHECK: [[T2:%[^ ]+]] = insertelement <16 x i32> undef, i32 [[T1]], i32 0
+// CHECK: [[T3:%[^ ]+]] = shufflevector <16 x i32> [[T2]], <16 x i32> undef, <16 x i32> zeroinitializer
+// CHECK: [[VAL:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @uc
+// CHECK: [[CNT:%[^ ]+]] = trunc <16 x i32> [[T3]] to <16 x i8>
+// CHECK: %{{.*}} = shl <16 x i8> [[VAL]], [[CNT]]
+  uc <<= cnt;
+// CHECK: [[VAL:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @uc
+// CHECK: %{{.*}} = shl <16 x i8> [[VAL]], <i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5>
+  uc <<= 5;
+
+// CHECK: [[CNT:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @ss2
+// CHECK: [[VAL:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @ss
+// CHECK: %{{.*}} = shl <8 x i16> [[VAL]], [[CNT]]
+  ss <<= ss2;
+// CHECK: [[CNT:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @us2
+// CHECK: [[VAL:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @ss
+// CHECK: %{{.*}} = shl <8 x i16> [[VAL]], [[CNT]]
+  ss <<= us2;
+// CHECK: [[T1:%[^ ]+]] = load volatile i32, i32* @cnt
+// CHECK: [[T2:%[^ ]+]] = insertelement <8 x i32> undef, i32 [[T1]], i32 0
+// CHECK: [[T3:%[^ ]+]] = shufflevector <8 x i32> [[T2]], <8 x i32> undef, <8 x i32> zeroinitializer
+// CHECK: [[VAL:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @ss
+// CHECK: [[CNT:%[^ ]+]] = trunc <8 x i32> [[T3]] to <8 x i16>
+// CHECK: %{{.*}} = shl <8 x i16> [[VAL]], [[CNT]]
+  ss <<= cnt;
+// CHECK: [[VAL:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @ss
+// CHECK: %{{.*}} = shl <8 x i16> [[VAL]], <i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5>
+  ss <<= 5;
+// CHECK: [[CNT:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @ss2
+// CHECK: [[VAL:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @us
+// CHECK: %{{.*}} = shl <8 x i16> [[VAL]], [[CNT]]
+  us <<= ss2;
+// CHECK: [[CNT:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @us2
+// CHECK: [[VAL:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @us
+// CHECK: %{{.*}} = shl <8 x i16> [[VAL]], [[CNT]]
+  us <<= us2;
+// CHECK: [[T1:%[^ ]+]] = load volatile i32, i32* @cnt
+// CHECK: [[T2:%[^ ]+]] = insertelement <8 x i32> undef, i32 [[T1]], i32 0
+// CHECK: [[T3:%[^ ]+]] = shufflevector <8 x i32> [[T2]], <8 x i32> undef, <8 x i32> zeroinitializer
+// CHECK: [[VAL:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @us
+// CHECK: [[CNT:%[^ ]+]] = trunc <8 x i32> [[T3]] to <8 x i16>
+// CHECK: %{{.*}} = shl <8 x i16> [[VAL]], [[CNT]]
+  us <<= cnt;
+// CHECK: [[VAL:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @us
+// CHECK: %{{.*}} = shl <8 x i16> [[VAL]], <i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5>
+  us <<= 5;
+
+// CHECK: [[CNT:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @si2
+// CHECK: [[VAL:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @si
+// CHECK: %{{.*}} = shl <4 x i32> [[VAL]], [[CNT]]
+  si <<= si2;
+// CHECK: [[CNT:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @ui2
+// CHECK: [[VAL:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @si
+// CHECK: %{{.*}} = shl <4 x i32> [[VAL]], [[CNT]]
+  si <<= ui2;
+// CHECK: [[T1:%[^ ]+]] = load volatile i32, i32* @cnt
+// CHECK: [[T3:%[^ ]+]] = insertelement <4 x i32> undef, i32 [[T1]], i32 0
+// CHECK: [[CNT:%[^ ]+]] = shufflevector <4 x i32> [[T3]], <4 x i32> undef, <4 x i32> zeroinitializer
+// CHECK: [[VAL:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @si
+// CHECK: %{{.*}} = shl <4 x i32> [[VAL]], [[CNT]]
+  si <<= cnt;
+// CHECK: [[VAL:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @si
+// CHECK: %{{.*}} = shl <4 x i32> [[VAL]], <i32 5, i32 5, i32 5, i32 5>
+  si <<= 5;
+// CHECK: [[CNT:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @si2
+// CHECK: [[VAL:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @ui
+// CHECK: %{{.*}} = shl <4 x i32> [[VAL]], [[CNT]]
+  ui <<= si2;
+// CHECK: [[CNT:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @ui2
+// CHECK: [[VAL:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @ui
+// CHECK: %{{.*}} = shl <4 x i32> [[VAL]], [[CNT]]
+  ui <<= ui2;
+// CHECK: [[T1:%[^ ]+]] = load volatile i32, i32* @cnt
+// CHECK: [[T3:%[^ ]+]] = insertelement <4 x i32> undef, i32 [[T1]], i32 0
+// CHECK: [[CNT:%[^ ]+]] = shufflevector <4 x i32> [[T3]], <4 x i32> undef, <4 x i32> zeroinitializer
+// CHECK: [[VAL:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @ui
+// CHECK: %{{.*}} = shl <4 x i32> [[VAL]], [[CNT]]
+  ui <<= cnt;
+// CHECK: [[VAL:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @ui
+// CHECK: %{{.*}} = shl <4 x i32> [[VAL]], <i32 5, i32 5, i32 5, i32 5>
+  ui <<= 5;
+
+// CHECK: [[CNT:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @sl2
+// CHECK: [[VAL:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @sl
+// CHECK: %{{.*}} = shl <2 x i64> [[VAL]], [[CNT]]
+  sl <<= sl2;
+// CHECK: [[CNT:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @ul2
+// CHECK: [[VAL:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @sl
+// CHECK: %{{.*}} = shl <2 x i64> [[VAL]], [[CNT]]
+  sl <<= ul2;
+// CHECK: [[T1:%[^ ]+]] = load volatile i32, i32* @cnt
+// CHECK: [[T2:%[^ ]+]] = insertelement <2 x i32> undef, i32 [[T1]], i32 0
+// CHECK: [[T3:%[^ ]+]] = shufflevector <2 x i32> [[T2]], <2 x i32> undef, <2 x i32> zeroinitializer
+// CHECK: [[VAL:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @sl
+// CHECK: [[CNT:%[^ ]+]] = zext <2 x i32> [[T3]] to <2 x i64>
+// CHECK: %{{.*}} = shl <2 x i64> [[VAL]], [[CNT]]
+  sl <<= cnt;
+// CHECK: [[VAL:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @sl
+// CHECK: %{{.*}} = shl <2 x i64> [[VAL]], <i64 5, i64 5>
+  sl <<= 5;
+// CHECK: [[CNT:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @sl2
+// CHECK: [[VAL:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @ul
+// CHECK: %{{.*}} = shl <2 x i64> [[VAL]], [[CNT]]
+  ul <<= sl2;
+// CHECK: [[CNT:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @ul2
+// CHECK: [[VAL:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @ul
+// CHECK: %{{.*}} = shl <2 x i64> [[VAL]], [[CNT]]
+  ul <<= ul2;
+// CHECK: [[T1:%[^ ]+]] = load volatile i32, i32* @cnt
+// CHECK: [[T2:%[^ ]+]] = insertelement <2 x i32> undef, i32 [[T1]], i32 0
+// CHECK: [[T3:%[^ ]+]] = shufflevector <2 x i32> [[T2]], <2 x i32> undef, <2 x i32> zeroinitializer
+// CHECK: [[VAL:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @ul
+// CHECK: [[CNT:%[^ ]+]] = zext <2 x i32> [[T3]] to <2 x i64>
+// CHECK: %{{.*}} = shl <2 x i64> [[VAL]], [[CNT]]
+  ul <<= cnt;
+// CHECK: [[VAL:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @ul
+// CHECK: %{{.*}} = shl <2 x i64> [[VAL]], <i64 5, i64 5>
+  ul <<= 5;
+}
+
+void test_sr (void)
+{
+// CHECK-LABEL: test_sr
+
+// CHECK: [[VAL:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @sc
+// CHECK: [[CNT:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @sc2
+// CHECK: %{{.*}} = ashr <16 x i8> [[VAL]], [[CNT]]
+  sc = sc >> sc2;
+// CHECK: [[VAL:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @sc
+// CHECK: [[CNT:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @uc2
+// CHECK: %{{.*}} = ashr <16 x i8> [[VAL]], [[CNT]]
+  sc = sc >> uc2;
+// CHECK: [[VAL:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @sc
+// CHECK: [[T1:%[^ ]+]] = load volatile i32, i32* @cnt
+// CHECK: [[T2:%[^ ]+]] = insertelement <16 x i32> undef, i32 [[T1]], i32 0
+// CHECK: [[T3:%[^ ]+]] = shufflevector <16 x i32> [[T2]], <16 x i32> undef, <16 x i32> zeroinitializer
+// CHECK: [[CNT:%[^ ]+]] = trunc <16 x i32> [[T3]] to <16 x i8>
+// CHECK: %{{.*}} = ashr <16 x i8> [[VAL]], [[CNT]]
+  sc = sc >> cnt;
+// CHECK: [[VAL:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @sc
+// CHECK: %{{.*}} = ashr <16 x i8> [[VAL]], <i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5>
+  sc = sc >> 5;
+// CHECK: [[VAL:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @uc
+// CHECK: [[CNT:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @sc2
+// CHECK: %{{.*}} = lshr <16 x i8> [[VAL]], [[CNT]]
+  uc = uc >> sc2;
+// CHECK: [[VAL:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @uc
+// CHECK: [[CNT:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @uc2
+// CHECK: %{{.*}} = lshr <16 x i8> [[VAL]], [[CNT]]
+  uc = uc >> uc2;
+// CHECK: [[VAL:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @uc
+// CHECK: [[T1:%[^ ]+]] = load volatile i32, i32* @cnt
+// CHECK: [[T2:%[^ ]+]] = insertelement <16 x i32> undef, i32 [[T1]], i32 0
+// CHECK: [[T3:%[^ ]+]] = shufflevector <16 x i32> [[T2]], <16 x i32> undef, <16 x i32> zeroinitializer
+// CHECK: [[CNT:%[^ ]+]] = trunc <16 x i32> [[T3]] to <16 x i8>
+// CHECK: %{{.*}} = lshr <16 x i8> [[VAL]], [[CNT]]
+  uc = uc >> cnt;
+// CHECK: [[VAL:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @uc
+// CHECK: %{{.*}} = lshr <16 x i8> [[VAL]], <i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5>
+  uc = uc >> 5;
+
+// CHECK: [[VAL:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @ss
+// CHECK: [[CNT:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @ss2
+// CHECK: %{{.*}} = ashr <8 x i16> [[VAL]], [[CNT]]
+  ss = ss >> ss2;
+// CHECK: [[VAL:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @ss
+// CHECK: [[CNT:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @us2
+// CHECK: %{{.*}} = ashr <8 x i16> [[VAL]], [[CNT]]
+  ss = ss >> us2;
+// CHECK: [[VAL:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @ss
+// CHECK: [[T1:%[^ ]+]] = load volatile i32, i32* @cnt
+// CHECK: [[T2:%[^ ]+]] = insertelement <8 x i32> undef, i32 [[T1]], i32 0
+// CHECK: [[T3:%[^ ]+]] = shufflevector <8 x i32> [[T2]], <8 x i32> undef, <8 x i32> zeroinitializer
+// CHECK: [[CNT:%[^ ]+]] = trunc <8 x i32> [[T3]] to <8 x i16>
+// CHECK: %{{.*}} = ashr <8 x i16> [[VAL]], [[CNT]]
+  ss = ss >> cnt;
+// CHECK: [[VAL:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @ss
+// CHECK: %{{.*}} = ashr <8 x i16> [[VAL]], <i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5>
+  ss = ss >> 5;
+// CHECK: [[VAL:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @us
+// CHECK: [[CNT:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @ss2
+// CHECK: %{{.*}} = lshr <8 x i16> [[VAL]], [[CNT]]
+  us = us >> ss2;
+// CHECK: [[VAL:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @us
+// CHECK: [[CNT:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @us2
+// CHECK: %{{.*}} = lshr <8 x i16> [[VAL]], [[CNT]]
+  us = us >> us2;
+// CHECK: [[VAL:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @us
+// CHECK: [[T1:%[^ ]+]] = load volatile i32, i32* @cnt
+// CHECK: [[T2:%[^ ]+]] = insertelement <8 x i32> undef, i32 [[T1]], i32 0
+// CHECK: [[T3:%[^ ]+]] = shufflevector <8 x i32> [[T2]], <8 x i32> undef, <8 x i32> zeroinitializer
+// CHECK: [[CNT:%[^ ]+]] = trunc <8 x i32> [[T3]] to <8 x i16>
+// CHECK: %{{.*}} = lshr <8 x i16> [[VAL]], [[CNT]]
+  us = us >> cnt;
+// CHECK: [[VAL:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @us
+// CHECK: %{{.*}} = lshr <8 x i16> [[VAL]], <i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5>
+  us = us >> 5;
+
+// CHECK: [[VAL:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @si
+// CHECK: [[CNT:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @si2
+// CHECK: %{{.*}} = ashr <4 x i32> [[VAL]], [[CNT]]
+  si = si >> si2;
+// CHECK: [[VAL:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @si
+// CHECK: [[CNT:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @ui2
+// CHECK: %{{.*}} = ashr <4 x i32> [[VAL]], [[CNT]]
+  si = si >> ui2;
+// CHECK: [[VAL:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @si
+// CHECK: [[T1:%[^ ]+]] = load volatile i32, i32* @cnt
+// CHECK: [[T3:%[^ ]+]] = insertelement <4 x i32> undef, i32 [[T1]], i32 0
+// CHECK: [[CNT:%[^ ]+]] = shufflevector <4 x i32> [[T3]], <4 x i32> undef, <4 x i32> zeroinitializer
+// CHECK: %{{.*}} = ashr <4 x i32> [[VAL]], [[CNT]]
+  si = si >> cnt;
+// CHECK: [[VAL:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @si
+// CHECK: %{{.*}} = ashr <4 x i32> [[VAL]], <i32 5, i32 5, i32 5, i32 5>
+  si = si >> 5;
+// CHECK: [[VAL:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @ui
+// CHECK: [[CNT:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @si2
+// CHECK: %{{.*}} = lshr <4 x i32> [[VAL]], [[CNT]]
+  ui = ui >> si2;
+// CHECK: [[VAL:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @ui
+// CHECK: [[CNT:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @ui2
+// CHECK: %{{.*}} = lshr <4 x i32> [[VAL]], [[CNT]]
+  ui = ui >> ui2;
+// CHECK: [[VAL:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @ui
+// CHECK: [[T1:%[^ ]+]] = load volatile i32, i32* @cnt
+// CHECK: [[T3:%[^ ]+]] = insertelement <4 x i32> undef, i32 [[T1]], i32 0
+// CHECK: [[CNT:%[^ ]+]] = shufflevector <4 x i32> [[T3]], <4 x i32> undef, <4 x i32> zeroinitializer
+// CHECK: %{{.*}} = lshr <4 x i32> [[VAL]], [[CNT]]
+  ui = ui >> cnt;
+// CHECK: [[VAL:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @ui
+// CHECK: %{{.*}} = lshr <4 x i32> [[VAL]], <i32 5, i32 5, i32 5, i32 5>
+  ui = ui >> 5;
+
+// CHECK: [[VAL:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @sl
+// CHECK: [[CNT:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @sl2
+// CHECK: %{{.*}} = ashr <2 x i64> [[VAL]], [[CNT]]
+  sl = sl >> sl2;
+// CHECK: [[VAL:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @sl
+// CHECK: [[CNT:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @ul2
+// CHECK: %{{.*}} = ashr <2 x i64> [[VAL]], [[CNT]]
+  sl = sl >> ul2;
+// CHECK: [[VAL:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @sl
+// CHECK: [[T1:%[^ ]+]] = load volatile i32, i32* @cnt
+// CHECK: [[T2:%[^ ]+]] = insertelement <2 x i32> undef, i32 [[T1]], i32 0
+// CHECK: [[T3:%[^ ]+]] = shufflevector <2 x i32> [[T2]], <2 x i32> undef, <2 x i32> zeroinitializer
+// CHECK: [[CNT:%[^ ]+]] = zext <2 x i32> [[T3]] to <2 x i64>
+// CHECK: %{{.*}} = ashr <2 x i64> [[VAL]], [[CNT]]
+  sl = sl >> cnt;
+// CHECK: [[VAL:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @sl
+// CHECK: %{{.*}} = ashr <2 x i64> [[VAL]], <i64 5, i64 5>
+  sl = sl >> 5;
+// CHECK: [[VAL:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @ul
+// CHECK: [[CNT:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @sl2
+// CHECK: %{{.*}} = lshr <2 x i64> [[VAL]], [[CNT]]
+  ul = ul >> sl2;
+// CHECK: [[VAL:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @ul
+// CHECK: [[CNT:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @ul2
+// CHECK: %{{.*}} = lshr <2 x i64> [[VAL]], [[CNT]]
+  ul = ul >> ul2;
+// CHECK: [[VAL:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @ul
+// CHECK: [[T1:%[^ ]+]] = load volatile i32, i32* @cnt
+// CHECK: [[T2:%[^ ]+]] = insertelement <2 x i32> undef, i32 [[T1]], i32 0
+// CHECK: [[T3:%[^ ]+]] = shufflevector <2 x i32> [[T2]], <2 x i32> undef, <2 x i32> zeroinitializer
+// CHECK: [[CNT:%[^ ]+]] = zext <2 x i32> [[T3]] to <2 x i64>
+// CHECK: %{{.*}} = lshr <2 x i64> [[VAL]], [[CNT]]
+  ul = ul >> cnt;
+// CHECK: [[VAL:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @ul
+// CHECK: %{{.*}} = lshr <2 x i64> [[VAL]], <i64 5, i64 5>
+  ul = ul >> 5;
+}
+
+void test_sr_assign (void)
+{
+// CHECK-LABEL: test_sr_assign
+
+// CHECK: [[CNT:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @sc2
+// CHECK: [[VAL:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @sc
+// CHECK: %{{.*}} = ashr <16 x i8> [[VAL]], [[CNT]]
+  sc >>= sc2;
+// CHECK: [[CNT:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @uc2
+// CHECK: [[VAL:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @sc
+// CHECK: %{{.*}} = ashr <16 x i8> [[VAL]], [[CNT]]
+  sc >>= uc2;
+// CHECK: [[T1:%[^ ]+]] = load volatile i32, i32* @cnt
+// CHECK: [[T2:%[^ ]+]] = insertelement <16 x i32> undef, i32 [[T1]], i32 0
+// CHECK: [[T3:%[^ ]+]] = shufflevector <16 x i32> [[T2]], <16 x i32> undef, <16 x i32> zeroinitializer
+// CHECK: [[VAL:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @sc
+// CHECK: [[CNT:%[^ ]+]] = trunc <16 x i32> [[T3]] to <16 x i8>
+// CHECK: %{{.*}} = ashr <16 x i8> [[VAL]], [[CNT]]
+  sc >>= cnt;
+// CHECK: [[VAL:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @sc
+// CHECK: %{{.*}} = ashr <16 x i8> [[VAL]], <i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5>
+  sc >>= 5;
+// CHECK: [[CNT:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @sc2
+// CHECK: [[VAL:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @uc
+// CHECK: %{{.*}} = lshr <16 x i8> [[VAL]], [[CNT]]
+  uc >>= sc2;
+// CHECK: [[CNT:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @uc2
+// CHECK: [[VAL:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @uc
+// CHECK: %{{.*}} = lshr <16 x i8> [[VAL]], [[CNT]]
+  uc >>= uc2;
+// CHECK: [[T1:%[^ ]+]] = load volatile i32, i32* @cnt
+// CHECK: [[T2:%[^ ]+]] = insertelement <16 x i32> undef, i32 [[T1]], i32 0
+// CHECK: [[T3:%[^ ]+]] = shufflevector <16 x i32> [[T2]], <16 x i32> undef, <16 x i32> zeroinitializer
+// CHECK: [[VAL:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @uc
+// CHECK: [[CNT:%[^ ]+]] = trunc <16 x i32> [[T3]] to <16 x i8>
+// CHECK: %{{.*}} = lshr <16 x i8> [[VAL]], [[CNT]]
+  uc >>= cnt;
+// CHECK: [[VAL:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @uc
+// CHECK: %{{.*}} = lshr <16 x i8> [[VAL]], <i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5>
+  uc >>= 5;
+
+// CHECK: [[CNT:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @ss2
+// CHECK: [[VAL:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @ss
+// CHECK: %{{.*}} = ashr <8 x i16> [[VAL]], [[CNT]]
+  ss >>= ss2;
+// CHECK: [[CNT:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @us2
+// CHECK: [[VAL:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @ss
+// CHECK: %{{.*}} = ashr <8 x i16> [[VAL]], [[CNT]]
+  ss >>= us2;
+// CHECK: [[T1:%[^ ]+]] = load volatile i32, i32* @cnt
+// CHECK: [[T2:%[^ ]+]] = insertelement <8 x i32> undef, i32 [[T1]], i32 0
+// CHECK: [[T3:%[^ ]+]] = shufflevector <8 x i32> [[T2]], <8 x i32> undef, <8 x i32> zeroinitializer
+// CHECK: [[VAL:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @ss
+// CHECK: [[CNT:%[^ ]+]] = trunc <8 x i32> [[T3]] to <8 x i16>
+// CHECK: %{{.*}} = ashr <8 x i16> [[VAL]], [[CNT]]
+  ss >>= cnt;
+// CHECK: [[VAL:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @ss
+// CHECK: %{{.*}} = ashr <8 x i16> [[VAL]], <i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5>
+  ss >>= 5;
+// CHECK: [[CNT:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @ss2
+// CHECK: [[VAL:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @us
+// CHECK: %{{.*}} = lshr <8 x i16> [[VAL]], [[CNT]]
+  us >>= ss2;
+// CHECK: [[CNT:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @us2
+// CHECK: [[VAL:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @us
+// CHECK: %{{.*}} = lshr <8 x i16> [[VAL]], [[CNT]]
+  us >>= us2;
+// CHECK: [[T1:%[^ ]+]] = load volatile i32, i32* @cnt
+// CHECK: [[T2:%[^ ]+]] = insertelement <8 x i32> undef, i32 [[T1]], i32 0
+// CHECK: [[T3:%[^ ]+]] = shufflevector <8 x i32> [[T2]], <8 x i32> undef, <8 x i32> zeroinitializer
+// CHECK: [[VAL:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @us
+// CHECK: [[CNT:%[^ ]+]] = trunc <8 x i32> [[T3]] to <8 x i16>
+// CHECK: %{{.*}} = lshr <8 x i16> [[VAL]], [[CNT]]
+  us >>= cnt;
+// CHECK: [[VAL:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @us
+// CHECK: %{{.*}} = lshr <8 x i16> [[VAL]], <i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5>
+  us >>= 5;
+
+// CHECK: [[CNT:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @si2
+// CHECK: [[VAL:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @si
+// CHECK: %{{.*}} = ashr <4 x i32> [[VAL]], [[CNT]]
+  si >>= si2;
+// CHECK: [[CNT:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @ui2
+// CHECK: [[VAL:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @si
+// CHECK: %{{.*}} = ashr <4 x i32> [[VAL]], [[CNT]]
+  si >>= ui2;
+// CHECK: [[T1:%[^ ]+]] = load volatile i32, i32* @cnt
+// CHECK: [[T3:%[^ ]+]] = insertelement <4 x i32> undef, i32 [[T1]], i32 0
+// CHECK: [[CNT:%[^ ]+]] = shufflevector <4 x i32> [[T3]], <4 x i32> undef, <4 x i32> zeroinitializer
+// CHECK: [[VAL:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @si
+// CHECK: %{{.*}} = ashr <4 x i32> [[VAL]], [[CNT]]
+  si >>= cnt;
+// CHECK: [[VAL:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @si
+// CHECK: %{{.*}} = ashr <4 x i32> [[VAL]], <i32 5, i32 5, i32 5, i32 5>
+  si >>= 5;
+// CHECK: [[CNT:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @si2
+// CHECK: [[VAL:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @ui
+// CHECK: %{{.*}} = lshr <4 x i32> [[VAL]], [[CNT]]
+  ui >>= si2;
+// CHECK: [[CNT:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @ui2
+// CHECK: [[VAL:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @ui
+// CHECK: %{{.*}} = lshr <4 x i32> [[VAL]], [[CNT]]
+  ui >>= ui2;
+// CHECK: [[T1:%[^ ]+]] = load volatile i32, i32* @cnt
+// CHECK: [[T3:%[^ ]+]] = insertelement <4 x i32> undef, i32 [[T1]], i32 0
+// CHECK: [[CNT:%[^ ]+]] = shufflevector <4 x i32> [[T3]], <4 x i32> undef, <4 x i32> zeroinitializer
+// CHECK: [[VAL:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @ui
+// CHECK: %{{.*}} = lshr <4 x i32> [[VAL]], [[CNT]]
+  ui >>= cnt;
+// CHECK: [[VAL:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @ui
+// CHECK: %{{.*}} = lshr <4 x i32> [[VAL]], <i32 5, i32 5, i32 5, i32 5>
+  ui >>= 5;
+
+// CHECK: [[CNT:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @sl2
+// CHECK: [[VAL:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @sl
+// CHECK: %{{.*}} = ashr <2 x i64> [[VAL]], [[CNT]]
+  sl >>= sl2;
+// CHECK: [[CNT:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @ul2
+// CHECK: [[VAL:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @sl
+// CHECK: %{{.*}} = ashr <2 x i64> [[VAL]], [[CNT]]
+  sl >>= ul2;
+// CHECK: [[T1:%[^ ]+]] = load volatile i32, i32* @cnt
+// CHECK: [[T2:%[^ ]+]] = insertelement <2 x i32> undef, i32 [[T1]], i32 0
+// CHECK: [[T3:%[^ ]+]] = shufflevector <2 x i32> [[T2]], <2 x i32> undef, <2 x i32> zeroinitializer
+// CHECK: [[VAL:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @sl
+// CHECK: [[CNT:%[^ ]+]] = zext <2 x i32> [[T3]] to <2 x i64>
+// CHECK: %{{.*}} = ashr <2 x i64> [[VAL]], [[CNT]]
+  sl >>= cnt;
+// CHECK: [[VAL:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @sl
+// CHECK: %{{.*}} = ashr <2 x i64> [[VAL]], <i64 5, i64 5>
+  sl >>= 5;
+// CHECK: [[CNT:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @sl2
+// CHECK: [[VAL:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @ul
+// CHECK: %{{.*}} = lshr <2 x i64> [[VAL]], [[CNT]]
+  ul >>= sl2;
+// CHECK: [[CNT:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @ul2
+// CHECK: [[VAL:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @ul
+// CHECK: %{{.*}} = lshr <2 x i64> [[VAL]], [[CNT]]
+  ul >>= ul2;
+// CHECK: [[T1:%[^ ]+]] = load volatile i32, i32* @cnt
+// CHECK: [[T2:%[^ ]+]] = insertelement <2 x i32> undef, i32 [[T1]], i32 0
+// CHECK: [[T3:%[^ ]+]] = shufflevector <2 x i32> [[T2]], <2 x i32> undef, <2 x i32> zeroinitializer
+// CHECK: [[VAL:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @ul
+// CHECK: [[CNT:%[^ ]+]] = zext <2 x i32> [[T3]] to <2 x i64>
+// CHECK: %{{.*}} = lshr <2 x i64> [[VAL]], [[CNT]]
+  ul >>= cnt;
+// CHECK: [[VAL:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @ul
+// CHECK: %{{.*}} = lshr <2 x i64> [[VAL]], <i64 5, i64 5>
+  ul >>= 5;
+}
+
+
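+// Note (descriptive, matches the checks that follow): vector comparisons
+// produce an <N x i1> result that is sign-extended to the corresponding
+// boolean vector type, hence the icmp/fcmp followed by sext in each check.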
+void test_cmpeq (void)
+{
+// CHECK-LABEL: test_cmpeq
+
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @sc
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @sc2
+// CHECK: [[CMP:%[^ ]+]] = icmp eq <16 x i8> [[VAL1]], [[VAL2]]
+// CHECK: %{{.*}} = sext <16 x i1> [[CMP]] to <16 x i8>
+  bc = sc == sc2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @sc
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @bc2
+// CHECK: [[CMP:%[^ ]+]] = icmp eq <16 x i8> [[VAL1]], [[VAL2]]
+// CHECK: %{{.*}} = sext <16 x i1> [[CMP]] to <16 x i8>
+  bc = sc == bc2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @bc
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @sc2
+// CHECK: [[CMP:%[^ ]+]] = icmp eq <16 x i8> [[VAL1]], [[VAL2]]
+// CHECK: %{{.*}} = sext <16 x i1> [[CMP]] to <16 x i8>
+  bc = bc == sc2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @uc
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @uc2
+// CHECK: [[CMP:%[^ ]+]] = icmp eq <16 x i8> [[VAL1]], [[VAL2]]
+// CHECK: %{{.*}} = sext <16 x i1> [[CMP]] to <16 x i8>
+  bc = uc == uc2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @uc
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @bc2
+// CHECK: [[CMP:%[^ ]+]] = icmp eq <16 x i8> [[VAL1]], [[VAL2]]
+// CHECK: %{{.*}} = sext <16 x i1> [[CMP]] to <16 x i8>
+  bc = uc == bc2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @bc
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @uc2
+// CHECK: [[CMP:%[^ ]+]] = icmp eq <16 x i8> [[VAL1]], [[VAL2]]
+// CHECK: %{{.*}} = sext <16 x i1> [[CMP]] to <16 x i8>
+  bc = bc == uc2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @bc
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @bc2
+// CHECK: [[CMP:%[^ ]+]] = icmp eq <16 x i8> [[VAL1]], [[VAL2]]
+// CHECK: %{{.*}} = sext <16 x i1> [[CMP]] to <16 x i8>
+  bc = bc == bc2;
+
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @ss
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @ss2
+// CHECK: [[CMP:%[^ ]+]] = icmp eq <8 x i16> [[VAL1]], [[VAL2]]
+// CHECK: %{{.*}} = sext <8 x i1> [[CMP]] to <8 x i16>
+  bs = ss == ss2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @ss
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @bs2
+// CHECK: [[CMP:%[^ ]+]] = icmp eq <8 x i16> [[VAL1]], [[VAL2]]
+// CHECK: %{{.*}} = sext <8 x i1> [[CMP]] to <8 x i16>
+  bs = ss == bs2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @bs
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @ss2
+// CHECK: [[CMP:%[^ ]+]] = icmp eq <8 x i16> [[VAL1]], [[VAL2]]
+// CHECK: %{{.*}} = sext <8 x i1> [[CMP]] to <8 x i16>
+  bs = bs == ss2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @us
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @us2
+// CHECK: [[CMP:%[^ ]+]] = icmp eq <8 x i16> [[VAL1]], [[VAL2]]
+// CHECK: %{{.*}} = sext <8 x i1> [[CMP]] to <8 x i16>
+  bs = us == us2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @us
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @bs2
+// CHECK: [[CMP:%[^ ]+]] = icmp eq <8 x i16> [[VAL1]], [[VAL2]]
+// CHECK: %{{.*}} = sext <8 x i1> [[CMP]] to <8 x i16>
+  bs = us == bs2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @bs
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @us2
+// CHECK: [[CMP:%[^ ]+]] = icmp eq <8 x i16> [[VAL1]], [[VAL2]]
+// CHECK: %{{.*}} = sext <8 x i1> [[CMP]] to <8 x i16>
+  bs = bs == us2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @bs
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @bs2
+// CHECK: [[CMP:%[^ ]+]] = icmp eq <8 x i16> [[VAL1]], [[VAL2]]
+// CHECK: %{{.*}} = sext <8 x i1> [[CMP]] to <8 x i16>
+  bs = bs == bs2;
+
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @si
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @si2
+// CHECK: [[CMP:%[^ ]+]] = icmp eq <4 x i32> [[VAL1]], [[VAL2]]
+// CHECK: %{{.*}} = sext <4 x i1> [[CMP]] to <4 x i32>
+  bi = si == si2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @si
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @bi2
+// CHECK: [[CMP:%[^ ]+]] = icmp eq <4 x i32> [[VAL1]], [[VAL2]]
+// CHECK: %{{.*}} = sext <4 x i1> [[CMP]] to <4 x i32>
+  bi = si == bi2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @bi
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @si2
+// CHECK: [[CMP:%[^ ]+]] = icmp eq <4 x i32> [[VAL1]], [[VAL2]]
+// CHECK: %{{.*}} = sext <4 x i1> [[CMP]] to <4 x i32>
+  bi = bi == si2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @ui
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @ui2
+// CHECK: [[CMP:%[^ ]+]] = icmp eq <4 x i32> [[VAL1]], [[VAL2]]
+// CHECK: %{{.*}} = sext <4 x i1> [[CMP]] to <4 x i32>
+  bi = ui == ui2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @ui
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @bi2
+// CHECK: [[CMP:%[^ ]+]] = icmp eq <4 x i32> [[VAL1]], [[VAL2]]
+// CHECK: %{{.*}} = sext <4 x i1> [[CMP]] to <4 x i32>
+  bi = ui == bi2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @bi
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @ui2
+// CHECK: [[CMP:%[^ ]+]] = icmp eq <4 x i32> [[VAL1]], [[VAL2]]
+// CHECK: %{{.*}} = sext <4 x i1> [[CMP]] to <4 x i32>
+  bi = bi == ui2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @bi
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @bi2
+// CHECK: [[CMP:%[^ ]+]] = icmp eq <4 x i32> [[VAL1]], [[VAL2]]
+// CHECK: %{{.*}} = sext <4 x i1> [[CMP]] to <4 x i32>
+  bi = bi == bi2;
+
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @sl
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @sl2
+// CHECK: [[CMP:%[^ ]+]] = icmp eq <2 x i64> [[VAL1]], [[VAL2]]
+// CHECK: %{{.*}} = sext <2 x i1> [[CMP]] to <2 x i64>
+  bl = sl == sl2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @sl
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @bl2
+// CHECK: [[CMP:%[^ ]+]] = icmp eq <2 x i64> [[VAL1]], [[VAL2]]
+// CHECK: %{{.*}} = sext <2 x i1> [[CMP]] to <2 x i64>
+  bl = sl == bl2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @bl
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @sl2
+// CHECK: [[CMP:%[^ ]+]] = icmp eq <2 x i64> [[VAL1]], [[VAL2]]
+// CHECK: %{{.*}} = sext <2 x i1> [[CMP]] to <2 x i64>
+  bl = bl == sl2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @ul
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @ul2
+// CHECK: [[CMP:%[^ ]+]] = icmp eq <2 x i64> [[VAL1]], [[VAL2]]
+// CHECK: %{{.*}} = sext <2 x i1> [[CMP]] to <2 x i64>
+  bl = ul == ul2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @ul
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @bl2
+// CHECK: [[CMP:%[^ ]+]] = icmp eq <2 x i64> [[VAL1]], [[VAL2]]
+// CHECK: %{{.*}} = sext <2 x i1> [[CMP]] to <2 x i64>
+  bl = ul == bl2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @bl
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @ul2
+// CHECK: [[CMP:%[^ ]+]] = icmp eq <2 x i64> [[VAL1]], [[VAL2]]
+// CHECK: %{{.*}} = sext <2 x i1> [[CMP]] to <2 x i64>
+  bl = bl == ul2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @bl
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @bl2
+// CHECK: [[CMP:%[^ ]+]] = icmp eq <2 x i64> [[VAL1]], [[VAL2]]
+// CHECK: %{{.*}} = sext <2 x i1> [[CMP]] to <2 x i64>
+  bl = bl == bl2;
+
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <2 x double>, <2 x double>* @fd
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <2 x double>, <2 x double>* @fd2
+// CHECK: [[CMP:%[^ ]+]] = fcmp oeq <2 x double> [[VAL1]], [[VAL2]]
+// CHECK: %{{.*}} = sext <2 x i1> [[CMP]] to <2 x i64>
+  bl = fd == fd2;
+}
+
+void test_cmpne (void)
+{
+// CHECK-LABEL: test_cmpne
+
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @sc
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @sc2
+// CHECK: [[CMP:%[^ ]+]] = icmp ne <16 x i8> [[VAL1]], [[VAL2]]
+// CHECK: %{{.*}} = sext <16 x i1> [[CMP]] to <16 x i8>
+  bc = sc != sc2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @sc
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @bc2
+// CHECK: [[CMP:%[^ ]+]] = icmp ne <16 x i8> [[VAL1]], [[VAL2]]
+// CHECK: %{{.*}} = sext <16 x i1> [[CMP]] to <16 x i8>
+  bc = sc != bc2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @bc
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @sc2
+// CHECK: [[CMP:%[^ ]+]] = icmp ne <16 x i8> [[VAL1]], [[VAL2]]
+// CHECK: %{{.*}} = sext <16 x i1> [[CMP]] to <16 x i8>
+  bc = bc != sc2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @uc
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @uc2
+// CHECK: [[CMP:%[^ ]+]] = icmp ne <16 x i8> [[VAL1]], [[VAL2]]
+// CHECK: %{{.*}} = sext <16 x i1> [[CMP]] to <16 x i8>
+  bc = uc != uc2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @uc
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @bc2
+// CHECK: [[CMP:%[^ ]+]] = icmp ne <16 x i8> [[VAL1]], [[VAL2]]
+// CHECK: %{{.*}} = sext <16 x i1> [[CMP]] to <16 x i8>
+  bc = uc != bc2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @bc
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @uc2
+// CHECK: [[CMP:%[^ ]+]] = icmp ne <16 x i8> [[VAL1]], [[VAL2]]
+// CHECK: %{{.*}} = sext <16 x i1> [[CMP]] to <16 x i8>
+  bc = bc != uc2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @bc
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @bc2
+// CHECK: [[CMP:%[^ ]+]] = icmp ne <16 x i8> [[VAL1]], [[VAL2]]
+// CHECK: %{{.*}} = sext <16 x i1> [[CMP]] to <16 x i8>
+  bc = bc != bc2;
+
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @ss
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @ss2
+// CHECK: [[CMP:%[^ ]+]] = icmp ne <8 x i16> [[VAL1]], [[VAL2]]
+// CHECK: %{{.*}} = sext <8 x i1> [[CMP]] to <8 x i16>
+  bs = ss != ss2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @ss
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @bs2
+// CHECK: [[CMP:%[^ ]+]] = icmp ne <8 x i16> [[VAL1]], [[VAL2]]
+// CHECK: %{{.*}} = sext <8 x i1> [[CMP]] to <8 x i16>
+  bs = ss != bs2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @bs
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @ss2
+// CHECK: [[CMP:%[^ ]+]] = icmp ne <8 x i16> [[VAL1]], [[VAL2]]
+// CHECK: %{{.*}} = sext <8 x i1> [[CMP]] to <8 x i16>
+  bs = bs != ss2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @us
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @us2
+// CHECK: [[CMP:%[^ ]+]] = icmp ne <8 x i16> [[VAL1]], [[VAL2]]
+// CHECK: %{{.*}} = sext <8 x i1> [[CMP]] to <8 x i16>
+  bs = us != us2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @us
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @bs2
+// CHECK: [[CMP:%[^ ]+]] = icmp ne <8 x i16> [[VAL1]], [[VAL2]]
+// CHECK: %{{.*}} = sext <8 x i1> [[CMP]] to <8 x i16>
+  bs = us != bs2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @bs
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @us2
+// CHECK: [[CMP:%[^ ]+]] = icmp ne <8 x i16> [[VAL1]], [[VAL2]]
+// CHECK: %{{.*}} = sext <8 x i1> [[CMP]] to <8 x i16>
+  bs = bs != us2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @bs
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @bs2
+// CHECK: [[CMP:%[^ ]+]] = icmp ne <8 x i16> [[VAL1]], [[VAL2]]
+// CHECK: %{{.*}} = sext <8 x i1> [[CMP]] to <8 x i16>
+  bs = bs != bs2;
+
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @si
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @si2
+// CHECK: [[CMP:%[^ ]+]] = icmp ne <4 x i32> [[VAL1]], [[VAL2]]
+// CHECK: %{{.*}} = sext <4 x i1> [[CMP]] to <4 x i32>
+  bi = si != si2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @si
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @bi2
+// CHECK: [[CMP:%[^ ]+]] = icmp ne <4 x i32> [[VAL1]], [[VAL2]]
+// CHECK: %{{.*}} = sext <4 x i1> [[CMP]] to <4 x i32>
+  bi = si != bi2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @bi
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @si2
+// CHECK: [[CMP:%[^ ]+]] = icmp ne <4 x i32> [[VAL1]], [[VAL2]]
+// CHECK: %{{.*}} = sext <4 x i1> [[CMP]] to <4 x i32>
+  bi = bi != si2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @ui
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @ui2
+// CHECK: [[CMP:%[^ ]+]] = icmp ne <4 x i32> [[VAL1]], [[VAL2]]
+// CHECK: %{{.*}} = sext <4 x i1> [[CMP]] to <4 x i32>
+  bi = ui != ui2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @ui
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @bi2
+// CHECK: [[CMP:%[^ ]+]] = icmp ne <4 x i32> [[VAL1]], [[VAL2]]
+// CHECK: %{{.*}} = sext <4 x i1> [[CMP]] to <4 x i32>
+  bi = ui != bi2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @bi
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @ui2
+// CHECK: [[CMP:%[^ ]+]] = icmp ne <4 x i32> [[VAL1]], [[VAL2]]
+// CHECK: %{{.*}} = sext <4 x i1> [[CMP]] to <4 x i32>
+  bi = bi != ui2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @bi
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @bi2
+// CHECK: [[CMP:%[^ ]+]] = icmp ne <4 x i32> [[VAL1]], [[VAL2]]
+// CHECK: %{{.*}} = sext <4 x i1> [[CMP]] to <4 x i32>
+  bi = bi != bi2;
+
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @sl
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @sl2
+// CHECK: [[CMP:%[^ ]+]] = icmp ne <2 x i64> [[VAL1]], [[VAL2]]
+// CHECK: %{{.*}} = sext <2 x i1> [[CMP]] to <2 x i64>
+  bl = sl != sl2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @sl
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @bl2
+// CHECK: [[CMP:%[^ ]+]] = icmp ne <2 x i64> [[VAL1]], [[VAL2]]
+// CHECK: %{{.*}} = sext <2 x i1> [[CMP]] to <2 x i64>
+  bl = sl != bl2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @bl
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @sl2
+// CHECK: [[CMP:%[^ ]+]] = icmp ne <2 x i64> [[VAL1]], [[VAL2]]
+// CHECK: %{{.*}} = sext <2 x i1> [[CMP]] to <2 x i64>
+  bl = bl != sl2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @ul
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @ul2
+// CHECK: [[CMP:%[^ ]+]] = icmp ne <2 x i64> [[VAL1]], [[VAL2]]
+// CHECK: %{{.*}} = sext <2 x i1> [[CMP]] to <2 x i64>
+  bl = ul != ul2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @ul
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @bl2
+// CHECK: [[CMP:%[^ ]+]] = icmp ne <2 x i64> [[VAL1]], [[VAL2]]
+// CHECK: %{{.*}} = sext <2 x i1> [[CMP]] to <2 x i64>
+  bl = ul != bl2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @bl
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @ul2
+// CHECK: [[CMP:%[^ ]+]] = icmp ne <2 x i64> [[VAL1]], [[VAL2]]
+// CHECK: %{{.*}} = sext <2 x i1> [[CMP]] to <2 x i64>
+  bl = bl != ul2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @bl
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @bl2
+// CHECK: [[CMP:%[^ ]+]] = icmp ne <2 x i64> [[VAL1]], [[VAL2]]
+// CHECK: %{{.*}} = sext <2 x i1> [[CMP]] to <2 x i64>
+  bl = bl != bl2;
+
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <2 x double>, <2 x double>* @fd
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <2 x double>, <2 x double>* @fd2
+// CHECK: [[CMP:%[^ ]+]] = fcmp une <2 x double> [[VAL1]], [[VAL2]]
+// CHECK: %{{.*}} = sext <2 x i1> [[CMP]] to <2 x i64>
+  bl = fd != fd2;
+}
+
+void test_cmpge (void)
+{
+// CHECK-LABEL: test_cmpge
+
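+// Note (descriptive, matches the checks that follow): ordered comparisons use
+// signed predicates (sge/sgt/...) for signed element types and unsigned
+// predicates (uge/ugt/...) for unsigned and boolean vector types.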
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @sc
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @sc2
+// CHECK: [[CMP:%[^ ]+]] = icmp sge <16 x i8> [[VAL1]], [[VAL2]]
+// CHECK: %{{.*}} = sext <16 x i1> [[CMP]] to <16 x i8>
+  bc = sc >= sc2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @uc
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @uc2
+// CHECK: [[CMP:%[^ ]+]] = icmp uge <16 x i8> [[VAL1]], [[VAL2]]
+// CHECK: %{{.*}} = sext <16 x i1> [[CMP]] to <16 x i8>
+  bc = uc >= uc2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @bc
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @bc2
+// CHECK: [[CMP:%[^ ]+]] = icmp uge <16 x i8> [[VAL1]], [[VAL2]]
+// CHECK: %{{.*}} = sext <16 x i1> [[CMP]] to <16 x i8>
+  bc = bc >= bc2;
+
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @ss
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @ss2
+// CHECK: [[CMP:%[^ ]+]] = icmp sge <8 x i16> [[VAL1]], [[VAL2]]
+// CHECK: %{{.*}} = sext <8 x i1> [[CMP]] to <8 x i16>
+  bs = ss >= ss2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @us
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @us2
+// CHECK: [[CMP:%[^ ]+]] = icmp uge <8 x i16> [[VAL1]], [[VAL2]]
+// CHECK: %{{.*}} = sext <8 x i1> [[CMP]] to <8 x i16>
+  bs = us >= us2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @bs
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @bs2
+// CHECK: [[CMP:%[^ ]+]] = icmp uge <8 x i16> [[VAL1]], [[VAL2]]
+// CHECK: %{{.*}} = sext <8 x i1> [[CMP]] to <8 x i16>
+  bs = bs >= bs2;
+
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @si
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @si2
+// CHECK: [[CMP:%[^ ]+]] = icmp sge <4 x i32> [[VAL1]], [[VAL2]]
+// CHECK: %{{.*}} = sext <4 x i1> [[CMP]] to <4 x i32>
+  bi = si >= si2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @ui
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @ui2
+// CHECK: [[CMP:%[^ ]+]] = icmp uge <4 x i32> [[VAL1]], [[VAL2]]
+// CHECK: %{{.*}} = sext <4 x i1> [[CMP]] to <4 x i32>
+  bi = ui >= ui2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @bi
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @bi2
+// CHECK: [[CMP:%[^ ]+]] = icmp uge <4 x i32> [[VAL1]], [[VAL2]]
+// CHECK: %{{.*}} = sext <4 x i1> [[CMP]] to <4 x i32>
+  bi = bi >= bi2;
+
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @sl
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @sl2
+// CHECK: [[CMP:%[^ ]+]] = icmp sge <2 x i64> [[VAL1]], [[VAL2]]
+// CHECK: %{{.*}} = sext <2 x i1> [[CMP]] to <2 x i64>
+  bl = sl >= sl2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @ul
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @ul2
+// CHECK: [[CMP:%[^ ]+]] = icmp uge <2 x i64> [[VAL1]], [[VAL2]]
+// CHECK: %{{.*}} = sext <2 x i1> [[CMP]] to <2 x i64>
+  bl = ul >= ul2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @bl
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @bl2
+// CHECK: [[CMP:%[^ ]+]] = icmp uge <2 x i64> [[VAL1]], [[VAL2]]
+// CHECK: %{{.*}} = sext <2 x i1> [[CMP]] to <2 x i64>
+  bl = bl >= bl2;
+
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <2 x double>, <2 x double>* @fd
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <2 x double>, <2 x double>* @fd2
+// CHECK: [[CMP:%[^ ]+]] = fcmp oge <2 x double> [[VAL1]], [[VAL2]]
+// CHECK: %{{.*}} = sext <2 x i1> [[CMP]] to <2 x i64>
+  bl = fd >= fd2;
+}
+
+void test_cmpgt (void)
+{
+// CHECK-LABEL: test_cmpgt
+
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @sc
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @sc2
+// CHECK: [[CMP:%[^ ]+]] = icmp sgt <16 x i8> [[VAL1]], [[VAL2]]
+// CHECK: %{{.*}} = sext <16 x i1> [[CMP]] to <16 x i8>
+  bc = sc > sc2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @uc
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @uc2
+// CHECK: [[CMP:%[^ ]+]] = icmp ugt <16 x i8> [[VAL1]], [[VAL2]]
+// CHECK: %{{.*}} = sext <16 x i1> [[CMP]] to <16 x i8>
+  bc = uc > uc2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @bc
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @bc2
+// CHECK: [[CMP:%[^ ]+]] = icmp ugt <16 x i8> [[VAL1]], [[VAL2]]
+// CHECK: %{{.*}} = sext <16 x i1> [[CMP]] to <16 x i8>
+  bc = bc > bc2;
+
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @ss
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @ss2
+// CHECK: [[CMP:%[^ ]+]] = icmp sgt <8 x i16> [[VAL1]], [[VAL2]]
+// CHECK: %{{.*}} = sext <8 x i1> [[CMP]] to <8 x i16>
+  bs = ss > ss2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @us
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @us2
+// CHECK: [[CMP:%[^ ]+]] = icmp ugt <8 x i16> [[VAL1]], [[VAL2]]
+// CHECK: %{{.*}} = sext <8 x i1> [[CMP]] to <8 x i16>
+  bs = us > us2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @bs
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @bs2
+// CHECK: [[CMP:%[^ ]+]] = icmp ugt <8 x i16> [[VAL1]], [[VAL2]]
+// CHECK: %{{.*}} = sext <8 x i1> [[CMP]] to <8 x i16>
+  bs = bs > bs2;
+
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @si
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @si2
+// CHECK: [[CMP:%[^ ]+]] = icmp sgt <4 x i32> [[VAL1]], [[VAL2]]
+// CHECK: %{{.*}} = sext <4 x i1> [[CMP]] to <4 x i32>
+  bi = si > si2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @ui
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @ui2
+// CHECK: [[CMP:%[^ ]+]] = icmp ugt <4 x i32> [[VAL1]], [[VAL2]]
+// CHECK: %{{.*}} = sext <4 x i1> [[CMP]] to <4 x i32>
+  bi = ui > ui2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @bi
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @bi2
+// CHECK: [[CMP:%[^ ]+]] = icmp ugt <4 x i32> [[VAL1]], [[VAL2]]
+// CHECK: %{{.*}} = sext <4 x i1> [[CMP]] to <4 x i32>
+  bi = bi > bi2;
+
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @sl
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @sl2
+// CHECK: [[CMP:%[^ ]+]] = icmp sgt <2 x i64> [[VAL1]], [[VAL2]]
+// CHECK: %{{.*}} = sext <2 x i1> [[CMP]] to <2 x i64>
+  bl = sl > sl2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @ul
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @ul2
+// CHECK: [[CMP:%[^ ]+]] = icmp ugt <2 x i64> [[VAL1]], [[VAL2]]
+// CHECK: %{{.*}} = sext <2 x i1> [[CMP]] to <2 x i64>
+  bl = ul > ul2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @bl
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @bl2
+// CHECK: [[CMP:%[^ ]+]] = icmp ugt <2 x i64> [[VAL1]], [[VAL2]]
+// CHECK: %{{.*}} = sext <2 x i1> [[CMP]] to <2 x i64>
+  bl = bl > bl2;
+
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <2 x double>, <2 x double>* @fd
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <2 x double>, <2 x double>* @fd2
+// CHECK: [[CMP:%[^ ]+]] = fcmp ogt <2 x double> [[VAL1]], [[VAL2]]
+// CHECK: %{{.*}} = sext <2 x i1> [[CMP]] to <2 x i64>
+  bl = fd > fd2;
+}
+
+void test_cmple (void)
+{
+// CHECK-LABEL: test_cmple
+
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @sc
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @sc2
+// CHECK: [[CMP:%[^ ]+]] = icmp sle <16 x i8> [[VAL1]], [[VAL2]]
+// CHECK: %{{.*}} = sext <16 x i1> [[CMP]] to <16 x i8>
+  bc = sc <= sc2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @uc
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @uc2
+// CHECK: [[CMP:%[^ ]+]] = icmp ule <16 x i8> [[VAL1]], [[VAL2]]
+// CHECK: %{{.*}} = sext <16 x i1> [[CMP]] to <16 x i8>
+  bc = uc <= uc2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @bc
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @bc2
+// CHECK: [[CMP:%[^ ]+]] = icmp ule <16 x i8> [[VAL1]], [[VAL2]]
+// CHECK: %{{.*}} = sext <16 x i1> [[CMP]] to <16 x i8>
+  bc = bc <= bc2;
+
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @ss
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @ss2
+// CHECK: [[CMP:%[^ ]+]] = icmp sle <8 x i16> [[VAL1]], [[VAL2]]
+// CHECK: %{{.*}} = sext <8 x i1> [[CMP]] to <8 x i16>
+  bs = ss <= ss2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @us
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @us2
+// CHECK: [[CMP:%[^ ]+]] = icmp ule <8 x i16> [[VAL1]], [[VAL2]]
+// CHECK: %{{.*}} = sext <8 x i1> [[CMP]] to <8 x i16>
+  bs = us <= us2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @bs
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @bs2
+// CHECK: [[CMP:%[^ ]+]] = icmp ule <8 x i16> [[VAL1]], [[VAL2]]
+// CHECK: %{{.*}} = sext <8 x i1> [[CMP]] to <8 x i16>
+  bs = bs <= bs2;
+
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @si
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @si2
+// CHECK: [[CMP:%[^ ]+]] = icmp sle <4 x i32> [[VAL1]], [[VAL2]]
+// CHECK: %{{.*}} = sext <4 x i1> [[CMP]] to <4 x i32>
+  bi = si <= si2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @ui
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @ui2
+// CHECK: [[CMP:%[^ ]+]] = icmp ule <4 x i32> [[VAL1]], [[VAL2]]
+// CHECK: %{{.*}} = sext <4 x i1> [[CMP]] to <4 x i32>
+  bi = ui <= ui2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @bi
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @bi2
+// CHECK: [[CMP:%[^ ]+]] = icmp ule <4 x i32> [[VAL1]], [[VAL2]]
+// CHECK: %{{.*}} = sext <4 x i1> [[CMP]] to <4 x i32>
+  bi = bi <= bi2;
+
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @sl
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @sl2
+// CHECK: [[CMP:%[^ ]+]] = icmp sle <2 x i64> [[VAL1]], [[VAL2]]
+// CHECK: %{{.*}} = sext <2 x i1> [[CMP]] to <2 x i64>
+  bl = sl <= sl2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @ul
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @ul2
+// CHECK: [[CMP:%[^ ]+]] = icmp ule <2 x i64> [[VAL1]], [[VAL2]]
+// CHECK: %{{.*}} = sext <2 x i1> [[CMP]] to <2 x i64>
+  bl = ul <= ul2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @bl
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @bl2
+// CHECK: [[CMP:%[^ ]+]] = icmp ule <2 x i64> [[VAL1]], [[VAL2]]
+// CHECK: %{{.*}} = sext <2 x i1> [[CMP]] to <2 x i64>
+  bl = bl <= bl2;
+
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <2 x double>, <2 x double>* @fd
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <2 x double>, <2 x double>* @fd2
+// CHECK: [[CMP:%[^ ]+]] = fcmp ole <2 x double> [[VAL1]], [[VAL2]]
+// CHECK: %{{.*}} = sext <2 x i1> [[CMP]] to <2 x i64>
+  bl = fd <= fd2;
+}
+
+void test_cmplt (void)
+{
+// CHECK-LABEL: test_cmplt
+
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @sc
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @sc2
+// CHECK: [[CMP:%[^ ]+]] = icmp slt <16 x i8> [[VAL1]], [[VAL2]]
+// CHECK: %{{.*}} = sext <16 x i1> [[CMP]] to <16 x i8>
+  bc = sc < sc2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @uc
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @uc2
+// CHECK: [[CMP:%[^ ]+]] = icmp ult <16 x i8> [[VAL1]], [[VAL2]]
+// CHECK: %{{.*}} = sext <16 x i1> [[CMP]] to <16 x i8>
+  bc = uc < uc2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @bc
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <16 x i8>, <16 x i8>* @bc2
+// CHECK: [[CMP:%[^ ]+]] = icmp ult <16 x i8> [[VAL1]], [[VAL2]]
+// CHECK: %{{.*}} = sext <16 x i1> [[CMP]] to <16 x i8>
+  bc = bc < bc2;
+
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @ss
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @ss2
+// CHECK: [[CMP:%[^ ]+]] = icmp slt <8 x i16> [[VAL1]], [[VAL2]]
+// CHECK: %{{.*}} = sext <8 x i1> [[CMP]] to <8 x i16>
+  bs = ss < ss2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @us
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @us2
+// CHECK: [[CMP:%[^ ]+]] = icmp ult <8 x i16> [[VAL1]], [[VAL2]]
+// CHECK: %{{.*}} = sext <8 x i1> [[CMP]] to <8 x i16>
+  bs = us < us2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @bs
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <8 x i16>, <8 x i16>* @bs2
+// CHECK: [[CMP:%[^ ]+]] = icmp ult <8 x i16> [[VAL1]], [[VAL2]]
+// CHECK: %{{.*}} = sext <8 x i1> [[CMP]] to <8 x i16>
+  bs = bs < bs2;
+
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @si
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @si2
+// CHECK: [[CMP:%[^ ]+]] = icmp slt <4 x i32> [[VAL1]], [[VAL2]]
+// CHECK: %{{.*}} = sext <4 x i1> [[CMP]] to <4 x i32>
+  bi = si < si2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @ui
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @ui2
+// CHECK: [[CMP:%[^ ]+]] = icmp ult <4 x i32> [[VAL1]], [[VAL2]]
+// CHECK: %{{.*}} = sext <4 x i1> [[CMP]] to <4 x i32>
+  bi = ui < ui2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @bi
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <4 x i32>, <4 x i32>* @bi2
+// CHECK: [[CMP:%[^ ]+]] = icmp ult <4 x i32> [[VAL1]], [[VAL2]]
+// CHECK: %{{.*}} = sext <4 x i1> [[CMP]] to <4 x i32>
+  bi = bi < bi2;
+
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @sl
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @sl2
+// CHECK: [[CMP:%[^ ]+]] = icmp slt <2 x i64> [[VAL1]], [[VAL2]]
+// CHECK: %{{.*}} = sext <2 x i1> [[CMP]] to <2 x i64>
+  bl = sl < sl2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @ul
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @ul2
+// CHECK: [[CMP:%[^ ]+]] = icmp ult <2 x i64> [[VAL1]], [[VAL2]]
+// CHECK: %{{.*}} = sext <2 x i1> [[CMP]] to <2 x i64>
+  bl = ul < ul2;
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @bl
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <2 x i64>, <2 x i64>* @bl2
+// CHECK: [[CMP:%[^ ]+]] = icmp ult <2 x i64> [[VAL1]], [[VAL2]]
+// CHECK: %{{.*}} = sext <2 x i1> [[CMP]] to <2 x i64>
+  bl = bl < bl2;
+
+// CHECK: [[VAL1:%[^ ]+]] = load volatile <2 x double>, <2 x double>* @fd
+// CHECK: [[VAL2:%[^ ]+]] = load volatile <2 x double>, <2 x double>* @fd2
+// CHECK: [[CMP:%[^ ]+]] = fcmp olt <2 x double> [[VAL1]], [[VAL2]]
+// CHECK: %{{.*}} = sext <2 x i1> [[CMP]] to <2 x i64>
+  bl = fd < fd2;
+}
+